Introduce Cuda support
This commit is contained in:
parent
8100ad24d6
commit
314da66ae5
|
@ -7,6 +7,7 @@ import re
|
|||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from typing import List, Dict
|
||||
|
||||
IN_NIX_STORE = False
|
||||
|
||||
|
@ -53,10 +54,8 @@ NVIDIA_DSO_PATTERNS = [
|
|||
"libnvidia-vulkan-producer\.so.*$",
|
||||
"libnvidia-wayland-client\.so.*$",
|
||||
"libnvoptix\.so.*$",
|
||||
|
||||
# Host dependencies required by the nvidia DSOs to properly
|
||||
# operate
|
||||
|
||||
# libdrm
|
||||
"libdrm\.so.*$",
|
||||
# libffi
|
||||
|
@ -76,15 +75,12 @@ NVIDIA_DSO_PATTERNS = [
|
|||
# libwayland
|
||||
"libwayland-server\.so.*$",
|
||||
"libwayland-client\.so.*$",
|
||||
|
||||
]
|
||||
|
||||
CUDA_DSO_PATTERNS = [
|
||||
"libcudadebugger\.so.*$",
|
||||
"libcuda\.so.*$"
|
||||
]
|
||||
CUDA_DSO_PATTERNS = ["libcudadebugger\.so.*$", "libcuda\.so.*$"]
|
||||
|
||||
def find_files(path, files_patterns):
|
||||
|
||||
def find_files(path: str, files_patterns: List[str]):
|
||||
"""Scans the PATH directory looking for the files complying with
|
||||
the FILES_PATTERNS regexes list.
|
||||
|
||||
|
@ -104,7 +100,8 @@ def find_files(path, files_patterns):
|
|||
|
||||
return files
|
||||
|
||||
def find_nvidia_dsos(path):
|
||||
|
||||
def find_nvidia_dsos(path: str):
|
||||
"""Scans the PATH directory looking for the Nvidia driver shared
|
||||
libraries and their dependencies. A shared library is considered
|
||||
as an Nvidia one if its name matches a pattern contained in
|
||||
|
@ -113,7 +110,8 @@ def find_nvidia_dsos(path):
|
|||
Returns the list of the DSOs absolute paths."""
|
||||
return find_files(path, NVIDIA_DSO_PATTERNS)
|
||||
|
||||
def find_cuda_dsos(path):
|
||||
|
||||
def find_cuda_dsos(path: str):
|
||||
"""Scans the PATH directory looking for the cuda driver shared
|
||||
libraries. A shared library is considered
|
||||
as a cuda one if its name matches a pattern contained in
|
||||
|
@ -122,44 +120,51 @@ def find_cuda_dsos(path):
|
|||
Returns the list of the DSOs absolute paths."""
|
||||
return find_files(path, CUDA_DSO_PATTERNS)
|
||||
|
||||
def copy_and_patch_dsos_to_libs_dir(dsos, libs_dir):
|
||||
|
||||
def copy_and_patch_libs(dsos: List[str], libs_dir: str, rpath=None):
|
||||
"""Copies the graphic vendor DSOs to the cache directory before
|
||||
patchelf-ing them.
|
||||
|
||||
The DSOs can dlopen each other. Sadly, we don't want to add any host
|
||||
libraries to the LD_LIBRARY_PATH to prevent polluting the nix
|
||||
binary env. We won't be able to find them on runtime. We don't
|
||||
want to alter LD_LIBRARY_PATH, the only option left is to patch
|
||||
their ELFs runpath.
|
||||
binary env. The only option left is to patch their ELFs runpath to
|
||||
point to RPATH.
|
||||
|
||||
We also don't want to directly modify the host DSOs, we first copy
|
||||
them to the user's personal cache directory. We then alter their
|
||||
runpath to point to the cache directory."""
|
||||
rpath = rpath if (rpath is not None) else libs_dir
|
||||
for dso in dsos:
|
||||
basename = os.path.basename(dso)
|
||||
newpath = os.path.join(libs_dir, basename)
|
||||
log_info(f"Copying {basename} to {newpath}")
|
||||
shutil.copyfile(dso, newpath)
|
||||
shutil.copymode(dso, newpath)
|
||||
patch_dso(newpath, libs_dir)
|
||||
patch_dso(newpath, rpath)
|
||||
|
||||
|
||||
def log_info(string):
|
||||
def log_info(string: str):
|
||||
"""Prints STRING to STDERR if the DEBUG environment variable is
|
||||
set."""
|
||||
if "DEBUG" in os.environ:
|
||||
print(f"[+] {string}", file=sys.stderr)
|
||||
|
||||
|
||||
def patch_dso(dsoPath, rpath):
|
||||
def patch_dso(dsoPath: str, rpath: str):
|
||||
"""Call patchelf to change the DSOPATH runpath with RPATH."""
|
||||
log_info(f"Patching {dsoPath}")
|
||||
log_info(f"Exec: {PATCHELF_PATH} --set-rpath {rpath} {dsoPath}")
|
||||
res = subprocess.run([PATCHELF_PATH, "--set-rpath", rpath, dsoPath])
|
||||
if res.returncode != 0:
|
||||
raise (f"Cannot patch {dsoPath}. Patchelf exited with {res.returncode}")
|
||||
raise BaseException(
|
||||
f"Cannot patch {dsoPath}. Patchelf exited with {res.returncode}"
|
||||
)
|
||||
|
||||
def generate_nvidia_egl_config_files(cache_dir, libs_dir):
|
||||
# NOTE: is this the right abstraction? Looks like I'm stitching
|
||||
# some loosely connected parts together for no good reason.
|
||||
|
||||
|
||||
def generate_nvidia_egl_config_files(cache_dir: str, libs_dir: str):
|
||||
"""Generates a set of JSON files describing the EGL exec
|
||||
environment to libglvnd.
|
||||
|
||||
|
@ -167,26 +172,29 @@ def generate_nvidia_egl_config_files(cache_dir, libs_dir):
|
|||
Nvidia DSOs."""
|
||||
|
||||
def generate_egl_conf_json(dso):
|
||||
return json.dumps({
|
||||
"file_format_version": "1.0.0",
|
||||
"ICD": {
|
||||
"library_path": dso
|
||||
}})
|
||||
return json.dumps(
|
||||
{"file_format_version": "1.0.0", "ICD": {"library_path": dso}}
|
||||
)
|
||||
|
||||
egl_conf_dir = os.path.join(cache_dir, "egl-confs")
|
||||
os.makedirs(egl_conf_dir, exist_ok=True)
|
||||
dso_paths = [ ("10_nvidia.json", f"{libs_dir}/libEGL_nvidia.so.0"),
|
||||
("10_nvidia_wayland.json", f"{libs_dir}/libnvidia-egl-wayland.so.1"),
|
||||
("15_nvidia_gbm.json", f"{libs_dir}/libnvidia-egl-gbm.so.1") ]
|
||||
dso_paths = [
|
||||
("10_nvidia.json", f"{libs_dir}/libEGL_nvidia.so.0"),
|
||||
("10_nvidia_wayland.json", f"{libs_dir}/libnvidia-egl-wayland.so.1"),
|
||||
("15_nvidia_gbm.json", f"{libs_dir}/libnvidia-egl-gbm.so.1"),
|
||||
]
|
||||
|
||||
for (conf_file_name, dso_path) in dso_paths:
|
||||
with open(os.path.join(egl_conf_dir, conf_file_name), "w", encoding = "utf-8") as f:
|
||||
with open(
|
||||
os.path.join(egl_conf_dir, conf_file_name), "w", encoding="utf-8"
|
||||
) as f:
|
||||
log_info(f"Writing {dso_path} conf to {egl_conf_dir}")
|
||||
f.write(generate_egl_conf_json(dso_path))
|
||||
|
||||
return egl_conf_dir
|
||||
|
||||
def exec_binary(bin_path, args, cache_dir, libs_dir):
|
||||
|
||||
def exec_binary(bin_path: str, args: List[str], cache_dir: str, libs_dir: str):
|
||||
"""Replace the current python program with the program pointed by
|
||||
BIN_PATH.
|
||||
|
||||
|
@ -196,32 +204,88 @@ def exec_binary(bin_path, args, cache_dir, libs_dir):
|
|||
# The following two env variables are required by our patched libglvnd
|
||||
# implementation to figure out what kind of driver the host
|
||||
# machine is using.
|
||||
os.environ["NIX_GLVND_GLX_PATH"] = libs_dir
|
||||
os.environ["__GLX_VENDOR_LIBRARY_NAME"] = "nvidia"
|
||||
# The following env variable is pointing to the directory
|
||||
# containing the EGL configuration.
|
||||
os.environ["__EGL_VENDOR_LIBRARY_DIRS"] = generate_nvidia_egl_config_files(cache_dir, libs_dir)
|
||||
os.execv(bin_path, [bin_path] + args)
|
||||
|
||||
|
||||
def nvidia_main(cache_dir: str, gl_vendor_path: str):
|
||||
"""Prepares the environment necessary to run an opengl/cuda program
|
||||
on a Nvidia graphics card. It is by definition really stateful.
|
||||
|
||||
Roughly, we're going to:
|
||||
|
||||
1. Setup the nvidia cache directory.
|
||||
2. Find the nvidia DSOs in the GL_VENDOR_PATH.
|
||||
3. Copy these DSOs to their appropriate cache directories.
|
||||
4. Generate the EGL configuration files.
|
||||
5. Patchelf the runpath of what needs to be patched.
|
||||
6. Generate the env variables the main process is supposed to set.
|
||||
|
||||
Overall, we're using two different tricks to setup the GL/cuda envs:
|
||||
|
||||
- For Cuda and GLX: we're isolating the main DSOs in their own
|
||||
dirs, add these dirs to the LD_LIBRARY_PATH and patch their
|
||||
runpath to point to the generic cache dir.
|
||||
- For EGL: we're generating some JSON configuration files.
|
||||
libglvnd will later use these configuration files to directly
|
||||
load the appropriate DSOs. We don't need any
|
||||
LD_LIBRARY_PATH-fueled trick.
|
||||
|
||||
Keep in mind we want to keep the host system out of the
|
||||
LD_LIBRARY_PATH to make sure we won't inject any host DSOs (other
|
||||
than the GL/Cuda ones, of course) into the nix-built program.
|
||||
|
||||
This function returns a dictionary containing the env variables
|
||||
supposed to be added to the current process down the line."""
|
||||
log_info("Nvidia routine begins")
|
||||
cache_dir = os.path.join(cache_dir, "nvidia")
|
||||
libs_dir = os.path.join(cache_dir, "lib")
|
||||
cuda_dir = os.path.join(cache_dir, "cuda")
|
||||
log_info(f"Nvidia libs dir: {libs_dir}")
|
||||
log_info(f"Nvidia cuda dir: {libs_dir}")
|
||||
os.makedirs(libs_dir, exist_ok=True)
|
||||
os.makedirs(cuda_dir, exist_ok=True)
|
||||
log_info(f"Searching for the Nvidia OpenGL DSOs in {gl_vendor_path}")
|
||||
# Nvidia OpenGL DSOs
|
||||
opengl_dsos = find_files(gl_vendor_path, NVIDIA_DSO_PATTERNS)
|
||||
log_info(f"Found the following DSOs:")
|
||||
[log_info(dso) for dso in opengl_dsos]
|
||||
log_info("Patching the DSOs.")
|
||||
copy_and_patch_libs(opengl_dsos, libs_dir)
|
||||
log_info("Setting NVIDIA-specific env variables.")
|
||||
# Nvidia Cuda DSOs
|
||||
log_info(f"Searching for the Nvidia Cuda DSOs in {gl_vendor_path}")
|
||||
cuda_dsos = find_files(gl_vendor_path, CUDA_DSO_PATTERNS)
|
||||
log_info(f"Found the following DSOs:")
|
||||
[log_info(dso) for dso in cuda_dsos]
|
||||
log_info("Patching the DSOs.")
|
||||
copy_and_patch_libs(cuda_dsos, cuda_dir, libs_dir)
|
||||
log_info("Setting NVIDIA-specific env variables.")
|
||||
# Preparing the env
|
||||
new_env = {}
|
||||
log_info(f"__GLX_VENDOR_LIBRARY_NAME = nvidia")
|
||||
new_env["__GLX_VENDOR_LIBRARY_NAME"] = "nvidia"
|
||||
egl_config_files = generate_nvidia_egl_config_files(cache_dir, libs_dir)
|
||||
log_info(f"__EGL_VENDOR_LIBRARY_DIRS = {egl_config_files}")
|
||||
new_env["__EGL_VENDOR_LIBRARY_DIRS"] = egl_config_files
|
||||
ld_library_path = os.environ.get("LD_LIBRARY_PATH", None)
|
||||
ld_library_path = (
|
||||
cuda_dir if ld_library_path is None else f"{cuda_dir}:{ld_library_path}"
|
||||
)
|
||||
log_info(f"LD_LIBRARY_PATH = {ld_library_path}")
|
||||
new_env["LD_LIBRARY_PATH"] = ld_library_path
|
||||
return new_env
|
||||
|
||||
|
||||
def main(args):
|
||||
# 1. Scan NIX_GLVND_GLX_PATH for nvidia DSOs
|
||||
# 2. Copy DSOs
|
||||
# 3. Patchelf DSOs
|
||||
# 4. Execv program
|
||||
home = os.path.expanduser("~")
|
||||
xdg_cache_home = os.environ.get("XDG_CACHE_HOME", os.path.join(home, ".cache"))
|
||||
cache_dir = os.path.join(xdg_cache_home, "nix-gl-host")
|
||||
libs_dir = os.path.join(cache_dir, "lib")
|
||||
os.makedirs(cache_dir, exist_ok=True)
|
||||
os.makedirs(libs_dir, exist_ok=True)
|
||||
log_info(f'Using "{cache_dir}" as cache dir.')
|
||||
os.makedirs(cache_dir, exist_ok=True)
|
||||
log_info(f'Scanning "{args.GL_VENDOR_PATH}" for DSOs.')
|
||||
dsos = find_nvidia_dsos(args.GL_VENDOR_PATH)
|
||||
log_info(f"Found the following DSOs:")
|
||||
[log_info(dso) for dso in dsos]
|
||||
log_info("Patching the DSOs.")
|
||||
copy_and_patch_dsos_to_libs_dir(dsos, libs_dir)
|
||||
new_env = nvidia_main(cache_dir, args.GL_VENDOR_PATH)
|
||||
os.environ.update(new_env)
|
||||
exec_binary(args.NIX_BINARY, args.ARGS, cache_dir, libs_dir)
|
||||
return 0
|
||||
|
||||
|
@ -249,4 +313,4 @@ if __name__ == "__main__":
|
|||
)
|
||||
args = parser.parse_args()
|
||||
ret = main(args)
|
||||
os.exit(ret)
|
||||
sys.exit(ret)
|
||||
|
|
Loading…
Reference in New Issue