From 314da66ae5a95787267bd21f2f6b1db8e12278c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Baylac=20Jacqu=C3=A9?=
Date: Fri, 2 Dec 2022 21:10:13 +0100
Subject: [PATCH] Introduce Cuda support

---
Review notes (this section is ignored by git-am):

* Line structure was rebuilt from the +/- markers and the hunk header
  counts. Blank context lines and docstring wrapping inside context
  lines are inferred; re-check them against the target file before
  applying.
* main(): libs_dir is defined again before the exec_binary() call. Its
  assignment was removed while the call still referenced it, which
  raised NameError at runtime.
* main(): dropped the now-unused find_nvidia_dsos() scan; nvidia_main()
  performs its own lookup.
* nvidia_main(): the "Nvidia cuda dir" log line now prints cuda_dir.
* patch_dso(): raises RuntimeError rather than BaseException so that
  ordinary "except Exception" handlers can see the failure.
* CUDA_DSO_PATTERNS: regex literals are raw strings (same value, no
  invalid-escape warning).
* Hunk and diffstat counts were updated accordingly; the "index" blob
  ids are still the ones of the original submission.

 nixglhost-wrapper.py | 159 ++++++++++++++++++++++++++++++-------------
 1 file changed, 112 insertions(+), 47 deletions(-)

diff --git a/nixglhost-wrapper.py b/nixglhost-wrapper.py
index 949d26a..6ea279d 100755
--- a/nixglhost-wrapper.py
+++ b/nixglhost-wrapper.py
@@ -7,6 +7,7 @@ import re
 import shutil
 import subprocess
 import sys
+from typing import List, Dict
 
 IN_NIX_STORE = False
 
@@ -53,10 +54,8 @@ NVIDIA_DSO_PATTERNS = [
     "libnvidia-vulkan-producer\.so.*$",
     "libnvidia-wayland-client\.so.*$",
     "libnvoptix\.so.*$",
-
     # Host dependencies required by the nvidia DSOs to properly
     # operate
-
     # libdrm
     "libdrm\.so.*$",
     # libffi
@@ -76,15 +75,12 @@ NVIDIA_DSO_PATTERNS = [
     # libwayland
     "libwayland-server\.so.*$",
     "libwayland-client\.so.*$",
-
 ]
 
-CUDA_DSO_PATTERNS = [
-    "libcudadebugger\.so.*$",
-    "libcuda\.so.*$"
-]
+CUDA_DSO_PATTERNS = [r"libcudadebugger\.so.*$", r"libcuda\.so.*$"]
 
-def find_files(path, files_patterns):
+
+def find_files(path: str, files_patterns: List[str]):
     """Scans the PATH directory looking for the files complying with
     the FILES_PATTERNS regexes list.
 
@@ -104,7 +100,8 @@ def find_files(path, files_patterns):
 
     return files
 
-def find_nvidia_dsos(path):
+
+def find_nvidia_dsos(path: str):
     """Scans the PATH directory looking for the Nvidia driver shared
     libraries and their dependencies. A shared library is considered as
     a Nvidia one if its name maches a pattern contained in
@@ -113,7 +110,8 @@ def find_nvidia_dsos(path):
     Returns the list of the DSOs absolute paths."""
     return find_files(path, NVIDIA_DSO_PATTERNS)
 
-def find_cuda_dsos(path):
+
+def find_cuda_dsos(path: str):
     """Scans the PATH directory looking for the cuda driver shared
     libraries. A shared library is considered as
     a cuda one if its name maches a pattern contained in
@@ -122,44 +120,51 @@ def find_cuda_dsos(path):
     Returns the list of the DSOs absolute paths."""
     return find_files(path, CUDA_DSO_PATTERNS)
 
-def copy_and_patch_dsos_to_libs_dir(dsos, libs_dir):
+
+def copy_and_patch_libs(dsos: List[str], libs_dir: str, rpath=None):
     """Copies the graphic vendor DSOs to the cache directory before
     patchelf-ing them.
 
     The DSOs can dlopen each other. Sadly, we don't want any host
     libraries to the LD_LIBRARY_PATH to prevent polluting the nix
-    binary env. We won't be able to find them on runtime. We don't
-    want to alter LD_LIBRARY_PATH, the only option left is to patch
-    their ELFs runpath.
+    binary env. The only option left is to patch their ELFs runpath to
+    point to RPATH.
 
     We also don't want to directly modify the host DSOs, we first copy
     them to the user's personal cache directory. We then alter their
     runpath to point to the cache directory."""
+    rpath = rpath if (rpath is not None) else libs_dir
     for dso in dsos:
         basename = os.path.basename(dso)
         newpath = os.path.join(libs_dir, basename)
         log_info(f"Copying {basename} to {newpath}")
         shutil.copyfile(dso, newpath)
         shutil.copymode(dso, newpath)
-        patch_dso(newpath, libs_dir)
+        patch_dso(newpath, rpath)
 
 
-def log_info(string):
+def log_info(string: str):
     """Prints STR to STDERR if the DEBUG environment variable is
     set."""
     if "DEBUG" in os.environ:
         print(f"[+] {string}", file=sys.stderr)
 
 
-def patch_dso(dsoPath, rpath):
+def patch_dso(dsoPath: str, rpath: str):
     """Call patchelf to change the DSOPATH runpath with RPATH."""
     log_info(f"Patching {dsoPath}")
     log_info(f"Exec: {PATCHELF_PATH} --set-rpath {rpath} {dsoPath}")
     res = subprocess.run([PATCHELF_PATH, "--set-rpath", rpath, dsoPath])
     if res.returncode != 0:
-        raise (f"Cannot patch {dsoPath}. Patchelf exited with {res.returncode}")
+        raise RuntimeError(
+            f"Cannot patch {dsoPath}. Patchelf exited with {res.returncode}"
+        )
 
-def generate_nvidia_egl_config_files(cache_dir, libs_dir):
+    # NOTE: is this the right abstraction? Looks like I'm stitching
+    # some loosely connected parts together for no good reason.
+
+
+def generate_nvidia_egl_config_files(cache_dir: str, libs_dir: str):
     """Generates a set of JSON files describing the EGL exec
     envirnoment to libglvnd.
 
@@ -167,26 +172,29 @@ def generate_nvidia_egl_config_files(cache_dir, libs_dir):
     Nvidia DSOs."""
 
     def generate_egl_conf_json(dso):
-        return json.dumps({
-            "file_format_version": "1.0.0",
-            "ICD": {
-                "library_path": dso
-            }})
+        return json.dumps(
+            {"file_format_version": "1.0.0", "ICD": {"library_path": dso}}
+        )
 
     egl_conf_dir = os.path.join(cache_dir, "egl-confs")
     os.makedirs(egl_conf_dir, exist_ok=True)
-    dso_paths = [ ("10_nvidia.json", f"{libs_dir}/libEGL_nvidia.so.0"),
-                  ("10_nvidia_wayland.json", f"{libs_dir}/libnvidia-egl-wayland.so.1"),
-                  ("15_nvidia_gbm.json", f"{libs_dir}/libnvidia-egl-gbm.so.1") ]
+    dso_paths = [
+        ("10_nvidia.json", f"{libs_dir}/libEGL_nvidia.so.0"),
+        ("10_nvidia_wayland.json", f"{libs_dir}/libnvidia-egl-wayland.so.1"),
+        ("15_nvidia_gbm.json", f"{libs_dir}/libnvidia-egl-gbm.so.1"),
+    ]
 
     for (conf_file_name, dso_path) in dso_paths:
-        with open(os.path.join(egl_conf_dir, conf_file_name), "w", encoding = "utf-8") as f:
+        with open(
+            os.path.join(egl_conf_dir, conf_file_name), "w", encoding="utf-8"
+        ) as f:
             log_info(f"Writing {dso_path} conf to {egl_conf_dir}")
             f.write(generate_egl_conf_json(dso_path))
 
     return egl_conf_dir
 
-def exec_binary(bin_path, args, cache_dir, libs_dir):
+
+def exec_binary(bin_path: str, args: List[str], cache_dir: str, libs_dir: str):
     """Replace the current python program with the program pointed by
     BIN_PATH.
 
@@ -196,32 +204,89 @@ def exec_binary(bin_path, args, cache_dir, libs_dir):
     # The following two env variables are required by our patched libglvnd
     # implementation to figure out what kind of driver the host
     # machine is using.
-    os.environ["NIX_GLVND_GLX_PATH"] = libs_dir
-    os.environ["__GLX_VENDOR_LIBRARY_NAME"] = "nvidia"
-    # The following env variable is pointing to the directory
-    # containing the EGL configuration.
-    os.environ["__EGL_VENDOR_LIBRARY_DIRS"] = generate_nvidia_egl_config_files(cache_dir, libs_dir)
     os.execv(bin_path, [bin_path] + args)
 
 
+def nvidia_main(cache_dir: str, gl_vendor_path: str):
+    """Prepares the environment necessary to run a opengl/cuda program
+    on a Nvidia graphics card. It is by definition really stateful.
+
+    Roughly, we're going to:
+
+    1. Setup the nvidia cache directory.
+    2. Find the nvidia DSOs in the GL_VENDOR_PATH.
+    3. Copy these DSOs to their appropriate cache directories.
+    4. Generate the EGL configuration files.
+    5. Patchelf the runpath of what needs to be patched.
+    6. Generate the env variables the main process is supposed to set.
+
+    Overall, we're using two different tricks to setup the GL/cuda envs:
+
+    - For Cuda and GLX: we're isolating the main DSOs in their own
+      dirs, add these dirs to the LD_LIBRARY_PATH and patch their
+      runpath to point to the generic cache dir.
+    - For EGL: we're generating some JSON configuration files.
+      libglvnd will later use these configuration files to directly
+      load the appropriate DSOs. We don't need any
+      LD_LIBRARY_PATH-fueled trick.
+
+    Keep in mind we want to keep the host system out of the
+    LD_LIBRARY_PATH to make sure we won't inject any host DSOs (other
+    than the GL/Cuda ones OFC) to the nix-built program.
+
+    This function returns a dictionary containing the env variables
+    supposed to be added to the current process down the line."""
+    log_info("Nvidia routine begins")
+    cache_dir = os.path.join(cache_dir, "nvidia")
+    libs_dir = os.path.join(cache_dir, "lib")
+    cuda_dir = os.path.join(cache_dir, "cuda")
+    log_info(f"Nvidia libs dir: {libs_dir}")
+    log_info(f"Nvidia cuda dir: {cuda_dir}")
+    os.makedirs(libs_dir, exist_ok=True)
+    os.makedirs(cuda_dir, exist_ok=True)
+    log_info(f"Searching for the Nvidia OpenGL DSOs in {gl_vendor_path}")
+    # Nvidia OpenGL DSOs
+    opengl_dsos = find_files(gl_vendor_path, NVIDIA_DSO_PATTERNS)
+    log_info(f"Found the following DSOs:")
+    [log_info(dso) for dso in opengl_dsos]
+    log_info("Patching the DSOs.")
+    copy_and_patch_libs(opengl_dsos, libs_dir)
+    log_info("Setting NVIDIA-specific env variables.")
+    # Nvidia Cuda DSOs
+    log_info(f"Searching for the Nvidia Cuda DSOs in {gl_vendor_path}")
+    cuda_dsos = find_files(gl_vendor_path, CUDA_DSO_PATTERNS)
+    log_info(f"Found the following DSOs:")
+    [log_info(dso) for dso in cuda_dsos]
+    log_info("Patching the DSOs.")
+    copy_and_patch_libs(cuda_dsos, cuda_dir, libs_dir)
+    log_info("Setting NVIDIA-specific env variables.")
+    # Preparing the env
+    new_env = {}
+    log_info(f"__GLX_VENDOR_LIBRARY_NAME = nvidia")
+    new_env["__GLX_VENDOR_LIBRARY_NAME"] = "nvidia"
+    egl_config_files = generate_nvidia_egl_config_files(cache_dir, libs_dir)
+    log_info(f"__EGL_VENDOR_LIBRARY_DIRS = {egl_config_files}")
+    new_env["__EGL_VENDOR_LIBRARY_DIRS"] = egl_config_files
+    ld_library_path = os.environ.get("LD_LIBRARY_PATH", None)
+    ld_library_path = (
+        cuda_dir if ld_library_path is None else f"{cuda_dir}:{ld_library_path}"
+    )
+    log_info(f"LD_LIBRARY_PATH = {ld_library_path}")
+    new_env["LD_LIBRARY_PATH"] = ld_library_path
+    return new_env
+
+
 def main(args):
-    # 1. Scan NIX_GLVND_GLX_PATH for nvidia DSOs
-    # 2. Copy DSOs
-    # 3. Patchelf DSOs
-    # 4. Execv program
     home = os.path.expanduser("~")
     xdg_cache_home = os.environ.get("XDG_CACHE_HOME", os.path.join(home, ".cache"))
     cache_dir = os.path.join(xdg_cache_home, "nix-gl-host")
-    libs_dir = os.path.join(cache_dir, "lib")
-    os.makedirs(cache_dir, exist_ok=True)
-    os.makedirs(libs_dir, exist_ok=True)
     log_info(f'Using "{cache_dir}" as cache dir.')
+    os.makedirs(cache_dir, exist_ok=True)
     log_info(f'Scanning "{args.GL_VENDOR_PATH}" for DSOs.')
-    dsos = find_nvidia_dsos(args.GL_VENDOR_PATH)
-    log_info(f"Found the following DSOs:")
-    [log_info(dso) for dso in dsos]
-    log_info("Patching the DSOs.")
-    copy_and_patch_dsos_to_libs_dir(dsos, libs_dir)
+    new_env = nvidia_main(cache_dir, args.GL_VENDOR_PATH)
+    os.environ.update(new_env)
+    # nvidia_main() stores the patched GL DSOs in <cache_dir>/nvidia/lib.
+    libs_dir = os.path.join(cache_dir, "nvidia", "lib")
     exec_binary(args.NIX_BINARY, args.ARGS, cache_dir, libs_dir)
     return 0
 
@@ -249,4 +314,4 @@ if __name__ == "__main__":
     )
     args = parser.parse_args()
     ret = main(args)
-    os.exit(ret)
+    sys.exit(ret)