Introduce a cache system

Copying & patching all the DSOs is a time-consuming process (~10s on a
computer with a slow hard drive). We definitely don't want to go through
it on each process start, so we need to introduce a cache.

For this cache, we go the conservative way. We're going to "resolve" each
DSO name (i.e. find the DSO's absolute path) and sha256-hash each DSO.
We're then going to compare the fingerprints to determine whether or
not we need to nuke and rebuild the DSO cache.

The cache state is persisted through a JSON file saved in the cache dir.
This commit is contained in:
Félix Baylac Jacqué 2022-12-04 10:05:31 +01:00
parent 494bac3613
commit 375148c949
4 changed files with 272 additions and 70 deletions

1
.envrc Normal file
View File

@ -0,0 +1 @@
use nix

View File

@ -19,12 +19,15 @@ pkgs.stdenvNoCC.mkDerivation {
patchShebangs $out/bin/nixglhost
'';
postCheck = ''
black --check $out/bin/nixglhost
doCheck = true;
checkPhase = ''
black --check src/*.py
nixpkgs-fmt --check *.nix
python src/nixglhost_wrapper_test.py
'';
installPhase = ''
install -D -m0755 nixglhost-wrapper.py $out/bin/nixglhost
install -D -m0755 src/nixglhost_wrapper.py $out/bin/nixglhost
'';
}

View File

@ -1,16 +1,20 @@
#!/usr/bin/env python3
import argparse
import hashlib
import json
import os
import re
import shutil
import subprocess
import sys
from typing import List, Dict
import time
from glob import glob
from typing import List, Literal, Dict, Tuple, TypedDict, TextIO, Optional
IN_NIX_STORE = False
if IN_NIX_STORE:
# The following paths are meant to be substituted by Nix at build
# time.
@ -19,6 +23,80 @@ else:
PATCHELF_PATH = "patchelf"
class ResolvedLib:
    """A library name resolved to a file path, fingerprinted by content.

    Attributes:
        name: name the library is known under.
        fullpath: path of the file the name was resolved to.
        sha256: hex-encoded SHA-256 digest of the file content.
    """

    def __init__(self, name: str, fullpath: str, sha256: Optional[str] = None):
        """Build a resolved library entry.

        When SHA256 is omitted, the file at FULLPATH is opened and
        hashed; any OSError raised while reading it propagates to the
        caller. When SHA256 is given, the file is not touched at all.
        """
        self.name: str = name
        self.fullpath: str = fullpath
        self.sha256: str = self._hash_file(fullpath) if sha256 is None else sha256

    @staticmethod
    def _hash_file(path: str, chunk_size: int = 1 << 20) -> str:
        """Return the hex SHA-256 digest of the file located at PATH."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            # Feed the hash chunk by chunk so that a large DSO never has
            # to be held entirely in memory.
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def __repr__(self) -> str:
        return f"ResolvedLib<{self.name}, {self.fullpath}, {self.sha256}>"

    def to_dict(self) -> Dict:
        """Return a JSON-serializable dict; the inverse of from_dict."""
        return {"name": self.name, "fullpath": self.fullpath, "sha256": self.sha256}

    def __eq__(self, o):
        # Let Python fall back to its default comparison for foreign
        # types instead of failing on a missing attribute.
        if not isinstance(o, ResolvedLib):
            return NotImplemented
        return (
            self.name == o.name
            and self.fullpath == o.fullpath
            and self.sha256 == o.sha256
        )

    @classmethod
    def from_dict(cls, d: Dict):
        """Rebuild an instance from a dict produced by to_dict.

        Raises KeyError if "name", "fullpath" or "sha256" is missing.
        """
        return cls(d["name"], d["fullpath"], d["sha256"])
class HostDSOs:
    """Snapshot of the resolved host DSOs, grouped in three families.

    Attributes:
        glx: resolved libraries of the GLX family, keyed by name.
        cuda: resolved libraries of the CUDA family, keyed by name.
        generic: every other resolved library, keyed by name.
        version: version number written to / read from the JSON form.
    """

    def __init__(
        self,
        glx: Dict[str, "ResolvedLib"],
        cuda: Dict[str, "ResolvedLib"],
        generic: Dict[str, "ResolvedLib"],
        version: int = 1,
    ):
        self.glx = glx
        self.cuda = cuda
        self.generic = generic
        self.version = version

    def __eq__(self, other):
        # Let Python fall back to its default comparison for foreign
        # types instead of failing on a missing attribute.
        if not isinstance(other, HostDSOs):
            return NotImplemented
        return (
            self.glx == other.glx
            and self.cuda == other.cuda
            and self.generic == other.generic
            and self.version == other.version
        )

    def to_json(self) -> str:
        """Serialize this snapshot to a JSON string.

        Keys are sorted, so two equal snapshots always serialize to the
        same string whatever the insertion order of their dicts.
        """
        return json.dumps(
            {
                # Persist the instance's own version: a hard-coded value
                # would be silently lost on a to_json/from_json round trip.
                "version": self.version,
                "glx": {k: v.to_dict() for k, v in self.glx.items()},
                "cuda": {k: v.to_dict() for k, v in self.cuda.items()},
                "generic": {k: v.to_dict() for k, v in self.generic.items()},
            },
            sort_keys=True,
        )

    @classmethod
    def from_json(cls, o: str):
        """Rebuild a snapshot from a string produced by to_json.

        Raises json.JSONDecodeError on malformed JSON and KeyError when
        one of the expected keys is missing.
        """
        d: Dict = json.loads(o)
        return cls(
            version=d["version"],
            glx={k: ResolvedLib.from_dict(v) for k, v in d["glx"].items()},
            cuda={k: ResolvedLib.from_dict(v) for k, v in d["cuda"].items()},
            generic={k: ResolvedLib.from_dict(v) for k, v in d["generic"].items()},
        )
# The following regexes list has been figured out by looking at the
# output of nix-build -A linuxPackages.nvidia_x11 before running
# ls ./result/lib | grep -E ".so$".
@ -29,7 +107,6 @@ NVIDIA_DSO_PATTERNS = [
"libEGL_nvidia\.so.*$",
"libGLESv1_CM_nvidia\.so.*$",
"libGLESv2_nvidia\.so.*$",
"libGLX_nvidia\.so.*$",
"libglxserver_nvidia\.so.*$",
"libnvcuvid\.so.*$",
"libnvidia-allocator\.so.*$",
@ -82,12 +159,64 @@ CUDA_DSO_PATTERNS = ["libcudadebugger\.so.*$", "libcuda\.so.*$"]
# Regexes matching the Nvidia GLX vendor library. Written as a raw string
# so that "\." reaches the regex engine without going through an invalid
# string escape sequence.
GLX_DSO_PATTERNS = [r"libGLX_nvidia\.so.*$"]
def find_files(path: str, files_patterns: List[str]) -> List[str]:
"""Scans the PATH directory looking for the files complying with
the FILES_PATTERNS regexes list.
def get_ld_paths() -> List[str]:
"""
Vendored from https://github.com/albertz/system-tools/blob/master/bin/find-lib-in-path.py
Returns the list of the DSOs absolute paths."""
files = []
Find all the directories pointed by LD_LIBRARY_PATH and the ld cache."""
def parse_ld_conf_file(fn: str) -> List[str]:
    """Return the library directories listed in the ld.so.conf-style file FN.

    Blank lines and lines starting with "#" are skipped. An "include
    GLOB" line is expanded and every matching file is parsed
    recursively; a relative GLOB is resolved against the directory
    containing FN. Every other line is returned verbatim, stripped of
    its surrounding whitespace.

    Raises OSError if FN, or a file it includes, cannot be read.
    """
    paths: List[str] = []
    # Read everything up front so the handle is closed before recursing
    # into the included files.
    with open(fn) as conf_file:
        lines = conf_file.read().splitlines()
    for raw_line in lines:
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("include "):
            # Strip again: several blanks may separate the keyword from
            # the glob, and a leading blank would make an absolute glob
            # look relative.
            dirglob = line[len("include ") :].strip()
            if not os.path.isabs(dirglob):
                dirglob = os.path.join(os.path.dirname(os.path.normpath(fn)), dirglob)
            # Sorted so the resulting search order is deterministic.
            for sub_fn in sorted(glob(dirglob)):
                paths.extend(parse_ld_conf_file(sub_fn))
            continue
        paths.append(line)
    return paths
# Candidate directories are collected in priority order: LD_LIBRARY_PATH
# entries first, then the system ld.so.conf, then the $PREFIX variants,
# and finally the hard-coded default library directories.
LDPATH = os.getenv("LD_LIBRARY_PATH")
PREFIX = os.getenv("PREFIX") # Termux & etc.
paths = []
if LDPATH:
# LD_LIBRARY_PATH is a colon-separated list of directories.
paths.extend(LDPATH.split(":"))
if os.path.exists("/etc/ld.so.conf"):
paths.extend(parse_ld_conf_file("/etc/ld.so.conf"))
else:
# A missing configuration file is only reported, not treated as fatal.
print('WARNING: file "/etc/ld.so.conf" not found.')
if PREFIX:
# Same lookup again, rooted at $PREFIX instead of "/".
if os.path.exists(PREFIX + "/etc/ld.so.conf"):
paths.extend(parse_ld_conf_file(PREFIX + "/etc/ld.so.conf"))
else:
print('WARNING: file "' + PREFIX + '/etc/ld.so.conf" not found.')
paths.extend(
[
PREFIX + "/lib",
PREFIX + "/usr/lib",
PREFIX + "/lib64",
PREFIX + "/usr/lib64",
]
)
# Default library directories, appended last.
paths.extend(["/lib", "/usr/lib", "/lib64", "/usr/lib64"])
# Keep only the entries that are existing directories; order is preserved
# and duplicates are not removed.
return [path for path in paths if os.path.isdir(path)]
def resolve_libraries(
paths: List[str], files_patterns: List[str]
) -> Dict[str, ResolvedLib]:
"""Scans the PATH directory looking for the files complying with
the FILES_PATTERNS regexes list. Each file matching the pattern will be found only once
Returns the list of the resolved DSOs."""
libraries: Dict[str, ResolvedLib] = {}
def is_dso_matching_pattern(filename):
for pattern in files_patterns:
@ -95,15 +224,19 @@ def find_files(path: str, files_patterns: List[str]) -> List[str]:
return True
return False
for f in os.listdir(path):
abs_file_path = os.path.abspath(os.path.join(path, f))
if os.path.isfile(abs_file_path) and is_dso_matching_pattern(abs_file_path):
files.append(abs_file_path)
return files
for path in paths:
for fname in os.listdir(path):
abs_file_path = os.path.abspath(os.path.join(path, fname))
if (
os.path.isfile(abs_file_path)
and is_dso_matching_pattern(abs_file_path)
and (fname not in libraries)
):
libraries[fname] = ResolvedLib(fname, abs_file_path)
return libraries
def copy_and_patch_libs(dsos: List[str], libs_dir: str, rpath=None) -> None:
def copy_and_patch_libs(dsos: List[ResolvedLib], libs_dir: str, rpath=None) -> None:
"""Copies the graphic vendor DSOs to the cache directory before
patchelf-ing them.
@ -117,11 +250,11 @@ def copy_and_patch_libs(dsos: List[str], libs_dir: str, rpath=None) -> None:
runpath to point to the cache directory."""
rpath = rpath if (rpath is not None) else libs_dir
for dso in dsos:
basename = os.path.basename(dso)
basename = os.path.basename(dso.fullpath)
newpath = os.path.join(libs_dir, basename)
log_info(f"Copying {basename} to {newpath}")
shutil.copyfile(dso, newpath)
shutil.copymode(dso, newpath)
log_info(f"Copying and patching {dso} to {newpath}")
shutil.copyfile(dso.fullpath, newpath)
shutil.copymode(dso.fullpath, newpath)
patch_dso(newpath, rpath)
@ -146,7 +279,9 @@ def patch_dso(dsoPath: str, rpath: str) -> None:
# some loosely connected parts together for no good reason.
def generate_nvidia_egl_config_files(cache_dir: str, libs_dir: str) -> str:
def generate_nvidia_egl_config_files(
cache_dir: str, libs_dir: str, egl_conf_dir: str
) -> str:
"""Generates a set of JSON files describing the EGL exec
envirnoment to libglvnd.
@ -158,8 +293,6 @@ def generate_nvidia_egl_config_files(cache_dir: str, libs_dir: str) -> str:
{"file_format_version": "1.0.0", "ICD": {"library_path": dso}}
)
egl_conf_dir = os.path.join(cache_dir, "egl-confs")
os.makedirs(egl_conf_dir, exist_ok=True)
dso_paths = [
("10_nvidia.json", f"{libs_dir}/libEGL_nvidia.so.0"),
("10_nvidia_wayland.json", f"{libs_dir}/libnvidia-egl-wayland.so.1"),
@ -176,27 +309,32 @@ def generate_nvidia_egl_config_files(cache_dir: str, libs_dir: str) -> str:
return egl_conf_dir
def exec_binary(bin_path: str, args: List[str]) -> None:
"""Replace the current python program with the program pointed by
BIN_PATH.
def is_dso_cache_up_to_date(dsos: HostDSOs, cache_file_path: str) -> bool:
"""Check whether or not we need to udate the host DSOs cache.
Sets the relevant libGLvnd env variables."""
log_info(f"Execv-ing {bin_path}")
log_info(f"Goodbye now.")
# The following two env variables are required by our patched libglvnd
# implementation to figure out what kind of driver the host
# machine is using.
os.execv(bin_path, [bin_path] + args)
We keep what's in the cache through a JSON file stored at the root
of the cache_dir. We consider a DSO to be up to date if its name
and its content sha256 are equivalent.
"""
log_info("Checking if the cache is up to date")
if os.path.isfile(cache_file_path):
with open(cache_file_path, "r", encoding="utf8") as f:
try:
cached_dsos: HostDSOs = HostDSOs.from_json(f.read())
except:
return False
return dsos == cached_dsos
return False
def nvidia_main(cache_dir: str, gl_vendor_path: str) -> Dict:
def nvidia_main(cache_dir: str, dso_vendor_paths: List[str]) -> Dict:
"""Prepares the environment necessary to run a opengl/cuda program
on a Nvidia graphics card. It is by definition really stateful.
Roughly, we're going to:
1. Setup the nvidia cache directory.
2. Find the nvidia DSOs in the GL_VENDOR_PATH.
2. Find the nvidia DSOs in the DSO_VENDOR_PATH.
3. Copy these DSOs to their appropriate cache directories.
4. Generate the EGL configuration files.
5. Patchelf the runpath of what needs to be patched.
@ -219,45 +357,47 @@ def nvidia_main(cache_dir: str, gl_vendor_path: str) -> Dict:
This function returns a dictionary containing the env variables
supposed to be added to the current process down the line."""
log_info("Nvidia routine begins")
log_info("Setting up Nvidia cache directory")
cache_dir = os.path.join(cache_dir, "nvidia")
libs_dir = os.path.join(cache_dir, "lib")
cuda_dir = os.path.join(cache_dir, "cuda")
glx_dir = os.path.join(cache_dir, "glx")
egl_dir = os.path.join(cache_dir, "egl-confs")
cache_file_path = os.path.join(cache_dir, "cache.json")
log_info(f"Nvidia libs dir: {libs_dir}")
log_info(f"Nvidia cuda dir: {libs_dir}")
os.makedirs(libs_dir, exist_ok=True)
os.makedirs(cuda_dir, exist_ok=True)
os.makedirs(glx_dir, exist_ok=True)
log_info(f"Searching for the Nvidia OpenGL DSOs in {gl_vendor_path}")
# Nvidia OpenGL DSOs
opengl_dsos = find_files(gl_vendor_path, NVIDIA_DSO_PATTERNS)
log_info(f"Found the following DSOs:")
for dso in opengl_dsos:
log_info(dso)
log_info("Patching the DSOs.")
copy_and_patch_libs(opengl_dsos, libs_dir)
# Nvidia Cuda DSOs
log_info(f"Searching for the Nvidia Cuda DSOs in {gl_vendor_path}")
cuda_dsos = find_files(gl_vendor_path, CUDA_DSO_PATTERNS)
log_info(f"Found the following DSOs:")
for dso in cuda_dsos:
log_info(dso)
log_info("Patching the DSOs.")
copy_and_patch_libs(cuda_dsos, cuda_dir, libs_dir)
# GLX DSOs
log_info(f"Searching for the Nvidia GLX DSOs in {gl_vendor_path}")
glx_dsos = find_files(gl_vendor_path, GLX_DSO_PATTERNS)
log_info(f"Found the following DSOs:")
for dso in glx_dsos:
log_info(dso)
log_info("Patching the DSOs.")
copy_and_patch_libs(glx_dsos, glx_dir, libs_dir)
# Preparing the env
log_info("Setting NVIDIA-specific env variables.")
os.makedirs(egl_dir, exist_ok=True)
# Find Host DSOS
log_info("Searching for the host DSOs")
dsos: HostDSOs = HostDSOs(
generic=resolve_libraries(dso_vendor_paths, NVIDIA_DSO_PATTERNS),
cuda=resolve_libraries(dso_vendor_paths, CUDA_DSO_PATTERNS),
glx=resolve_libraries(dso_vendor_paths, GLX_DSO_PATTERNS),
)
log_info("Caching and patching host DSOs")
# Cache/Patch DSOs
if not is_dso_cache_up_to_date(dsos, cache_file_path):
log_info("The cache is not up to date, regenerating it")
shutil.rmtree(cache_dir)
os.makedirs(libs_dir, exist_ok=True)
os.makedirs(cuda_dir, exist_ok=True)
os.makedirs(glx_dir, exist_ok=True)
os.makedirs(egl_dir, exist_ok=True)
copy_and_patch_libs(list(dsos.generic.values()), libs_dir, libs_dir)
copy_and_patch_libs(list(dsos.glx.values()), glx_dir, libs_dir)
copy_and_patch_libs(list(dsos.cuda.values()), cuda_dir, libs_dir)
log_info("Setting up NVIDIA-specific execution env variables.")
with open(cache_file_path, "w", encoding="utf8") as f:
f.write(dsos.to_json())
else:
log_info("The cache is up to date.")
egl_config_files = generate_nvidia_egl_config_files(cache_dir, libs_dir, egl_dir)
new_env = {}
log_info(f"__GLX_VENDOR_LIBRARY_NAME = nvidia")
new_env["__GLX_VENDOR_LIBRARY_NAME"] = "nvidia"
egl_config_files = generate_nvidia_egl_config_files(cache_dir, libs_dir)
log_info(f"__EGL_VENDOR_LIBRARY_DIRS = {egl_config_files}")
new_env["__EGL_VENDOR_LIBRARY_DIRS"] = egl_config_files
ld_library_path = os.environ.get("LD_LIBRARY_PATH", None)
@ -272,15 +412,30 @@ def nvidia_main(cache_dir: str, gl_vendor_path: str) -> Dict:
return new_env
def exec_binary(bin_path: str, args: List[str]) -> None:
    """Hand the current process over to the program located at BIN_PATH.

    The new program receives BIN_PATH as its argv[0], followed by ARGS.
    os.execv replaces the running Python interpreter, so this function
    does not return when the call succeeds; on failure OSError is
    raised.
    """
    for farewell in (f"Execv-ing {bin_path}", "Goodbye now."):
        log_info(farewell)
    # os.execv takes no explicit environment: the new program starts
    # with the current os.environ, so anything the caller wants it to
    # see has to be set there beforehand.
    argv = [bin_path] + args
    os.execv(bin_path, argv)
def main(args):
start_time = time.time()
home = os.path.expanduser("~")
xdg_cache_home = os.environ.get("XDG_CACHE_HOME", os.path.join(home, ".cache"))
cache_dir = os.path.join(xdg_cache_home, "nix-gl-host")
log_info(f'Using "{cache_dir}" as cache dir.')
os.makedirs(cache_dir, exist_ok=True)
log_info(f'Scanning "{args.GL_VENDOR_PATH}" for DSOs.')
new_env = nvidia_main(cache_dir, args.GL_VENDOR_PATH)
host_dsos_paths: List[str] = get_ld_paths()
new_env = nvidia_main(cache_dir, host_dsos_paths)
os.environ.update(new_env)
log_info(f"{time.time() - start_time} seconds elapsed since script start.")
exec_binary(args.NIX_BINARY, args.ARGS)
return 0
@ -290,11 +445,6 @@ if __name__ == "__main__":
prog="nixglhost-wrapper",
description="Wrapper used to massage the host GL drivers to work with your nix-built binary.",
)
parser.add_argument(
"GL_VENDOR_PATH",
type=str,
help="a path pointing to the directory containing your GL driver shared libraries",
)
parser.add_argument(
"NIX_BINARY",
type=str,

View File

@ -0,0 +1,48 @@
import unittest
from nixglhost_wrapper import HostDSOs, ResolvedLib
class TestCacheSerializer(unittest.TestCase):
    """Round-trip tests for the JSON (de)serialization of HostDSOs."""

    # The method name must start with "test", otherwise unittest does not
    # collect it and the check silently never runs.
    def test_hostdso_json_golden(self):
        """Checks that to_json followed by from_json yields an equal object"""
        hds = HostDSOs(
            glx={
                "dummyglx.so": ResolvedLib(
                    "dummyglx.so",
                    "/lib/dummyglx.so",
                    "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406",
                )
            },
            cuda={
                "dummycuda.so": ResolvedLib(
                    "dummycuda.so",
                    "/lib/dummycuda.so",
                    "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407",
                )
            },
            generic={
                "dummygeneric.so": ResolvedLib(
                    "dummygeneric.so",
                    "/lib/dummygeneric.so",
                    "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408",
                )
            },
        )
        json_hds = hds.to_json()
        self.assertIsNotNone(json_hds)
        golden_hds = HostDSOs.from_json(json_hds)
        self.assertEqual(hds, golden_hds)
        self.assertEqual(hds.to_json(), golden_hds.to_json())

    def test_eq_commut_jsons(self):
        """Checks that object equality is not sensitive to JSON key ordering"""
        # Both documents hold the same data; only the order of the two
        # "cuda" entries differs.
        hds_json = '{"version": 1, "glx": {"dummyglx.so": {"name": "dummyglx.so", "fullpath": "/lib/dummyglx.so", "sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406"}}, "cuda": {"dummycuda.so": {"name": "dummycuda.so", "fullpath": "/lib/dummycuda.so", "sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"}, "dummycuda2.so": {"name": "dummycuda2.so", "fullpath": "/lib/dummycuda2.so", "sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"}}, "generic": {"dummygeneric.so": {"name": "dummygeneric.so", "fullpath": "/lib/dummygeneric.so", "sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408"}}}'
        commut_hds_json = '{"version": 1, "glx": {"dummyglx.so": {"name": "dummyglx.so", "fullpath": "/lib/dummyglx.so", "sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406"}}, "cuda": {"dummycuda2.so": {"name": "dummycuda2.so", "fullpath": "/lib/dummycuda2.so", "sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"}, "dummycuda.so": {"name": "dummycuda.so", "fullpath": "/lib/dummycuda.so", "sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"}}, "generic": {"dummygeneric.so": {"name": "dummygeneric.so", "fullpath": "/lib/dummygeneric.so", "sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408"}}}'
        hds = HostDSOs.from_json(hds_json)
        commut_hds = HostDSOs.from_json(commut_hds_json)
        self.assertEqual(hds, commut_hds)
        self.assertEqual(hds.to_json(), commut_hds.to_json())


if __name__ == "__main__":
    unittest.main()