Compare commits

...

3 Commits

Author SHA1 Message Date
Félix Baylac Jacqué c7c5f08cbc Cold cache: optimize cache generation
We managed to improve the cold cache generation time from 98s to ~30s on
my desktop.

Two things have been done to improve that performance:

1. This one was stupid. I forgot to remove a debug tracing routine
   from the code… This tracing routine was forcing us to cache the
   libraries… twice. Massive facepalm. Addressing this reduced the
   cold runtime by 50%.
2. Instead of spinning up a patchelf subprocess for each library, we
   batch these operations as much as possible in a single subprocess.
   This trick shaves about 30% of the remaining runtime.
2022-12-14 19:20:31 +01:00
Félix Baylac Jacqué 97e35d20fa Rename nixglhost_wrapper -> nixglhost 2022-12-14 19:20:31 +01:00
Félix Baylac Jacqué 3ff2f01812 Hot Cache: use the DSO last write time/size instead of content hash
After profiling a nixglhost hot run, it turns out that we were
spending more than 98% of the run time reading and sha256-hashing
files.

Let's give up on content hashing the files and assume that using their
name, size and last write time is good enough.

On a hot run, we reduce the run time from about 3s to 0.3s on an
NVMe-powered Ryzen 7 desktop.

I guess this 10x speedup is probably worth the little cache correctness
we lose on the way.
2022-12-14 19:20:10 +01:00
6 changed files with 96 additions and 75 deletions

View File

@ -24,10 +24,10 @@ pkgs.stdenvNoCC.mkDerivation {
checkPhase = ''
black --check src/*.py
nixpkgs-fmt --check *.nix
python src/nixglhost_wrapper_test.py
python src/nixglhost_test.py
'';
installPhase = ''
install -D -m0755 src/nixglhost_wrapper.py $out/bin/nixglhost
install -D -m0755 src/nixglhost.py $out/bin/nixglhost
'';
}

View File

@ -14,7 +14,7 @@ from glob import glob
from typing import List, Literal, Dict, Tuple, TypedDict, TextIO, Optional
IN_NIX_STORE = False
CACHE_VERSION = 2
CACHE_VERSION = 3
if IN_NIX_STORE:
@ -30,47 +30,55 @@ class ResolvedLib:
together with some metadata helping us to uniquely identify it."""
def __init__(
self, name: str, dirpath: str, fullpath: str, sha256: Optional[str] = None
self,
name: str,
dirpath: str,
fullpath: str,
last_modification: Optional[float] = None,
size: Optional[int] = None,
):
self.name: str = name
self.dirpath: str = dirpath
self.fullpath: str = fullpath
if sha256 is None:
h = hashlib.sha256()
with open(fullpath, "rb") as f:
h.update(f.read())
sha: str = h.hexdigest()
if size is None or last_modification is None:
stat = os.stat(fullpath)
self.last_modification: float = stat.st_atime
self.size: int = stat.st_size
else:
sha = sha256
self.sha256: str = sha
self.last_modification = last_modification
self.size = size
def __repr__(self):
return (
f"ResolvedLib<{self.name}, {self.dirpath}, {self.fullpath}, {self.sha256}>"
)
return f"ResolvedLib<{self.name}, {self.dirpath}, {self.fullpath}, {self.last_modification}, {self.size}>"
def to_dict(self) -> Dict:
return {
"name": self.name,
"dirpath": self.dirpath,
"fullpath": self.fullpath,
"sha256": self.sha256,
"last_modification": self.last_modification,
"size": self.size,
}
def __hash__(self):
return hash((self.name, self.dirpath, self.fullpath, self.sha256))
return hash(
(self.name, self.dirpath, self.fullpath, self.last_modification, self.size)
)
def __eq__(self, o):
return (
self.name == o.name
and self.fullpath == o.fullpath
and self.sha256 == o.sha256
and self.dirpath == o.dirpath
and self.last_modification == o.last_modification
and self.size == o.size
)
@classmethod
def from_dict(cls, d: Dict):
return ResolvedLib(d["name"], d["dirpath"], d["fullpath"], d["sha256"])
return ResolvedLib(
d["name"], d["dirpath"], d["fullpath"], d["last_modification"], d["size"]
)
class LibraryPath:
@ -164,7 +172,6 @@ class CacheDirContent:
# requiring to build/fetch the nvidia driver at runtime*.
# TODO: compile the regexes
NVIDIA_DSO_PATTERNS = [
"libEGL_nvidia\.so.*$",
"libGLESv1_CM_nvidia\.so.*$",
"libGLESv2_nvidia\.so.*$",
"libglxserver_nvidia\.so.*$",
@ -173,8 +180,6 @@ NVIDIA_DSO_PATTERNS = [
"libnvidia-cfg\.so.*$",
"libnvidia-compiler\.so.*$",
"libnvidia-eglcore\.so.*$",
"libnvidia-egl-gbm\.so.*$",
"libnvidia-egl-wayland\.so.*$",
"libnvidia-encode\.so.*$",
"libnvidia-fbc\.so.*$",
"libnvidia-glcore\.so.*$",
@ -312,6 +317,7 @@ def copy_and_patch_libs(
we first copy them to the user's personal cache directory, we then
alter their runpath to point to the cache directory."""
rpath = rpath if (rpath is not None) else dest_dir
new_paths: List[str] = []
for dso in dsos:
basename = os.path.basename(dso.fullpath)
newpath = os.path.join(dest_dir, basename)
@ -319,7 +325,8 @@ def copy_and_patch_libs(
shutil.copyfile(dso.fullpath, newpath)
# Provide write permissions to ensure we can patch this binary.
os.chmod(newpath, os.stat(dso.fullpath).st_mode | stat.S_IWUSR)
patch_dso(newpath, rpath)
new_paths.append(newpath)
patch_dsos(new_paths, rpath)
def log_info(string: str) -> None:
@ -329,14 +336,14 @@ def log_info(string: str) -> None:
print(f"[+] {string}", file=sys.stderr)
def patch_dso(dsoPath: str, rpath: str) -> None:
"""Call patchelf to change the DSOPATH runpath with RPATH."""
log_info(f"Patching {dsoPath}")
log_info(f"Exec: {PATCHELF_PATH} --set-rpath {rpath} {dsoPath}")
res = subprocess.run([PATCHELF_PATH, "--set-rpath", rpath, dsoPath])
def patch_dsos(dsoPaths: List[str], rpath: str) -> None:
"""Call patchelf to change the DSOS runpath with RPATH."""
log_info(f"Patching {dsoPaths}")
log_info(f"Exec: {PATCHELF_PATH} --set-rpath {rpath} {dsoPaths}")
res = subprocess.run([PATCHELF_PATH, "--set-rpath", rpath] + dsoPaths)
if res.returncode != 0:
raise BaseException(
f"Cannot patch {dsoPath}. Patchelf exited with {res.returncode}"
f"Cannot patch {dsoPaths}. Patchelf exited with {res.returncode}"
)
@ -374,8 +381,8 @@ def is_dso_cache_up_to_date(dsos: CacheDirContent, cache_file_path: str) -> bool
We keep what's in the cache through a JSON file stored at the root
of the cache_dir. We consider a dynamically shared object to be up
to date if its name, its full path and its content sha256 are
equivalent."""
to date if its name, its full path, its size and last modification
timestamp are equivalent."""
log_info("Checking if the cache is up to date")
if os.path.isfile(cache_file_path):
with open(cache_file_path, "r", encoding="utf8") as f:
@ -497,9 +504,7 @@ def nvidia_main(
shutil.rmtree(cache_dir)
cache_paths: List[str] = []
for p in cache_content.paths:
np = cache_library_path(p, cache_dir)
log_info(f"Caching {np}")
log_info(f"p {p}")
log_info(f"Caching {p}")
cache_paths.append(cache_library_path(p, cache_dir))
log_info(f"Caching ")
with open(cache_file_path, "w", encoding="utf8") as f:
@ -570,7 +575,7 @@ def main(args):
if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="nixglhost-wrapper",
prog="nixglhost",
description="Wrapper used to massage the host GL drivers to work with your nix-built binary.",
)
parser.add_argument(

View File

@ -1,7 +1,7 @@
import unittest
import os
from nixglhost_wrapper import CacheDirContent, LibraryPath, ResolvedLib
from nixglhost import CacheDirContent, LibraryPath, ResolvedLib
class TestCacheSerializer(unittest.TestCase):
@ -9,18 +9,12 @@ class TestCacheSerializer(unittest.TestCase):
lp = LibraryPath(
glx=[
ResolvedLib(
"dummyglx.so",
"/lib",
"/lib/dummyglx.so",
"031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406",
"dummyglx.so", "/lib", "/lib/dummyglx.so", 1670260550.481498, 1612
)
],
cuda=[
ResolvedLib(
"dummycuda.so",
"/lib",
"/lib/dummycuda.so",
"031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407",
"dummycuda.so", "/lib", "/lib/dummycuda.so", 2670260550.481498, 2612
)
],
generic=[
@ -28,15 +22,13 @@ class TestCacheSerializer(unittest.TestCase):
"dummygeneric.so",
"/lib",
"/lib/dummygeneric.so",
"031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408",
3670260550.481498,
3612,
)
],
egl=[
ResolvedLib(
"dummyegl.so",
"/lib",
"/lib/dummyegl.so",
"031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9409",
"dummyegl.so", "/lib", "/lib/dummyegl.so", 4670260550.481498, 4612
)
],
path="/path/to/lib/dir",

View File

@ -6,7 +6,8 @@
"name": "dummyglx.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyglx.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406"
"last_modification": 1670260550.481498,
"size": 1612
}
],
"cuda": [
@ -14,7 +15,8 @@
"name": "dummycuda.so",
"dirpath": "/lib",
"fullpath": "/lib/dummycuda.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"
"last_modification": 2670260550.481498,
"size": 2612
}
],
"generic": [
@ -22,7 +24,8 @@
"name": "dummygeneric.so",
"dirpath": "/lib",
"fullpath": "/lib/dummygeneric.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408"
"last_modification": 3670260550.481498,
"size": 3612
}
],
"egl": [
@ -30,7 +33,8 @@
"name": "dummyegl.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyegl.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9409"
"last_modification": 4670260550.481498,
"size": 4612
}
],
"path": "/path/to/lib/dir"
@ -41,7 +45,8 @@
"name": "dummyglx.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyglx.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406"
"last_modification": 5670260550.481498,
"size": 5612
}
],
"cuda": [
@ -49,7 +54,8 @@
"name": "dummycuda.so",
"dirpath": "/lib",
"fullpath": "/lib/dummycuda.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"
"last_modification": 6670260550.481498,
"size": 6612
}
],
"generic": [
@ -57,7 +63,8 @@
"name": "dummygeneric.so",
"dirpath": "/lib",
"fullpath": "/lib/dummygeneric.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408"
"last_modification": 7670260550.481498,
"size": 7612
}
],
"egl": [
@ -65,11 +72,12 @@
"name": "dummyegl.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyegl.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9409"
"last_modification": 8670260550.481498,
"size": 8612
}
],
"path": "/path/to/lib/dir2"
}
],
"version": 2
"version": 3
}

View File

@ -6,7 +6,8 @@
"name": "dummyglx.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyglx.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406"
"last_modification": 5670260550.481498,
"size": 5612
}
],
"cuda": [
@ -14,7 +15,8 @@
"name": "dummycuda.so",
"dirpath": "/lib",
"fullpath": "/lib/dummycuda.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"
"last_modification": 6670260550.481498,
"size": 6612
}
],
"generic": [
@ -22,7 +24,8 @@
"name": "dummygeneric.so",
"dirpath": "/lib",
"fullpath": "/lib/dummygeneric.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408"
"last_modification": 7670260550.481498,
"size": 7612
}
],
"egl": [
@ -30,7 +33,8 @@
"name": "dummyegl.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyegl.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9409"
"last_modification": 8670260550.481498,
"size": 8612
}
],
"path": "/path/to/lib/dir2"
@ -41,7 +45,8 @@
"name": "dummyglx.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyglx.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406"
"last_modification": 1670260550.481498,
"size": 1612
}
],
"cuda": [
@ -49,7 +54,8 @@
"name": "dummycuda.so",
"dirpath": "/lib",
"fullpath": "/lib/dummycuda.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"
"last_modification": 2670260550.481498,
"size": 2612
}
],
"generic": [
@ -57,7 +63,8 @@
"name": "dummygeneric.so",
"dirpath": "/lib",
"fullpath": "/lib/dummygeneric.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408"
"last_modification": 3670260550.481498,
"size": 3612
}
],
"egl": [
@ -65,11 +72,12 @@
"name": "dummyegl.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyegl.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9409"
"last_modification": 4670260550.481498,
"size": 4612
}
],
"path": "/path/to/lib/dir"
}
],
"version": 2
"version": 3
}

View File

@ -6,7 +6,8 @@
"name": "dummyglx.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyglx.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9406"
"last_modification": 2670260550.481498,
"size": 2612
}
],
"cuda": [
@ -14,7 +15,8 @@
"name": "dummycuda.so",
"dirpath": "/lib",
"fullpath": "/lib/dummycuda.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"
"last_modification": 2670260550.481498,
"size": 2612
}
],
"generic": [
@ -22,7 +24,8 @@
"name": "dummygeneric.so",
"dirpath": "/lib",
"fullpath": "/lib/dummygeneric.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408"
"last_modification": 3670260550.481498,
"size": 3612
}
],
"egl": [
@ -30,7 +33,8 @@
"name": "dummyegl.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyegl.so",
"sha256": "031edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9409"
"last_modification": 4670260550.481498,
"size": 4612
}
],
"path": "/path/to/lib/dir"
@ -41,7 +45,8 @@
"name": "dummyglx.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyglx.so",
"sha256": "4444444444444444444444444444444444444444444444444444444444444444"
"last_modification": 5670260550.481498,
"size": 5612
}
],
"cuda": [
@ -49,7 +54,8 @@
"name": "dummycuda.so",
"dirpath": "/lib",
"fullpath": "/lib/dummycuda.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9407"
"last_modification": 6670260550.481498,
"size": 6612
}
],
"generic": [
@ -57,7 +63,8 @@
"name": "dummygeneric.so",
"dirpath": "/lib",
"fullpath": "/lib/dummygeneric.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9408"
"last_modification": 7670260550.481498,
"size": 7612
}
],
"egl": [
@ -65,11 +72,12 @@
"name": "dummyegl.so",
"dirpath": "/lib",
"fullpath": "/lib/dummyegl.so",
"sha256": "131edd7d41651593c5fe5c006fa5752b37fddff7bc4e843aa6af0c950f4b9409"
"last_modification": 8670260550.481498,
"size": 8612
}
],
"path": "/path/to/lib/dir2"
}
],
"version": 2
"version": 3
}