Skip to content

Commit b1448b5

Browse files
authored
Add CLI to control caching (#69)
* Collect cache statistics and make it easier to allow using the FileCache with a directory structure that represents a project.
* Add '--no-cache' CLI option and show in output whether the cache was hit or not for each file.
* Make sure docstub generated stubs are ignored when walking a source package.
* Add the `clean` subcommand
* Add tests for caching-related CLI
* Validate cache directory before cleaning it
* Preserve click command decorator for stub files
* Fix and annotate verbose parameter
* Use "\f" to hide "Parameter" docstring section of clean command
1 parent 75d1362 commit b1448b5

8 files changed

Lines changed: 389 additions & 56 deletions

File tree

docs/command_line.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@ Options:
1515
-h, --help Show this message and exit.
1616
1717
Commands:
18-
run Generate Python stub files.
18+
clean Clean the cache.
19+
run Generate Python stub files.
1920
```
2021

2122
<!--- end cli-docstub --->
@@ -52,8 +53,30 @@ Options:
5253
--allow-errors INT Allow this many or fewer errors. If docstub reports
5354
more, exit with error code '1'. This is useful to adopt
5455
docstub gradually. [default: 0; x>=0]
56+
--no-cache Ignore pre-existing cache and don't create a new one.
5557
-v, --verbose Print more details (repeatable).
5658
-h, --help Show this message and exit.
5759
```
5860

5961
<!--- end cli-docstub-run --->
62+
63+
64+
## Command `docstub clean`
65+
66+
<!--- The following block is checked by the test suite --->
67+
<!--- begin cli-docstub-clean --->
68+
69+
```plain
70+
Usage: docstub clean [OPTIONS]
71+
72+
Clean the cache.
73+
74+
Looks for a cache directory relative to the current working directory. If
75+
one exists, remove it.
76+
77+
Options:
78+
-v, --verbose Print more details (repeatable).
79+
-h, --help Show this message and exit.
80+
```
81+
82+
<!--- end cli-docstub-clean --->

src/docstub/_cache.py

Lines changed: 102 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,21 @@
55
logger = logging.getLogger(__name__)
66

77

8+
CACHE_DIR_NAME = ".docstub_cache"
9+
10+
811
CACHEDIR_TAG_CONTENT = """\
9-
Signature: 8a477f597d28d172789f06886806bc55\
10-
# This file is a cache directory tag automatically created by docstub.\n"
11-
# For information about cache directory tags see https://bford.info/cachedir/\n"
12+
Signature: 8a477f597d28d172789f06886806bc55
13+
# Mark this directory as a cache [1], created by docstub [2]
14+
# [1] https://bford.info/cachedir/
15+
# [2] https://github.com/scientific-python/docstub
16+
"""
17+
18+
19+
GITHUB_IGNORE_CONTENT = """\
20+
# Make git ignore this cache directory, created by docstub [1]
21+
# [1] https://github.com/scientific-python/docstub
22+
*
1223
"""
1324

1425

@@ -43,22 +54,42 @@ def create_cache(path):
4354
"""
4455
path.mkdir(parents=True, exist_ok=True)
4556
cachdir_tag_path = path / "CACHEDIR.TAG"
46-
cachdir_tag_content = (
47-
"Signature: 8a477f597d28d172789f06886806bc55\n"
48-
"# This file is a cache directory tag automatically created by docstub.\n"
49-
"# For information about cache directory tags see https://bford.info/cachedir/\n"
50-
)
57+
5158
if not cachdir_tag_path.is_file():
5259
with open(cachdir_tag_path, "w") as fp:
53-
fp.write(cachdir_tag_content)
60+
fp.write(CACHEDIR_TAG_CONTENT)
5461

5562
gitignore_path = path / ".gitignore"
56-
gitignore_content = (
57-
"# This file is a cache directory automatically created by docstub.\n" "*\n"
58-
)
5963
if not gitignore_path.is_file():
6064
with open(gitignore_path, "w") as fp:
61-
fp.write(gitignore_content)
65+
fp.write(GITHUB_IGNORE_CONTENT)
66+
67+
68+
def validate_cache(path):
    """Make sure the given path is a cache created by docstub.

    A directory passes validation only if it is named `CACHE_DIR_NAME` and
    contains both a "CACHEDIR.TAG" and a ".gitignore" file. Only the
    existence of these files is checked, not their content.

    Parameters
    ----------
    path : Path
        The directory to validate.

    Raises
    ------
    FileNotFoundError
        If `path` is not a directory, is not named `CACHE_DIR_NAME`, or
        doesn't contain one of the expected marker files.
    """
    if not path.is_dir():
        raise FileNotFoundError(f"expected '{path}' to be a valid directory")

    if path.name != CACHE_DIR_NAME:
        raise FileNotFoundError(
            f"expected directory '{path}' to be named '{CACHE_DIR_NAME}'"
        )

    # Checked in this order so the reported error is the same as before:
    # a missing tag file is reported before a missing ignore file.
    for required_name in ("CACHEDIR.TAG", ".gitignore"):
        if not (path / required_name).is_file():
            raise FileNotFoundError(
                f"expected '{path}' to contain a '{required_name}' file"
            )
6293

6394

6495
class FuncSerializer[T](Protocol):
@@ -90,9 +121,24 @@ class FileCache:
90121
This class can cache results of a function to the disk. A unique key is
91122
generated from the arguments to the function, and the result is cached
92123
inside a file named after this key.
124+
125+
Attributes
126+
----------
127+
func : Callable
128+
The function whose output shall be cached.
129+
serializer : FuncSerializer
130+
An interface that matches the given `func`. It must implement the
131+
`FuncSerializer` protocol.
132+
sub_dir : str
133+
A unique name to structure multiple / parallel caches inside `cache_dir`.
134+
cache_hits, cache_misses : int
135+
Records how many times this object returned results from a cache (hits)
136+
or by computing it (misses).
137+
cached_last_call : bool or None
138+
Whether the last call was cached. ``None`` if not called yet.
93139
"""
94140

95-
def __init__(self, *, func, serializer, cache_dir, name):
141+
def __init__(self, *, func, serializer, cache_dir, sub_dir=None):
96142
"""
97143
Parameters
98144
----------
@@ -103,27 +149,45 @@ def __init__(self, *, func, serializer, cache_dir, name):
103149
`FuncSerializer` protocol.
104150
cache_dir : Path
105151
The directory of the cache.
106-
name : str
107-
A unique name to separate parallel caches inside `cache_dir`.
152+
sub_dir : str
153+
A unique name to structure multiple / parallel caches inside `cache_dir`.
108154
"""
109155
self.func = func
110156
self.serializer = serializer
111157
self._cache_dir = cache_dir
112-
self.name = name
158+
self.sub_dir = sub_dir
159+
160+
self.cache_hits = 0
161+
self.cache_misses = 0
162+
self.cached_last_call = None
113163

114164
@cached_property
115-
def named_cache_dir(self):
116-
"""Path to the named subdirectory inside the cache.
165+
def cache_dir(self):
166+
"""Return and create cache dir on first use - also check its size.
167+
168+
Returns
169+
-------
170+
cache_dir : Path
171+
"""
172+
create_cache(self._cache_dir)
173+
174+
if _directory_size(self._cache_dir) > 512 * 1024**2:
175+
logger.warning("cache size at %r exceeds 512 MiB", self._cache_dir)
176+
177+
return self._cache_dir
178+
179+
@property
180+
def cache_sub_dir(self):
181+
"""Create and return path to a specific subdirectory inside the cache.
117182
118183
Warns when cache size exceeds 512 MiB.
119184
"""
120-
cache_dir = self._cache_dir
121-
create_cache(cache_dir)
122-
if _directory_size(cache_dir) > 512 * 1024**2:
123-
logger.warning("cache size at %r exceeds 512 MiB", cache_dir)
124-
_named_cache_dir = cache_dir / self.name
125-
_named_cache_dir.mkdir(parents=True, exist_ok=True)
126-
return _named_cache_dir
185+
named_dir = self.cache_dir
186+
if self.sub_dir:
187+
named_dir /= self.sub_dir
188+
named_dir.mkdir(parents=True, exist_ok=True)
189+
190+
return named_dir
127191

128192
def __call__(self, *args, **kwargs):
129193
"""Call the wrapped `func` and cache each result in a file.
@@ -138,14 +202,25 @@ def __call__(self, *args, **kwargs):
138202
data : Any
139203
"""
140204
key = self.serializer.hash_args(*args, **kwargs)
141-
entry_path = self.named_cache_dir / f"{key}{self.serializer.suffix}"
205+
entry_path = self.cache_sub_dir / f"{key}{self.serializer.suffix}"
206+
142207
if entry_path.is_file():
208+
# `data` is already cached
143209
with entry_path.open("rb") as fp:
144210
raw = fp.read()
145211
data = self.serializer.deserialize(raw)
212+
213+
self.cached_last_call = True
214+
self.cache_hits += 1
215+
146216
else:
217+
# `data` isn't cached, write cache
147218
data = self.func(*args, **kwargs)
148219
raw = self.serializer.serialize(data)
149220
with entry_path.open("xb") as fp:
150221
fp.write(raw)
222+
223+
self.cached_last_call = False
224+
self.cache_misses += 1
225+
151226
return data

0 commit comments

Comments
 (0)