logger = logging.getLogger(__name__)


# Name that `validate_cache` requires a docstub cache directory to have.
CACHE_DIR_NAME = ".docstub_cache"


# Content of the "CACHEDIR.TAG" file that `create_cache` writes into the cache
# directory. The backslash after the opening quotes suppresses the leading
# newline so that the file starts directly with the "Signature: ..." line.
CACHEDIR_TAG_CONTENT = """\
Signature: 8a477f597d28d172789f06886806bc55
# Mark this directory as a cache [1], created by docstub [2]
# [1] https://bford.info/cachedir/
# [2] https://github.com/scientific-python/docstub
"""


# Content of the ".gitignore" file that `create_cache` writes into the cache
# directory. The single "*" pattern makes git ignore everything inside it.
GITHUB_IGNORE_CONTENT = """\
# Make git ignore this cache directory, created by docstub [1]
# [1] https://github.com/scientific-python/docstub
*
"""
1324
1425
@@ -43,22 +54,42 @@ def create_cache(path):
4354 """
4455 path .mkdir (parents = True , exist_ok = True )
4556 cachdir_tag_path = path / "CACHEDIR.TAG"
46- cachdir_tag_content = (
47- "Signature: 8a477f597d28d172789f06886806bc55\n "
48- "# This file is a cache directory tag automatically created by docstub.\n "
49- "# For information about cache directory tags see https://bford.info/cachedir/\n "
50- )
57+
5158 if not cachdir_tag_path .is_file ():
5259 with open (cachdir_tag_path , "w" ) as fp :
53- fp .write (cachdir_tag_content )
60+ fp .write (CACHEDIR_TAG_CONTENT )
5461
5562 gitignore_path = path / ".gitignore"
56- gitignore_content = (
57- "# This file is a cache directory automatically created by docstub.\n " "*\n "
58- )
5963 if not gitignore_path .is_file ():
6064 with open (gitignore_path , "w" ) as fp :
61- fp .write (gitignore_content )
65+ fp .write (GITHUB_IGNORE_CONTENT )
66+
67+
def validate_cache(path):
    """Make sure the given path is a cache created by docstub.

    The check passes only if `path` is an existing directory named
    `CACHE_DIR_NAME` that contains both a "CACHEDIR.TAG" and a ".gitignore"
    file. The content of these files is not inspected.

    Parameters
    ----------
    path : Path
        The directory to check.

    Raises
    ------
    FileNotFoundError
        If `path` isn't a directory, isn't named `CACHE_DIR_NAME`, or doesn't
        contain one of the expected files.
    """
    if not path.is_dir():
        raise FileNotFoundError(f"expected '{path}' to be a valid directory")

    if path.name != CACHE_DIR_NAME:
        raise FileNotFoundError(
            f"expected directory '{path}' to be named '{CACHE_DIR_NAME}'"
        )

    # These are the two files that `create_cache` writes into a new cache
    for file_name in ("CACHEDIR.TAG", ".gitignore"):
        if not (path / file_name).is_file():
            raise FileNotFoundError(
                f"expected '{path}' to contain a '{file_name}' file"
            )
6293
6394
6495class FuncSerializer [T ](Protocol ):
@@ -90,9 +121,24 @@ class FileCache:
90121 This class can cache results of a function to the disk. A unique key is
91122 generated from the arguments to the function, and the result is cached
92123 inside a file named after this key.
124+
125+ Attributes
126+ ----------
127+ func : Callable
128+ The function whose output shall be cached.
129+ serializer : FuncSerializer
130+ An interface that matches the given `func`. It must implement the
131+ `FuncSerializer` protocol.
132+ sub_dir : str
133+ A unique name to structure multiple / parallel caches inside `cache_dir`.
134+ cache_hits, cache_misses : int
135+ Records how many times this object returned results from a cache (hits)
136+ or by computing it (misses).
137+ cached_last_call : bool or None
138+ Whether the last call was cached. ``None`` if not called yet.
93139 """
94140
95- def __init__ (self , * , func , serializer , cache_dir , name ):
141+ def __init__ (self , * , func , serializer , cache_dir , sub_dir = None ):
96142 """
97143 Parameters
98144 ----------
@@ -103,27 +149,45 @@ def __init__(self, *, func, serializer, cache_dir, name):
103149 `FuncSerializer` protocol.
104150 cache_dir : Path
105151 The directory of the cache.
106- name : str
107- A unique name to separate parallel caches inside `cache_dir`.
152+ sub_dir : str
153+ A unique name to structure multiple / parallel caches inside `cache_dir`.
108154 """
109155 self .func = func
110156 self .serializer = serializer
111157 self ._cache_dir = cache_dir
112- self .name = name
158+ self .sub_dir = sub_dir
159+
160+ self .cache_hits = 0
161+ self .cache_misses = 0
162+ self .cached_last_call = None
113163
@cached_property
def cache_dir(self):
    """Create the cache directory on first use and return its path.

    Because this is a `cached_property`, the directory is created and its
    size is checked only on the first access for a given instance. A warning
    is logged if `_directory_size` returns more than ``512 * 1024**2``.

    Returns
    -------
    cache_dir : Path
    """
    cache_dir = self._cache_dir
    create_cache(cache_dir)

    size_limit = 512 * 1024**2  # 512 MiB
    if _directory_size(cache_dir) > size_limit:
        logger.warning("cache size at %r exceeds 512 MiB", cache_dir)

    return cache_dir
178+
@property
def cache_sub_dir(self):
    """Return the directory in which cache entries are stored.

    If `sub_dir` is set, this is the subdirectory of that name inside
    `cache_dir`; it is created if it doesn't exist yet. Otherwise,
    `cache_dir` itself is returned.

    The warning about a cache size above 512 MiB is not issued here but by
    `cache_dir`, and only on its first access.

    Returns
    -------
    cache_sub_dir : Path
    """
    cache_dir = self.cache_dir
    if not self.sub_dir:
        return cache_dir

    # `/` builds a new path object, the value of `cache_dir` stays untouched
    sub_dir_path = cache_dir / self.sub_dir
    sub_dir_path.mkdir(parents=True, exist_ok=True)
    return sub_dir_path
127191
128192 def __call__ (self , * args , ** kwargs ):
129193 """Call the wrapped `func` and cache each result in a file.
@@ -138,14 +202,25 @@ def __call__(self, *args, **kwargs):
138202 data : Any
139203 """
140204 key = self .serializer .hash_args (* args , ** kwargs )
141- entry_path = self .named_cache_dir / f"{ key } { self .serializer .suffix } "
205+ entry_path = self .cache_sub_dir / f"{ key } { self .serializer .suffix } "
206+
142207 if entry_path .is_file ():
208+ # `data` is already cached
143209 with entry_path .open ("rb" ) as fp :
144210 raw = fp .read ()
145211 data = self .serializer .deserialize (raw )
212+
213+ self .cached_last_call = True
214+ self .cache_hits += 1
215+
146216 else :
217+ # `data` isn't cached, write cache
147218 data = self .func (* args , ** kwargs )
148219 raw = self .serializer .serialize (data )
149220 with entry_path .open ("xb" ) as fp :
150221 fp .write (raw )
222+
223+ self .cached_last_call = False
224+ self .cache_misses += 1
225+
151226 return data
0 commit comments