-
Notifications
You must be signed in to change notification settings - Fork 28
Expand file tree
/
Copy pathcache.py
More file actions
376 lines (327 loc) · 11.5 KB
/
cache.py
File metadata and controls
376 lines (327 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
"""
This implements the instance caching in SQLObject. Caching is
relatively aggressive. All objects are retained so long as they are
in memory, by keeping weak references to objects. We also keep other
objects in a cache that doesn't allow them to be garbage collected
(unless caching is turned off).
"""
import threading
from weakref import ref
class CacheFactory(object):
    """
    CacheFactory caches object creation. Each object should be
    referenced by a single hashable ID (note tuples of hashable
    values are also hashable).

    Two stores are used: ``self.cache`` keeps strong references
    (only when caching is enabled), while ``self.expiredCache`` keeps
    weak references, so objects that are still alive elsewhere can be
    recovered without this cache keeping them alive.
    """
    def __init__(self, cullFrequency=100, cullFraction=2,
                 cache=True):
        """
        Every cullFrequency times that an item is retrieved from
        this cache, the cull method is called.

        The cull method then expires an arbitrary fraction of
        the cached objects.  The idea is at no time will the cache
        be entirely emptied, placing a potentially high load at that
        moment, but every object will have its time to go
        eventually.  The fraction is given as an integer, and one
        in that many objects are expired (i.e., the default is 1/2
        of objects are expired).

        By setting cache to False, items won't be cached.
        However, in all cases a weak reference is kept to created
        objects, and if the object hasn't been garbage collected
        it will be returned.
        """
        self.cullFrequency = cullFrequency
        # Number of .get()/.created() calls since the last cull.
        self.cullCount = 0
        # Rotates which slice of keys gets expired on each cull pass.
        self.cullOffset = 0
        self.cullFraction = cullFraction
        self.doCache = cache
        if self.doCache:
            # Strong-reference store; only exists when caching is on.
            self.cache = {}
        # Maps id -> weakref.ref(obj); entries may be dead until culled.
        self.expiredCache = {}
        self.lock = threading.Lock()
    def tryGet(self, id):
        """
        This returns None, or the object in cache.

        Unlike .get(), this never leaves the lock held, so it cannot
        deadlock (but it performs its reads without taking the lock).
        """
        value = self.expiredCache.get(id)
        if value:
            # it's actually a weakref; calling it yields the object,
            # or None if it has been garbage collected:
            return value()
        if not self.doCache:
            return None
        return self.cache.get(id)
    def get(self, id):
        """
        This method can cause deadlocks! tryGet is safer

        This returns the object found in cache, or None.  If None,
        then the cache will remain locked!  This is so that the
        calling function can create the object in a threadsafe manner
        before releasing the lock.  You should use this like (note
        that ``cache`` is actually a CacheSet object in this
        example)::

            obj = cache.get(some_id, my_class)
            if obj is None:
                try:
                    obj = create_object(some_id)
                    cache.put(some_id, my_class, obj)
                finally:
                    cache.finishPut(cls)

        This method checks both the main cache (which retains
        references) and the 'expired' cache, which retains only weak
        references.
        """
        if self.doCache:
            if self.cullCount > self.cullFrequency:
                # Two threads could hit the cull in a row, but
                # that's not so bad.  At least by setting cullCount
                # back to zero right away we avoid this.  The cull
                # method has a lock, so it's threadsafe.
                self.cullCount = 0
                self.cull()
            else:
                self.cullCount = self.cullCount + 1
            # Fast path: unlocked read of the strong cache.
            try:
                return self.cache[id]
            except KeyError:
                pass
            self.lock.acquire()
            # Re-check under the lock; another thread may have
            # inserted the object between the read above and here.
            try:
                val = self.cache[id]
            except KeyError:
                pass
            else:
                self.lock.release()
                return val
            try:
                val = self.expiredCache[id]()
            except KeyError:
                # Cache miss: return None WITHOUT releasing the lock;
                # the caller is expected to .put() then .finishPut().
                return None
            else:
                del self.expiredCache[id]
                if val is None:
                    # The weak reference was dead; treat as a miss
                    # (the lock stays held, as above).
                    return None
            # The object is still alive: promote it back into the
            # strong cache before releasing the lock.
            self.cache[id] = val
            self.lock.release()
            return val
        else:
            # Caching disabled: only the weakref store is consulted.
            try:
                val = self.expiredCache[id]()
                if val is not None:
                    return val
            except KeyError:
                pass
            self.lock.acquire()
            try:
                val = self.expiredCache[id]()
            except KeyError:
                # Miss: the lock is intentionally left held.
                return None
            else:
                if val is None:
                    # Dead reference: drop it; also a miss, so the
                    # lock stays held for the caller.
                    del self.expiredCache[id]
                    return None
            self.lock.release()
            return val
    def put(self, id, obj):
        """
        Puts an object into the cache.  Should only be called after
        .get(), so that duplicate objects don't end up in the cache.
        """
        if self.doCache:
            self.cache[id] = obj
        else:
            # Caching disabled: keep only a weak reference.
            self.expiredCache[id] = ref(obj)
    def finishPut(self):
        """
        Releases the lock that is retained when .get() is called and
        returns None.
        """
        self.lock.release()
    def created(self, id, obj):
        """
        Inserts an object into the cache.  Should be used when no one
        else knows about the object yet, so there cannot be any object
        already in the cache.  After a database INSERT is an example
        of this situation.
        """
        if self.doCache:
            if self.cullCount > self.cullFrequency:
                # Two threads could hit the cull in a row, but
                # that's not so bad.  At least by setting cullCount
                # back to zero right away we avoid this.  The cull
                # method has a lock, so it's threadsafe.
                self.cullCount = 0
                self.cull()
            else:
                self.cullCount = self.cullCount + 1
            self.cache[id] = obj
        else:
            self.expiredCache[id] = ref(obj)
    def cull(self):
        """Runs through the cache and expires objects

        E.g., if ``cullFraction`` is 3, then every third object is moved to
        the 'expired' (aka weakref) cache.
        """
        self.lock.acquire()
        try:
            # remove dead references from the expired cache
            keys = list(self.expiredCache.keys())
            for key in keys:
                if self.expiredCache[key]() is None:
                    self.expiredCache.pop(key, None)
            keys = list(self.cache.keys())
            # Walk every cullFraction-th key, starting at the rotating
            # offset, demoting those entries to weak references.
            for i in range(self.cullOffset, len(keys), self.cullFraction):
                id = keys[i]
                # create a weakref, then remove from the cache
                obj = ref(self.cache[id])
                del self.cache[id]
                # the object may have been gc'd when removed from the cache
                # above, no need to place in expiredCache
                if obj() is not None:
                    self.expiredCache[id] = obj
            # This offset tries to balance out which objects we
            # expire, so no object will just hang out in the cache
            # forever.
            self.cullOffset = (self.cullOffset + 1) % self.cullFraction
        finally:
            self.lock.release()
    def clear(self):
        """
        Removes everything from the cache.  Warning! This can cause
        duplicate objects in memory.
        """
        if self.doCache:
            self.cache.clear()
        self.expiredCache.clear()
    def expire(self, id):
        """
        Expires a single object.  Typically called after a delete.
        Doesn't even keep a weakref.  (@@: bad name?)
        """
        if not self.doCache:
            return
        self.lock.acquire()
        try:
            # Remove from both stores so the id cannot be resurrected.
            if id in self.cache:
                del self.cache[id]
            if id in self.expiredCache:
                del self.expiredCache[id]
        finally:
            self.lock.release()
    def expireAll(self):
        """
        Expires all objects, moving them all into the expired/weakref
        cache.
        """
        if not self.doCache:
            return
        self.lock.acquire()
        try:
            for key, value in self.cache.items():
                self.expiredCache[key] = ref(value)
            # Replace (rather than clear) so any concurrent reader
            # holding the old dict still sees a consistent snapshot.
            self.cache = {}
        finally:
            self.lock.release()
    def allIDs(self):
        """
        Returns the IDs of all objects in the cache.
        """
        if self.doCache:
            all = list(self.cache.keys())
        else:
            all = []
        # Include weakly-referenced objects that are still alive.
        for id, value in self.expiredCache.items():
            if value():
                all.append(id)
        return all
    def getAll(self):
        """
        Return all the objects in the cache.
        """
        if self.doCache:
            all = list(self.cache.values())
        else:
            all = []
        # Include weakly-referenced objects that are still alive.
        for value in self.expiredCache.values():
            if value():
                all.append(value())
        return all
class CacheSet(object):
    """
    A CacheSet is used to collect and maintain a series of caches.  In
    SQLObject, there is one CacheSet per connection, and one Cache
    in the CacheSet for each class, since IDs are not unique across
    classes.  It contains methods similar to Cache, but that take
    a ``cls`` argument.
    """

    def __init__(self, *args, **kw):
        # Per-class caches, keyed by class name; constructor arguments
        # are forwarded to each CacheFactory created lazily below.
        self.caches = {}
        self.args = args
        self.kw = kw

    def get(self, id, cls):
        """
        Return the cached object for ``(id, cls)`` or None.  As with
        CacheFactory.get(), a None result leaves the per-class cache
        locked; the caller must .put() and .finishPut().
        """
        try:
            return self.caches[cls.__name__].get(id)
        except KeyError:
            # No cache for this class yet: create one on demand.
            self.caches[cls.__name__] = CacheFactory(*self.args, **self.kw)
            return self.caches[cls.__name__].get(id)

    def put(self, id, cls, obj):
        """Store ``obj`` under ``(id, cls)``; call only after .get()."""
        self.caches[cls.__name__].put(id, obj)

    def finishPut(self, cls):
        """Release the lock retained by a .get() that returned None."""
        self.caches[cls.__name__].finishPut()

    def created(self, id, cls, obj):
        """Insert a freshly-created object (e.g. after a database INSERT)."""
        try:
            self.caches[cls.__name__].created(id, obj)
        except KeyError:
            # No cache for this class yet: create one on demand.
            self.caches[cls.__name__] = CacheFactory(*self.args, **self.kw)
            self.caches[cls.__name__].created(id, obj)

    def expire(self, id, cls):
        """Expire a single object; a missing per-class cache is a no-op."""
        try:
            self.caches[cls.__name__].expire(id)
        except KeyError:
            pass

    def clear(self, cls=None):
        """
        Clear the cache for ``cls``, or for all classes when ``cls``
        is None.  Warning: this can cause duplicate objects in memory.
        """
        if cls is None:
            for cache in self.caches.values():
                cache.clear()
        elif cls.__name__ in self.caches:
            self.caches[cls.__name__].clear()

    def tryGet(self, id, cls):
        """Lock-safe lookup; returns the cached object or None."""
        return self.tryGetByName(id, cls.__name__)

    def tryGetByName(self, id, clsname):
        """Like tryGet() but takes the class name directly."""
        try:
            return self.caches[clsname].tryGet(id)
        except KeyError:
            return None

    def allIDs(self, cls):
        """
        Returns the IDs of all objects in the cache for ``cls``.
        """
        try:
            # Bug fix: the result was previously computed but not
            # returned, so a cache hit always yielded None.
            return self.caches[cls.__name__].allIDs()
        except KeyError:
            return []

    def allSubCaches(self):
        """Return all per-class CacheFactory instances."""
        return self.caches.values()

    def allSubCachesByClassNames(self):
        """Return the mapping of class name -> CacheFactory."""
        return self.caches

    def weakrefAll(self, cls=None):
        """
        Move all objects in the cls (or if not given, then in all
        classes) to the weakref dictionary, where they can be
        collected.
        """
        if cls is None:
            for cache in self.caches.values():
                cache.expireAll()
        elif cls.__name__ in self.caches:
            self.caches[cls.__name__].expireAll()

    def getAll(self, cls=None):
        """
        Returns all instances in the cache for the given class or all
        classes.
        """
        if cls is None:
            results = []
            for cache in self.caches.values():
                results.extend(cache.getAll())
            return results
        elif cls.__name__ in self.caches:
            return self.caches[cls.__name__].getAll()
        else:
            return []