Reduce memory footprint of caches (#9886)

This commit is contained in:
Erik Johnston 2021-04-28 11:59:28 +01:00 committed by GitHub
parent dd2d32dcdb
commit 391bfe9a7b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 60 additions and 18 deletions

1
changelog.d/9886.misc Normal file
View file

@@ -0,0 +1 @@
Reduce memory usage of the LRU caches.

View file

@@ -17,8 +17,10 @@ from functools import wraps
from typing import ( from typing import (
Any, Any,
Callable, Callable,
Collection,
Generic, Generic,
Iterable, Iterable,
List,
Optional, Optional,
Type, Type,
TypeVar, TypeVar,
@@ -57,13 +59,56 @@ class _Node:
__slots__ = ["prev_node", "next_node", "key", "value", "callbacks"] __slots__ = ["prev_node", "next_node", "key", "value", "callbacks"]
def __init__( def __init__(
self, prev_node, next_node, key, value, callbacks: Optional[set] = None self,
prev_node,
next_node,
key,
value,
callbacks: Collection[Callable[[], None]] = (),
): ):
self.prev_node = prev_node self.prev_node = prev_node
self.next_node = next_node self.next_node = next_node
self.key = key self.key = key
self.value = value self.value = value
self.callbacks = callbacks or set()
# Set of callbacks to run when the node gets deleted. We store as a list
# rather than a set to keep memory usage down (and since we expect few
# entries per node, the performance of checking for duplication in a
# list vs using a set is negligible).
#
# Note that we store this as an optional list to keep the memory
# footprint down. Storing `None` is free as its a singleton, while empty
# lists are 56 bytes (and empty sets are 216 bytes, if we did the naive
# thing and used sets).
self.callbacks = None # type: Optional[List[Callable[[], None]]]
self.add_callbacks(callbacks)
def add_callbacks(self, callbacks: Collection[Callable[[], None]]) -> None:
    """Merge *callbacks* into the node's stored callback list.

    Duplicates are skipped so each callback is stored at most once.
    The backing list is created lazily: it stays ``None`` while there
    is nothing to store, keeping per-node memory usage down.
    """
    if not callbacks:
        return
    if not self.callbacks:
        self.callbacks = []
    stored = self.callbacks
    for cb in callbacks:
        if cb not in stored:
            stored.append(cb)
def run_and_clear_callbacks(self) -> None:
    """Invoke every stored deletion callback, then drop the list.

    Used when the node is being removed from the cache.  Resetting
    ``callbacks`` to ``None`` (rather than an empty list) releases the
    list object and keeps the node's memory footprint minimal.
    """
    if self.callbacks:
        for cb in self.callbacks:
            cb()
        self.callbacks = None
class LruCache(Generic[KT, VT]): class LruCache(Generic[KT, VT]):
@@ -177,10 +222,10 @@ class LruCache(Generic[KT, VT]):
self.len = synchronized(cache_len) self.len = synchronized(cache_len)
def add_node(key, value, callbacks: Optional[set] = None): def add_node(key, value, callbacks: Collection[Callable[[], None]] = ()):
prev_node = list_root prev_node = list_root
next_node = prev_node.next_node next_node = prev_node.next_node
node = _Node(prev_node, next_node, key, value, callbacks or set()) node = _Node(prev_node, next_node, key, value, callbacks)
prev_node.next_node = node prev_node.next_node = node
next_node.prev_node = node next_node.prev_node = node
cache[key] = node cache[key] = node
@@ -211,16 +256,15 @@ class LruCache(Generic[KT, VT]):
deleted_len = size_callback(node.value) deleted_len = size_callback(node.value)
cached_cache_len[0] -= deleted_len cached_cache_len[0] -= deleted_len
for cb in node.callbacks: node.run_and_clear_callbacks()
cb()
node.callbacks.clear()
return deleted_len return deleted_len
@overload @overload
def cache_get( def cache_get(
key: KT, key: KT,
default: Literal[None] = None, default: Literal[None] = None,
callbacks: Iterable[Callable[[], None]] = ..., callbacks: Collection[Callable[[], None]] = ...,
update_metrics: bool = ..., update_metrics: bool = ...,
) -> Optional[VT]: ) -> Optional[VT]:
... ...
@@ -229,7 +273,7 @@ class LruCache(Generic[KT, VT]):
def cache_get( def cache_get(
key: KT, key: KT,
default: T, default: T,
callbacks: Iterable[Callable[[], None]] = ..., callbacks: Collection[Callable[[], None]] = ...,
update_metrics: bool = ..., update_metrics: bool = ...,
) -> Union[T, VT]: ) -> Union[T, VT]:
... ...
@@ -238,13 +282,13 @@ class LruCache(Generic[KT, VT]):
def cache_get( def cache_get(
key: KT, key: KT,
default: Optional[T] = None, default: Optional[T] = None,
callbacks: Iterable[Callable[[], None]] = (), callbacks: Collection[Callable[[], None]] = (),
update_metrics: bool = True, update_metrics: bool = True,
): ):
node = cache.get(key, None) node = cache.get(key, None)
if node is not None: if node is not None:
move_node_to_front(node) move_node_to_front(node)
node.callbacks.update(callbacks) node.add_callbacks(callbacks)
if update_metrics and metrics: if update_metrics and metrics:
metrics.inc_hits() metrics.inc_hits()
return node.value return node.value
@@ -260,10 +304,8 @@ class LruCache(Generic[KT, VT]):
# We sometimes store large objects, e.g. dicts, which cause # We sometimes store large objects, e.g. dicts, which cause
# the inequality check to take a long time. So let's only do # the inequality check to take a long time. So let's only do
# the check if we have some callbacks to call. # the check if we have some callbacks to call.
if node.callbacks and value != node.value: if value != node.value:
for cb in node.callbacks: node.run_and_clear_callbacks()
cb()
node.callbacks.clear()
# We don't bother to protect this by value != node.value as # We don't bother to protect this by value != node.value as
# generally size_callback will be cheap compared with equality # generally size_callback will be cheap compared with equality
@@ -273,7 +315,7 @@ class LruCache(Generic[KT, VT]):
cached_cache_len[0] -= size_callback(node.value) cached_cache_len[0] -= size_callback(node.value)
cached_cache_len[0] += size_callback(value) cached_cache_len[0] += size_callback(value)
node.callbacks.update(callbacks) node.add_callbacks(callbacks)
move_node_to_front(node) move_node_to_front(node)
node.value = value node.value = value
@@ -326,8 +368,7 @@ class LruCache(Generic[KT, VT]):
list_root.next_node = list_root list_root.next_node = list_root
list_root.prev_node = list_root list_root.prev_node = list_root
for node in cache.values(): for node in cache.values():
for cb in node.callbacks: node.run_and_clear_callbacks()
cb()
cache.clear() cache.clear()
if size_callback: if size_callback:
cached_cache_len[0] = 0 cached_cache_len[0] = 0