Build a function memoizer [Part-3]
Kailash Sankar
Posted on July 26, 2020
To summarise the previous parts: we started with a memoizer that supports functions with numeric params, updated it to support complex inputs, verified async support, and added a clear-cache function.
Next we'll add an option to set a cache size limit. For that we need to:
- accept the limit as a user input (see the sketch after this list)
- change the cache data structure to something where we can easily identify the least recently used entry
- when the cache limit is hit, remove the least recently used entry while caching a new one
- every time a cached value is referenced, refresh it to make it the most recently used one
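Before building the data structure, here's the shape of the API we're aiming for. This is just a sketch; memoizer is what we finish with at the end of this post, and add is a hypothetical function to memoize:

const add = (a, b) => a + b;
// keep at most 5 results, evicting the least recently used
const memoAdd = memoizer(add, { cacheSize: 5 });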
If we use an array, inserting new values at the front and moving values to the front would be expensive operations, since every other element has to shift. A linked list lets us add and remove values easily and efficiently (O(1) cost), but finding a cached value means walking through the whole list. We'll worry about that later; for now, let's see if a linked list solves the problem.
For a refresher on linked lists, I recommend reading these posts: Interview Cake, Basecs
To illustrate, the cache will start as
cache = null
and as we cache more entries it will look like
cache = nodeA -> nodeB -> nodeC -> null
If we look up nodeB, the cache becomes
cache = nodeB -> nodeA -> nodeC -> null
If our cache size is 3 and we add a new nodeD, the least recently used nodeC gets evicted:
cache = nodeD -> nodeB -> nodeA -> null
Cache node structure
// a doubly linked list node holding one cached entry
function Node(key, value) {
  this.key = key;
  this.value = value;
  this.next = null; // towards the tail (older entries)
  this.prev = null; // towards the head (newer entries)
}
Keeping a reference to the previous node makes it easy to remove from the tail and to move nodes from the middle to the top (refresh).
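As a tiny illustration (with hypothetical keys), wiring two nodes together by hand shows what the back-pointer buys us:

const a = new Node("1-2", 3);
const b = new Node("2-3", 5);
// link a -> b
a.next = b;
b.prev = a;
// with b as the tail, b.prev hands us the new tail in O(1),
// no need to walk the list from the head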
Overall frame of the Cache
const DEFAULT_CACHE_SIZE = 10;

function Cache(params = {}) {
  let head = null;
  let tail = null;
  let size = 0;
  let options = {
    cacheSize: DEFAULT_CACHE_SIZE,
    ...params,
  };

  // operations
  function add() {}
  function remove() {}
  function refresh() {}
  function find() {}
  function clear() {}
  function print() {} // for debugging/testing

  // allowed operations
  return {
    add,
    find,
    clear,
    print,
  };
}
Add a new node to the cache
function add(key, value) {
  const node = new Node(key, value);
  if (head) {
    node.next = head;
    head.prev = node;
  }
  // set the tail node
  if (!tail) {
    tail = node;
  }
  head = node;
  size++;

  // remove a node if we reach the size limit
  if (size > options.cacheSize) {
    remove();
  }

  return node;
}
Remove the node at the tail; the previous node becomes the new tail
function remove() {
  if (tail) {
    const prev = tail.prev;
    tail = prev;
    if (prev) {
      prev.next = null;
    } else {
      // head and tail were the same node, so clear head too
      head = null;
    }
    size--;
  }
}
Move a referenced node to the head
function refresh(node) {
  if (head === node) {
    return;
  }

  // remove from current position
  if (node.prev) {
    node.prev.next = node.next;
  }
  if (node.next) {
    node.next.prev = node.prev;
  }

  // add to top
  node.next = head;
  head.prev = node;
  head = node;

  // update tail if refreshed node is the tail node
  if (tail === node) {
    tail = node.prev;
  }
  node.prev = null;
}
Check if a key is in the cache
function find(key) {
  let node = head;
  while (node) {
    if (node.key === key) {
      refresh(node);
      return node;
    }
    node = node.next;
  }
  return null;
}
Clear the cache
function clear() {
  head = null;
  tail = null;
  size = 0;
  // the garbage collector will take care of the rest. right?
}
Print the nodes, only for testing
function print() {
  let node = head;
  let out = [];
  while (node) {
    out.push(`[${node.key}: ${node.value}]`);
    node = node.next;
  }
  console.log(out.join(" -> "));
}
Test if the cache works
const testCache = Cache({ cacheSize: 3 });
testCache.add("1-2", 3);
testCache.add("2-3", 5);
testCache.add("5-5", 10);
testCache.add("4-2", 6);
testCache.print();
// output: [4-2: 6] -> [5-5: 10] -> [2-3: 5]
// entry "1-2" was removed to keep the size at 3
testCache.find("2-3");
testCache.print();
// output: [2-3: 5] -> [4-2: 6] -> [5-5: 10]
// "2-3" was brought up as it was referenced
testCache.add("32-1", 33);
testCache.print();
// output: [32-1: 33] -> [2-3: 5] -> [4-2: 6]
testCache.find("2-2"); // not cached
testCache.find("32-1");
testCache.print();
// output: [32-1: 33] -> [2-3: 5] -> [4-2: 6]
Looks good; now let's replace the simple object cache with this one.
function memoizer(fn, options) {
  const resultsCache = Cache(options);

  // memoized wrapper function
  function memoized(...args) {
    const cacheKey = generateCacheKey(args);
    let cachedNode = resultsCache.find(cacheKey);

    if (!cachedNode) {
      // cached value not found, call fn and cache result
      const result = fn(...args);
      cachedNode = resultsCache.add(cacheKey, result);
    }

    // return result from cache
    return cachedNode.value;
  }

  // clear cache
  memoized.clearCache = resultsCache.clear;

  return memoized;
}
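Putting it together, usage might look like this (a hypothetical slowAdd; generateCacheKey is the key builder from the earlier parts):

// hypothetical expensive function
function slowAdd(a, b) {
  console.log("computing...");
  return a + b;
}

const memoAdd = memoizer(slowAdd, { cacheSize: 3 });
memoAdd(1, 2); // logs "computing...", returns 3
memoAdd(1, 2); // returns 3 from the cache, no log
memoAdd.clearCache();
memoAdd(1, 2); // computed and cached again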
I moved all the tests from parts 1 & 2 to Jest, ran them against the new cache, and they all passed.
The downside compared to the simple object cache we had earlier is the lookup cost: it grows with the size of the cache, since we have to iterate through the list to find the right node. We can get the lookup speed of an object back by maintaining a hash that maps each cache key to its node in the linked list. The approach takes up extra space, but since we are building a cache, the goal is speed at the cost of space.
A few changes across the cache:
// main
let hash = {};

// add
hash[key] = node;

// remove
delete hash[tail.key];

// find
if (key in hash) {
  const node = hash[key];
  refresh(node);
  return node;
}

// clear
hash = {};
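One subtlety worth calling out: in remove, the hash entry has to be deleted while tail still points at the node being evicted, i.e. before the tail pointer moves. A sketch of the full updated remove:

function remove() {
  if (tail) {
    // drop the lookup entry before tail is reassigned
    delete hash[tail.key];
    const prev = tail.prev;
    tail = prev;
    if (prev) {
      prev.next = null;
    } else {
      // evicted the only node, so clear head too
      head = null;
    }
    size--;
  }
}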
What we've ended up with is a crude implementation of an LRU cache.
The next part of the series will add support for time-based expiry of cached values.
Photo by Steve Johnson on Unsplash