Python: LRU 캐시 만들어보기

컴퓨터/파이썬 2020. 11. 16. 22:07

728x90

Least Recently Used

가장 오래 사용하지 않은 페이지 교체 알고리즘

Cache replacement policies - Wikipedia

Page replacement algorithm.

en.wikipedia.org

1. LRU

파이썬 functools.lru_cache 소스: @Github

from functools import lru_cache


@lru_cache(maxsize=32)
def fibo(n):
    if n == 1:
        return 0
    elif n == 2:
        return 1
    else:
        return fibo(n - 1) + fibo(n - 2)

파이썬 실제 lru_cache는 이 글 구현된 버전보다 빠르다,

(따로 linked class를 만들지도 않고, 다 함수 안에서 끝난다.)

하지만 소스를 보면 어떻게 작동하는지 쉽게 이해하기 어렵다.

그래서 LRU 캐시 핵심인, Doubly Linked List를 이용해 직접 구현해 볼 것이다.

2. 구현

클래스 데코레이터 만드는 법

init 함수에 function을 저장한다.
__call__, dunder 메소드에 파라미터를 넘기고, 결과를 return

class myDec:
    def __init__(self, function):
        self.function = function
        
    def __call__(self, *args, **kwargs):
        result = self.function(*args, **kwargs)
        return result
        
        
@myDec
def hi(a):
    return a

# 위는 아래와 같다.
# myDec(hi)(a)

이중 연결 리스트 노드

Doubly Linked List는 각 노드가 이전 노드와, 다음 노드를 저장한 리스트다.

args를 key로 쓰일 것이며, val은 함수 return 값으로 사용

ex) fibo(3) = 1, (key = 3, val = 1)

class Node:
    __slots__ = ("key", "val", "prev", "next")

    def __init__(self, key, val):
        self.key = key
        self.val = val
        self.prev = None
        self.next = None

이중 연결 리스트 클래스

hits와 miss는 파이썬 lru_cache처럼

캐시에 있는 값이면 hit++, 아니면 miss++

from threading import Lock  # 연결 리스트 업데이트는 thread-safe하지 않다.

class pyLRU:
    lock = Lock()
    maxsize = None
    __slots__ = ("function", "cache", "head", "tail", "hits", "miss")

    def __init__(self, function):
        self.function = function
        self.cache = {}
        self.head = Node("head", None)
        self.tail = Node("tail", None)
        self.head.next = self.tail
        self.tail.prev = self.head
        self.hits = 0
        self.miss = 0

call

데코레이터로 사용하기 위함

class pyLRU:
    # ...
    
    # 데코레이터로 사용
    def __call__(self, *args, **kwargs):
        if args in self.cache:
            self._check()  # 캐시 최대 사이즈가 maxsize 이상인지
            self.hits += 1  # 캐시 hit

            self._get(args)  # 부르면 연결 리스트 맨 뒤로 값을 업데이트

            return self.cache[args]

        self._check()    # 캐시 최대 사이즈가 maxsize 이상인지

        result = self.function(*args, **kwargs)
        self.cache[args] = result
        self.miss += 1

        node = Node(args, result)
        self._add(node)

        return result

add, remove, get

get 함수에서 노드를 부르면 맨 뒤로 업데이트하는 이유는

가장 오래 사용하지 않은 페이지 교체 알고리즘이기 때문에

부르면 최근 사용된 것으로 생각하면 된다.

class pyLRU:
    # ...

    def cache_info(self):
        # ex) CacheInfo(hits=247, misses=250, maxsize=32, currsize=32)
        return f"CacheInfo(hits={self.hits}, misses={self.miss}, maxsize={self.maxsize}, currsize={len(self.cache)})"

    def _check(self):  # 캐시 크기가 지정된 maxsize 이상?
        if self.maxsize is not None and len(self.cache) >= self.maxsize:
            node = self.head.next
            self._remove(node)
            if node.key in self.cache:
                del self.cache[node.key]
                
    def print(self):  # 캐시 연결 리스트 노드 보기
        head = self.head
        while head != self.tail:
            print(head.key, end=" -> ")
            head = head.next
        print("tail")

    def _add(self, node):  # 노드 추가 함수
        with self.lock:
            p = self.tail.prev
            p.next = node
            self.tail.prev = node
            node.prev = p
            node.next = self.tail

    def _remove(self, node):  # 노드 제거 함수
        with self.lock:
            p = node.prev
            n = node.next
            p.next = n
            n.prev = p

    def _get(self, args):
        # 부르면 맨 뒤로 업데이트
        # 양쪽에서 노드 찾기
        head = self.head
        tail = self.tail
        while head and tail:
            if head.key == args:
                node = head
                self._remove(node)
                self._add(node)
                break
            head = head.next

            if tail.key == args:
                node = tail
                self._remove(node)
                self._add(node)
                break
            tail = tail.prev

결과

from threading import Lock  # Linked list updates aren't thread-safe


class pyLRU:
    lock = Lock()
    maxsize = None
    __slots__ = ("function", "cache", "head", "tail", "hits", "miss")

    def __init__(self, function):
        self.function = function
        self.cache = {}
        self.head = Node("head", None)
        self.tail = Node("tail", None)
        self.head.next = self.tail
        self.tail.prev = self.head
        self.hits = 0
        self.miss = 0

    def __call__(self, *args, **kwargs):
        if args in self.cache:
            self._check()
            self.hits += 1

            self._get(args)

            return self.cache[args]

        self._check()

        result = self.function(*args, **kwargs)
        self.cache[args] = result
        self.miss += 1

        node = Node(args, result)
        self._add(node)

        return result

    def cache_info(self):
        return f"CacheInfo(hits={self.hits}, misses={self.miss}, maxsize={self.maxsize}, currsize={len(self.cache)})"

    def _check(self):
        if self.maxsize is not None and len(self.cache) >= self.maxsize:
            node = self.head.next
            self._remove(node)
            if node.key in self.cache:
                del self.cache[node.key]

    def _add(self, node):
        with self.lock:
            p = self.tail.prev
            p.next = node
            self.tail.prev = node
            node.prev = p
            node.next = self.tail

    def _remove(self, node):
        with self.lock:
            p = node.prev
            n = node.next
            p.next = n
            n.prev = p

    def _get(self, args):
        # 부르면 맨 뒤로 업데이트
        head = self.head
        tail = self.tail
        while head and tail:
            if head.key == args:
                node = head
                self._remove(node)
                self._add(node)
                break
            head = head.next

            if tail.key == args:
                node = tail
                self._remove(node)
                self._add(node)
                break
            tail = tail.prev

    def print(self):
        head = self.head
        while head != self.tail:
            print(head.key, end=" -> ")
            head = head.next
        print("tail")

빠르긴 한데 꽤 느리다.

그래서 Cython으로 이용해서 성능을 업데이트 해볼 것이다.

pyLRU.maxsize = 32
FIBO = 250

@pyLRU
def fibo(n):
    if n == 1:
        return 0
    elif n == 2:
        return 1
    else:
        return fibo(n - 1) + fibo(n - 2)


from timeit import default_timer

start = default_timer()
print(fibo(FIBO))
print(f"{default_timer() - start:.5f}s")
print(fibo.cache_info())
fibo.print()

"""

# Python 버전
4880197746793002076754294951020699004973287771475874
0.00185s
CacheInfo(hits=247, misses=250, maxsize=32, currsize=32)
head -> (219,) -> (220,) -> (221,) -> (222,) -> (223,) -> (224,) -> (225,) -> (226,) -> (227,) -> (228,) -> (229,) -> (230,) -> (231,) -> (232,) -> (233,) -> (234,) -> (235,) -> (236,) -> (237,) -> (238,) -> (239,) -> (240,) -> (241,) -> (242,) -> (243,) -> (244,) -> (245,) -> (246,) -> (247,) -> (249,) -> (248,) -> (250,) -> tail


# Cython 버전
4880197746793002076754294951020699004973287771475874
0.00049s
CacheInfo(hits=247, misses=250, maxsize=32, currsize=32)

# Python lru_cache 버전
4880197746793002076754294951020699004973287771475874
0.00013s
CacheInfo(hits=247, misses=250, maxsize=32, currsize=32)
"""

Cython 버전

완벽하진 않지만, 대충 cython 느낌으로 만들어 보았다.

cimport cython
from threading import Lock

class Node:
    __slots__ = ("key", "val", "prev", "next")
    def __init__(self, key, val):
        self.key = key
        self.val = val
        self.prev = None
        self.next = None

# @cython.boundscheck(False)
# @cython.wraparound(False)
class LRU(_LRU):
    def __init__(self, function):
        super().__init__(function)
        pass

cdef class _LRU:
    lock = Lock()
    maxsize = None
    cdef int hits, miss
    cdef dict cache
    cdef head, tail

    __slots__ = ("function", "cache", "head", "tail", "hits", "miss")

    def __init__(self, function):
        self.function = function
        self.cache = {}
        self.head = Node("head", None)
        self.tail = Node("tail", None)
        self.head.next = self.tail
        self.tail.prev = self.head
        self.hits = 0
        self.miss = 0

    def __call__(self, *args, **kwargs):
        if args in self.cache:
            self._check()
            self.hits += 1

            self._get(args)

            return self.cache[args]

        self._check()

        result = self.function(*args, **kwargs)
        self.cache[args] = result
        self.miss += 1

        node = Node(args, result)
        self._add(node)

        return result

    cpdef cache_info(self):
        return f"CacheInfo(hits={self.hits}, misses={self.miss}, maxsize={self.maxsize}, currsize={len(self.cache)})"

    cdef _check(self):
        if self.maxsize is not None and len(self.cache) >= self.maxsize:
            node = self.head.next
            self._remove(node)
            if node.key in self.cache:
                del self.cache[node.key]

    cdef _add(self, node):
        p = self.tail.prev
        p.next = node
        self.tail.prev = node
        node.prev = p
        node.next = self.tail

    cdef _remove(self, node):
        p = node.prev
        n = node.next
        p.next = n
        n.prev = p

    cdef _get(self, args):
        # 부르면 맨 뒤로 업데이트
        head = self.head
        tail = self.tail
        while head and tail:
            if head.key == args:
                node = head
                self._remove(node)
                self._add(node)
                break
            head = head.next

            if tail.key == args:
                node = tail
                self._remove(node)
                self._add(node)
                break
            tail = tail.prev

결과

파이썬 lru_cache 속도엔 비교가 안되지만,

기존 구현 버전 3배 이상으로 빨라졌다.

LRU.maxsize = 32
FIBO = 250

@LRU
def fibo(n):
    if n == 1:
        return 0
    elif n == 2:
        return 1
    else:
        return fibo(n - 1) + fibo(n - 2)


start = default_timer()
print(fibo(FIBO))
print(f"{default_timer() - start:.5f}s")
print(fibo.cache_info())

"""

# Python 버전
4880197746793002076754294951020699004973287771475874
0.00185s
CacheInfo(hits=247, misses=250, maxsize=32, currsize=32)

# Cython 버전
4880197746793002076754294951020699004973287771475874
0.00049s
CacheInfo(hits=247, misses=250, maxsize=32, currsize=32)

# Python lru_cache 버전
4880197746793002076754294951020699004973287771475874
0.00013s
CacheInfo(hits=247, misses=250, maxsize=32, currsize=32)
"""

참고

LRU Cache 구현 @GeeksforGeeks

LRU Cache 구현 @Medium

728x90

저작자표시 비영리 변경금지

'컴퓨터 > 파이썬' 카테고리의 다른 글

Elixir에서 Python 이용하기 (0)	2020.11.21
Python: 행렬 곱셈 알고리즘 소개 (0)	2020.11.15
파이썬: Monad (모나드) (1)	2020.11.11

인기포스트 MORE POST

ABOUT ME

STUDY BITS STUDY BITS

Least Recently Used

1. LRU

2. 구현

클래스 데코레이터 만드는 법

이중 연결 리스트 노드

이중 연결 리스트 클래스

call

add, remove, get

결과

Cython 버전

결과

참고

LRU Cache 구현 @GeeksforGeeks

LRU Cache 구현 @Medium

'컴퓨터 > 파이썬' 카테고리의 다른 글

티스토리툴바

인기포스트 MORE POST

ABOUT ME

Least Recently Used

1. LRU

2. 구현

클래스 데코레이터 만드는 법

이중 연결 리스트 노드

이중 연결 리스트 클래스

__call__

add, remove, get

결과

Cython 버전

결과

참고

LRU Cache 구현 @GeeksforGeeks

LRU Cache 구현 @Medium

'컴퓨터 > 파이썬' 카테고리의 다른 글

관련글 관련글 더보기

티스토리툴바

call