# hf_text-generation-inference/server/bloom_inference/cache.py
#
# 49 lines
# 1.2 KiB
# Python
# Raw Normal View History
#
# 2022-10-08 04:30:12 -06:00
import torch
from dataclasses import dataclass
from typing import Dict, Optional, List
from bloom_inference.pb import generate_pb2
from bloom_inference.utils import NextTokenChooser, StoppingCriteria
@dataclass
class CacheEntry:
    """Per-batch state bundled under a single batch id.

    ``len(entry)`` is the number of request ids it holds, and ``to_pb()``
    produces the protobuf summary of the entry.
    """

    # Identifier of the batch; sent as ``id`` in the protobuf message.
    batch_id: int
    # Ids of the requests in this batch; its length is ``len(entry)``.
    request_ids: List[int]
    # Tensors keyed by name -- presumably the model inputs for the next
    # step; TODO confirm against the code that builds the entry.
    input_ids: Dict[str, torch.Tensor]
    # Tensors whose ``len()`` feeds ``sequence_length`` in ``to_pb`` --
    # presumably one per request, parallel to ``request_ids``; TODO confirm.
    all_input_ids: List[torch.Tensor]
    # Presumably one chooser / stopping criteria per request (same order as
    # ``request_ids``); not used by any method of this class.
    next_token_choosers: List[NextTokenChooser]
    stopping_criterias: List[StoppingCriteria]

    def __len__(self):
        """Return the number of request ids held by this entry."""
        return len(self.request_ids)

    def to_pb(self):
        """Build the ``generate_pb2.CacheEntry`` message for this entry.

        ``sequence_length`` is the largest ``len()`` among the tensors in
        ``all_input_ids``, or 0 when that list is empty.
        """
        # ``default=0`` keeps an entry without tensors from raising
        # ``ValueError: max() arg is an empty sequence``.
        longest = max((len(ids) for ids in self.all_input_ids), default=0)
        return generate_pb2.CacheEntry(
            id=self.batch_id,
            request_ids=self.request_ids,
            sequence_length=longest,
        )
class Cache:
    """In-memory store of ``CacheEntry`` objects keyed by their batch id.

    Keys are taken from ``entry.batch_id`` in ``set``, so every lookup method
    expects that same id.
    """

    def __init__(self):
        """Create an empty cache."""
        self.cache: Dict[int, "CacheEntry"] = {}

    def pop(self, batch_id: int) -> Optional["CacheEntry"]:
        """Remove and return the entry stored under ``batch_id``.

        Returns ``None`` when no entry is stored under that id.
        """
        return self.cache.pop(batch_id, None)

    def set(self, entry: Optional["CacheEntry"]):
        """Store ``entry`` under ``entry.batch_id``, replacing any previous one.

        ``None`` is accepted and ignored.
        """
        if entry is None:
            return
        self.cache[entry.batch_id] = entry

    def delete(self, batch_id: int):
        """Remove the entry stored under ``batch_id``.

        Raises:
            KeyError: if no entry is stored under ``batch_id``.
        """
        del self.cache[batch_id]

    def clear(self):
        """Remove every entry from the cache."""
        self.cache.clear()

    def __len__(self) -> int:
        """Return the number of entries currently stored."""
        return len(self.cache)