import asyncio

import aiohttp
import tiktoken
from llm_server import opts


def tokenize(prompt: str, backend_url: str) -> int:
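    """Count the tokens in `prompt` by POSTing 300-character chunks to the
    backend's /tokenize endpoint concurrently, falling back to a local
    tiktoken estimate for any chunk whose request fails."""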
    assert backend_url

    if not prompt:
        return 0

    async def run():
        tokenizer = tiktoken.get_encoding("cl100k_base")
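        # Tokenize one chunk on the backend; on any failure, fall back to a
        # local tiktoken count padded by 10 tokens. `session` is the
        # ClientSession bound below in run() before these coroutines are awaited.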
        async def send_chunk(chunk):
            try:
                async with session.post(f'{backend_url}/tokenize', json={'input': chunk}, verify_ssl=opts.verify_ssl, timeout=opts.backend_generate_request_timeout) as response:
                    j = await response.json()
                    return j['length']
            except Exception as e:
                print(f'Failed to tokenize using VLLM - {e.__class__.__name__}: {e}')
                return len(tokenizer.encode(chunk)) + 10
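        # Split the prompt into fixed-size chunks so they can be tokenized
        # concurrently.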
        chunk_size = 300
        chunks = [prompt[i:i + chunk_size] for i in range(0, len(prompt), chunk_size)]

        async with aiohttp.ClientSession() as session:
            tasks = [send_chunk(chunk) for chunk in chunks]
            lengths = await asyncio.gather(*tasks)

        return sum(lengths)
    return asyncio.run(run())
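# Example usage (hypothetical backend URL):
#   token_count = tokenize('Hello, world!', 'https://vllm-backend.example:7000')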