local-llm-server/llm_server/llm/vllm/tokenize.py


import traceback

import requests
import tiktoken

from llm_server import opts

# Local fallback tokenizer, used when the vLLM backend can't be reached.
tokenizer = tiktoken.get_encoding("cl100k_base")


def tokenize(prompt: str) -> int:
    if not prompt:
        # The tokenizers have issues when the prompt is None.
        return 0
    try:
        # Ask the vLLM backend for an exact token count.
        r = requests.post(f'{opts.backend_url}/tokenize', json={'input': prompt}, verify=opts.verify_ssl, timeout=opts.backend_generate_request_timeout)
        j = r.json()
        return j['length']
    except Exception:
        # Fall back to a local tiktoken estimate, padded by 10 tokens as a
        # margin since cl100k_base may not match the backend's tokenizer.
        traceback.print_exc()
        return len(tokenizer.encode(prompt)) + 10
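

# A minimal usage sketch (illustrative, not part of the original file). It
# assumes `opts.backend_url` points at a running vLLM backend that exposes a
# POST /tokenize endpoint; if the request fails, the tiktoken fallback path
# above is exercised instead.
if __name__ == '__main__':
    example_prompt = "Hello, world!"
    print(f"{tokenize(example_prompt)} tokens for {example_prompt!r}")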