chore(client): Support Pydantic 2 (#900)

This should allow users to use either Pydantic 2 or Pydantic 1.

I couldn't run all tests locally because I reran them too often and got
rate limited, but I believe this is sufficient.
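
To illustrate the underlying incompatibility: Pydantic 1 treats a bare `Optional[...]` annotation as an optional field defaulting to None, while Pydantic 2 treats it as a required field that merely accepts None, so the explicit `= None` defaults below are what keep the models importable and constructible on both majors. A minimal sketch (the model names are made up for illustration, not part of this change):

from typing import Optional
from pydantic import BaseModel, ValidationError

class Before(BaseModel):
    # Old style: optional under Pydantic 1, but *required* under Pydantic 2
    seed: Optional[int]

class After(BaseModel):
    # New style: optional with a None default under both majors
    seed: Optional[int] = None

try:
    Before()  # raises ValidationError ("missing") when running on Pydantic 2
except ValidationError as exc:
    print("Pydantic 2 rejects the old annotation:", exc.errors()[0]["type"])

print(After().seed)  # None on both Pydantic 1 and Pydantic 2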
Jelle Zijlstra 2023-09-06 05:12:08 -07:00 committed by GitHub
parent 033230ae66
commit c8bbbd8129
2 changed files with 16 additions and 16 deletions

View File

@@ -12,7 +12,7 @@ repository = "https://github.com/huggingface/text-generation-inference"
 [tool.poetry.dependencies]
 python = "^3.7"
-pydantic = "^1.10"
+pydantic = "> 1.10, < 3"
 aiohttp = "^3.8"
 huggingface-hub = ">= 0.12, < 1.0"
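
The loosened Poetry constraint accepts any release newer than 1.10 and below 3, so the installed major can be either 1 or 2. If client code ever needs to branch on that, the version string is available at runtime; a small sketch, not part of this diff:

import pydantic

# pydantic.VERSION is a plain string such as "1.10.13" or "2.3.0" in both majors.
PYDANTIC_V2 = pydantic.VERSION.startswith("2.")
print(f"Running against Pydantic {2 if PYDANTIC_V2 else 1} ({pydantic.VERSION})")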

View File

@@ -18,21 +18,21 @@ class Parameters(BaseModel):
     # Stop generating tokens if a member of `stop_sequences` is generated
     stop: List[str] = []
     # Random sampling seed
-    seed: Optional[int]
+    seed: Optional[int] = None
     # The value used to module the logits distribution.
-    temperature: Optional[float]
+    temperature: Optional[float] = None
     # The number of highest probability vocabulary tokens to keep for top-k-filtering.
-    top_k: Optional[int]
+    top_k: Optional[int] = None
     # If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
     # higher are kept for generation.
-    top_p: Optional[float]
+    top_p: Optional[float] = None
     # truncate inputs tokens to the given size
-    truncate: Optional[int]
+    truncate: Optional[int] = None
     # Typical Decoding mass
     # See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information
-    typical_p: Optional[float]
+    typical_p: Optional[float] = None
     # Generate best_of sequences and return the one if the highest token logprobs
-    best_of: Optional[int]
+    best_of: Optional[int] = None
     # Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
     watermark: bool = False
     # Get generation details
@@ -114,7 +114,7 @@ class Request(BaseModel):
     # Prompt
     inputs: str
     # Generation parameters
-    parameters: Optional[Parameters]
+    parameters: Optional[Parameters] = None
     # Whether to stream output tokens
     stream: bool = False
@@ -145,7 +145,7 @@ class InputToken(BaseModel):
     text: str
     # Logprob
     # Optional since the logprob of the first token cannot be computed
-    logprob: Optional[float]
+    logprob: Optional[float] = None

 # Generated tokens
@@ -180,7 +180,7 @@ class BestOfSequence(BaseModel):
     # Number of generated tokens
     generated_tokens: int
     # Sampling seed if sampling was activated
-    seed: Optional[int]
+    seed: Optional[int] = None
     # Decoder input tokens, empty if decoder_input_details is False
     prefill: List[InputToken]
     # Generated tokens
@@ -196,7 +196,7 @@ class Details(BaseModel):
     # Number of generated tokens
     generated_tokens: int
     # Sampling seed if sampling was activated
-    seed: Optional[int]
+    seed: Optional[int] = None
     # Decoder input tokens, empty if decoder_input_details is False
     prefill: List[InputToken]
     # Generated tokens
@@ -204,7 +204,7 @@ class Details(BaseModel):
     # Most likely tokens
     top_tokens: Optional[List[List[Token]]]
     # Additional sequences when using the `best_of` parameter
-    best_of_sequences: Optional[List[BestOfSequence]]
+    best_of_sequences: Optional[List[BestOfSequence]] = None

 # `generate` return value
@@ -222,7 +222,7 @@ class StreamDetails(BaseModel):
     # Number of generated tokens
     generated_tokens: int
     # Sampling seed if sampling was activated
-    seed: Optional[int]
+    seed: Optional[int] = None

 # `generate_stream` return value
@@ -233,10 +233,10 @@ class StreamResponse(BaseModel):
     top_tokens: Optional[List[Token]]
     # Complete generated text
     # Only available when the generation is finished
-    generated_text: Optional[str]
+    generated_text: Optional[str] = None
     # Generation details
     # Only available when the generation is finished
-    details: Optional[StreamDetails]
+    details: Optional[StreamDetails] = None

 # Inference API currently deployed model
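
For context on the last hunk: `generated_text` and `details` are only populated on the final chunk of a stream, which is why they need an explicit None default once Pydantic 2 stops inferring one. A rough usage sketch assuming the client's `generate_stream` API and a locally running server (both hypothetical details, not part of this diff):

from text_generation import Client

client = Client("http://127.0.0.1:8080")  # hypothetical local endpoint
for chunk in client.generate_stream("What is deep learning?", max_new_tokens=20):
    # Intermediate StreamResponse chunks carry generated_text=None and details=None;
    # only the final chunk fills them in.
    if chunk.generated_text is not None:
        print(chunk.generated_text)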