diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
index a52bdd81..915ac7aa 100644
--- a/clients/python/pyproject.toml
+++ b/clients/python/pyproject.toml
@@ -12,7 +12,7 @@ repository = "https://github.com/huggingface/text-generation-inference"
 
 [tool.poetry.dependencies]
 python = "^3.7"
-pydantic = "^1.10"
+pydantic = "> 1.10, < 3"
 aiohttp = "^3.8"
 huggingface-hub = ">= 0.12, < 1.0"
 
diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py
index 38f75253..20083b19 100644
--- a/clients/python/text_generation/types.py
+++ b/clients/python/text_generation/types.py
@@ -18,21 +18,21 @@ class Parameters(BaseModel):
     # Stop generating tokens if a member of `stop_sequences` is generated
     stop: List[str] = []
     # Random sampling seed
-    seed: Optional[int]
+    seed: Optional[int] = None
     # The value used to module the logits distribution.
-    temperature: Optional[float]
+    temperature: Optional[float] = None
     # The number of highest probability vocabulary tokens to keep for top-k-filtering.
-    top_k: Optional[int]
+    top_k: Optional[int] = None
     # If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
     # higher are kept for generation.
-    top_p: Optional[float]
+    top_p: Optional[float] = None
     # truncate inputs tokens to the given size
-    truncate: Optional[int]
+    truncate: Optional[int] = None
     # Typical Decoding mass
     # See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information
-    typical_p: Optional[float]
+    typical_p: Optional[float] = None
     # Generate best_of sequences and return the one if the highest token logprobs
-    best_of: Optional[int]
+    best_of: Optional[int] = None
     # Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
     watermark: bool = False
     # Get generation details
@@ -114,7 +114,7 @@ class Request(BaseModel):
     # Prompt
     inputs: str
     # Generation parameters
-    parameters: Optional[Parameters]
+    parameters: Optional[Parameters] = None
     # Whether to stream output tokens
     stream: bool = False
 
@@ -145,7 +145,7 @@ class InputToken(BaseModel):
     text: str
     # Logprob
     # Optional since the logprob of the first token cannot be computed
-    logprob: Optional[float]
+    logprob: Optional[float] = None
 
 
 # Generated tokens
@@ -180,7 +180,7 @@ class BestOfSequence(BaseModel):
     # Number of generated tokens
     generated_tokens: int
     # Sampling seed if sampling was activated
-    seed: Optional[int]
+    seed: Optional[int] = None
     # Decoder input tokens, empty if decoder_input_details is False
     prefill: List[InputToken]
     # Generated tokens
@@ -196,7 +196,7 @@ class Details(BaseModel):
     # Number of generated tokens
     generated_tokens: int
     # Sampling seed if sampling was activated
-    seed: Optional[int]
+    seed: Optional[int] = None
     # Decoder input tokens, empty if decoder_input_details is False
     prefill: List[InputToken]
     # Generated tokens
@@ -204,7 +204,7 @@ class Details(BaseModel):
     # Most likely tokens
     top_tokens: Optional[List[List[Token]]]
     # Additional sequences when using the `best_of` parameter
-    best_of_sequences: Optional[List[BestOfSequence]]
+    best_of_sequences: Optional[List[BestOfSequence]] = None
 
 
 # `generate` return value
@@ -222,7 +222,7 @@ class StreamDetails(BaseModel):
     # Number of generated tokens
     generated_tokens: int
     # Sampling seed if sampling was activated
-    seed: Optional[int]
+    seed: Optional[int] = None
 
 
 # `generate_stream` return value
@@ -233,10 +233,10 @@ class StreamResponse(BaseModel):
     top_tokens: Optional[List[Token]]
     # Complete generated text
     # Only available when the generation is finished
-    generated_text: Optional[str]
+    generated_text: Optional[str] = None
     # Generation details
     # Only available when the generation is finished
-    details: Optional[StreamDetails]
+    details: Optional[StreamDetails] = None
 
 
 # Inference API currently deployed model
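
Note (not part of the diff): the widened constraint `pydantic = "> 1.10, < 3"` allows pydantic v2, where an `Optional[...]` annotation by itself no longer implies a default of `None` and the field becomes required; that is why every changed field gains an explicit `= None`. A minimal sketch of the behaviour difference, using a hypothetical model name:

from typing import Optional

from pydantic import BaseModel, ValidationError


# Hypothetical model, for illustration only.
class Example(BaseModel):
    # Required under pydantic v2 because no default is given;
    # pydantic v1 treated Optional[...] as implicitly defaulting to None.
    seed: Optional[int]
    # Optional under both v1 and v2 thanks to the explicit default,
    # which is exactly what the diff adds to the client types.
    temperature: Optional[float] = None


Example(seed=42)  # accepted on both major versions
try:
    Example()  # under pydantic v2 this raises: `seed` is a required field
except ValidationError as err:
    print(err)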