# Documentation navigation tree.
#
# The document is a sequence of groups. Each group carries:
#   sections: ordered list of pages in the group
#   title:    heading of the group
# Each page entry carries:
#   local: page identifier (presumably the source path of the page relative
#          to the docs root, without extension; confirm with the doc tooling)
#   title: label of the page

# Group: Getting started
- sections:
  - local: index
    title: Text Generation Inference
  - local: quicktour
    title: Quick Tour
  - local: installation_nvidia
    title: Using TGI with Nvidia GPUs
  - local: installation_amd
    title: Using TGI with AMD GPUs
  - local: installation_gaudi
    title: Using TGI with Intel Gaudi
  - local: installation_inferentia
    title: Using TGI with AWS Inferentia
  - local: installation
    title: Installation from source
  - local: supported_models
    title: Supported Models and Hardware
  - local: messages_api
    title: Messages API
  - local: architecture
    title: Internal Architecture
  title: Getting started

# Group: Tutorials
- sections:
  - local: basic_tutorials/consuming_tgi
    title: Consuming TGI
  - local: basic_tutorials/preparing_model
    title: Preparing Model for Serving
  - local: basic_tutorials/gated_model_access
    title: Serving Private & Gated Models
  - local: basic_tutorials/using_cli
    title: Using TGI CLI
  - local: basic_tutorials/launcher
    title: All TGI CLI options
  - local: basic_tutorials/non_core_models
    title: Non-core Model Serving
  - local: basic_tutorials/safety
    title: Safety
  - local: basic_tutorials/using_guidance
    title: Using Guidance, JSON, tools
  - local: basic_tutorials/visual_language_models
    title: Visual Language Models
  - local: basic_tutorials/monitoring
    title: Monitoring TGI with Prometheus and Grafana
  - local: basic_tutorials/train_medusa
    title: Train Medusa
  title: Tutorials

# Group: Conceptual Guides
- sections:
  - local: conceptual/streaming
    title: Streaming
  - local: conceptual/quantization
    title: Quantization
  - local: conceptual/tensor_parallelism
    title: Tensor Parallelism
  - local: conceptual/paged_attention
    title: PagedAttention
  - local: conceptual/safetensors
    title: Safetensors
  - local: conceptual/flash_attention
    title: Flash Attention
  - local: conceptual/speculation
    title: Speculation (Medusa, ngram)
  - local: conceptual/guidance
    # Closing parenthesis restored; the title was previously truncated.
    title: How Guidance Works (via outlines)
  - local: conceptual/lora
    title: LoRA (Low-Rank Adaptation)
  title: Conceptual Guides