hf_text-generation-inference/docs/source/_toctree.yml

- sections:
  - local: index
    title: Text Generation Inference
  - local: quicktour
    title: Quick Tour
  - local: installation_nvidia
    title: Using TGI with Nvidia GPUs
  - local: installation_amd
    title: Using TGI with AMD GPUs
  - local: installation_gaudi
    title: Using TGI with Intel Gaudi
  - local: installation_inferentia
    title: Using TGI with AWS Inferentia
  - local: installation
    title: Installation from source
  - local: supported_models
    title: Supported Models and Hardware
  - local: messages_api
    title: Messages API
  - local: architecture
    title: Internal Architecture
  title: Getting started
- sections:
  - local: basic_tutorials/consuming_tgi
    title: Consuming TGI
  - local: basic_tutorials/preparing_model
    title: Preparing Model for Serving
  - local: basic_tutorials/gated_model_access
    title: Serving Private & Gated Models
  - local: basic_tutorials/using_cli
    title: Using TGI CLI
  - local: basic_tutorials/launcher
    title: All TGI CLI options
  - local: basic_tutorials/non_core_models
    title: Non-core Model Serving
  - local: basic_tutorials/safety
    title: Safety
  - local: basic_tutorials/using_guidance
    title: Using Guidance, JSON, tools
  - local: basic_tutorials/visual_language_models
    title: Visual Language Models
  - local: basic_tutorials/monitoring
    title: Monitoring TGI with Prometheus and Grafana
  - local: basic_tutorials/train_medusa
    title: Train Medusa
  title: Tutorials
- sections:
  - local: conceptual/streaming
    title: Streaming
  - local: conceptual/quantization
    title: Quantization
  - local: conceptual/tensor_parallelism
    title: Tensor Parallelism
  - local: conceptual/paged_attention
    title: PagedAttention
  - local: conceptual/safetensors
    title: Safetensors
  - local: conceptual/flash_attention
    title: Flash Attention
  - local: conceptual/speculation
    title: Speculation (Medusa, ngram)
  - local: conceptual/guidance
    title: How Guidance Works (via outlines)
  - local: conceptual/lora
    title: LoRA (Low-Rank Adaptation)
  title: Conceptual Guides