diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 545389fd..dcfde28a 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -17,6 +17,7 @@ jobs:
     outputs:
       docker_image: ${{ steps.final.outputs.docker_image }}
       docker_devices: ${{ steps.final.outputs.docker_devices }}
+      docker_volume: ${{ steps.final.outputs.docker_volume }}
       runs_on: ${{ steps.final.outputs.runs_on }}
       label: ${{ steps.final.outputs.label }}
     concurrency:
@@ -157,6 +158,41 @@ jobs:
           echo "runs_on=${{ env.RUNS_ON }}" >> "$GITHUB_OUTPUT"
           echo "label=${{ env.LABEL }}" >> "$GITHUB_OUTPUT"
 
+          # Host directory holding the Hugging Face cache, exposed as a job output.
+          if [[ "${{ inputs.hardware }}" == "rocm" ]]
+          then
+            echo "docker_volume=/data/cache/.cache/huggingface/hub" >> "$GITHUB_OUTPUT"
+          else
+            echo "docker_volume=/mnt/cache" >> "$GITHUB_OUTPUT"
+          fi
+
+
+  # Frees space in the Hugging Face cache before the integration tests run.
+  prepare_integration_tests:
+    runs-on: ["self-hosted", "${{ needs.build-and-push.outputs.runs_on }}", "multi-gpu"]
+    needs: build-and-push
+    concurrency:
+      group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
+      cancel-in-progress: true
+    if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
+    container:
+      image: ${{ needs.build-and-push.outputs.docker_image }}
+      options: --shm-size "16gb" --ipc host -v ${{ needs.build-and-push.outputs.docker_volume }}:/data
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Clean Hugging Face cache
+        # Run steps inside a container default to `sh`, which has no `[[ ]]`.
+        shell: bash
+        env:
+          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+        run: |
+          if [[ "${{ inputs.hardware }}" == "rocm" ]]
+          then
+            python integration-tests/clean_cache.py
+          fi
+
+
   integration_tests:
     concurrency:
       group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
@@ -188,7 +224,7 @@ jobs:
 
       - name: Run tests
         run: |
-          export DOCKER_VOLUME=/mnt/cache
+          export DOCKER_VOLUME=${{ needs.build-and-push.outputs.docker_volume }}
           export DOCKER_DEVICES=${{ needs.build-and-push.outputs.docker_devices }}
           export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
 
diff --git a/integration-tests/clean_cache.py b/integration-tests/clean_cache.py
new file mode 100644
index 00000000..58a78dd4
--- /dev/null
+++ 
b/integration-tests/clean_cache.py
@@ -0,0 +1,165 @@
+"""Make room in the Hugging Face cache for the models used in the CI.
+
+The download size of every model in ``REQUIRED_MODELS`` is compared with what
+is already cached; cached models that the CI does not use are then deleted,
+largest first, until the missing required models fit on the cache volume.
+"""
+
+import os
+import shutil
+
+import huggingface_hub
+
+# Model id -> revision for all models used in the CI.
+REQUIRED_MODELS = {
+    "bigscience/bloom-560m": "main",
+    "TinyLlama/TinyLlama-1.1B-Chat-v1.0": "main",
+    "abhinavkulkarni/codellama-CodeLlama-7b-Python-hf-w4-g128-awq": "main",
+    "tiiuae/falcon-7b": "main",
+    "TechxGenus/gemma-2b-GPTQ": "main",
+    "google/gemma-2b": "main",
+    "openai-community/gpt2": "main",
+    "turboderp/Llama-3-8B-Instruct-exl2": "2.5bpw",
+    "huggingface/llama-7b-gptq": "main",
+    "neuralmagic/llama-2-7b-chat-marlin": "main",
+    "huggingface/llama-7b": "main",
+    "FasterDecoding/medusa-vicuna-7b-v1.3": "refs/pr/1",
+    "mistralai/Mistral-7B-Instruct-v0.1": "main",
+    "OpenAssistant/oasst-sft-1-pythia-12b": "main",
+    "stabilityai/stablelm-tuned-alpha-3b": "main",
+    "google/paligemma-3b-pt-224": "main",
+    "microsoft/phi-2": "main",
+    "Qwen/Qwen1.5-0.5B": "main",
+    "bigcode/starcoder": "main",
+    "Narsil/starcoder-gptq": "main",
+    "bigcode/starcoder2-3b": "main",
+    "HuggingFaceM4/idefics-9b-instruct": "main",
+    "HuggingFaceM4/idefics2-8b": "main",
+    "llava-hf/llava-v1.6-mistral-7b-hf": "main",
+    "state-spaces/mamba-130m": "main",
+    "mosaicml/mpt-7b": "main",
+    "bigscience/mt0-base": "main",
+    "google/flan-t5-xxl": "main",
+}
+
+# Weight formats by order of preference: only files of the first format found
+# in a repository count towards its download size.
+WEIGHT_EXTENSIONS = (".safetensors", ".pt", ".bin")
+
+BYTES_TO_GB = 1e-9
+
+# Head-room kept on top of the estimated download size.
+SAFETY_MARGIN = 1.05
+
+
+def _weight_extension(filenames):
+    """Return the preferred weight-file suffix present in *filenames*, or None."""
+    for extension in WEIGHT_EXTENSIONS:
+        if any(filename.endswith(extension) for filename in filenames):
+            return extension
+    return None
+
+
+def _required_model_size(model_id, revision, token):
+    """Return the size in GB of the weight files of *model_id* at *revision*."""
+    filenames = huggingface_hub.list_repo_files(
+        model_id,
+        repo_type="model",
+        revision=revision,
+        token=token,
+    )
+
+    extension = _weight_extension(filenames)
+    if extension is None:
+        # No known weight format in the repository: nothing to account for.
+        return 0.0
+
+    size = 0.0
+    for filename in filenames:
+        if not filename.endswith(extension):
+            continue
+        file_url = huggingface_hub.hf_hub_url(model_id, filename, revision=revision)
+        file_metadata = huggingface_hub.get_hf_file_metadata(file_url, token=token)
+        # The Hub may not report a size, in which case ``size`` is None.
+        size += (file_metadata.size or 0) * BYTES_TO_GB
+    return size
+
+
+def cleanup_cache(token=None, data_dir="/data"):
+    """Delete cached models unused by the CI until the required ones fit.
+
+    Args:
+        token: Hugging Face Hub token used to query repository metadata.
+            Defaults to the ``HUGGING_FACE_HUB_TOKEN`` environment variable.
+        data_dir: Directory whose free disk space is measured; assumed to be on
+            the volume that holds the Hugging Face cache.
+
+    Raises:
+        ValueError: If the required models cannot fit, even after deleting
+            every cached model that the CI does not use.
+    """
+    if token is None:
+        token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+    # Retrieve the size per model for all models used in the CI.
+    size_per_model = {
+        model_id: _required_model_size(model_id, revision, token)
+        for model_id, revision in REQUIRED_MODELS.items()
+    }
+
+    cache_info = huggingface_hub.scan_cache_dir()
+
+    cache_size_per_model = {}
+    cached_required_size_per_model = {}
+    cached_shas_per_model = {}
+
+    # Retrieve the sizes and SHAs of the models in the cache, keeping apart the
+    # ones that the CI does not need.
+    for repo in cache_info.repos:
+        if repo.repo_type != "model":
+            continue
+
+        repo_size = repo.size_on_disk * BYTES_TO_GB
+        if repo.repo_id in REQUIRED_MODELS:
+            cached_required_size_per_model[repo.repo_id] = repo_size
+        else:
+            cache_size_per_model[repo.repo_id] = repo_size
+            # Every revision is listed, not only the ones behind a ref, so that
+            # deleting them frees the whole ``size_on_disk`` counted above.
+            cached_shas_per_model[repo.repo_id] = [
+                revision.commit_hash for revision in repo.revisions
+            ]
+
+    # Clamp per model: a required model whose cache entry is larger than its
+    # weight files must not offset the size of the models still to download.
+    total_non_cached_required_size = sum(
+        max(size - cached_required_size_per_model.get(model_id, 0.0), 0.0)
+        for model_id, size in size_per_model.items()
+    )
+    required_free_space = total_non_cached_required_size * SAFETY_MARGIN
+    total_other_cached_size = sum(cache_size_per_model.values())
+
+    free_space = shutil.disk_usage(data_dir).free * BYTES_TO_GB
+    if free_space + total_other_cached_size < required_free_space:
+        raise ValueError(
+            "Not enough space on device to execute the complete CI, please clean up the CI machine"
+        )
+
+    while free_space < required_free_space:
+        if not cache_size_per_model:
+            raise ValueError("This should not happen.")
+
+        largest_model_id = max(cache_size_per_model, key=cache_size_per_model.get)
+
+        print("Removing", largest_model_id)
+        cache_info.delete_revisions(
+            *cached_shas_per_model[largest_model_id]
+        ).execute()
+
+        del cache_size_per_model[largest_model_id]
+
+        free_space = shutil.disk_usage(data_dir).free * BYTES_TO_GB
+
+
+if __name__ == "__main__":
+    cleanup_cache()