do not skip workflow on cuda, fix no space left on device
This commit is contained in:
parent
f16f0ad92b
commit
09a41f2c43
|
@ -174,7 +174,7 @@ jobs:
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
|
group: ${{ github.workflow }}-${{ github.job }}-${{ needs.build-and-push.outputs.label }}-${{ github.head_ref || github.run_id }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
if: needs.build-and-push.outputs.runs_on == 'amd-gpu-tgi'
|
if: needs.build-and-push.outputs.runs_on != 'ubuntu-latest'
|
||||||
container:
|
container:
|
||||||
image: ${{ needs.build-and-push.outputs.docker_image }}
|
image: ${{ needs.build-and-push.outputs.docker_image }}
|
||||||
options: --shm-size "16gb" --ipc host -v ${{ needs.build-and-push.outputs.docker_volume }}:/data
|
options: --shm-size "16gb" --ipc host -v ${{ needs.build-and-push.outputs.docker_volume }}:/data
|
||||||
|
@ -191,7 +191,7 @@ jobs:
|
||||||
pwd
|
pwd
|
||||||
echo "ls:"
|
echo "ls:"
|
||||||
ls
|
ls
|
||||||
python integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }}
|
python integration-tests/clean_cache_and_download.py --token ${{ secrets.HF_TOKEN }} --cache-dir /data
|
||||||
fi
|
fi
|
||||||
|
|
||||||
integration_tests:
|
integration_tests:
|
||||||
|
@ -243,4 +243,8 @@ jobs:
|
||||||
echo "SYSTEM:"
|
echo "SYSTEM:"
|
||||||
echo $SYSTEM
|
echo $SYSTEM
|
||||||
|
|
||||||
|
export DOCKER_VOLUME=${{ needs.build-and-push.outputs.docker_volume }}
|
||||||
|
echo "DOCKER_VOLUME:"
|
||||||
|
echo $DOCKER_VOLUME
|
||||||
|
|
||||||
pytest -s -vvvvv integration-tests
|
pytest -s -vvvvv integration-tests
|
||||||
|
|
|
@ -35,7 +35,7 @@ REQUIRED_MODELS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def cleanup_cache(token: str):
|
def cleanup_cache(token: str, cache_dir: str):
|
||||||
# Retrieve the size per model for all models used in the CI.
|
# Retrieve the size per model for all models used in the CI.
|
||||||
size_per_model = {}
|
size_per_model = {}
|
||||||
extension_per_model = {}
|
extension_per_model = {}
|
||||||
|
@ -74,7 +74,7 @@ def cleanup_cache(token: str):
|
||||||
total_required_size = sum(size_per_model.values())
|
total_required_size = sum(size_per_model.values())
|
||||||
print(f"Total required disk: {total_required_size:.2f} GB")
|
print(f"Total required disk: {total_required_size:.2f} GB")
|
||||||
|
|
||||||
cached_dir = huggingface_hub.scan_cache_dir()
|
cached_dir = huggingface_hub.scan_cache_dir(cache_dir)
|
||||||
|
|
||||||
cache_size_per_model = {}
|
cache_size_per_model = {}
|
||||||
cached_required_size_per_model = {}
|
cached_required_size_per_model = {}
|
||||||
|
@ -121,7 +121,7 @@ def cleanup_cache(token: str):
|
||||||
|
|
||||||
print("Removing", largest_model_id)
|
print("Removing", largest_model_id)
|
||||||
for sha in cached_shas_per_model[largest_model_id]:
|
for sha in cached_shas_per_model[largest_model_id]:
|
||||||
huggingface_hub.scan_cache_dir().delete_revisions(sha).execute()
|
huggingface_hub.scan_cache_dir(cache_dir).delete_revisions(sha).execute()
|
||||||
|
|
||||||
del cache_size_per_model[largest_model_id]
|
del cache_size_per_model[largest_model_id]
|
||||||
|
|
||||||
|
@ -135,10 +135,11 @@ if __name__ == "__main__":
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--token", help="Hugging Face Hub token.", required=True, type=str
|
"--token", help="Hugging Face Hub token.", required=True, type=str
|
||||||
)
|
)
|
||||||
|
parser.add_argument("--cache-dir", help="Hub cache path.", required=True, type=str)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
start = time.time()
|
start = time.time()
|
||||||
extension_per_model = cleanup_cache(args.token)
|
extension_per_model = cleanup_cache(args.token, args.cache_dir)
|
||||||
end = time.time()
|
end = time.time()
|
||||||
|
|
||||||
print(f"Cache cleanup done in {end - start:.2f} s")
|
print(f"Cache cleanup done in {end - start:.2f} s")
|
||||||
|
@ -153,6 +154,7 @@ if __name__ == "__main__":
|
||||||
revision=revision,
|
revision=revision,
|
||||||
token=args.token,
|
token=args.token,
|
||||||
allow_patterns=f"*{extension_per_model[model_id]}",
|
allow_patterns=f"*{extension_per_model[model_id]}",
|
||||||
|
cache_dir=args.cache_dir,
|
||||||
)
|
)
|
||||||
end = time.time()
|
end = time.time()
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue