238 lines
7.7 KiB
Python
238 lines
7.7 KiB
Python
import time
|
|
import os
|
|
|
|
from datetime import timedelta
|
|
from loguru import logger
|
|
from pathlib import Path
|
|
from typing import Optional, List
|
|
|
|
from huggingface_hub import file_download, hf_api, HfApi, hf_hub_download
|
|
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
|
|
from huggingface_hub.utils import (
|
|
LocalEntryNotFoundError,
|
|
EntryNotFoundError,
|
|
RevisionNotFoundError, # noqa # Import here to ease try/except in other part of the lib
|
|
)
|
|
|
|
# Optional directory in which weight files are looked up instead of the
# Hugging Face cache; None when the environment variable is not set.
WEIGHTS_CACHE_OVERRIDE = os.environ.get("WEIGHTS_CACHE_OVERRIDE")
# True when HF_HUB_OFFLINE is set to "true", "1" or "yes" (case-insensitive).
HF_HUB_OFFLINE = os.getenv("HF_HUB_OFFLINE", "0").lower() in ("true", "1", "yes")
|
|
|
|
|
|
def _cached_weight_files(
    model_id: str, revision: Optional[str], extension: str
) -> List[str]:
    """List the weight files found in the cached snapshot of a model revision.

    Returns an empty list when no snapshot directory is cached for the
    requested model and revision.
    """
    snapshot_dir = _get_cached_revision_directory(model_id, revision)
    return _weight_files_from_dir(snapshot_dir, extension) if snapshot_dir else []
|
|
|
|
|
|
def _weight_hub_files_from_model_info(
    info: "hf_api.ModelInfo", extension: str
) -> List[str]:
    """Select the top-level weight files listed in a Hub model info.

    A sibling file is kept when its name ends with ``extension``, sits at the
    repository root (no ``/`` in its name) and contains none of the excluded
    substrings below.
    """
    excluded_markers = ("arguments", "args", "training", "medusa_lm_head")

    selected = []
    for sibling in info.siblings:
        name = sibling.rfilename
        if not name.endswith(extension):
            continue
        if "/" in name:
            # Only files at the repository root are considered
            continue
        if any(marker in name for marker in excluded_markers):
            continue
        selected.append(name)
    return selected
|
|
|
|
|
|
def _weight_files_from_dir(d: Path, extension: str) -> List[str]:
    """Return the paths of the weight files located directly inside ``d``.

    Only the first level of the directory is scanned (no recursion), which
    mirrors the ``len(s.rfilename.split("/")) == 1`` condition applied in
    ``_weight_hub_files_from_model_info``.

    Args:
        d: Directory to scan.
        extension: Required filename suffix, e.g. ``".safetensors"``.

    Returns:
        The matching paths (``d`` joined with each filename), sorted by
        filename so the order does not depend on the filesystem. The list is
        empty when nothing matches or when ``d`` cannot be scanned.
    """
    excluded_markers = ("arguments", "args", "adapter", "training", "medusa_lm_head")

    # os.walk ignores scan errors by default and then yields nothing; the
    # default value keeps a missing/unreadable directory from leaking
    # StopIteration to the caller.
    first_level = next(os.walk(str(d)), None)
    if first_level is None:
        return []

    root, _, files = first_level
    return [
        os.path.join(root, f)
        for f in sorted(files)
        if f.endswith(extension)
        and not any(marker in f for marker in excluded_markers)
    ]
|
|
|
|
|
|
def _get_cached_revision_directory(
    model_id: str, revision: Optional[str]
) -> Optional[Path]:
    """Locate the cached snapshot directory of ``model_id`` at ``revision``.

    ``revision`` defaults to ``"main"``. When a file named after the revision
    exists under the cache's ``refs`` folder, its content replaces the
    revision (for instance to convert ``main`` to the associated commit sha).

    Returns:
        ``snapshots/<revision>`` inside the model's cache folder, or ``None``
        when the model, the ``snapshots`` folder or the requested revision is
        not present in the cache.
    """
    if revision is None:
        revision = "main"

    repo_cache = Path(HUGGINGFACE_HUB_CACHE) / Path(
        file_download.repo_folder_name(repo_id=model_id, repo_type="model")
    )
    if not repo_cache.is_dir():
        # No cache for this model
        return None

    refs_dir = repo_cache / "refs"
    snapshots_dir = repo_cache / "snapshots"

    # Resolve refs (for instance to convert main to the associated commit sha)
    if refs_dir.is_dir():
        revision_file = refs_dir / revision
        # is_file() rather than exists(): a revision that names a directory
        # under refs (e.g. "" or "pr") must not be opened as a file.
        if revision_file.is_file():
            with revision_file.open() as f:
                # Strip so a trailing newline in the ref file cannot break
                # the snapshot lookup below.
                revision = f.read().strip()

    # Check if revision folder exists
    if not snapshots_dir.is_dir():
        return None
    cached_shas = os.listdir(snapshots_dir)
    if revision not in cached_shas:
        # No cache for this revision and we won't try to return a random revision
        return None

    return snapshots_dir / revision
|
|
|
|
|
|
def weight_hub_files(
    model_id: str, revision: Optional[str] = None, extension: str = ".safetensors"
) -> List[str]:
    """Return the weight filenames available for a model revision.

    When ``HF_HUB_OFFLINE`` is set, the entries come from
    ``_cached_weight_files`` (the local snapshot); otherwise the model info is
    fetched from the Hub and filtered by
    ``_weight_hub_files_from_model_info``.

    Raises:
        EntryNotFoundError: if no file with ``extension`` is found.
    """
    api = HfApi()

    if not HF_HUB_OFFLINE:
        # Online case, fetch model info from the Hub
        model_info = api.model_info(model_id, revision=revision)
        filenames = _weight_hub_files_from_model_info(model_info, extension)
    else:
        filenames = _cached_weight_files(model_id, revision, extension)

    if filenames:
        return filenames

    raise EntryNotFoundError(
        f"No {extension} weights found for model {model_id} and revision {revision}.",
        None,
    )
|
|
|
|
|
|
def try_to_load_from_cache(
    model_id: str, revision: Optional[str], filename: str
) -> Optional[Path]:
    """Look up ``filename`` in the cached snapshot of a model revision.

    Returns the path of the cached file, or ``None`` when either the snapshot
    directory or the file itself is missing.
    """
    snapshot_dir = _get_cached_revision_directory(model_id, revision)
    if snapshot_dir:
        candidate = snapshot_dir / filename
        if candidate.is_file():
            return candidate
    return None
|
|
|
|
|
|
def weight_files(
    model_id: str, revision: Optional[str] = None, extension: str = ".safetensors"
) -> List[Path]:
    """Resolve the local weight files of a model.

    ``model_id`` is first treated as a local directory. When it is not one,
    the expected filenames are obtained from ``weight_hub_files`` and looked
    up in ``WEIGHTS_CACHE_OVERRIDE`` (if set) or in the Hugging Face cache.

    Raises:
        FileNotFoundError: if ``model_id`` is a directory without matching
            weights, or a file is missing from ``WEIGHTS_CACHE_OVERRIDE``.
        EntryNotFoundError: if no weights are listed for the model (after the
            ``.bin`` fallback when ``extension`` is ``".safetensors"``).
        LocalEntryNotFoundError: if a listed file is not in the local cache.
    """
    # Local model
    d = Path(model_id)
    if d.exists() and d.is_dir():
        local_files = _weight_files_from_dir(d, extension)
        if not local_files:
            raise FileNotFoundError(
                f"No local weights found in {model_id} with extension {extension}"
            )
        return [Path(f) for f in local_files]

    try:
        filenames = weight_hub_files(model_id, revision, extension)
    except EntryNotFoundError:
        if extension != ".safetensors":
            raise
        # Try to see if there are pytorch weights
        pt_filenames = weight_hub_files(model_id, revision, extension=".bin")
        # Change pytorch extension to safetensors extension
        # It is possible that we have safetensors weights locally even though they are not on the
        # hub if we converted weights locally without pushing them
        #
        # Only the literal "pytorch_" prefix is removed. str.lstrip would
        # instead strip any leading run of the characters p/y/t/o/r/c/h/_ and
        # mangle names such as "consolidated.00.bin".
        # NOTE(review): whatever writes the converted .safetensors files must
        # derive their names the same way -- confirm against the converter.
        prefix = "pytorch_"
        filenames = []
        for f in pt_filenames:
            stem = Path(f).stem
            if stem.startswith(prefix):
                stem = stem[len(prefix):]
            filenames.append(f"{stem}.safetensors")

    if WEIGHTS_CACHE_OVERRIDE is not None:
        files = []
        for filename in filenames:
            p = Path(WEIGHTS_CACHE_OVERRIDE) / filename
            if not p.exists():
                raise FileNotFoundError(
                    f"File {p} not found in {WEIGHTS_CACHE_OVERRIDE}."
                )
            files.append(p)
        return files

    files = []
    for filename in filenames:
        cache_file = try_to_load_from_cache(
            model_id, revision=revision, filename=filename
        )
        if cache_file is None:
            raise LocalEntryNotFoundError(
                f"File {filename} of model {model_id} not found in "
                f"{os.getenv('HUGGINGFACE_HUB_CACHE', 'the local cache')}. "
                f"Please run `text-generation-server download-weights {model_id}` first."
            )
        files.append(cache_file)

    return files
|
|
|
|
|
|
def download_weights(
    filenames: List[str], model_id: str, revision: Optional[str] = None
) -> List[Path]:
    """Fetch each file in ``filenames`` for ``model_id`` and return the local paths.

    Files already present in the local cache are reused; the others are
    fetched with ``hf_hub_download``, retrying after a pause when an attempt
    fails. A progress line with an ETA is logged after every file.
    """

    def download_file(fname, tries=5, backoff: int = 5):
        """Return the local path of ``fname``, downloading it when not cached."""
        cached = try_to_load_from_cache(model_id, revision, fname)
        if cached is not None:
            logger.info(f"File {fname} already present in cache.")
            return Path(cached)

        for attempt in range(1, tries + 1):
            try:
                logger.info(f"Download file: {fname}")
                stime = time.time()
                local_file = hf_hub_download(
                    filename=fname,
                    repo_id=model_id,
                    revision=revision,
                    local_files_only=HF_HUB_OFFLINE,
                )
                logger.info(
                    f"Downloaded {local_file} in {timedelta(seconds=int(time.time() - stime))}."
                )
                return Path(local_file)
            except Exception as e:
                if attempt == tries:
                    # Last attempt failed: give up and surface the error
                    raise
                logger.error(e)
                logger.info(f"Retrying in {backoff} seconds")
                time.sleep(backoff)
                logger.info(f"Retry {attempt}/{tries - 1}")

    # We do this instead of using tqdm because we want to parse the logs with the launcher
    total = len(filenames)
    start_time = time.time()
    downloaded = []
    for done, filename in enumerate(filenames, start=1):
        local_path = download_file(filename)

        elapsed = timedelta(seconds=int(time.time() - start_time))
        remaining = total - done
        if remaining > 0:
            # Average time per finished file, scaled to the files still pending
            eta = (elapsed / done) * remaining
        else:
            eta = 0

        logger.info(f"Download: [{done}/{total}] -- ETA: {eta}")
        downloaded.append(local_path)

    return downloaded
|