hf_text-generation-inference/server/text_generation_server/utils/convert.py

import datetime
import torch
import os

from loguru import logger
from pathlib import Path
from safetensors.torch import save_file, load_file, _find_shared_tensors, _is_complete
from typing import List, Dict
from collections import defaultdict


def _remove_duplicate_names(
    state_dict: Dict[str, torch.Tensor],
    *,
    preferred_names: List[str] = None,
    discard_names: List[str] = None,
) -> Dict[str, List[str]]:
    """
    Decide, for every group of names sharing a tensor, which name is kept.

    Groups come from safetensors' `_find_shared_tensors`. Within a group only
    names for which `_is_complete` holds are eligible to be kept.

    The kept name is the alphabetically first name of the first non-empty
    candidate set, tried in this order:

    1. eligible names that are listed in `preferred_names`
    2. eligible names that are NOT listed in `discard_names`
    3. all eligible names

    Args:
        state_dict: mapping of tensor name to tensor.
        preferred_names: names to keep in priority when eligible.
        discard_names: names to avoid keeping when an alternative exists.

    Returns:
        A mapping `kept name -> sorted list of the other names of its group`.
        Groups that contain a single name produce no entry.

    Raises:
        RuntimeError: if a group has no eligible name at all.
    """
    favoured = set(preferred_names) if preferred_names is not None else set()
    unwanted = set(discard_names) if discard_names is not None else set()

    duplicates = defaultdict(list)
    for shared in _find_shared_tensors(state_dict):
        eligible = {name for name in shared if _is_complete(state_dict[name])}
        if not eligible:
            raise RuntimeError(
                f"Error while trying to find names to remove to save state dict, but found no suitable name to keep for saving amongst: {shared}. None is covering the entire storage.Refusing to save/load the model since you could be storing much more memory than needed. Please refer to https://huggingface.co/docs/safetensors/torch_shared_tensors for more information. Or open an issue."
            )

        # Mechanism to preferentially select keys to keep coming from the
        # on-disk file, to allow loading models saved with a different choice
        # of kept name. The first non-empty candidate set wins.
        candidates = (favoured & eligible) or (eligible - unwanted) or eligible
        kept = min(candidates)

        dropped = [name for name in sorted(shared) if name != kept]
        # Only touch the defaultdict when there is something to record, so
        # that single-name groups do not create empty entries.
        if dropped:
            duplicates[kept].extend(dropped)
    return duplicates


def convert_file(pt_file: Path, sf_file: Path, discard_names: List[str]):
    """
    Convert a pytorch file to a safetensors file
    This will remove duplicate tensors from the file.

    Unfortunately, this might not respect *transformers* convention.
    Forcing us to check for potentially different keys during load when looking
    for specific tensors (making tensor sharing explicit).

    Args:
        pt_file: path of the pytorch checkpoint to read.
        sf_file: path of the safetensors file to write. Missing parent
            directories are created; a bare filename (no directory part)
            is written to the current working directory.
        discard_names: names that should preferably NOT be the ones kept
            when several names share the same tensor.

    Raises:
        RuntimeError: if no suitable name can be kept for a group of shared
            tensors, or if a tensor read back from `sf_file` differs from
            the one that was saved.
    """
    # NOTE(security): `torch.load` unpickles the file, which can execute
    # arbitrary code. Only run this on checkpoints from a trusted source.
    loaded = torch.load(pt_file, map_location="cpu")
    if "state_dict" in loaded:
        loaded = loaded["state_dict"]
    to_removes = _remove_duplicate_names(loaded, discard_names=discard_names)

    # Every dropped alias is recorded in the file metadata as
    # `alias -> kept name`, so the sharing stays discoverable after saving.
    metadata = {"format": "pt"}
    for kept_name, to_remove_group in to_removes.items():
        for to_remove in to_remove_group:
            if to_remove not in metadata:
                metadata[to_remove] = kept_name
            del loaded[to_remove]
    # Force tensors to be contiguous
    loaded = {k: v.contiguous() for k, v in loaded.items()}

    # `os.makedirs(os.path.dirname(...))` fails with FileNotFoundError when
    # the target has no directory component (dirname is ""); `Path.parent`
    # is "." in that case and `mkdir(exist_ok=True)` accepts it.
    Path(sf_file).parent.mkdir(parents=True, exist_ok=True)
    save_file(loaded, sf_file, metadata=metadata)

    # Read the file back and make sure nothing was altered by the round trip.
    reloaded = load_file(sf_file)
    for k, pt_tensor in loaded.items():
        sf_tensor = reloaded[k]
        if not torch.equal(pt_tensor, sf_tensor):
            raise RuntimeError(f"The output tensors do not match for key {k}")


def convert_files(pt_files: List[Path], sf_files: List[Path], discard_names: List[str]):
    """
    Convert several pytorch files to safetensors files, one after the other.

    `pt_files[i]` is converted into `sf_files[i]`; both lists must have the
    same length. Files whose name contains "arguments", "args" or "training"
    are skipped. One log line is emitted per converted file.

    Args:
        pt_files: paths of the pytorch files to read.
        sf_files: paths of the safetensors files to write.
        discard_names: forwarded unchanged to `convert_file`.
    """
    assert len(pt_files) == len(sf_files)

    total = len(pt_files)
    skip_markers = ("arguments", "args", "training")

    # We do this instead of using tqdm because we want to parse the logs with the launcher
    for position, (source, target) in enumerate(zip(pt_files, sf_files), start=1):
        # Skip blacklisted files
        if any(marker in source.name for marker in skip_markers):
            continue

        began = datetime.datetime.now()
        convert_file(source, target, discard_names)
        took = datetime.datetime.now() - began
        logger.info(f"Convert: [{position}/{total}] -- Took: {took}")
fix(server): Removes the parallelism in file convertion (during download) (#275) 2023-05-04 07:22:54 -06:00			`import datetime`
feat: add safetensors conversion (#63) 2023-02-14 05:02:16 -07:00			`import torch`
feat(server): Update convert logic. (#483) Should be more robust to shared tensors (ok when using `from_pretrained). But forcing us to add new checks in our loading code (since the chosen key to keep might be different from `transformers`). --------- Co-authored-by: Ubuntu <ubuntu@ip-172-31-41-161.ec2.internal> 2023-06-23 04:40:46 -06:00			`import os`
feat: add safetensors conversion (#63) 2023-02-14 05:02:16 -07:00
			`from loguru import logger`
			`from pathlib import Path`
fix(server): harden the weights choice to save on disk. (#561) - Look at `transformers` base class to check for `_key_to_ignore_on_load_missing` or `_tied_weights` which are the standard attributes to select the keys to NOT save on disk (since they are ignored) - Modified safetensors code (to be reflected in safetensors even if it's an internal function). - Will not work for trust_remote_code=True repos (like santacoder). Should help with : https://github.com/huggingface/text-generation-inference/issues/555 and : https://github.com/huggingface/text-generation-inference/pull/501 and https://github.com/huggingface/text-generation-inference/issues/556 and https://github.com/huggingface/text-generation-inference/issues/482#issuecomment-1623713593 2023-07-07 06:50:12 -06:00			`from safetensors.torch import save_file, load_file, _find_shared_tensors, _is_complete`
			`from typing import List, Dict`
			`from collections import defaultdict`
feat: add safetensors conversion (#63) 2023-02-14 05:02:16 -07:00

fix(server): harden the weights choice to save on disk. (#561) - Look at `transformers` base class to check for `_key_to_ignore_on_load_missing` or `_tied_weights` which are the standard attributes to select the keys to NOT save on disk (since they are ignored) - Modified safetensors code (to be reflected in safetensors even if it's an internal function). - Will not work for trust_remote_code=True repos (like santacoder). Should help with : https://github.com/huggingface/text-generation-inference/issues/555 and : https://github.com/huggingface/text-generation-inference/pull/501 and https://github.com/huggingface/text-generation-inference/issues/556 and https://github.com/huggingface/text-generation-inference/issues/482#issuecomment-1623713593 2023-07-07 06:50:12 -06:00			`def _remove_duplicate_names(`
			`state_dict: Dict[str, torch.Tensor],`
			`*,`
			`preferred_names: List[str] = None,`
			`discard_names: List[str] = None,`
			`) -> Dict[str, List[str]]:`
			`if preferred_names is None:`
			`preferred_names = []`
			`preferred_names = set(preferred_names)`
			`if discard_names is None:`
			`discard_names = []`
			`discard_names = set(discard_names)`

			`shareds = _find_shared_tensors(state_dict)`
			`to_remove = defaultdict(list)`
			`for shared in shareds:`
			`complete_names = set(`
			`[name for name in shared if _is_complete(state_dict[name])]`
			`)`
			`if not complete_names:`
			`raise RuntimeError(`
			`f"Error while trying to find names to remove to save state dict, but found no suitable name to keep for saving amongst: {shared}. None is covering the entire storage.Refusing to save/load the model since you could be storing much more memory than needed. Please refer to https://huggingface.co/docs/safetensors/torch_shared_tensors for more information. Or open an issue."`
			`)`

			`keep_name = sorted(list(complete_names))[0]`

			`# Mecanism to preferentially select keys to keep`
			`# coming from the on-disk file to allow`
			`# loading models saved with a different choice`
			`# of keep_name`
			`preferred = complete_names.difference(discard_names)`
			`if preferred:`
			`keep_name = sorted(list(preferred))[0]`

			`if preferred_names:`
			`preferred = preferred_names.intersection(complete_names)`
			`if preferred:`
			`keep_name = sorted(list(preferred))[0]`
			`for name in sorted(shared):`
			`if name != keep_name:`
			`to_remove[keep_name].append(name)`
			`return to_remove`


			`def convert_file(pt_file: Path, sf_file: Path, discard_names: List[str]):`
feat: add safetensors conversion (#63) 2023-02-14 05:02:16 -07:00			`"""`
			`Convert a pytorch file to a safetensors file`
feat(server): Update convert logic. (#483) Should be more robust to shared tensors (ok when using `from_pretrained). But forcing us to add new checks in our loading code (since the chosen key to keep might be different from `transformers`). --------- Co-authored-by: Ubuntu <ubuntu@ip-172-31-41-161.ec2.internal> 2023-06-23 04:40:46 -06:00			`This will remove duplicate tensors from the file.`
feat: add safetensors conversion (#63) 2023-02-14 05:02:16 -07:00
feat(server): Update convert logic. (#483) Should be more robust to shared tensors (ok when using `from_pretrained). But forcing us to add new checks in our loading code (since the chosen key to keep might be different from `transformers`). --------- Co-authored-by: Ubuntu <ubuntu@ip-172-31-41-161.ec2.internal> 2023-06-23 04:40:46 -06:00			`Unfortunately, this might not respect transformers convention.`
			`Forcing us to check for potentially different keys during load when looking`
			`for specific tensors (making tensor sharing explicit).`
			`"""`
			`loaded = torch.load(pt_file, map_location="cpu")`
			`if "state_dict" in loaded:`
			`loaded = loaded["state_dict"]`
fix(server): harden the weights choice to save on disk. (#561) - Look at `transformers` base class to check for `_key_to_ignore_on_load_missing` or `_tied_weights` which are the standard attributes to select the keys to NOT save on disk (since they are ignored) - Modified safetensors code (to be reflected in safetensors even if it's an internal function). - Will not work for trust_remote_code=True repos (like santacoder). Should help with : https://github.com/huggingface/text-generation-inference/issues/555 and : https://github.com/huggingface/text-generation-inference/pull/501 and https://github.com/huggingface/text-generation-inference/issues/556 and https://github.com/huggingface/text-generation-inference/issues/482#issuecomment-1623713593 2023-07-07 06:50:12 -06:00			`to_removes = _remove_duplicate_names(loaded, discard_names=discard_names)`
feat(server): Update convert logic. (#483) Should be more robust to shared tensors (ok when using `from_pretrained). But forcing us to add new checks in our loading code (since the chosen key to keep might be different from `transformers`). --------- Co-authored-by: Ubuntu <ubuntu@ip-172-31-41-161.ec2.internal> 2023-06-23 04:40:46 -06:00
			`metadata = {"format": "pt"}`
			`for kept_name, to_remove_group in to_removes.items():`
			`for to_remove in to_remove_group:`
			`if to_remove not in metadata:`
			`metadata[to_remove] = kept_name`
			`del loaded[to_remove]`
			`# Force tensors to be contiguous`
			`loaded = {k: v.contiguous() for k, v in loaded.items()}`

			`dirname = os.path.dirname(sf_file)`
			`os.makedirs(dirname, exist_ok=True)`
			`save_file(loaded, sf_file, metadata=metadata)`
			`reloaded = load_file(sf_file)`
			`for k in loaded:`
			`pt_tensor = loaded[k]`
			`sf_tensor = reloaded[k]`
			`if not torch.equal(pt_tensor, sf_tensor):`
			`raise RuntimeError(f"The output tensors do not match for key {k}")`
feat: add safetensors conversion (#63) 2023-02-14 05:02:16 -07:00

fix(server): harden the weights choice to save on disk. (#561) - Look at `transformers` base class to check for `_key_to_ignore_on_load_missing` or `_tied_weights` which are the standard attributes to select the keys to NOT save on disk (since they are ignored) - Modified safetensors code (to be reflected in safetensors even if it's an internal function). - Will not work for trust_remote_code=True repos (like santacoder). Should help with : https://github.com/huggingface/text-generation-inference/issues/555 and : https://github.com/huggingface/text-generation-inference/pull/501 and https://github.com/huggingface/text-generation-inference/issues/556 and https://github.com/huggingface/text-generation-inference/issues/482#issuecomment-1623713593 2023-07-07 06:50:12 -06:00			`def convert_files(pt_files: List[Path], sf_files: List[Path], discard_names: List[str]):`
fea(server): decrease convert RAM requirements (#286) 2023-05-05 09:57:02 -06:00			`assert len(pt_files) == len(sf_files)`
feat: add safetensors conversion (#63) 2023-02-14 05:02:16 -07:00
fix(server): Removes the parallelism in file convertion (during download) (#275) 2023-05-04 07:22:54 -06:00			`N = len(pt_files)`
feat: add safetensors conversion (#63) 2023-02-14 05:02:16 -07:00			`# We do this instead of using tqdm because we want to parse the logs with the launcher`
fea(server): decrease convert RAM requirements (#286) 2023-05-05 09:57:02 -06:00
			`for i, (pt_file, sf_file) in enumerate(zip(pt_files, sf_files)):`
fix(server): blacklist local files (#609) Close #589 #602 2023-07-13 13:54:55 -06:00			`# Skip blacklisted files`
			`if (`
			`"arguments" in pt_file.name`
			`or "args" in pt_file.name`
			`or "training" in pt_file.name`
			`):`
			`continue`

fix(server): fix convert (#284) 2023-05-05 07:28:08 -06:00			`start = datetime.datetime.now()`
fix(server): harden the weights choice to save on disk. (#561) - Look at `transformers` base class to check for `_key_to_ignore_on_load_missing` or `_tied_weights` which are the standard attributes to select the keys to NOT save on disk (since they are ignored) - Modified safetensors code (to be reflected in safetensors even if it's an internal function). - Will not work for trust_remote_code=True repos (like santacoder). Should help with : https://github.com/huggingface/text-generation-inference/issues/555 and : https://github.com/huggingface/text-generation-inference/pull/501 and https://github.com/huggingface/text-generation-inference/issues/556 and https://github.com/huggingface/text-generation-inference/issues/482#issuecomment-1623713593 2023-07-07 06:50:12 -06:00			`convert_file(pt_file, sf_file, discard_names)`
fix(server): Removes the parallelism in file convertion (during download) (#275) 2023-05-04 07:22:54 -06:00			`elapsed = datetime.datetime.now() - start`
			`logger.info(f"Convert: [{i + 1}/{N}] -- Took: {elapsed}")`