2023-02-14 05:02:16 -07:00
|
|
|
import concurrent
|
|
|
|
import time
|
2023-05-04 07:22:54 -06:00
|
|
|
import datetime
|
2023-02-14 05:02:16 -07:00
|
|
|
import torch
|
|
|
|
|
|
|
|
from concurrent.futures import ThreadPoolExecutor
|
|
|
|
from collections import defaultdict
|
|
|
|
from datetime import timedelta
|
|
|
|
from loguru import logger
|
|
|
|
from pathlib import Path
|
|
|
|
from safetensors.torch import load_file, save_file
|
2023-05-05 09:57:02 -06:00
|
|
|
from safetensors import safe_open
|
2023-02-14 05:02:16 -07:00
|
|
|
from typing import Dict, List
|
|
|
|
|
|
|
|
|
|
|
|
def check_file_size(source_file: Path, target_file: Path):
    """
    Check that the target file is not significantly smaller than the source file

    Only the "target is smaller" direction is checked: a target that is larger
    than the source is always accepted.

    Args:
        source_file: Path of the reference file.
        target_file: Path of the file whose size is compared to the reference.

    Raises:
        RuntimeError: If `target_file` is more than 1% smaller than `source_file`.
    """
    source_file_size = source_file.stat().st_size
    target_file_size = target_file.stat().st_size

    # Integer form of `(source - target) / source > 0.01`: same 1% threshold,
    # but an empty source file no longer triggers a ZeroDivisionError.
    if (source_file_size - target_file_size) * 100 > source_file_size:
        raise RuntimeError(
            f"The file size different is more than 1%:\n"
            f"- {source_file}: {source_file_size}\n"
            f"- {target_file}: {target_file_size}\n"
        )
|
|
|
|
|
|
|
|
|
|
|
|
def remove_shared_pointers(tensors: Dict[str, torch.Tensor]):
    """
    Drop, in place, every tensor whose `data_ptr()` was already seen under an earlier key

    For each distinct memory address only the first name (in the dict's iteration
    order) is kept; the dict is mutated and nothing is returned.
    """
    # NOTE(review): aliasing is decided on `data_ptr()` alone; zero-element tensors
    # may report the same pointer -- confirm that dropping them is intended.
    seen_addresses = set()
    aliased_names = []

    # First pass: record which names point at memory already claimed by another name
    for name, tensor in tensors.items():
        address = tensor.data_ptr()
        if address in seen_addresses:
            aliased_names.append(name)
        else:
            seen_addresses.add(address)

    # Second pass: remove the aliases (done separately so the dict is not
    # modified while it is being iterated)
    for name in aliased_names:
        tensors.pop(name)
|
|
|
|
|
|
|
|
|
2023-05-05 09:57:02 -06:00
|
|
|
def convert_file(pt_file: Path, sf_file: Path):
    """
    Convert a pytorch file to a safetensors file

    The checkpoint is loaded on CPU, tensors sharing the same memory are reduced
    to a single entry, the remaining tensors are made contiguous and written to
    `sf_file`. The written file is then size-checked against `pt_file` and read
    back to verify that every saved tensor matches.

    Args:
        pt_file: Path of the pytorch checkpoint to read.
        sf_file: Path of the safetensors file to write; missing parent
            directories are created.

    Raises:
        RuntimeError: If `check_file_size` rejects the written file, or if a
            tensor read back from `sf_file` differs from the one that was saved.
    """
    logger.info(f"Convert {pt_file} to {sf_file}.")

    # NOTE(review): `torch.load` unpickles the file, which can execute arbitrary
    # code -- only convert checkpoints that come from a trusted source.
    pt_state = torch.load(pt_file, map_location="cpu")
    if "state_dict" in pt_state:
        pt_state = pt_state["state_dict"]

    remove_shared_pointers(pt_state)

    # Tensors need to be contiguous
    pt_state = {k: v.contiguous() for k, v in pt_state.items()}

    sf_file.parent.mkdir(parents=True, exist_ok=True)
    save_file(pt_state, str(sf_file), metadata={"format": "pt"})

    # Check that both files are close in size
    check_file_size(pt_file, sf_file)

    # Read the tensors back one at a time and compare them with what was saved.
    # The file is opened once for the whole loop instead of once per key.
    with safe_open(sf_file, framework="pt") as f:
        for k, pt_tensor in pt_state.items():
            sf_tensor = f.get_tensor(k)
            if not torch.equal(pt_tensor, sf_tensor):
                raise RuntimeError(f"The output tensors do not match for key {k}")
|
2023-02-14 05:02:16 -07:00
|
|
|
|
|
|
|
|
2023-05-05 09:57:02 -06:00
|
|
|
def convert_files(pt_files: List[Path], sf_files: List[Path]):
    """
    Convert a list of pytorch files to safetensors files, one after the other

    `pt_files[i]` is converted to `sf_files[i]` with `convert_file`. After each
    conversion a progress line with the elapsed time is logged.

    Args:
        pt_files: Paths of the pytorch checkpoints to read.
        sf_files: Paths of the safetensors files to write, in the same order.

    Raises:
        AssertionError: If the two lists do not have the same length.
    """
    if len(pt_files) != len(sf_files):
        # Explicit raise instead of `assert` so the check survives `python -O`
        # (otherwise `zip` below would silently drop the unmatched files).
        # AssertionError is kept so callers see the same exception type as before.
        raise AssertionError(
            f"Expected as many target files as source files, "
            f"got {len(pt_files)} source and {len(sf_files)} target files"
        )

    N = len(pt_files)
    # We do this instead of using tqdm because we want to parse the logs with the launcher

    for i, (pt_file, sf_file) in enumerate(zip(pt_files, sf_files)):
        # Monotonic clock: the measured duration is not affected by wall-clock
        # adjustments (NTP corrections, DST changes) during a long conversion.
        start = time.monotonic()
        convert_file(pt_file, sf_file)
        # Wrapped in a timedelta so the logged value keeps the `H:MM:SS.ffffff` form
        elapsed = timedelta(seconds=time.monotonic() - start)
        logger.info(f"Convert: [{i + 1}/{N}] -- Took: {elapsed}")
|