@njhill, this is a temporary workaround so that our CI can run, since secrets are not available to runners triggered by external contributors. I will ask around to see if there is a better way. Co-authored-by: Nick Hill <nickhill@us.ibm.com>
This commit is contained in:
parent
1ba78207e6
commit
e649bf9a55
|
@ -131,6 +131,9 @@ class BLOOMSharded(BLOOM):
|
||||||
file, framework="pt", device=str(device) if quantize is None else "cpu"
|
file, framework="pt", device=str(device) if quantize is None else "cpu"
|
||||||
) as f:
|
) as f:
|
||||||
for name in f.keys():
|
for name in f.keys():
|
||||||
|
if name.startswith("transformer.") or name.startswith("lm_head."):
|
||||||
|
full_name = name
|
||||||
|
else:
|
||||||
full_name = f"transformer.{name}"
|
full_name = f"transformer.{name}"
|
||||||
|
|
||||||
module_name, param_name = full_name.rsplit(".", 1)
|
module_name, param_name = full_name.rsplit(".", 1)
|
||||||
|
@ -157,7 +160,7 @@ class BLOOMSharded(BLOOM):
|
||||||
# XXX: Hack for Rowlinear to add the bias only once.
|
# XXX: Hack for Rowlinear to add the bias only once.
|
||||||
if rank != 0:
|
if rank != 0:
|
||||||
tensor = torch.zeros_like(tensor)
|
tensor = torch.zeros_like(tensor)
|
||||||
elif isinstance(module, TensorParallelEmbedding):
|
elif isinstance(module, TensorParallelEmbedding) or name == "lm_head.weight":
|
||||||
size = slice_.get_shape()[0]
|
size = slice_.get_shape()[0]
|
||||||
block_size = size // world_size
|
block_size = size // world_size
|
||||||
start = rank * block_size
|
start = rank * block_size
|
||||||
|
|
|
@ -504,6 +504,7 @@ class CausalLM(Model):
|
||||||
position_ids=position_ids,
|
position_ids=position_ids,
|
||||||
past_key_values=past_key_values,
|
past_key_values=past_key_values,
|
||||||
use_cache=True,
|
use_cache=True,
|
||||||
|
return_dict=True,
|
||||||
)
|
)
|
||||||
return outputs.logits, outputs.past_key_values
|
return outputs.logits, outputs.past_key_values
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue