Chore: Fix naming issues regarding head_size; there can only be one.
This commit is contained in:
parent
5fca30ee15
commit
dee649c60c
|
@ -149,15 +149,14 @@ class MistralAttention(torch.nn.Module):
|
||||||
bias=False,
|
bias=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
head_size = config.hidden_size // config.num_attention_heads
|
|
||||||
self.query_key_value = TensorParallelMultiAdapterLinear.load(
|
self.query_key_value = TensorParallelMultiAdapterLinear.load(
|
||||||
query_key_value,
|
query_key_value,
|
||||||
layer_id,
|
layer_id,
|
||||||
["q_proj", "k_proj", "v_proj"],
|
["q_proj", "k_proj", "v_proj"],
|
||||||
sizes=[
|
sizes=[
|
||||||
head_size * config.num_attention_heads,
|
self.head_size * config.num_attention_heads,
|
||||||
head_size * config.num_key_value_heads,
|
self.head_size * config.num_key_value_heads,
|
||||||
head_size * config.num_key_value_heads,
|
self.head_size * config.num_key_value_heads,
|
||||||
],
|
],
|
||||||
process_group=weights.process_group,
|
process_group=weights.process_group,
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue