Remove redundant code #106
by GloomScythe · opened
- modeling_deepseek.py +0 -1

modeling_deepseek.py CHANGED
@@ -921,7 +921,6 @@ class DeepseekV3FlashAttention2(DeepseekV3Attention):
         k_nope, value_states = torch.split(
             kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1
         )
-        kv_seq_len = value_states.shape[-2]
 
         kv_seq_len = value_states.shape[-2]
         if past_key_value is not None:
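For context, a minimal standalone sketch of the lines touched by this diff (hypothetical tensor shapes, with the `self.*` head dimensions replaced by local variables): `kv_seq_len` was assigned twice in a row from the same `value_states` tensor, so dropping the first assignment does not change behavior.

```python
import torch

# Hypothetical dimensions standing in for self.qk_nope_head_dim / self.v_head_dim.
qk_nope_head_dim, v_head_dim = 128, 128

# kv shaped (batch, num_heads, seq_len, qk_nope_head_dim + v_head_dim), as in the diff context.
kv = torch.randn(1, 8, 16, qk_nope_head_dim + v_head_dim)

k_nope, value_states = torch.split(
    kv, [qk_nope_head_dim, v_head_dim], dim=-1
)

kv_seq_len = value_states.shape[-2]  # the single assignment kept after this PR
assert kv_seq_len == 16
```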