Files changed (1)
  1. modeling_deepseek.py +0 -1
modeling_deepseek.py CHANGED
@@ -921,7 +921,6 @@ class DeepseekV3FlashAttention2(DeepseekV3Attention):
921
  k_nope, value_states = torch.split(
922
  kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1
923
  )
924
- kv_seq_len = value_states.shape[-2]
925
 
926
  kv_seq_len = value_states.shape[-2]
927
  if past_key_value is not None:
 
921
  k_nope, value_states = torch.split(
922
  kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1
923
  )
 
924
 
925
  kv_seq_len = value_states.shape[-2]
926
  if past_key_value is not None: