Rename inference-cache-config/llama2.json to inference-cache-config/llama2-7b-13b.json

Files changed (1) hide show

inference-cache-config/{llama2.json → llama2-7b-13b.json} RENAMED Viewed

@@ -98,19 +98,5 @@
       "num_cores": 24,
       "auto_cast_type": "fp16"
     }
-  ],
-  "meta-llama/Llama-2-70b-chat-hf": [
-    {
-      "batch_size": 1,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
-    {
-      "batch_size": 4,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    }
   ]
 }

       "num_cores": 24,
       "auto_cast_type": "fp16"
     }
   ]
 }