Rename inference-cache-config/llama2.json to inference-cache-config/llama2-7b-13b.json
Browse files
inference-cache-config/{llama2.json → llama2-7b-13b.json}
RENAMED
|
@@ -98,19 +98,5 @@
|
|
| 98 |
"num_cores": 24,
|
| 99 |
"auto_cast_type": "fp16"
|
| 100 |
}
|
| 101 |
-
],
|
| 102 |
-
"meta-llama/Llama-2-70b-chat-hf": [
|
| 103 |
-
{
|
| 104 |
-
"batch_size": 1,
|
| 105 |
-
"sequence_length": 4096,
|
| 106 |
-
"num_cores": 24,
|
| 107 |
-
"auto_cast_type": "fp16"
|
| 108 |
-
},
|
| 109 |
-
{
|
| 110 |
-
"batch_size": 4,
|
| 111 |
-
"sequence_length": 4096,
|
| 112 |
-
"num_cores": 24,
|
| 113 |
-
"auto_cast_type": "fp16"
|
| 114 |
-
}
|
| 115 |
]
|
| 116 |
}
|
|
|
|
| 98 |
"num_cores": 24,
|
| 99 |
"auto_cast_type": "fp16"
|
| 100 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
]
|
| 102 |
}
|