nbroad's picture
Update onnx_config.pbtext
50b9337 verified
raw
history blame
614 Bytes
name: "deberta"
platform: "onnxruntime_onnx"
max_batch_size: 8
input [
{
name: "input_ids"
data_type: TYPE_INT64
dims: [ 512 ]
},
{
name: "attention_mask"
data_type: TYPE_INT64
dims: [ 512 ]
}
]
output [
{
name: "output"
data_type: TYPE_FP32
dims: [ 2 ]
}
]
dynamic_batching {
preferred_batch_size: [ 4, 8 ]
max_queue_delay_microseconds: 100
}
instance_group [
{
count: 1
kind: KIND_GPU
}
]
optimization { execution_accelerators {
gpu_execution_accelerator : [ {
name : "tensorrt"
parameters { key: "precision_mode" value: "FP16" }
} ]
}}