| # Model identity and execution backend. | |
| name: "deberta" | |
| # The model artifact is a serialized TensorRT plan. | |
| platform: "tensorrt_plan" | |
| # Non-zero, so the model is treated as batchable with at most 8 items per | |
| # batch; tensor dims in this file are then interpreted without the batch axis. | |
| max_batch_size: 8 | |
| # Tensor shapes below deliberately omit the batch dimension. max_batch_size | |
| # is non-zero, so Triton prepends the batch axis itself; listing it again as | |
| # a leading -1 would declare rank-3 tensors that do not match the engine. | |
| # NOTE(review): assumes the TensorRT plan's bindings are [batch, 512] for both | |
| # inputs and [batch, 2] for the output; confirm against the built engine. | |
| input [ | |
| { | |
| # Token ids, 512 per sequence. | |
| name: "input_ids" | |
| data_type: TYPE_INT32 | |
| dims: [ 512 ] | |
| }, | |
| { | |
| # Mask with the same per-sequence length as input_ids. | |
| name: "attention_mask" | |
| data_type: TYPE_INT32 | |
| dims: [ 512 ] | |
| } | |
| ] | |
| output [ | |
| { | |
| # Two FP32 values per batch item. | |
| name: "output" | |
| data_type: TYPE_FP32 | |
| dims: [ 2 ] | |
| } | |
| ] | |
| # Server-side batching of individual requests before they reach the model. | |
| dynamic_batching { | |
| # Longest time, in microseconds, a request may be held back to form a batch. | |
| max_queue_delay_microseconds: 100 | |
| # Batch sizes the scheduler should try to assemble. | |
| preferred_batch_size: [ 4, 8 ] | |
| } | |
| # Execution instances of the model. | |
| instance_group [ | |
| { | |
| # Run on GPU. No explicit gpus list is given in this group, so device | |
| # selection is left to the server default (presumably every visible GPU; | |
| # verify against the deployment). | |
| kind: KIND_GPU | |
| # Number of instances created per selected device. | |
| count: 1 | |
| } | |
| ] | |