RNNTJoint with masking

Browse files

Files changed (4) hide show

RNNTJoint.mlmodelc/analytics/coremldata.bin +1 -1
RNNTJoint.mlmodelc/coremldata.bin +2 -2
RNNTJoint.mlmodelc/metadata.json +100 -0
RNNTJoint.mlmodelc/model.mil +40 -14

RNNTJoint.mlmodelc/analytics/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6123e6464c990b52c62d86bd580a11c746cfcb5a01ae51b9ecbe92cfcef68852
 size 243

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e2e71a7aa301d6b972a3a7a4e158f9f6f55161ec0444cb9f55dc633149de40a
 size 243

RNNTJoint.mlmodelc/coremldata.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f1dc6e96c2f63c8636f5a09968abd2bff0ded029e5a205886c10228421a029b
-size 394

 version https://git-lfs.github.com/spec/v1
+oid sha256:77d35499e836cd5226a494eae0fd69d4dd92df161dd50f3c486d6947f5301a5d
+size 436

RNNTJoint.mlmodelc/metadata.json ADDED Viewed

@@ -0,0 +1,100 @@

+[
+  {
+    "metadataOutputVersion" : "3.0",
+    "storagePrecision" : "Float16",
+    "outputSchema" : [
+      {
+        "hasShapeFlexibility" : "0",
+        "isOptional" : "0",
+        "dataType" : "Float32",
+        "formattedType" : "MultiArray (Float32)",
+        "shortDescription" : "",
+        "shape" : "[]",
+        "name" : "logits",
+        "type" : "MultiArray"
+      }
+    ],
+    "modelParameters" : [
+    ],
+    "specificationVersion" : 6,
+    "mlProgramOperationTypeHistogram" : {
+      "Log" : 1,
+      "Concat" : 1,
+      "Linear" : 3,
+      "Range1d" : 1,
+      "Shape" : 2,
+      "RealDiv" : 1,
+      "Relu" : 1,
+      "Mul" : 1,
+      "Cast" : 4,
+      "Tile" : 1,
+      "Add" : 1,
+      "ExpandDims" : 5,
+      "Softmax" : 1,
+      "Gather" : 2,
+      "Less" : 1
+    },
+    "computePrecision" : "Mixed (Float16, Float32, Int32)",
+    "isUpdatable" : "0",
+    "stateSchema" : [
+    ],
+    "availability" : {
+      "macOS" : "12.0",
+      "tvOS" : "15.0",
+      "visionOS" : "1.0",
+      "watchOS" : "8.0",
+      "iOS" : "15.0",
+      "macCatalyst" : "15.0"
+    },
+    "modelType" : {
+      "name" : "MLModelType_mlProgram"
+    },
+    "userDefinedMetadata" : {
+      "com.github.apple.coremltools.source_dialect" : "TorchScript",
+      "com.github.apple.coremltools.source" : "torch==2.5.0",
+      "com.github.apple.coremltools.version" : "8.3.0"
+    },
+    "inputSchema" : [
+      {
+        "dataType" : "Float32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1...100 × 1...1025 × 1024",
+        "shapeRange" : "[[1, 100], [1, 1025], [1024, 1024]]",
+        "formattedType" : "MultiArray (Float32 1 × 1 × 1024)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1, 1024]",
+        "name" : "encoder_outputs",
+        "shortDescription" : ""
+      },
+      {
+        "dataType" : "Float32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1...100 × 1...1025 × 640",
+        "shapeRange" : "[[1, 100], [1, 1025], [640, 640]]",
+        "formattedType" : "MultiArray (Float32 1 × 1 × 640)",
+        "type" : "MultiArray",
+        "shape" : "[1, 1, 640]",
+        "name" : "decoder_outputs",
+        "shortDescription" : ""
+      },
+      {
+        "dataType" : "Int32",
+        "hasShapeFlexibility" : "1",
+        "isOptional" : "0",
+        "shapeFlexibility" : "1...100",
+        "shapeRange" : "[[1, 100]]",
+        "formattedType" : "MultiArray (Int32 1)",
+        "type" : "MultiArray",
+        "shape" : "[1]",
+        "name" : "encoder_length",
+        "shortDescription" : ""
+      }
+    ],
+    "generatedClassName" : "RNNTJointWithMask",
+    "method" : "predict"
+  }
+]

RNNTJoint.mlmodelc/model.mil CHANGED Viewed

@@ -1,31 +1,57 @@
 program(1.0)
-[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3405.2.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
 {
-    func main<ios15>(tensor<fp32, [?, ?, ?]> decoder_outputs, tensor<fp32, [?, ?, ?]> encoder_outputs) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"decoder_outputs", [1, 1, 1]}, {"encoder_outputs", [1, 1, 1]}}), ("RangeDims", {{"decoder_outputs", [[1, 100], [1, 1025], [1, 640]]}, {"encoder_outputs", [[1, 100], [1, 1025], [1, 1024]]}})))] {
             tensor<string, []> encoder_outputs_to_fp16_dtype_0 = const()[name = tensor<string, []>("encoder_outputs_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
             tensor<fp16, [640, 1024]> joint_enc_weight_to_fp16 = const()[name = tensor<string, []>("joint_enc_weight_to_fp16"), val = tensor<fp16, [640, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
             tensor<fp16, [640]> joint_enc_bias_to_fp16 = const()[name = tensor<string, []>("joint_enc_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1310848)))];
-            tensor<fp16, [?, ?, ?]> encoder_outputs_to_fp16 = cast(dtype = encoder_outputs_to_fp16_dtype_0, x = encoder_outputs)[name = tensor<string, []>("cast_2")];
-            tensor<fp16, [?, ?, 640]> linear_0_cast_fp16 = linear(bias = joint_enc_bias_to_fp16, weight = joint_enc_weight_to_fp16, x = encoder_outputs_to_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
             tensor<string, []> decoder_outputs_to_fp16_dtype_0 = const()[name = tensor<string, []>("decoder_outputs_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
             tensor<fp16, [640, 640]> joint_pred_weight_to_fp16 = const()[name = tensor<string, []>("joint_pred_weight_to_fp16"), val = tensor<fp16, [640, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1312192)))];
             tensor<fp16, [640]> joint_pred_bias_to_fp16 = const()[name = tensor<string, []>("joint_pred_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2131456)))];
-            tensor<fp16, [?, ?, ?]> decoder_outputs_to_fp16 = cast(dtype = decoder_outputs_to_fp16_dtype_0, x = decoder_outputs)[name = tensor<string, []>("cast_1")];
             tensor<fp16, [?, ?, 640]> linear_1_cast_fp16 = linear(bias = joint_pred_bias_to_fp16, weight = joint_pred_weight_to_fp16, x = decoder_outputs_to_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
             tensor<int32, [1]> f_axes_0 = const()[name = tensor<string, []>("f_axes_0"), val = tensor<int32, [1]>([2])];
             tensor<fp16, [?, ?, 1, 640]> f_cast_fp16 = expand_dims(axes = f_axes_0, x = linear_0_cast_fp16)[name = tensor<string, []>("f_cast_fp16")];
             tensor<int32, [1]> g_axes_0 = const()[name = tensor<string, []>("g_axes_0"), val = tensor<int32, [1]>([1])];
             tensor<fp16, [?, 1, ?, 640]> g_cast_fp16 = expand_dims(axes = g_axes_0, x = linear_1_cast_fp16)[name = tensor<string, []>("g_cast_fp16")];
-            tensor<fp16, [?, ?, ?, 640]> input_1_cast_fp16 = add(x = f_cast_fp16, y = g_cast_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
-            tensor<fp16, [?, ?, ?, 640]> input_3_cast_fp16 = relu(x = input_1_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
             tensor<fp16, [8198, 640]> joint_joint_net_2_weight_to_fp16 = const()[name = tensor<string, []>("joint_joint_net_2_weight_to_fp16"), val = tensor<fp16, [8198, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2132800)))];
             tensor<fp16, [8198]> joint_joint_net_2_bias_to_fp16 = const()[name = tensor<string, []>("joint_joint_net_2_bias_to_fp16"), val = tensor<fp16, [8198]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12626304)))];
-            tensor<fp16, [?, ?, ?, 8198]> linear_2_cast_fp16 = linear(bias = joint_joint_net_2_bias_to_fp16, weight = joint_joint_net_2_weight_to_fp16, x = input_3_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
-            tensor<int32, []> var_29 = const()[name = tensor<string, []>("op_29"), val = tensor<int32, []>(-1)];
-            tensor<fp16, [?, ?, ?, 8198]> var_31_softmax_cast_fp16 = softmax(axis = var_29, x = linear_2_cast_fp16)[name = tensor<string, []>("op_31_softmax_cast_fp16")];
-            tensor<fp16, []> var_31_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_31_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
-            tensor<fp16, [?, ?, ?, 8198]> var_31_cast_fp16 = log(epsilon = var_31_epsilon_0_to_fp16, x = var_31_softmax_cast_fp16)[name = tensor<string, []>("op_31_cast_fp16")];
-            tensor<string, []> var_31_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_31_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
-            tensor<fp32, [?, ?, ?, 8198]> logits = cast(dtype = var_31_cast_fp16_to_fp32_dtype_0, x = var_31_cast_fp16)[name = tensor<string, []>("cast_0")];
         } -> (logits);
 }

 program(1.0)
+[buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3404.23.1"}, {"coremltools-component-torch", "2.5.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})]
 {
+    func main<ios15>(tensor<fp32, [?, ?, 640]> decoder_outputs, tensor<int32, [?]> encoder_length, tensor<fp32, [?, ?, 1024]> encoder_outputs) [FlexibleShapeInformation = tuple<tuple<tensor<string, []>, dict<tensor<string, []>, tensor<int32, [?]>>>, tuple<tensor<string, []>, dict<tensor<string, []>, list<tensor<int32, [2]>, ?>>>>((("DefaultShapes", {{"decoder_outputs", [1, 1, 640]}, {"encoder_length", [1]}, {"encoder_outputs", [1, 1, 1024]}}), ("RangeDims", {{"decoder_outputs", [[1, 100], [1, 1025], [640, 640]]}, {"encoder_length", [[1, 100]]}, {"encoder_outputs", [[1, 100], [1, 1025], [1024, 1024]]}})))] {
             tensor<string, []> encoder_outputs_to_fp16_dtype_0 = const()[name = tensor<string, []>("encoder_outputs_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [?, ?, 1024]> encoder_outputs_to_fp16 = cast(dtype = encoder_outputs_to_fp16_dtype_0, x = encoder_outputs)[name = tensor<string, []>("cast_5")];
+            tensor<int32, [3]> var_11_shape_cast_fp16 = shape(x = encoder_outputs_to_fp16)[name = tensor<string, []>("op_11_shape_cast_fp16")];
+            tensor<int32, []> gather_0_indices_0 = const()[name = tensor<string, []>("gather_0_indices_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> gather_0_axis_0 = const()[name = tensor<string, []>("gather_0_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> gather_0 = gather(axis = gather_0_axis_0, indices = gather_0_indices_0, x = var_11_shape_cast_fp16)[name = tensor<string, []>("gather_0")];
+            tensor<int32, []> gather_1_indices_0 = const()[name = tensor<string, []>("gather_1_indices_0"), val = tensor<int32, []>(1)];
+            tensor<int32, []> gather_1_axis_0 = const()[name = tensor<string, []>("gather_1_axis_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> gather_1 = gather(axis = gather_1_axis_0, indices = gather_1_indices_0, x = var_11_shape_cast_fp16)[name = tensor<string, []>("gather_1")];
+            tensor<int32, []> const_0 = const()[name = tensor<string, []>("const_0"), val = tensor<int32, []>(0)];
+            tensor<int32, []> const_1 = const()[name = tensor<string, []>("const_1"), val = tensor<int32, []>(1)];
+            tensor<int32, [?]> time_indices_1 = range_1d(end = gather_1, start = const_0, step = const_1)[name = tensor<string, []>("time_indices_1")];
+            tensor<int32, [1]> var_25_axes_0 = const()[name = tensor<string, []>("op_25_axes_0"), val = tensor<int32, [1]>([0])];
+            tensor<int32, [1, ?]> var_25 = expand_dims(axes = var_25_axes_0, x = time_indices_1)[name = tensor<string, []>("op_25")];
+            tensor<int32, []> concat_0_axis_0 = const()[name = tensor<string, []>("concat_0_axis_0"), val = tensor<int32, []>(0)];
+            tensor<bool, []> concat_0_interleave_0 = const()[name = tensor<string, []>("concat_0_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<int32, [2]> concat_0 = concat(axis = concat_0_axis_0, interleave = concat_0_interleave_0, values = (gather_0, gather_1))[name = tensor<string, []>("concat_0")];
+            tensor<int32, [2]> shape_0 = shape(x = var_25)[name = tensor<string, []>("shape_0")];
+            tensor<int32, [2]> real_div_0 = real_div(x = concat_0, y = shape_0)[name = tensor<string, []>("real_div_0")];
+            tensor<int32, [?, ?]> time_indices = tile(reps = real_div_0, x = var_25)[name = tensor<string, []>("time_indices")];
+            tensor<int32, [1]> encoder_length_expanded_axes_0 = const()[name = tensor<string, []>("encoder_length_expanded_axes_0"), val = tensor<int32, [1]>([1])];
+            tensor<int32, [?, 1]> encoder_length_expanded = expand_dims(axes = encoder_length_expanded_axes_0, x = encoder_length)[name = tensor<string, []>("encoder_length_expanded")];
+            tensor<bool, [?, ?]> encoder_mask = less(x = time_indices, y = encoder_length_expanded)[name = tensor<string, []>("encoder_mask")];
+            tensor<int32, [1]> var_33_axes_0 = const()[name = tensor<string, []>("op_33_axes_0"), val = tensor<int32, [1]>([-1])];
+            tensor<bool, [?, ?, 1]> var_33 = expand_dims(axes = var_33_axes_0, x = encoder_mask)[name = tensor<string, []>("op_33")];
+            tensor<string, []> cast_1_to_fp16_dtype_0 = const()[name = tensor<string, []>("cast_1_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
+            tensor<fp16, [?, ?, 1]> var_33_to_fp16 = cast(dtype = cast_1_to_fp16_dtype_0, x = var_33)[name = tensor<string, []>("cast_4")];
+            tensor<fp16, [?, ?, 1024]> input_1_cast_fp16 = mul(x = encoder_outputs_to_fp16, y = var_33_to_fp16)[name = tensor<string, []>("input_1_cast_fp16")];
             tensor<fp16, [640, 1024]> joint_enc_weight_to_fp16 = const()[name = tensor<string, []>("joint_enc_weight_to_fp16"), val = tensor<fp16, [640, 1024]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64)))];
             tensor<fp16, [640]> joint_enc_bias_to_fp16 = const()[name = tensor<string, []>("joint_enc_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1310848)))];
+            tensor<fp16, [?, ?, 640]> linear_0_cast_fp16 = linear(bias = joint_enc_bias_to_fp16, weight = joint_enc_weight_to_fp16, x = input_1_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")];
             tensor<string, []> decoder_outputs_to_fp16_dtype_0 = const()[name = tensor<string, []>("decoder_outputs_to_fp16_dtype_0"), val = tensor<string, []>("fp16")];
             tensor<fp16, [640, 640]> joint_pred_weight_to_fp16 = const()[name = tensor<string, []>("joint_pred_weight_to_fp16"), val = tensor<fp16, [640, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(1312192)))];
             tensor<fp16, [640]> joint_pred_bias_to_fp16 = const()[name = tensor<string, []>("joint_pred_bias_to_fp16"), val = tensor<fp16, [640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2131456)))];
+            tensor<fp16, [?, ?, 640]> decoder_outputs_to_fp16 = cast(dtype = decoder_outputs_to_fp16_dtype_0, x = decoder_outputs)[name = tensor<string, []>("cast_3")];
             tensor<fp16, [?, ?, 640]> linear_1_cast_fp16 = linear(bias = joint_pred_bias_to_fp16, weight = joint_pred_weight_to_fp16, x = decoder_outputs_to_fp16)[name = tensor<string, []>("linear_1_cast_fp16")];
             tensor<int32, [1]> f_axes_0 = const()[name = tensor<string, []>("f_axes_0"), val = tensor<int32, [1]>([2])];
             tensor<fp16, [?, ?, 1, 640]> f_cast_fp16 = expand_dims(axes = f_axes_0, x = linear_0_cast_fp16)[name = tensor<string, []>("f_cast_fp16")];
             tensor<int32, [1]> g_axes_0 = const()[name = tensor<string, []>("g_axes_0"), val = tensor<int32, [1]>([1])];
             tensor<fp16, [?, 1, ?, 640]> g_cast_fp16 = expand_dims(axes = g_axes_0, x = linear_1_cast_fp16)[name = tensor<string, []>("g_cast_fp16")];
+            tensor<fp16, [?, ?, ?, 640]> input_3_cast_fp16 = add(x = f_cast_fp16, y = g_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<fp16, [?, ?, ?, 640]> input_5_cast_fp16 = relu(x = input_3_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
             tensor<fp16, [8198, 640]> joint_joint_net_2_weight_to_fp16 = const()[name = tensor<string, []>("joint_joint_net_2_weight_to_fp16"), val = tensor<fp16, [8198, 640]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2132800)))];
             tensor<fp16, [8198]> joint_joint_net_2_bias_to_fp16 = const()[name = tensor<string, []>("joint_joint_net_2_bias_to_fp16"), val = tensor<fp16, [8198]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(12626304)))];
+            tensor<fp16, [?, ?, ?, 8198]> linear_2_cast_fp16 = linear(bias = joint_joint_net_2_bias_to_fp16, weight = joint_joint_net_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")];
+            tensor<int32, []> var_60 = const()[name = tensor<string, []>("op_60"), val = tensor<int32, []>(-1)];
+            tensor<fp16, [?, ?, ?, 8198]> var_62_softmax_cast_fp16 = softmax(axis = var_60, x = linear_2_cast_fp16)[name = tensor<string, []>("op_62_softmax_cast_fp16")];
+            tensor<fp16, []> var_62_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_62_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x0p+0)];
+            tensor<fp16, [?, ?, ?, 8198]> var_62_cast_fp16 = log(epsilon = var_62_epsilon_0_to_fp16, x = var_62_softmax_cast_fp16)[name = tensor<string, []>("op_62_cast_fp16")];
+            tensor<string, []> var_62_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_62_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")];
+            tensor<fp32, [?, ?, ?, 8198]> logits = cast(dtype = var_62_cast_fp16_to_fp32_dtype_0, x = var_62_cast_fp16)[name = tensor<string, []>("cast_2")];
         } -> (logits);
 }