|  | --- | 
					
						
						|  | datasets: | 
					
						
						|  | - go_emotions | 
					
						
						|  | language: | 
					
						
						|  | - en | 
					
						
						|  | library_name: transformers | 
					
						
						|  | model-index: | 
					
						
						|  | - name: text-classification-goemotions | 
					
						
						|  | results: | 
					
						
						|  | - task: | 
					
						
						|  | name: Text Classification | 
					
						
						|  | type: text-classification | 
					
						
						|  | dataset: | 
					
						
						|  | name: go_emotions | 
					
						
						|  | type: multilabel_classification | 
					
						
						|  | config: simplified | 
					
						
						|  | split: test | 
					
						
						|  | args: simplified | 
					
						
						|  | metrics: | 
					
						
						|  | - name: F1 | 
					
						
						|  | type: f1 | 
					
						
						|  | value: 0.487 | 
					
						
						|  | --- | 
					
						
						|  |  | 
					
						
						|  | # Text Classification GoEmotions | 
					
						
						|  |  | 
					
						
						|  | This model is an ONNX-quantized, fine-tuned version of [nreimers/MiniLMv2-L6-H384-distilled-from-RoBERTa-Large](https://huggingface.co/nreimers/MiniLMv2-L6-H384-distilled-from-RoBERTa-Large) on the [go_emotions](https://huggingface.co/datasets/go_emotions) dataset, using [tasinhoque/text-classification-goemotions](https://huggingface.co/tasinhoque/text-classification-goemotions) as the teacher model. | 
					
						
						|  |  | 
					
						
						|  | # Load the Model | 
					
						
						|  |  | 
					
						
						|  | ```py | 
					
						
						|  | import os | 
					
						
						|  | import numpy as np | 
					
						
						|  | import json | 
					
						
						|  |  | 
					
						
						|  | from tokenizers import Tokenizer | 
					
						
						|  | from onnxruntime import InferenceSession | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | # !git clone https://huggingface.co/Ngit/MiniLMv2-L6-H384-goemotions-v2-onnx | 
					
						
						|  |  | 
					
						
						|  | model_name = "Ngit/MiniLMv2-L6-H384-goemotions-v2-onnx" | 
					
						
						|  | tokenizer = Tokenizer.from_pretrained(model_name) | 
					
						
						|  | tokenizer.enable_padding( | 
					
						
						|  | pad_token="<pad>", | 
					
						
						|  | pad_id=1, | 
					
						
						|  | ) | 
					
						
						|  | tokenizer.enable_truncation(max_length=256) | 
					
						
						|  | batch_size = 16 | 
					
						
						|  |  | 
					
						
						|  | texts = ["I am angry",] | 
					
						
						|  | outputs = [] | 
					
						
						|  | model = InferenceSession("MiniLMv2-L6-H384-goemotions-v2-onnx\model_optimized_quantized.onnx", providers=['CUDAExecutionProvider']) | 
					
						
						|  |  | 
					
						
						|  | with open(os.path.join("MiniLMv2-L6-H384-goemotions-v2-onnx", "config.json"), "r") as f: | 
					
						
						|  | config = json.load(f) | 
					
						
						|  |  | 
					
						
						|  | output_names = [output.name for output in model.get_outputs()] | 
					
						
						|  | input_names = [input.name for input in model.get_inputs()] | 
					
						
						|  |  | 
					
						
						|  | for subtexts in np.array_split(np.array(texts), len(texts) // batch_size + 1): | 
					
						
						|  | encodings = tokenizer.encode_batch(list(subtexts)) | 
					
						
						|  | inputs = { | 
					
						
						|  | "input_ids": np.vstack( | 
					
						
						|  | [encoding.ids for encoding in encodings], dtype=np.int64 | 
					
						
						|  | ), | 
					
						
						|  | "attention_mask": np.vstack( | 
					
						
						|  | [encoding.attention_mask for encoding in encodings], dtype=np.int64 | 
					
						
						|  | ), | 
					
						
						|  | "token_type_ids": np.vstack( | 
					
						
						|  | [encoding.type_ids for encoding in encodings], dtype=np.int64 | 
					
						
						|  | ), | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | for input_name in input_names: | 
					
						
						|  | if input_name not in inputs: | 
					
						
						|  | raise ValueError(f"Input name {input_name} not found in inputs") | 
					
						
						|  |  | 
					
						
						|  | inputs = {input_name: inputs[input_name] for input_name in input_names} | 
					
						
						|  | output = np.squeeze( | 
					
						
						|  | np.stack( | 
					
						
						|  | model.run(output_names=output_names, input_feed=inputs) | 
					
						
						|  | ), | 
					
						
						|  | axis=0, | 
					
						
						|  | ) | 
					
						
						|  | outputs.append(output) | 
					
						
						|  |  | 
					
						
						|  | outputs = np.concatenate(outputs, axis=0) | 
					
						
						|  | scores = 1 / (1 + np.exp(-outputs)) | 
					
						
						|  | results = [] | 
					
						
						|  | for item in scores: | 
					
						
						|  | labels = [] | 
					
						
						|  | scores = [] | 
					
						
						|  | for idx, s in enumerate(item): | 
					
						
						|  | labels.append(config["id2label"][str(idx)]) | 
					
						
						|  | scores.append(float(s)) | 
					
						
						|  | results.append({"labels": labels, "scores": scores}) | 
					
						
						|  |  | 
					
						
						|  | results | 
					
						
						|  | ``` | 
					
						
						|  | # Training hyperparameters | 
					
						
						|  |  | 
					
						
						|  | The following hyperparameters were used during training: | 
					
						
						|  | - learning_rate: 6e-05 | 
					
						
						|  | - train_batch_size: 64 | 
					
						
						|  | - eval_batch_size: 64 | 
					
						
						|  | - seed: 42 | 
					
						
						|  | - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 | 
					
						
						|  | - lr_scheduler_type: linear | 
					
						
						|  | - num_epochs: 40 | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | # Metrics (comparison with teacher model) | 
					
						
						|  |  | 
					
						
						|  | | Teacher (params)    |   Student (params)     | Set         | Score (teacher)    |    Score (student)      | | 
					
						
						|  | |--------------------|-------------|----------|--------| --------| | 
					
						
						|  | | tasinhoque/text-classification-goemotions (355M) |      MiniLMv2-L6-H384-goemotions-v2    | Validation  | 0.514252 |0.484898 | | 
					
						
						|  | | tasinhoque/text-classification-goemotions (33M) |      MiniLMv2-L6-H384-goemotions-v2 (original model)   | Test  | 0.501937 |  0.486890 | | 
					
						
						|  |  | 
					
						
						|  | # Training Code, Evaluation & Deployment | 
					
						
						|  |  | 
					
						
						|  | Check | 
					
						
						|  |  | 
					
						
						|  |  |