tarekziade HF Staff committed on Jun 21, 2024

Commit

622b2a4

verified ·

1 Parent(s): bfcb944

New training

Browse files

Files changed (19) hide show

.DS_Store +0 -0
README.md +65 -34
config.json +1 -1
merges.txt +0 -0
onnx/decoder_model.onnx +3 -0
onnx/decoder_model_merged.onnx +3 -0
onnx/decoder_model_merged_quantized.onnx +3 -0
onnx/decoder_model_quantized.onnx +3 -0
onnx/decoder_with_past_model.onnx +3 -0
onnx/decoder_with_past_model_quantized.onnx +3 -0
onnx/encoder_model.onnx +3 -0
onnx/encoder_model_quantized.onnx +3 -0
pytorch_model.bin +1 -1
quantize_config.json +125 -0
special_tokens_map.json +6 -0
tokenizer.json +0 -0
tokenizer_config.json +9 -0
training_args.bin +1 -1
vocab.json +0 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

README.md CHANGED Viewed

@@ -1,50 +1,81 @@
 ---
 tags:
-- generated_from_trainer
 model-index:
-- name: test-push
-  results: []
 ---
-<!-- This model card has been generated automatically according to the information the Trainer had access to. You
-should probably proofread and complete it, then remove this comment. -->
-# test-push
-This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
-## Model description
-More information needed
-## Intended uses & limitations
-More information needed
-## Training and evaluation data
-More information needed
-## Training procedure
-### Training hyperparameters
-The following hyperparameters were used during training:
-- learning_rate: 5e-05
-- train_batch_size: 50
-- eval_batch_size: 50
-- seed: 42
-- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: linear
-- num_epochs: 1
-### Training results
 ### Framework versions
-- Transformers 4.33.2
-- Pytorch 2.3.1
-- Datasets 2.20.0
-- Tokenizers 0.13.3

 ---
 tags:
+  - image-to-text
+  - image-captioning
+license: apache-2.0
+metrics:
+  - rouge
+datasets:
+  - nlphuji/flickr30k
+widget:
+  - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/savanna.jpg
+    example_title: Savanna
+  - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/football-match.jpg
+    example_title: Football Match
+  - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/airport.jpg
+    example_title: Airport
+base_model:
+  - google/vit-base-patch16-224-in21k
 model-index:
+  - name: mozilla/distilvit
+    results:
+      - task:
+          type: image-to-text
+          name: Image To Text
+        dataset:
+          name: nlphuji/flickr30k
+          type: nlphuji/flickr30k
+        metrics:
+          - name: ROUGE-1
+            type: rouge
+            value: 43.006
+            verified: true
+          - name: ROUGE-2
+            type: rouge
+            value: 16.9939
+            verified: true
+          - name: ROUGE-L
+            type: rouge
+            value: 38.8923
+            verified: true
+          - name: ROUGE-LSUM
+            type: rouge
+            value: 38.8877
+            verified: true
+          - name: loss
+            type: loss
+            value: 0.19939416646957397
+          - name: gen_len
+            type: gen_len
+            value: 11.327256736227712
+            verified: true
 ---
+# distilvit
+This model is a work in progress. It is a fine-tuned version of the following base models:
+- a ViT model for the image encoder: https://huggingface.co/google/vit-base-patch16-224-in21k
+- a Distilled GPT-2 model for the text decoder: https://huggingface.co/distilbert/distilgpt2
+This model was trained on:
+- Flickr30k: https://huggingface.co/datasets/nlphuji/flickr30k
+- COCO 2017: https://cocodataset.org
+You can retrieve that checkpoint by checking out commit 3083a3cef6e3c8dd90df3f088074bbe836b0f403.
+It was then further fine-tuned on:
+- Flickr30k debiased: https://huggingface.co/datasets/Mozilla/flickr30k-transformed-captions
+- DocOrNot: https://huggingface.co/datasets/Mozilla/docornot
+You can find the code used to create the model here: https://github.com/mozilla/distilvit
 ### Framework versions
+- Transformers 4.40.2
+- Pytorch 2.3.0+cu121
+- Datasets 2.19.1
+- Tokenizers 0.19.1

config.json CHANGED Viewed

@@ -1,4 +1,5 @@
 {
   "architectures": [
     "VisionEncoderDecoderModel"
   ],
@@ -2173,6 +2174,5 @@
   "model_type": "vision-encoder-decoder",
   "pad_token_id": 50256,
   "tie_word_embeddings": false,
-  "torch_dtype": "float32",
   "transformers_version": "4.33.2"
 }

 {
+  "_name_or_path": "/Users/tarekziade/Dev/distilvit/distilvit/../vit-base-patch16-224-distilgpt2",
   "architectures": [
     "VisionEncoderDecoderModel"
   ],
   "model_type": "vision-encoder-decoder",
   "pad_token_id": 50256,
   "tie_word_embeddings": false,
   "transformers_version": "4.33.2"
 }

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

onnx/decoder_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1407c3bf1f40883552e100b56dede3ae3f5028169f3b71d520390a049418ab9e
+size 385864797

onnx/decoder_model_merged.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:088ae054ab9b988b348314ea9e9c87966c43c0f6699180aa80c1fe67fc3a5089
+size 387342586

onnx/decoder_model_merged_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a557dd52eb6662df9fbfc7c6eb70acc6f4e3b7e05139131de99c399cc7233c0d
+size 99759578

onnx/decoder_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:534c5578ebe765a98a8ecd4526b6ba7a86054a63bd7ddb07f4df00824f9eeacb
+size 98065762

onnx/decoder_with_past_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:420f2371373ba5a0347e9fc0fe7a6b8ee66b99a37a71b1fc9aca83935639b44a
+size 385864377

onnx/decoder_with_past_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c46d9d9457d906cf1912d8a8a15d44c934558874b1ccdff5fadea7afb04cb96
+size 98063169

onnx/encoder_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:baf53cebbcc6d277bb1874d78e729e591a81334f90ad96a9b2b99a60836c8dca
+size 343440632

onnx/encoder_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e3c7495809ab8fa02188d486192b1a6a5ad3d33bc5c91dfe795e08d4603692b
+size 87038172

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92b15e097c140fe56e3cce918035e1bd564727b3e0cb0ec3ddbdebdb1f702572
 size 730047834

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b26ea0227ceb2870c6d45cf7000830cea7d5a0727d6996941d1564195d5a2e6
 size 730047834

quantize_config.json ADDED Viewed

	@@ -0,0 +1,125 @@

+{
+    "per_channel": false,
+    "reduce_range": false,
+    "per_model_config": {
+        "decoder_with_past_model": {
+            "op_types": [
+                "Gemm",
+                "Slice",
+                "Transpose",
+                "Constant",
+                "Range",
+                "Pow",
+                "Unsqueeze",
+                "Where",
+                "Softmax",
+                "Concat",
+                "ReduceMean",
+                "Sub",
+                "Split",
+                "Shape",
+                "MatMul",
+                "ConstantOfShape",
+                "Div",
+                "Tanh",
+                "Reshape",
+                "Squeeze",
+                "Add",
+                "Gather",
+                "Sqrt",
+                "Mul",
+                "Cast"
+            ],
+            "weight_type": "QInt8"
+        },
+        "decoder_model": {
+            "op_types": [
+                "Gemm",
+                "Slice",
+                "Transpose",
+                "Constant",
+                "Range",
+                "Pow",
+                "Unsqueeze",
+                "Where",
+                "Softmax",
+                "Concat",
+                "ReduceMean",
+                "Sub",
+                "Split",
+                "Shape",
+                "MatMul",
+                "ConstantOfShape",
+                "Div",
+                "Tanh",
+                "Reshape",
+                "Squeeze",
+                "Add",
+                "Gather",
+                "Sqrt",
+                "Mul",
+                "Cast"
+            ],
+            "weight_type": "QInt8"
+        },
+        "encoder_model": {
+            "op_types": [
+                "Equal",
+                "Slice",
+                "Transpose",
+                "Constant",
+                "Pow",
+                "Unsqueeze",
+                "Where",
+                "Softmax",
+                "Concat",
+                "ReduceMean",
+                "Sub",
+                "Shape",
+                "MatMul",
+                "ConstantOfShape",
+                "Conv",
+                "Div",
+                "Erf",
+                "Reshape",
+                "Expand",
+                "Add",
+                "Gather",
+                "Sqrt",
+                "Mul"
+            ],
+            "weight_type": "QUInt8"
+        },
+        "decoder_model_merged": {
+            "op_types": [
+                "Gemm",
+                "Slice",
+                "Transpose",
+                "Constant",
+                "Range",
+                "If",
+                "Pow",
+                "Unsqueeze",
+                "Where",
+                "Softmax",
+                "Concat",
+                "ReduceMean",
+                "Sub",
+                "Split",
+                "Shape",
+                "MatMul",
+                "ConstantOfShape",
+                "Div",
+                "Tanh",
+                "Reshape",
+                "Squeeze",
+                "Add",
+                "Gather",
+                "Sqrt",
+                "Mul",
+                "Cast"
+            ],
+            "weight_type": "QInt8"
+        }
+    }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75027ebf1d2059df780db4f160a3b3b8bff13fad52e4cec941ead5253f74ed00
 size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0a2ac50f309f8c9847a82159d9a9ac78e7a2325898793a1789b7a803a96a996
 size 4728

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

New training

🎉 Free Image Generator Now Available!