ydshieh
committed on
Commit
·
a5be38e
1
Parent(s):
fd1b4a2
improve code
Browse files- run_image_captioning_flax.py +13 -9
run_image_captioning_flax.py
CHANGED
@@ -48,6 +48,7 @@ from huggingface_hub import Repository
|
|
48 |
from transformers import (
|
49 |
CONFIG_MAPPING,
|
50 |
FLAX_MODEL_FOR_VISION_2_SEQ_MAPPING,
|
|
|
51 |
AutoConfig,
|
52 |
AutoFeatureExtractor,
|
53 |
AutoTokenizer,
|
@@ -73,6 +74,7 @@ except (LookupError, OSError):
|
|
73 |
|
74 |
MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_VISION_2_SEQ_MAPPING.keys())
|
75 |
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
|
|
|
76 |
|
77 |
|
78 |
# Copied from transformers.models.bart.modeling_flax_bart.shift_tokens_right
|
@@ -196,15 +198,15 @@ class ModelArguments:
|
|
196 |
)
|
197 |
model_type: Optional[str] = field(
|
198 |
default='vision-encoder-decoder',
|
199 |
-
metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)}
|
200 |
)
|
201 |
encoder_model_type: Optional[str] = field(
|
202 |
default=None,
|
203 |
-
metadata={"help": "If training from scratch, pass a encoder model type from the
|
204 |
)
|
205 |
decoder_model_type: Optional[str] = field(
|
206 |
default=None,
|
207 |
-
metadata={"help": "If training from scratch, pass a decoder model type from the list: " + ", ".join(
|
208 |
)
|
209 |
config_name: Optional[str] = field(
|
210 |
default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
|
@@ -570,7 +572,7 @@ def main():
|
|
570 |
config.decoder.eos_token_id = eos_token_id
|
571 |
config.decoder.pad_token_id = pad_token_id
|
572 |
|
573 |
-
# Set `encoder-decoder` (top-level) specific config
|
574 |
config.decoder_start_token_id = decoder_start_token_id
|
575 |
config.bos_token_id = bos_token_id
|
576 |
config.eos_token_id = eos_token_id
|
@@ -630,7 +632,7 @@ def main():
|
|
630 |
decoder_dtype=getattr(jnp, model_args.dtype),
|
631 |
)
|
632 |
|
633 |
-
# Set `encoder-decoder` (top-level) specific config
|
634 |
model.config.decoder_start_token_id = decoder_start_token_id
|
635 |
model.config.bos_token_id = bos_token_id
|
636 |
model.config.eos_token_id = eos_token_id
|
@@ -729,6 +731,7 @@ def main():
|
|
729 |
shift_tokens_right_fn = getattr(model_module, "shift_tokens_right", shift_tokens_right)
|
730 |
|
731 |
def filter_fn(examples):
|
|
|
732 |
|
733 |
bools = []
|
734 |
for image_file in examples[image_column]:
|
@@ -1163,7 +1166,8 @@ def main():
|
|
1163 |
if not os.path.isdir(os.path.join(training_args.output_dir)):
|
1164 |
os.makedirs(os.path.join(training_args.output_dir), exist_ok=True)
|
1165 |
|
1166 |
-
def save_ckpt(ckpt_dir: str, commit_msg: str =""):
|
|
|
1167 |
|
1168 |
# save checkpoint after each epoch and push checkpoint to the hub
|
1169 |
if jax.process_index() == 0:
|
@@ -1259,7 +1263,7 @@ def main():
|
|
1259 |
json.dump(metrics, f, indent=4, sort_keys=True)
|
1260 |
|
1261 |
# Update report
|
1262 |
-
with open(os.path.join(training_args.output_dir, '
|
1263 |
fp.write(desc + '\n')
|
1264 |
|
1265 |
# Save metrics (only for the evaluation/prediction being done along with training)
|
@@ -1325,7 +1329,7 @@ def main():
|
|
1325 |
|
1326 |
logger.info(desc)
|
1327 |
|
1328 |
-
with open(os.path.join(training_args.output_dir, '
|
1329 |
fp.write(desc + '\n')
|
1330 |
|
1331 |
# Save metrics
|
@@ -1347,7 +1351,7 @@ def main():
|
|
1347 |
|
1348 |
logger.info(desc)
|
1349 |
|
1350 |
-
with open(os.path.join(training_args.output_dir, '
|
1351 |
fp.write(desc + '\n')
|
1352 |
|
1353 |
# Save metrics
|
|
|
48 |
from transformers import (
|
49 |
CONFIG_MAPPING,
|
50 |
FLAX_MODEL_FOR_VISION_2_SEQ_MAPPING,
|
51 |
+
FLAX_MODEL_FOR_CAUSAL_LM_MAPPING,
|
52 |
AutoConfig,
|
53 |
AutoFeatureExtractor,
|
54 |
AutoTokenizer,
|
|
|
74 |
|
75 |
MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_VISION_2_SEQ_MAPPING.keys())
|
76 |
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
|
77 |
+
DECODER_MODEL_TYPES = tuple(conf.model_type for conf in list(FLAX_MODEL_FOR_CAUSAL_LM_MAPPING.keys()))
|
78 |
|
79 |
|
80 |
# Copied from transformers.models.bart.modeling_flax_bart.shift_tokens_right
|
|
|
198 |
)
|
199 |
model_type: Optional[str] = field(
|
200 |
default='vision-encoder-decoder',
|
201 |
+
metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)}
|
202 |
)
|
203 |
encoder_model_type: Optional[str] = field(
|
204 |
default=None,
|
205 |
+
metadata={"help": "If training from scratch, pass a encoder model type from the library"}
|
206 |
)
|
207 |
decoder_model_type: Optional[str] = field(
|
208 |
default=None,
|
209 |
+
metadata={"help": "If training from scratch, pass a decoder model type from the list: " + ", ".join(DECODER_MODEL_TYPES)}
|
210 |
)
|
211 |
config_name: Optional[str] = field(
|
212 |
default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"}
|
|
|
572 |
config.decoder.eos_token_id = eos_token_id
|
573 |
config.decoder.pad_token_id = pad_token_id
|
574 |
|
575 |
+
# Set `encoder-decoder` (top-level) specific config (not always necessary, but can avoid generate() error sometimes)
|
576 |
config.decoder_start_token_id = decoder_start_token_id
|
577 |
config.bos_token_id = bos_token_id
|
578 |
config.eos_token_id = eos_token_id
|
|
|
632 |
decoder_dtype=getattr(jnp, model_args.dtype),
|
633 |
)
|
634 |
|
635 |
+
# Set `encoder-decoder` (top-level) specific config (not always necessary, but can avoid generate() error sometimes)
|
636 |
model.config.decoder_start_token_id = decoder_start_token_id
|
637 |
model.config.bos_token_id = bos_token_id
|
638 |
model.config.eos_token_id = eos_token_id
|
|
|
731 |
shift_tokens_right_fn = getattr(model_module, "shift_tokens_right", shift_tokens_right)
|
732 |
|
733 |
def filter_fn(examples):
|
734 |
+
"""remove problematic images"""
|
735 |
|
736 |
bools = []
|
737 |
for image_file in examples[image_column]:
|
|
|
1166 |
if not os.path.isdir(os.path.join(training_args.output_dir)):
|
1167 |
os.makedirs(os.path.join(training_args.output_dir), exist_ok=True)
|
1168 |
|
1169 |
+
def save_ckpt(ckpt_dir: str, commit_msg: str = ""):
|
1170 |
+
"""save checkpoints and push to Hugging Face Hub if specified"""
|
1171 |
|
1172 |
# save checkpoint after each epoch and push checkpoint to the hub
|
1173 |
if jax.process_index() == 0:
|
|
|
1263 |
json.dump(metrics, f, indent=4, sort_keys=True)
|
1264 |
|
1265 |
# Update report
|
1266 |
+
with open(os.path.join(training_args.output_dir, 'log'), 'a', encoding='UTF-8') as fp:
|
1267 |
fp.write(desc + '\n')
|
1268 |
|
1269 |
# Save metrics (only for the evaluation/prediction being done along with training)
|
|
|
1329 |
|
1330 |
logger.info(desc)
|
1331 |
|
1332 |
+
with open(os.path.join(training_args.output_dir, 'log'), 'a', encoding='UTF-8') as fp:
|
1333 |
fp.write(desc + '\n')
|
1334 |
|
1335 |
# Save metrics
|
|
|
1351 |
|
1352 |
logger.info(desc)
|
1353 |
|
1354 |
+
with open(os.path.join(training_args.output_dir, 'log'), 'a', encoding='UTF-8') as fp:
|
1355 |
fp.write(desc + '\n')
|
1356 |
|
1357 |
# Save metrics
|