ydshieh
/

flax-vision-encoder-decoder-vit-gpt2-coco-en

Model card Files Files and versions

xet

Community

ydshieh committed on Dec 18, 2021

Commit

afddfdc

1 Parent(s): 9a97c24

improve doc

Browse files

Files changed (1) hide show

run_image_captioning_flax.py +26 -16

run_image_captioning_flax.py CHANGED Viewed

@@ -755,6 +755,9 @@ def main():
     # Setting padding="max_length" as we need fixed length inputs for jitted functions
     def tokenization_fn(examples, max_target_length):
         captions = []
         for caption in examples[caption_column]:
@@ -784,6 +787,9 @@ def main():
         return model_inputs
     def feature_extraction_fn(examples):
         images = [Image.open(image_file) for image_file in examples[image_column]]
         encoder_inputs = feature_extractor(images=images, return_tensors="np")
@@ -792,6 +798,9 @@ def main():
         return model_inputs
     def preprocess_fn(examples, max_target_length):
         model_inputs = {}
         model_inputs.update(tokenization_fn(examples, max_target_length))
@@ -817,10 +826,15 @@ def main():
         }
     )
-    function_kwarg = preprocess_fn if not training_args.block_size else tokenization_fn
-    features_kwarg = features if not training_args.block_size else None
-    remove_columns_kwarg = [x for x in column_names if x != image_column or not training_args.block_size]
-    processors = "tokenizer and feature extractor" if not training_args.block_size else "tokenizer"
     if training_args.do_train:
         if "train" not in dataset:
@@ -837,11 +851,11 @@ def main():
             # kept image paths
             remove_columns=remove_columns_kwarg,
             load_from_cache_file=not data_args.overwrite_cache,
-            desc=f"Running {processors} on train dataset",
             fn_kwargs={"max_target_length": data_args.max_target_length},
             features=features_kwarg,
         )
-        if not training_args.block_size:
             train_dataset = train_dataset.with_format("numpy")
     if training_args.do_eval:
@@ -859,11 +873,11 @@ def main():
             # kept image paths
             remove_columns=remove_columns_kwarg,
             load_from_cache_file=not data_args.overwrite_cache,
-            desc=f"Running {processors} on validation dataset",
             fn_kwargs={"max_target_length": data_args.val_max_target_length},
             features=features_kwarg,
         )
-        if not training_args.block_size:
             eval_dataset = eval_dataset.with_format("numpy")
     if training_args.do_predict:
@@ -881,17 +895,13 @@ def main():
             # kept image paths
             remove_columns=remove_columns_kwarg,
             load_from_cache_file=not data_args.overwrite_cache,
-            desc=f"Running {processors} on prediction dataset",
             fn_kwargs={"max_target_length": data_args.val_max_target_length},
             features=features_kwarg,
         )
-        if not training_args.block_size:
             predict_dataset = predict_dataset.with_format("numpy")
-    # Split the dataset into several chunks - each chunk is processed (.map) without cache to create a
-    # data loader separately (in a sequential order).
-    block_size = training_args.block_size
     # Store some constant
     train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count()
@@ -1187,7 +1197,7 @@ def main():
         preds = []
         labels = []
-        batches = get_batch_iter(rng, dataset, block_size=block_size, batch_size=eval_batch_size, keep_in_memory=False, shuffle=False, split=split)
         steps = len(dataset) // eval_batch_size
         for _ in tqdm(range(steps), desc=f"{'Predicting' if split == 'test' else 'Evaluating'}...", position=2, leave=False):
             # Model forward
@@ -1295,7 +1305,7 @@ def main():
             train_metrics = []
-            train_batches = get_batch_iter(input_rng, train_dataset, block_size=block_size, batch_size=train_batch_size, keep_in_memory=True, shuffle=True, split="train")
             # train
             for (batch_idx, _) in enumerate(tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False)):

     # Setting padding="max_length" as we need fixed length inputs for jitted functions
     def tokenization_fn(examples, max_target_length):
+        """
+        Run tokenization on captions.
+        """
         captions = []
         for caption in examples[caption_column]:
         return model_inputs
     def feature_extraction_fn(examples):
+        """
+        Run feature extraction on images.
+        """
         images = [Image.open(image_file) for image_file in examples[image_column]]
         encoder_inputs = feature_extractor(images=images, return_tensors="np")
         return model_inputs
     def preprocess_fn(examples, max_target_length):
+        """
+        Run tokenization + image feature extraction
+        """
         model_inputs = {}
         model_inputs.update(tokenization_fn(examples, max_target_length))
         }
     )
+    # If `block_size` is `0`, tokenization & image feature extraction is done before training
+    run_feat_ext_before_training = training_args.block_size == 0
+    # Used in .map() below
+    function_kwarg = preprocess_fn if run_feat_ext_before_training else tokenization_fn
+    # `features` is used only for the final preprocessed dataset (for performance reasons).
+    features_kwarg = features if run_feat_ext_before_training else None
+    # Keep `image_column` if the feature extraction is done during training
+    remove_columns_kwarg = [x for x in column_names if x != image_column or run_feat_ext_before_training]
+    processor_names = "tokenizer and feature extractor" if run_feat_ext_before_training else "tokenizer"
     if training_args.do_train:
         if "train" not in dataset:
             # kept image paths
             remove_columns=remove_columns_kwarg,
             load_from_cache_file=not data_args.overwrite_cache,
+            desc=f"Running {processor_names} on train dataset",
             fn_kwargs={"max_target_length": data_args.max_target_length},
             features=features_kwarg,
         )
+        if run_feat_ext_before_training:
             train_dataset = train_dataset.with_format("numpy")
     if training_args.do_eval:
             # kept image paths
             remove_columns=remove_columns_kwarg,
             load_from_cache_file=not data_args.overwrite_cache,
+            desc=f"Running {processor_names} on validation dataset",
             fn_kwargs={"max_target_length": data_args.val_max_target_length},
             features=features_kwarg,
         )
+        if run_feat_ext_before_training:
             eval_dataset = eval_dataset.with_format("numpy")
     if training_args.do_predict:
             # kept image paths
             remove_columns=remove_columns_kwarg,
             load_from_cache_file=not data_args.overwrite_cache,
+            desc=f"Running {processor_names} on prediction dataset",
             fn_kwargs={"max_target_length": data_args.val_max_target_length},
             features=features_kwarg,
         )
+        if run_feat_ext_before_training:
             predict_dataset = predict_dataset.with_format("numpy")
     # Store some constant
     train_batch_size = int(training_args.per_device_train_batch_size) * jax.device_count()
         preds = []
         labels = []
+        batches = get_batch_iter(rng, dataset, block_size=training_args.block_size, batch_size=eval_batch_size, keep_in_memory=False, shuffle=False, split=split)
         steps = len(dataset) // eval_batch_size
         for _ in tqdm(range(steps), desc=f"{'Predicting' if split == 'test' else 'Evaluating'}...", position=2, leave=False):
             # Model forward
             train_metrics = []
+            train_batches = get_batch_iter(input_rng, train_dataset, block_size=training_args.block_size, batch_size=train_batch_size, keep_in_memory=True, shuffle=True, split="train")
             # train
             for (batch_idx, _) in enumerate(tqdm(range(steps_per_epoch), desc="Training...", position=1, leave=False)):