ydshieh
committed on
Commit
·
e70a466
1
Parent(s):
4450aac
cleanup
Browse files - run_image_captioning_flax.py +1 -48
run_image_captioning_flax.py
CHANGED
@@ -680,6 +680,7 @@ def main():
|
|
680 |
|
681 |
return bools
|
682 |
|
|
|
683 |
def tokenization_fn(examples, max_target_length):
|
684 |
|
685 |
captions = []
|
@@ -728,43 +729,6 @@ def main():
|
|
728 |
|
729 |
return model_inputs
|
730 |
|
731 |
-
# Setting padding="max_length" as we need fixed length inputs for jitted functions
|
732 |
-
def preprocess_function(examples, max_target_length):
|
733 |
-
|
734 |
-
pixel_values = []
|
735 |
-
captions = []
|
736 |
-
for image_file, caption in zip(examples[image_column], examples[caption_column]):
|
737 |
-
with Image.open(image_file) as image:
|
738 |
-
try:
|
739 |
-
encoder_inputs = feature_extractor(images=image, return_tensors="np")
|
740 |
-
except:
|
741 |
-
continue
|
742 |
-
pixel_values.append(encoder_inputs.pixel_values)
|
743 |
-
captions.append(caption.lower() + ' ' + tokenizer.eos_token)
|
744 |
-
|
745 |
-
pixel_values = np.concatenate(pixel_values)
|
746 |
-
targets = captions
|
747 |
-
|
748 |
-
model_inputs = {}
|
749 |
-
model_inputs['pixel_values'] = pixel_values
|
750 |
-
|
751 |
-
# Setup the tokenizer for targets
|
752 |
-
with tokenizer.as_target_tokenizer():
|
753 |
-
labels = tokenizer(
|
754 |
-
targets, max_length=max_target_length, padding="max_length", truncation=True, return_tensors="np"
|
755 |
-
)
|
756 |
-
|
757 |
-
model_inputs["labels"] = labels["input_ids"]
|
758 |
-
decoder_input_ids = shift_tokens_right_fn(
|
759 |
-
labels["input_ids"], config.pad_token_id, config.decoder_start_token_id
|
760 |
-
)
|
761 |
-
model_inputs["decoder_input_ids"] = np.asarray(decoder_input_ids)
|
762 |
-
|
763 |
-
# We need decoder_attention_mask so we can ignore pad tokens from loss
|
764 |
-
model_inputs["decoder_attention_mask"] = labels["attention_mask"]
|
765 |
-
|
766 |
-
return model_inputs
|
767 |
-
|
768 |
features = datasets.Features(
|
769 |
{
|
770 |
"pixel_values": datasets.Array3D(
|
@@ -874,18 +838,11 @@ def main():
|
|
874 |
steps = num_examples // batch_size + int(num_examples % batch_size > 0 and not drop_last_batch)
|
875 |
num_splits = steps // steps_per_split + int(steps % steps_per_split > 0)
|
876 |
|
877 |
-
if drop_last_batch:
|
878 |
-
num_examples = steps * batch_size
|
879 |
-
|
880 |
if shuffle:
|
881 |
indices = jax.random.permutation(input_rng, len(ds))
|
882 |
else:
|
883 |
indices = jnp.arange(len(ds))
|
884 |
|
885 |
-
max_target_length = data_args.max_target_length
|
886 |
-
if split in ["valid", "test"]:
|
887 |
-
max_target_length = data_args.val_max_target_length
|
888 |
-
|
889 |
for idx in range(num_splits):
|
890 |
|
891 |
start_idx = block_size * idx
|
@@ -902,17 +859,13 @@ def main():
|
|
902 |
}
|
903 |
|
904 |
_ds =_ds.map(
|
905 |
-
# preprocess_function,
|
906 |
feature_extraction_fn,
|
907 |
batched=True,
|
908 |
num_proc=data_args.preprocessing_num_workers,
|
909 |
-
# remove_columns=column_names,
|
910 |
remove_columns=[image_column],
|
911 |
load_from_cache_file=not data_args.overwrite_cache,
|
912 |
features=features,
|
913 |
-
# desc=f"Running tokenizer on {names[split]} dataset".replace(" ", " "),
|
914 |
desc=f"Running feature extraction on {names[split]} dataset".replace(" ", " "),
|
915 |
-
# fn_kwargs={"max_target_length": max_target_length},
|
916 |
)
|
917 |
_ds = _ds.with_format("numpy")
|
918 |
|
|
|
680 |
|
681 |
return bools
|
682 |
|
683 |
+
# Setting padding="max_length" as we need fixed length inputs for jitted functions
|
684 |
def tokenization_fn(examples, max_target_length):
|
685 |
|
686 |
captions = []
|
|
|
729 |
|
730 |
return model_inputs
|
731 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
732 |
features = datasets.Features(
|
733 |
{
|
734 |
"pixel_values": datasets.Array3D(
|
|
|
838 |
steps = num_examples // batch_size + int(num_examples % batch_size > 0 and not drop_last_batch)
|
839 |
num_splits = steps // steps_per_split + int(steps % steps_per_split > 0)
|
840 |
|
|
|
|
|
|
|
841 |
if shuffle:
|
842 |
indices = jax.random.permutation(input_rng, len(ds))
|
843 |
else:
|
844 |
indices = jnp.arange(len(ds))
|
845 |
|
|
|
|
|
|
|
|
|
846 |
for idx in range(num_splits):
|
847 |
|
848 |
start_idx = block_size * idx
|
|
|
859 |
}
|
860 |
|
861 |
_ds =_ds.map(
|
|
|
862 |
feature_extraction_fn,
|
863 |
batched=True,
|
864 |
num_proc=data_args.preprocessing_num_workers,
|
|
|
865 |
remove_columns=[image_column],
|
866 |
load_from_cache_file=not data_args.overwrite_cache,
|
867 |
features=features,
|
|
|
868 |
desc=f"Running feature extraction on {names[split]} dataset".replace(" ", " "),
|
|
|
869 |
)
|
870 |
_ds = _ds.with_format("numpy")
|
871 |
|