ydshieh
committed on
Commit
·
4450aac
1
Parent(s):
8f31d11
fix
Browse files
run_image_captioning_flax.py
CHANGED
@@ -794,7 +794,7 @@ def main():
|
|
794 |
batched=True,
|
795 |
num_proc=data_args.preprocessing_num_workers,
|
796 |
# kept image paths
|
797 |
-
remove_columns=column_names
|
798 |
load_from_cache_file=not data_args.overwrite_cache,
|
799 |
desc=f"Running tokenizer on train dataset",
|
800 |
fn_kwargs={"max_target_length": data_args.max_target_length},
|
@@ -813,7 +813,7 @@ def main():
|
|
813 |
batched=True,
|
814 |
num_proc=data_args.preprocessing_num_workers,
|
815 |
# kept image paths
|
816 |
-
remove_columns=column_names
|
817 |
load_from_cache_file=not data_args.overwrite_cache,
|
818 |
desc=f"Running tokenizer on validation dataset",
|
819 |
fn_kwargs={"max_target_length": data_args.val_max_target_length},
|
@@ -832,7 +832,7 @@ def main():
|
|
832 |
batched=True,
|
833 |
num_proc=data_args.preprocessing_num_workers,
|
834 |
# kept image paths
|
835 |
-
remove_columns=column_names
|
836 |
load_from_cache_file=not data_args.overwrite_cache,
|
837 |
desc=f"Running tokenizer on prediction dataset",
|
838 |
fn_kwargs={"max_target_length": data_args.val_max_target_length},
|
|
|
794 |
batched=True,
|
795 |
num_proc=data_args.preprocessing_num_workers,
|
796 |
# kept image paths
|
797 |
+
remove_columns=[x for x in column_names if x != image_column],
|
798 |
load_from_cache_file=not data_args.overwrite_cache,
|
799 |
desc=f"Running tokenizer on train dataset",
|
800 |
fn_kwargs={"max_target_length": data_args.max_target_length},
|
|
|
813 |
batched=True,
|
814 |
num_proc=data_args.preprocessing_num_workers,
|
815 |
# kept image paths
|
816 |
+
remove_columns=[x for x in column_names if x != image_column],
|
817 |
load_from_cache_file=not data_args.overwrite_cache,
|
818 |
desc=f"Running tokenizer on validation dataset",
|
819 |
fn_kwargs={"max_target_length": data_args.val_max_target_length},
|
|
|
832 |
batched=True,
|
833 |
num_proc=data_args.preprocessing_num_workers,
|
834 |
# kept image paths
|
835 |
+
remove_columns=[x for x in column_names if x != image_column],
|
836 |
load_from_cache_file=not data_args.overwrite_cache,
|
837 |
desc=f"Running tokenizer on prediction dataset",
|
838 |
fn_kwargs={"max_target_length": data_args.val_max_target_length},
|