diff --git "a/run.log" "b/run.log" --- "a/run.log" +++ "b/run.log" @@ -1,5 +1,5 @@ -2021-07-09 13:29:34.504834: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory -[13:29:35] - INFO - __main__ - Training/evaluation parameters TrainingArguments( +2021-07-10 11:09:15.922051: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory +[11:09:17] - INFO - __main__ - Training/evaluation parameters TrainingArguments( _n_gpu=-1, adafactor=False, adam_beta1=0.9, @@ -35,7 +35,7 @@ local_rank=-1, log_level=-1, log_level_replica=-1, log_on_each_node=True, -logging_dir=./runs/Jul09_13-29-35_t1v-n-112df4a9-w-0, +logging_dir=./runs/Jul10_11-09-17_t1v-n-112df4a9-w-0, logging_first_step=False, logging_steps=500, logging_strategy=IntervalStrategy.STEPS, @@ -74,339 +74,194 @@ warmup_ratio=0.0, warmup_steps=1000, weight_decay=0.0, ) -[13:29:35] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[13:29:35] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[13:29:35] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[13:29:35] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[13:29:35] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[13:29:36] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[13:29:36] - WARNING - datasets.builder - Reusing dataset oscar (/home/nipunsadvilkar/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_als/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[13:29:36] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[13:29:36] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[13:29:36] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[13:29:36] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[13:29:36] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[13:29:36] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[13:29:36] - WARNING - datasets.builder - Reusing dataset oscar (/home/nipunsadvilkar/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_als/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[13:29:36] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 -[13:29:36] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 -[13:29:36] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[13:29:36] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 -[13:29:36] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 -[13:29:36] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 -[13:29:36] - WARNING - datasets.builder - Reusing dataset oscar (/home/nipunsadvilkar/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_als/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) -[13:29:36] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/nipunsadvilkar/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_als/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-0f52086e7b10d7e8.arrow -[13:29:36] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/nipunsadvilkar/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_als/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-a39e5f5a5c6c69fc.arrow -[13:29:36] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/nipunsadvilkar/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_als/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-dff30eb991bed7ea.arrow -[13:29:36] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/nipunsadvilkar/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_als/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-7b9e9747e032f3c3.arrow -[13:29:37] - INFO - absl - Starting the local TPU driver. -[13:29:37] - INFO - absl - Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local:// -[13:29:37] - INFO - absl - Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: Host Interpreter TPU -[13:29:40] - DEBUG - git.cmd - Popen(['git', 'version'], cwd=/home/nipunsadvilkar/sample/roberta-base-mr, universal_newlines=False, shell=None, istream=None) -[13:29:40] - DEBUG - git.cmd - Popen(['git', 'version'], cwd=/home/nipunsadvilkar/sample/roberta-base-mr, universal_newlines=False, shell=None, istream=None) -[13:29:41] - DEBUG - git.cmd - Popen(['git', 'rev-parse', '--show-toplevel'], cwd=/home/nipunsadvilkar/sample/roberta-base-mr, universal_newlines=False, shell=None, istream=None) -[13:29:41] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): api.wandb.ai:443 -[13:29:41] - DEBUG - urllib3.connectionpool - https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 None -wandb: Currently logged in as: nipunsadvilkar (use `wandb login --relogin` to force relogin) -2021-07-09 13:29:42.294064: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory -wandb: Tracking run with wandb version 0.10.33 -wandb: Syncing run avid-shape-8 -wandb: View project at https://wandb.ai/nipunsadvilkar/hf-flax-robert-base-mr -wandb: View run at https://wandb.ai/nipunsadvilkar/hf-flax-robert-base-mr/runs/xrzp5klf -wandb: Run data is saved locally in /home/nipunsadvilkar/sample/roberta-base-mr/wandb/run-20210709_132941-xrzp5klf -wandb: Run `wandb offline` to turn off syncing. -2021-07-09 13:29:43.266078: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory -2021-07-09 13:29:43.266109: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303) - -/home/nipunsadvilkar/roberta_mr_env/lib/python3.8/site-packages/jax/lib/xla_bridge.py:382: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. - warnings.warn( -/home/nipunsadvilkar/roberta_mr_env/lib/python3.8/site-packages/jax/lib/xla_bridge.py:369: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. - warnings.warn( - Epoch ... (1/8): 0%| | 0/8 [00:00