Some small refactoring for consistent settings
HuggingFace_Mistral_Transformer_Single_Instrument.ipynb
CHANGED
@@ -175,6 +175,16 @@
     "Then data augmentation is performed on each subset independently, and the MIDIs are split into smaller chunks that make approximately the desired token sequence length for training."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sequence_length = 1024 # The maximum sequence length for data samples.\n",
+    "kwargs_dataset = {\"max_seq_len\": sequence_length, \"tokenizer\": tokenizer, \"bos_token_id\": tokenizer[\"BOS_None\"], \"eos_token_id\": tokenizer[\"EOS_None\"]}"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
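The new cell above is the heart of the refactor: one `sequence_length` and one shared `kwargs_dataset` dict used by every dataset in the notebook. A minimal sketch of the same idea as plain Python, assuming miditok 3.x; the tokenizer file and Maestro directory names are hypothetical placeholders:

```python
from pathlib import Path

from miditok import REMI
from miditok.pytorch_data import DatasetMIDI

tokenizer = REMI(params=Path("tokenizer.json"))  # hypothetical saved tokenizer

sequence_length = 1024  # single source of truth for the sequence length

# Shared settings so train/valid/test/preview datasets stay consistent.
kwargs_dataset = {
    "max_seq_len": sequence_length,
    "tokenizer": tokenizer,
    "bos_token_id": tokenizer["BOS_None"],
    "eos_token_id": tokenizer["EOS_None"],
}

midi_paths_train = list(Path("Maestro_train").glob("**/*.mid*"))  # hypothetical dir
dataset_train = DatasetMIDI(midi_paths_train, **kwargs_dataset)
```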
@@ -204,7 +214,7 @@
     "    files_paths=files_paths,\n",
     "    tokenizer=tokenizer,\n",
     "    save_dir=subset_chunks_dir,\n",
-    "    max_seq_len=
+    "    max_seq_len=sequence_length,\n",
     "    num_overlap_bars=2,\n",
     ")\n",
     "\n",
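The keyword arguments being patched here match miditok's `split_files_for_training` utility (the function name itself sits outside the hunk, so treat that as an assumption); the point of the change is that `max_seq_len` now reuses `sequence_length` instead of a hard-coded literal:

```python
from pathlib import Path

from miditok.utils import split_files_for_training

files_paths = list(Path("Maestro_train").glob("**/*.mid*"))  # hypothetical input
subset_chunks_dir = Path("Maestro_train_chunks")             # hypothetical output dir

split_files_for_training(
    files_paths=files_paths,
    tokenizer=tokenizer,          # the shared tokenizer from the settings cell
    save_dir=subset_chunks_dir,
    max_seq_len=sequence_length,  # previously a hard-coded value
    num_overlap_bars=2,           # consecutive chunks share two bars of context
)
```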
@@ -230,7 +240,7 @@
    "midi_paths_valid = list(root_save.joinpath(Path(\"Maestro_valid\")).glob(\"**/*.mid\")) + list(root_save.joinpath(Path(\"Maestro_valid\")).glob(\"**/*.midi\")) \n",
    "midi_paths_test = list(root_save.joinpath(Path(\"Maestro_test\")).glob(\"**/*.mid\")) + list(root_save.joinpath(Path(\"Maestro_test\")).glob(\"**/*.midi\"))\n",
    "\n",
-   "
+   "\n",
    "\n",
    "dataset_train = DatasetMIDI(midi_paths_train, **kwargs_dataset)\n",
    "dataset_valid = DatasetMIDI(midi_paths_valid, **kwargs_dataset)\n",
@@ -255,7 +265,7 @@
    },
    "outputs": [],
    "source": [
-   "testing_files = \n",
+   "#testing_files = \n",
    "preview_files_path = []\n",
    "for testing_file in testing_files:\n",
    "    preview_files_path.append(Path(testing_file))\n",
@@ -265,7 +275,7 @@
    "    files_paths=preview_files_path,\n",
    "    tokenizer=tokenizer,\n",
    "    save_dir=preview_dir,\n",
-   "    max_seq_len=
+   "    max_seq_len=sequence_length,\n",
    "    num_overlap_bars=2,\n",
    ")\n"
   ]
@@ -286,7 +296,7 @@
    "    file_name_lookup.append(p3.name)\n",
    "    return file_name_lookup.index(p3.name)\n",
    "    \n",
-   "kwargs_dataset = {\"max_seq_len\": 
+   "kwargs_dataset = {\"max_seq_len\": sequence_length, \"tokenizer\": tokenizer, \"bos_token_id\": tokenizer[\"BOS_None\"], \"eos_token_id\": tokenizer[\"EOS_None\"], \"func_to_get_labels\" : func_to_get_labels}\n",
    "dataset_preview = DatasetMIDI(midi_split_preview, **kwargs_dataset)"
   ]
  },
@@ -362,14 +372,14 @@
   "source": [
    "# Creates model\n",
    "model_config = MistralConfig(\n",
-   "    vocab_size=len(tokenizer),\n",
-   "    hidden_size=512,\n",
-   "    intermediate_size=2048,\n",
-   "    num_hidden_layers=8,\n",
-   "    num_attention_heads=8,\n",
-   "    num_key_value_heads=4,\n",
-   "    sliding_window=256,\n",
-   "    max_position_embeddings=8192,\n",
+   "    vocab_size=len(tokenizer), #from miditok output default 32K\n",
+   "    hidden_size=512, # default 4096\n",
+   "    intermediate_size=2048, # default 14336\n",
+   "    num_hidden_layers=8, # default 32\n",
+   "    num_attention_heads=8, # default 32\n",
+   "    num_key_value_heads=4, # default 8\n",
+   "    sliding_window=256, # default 4096\n",
+   "    max_position_embeddings=sequence_length + 256, # 8192 this was before # default 4096*32\n",
    "    pad_token_id=tokenizer['PAD_None'],\n",
    "    bos_token_id=tokenizer['BOS_None'],\n",
    "    eos_token_id=tokenizer['EOS_None'],\n",
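Decoded from the notebook JSON, this hunk wires the same `sequence_length` into the model's positional embeddings with 256 tokens of headroom. A sketch assuming transformers' `MistralConfig` and `MistralForCausalLM` (the model class is instantiated outside this hunk):

```python
from transformers import MistralConfig, MistralForCausalLM

model_config = MistralConfig(
    vocab_size=len(tokenizer),   # miditok vocab size, not the 32K default
    hidden_size=512,             # default 4096
    intermediate_size=2048,      # default 14336
    num_hidden_layers=8,         # default 32
    num_attention_heads=8,       # default 32
    num_key_value_heads=4,       # grouped-query attention, default 8
    sliding_window=256,          # default 4096
    max_position_embeddings=sequence_length + 256,  # 1280; was 8192
    pad_token_id=tokenizer["PAD_None"],
    bos_token_id=tokenizer["BOS_None"],
    eos_token_id=tokenizer["EOS_None"],
)
model = MistralForCausalLM(model_config)  # fresh weights, trained from scratch
```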
@@ -449,7 +459,7 @@
    "    learning_rate=1e-4,\n",
    "    weight_decay=0.01,\n",
    "    max_grad_norm=3.0,\n",
-   "    max_steps=
+   "    max_steps=40000,\n",
    "    lr_scheduler_type=\"cosine_with_restarts\",\n",
    "    warmup_ratio=0.3,\n",
    "    log_level=\"debug\",\n",
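`max_steps` is now pinned at 40000. A sketch of the surrounding trainer settings, assuming transformers' `TrainingArguments`; `output_dir` is a hypothetical placeholder and the real cell has more options:

```python
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="runs/mistral-midi",  # hypothetical
    learning_rate=1e-4,
    weight_decay=0.01,
    max_grad_norm=3.0,
    max_steps=40000,  # fixed optimizer-step budget instead of an epoch count
    lr_scheduler_type="cosine_with_restarts",
    warmup_ratio=0.3,  # warm up over the first 30% of steps
    log_level="debug",
)
```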
@@ -485,10 +495,20 @@
    "    compute_metrics=compute_metrics,\n",
    "    callbacks=None,\n",
    "    preprocess_logits_for_metrics=preprocess_logits,\n",
+   "    \n",
    ")\n",
    "\n"
   ]
  },
+ {
+  "cell_type": "code",
+  "execution_count": null,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "print(model)"
+  ]
+ },
  {
   "cell_type": "code",
   "execution_count": null,
README.md
CHANGED
@@ -22,6 +22,9 @@ I have trained a MidiTok tokeniser (REMI) and its made by spliting multi-track m
 We then trained in on a small dataset.
 Its using the Mistral model that has been cut down quite a bit.
 
+### What else needs to be done
+Update model training to use small positional embeddings for the model 1024 + a padding amount like 8
+
 ### Training hyperparameters
 
 The following hyperparameters were used during training: