Adding filter to work on more
Browse files
HuggingFace_Mistral_Transformer_Single_Instrument.ipynb
CHANGED
@@ -185,6 +185,38 @@
|
|
185 |
"kwargs_dataset = {\"max_seq_len\": sequence_length, \"tokenizer\": tokenizer, \"bos_token_id\": tokenizer[\"BOS_None\"], \"eos_token_id\": tokenizer[\"EOS_None\"]}"
|
186 |
]
|
187 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
{
|
189 |
"cell_type": "code",
|
190 |
"execution_count": null,
|
|
|
185 |
"kwargs_dataset = {\"max_seq_len\": sequence_length, \"tokenizer\": tokenizer, \"bos_token_id\": tokenizer[\"BOS_None\"], \"eos_token_id\": tokenizer[\"EOS_None\"]}"
|
186 |
]
|
187 |
},
|
188 |
+
{
|
189 |
+
"cell_type": "code",
|
190 |
+
"execution_count": null,
|
191 |
+
"metadata": {},
|
192 |
+
"outputs": [],
|
193 |
+
"source": [
|
194 |
+
def _rms(values) -> float:
    """Return the root mean square of ``values``, or 0.0 when it is empty."""
    values = list(values)
    if not values:
        # An empty track carries no signal; report 0.0 instead of dividing by zero.
        return 0.0
    return (sum(x * x for x in values) / len(values)) ** 0.5


def remove_files_with_boring_data(file_paths: list[Path], rms_threshold: float = 0.01) -> list[Path]:
    """
    Remove files with boring data, i.e. files with low RMS.

    A file is kept when at least one of its tracks has a note-pitch RMS of at
    least ``rms_threshold``. Files that cannot be loaded are skipped, and files
    with no tracks or no notes are dropped because their RMS is 0.0.

    Args:
        file_paths: paths of the score files to inspect.
        rms_threshold: minimum per-track pitch RMS for a file to be kept.

    Returns:
        The subset of ``file_paths`` that passed the filter, in input order.
    """
    if not file_paths:
        return []

    # Third-party imports stay local to the function, as in the original cell.
    from symusic import Score
    from tqdm import tqdm

    filtered_files = []
    for file_path in tqdm(file_paths, desc="Filtering boring files"):
        try:
            score = Score(file_path)
        except SCORE_LOADING_EXCEPTION:
            # Unreadable or corrupted file: it cannot be used, so leave it out.
            continue

        # NOTE(review): this is the RMS of raw pitch values, so presumably any
        # track with real notes clears a small threshold such as the default;
        # confirm whether a spread measure (e.g. deviation from the mean) was
        # intended instead.
        has_interesting_track = any(
            _rms(note.pitch for note in track.notes) >= rms_threshold
            for track in score.tracks
        )
        if has_interesting_track:
            filtered_files.append(file_path)

    return filtered_files
|
218 |
+
]
|
219 |
+
},
|
220 |
{
|
221 |
"cell_type": "code",
|
222 |
"execution_count": null,
|