asigalov61 commited on
Commit
d136ff6
·
verified ·
1 Parent(s): b8e4da9

Upload TMIDIX.py

Browse files
Files changed (1) hide show
  1. TMIDIX.py +1064 -21
TMIDIX.py CHANGED
@@ -1,6 +1,5 @@
1
  #! /usr/bin/python3
2
 
3
-
4
  r'''###############################################################################
5
  ###################################################################################
6
  #
@@ -8,7 +7,7 @@ r'''############################################################################
8
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
9
  # Version 1.0
10
  #
11
- # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1342
12
  #
13
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
14
  #
@@ -21,19 +20,19 @@ r'''############################################################################
21
  #
22
  ###################################################################################
23
  ###################################################################################
24
- # Copyright 2025 Project Los Angeles / Tegridy Code
25
  #
26
- # Licensed under the Apache License, Version 2.0 (the "License");
27
- # you may not use this file except in compliance with the License.
28
- # You may obtain a copy of the License at
29
  #
30
- # http://www.apache.org/licenses/LICENSE-2.0
31
  #
32
- # Unless required by applicable law or agreed to in writing, software
33
- # distributed under the License is distributed on an "AS IS" BASIS,
34
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
35
- # See the License for the specific language governing permissions and
36
- # limitations under the License.
37
  ###################################################################################
38
  ###################################################################################
39
  #
@@ -1446,8 +1445,9 @@ def _encode(events_lol, unknown_callback=None, never_add_eot=False,
1446
  # pjb.com.au
1447
  #
1448
  # Project Los Angeles
1449
- # Tegridy Code 2021
1450
- # https://github.com/Tegridy-Code/Project-Los-Angeles
 
1451
  #
1452
  ###################################################################################
1453
  ###################################################################################
@@ -1457,8 +1457,6 @@ import os
1457
 
1458
  import datetime
1459
 
1460
- import copy
1461
-
1462
  from datetime import datetime
1463
 
1464
  import secrets
@@ -1475,12 +1473,12 @@ import multiprocessing
1475
 
1476
  from itertools import zip_longest
1477
  from itertools import groupby
 
1478
  from collections import Counter
 
1479
 
1480
  from operator import itemgetter
1481
 
1482
- import sys
1483
-
1484
  from abc import ABC, abstractmethod
1485
 
1486
  from difflib import SequenceMatcher as SM
@@ -1490,6 +1488,10 @@ import math
1490
 
1491
  import matplotlib.pyplot as plt
1492
 
 
 
 
 
1493
  ###################################################################################
1494
  #
1495
  # Original TMIDI Tegridy helper functions
@@ -4179,6 +4181,17 @@ def advanced_score_processor(raw_score,
4179
  basic_single_track_score.append(ev)
4180
  num_tracks += 1
4181
 
 
 
 
 
 
 
 
 
 
 
 
4182
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4183
  basic_single_track_score.sort(key=lambda x: x[1])
4184
 
@@ -4193,7 +4206,7 @@ def advanced_score_processor(raw_score,
4193
  enhanced_single_track_score.append(event)
4194
  num_patch_changes += 1
4195
 
4196
- if event[0] == 'note':
4197
  if event[3] != 9:
4198
  event.extend([patches[event[3]]])
4199
  all_score_patches.extend([patches[event[3]]])
@@ -11182,7 +11195,1037 @@ def rle_decode_ones(encoding, size=(128, 128)):
11182
  return matrix
11183
 
11184
  ###################################################################################
11185
- #
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11186
  # This is the end of the TMIDI X Python module
11187
- #
11188
  ###################################################################################
 
1
  #! /usr/bin/python3
2
 
 
3
  r'''###############################################################################
4
  ###################################################################################
5
  #
 
7
  # Tegridy MIDI X Module (TMIDI X / tee-midi eks)
8
  # Version 1.0
9
  #
10
+ # NOTE: TMIDI X Module starts after the partial MIDI.py module @ line 1437
11
  #
12
  # Based upon MIDI.py module v.6.7. by Peter Billam / pjb.com.au
13
  #
 
20
  #
21
  ###################################################################################
22
  ###################################################################################
23
+ # Copyright 2025 Project Los Angeles / Tegridy Code
24
  #
25
+ # Licensed under the Apache License, Version 2.0 (the "License");
26
+ # you may not use this file except in compliance with the License.
27
+ # You may obtain a copy of the License at
28
  #
29
+ # http://www.apache.org/licenses/LICENSE-2.0
30
  #
31
+ # Unless required by applicable law or agreed to in writing, software
32
+ # distributed under the License is distributed on an "AS IS" BASIS,
33
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
34
+ # See the License for the specific language governing permissions and
35
+ # limitations under the License.
36
  ###################################################################################
37
  ###################################################################################
38
  #
 
1445
  # pjb.com.au
1446
  #
1447
  # Project Los Angeles
1448
+ # Tegridy Code 2025
1449
+ #
1450
+ # https://github.com/Tegridy-Code/Project-Los-Angeles
1451
  #
1452
  ###################################################################################
1453
  ###################################################################################
 
1457
 
1458
  import datetime
1459
 
 
 
1460
  from datetime import datetime
1461
 
1462
  import secrets
 
1473
 
1474
  from itertools import zip_longest
1475
  from itertools import groupby
1476
+
1477
  from collections import Counter
1478
+ from collections import defaultdict
1479
 
1480
  from operator import itemgetter
1481
 
 
 
1482
  from abc import ABC, abstractmethod
1483
 
1484
  from difflib import SequenceMatcher as SM
 
1488
 
1489
  import matplotlib.pyplot as plt
1490
 
1491
+ import psutil
1492
+
1493
+ import json
1494
+
1495
  ###################################################################################
1496
  #
1497
  # Original TMIDI Tegridy helper functions
 
4181
  basic_single_track_score.append(ev)
4182
  num_tracks += 1
4183
 
4184
+ for e in basic_single_track_score:
4185
+
4186
+ if e[0] == 'note':
4187
+ e[3] = e[3] % 16
4188
+ e[4] = e[4] % 128
4189
+ e[5] = e[5] % 128
4190
+
4191
+ if e[0] == 'patch_change':
4192
+ e[2] = e[2] % 16
4193
+ e[3] = e[3] % 128
4194
+
4195
  basic_single_track_score.sort(key=lambda x: x[4] if x[0] == 'note' else 128, reverse=True)
4196
  basic_single_track_score.sort(key=lambda x: x[1])
4197
 
 
4206
  enhanced_single_track_score.append(event)
4207
  num_patch_changes += 1
4208
 
4209
+ if event[0] == 'note':
4210
  if event[3] != 9:
4211
  event.extend([patches[event[3]]])
4212
  all_score_patches.extend([patches[event[3]]])
 
11195
  return matrix
11196
 
11197
  ###################################################################################
11198
+
11199
def vertical_list_search(list_of_lists, trg_list):

    """Find every run of consecutive rows that matches trg_list vertically.

    A match at row index s means that for every offset i, trg_list[i] is a
    member of list_of_lists[s + i]. Returns a list of index lists (one per
    matching run); returns [] when either input is empty.
    """

    if not list_of_lists or not trg_list:
        return []

    window = len(trg_list)

    # Precompute per-row sets so each membership test is O(1).
    membership = [set(row) for row in list_of_lists]

    matches = []

    for top in range(len(list_of_lists) - window + 1):

        if all(val in membership[top + off] for off, val in enumerate(trg_list)):
            matches.append(list(range(top, top + window)))

    return matches
11226
+
11227
+ ###################################################################################
11228
+
11229
def smooth_values(values, window_size=3):

    """Smooth a numeric sequence with a centered moving average.

    Each element is replaced by the int-truncated mean of the values inside
    a window of window_size centered on it, clipped at the sequence ends.
    Returns a new list; an empty input yields an empty list.
    """

    half = window_size // 2
    n = len(values)

    result = []

    for idx in range(n):

        lo = max(0, idx - half)
        hi = min(n, idx + half + 1)

        chunk = values[lo:hi]

        result.append(int(sum(chunk) / len(chunk)))

    return result
11243
+
11244
+ ###################################################################################
11245
+
11246
def is_mostly_wide_peaks_and_valleys(values,
                                     min_range=32,
                                     threshold=0.7,
                                     smoothing_window=5
                                     ):

    """Heuristically decide whether values forms mostly wide peaks/valleys.

    The sequence is first smoothed with smooth_values; it qualifies when the
    smoothed dynamic range is at least min_range AND the proportion of
    strictly rising or falling steps is at least threshold. Empty or
    perfectly flat input returns False.
    """

    if not values:
        return False

    smoothed = smooth_values(values, smoothing_window)

    # Reject sequences whose overall dynamic range is too narrow.
    if max(smoothed) - min(smoothed) < min_range:
        return False

    # Reject perfectly flat sequences (no trend at all).
    if len(set(smoothed)) == 1:
        return False

    # Classify each adjacent step: +1 rising, -1 falling, 0 flat.
    steps = []

    for prev, cur in zip(smoothed, smoothed[1:]):
        if cur > prev:
            steps.append(1)

        elif cur < prev:
            steps.append(-1)

        else:
            steps.append(0)

    moving_steps = sum(1 for s in steps if s != 0)

    return moving_steps / len(steps) >= threshold
11282
+
11283
+ ###################################################################################
11284
+
11285
def system_memory_utilization(return_dict=False):

    """Report system RAM utilization via psutil.

    With return_dict=True, returns psutil.virtual_memory() converted to a
    plain dict; otherwise prints the used-RAM percentage and used GB and
    returns None.
    """

    # Single snapshot instead of querying psutil once per field.
    vmem = psutil.virtual_memory()

    if return_dict:
        return dict(vmem._asdict())

    print('RAM memory % used:', vmem[2])
    print('RAM Used (GB):', vmem[3]/(1024**3))
11293
+
11294
+ ###################################################################################
11295
+
11296
def create_files_list(datasets_paths=['./'],
                      files_exts=['.mid', '.midi', '.kar', '.MID', '.MIDI', '.KAR'],
                      randomize_files_list=True,
                      verbose=True
                      ):

    """Recursively collect file paths with the given extensions.

    Walks every directory in datasets_paths, gathers all files whose names
    end with one of files_exts, optionally shuffles the resulting list, and
    returns it (possibly empty). Status messages are printed when verbose
    is True.
    """

    if verbose:
        print('=' * 70)
        print('Searching for files...')
        print('This may take a while on a large dataset in particular...')
        print('=' * 70)

    # Plain dict used as an insertion-ordered set of full paths.
    # (Was defaultdict(None) — a None factory is just a plain dict — and the
    # old guard tested the bare file name against full-path keys, so the
    # dedup check could never fire. Keying and testing on the full path
    # makes the dedup real without changing the returned list.)
    filez_set = {}

    files_exts = tuple(files_exts)

    for dataset_addr in tqdm.tqdm(datasets_paths, disable=not verbose):
        for dirpath, dirnames, filenames in os.walk(dataset_addr):
            for file in filenames:
                if file.endswith(files_exts):
                    full_path = os.path.join(dirpath, file)
                    if full_path not in filez_set:
                        filez_set[full_path] = None

    filez = list(filez_set.keys())

    if verbose:
        print('Done!')
        print('=' * 70)

    if filez:
        if randomize_files_list:

            if verbose:
                print('Randomizing file list...')

            random.shuffle(filez)

            if verbose:
                print('Done!')
                print('=' * 70)

        if verbose:
            print('Found', len(filez), 'files.')
            print('=' * 70)

    else:
        if verbose:
            print('Could not find any files...')
            print('Please check dataset dirs and files extensions...')
            print('=' * 70)

    return filez
11346
+
11347
+ ###################################################################################
11348
+
11349
def has_consecutive_trend(nums, count):

    """Return True when nums contains a strictly monotonic run of count values.

    A qualifying run is count consecutive elements that are strictly
    increasing or strictly decreasing; an equal pair of neighbours resets
    both streaks. Sequences shorter than count can never qualify.
    """

    if len(nums) < count:
        return False

    up = down = 1

    for prev, cur in zip(nums, nums[1:]):

        if cur > prev:
            up += 1
            down = 1

        elif cur < prev:
            down += 1
            up = 1

        else:
            up = down = 1

        if count in (up, down):
            return True

    return False
11373
+
11374
+ ###################################################################################
11375
+
11376
+ def escore_notes_primary_features(escore_notes):
11377
+
11378
+ #=================================================================
11379
+
11380
+ def mean(values):
11381
+ return sum(values) / len(values) if values else None
11382
+
11383
+ def std(values):
11384
+ if not values:
11385
+ return None
11386
+ m = mean(values)
11387
+ return math.sqrt(sum((x - m) ** 2 for x in values) / len(values)) if m is not None else None
11388
+
11389
+ def skew(values):
11390
+ if not values:
11391
+ return None
11392
+ m = mean(values)
11393
+ s = std(values)
11394
+ if s is None or s == 0:
11395
+ return None
11396
+ return sum(((x - m) / s) ** 3 for x in values) / len(values)
11397
+
11398
+ def kurtosis(values):
11399
+ if not values:
11400
+ return None
11401
+ m = mean(values)
11402
+ s = std(values)
11403
+ if s is None or s == 0:
11404
+ return None
11405
+ return sum(((x - m) / s) ** 4 for x in values) / len(values) - 3
11406
+
11407
+ def median(values):
11408
+ if not values:
11409
+ return None
11410
+ srt = sorted(values)
11411
+ n = len(srt)
11412
+ mid = n // 2
11413
+ if n % 2 == 0:
11414
+ return (srt[mid - 1] + srt[mid]) / 2.0
11415
+ return srt[mid]
11416
+
11417
+ def percentile(values, p):
11418
+ if not values:
11419
+ return None
11420
+ srt = sorted(values)
11421
+ n = len(srt)
11422
+ k = (n - 1) * p / 100.0
11423
+ f = int(k)
11424
+ c = k - f
11425
+ if f + 1 < n:
11426
+ return srt[f] * (1 - c) + srt[f + 1] * c
11427
+ return srt[f]
11428
+
11429
+ def diff(values):
11430
+ if not values or len(values) < 2:
11431
+ return []
11432
+ return [values[i + 1] - values[i] for i in range(len(values) - 1)]
11433
+
11434
+ def mad(values):
11435
+ if not values:
11436
+ return None
11437
+ m = median(values)
11438
+ return median([abs(x - m) for x in values])
11439
+
11440
+ def entropy(values):
11441
+ if not values:
11442
+ return None
11443
+ freq = {}
11444
+ for v in values:
11445
+ freq[v] = freq.get(v, 0) + 1
11446
+ total = len(values)
11447
+ ent = 0.0
11448
+ for count in freq.values():
11449
+ p_val = count / total
11450
+ ent -= p_val * math.log2(p_val)
11451
+ return ent
11452
+
11453
+ def mode(values):
11454
+ if not values:
11455
+ return None
11456
+ freq = {}
11457
+ for v in values:
11458
+ freq[v] = freq.get(v, 0) + 1
11459
+ max_count = max(freq.values())
11460
+ modes = [k for k, count in freq.items() if count == max_count]
11461
+ return min(modes)
11462
+
11463
+
11464
+ #=================================================================
11465
+
11466
+ sp_score = solo_piano_escore_notes(escore_notes)
11467
+
11468
+ dscore = delta_score_notes(sp_score)
11469
+
11470
+ seq = []
11471
+
11472
+ for d in dscore:
11473
+ seq.extend([d[1], d[2], d[4]])
11474
+
11475
+ #=================================================================
11476
+
11477
+ n = len(seq)
11478
+ if n % 3 != 0:
11479
+ seq = seq[: n - (n % 3)]
11480
+ arr = [seq[i:i + 3] for i in range(0, len(seq), 3)]
11481
+
11482
+ #=================================================================
11483
+
11484
+ features = {}
11485
+
11486
+ delta_times = [row[0] for row in arr]
11487
+ if delta_times:
11488
+ features['delta_times_mean'] = mean(delta_times)
11489
+ features['delta_times_std'] = std(delta_times)
11490
+ features['delta_times_min'] = min(delta_times)
11491
+ features['delta_times_max'] = max(delta_times)
11492
+ features['delta_times_skew'] = skew(delta_times)
11493
+ features['delta_times_kurtosis'] = kurtosis(delta_times)
11494
+ delta_zero_count = sum(1 for x in delta_times if x == 0)
11495
+ features['delta_times_zero_ratio'] = delta_zero_count / len(delta_times)
11496
+ nonzero_dt = [x for x in delta_times if x != 0]
11497
+ if nonzero_dt:
11498
+ features['delta_times_nonzero_mean'] = mean(nonzero_dt)
11499
+ features['delta_times_nonzero_std'] = std(nonzero_dt)
11500
+ else:
11501
+ features['delta_times_nonzero_mean'] = None
11502
+ features['delta_times_nonzero_std'] = None
11503
+ features['delta_times_mad'] = mad(delta_times)
11504
+ features['delta_times_cv'] = (features['delta_times_std'] / features['delta_times_mean']
11505
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11506
+ features['delta_times_entropy'] = entropy(delta_times)
11507
+ features['delta_times_range'] = max(delta_times) - min(delta_times)
11508
+ features['delta_times_median'] = median(delta_times)
11509
+ features['delta_times_quantile_25'] = percentile(delta_times, 25)
11510
+ features['delta_times_quantile_75'] = percentile(delta_times, 75)
11511
+ if (features['delta_times_quantile_25'] is not None and features['delta_times_quantile_75'] is not None):
11512
+ features['delta_times_iqr'] = features['delta_times_quantile_75'] - features['delta_times_quantile_25']
11513
+ else:
11514
+ features['delta_times_iqr'] = None
11515
+ else:
11516
+ for key in ['delta_times_mean', 'delta_times_std', 'delta_times_min', 'delta_times_max',
11517
+ 'delta_times_skew', 'delta_times_kurtosis', 'delta_times_zero_ratio',
11518
+ 'delta_times_nonzero_mean', 'delta_times_nonzero_std', 'delta_times_mad',
11519
+ 'delta_times_cv', 'delta_times_entropy', 'delta_times_range', 'delta_times_median',
11520
+ 'delta_times_quantile_25', 'delta_times_quantile_75', 'delta_times_iqr']:
11521
+ features[key] = None
11522
+
11523
+ #=================================================================
11524
+
11525
+ durations = [row[1] for row in arr]
11526
+ if durations:
11527
+ features['durations_mean'] = mean(durations)
11528
+ features['durations_std'] = std(durations)
11529
+ features['durations_min'] = min(durations)
11530
+ features['durations_max'] = max(durations)
11531
+ features['durations_skew'] = skew(durations)
11532
+ features['durations_kurtosis'] = kurtosis(durations)
11533
+ features['durations_mad'] = mad(durations)
11534
+ features['durations_cv'] = (features['durations_std'] / features['durations_mean']
11535
+ if features['durations_mean'] and features['durations_mean'] != 0 else None)
11536
+ features['durations_entropy'] = entropy(durations)
11537
+ features['durations_range'] = max(durations) - min(durations)
11538
+ features['durations_median'] = median(durations)
11539
+ features['durations_quantile_25'] = percentile(durations, 25)
11540
+ features['durations_quantile_75'] = percentile(durations, 75)
11541
+ if features['durations_quantile_25'] is not None and features['durations_quantile_75'] is not None:
11542
+ features['durations_iqr'] = features['durations_quantile_75'] - features['durations_quantile_25']
11543
+ else:
11544
+ features['durations_iqr'] = None
11545
+ else:
11546
+ for key in ['durations_mean', 'durations_std', 'durations_min', 'durations_max',
11547
+ 'durations_skew', 'durations_kurtosis', 'durations_mad', 'durations_cv',
11548
+ 'durations_entropy', 'durations_range', 'durations_median', 'durations_quantile_25',
11549
+ 'durations_quantile_75', 'durations_iqr']:
11550
+ features[key] = None
11551
+
11552
+ #=================================================================
11553
+
11554
+ pitches = [row[2] for row in arr]
11555
+ if pitches:
11556
+ features['pitches_mean'] = mean(pitches)
11557
+ features['pitches_std'] = std(pitches)
11558
+ features['pitches_min'] = min(pitches)
11559
+ features['pitches_max'] = max(pitches)
11560
+ features['pitches_skew'] = skew(pitches)
11561
+ features['pitches_kurtosis'] = kurtosis(pitches)
11562
+ features['pitches_range'] = max(pitches) - min(pitches)
11563
+ features['pitches_median'] = median(pitches)
11564
+ features['pitches_quantile_25'] = percentile(pitches, 25)
11565
+ features['pitches_quantile_75'] = percentile(pitches, 75)
11566
+ if len(pitches) > 1:
11567
+ dps = diff(pitches)
11568
+ features['pitches_diff_mean'] = mean(dps)
11569
+ features['pitches_diff_std'] = std(dps)
11570
+ else:
11571
+ features['pitches_diff_mean'] = None
11572
+ features['pitches_diff_std'] = None
11573
+ features['pitches_mad'] = mad(pitches)
11574
+ if len(pitches) > 2:
11575
+ peaks = sum(1 for i in range(1, len(pitches)-1)
11576
+ if pitches[i] > pitches[i-1] and pitches[i] > pitches[i+1])
11577
+ valleys = sum(1 for i in range(1, len(pitches)-1)
11578
+ if pitches[i] < pitches[i-1] and pitches[i] < pitches[i+1])
11579
+ else:
11580
+ peaks, valleys = None, None
11581
+ features['pitches_peak_count'] = peaks
11582
+ features['pitches_valley_count'] = valleys
11583
+ if len(pitches) > 1:
11584
+ x = list(range(len(pitches)))
11585
+ denominator = (len(x) * sum(xi ** 2 for xi in x) - sum(x) ** 2)
11586
+ if denominator != 0:
11587
+ slope = (len(x) * sum(x[i] * pitches[i] for i in range(len(x))) -
11588
+ sum(x) * sum(pitches)) / denominator
11589
+ else:
11590
+ slope = None
11591
+ features['pitches_trend_slope'] = slope
11592
+ else:
11593
+ features['pitches_trend_slope'] = None
11594
+
11595
+ features['pitches_unique_count'] = len(set(pitches))
11596
+ pitch_class_hist = {i: 0 for i in range(12)}
11597
+ for p in pitches:
11598
+ pitch_class_hist[p % 12] += 1
11599
+ total_pitch = len(pitches)
11600
+ for i in range(12):
11601
+ features[f'pitches_pc_{i}'] = (pitch_class_hist[i] / total_pitch) if total_pitch > 0 else None
11602
+
11603
+ max_asc = 0
11604
+ cur_asc = 0
11605
+ max_desc = 0
11606
+ cur_desc = 0
11607
+ for i in range(1, len(pitches)):
11608
+ if pitches[i] > pitches[i-1]:
11609
+ cur_asc += 1
11610
+ max_asc = max(max_asc, cur_asc)
11611
+ cur_desc = 0
11612
+ elif pitches[i] < pitches[i-1]:
11613
+ cur_desc += 1
11614
+ max_desc = max(max_desc, cur_desc)
11615
+ cur_asc = 0
11616
+ else:
11617
+ cur_asc = 0
11618
+ cur_desc = 0
11619
+ features['pitches_max_consecutive_ascending'] = max_asc if pitches else None
11620
+ features['pitches_max_consecutive_descending'] = max_desc if pitches else None
11621
+ p_intervals = diff(pitches)
11622
+ features['pitches_median_diff'] = median(p_intervals) if p_intervals else None
11623
+ if p_intervals:
11624
+ dc = sum(1 for i in range(1, len(p_intervals))
11625
+ if (p_intervals[i] > 0 and p_intervals[i-1] < 0) or (p_intervals[i] < 0 and p_intervals[i-1] > 0))
11626
+ features['pitches_direction_changes'] = dc
11627
+ else:
11628
+ features['pitches_direction_changes'] = None
11629
+ else:
11630
+ for key in (['pitches_mean', 'pitches_std', 'pitches_min', 'pitches_max', 'pitches_skew',
11631
+ 'pitches_kurtosis', 'pitches_range', 'pitches_median', 'pitches_quantile_25',
11632
+ 'pitches_quantile_75', 'pitches_diff_mean', 'pitches_diff_std', 'pitches_mad',
11633
+ 'pitches_peak_count', 'pitches_valley_count', 'pitches_trend_slope',
11634
+ 'pitches_unique_count', 'pitches_max_consecutive_ascending', 'pitches_max_consecutive_descending',
11635
+ 'pitches_median_diff', 'pitches_direction_changes'] +
11636
+ [f'pitches_pc_{i}' for i in range(12)]):
11637
+ features[key] = None
11638
+
11639
+ #=================================================================
11640
+
11641
+ overall = [x for row in arr for x in row]
11642
+ if overall:
11643
+ features['overall_mean'] = mean(overall)
11644
+ features['overall_std'] = std(overall)
11645
+ features['overall_min'] = min(overall)
11646
+ features['overall_max'] = max(overall)
11647
+ features['overall_cv'] = (features['overall_std'] / features['overall_mean']
11648
+ if features['overall_mean'] and features['overall_mean'] != 0 else None)
11649
+ else:
11650
+ for key in ['overall_mean', 'overall_std', 'overall_min', 'overall_max', 'overall_cv']:
11651
+ features[key] = None
11652
+
11653
+ #=================================================================
11654
+
11655
+ onsets = []
11656
+ cumulative = 0
11657
+ for dt in delta_times:
11658
+ onsets.append(cumulative)
11659
+ cumulative += dt
11660
+ if onsets and durations:
11661
+ overall_piece_duration = onsets[-1] + durations[-1]
11662
+ else:
11663
+ overall_piece_duration = None
11664
+ features['overall_piece_duration'] = overall_piece_duration
11665
+ features['overall_notes_density'] = (len(arr) / overall_piece_duration
11666
+ if overall_piece_duration and overall_piece_duration > 0 else None)
11667
+ features['rhythm_ratio'] = (features['durations_mean'] / features['delta_times_mean']
11668
+ if features['delta_times_mean'] and features['delta_times_mean'] != 0 else None)
11669
+ features['overall_sum_delta_times'] = (sum(delta_times) if delta_times else None)
11670
+ features['overall_sum_durations'] = (sum(durations) if durations else None)
11671
+ features['overall_voicing_ratio'] = (sum(durations) / overall_piece_duration
11672
+ if overall_piece_duration and durations else None)
11673
+ features['overall_onset_std'] = std(onsets) if onsets else None
11674
+
11675
+ #=================================================================
11676
+
11677
+ chords_raw = []
11678
+ chords_pc = []
11679
+ current_group = []
11680
+ for i, note in enumerate(arr):
11681
+ dt = note[0]
11682
+ if i == 0:
11683
+ current_group = [i]
11684
+ else:
11685
+ if dt == 0:
11686
+ current_group.append(i)
11687
+ else:
11688
+ if len(current_group) >= 2:
11689
+ chord_notes = [arr[j][2] for j in current_group]
11690
+ chords_raw.append(tuple(sorted(chord_notes)))
11691
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11692
+
11693
+ current_group = [i]
11694
+
11695
+ if current_group and len(current_group) >= 2:
11696
+ chord_notes = [arr[j][2] for j in current_group]
11697
+ chords_raw.append(tuple(sorted(chord_notes)))
11698
+ chords_pc.append(tuple(sorted(set(p % 12 for p in chord_notes))))
11699
+
11700
+ if chords_raw:
11701
+ chord_count = len(chords_raw)
11702
+ features['chords_count'] = chord_count
11703
+ features['chords_density'] = (chord_count / overall_piece_duration
11704
+ if overall_piece_duration and chord_count is not None else None)
11705
+ chord_sizes = [len(ch) for ch in chords_raw]
11706
+ features['chords_size_mean'] = mean(chord_sizes)
11707
+ features['chords_size_std'] = std(chord_sizes)
11708
+ features['chords_size_min'] = min(chord_sizes) if chord_sizes else None
11709
+ features['chords_size_max'] = max(chord_sizes) if chord_sizes else None
11710
+ features['chords_unique_raw_count'] = len(set(chords_raw))
11711
+ features['chords_unique_pc_count'] = len(set(chords_pc))
11712
+ features['chords_entropy_raw'] = entropy(chords_raw)
11713
+ features['chords_entropy_pc'] = entropy(chords_pc)
11714
+ if len(chords_raw) > 1:
11715
+ rep_raw = sum(1 for i in range(1, len(chords_raw)) if chords_raw[i] == chords_raw[i - 1])
11716
+ features['chords_repeat_ratio_raw'] = rep_raw / (len(chords_raw) - 1)
11717
+ else:
11718
+ features['chords_repeat_ratio_raw'] = None
11719
+ if len(chords_pc) > 1:
11720
+ rep_pc = sum(1 for i in range(1, len(chords_pc)) if chords_pc[i] == chords_pc[i - 1])
11721
+ features['chords_repeat_ratio_pc'] = rep_pc / (len(chords_pc) - 1)
11722
+ else:
11723
+ features['chords_repeat_ratio_pc'] = None
11724
+ if len(chords_raw) > 1:
11725
+ bigrams_raw = [(chords_raw[i], chords_raw[i + 1]) for i in range(len(chords_raw) - 1)]
11726
+ features['chords_bigram_entropy_raw'] = entropy(bigrams_raw)
11727
+ else:
11728
+ features['chords_bigram_entropy_raw'] = None
11729
+ if len(chords_pc) > 1:
11730
+ bigrams_pc = [(chords_pc[i], chords_pc[i + 1]) for i in range(len(chords_pc) - 1)]
11731
+ features['chords_bigram_entropy_pc'] = entropy(bigrams_pc)
11732
+ else:
11733
+ features['chords_bigram_entropy_pc'] = None
11734
+ features['chords_mode_raw'] = mode(chords_raw)
11735
+ features['chords_mode_pc'] = mode(chords_pc)
11736
+ if chords_pc:
11737
+ pc_sizes = [len(ch) for ch in chords_pc]
11738
+ features['chords_pc_size_mean'] = mean(pc_sizes)
11739
+ else:
11740
+ features['chords_pc_size_mean'] = None
11741
+ else:
11742
+ for key in ['chords_count', 'chords_density', 'chords_size_mean', 'chords_size_std',
11743
+ 'chords_size_min', 'chords_size_max', 'chords_unique_raw_count', 'chords_unique_pc_count',
11744
+ 'chords_entropy_raw', 'chords_entropy_pc', 'chords_repeat_ratio_raw', 'chords_repeat_ratio_pc',
11745
+ 'chords_bigram_entropy_raw', 'chords_bigram_entropy_pc', 'chords_mode_raw', 'chords_mode_pc',
11746
+ 'chords_pc_size_mean']:
11747
+ features[key] = None
11748
+
11749
+ #=================================================================
11750
+
11751
+ if delta_times:
11752
+ med_dt = features['delta_times_median']
11753
+ iqr_dt = features['delta_times_iqr']
11754
+ threshold_a = med_dt + 1.5 * iqr_dt if med_dt is not None and iqr_dt is not None else None
11755
+ threshold_b = percentile(delta_times, 90)
11756
+ if threshold_a is not None and threshold_b is not None:
11757
+ phrase_threshold = max(threshold_a, threshold_b)
11758
+ elif threshold_a is not None:
11759
+ phrase_threshold = threshold_a
11760
+ elif threshold_b is not None:
11761
+ phrase_threshold = threshold_b
11762
+ else:
11763
+ phrase_threshold = None
11764
+ else:
11765
+ phrase_threshold = None
11766
+
11767
+ phrases = []
11768
+ current_phrase = []
11769
+ if onsets:
11770
+ current_phrase.append(0)
11771
+ for i in range(len(onsets) - 1):
11772
+ gap = onsets[i + 1] - onsets[i]
11773
+ if phrase_threshold is not None and gap > phrase_threshold:
11774
+ phrases.append(current_phrase)
11775
+ current_phrase = []
11776
+ current_phrase.append(i + 1)
11777
+ if current_phrase:
11778
+ phrases.append(current_phrase)
11779
+ if phrases:
11780
+ phrase_note_counts = []
11781
+ phrase_durations = []
11782
+ phrase_densities = []
11783
+ phrase_mean_pitches = []
11784
+ phrase_pitch_ranges = []
11785
+ phrase_start_times = []
11786
+ phrase_end_times = []
11787
+ for phrase in phrases:
11788
+ note_count = len(phrase)
11789
+ phrase_note_counts.append(note_count)
11790
+ ph_start = onsets[phrase[0]]
11791
+ ph_end = onsets[phrase[-1]] + durations[phrase[-1]]
11792
+ phrase_start_times.append(ph_start)
11793
+ phrase_end_times.append(ph_end)
11794
+ ph_duration = ph_end - ph_start
11795
+ phrase_durations.append(ph_duration)
11796
+ density = note_count / ph_duration if ph_duration > 0 else None
11797
+ phrase_densities.append(density)
11798
+ ph_pitches = [pitches[i] for i in phrase if i < len(pitches)]
11799
+ phrase_mean_pitches.append(mean(ph_pitches) if ph_pitches else None)
11800
+ phrase_pitch_ranges.append((max(ph_pitches) - min(ph_pitches)) if ph_pitches else None)
11801
+ if len(phrases) > 1:
11802
+ phrase_gaps = []
11803
+ for i in range(len(phrases) - 1):
11804
+ gap = phrase_start_times[i + 1] - phrase_end_times[i]
11805
+ phrase_gaps.append(gap if gap > 0 else 0)
11806
+ else:
11807
+ phrase_gaps = []
11808
+ features['phrases_count'] = len(phrases)
11809
+ features['phrases_avg_note_count'] = mean(phrase_note_counts) if phrase_note_counts else None
11810
+ features['phrases_std_note_count'] = std(phrase_note_counts) if phrase_note_counts else None
11811
+ features['phrases_min_note_count'] = min(phrase_note_counts) if phrase_note_counts else None
11812
+ features['phrases_max_note_count'] = max(phrase_note_counts) if phrase_note_counts else None
11813
+ features['phrases_avg_duration'] = mean(phrase_durations) if phrase_durations else None
11814
+ features['phrases_std_duration'] = std(phrase_durations) if phrase_durations else None
11815
+ features['phrases_min_duration'] = min(phrase_durations) if phrase_durations else None
11816
+ features['phrases_max_duration'] = max(phrase_durations) if phrase_durations else None
11817
+ features['phrases_avg_density'] = mean(phrase_densities) if phrase_densities else None
11818
+ features['phrases_std_density'] = std(phrase_densities) if phrase_densities else None
11819
+ features['phrases_avg_mean_pitch'] = mean(phrase_mean_pitches) if phrase_mean_pitches else None
11820
+ features['phrases_avg_pitch_range'] = mean(phrase_pitch_ranges) if phrase_pitch_ranges else None
11821
+ if phrase_gaps:
11822
+ features['phrases_avg_gap'] = mean(phrase_gaps)
11823
+ features['phrases_std_gap'] = std(phrase_gaps)
11824
+ features['phrases_min_gap'] = min(phrase_gaps)
11825
+ features['phrases_max_gap'] = max(phrase_gaps)
11826
+ else:
11827
+ features['phrases_avg_gap'] = None
11828
+ features['phrases_std_gap'] = None
11829
+ features['phrases_min_gap'] = None
11830
+ features['phrases_max_gap'] = None
11831
+ features['phrases_threshold'] = phrase_threshold
11832
+ else:
11833
+ for key in ['phrases_count', 'phrases_avg_note_count', 'phrases_std_note_count',
11834
+ 'phrases_min_note_count', 'phrases_max_note_count', 'phrases_avg_duration',
11835
+ 'phrases_std_duration', 'phrases_min_duration', 'phrases_max_duration',
11836
+ 'phrases_avg_density', 'phrases_std_density', 'phrases_avg_mean_pitch',
11837
+ 'phrases_avg_pitch_range', 'phrases_avg_gap', 'phrases_std_gap',
11838
+ 'phrases_min_gap', 'phrases_max_gap', 'phrases_threshold']:
11839
+ features[key] = None
11840
+
11841
+ #=================================================================
11842
+
11843
+ return features
11844
+
11845
+ ###################################################################################
11846
+
11847
def winsorized_normalize(data, new_range=(0, 255), clip=1.5):

    """Winsorize *data* to [Q1 - clip*IQR, Q3 + clip*IQR], then rescale to *new_range*.

    Values outside the winsorization bounds are clamped before the min-max
    rescale, which makes the normalization robust to outliers.

    Parameters:
        data: sequence of numbers. An empty sequence yields an empty list.
        new_range: (new_min, new_max) tuple defining the output scale.
        clip: IQR multiplier that sets the winsorization bounds.

    Returns:
        list[int]: rounded, rescaled values, one per input element.
    """

    #=================================================================

    # Robustness fix: previously min()/percentile crashed on empty input.
    if not data:
        return []

    new_min, new_max = new_range

    #=================================================================

    def percentile(values, p):

        """Linear-interpolation percentile (p in 0..100) of *values*."""

        srt = sorted(values)
        n = len(srt)
        if n == 1:
            return srt[0]
        k = (n - 1) * p / 100.0
        f = int(k)
        c = k - f
        if f + 1 < n:
            return srt[f] * (1 - c) + srt[f + 1] * c

        return srt[f]

    #=================================================================

    q1 = percentile(data, 25)
    q3 = percentile(data, 75)
    iqr = q3 - q1

    lower_bound_w = q1 - clip * iqr
    upper_bound_w = q3 + clip * iqr

    data_min = min(data)
    data_max = max(data)

    # The effective clamp window never extends beyond the observed data range.
    effective_low = max(lower_bound_w, data_min)
    effective_high = min(upper_bound_w, data_max)

    #=================================================================

    if effective_high == effective_low:

        # Degenerate window: fall back to a plain min-max rescale,
        # or to a constant output when all inputs are identical.
        if data_max == data_min:
            return [int(new_min)] * len(data)

        normalized = [(x - data_min) / (data_max - data_min) for x in data]

        return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]

    #=================================================================

    # Single clamp pass (the two sequential clip passes were merged).
    clipped = [min(max(x, effective_low), effective_high) for x in data]

    normalized = [(x - effective_low) / (effective_high - effective_low) for x in clipped]

    #=================================================================

    return [int(round(new_min + norm * (new_max - new_min))) for norm in normalized]
11904
+
11905
+ ###################################################################################
11906
+
11907
def tokenize_features_to_ints_winsorized(features, new_range=(0, 255), clip=1.5, none_token=-1):

    """Convert a features dict into parallel lists of raw values, integer
    tokens, and winsorize-normalized tokens.

    Keys are processed in sorted order. A None value maps to *none_token* in
    both the values and tokens lists; list/tuple values are averaged; any
    other non-numeric value is hashed into the 0..10**8-1 range.

    Returns:
        (values, tokens, norm_tokens) — three lists of equal length.
    """

    #=================================================================

    def _to_token(val):

        # Numbers: absolute magnitude rounded to the nearest int.
        if isinstance(val, (int, float)):
            return int(round(abs(val)))

        # Sequences: absolute magnitude of the mean.
        if isinstance(val, (list, tuple)):
            return int(round(abs(sum(val) / len(val))))

        # Anything else: hash bucketed to eight decimal digits.
        return int(abs(hash(val)) % (10 ** 8))

    #=================================================================

    values = []
    tokens = []

    for key in sorted(features):

        val = features[key]

        if val is None:
            tokens.append(none_token)
            values.append(none_token)
            continue

        tokens.append(_to_token(val))

        if isinstance(val, (list, tuple)):
            values.append(sum(val) / len(val))
        else:
            values.append(val)

    #=================================================================

    return values, tokens, winsorized_normalize(tokens, new_range, clip)
11951
+
11952
+ ###################################################################################
11953
+
11954
def write_jsonl(records_dicts_list,
                file_name='data',
                file_ext='.jsonl',
                file_mode='w',
                line_sep='\n',
                verbose=True
                ):

    """Write a list of dict records to a JSON Lines file.

    Parameters:
        records_dicts_list: JSON-serializable dicts, one per output line.
        file_name: output path; *file_ext* is appended when it has no extension.
        file_ext: extension used when *file_name* lacks one.
        file_mode: open() mode ('w' to overwrite, 'a' to append).
        line_sep: separator written after each record.
        verbose: print progress and a summary when True.
    """

    if verbose:
        print('=' * 70)
        print('Writing', len(records_dicts_list), 'records to jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    l_count = 0

    # The with-statement closes the file; the explicit f.close() that
    # used to sit inside this block was redundant and was removed.
    with open(file_name, mode=file_mode) as f:
        for record in tqdm.tqdm(records_dicts_list, disable=not verbose):
            f.write(json.dumps(record) + line_sep)
            l_count += 1

    if verbose:
        print('=' * 70)
        print('Written total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)
11985
+
11986
+ ###################################################################################
11987
+
11988
def read_jsonl(file_name='data',
               file_ext='.jsonl',
               verbose=True
               ):

    """Read all records from a JSON Lines file.

    Corrupted lines are reported (when verbose) and skipped. A
    KeyboardInterrupt stops reading and returns the records loaded so far.

    Parameters:
        file_name: input path; *file_ext* is appended when it has no extension.
        file_ext: extension used when *file_name* lacks one.
        verbose: print progress and a summary when True.

    Returns:
        list of parsed records.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    gl_count = 0

    # The with-statement owns the file handle; the two explicit f.close()
    # calls that used to appear here were redundant and were removed.
    with open(file_name, 'r') as f:

        for i, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            try:
                record = json.loads(line)
                records.append(record)
                gl_count += 1

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')  # typo fix: was 'Stoping...'
                    print('=' * 70)

                # Partial results are still useful on interrupt.
                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', i, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', gl_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12041
+
12042
+ ###################################################################################
12043
+
12044
def read_jsonl_lines(lines_indexes_list,
                     file_name='data',
                     file_ext='.jsonl',
                     verbose=True
                     ):

    """Read only the records at the given line indexes from a JSON Lines file.

    Scanning stops as soon as every requested line has been seen. Corrupted
    lines are reported (when verbose) and skipped. A KeyboardInterrupt stops
    reading and returns the records loaded so far.

    Parameters:
        lines_indexes_list: 0-based line numbers to load. The caller's list
            is left unmodified (previously it was sorted in place).
        file_name: input path; *file_ext* is appended when it has no extension.
        file_ext: extension used when *file_name* lacks one.
        verbose: print progress and a summary when True.

    Returns:
        list of parsed records, in file order.
    """

    if verbose:
        print('=' * 70)
        print('Reading jsonl file...')
        print('=' * 70)

    if not os.path.splitext(file_name)[1]:
        file_name += file_ext

    records = []
    l_count = 0

    # A set gives O(1) membership tests per line and avoids mutating the
    # caller's list (the old code sorted it in place and relied on fragile
    # pop-from-the-end bookkeeping).
    wanted = set(lines_indexes_list)

    with open(file_name, 'r') as f:
        for current_line_number, line in tqdm.tqdm(enumerate(f), disable=not verbose):

            try:
                if current_line_number in wanted:
                    records.append(json.loads(line))
                    wanted.discard(current_line_number)
                    l_count += 1

                    # Early exit once every requested line was loaded.
                    if not wanted:
                        break

            except KeyboardInterrupt:
                if verbose:
                    print('=' * 70)
                    print('Stopping...')  # typo fix: was 'Stoping...'
                    print('=' * 70)

                return records

            except json.JSONDecodeError:
                if verbose:
                    print('=' * 70)
                    print('[ERROR] Line', current_line_number, 'is corrupted! Skipping it...')
                    print('=' * 70)

                continue

    if verbose:
        print('=' * 70)
        print('Loaded total of', l_count, 'jsonl records.')
        print('=' * 70)
        print('Done!')
        print('=' * 70)

    return records
12104
+
12105
+ ###################################################################################
12106
+
12107
def compute_base(x: int, n: int) -> int:

    """Return the smallest base b >= 2 such that x fits in n base-b digits,
    i.e. the smallest b >= 2 with b ** n > x.

    Parameters:
        x: non-negative integer to represent.
        n: number of digits available.

    Raises:
        ValueError: if x is negative.
    """

    if x < 0:
        raise ValueError("x must be non-negative.")
    if x == 0:
        return 2

    # Float estimate of the n-th root; correct it with exact integer
    # arithmetic below. The old single "b += 1" correction could be off
    # when the float root erred by more than one for very large x.
    b = max(2, int(x ** (1 / n)))

    # Walk down in case the float estimate overshot ...
    while b > 2 and (b - 1) ** n > x:
        b -= 1

    # ... and up until b ** n strictly exceeds x.
    while b ** n <= x:
        b += 1

    return b
12120
+
12121
+ ###################################################################################
12122
+
12123
def encode_int_auto(x: int, n: int) -> tuple[int, list[int]]:

    """Encode x as exactly n digits in the smallest base that can hold it.

    Returns:
        (base, digits) with digits ordered most-significant first, such
        that decode_int_auto(base, digits) reproduces x.
    """

    base = compute_base(x, n)

    digits = []
    remainder = x
    for _ in range(n):
        remainder, digit = divmod(remainder, base)
        digits.insert(0, digit)

    return base, digits
12133
+
12134
+ ###################################################################################
12135
+
12136
def decode_int_auto(base: int, digits: list[int]) -> int:

    """Decode a most-significant-first digit list back into an integer.

    Raises:
        ValueError: if any digit falls outside the range 0..base-1.
    """

    result = 0

    for digit in digits:
        if not 0 <= digit < base:
            raise ValueError(f"Each digit must be in the range 0 to {base - 1}. Invalid digit: {digit}")

        result = result * base + digit

    return result
12146
+
12147
+ ###################################################################################
12148
+
12149
def encode_int_manual(x, base, n):

    """Encode x as exactly n most-significant-first digits in *base*.

    When x >= base ** n only the lowest n digits are kept (high digits
    overflow silently).
    """

    digits = []
    remainder = x
    for _ in range(n):
        remainder, digit = divmod(remainder, base)
        digits.append(digit)

    digits.reverse()

    return digits
12158
+
12159
+ ###################################################################################
12160
+
12161
def escore_notes_pitches_chords_signature(escore_notes,
                                          max_patch=128,
                                          sort_by_counts=False,
                                          use_full_chords=False
                                          ):

    """Compute a pitches/chords signature for an enhanced score.

    Notes are chordified; each chord with melodic pitches is reduced to a
    token: a bare pitch value for single-pitch chords, or an index into
    ALL_CHORDS_SORTED offset by +128 for multi-pitch chords. Drum events
    (channel 9) get their own token range above the chord tokens. The
    signature is a list of [token, count] pairs; the reserved token -1
    carries the number of chords that had to be repaired via
    check_and_fix_tones_chord.

    Parameters:
        escore_notes: enhanced score notes; e[3] is the channel (9 == drums),
            e[4] the pitch, e[6] the patch — per this module's conventions.
        max_patch: notes whose patch exceeds this (mod 129) are filtered out.
        sort_by_counts: sort the signature by descending counts when True.
        use_full_chords: forwarded to check_and_fix_tones_chord.

    Returns:
        list of [token, count] pairs, or [] when no notes survive the filter.
    """

    escore_notes = [e for e in escore_notes if e[6] <= max_patch % 129]

    if escore_notes:

        cscore = chordify_score([1000, escore_notes])

        sig = []
        dsig = []

        drums_offset = 321 + 128

        bad_chords_counter = 0

        for c in cscore:

            # Drum pitches are shifted up by 128 so they sort after melodic ones.
            all_pitches = [e[4] if e[3] != 9 else e[4]+128 for e in c]
            chord = sorted(set(all_pitches))

            pitches = sorted([p for p in chord if p < 128], reverse=True)
            drums = [(d+drums_offset)-128 for d in chord if d > 127]

            if pitches:
                if len(pitches) > 1:
                    tones_chord = sorted(set([p % 12 for p in pitches]))

                    # Narrowed from a bare except: list.index raises ValueError
                    # when the tones chord is not in ALL_CHORDS_SORTED.
                    try:
                        sig_token = ALL_CHORDS_SORTED.index(tones_chord) + 128
                    except ValueError:
                        checked_tones_chord = check_and_fix_tones_chord(tones_chord, use_full_chords=use_full_chords)
                        sig_token = ALL_CHORDS_SORTED.index(checked_tones_chord) + 128
                        bad_chords_counter += 1

                elif len(pitches) == 1:
                    sig_token = pitches[0]

                sig.append(sig_token)

            if drums:
                dsig.extend(drums)

        # Frequency count of all pitch/chord and drum tokens.
        sig_p = {}

        for item in sig+dsig:
            sig_p[item] = sig_p.get(item, 0) + 1

        # Reserved token -1 reports how many chords needed fixing.
        sig_p[-1] = bad_chords_counter

        fsig = [list(v) for v in sig_p.items()]

        if sort_by_counts:
            fsig.sort(key=lambda x: x[1], reverse=True)

        return fsig

    else:
        return []
12228
+
12229
+ ###################################################################################
12230
  # This is the end of the TMIDI X Python module
 
12231
  ###################################################################################