asigalov61 committed on
Commit
2bf25f5
·
verified ·
1 Parent(s): 4207313

Upload TMIDIX.py

Browse files
Files changed (1) hide show
  1. TMIDIX.py +167 -10
TMIDIX.py CHANGED
@@ -51,7 +51,7 @@ r'''############################################################################
51
 
52
  ###################################################################################
53
 
54
- __version__ = "26.3.2"
55
 
56
  print('=' * 70)
57
  print('TMIDIX Python module')
@@ -1483,10 +1483,13 @@ import tqdm
1483
 
1484
  import multiprocessing
1485
 
 
 
1486
  from itertools import zip_longest
1487
  from itertools import groupby
1488
  from itertools import cycle
1489
  from itertools import product
 
1490
 
1491
  from collections import Counter
1492
  from collections import defaultdict
@@ -1494,6 +1497,7 @@ from collections import OrderedDict
1494
  from collections import deque
1495
 
1496
  from operator import itemgetter
 
1497
 
1498
  from abc import ABC, abstractmethod
1499
 
@@ -9201,6 +9205,135 @@ def find_lrno_pattern_fast(lst):
9201
 
9202
  ###################################################################################
9203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9204
  def find_chunk_indexes(original_list, chunk, ignore_index=-1):
9205
 
9206
  chunk_length = len(chunk)
@@ -9231,31 +9364,55 @@ def find_chunk_indexes(original_list, chunk, ignore_index=-1):
9231
  def escore_notes_lrno_pattern_fast(escore_notes,
9232
  channels_index=3,
9233
  pitches_index=4,
9234
- zero_start_time=True
 
 
 
 
 
9235
  ):
 
 
 
 
 
 
9236
 
9237
  cscore = chordify_score([1000, escore_notes])
9238
 
9239
  score_chords = []
9240
 
9241
  for c in cscore:
9242
-
9243
- tchord = sorted(set([e[pitches_index] % 12 for e in c if e[channels_index] != 9]))
9244
-
9245
  chord_tok = -1
 
 
 
 
 
9246
 
9247
  if tchord:
9248
 
9249
- if tchord not in ALL_CHORDS_FULL:
9250
- tchord = check_and_fix_tones_chord(tchord)
 
 
9251
 
9252
- chord_tok = ALL_CHORDS_FULL.index(tchord)
9253
 
9254
  score_chords.append(chord_tok)
9255
 
9256
  schords = [c for c in score_chords if c != -1]
9257
-
9258
- lrno = find_lrno_pattern_fast(schords)
 
 
 
 
 
 
9259
 
9260
  if lrno:
9261
 
 
51
 
52
  ###################################################################################
53
 
54
+ __version__ = "26.3.24"
55
 
56
  print('=' * 70)
57
  print('TMIDIX Python module')
 
1483
 
1484
  import multiprocessing
1485
 
1486
+ import bisect
1487
+
1488
  from itertools import zip_longest
1489
  from itertools import groupby
1490
  from itertools import cycle
1491
  from itertools import product
1492
+ from itertools import accumulate
1493
 
1494
  from collections import Counter
1495
  from collections import defaultdict
 
1497
  from collections import deque
1498
 
1499
  from operator import itemgetter
1500
+ from operator import ne as _ne
1501
 
1502
  from abc import ABC, abstractmethod
1503
 
 
9205
 
9206
  ###################################################################################
9207
 
9208
def find_fuzzy_lrno_pattern_fast(lst, threshold=0, prefix_suffix_len=1):

    """
    Find the longest repeating non-overlapping fuzzy pattern in a list of ints.

    Two occurrences lst[i:i+L] and lst[j:j+L] with i + L <= j count as a
    fuzzy repeat when their first p and last p elements are equal and the
    elements in between (the *middle*) differ in at most `threshold`
    positions.

    Parameters
    ----------
    lst : list[int]
    threshold : int
        Max element mismatches allowed in the middle segment. Values <= 0
        mean exact matching: the call is delegated to
        find_lrno_pattern_fast and prefix_suffix_len is ignored.
    prefix_suffix_len : int
        p; prefix lst[i:i+p] and suffix lst[i+L-p:i+L] must match exactly
        in both occurrences. Negative values are treated as 0 (no
        prefix/suffix constraint). Patterns shorter than max(2p, 1) are
        never reported.

    Returns
    -------
    list[int]
        The earlier occurrence of the longest fuzzy pattern found, or [].
    """

    # Exact matching (and any meaningless negative threshold) is handled by
    # the dedicated exact solver.
    if threshold <= 0:
        return find_lrno_pattern_fast(lst)

    # A negative prefix/suffix length would turn every length bound below
    # negative and corrupt the slicing, so clamp it to "no constraint".
    p = max(0, int(prefix_suffix_len))
    pp = p + p
    n = len(lst)
    min_len = max(pp, 1)

    # Two non-overlapping occurrences of the shortest pattern must fit.
    if n < 2 * min_len:
        return []

    bisect_right = bisect.bisect_right

    best_len = 0
    best_start = 0

    # Start positions that still leave room for a min_len-long pattern.
    limit = n - min_len + 1

    if p:
        # Group start positions by their exact p-element prefix. Positions
        # are appended in increasing order, so every group is sorted.
        groups = defaultdict(list)
        for i in range(limit):
            groups[tuple(lst[i:i + p])].append(i)
        candidate_groups = groups.values()
    else:
        # No prefix constraint: one implicit group over all positions.
        candidate_groups = [list(range(limit))]

    for pos in candidate_groups:

        m = len(pos)
        if m < 2:
            continue

        for a in range(m - 1):
            i = pos[a]

            # A pattern anchored at i can be at most (n - i) // 2 long.
            # pos is sorted, so later anchors cannot do better either.
            if (n - i) >> 1 <= best_len:
                break

            for b in range(a + 1, m):
                j = pos[b]

                # Room left to the right of j only shrinks as j grows.
                room = n - j
                if room <= best_len:
                    break

                # Non-overlap (j - i) and right-fit (n - j) bound the length.
                max_len = min(j - i, room)
                if max_len <= best_len:
                    continue  # this j is too close; a larger j may still work

                mid_len = max_len - pp
                if mid_len < 0:
                    continue  # no room for even a prefix + suffix

                if mid_len == 0:
                    # Only prefix + suffix fit. The prefix matches by
                    # grouping and pp == max_len > best_len here, so only
                    # the suffix has to be compared.
                    if lst[i + p:i + pp] == lst[j + p:j + pp]:
                        best_len = pp
                        best_start = i
                    continue

                # cum[k] = number of mismatches among the first k middle
                # elements, k = 0 .. mid_len.
                ip = i + p
                jp = j + p
                mismatches = map(_ne, lst[ip:ip + mid_len], lst[jp:jp + mid_len])
                cum = list(accumulate(mismatches, initial=0))

                # cum is non-decreasing, so this is the longest middle
                # whose mismatch count stays within the threshold.
                k = bisect_right(cum, threshold) - 1
                cand_len = k + pp
                if cand_len <= best_len:
                    continue

                if not p:
                    # No suffix constraint: the candidate is valid as is.
                    best_len = cand_len
                    best_start = i
                    continue

                # Any shorter length keeps the middle within the threshold,
                # so shrink until the p-element suffixes match exactly,
                # never going below 2p or the current best.
                for length in range(cand_len, max(best_len, pp - 1), -1):
                    tail = length - p
                    if lst[i + tail:i + length] == lst[j + tail:j + length]:
                        best_len = length
                        best_start = i
                        break

    return lst[best_start:best_start + best_len]
9334
+
9335
+ ###################################################################################
9336
+
9337
  def find_chunk_indexes(original_list, chunk, ignore_index=-1):
9338
 
9339
  chunk_length = len(chunk)
 
9364
  def escore_notes_lrno_pattern_fast(escore_notes,
9365
  channels_index=3,
9366
  pitches_index=4,
9367
+ zero_start_time=True,
9368
+ use_full_chords=True,
9369
+ skip_pitches=False,
9370
+ fuzzy_matching=False,
9371
+ fuzzy_thres=5,
9372
+ fuzzy_ps_len=3
9373
  ):
9374
+
9375
+ if use_full_chords:
9376
+ CHORDS = ALL_CHORDS_FULL
9377
+
9378
+ else:
9379
+ CHORDS = ALL_CHORDS_SORTED
9380
 
9381
  cscore = chordify_score([1000, escore_notes])
9382
 
9383
  score_chords = []
9384
 
9385
  for c in cscore:
9386
+
9387
+ pitches = sorted(set([e[pitches_index] for e in c if e[channels_index] != 9]))
9388
+
9389
  chord_tok = -1
9390
+ tchord = []
9391
+
9392
+ if (skip_pitches and len(pitches) > 1) or not skip_pitches:
9393
+
9394
+ tchord = sorted(set([p % 12 for p in pitches]))
9395
 
9396
  if tchord:
9397
 
9398
+ if tchord not in ALL_CHORDS_SORTED:
9399
+ tchord = check_and_fix_tones_chord(tchord,
9400
+ use_full_chords=use_full_chords
9401
+ )
9402
 
9403
+ chord_tok = ALL_CHORDS_SORTED.index(tchord)
9404
 
9405
  score_chords.append(chord_tok)
9406
 
9407
  schords = [c for c in score_chords if c != -1]
9408
+
9409
+ if fuzzy_matching:
9410
+ lrno = find_fuzzy_lrno_pattern_fast(schords,
9411
+ fuzzy_thres,
9412
+ fuzzy_ps_len
9413
+ )
9414
+ else:
9415
+ lrno = find_lrno_pattern_fast(schords)
9416
 
9417
  if lrno:
9418