Upload TMIDIX.py
Browse files
TMIDIX.py
CHANGED
|
@@ -51,7 +51,7 @@ r'''############################################################################
|
|
| 51 |
|
| 52 |
###################################################################################
|
| 53 |
|
| 54 |
-
__version__ = "26.3.
|
| 55 |
|
| 56 |
print('=' * 70)
|
| 57 |
print('TMIDIX Python module')
|
|
@@ -1483,10 +1483,13 @@ import tqdm
|
|
| 1483 |
|
| 1484 |
import multiprocessing
|
| 1485 |
|
|
|
|
|
|
|
| 1486 |
from itertools import zip_longest
|
| 1487 |
from itertools import groupby
|
| 1488 |
from itertools import cycle
|
| 1489 |
from itertools import product
|
|
|
|
| 1490 |
|
| 1491 |
from collections import Counter
|
| 1492 |
from collections import defaultdict
|
|
@@ -1494,6 +1497,7 @@ from collections import OrderedDict
|
|
| 1494 |
from collections import deque
|
| 1495 |
|
| 1496 |
from operator import itemgetter
|
|
|
|
| 1497 |
|
| 1498 |
from abc import ABC, abstractmethod
|
| 1499 |
|
|
@@ -9201,6 +9205,135 @@ def find_lrno_pattern_fast(lst):
|
|
| 9201 |
|
| 9202 |
###################################################################################
|
| 9203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9204 |
def find_chunk_indexes(original_list, chunk, ignore_index=-1):
|
| 9205 |
|
| 9206 |
chunk_length = len(chunk)
|
|
@@ -9231,31 +9364,55 @@ def find_chunk_indexes(original_list, chunk, ignore_index=-1):
|
|
| 9231 |
def escore_notes_lrno_pattern_fast(escore_notes,
|
| 9232 |
channels_index=3,
|
| 9233 |
pitches_index=4,
|
| 9234 |
-
zero_start_time=True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9235 |
):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9236 |
|
| 9237 |
cscore = chordify_score([1000, escore_notes])
|
| 9238 |
|
| 9239 |
score_chords = []
|
| 9240 |
|
| 9241 |
for c in cscore:
|
| 9242 |
-
|
| 9243 |
-
|
| 9244 |
-
|
| 9245 |
chord_tok = -1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9246 |
|
| 9247 |
if tchord:
|
| 9248 |
|
| 9249 |
-
if tchord not in
|
| 9250 |
-
tchord = check_and_fix_tones_chord(tchord
|
|
|
|
|
|
|
| 9251 |
|
| 9252 |
-
chord_tok =
|
| 9253 |
|
| 9254 |
score_chords.append(chord_tok)
|
| 9255 |
|
| 9256 |
schords = [c for c in score_chords if c != -1]
|
| 9257 |
-
|
| 9258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9259 |
|
| 9260 |
if lrno:
|
| 9261 |
|
|
|
|
| 51 |
|
| 52 |
###################################################################################
|
| 53 |
|
| 54 |
+
__version__ = "26.3.24"
|
| 55 |
|
| 56 |
print('=' * 70)
|
| 57 |
print('TMIDIX Python module')
|
|
|
|
| 1483 |
|
| 1484 |
import multiprocessing
|
| 1485 |
|
| 1486 |
+
import bisect
|
| 1487 |
+
|
| 1488 |
from itertools import zip_longest
|
| 1489 |
from itertools import groupby
|
| 1490 |
from itertools import cycle
|
| 1491 |
from itertools import product
|
| 1492 |
+
from itertools import accumulate
|
| 1493 |
|
| 1494 |
from collections import Counter
|
| 1495 |
from collections import defaultdict
|
|
|
|
| 1497 |
from collections import deque
|
| 1498 |
|
| 1499 |
from operator import itemgetter
|
| 1500 |
+
from operator import ne as _ne
|
| 1501 |
|
| 1502 |
from abc import ABC, abstractmethod
|
| 1503 |
|
|
|
|
| 9205 |
|
| 9206 |
###################################################################################
|
| 9207 |
|
| 9208 |
+
def find_fuzzy_lrno_pattern_fast(lst, threshold=0, prefix_suffix_len=1):

    """
    Find the longest repeating non-overlapping fuzzy pattern in a list of ints.

    Two occurrences lst[i:i+L] and lst[j:j+L] (with i + L <= j) are treated
    as the same pattern when their first p and last p elements are identical
    and the middle segment between them differs in at most `threshold`
    positions.

    Parameters
    ----------
    lst : list[int]
    threshold : int -- max element mismatches allowed in the *middle*
        segment (0 = exact, delegates to find_lrno_pattern_fast; in that
        case prefix_suffix_len is not used).
    prefix_suffix_len : int -- p; prefix lst[i:i+p] and suffix lst[i+L-p:i+L]
        must match exactly in both occurrences. Must be >= 0.

    Returns
    -------
    list[int] -- first occurrence of the longest fuzzy pattern, or [].

    Raises
    ------
    ValueError -- if threshold != 0 and prefix_suffix_len is negative.
    """

    # -- validation / fast paths ----------------------------------------------
    if threshold == 0:
        return find_lrno_pattern_fast(lst)

    p = int(prefix_suffix_len)

    # A negative p would turn every slice bound below into a wrap-around
    # index and silently produce a meaningless result.
    if p < 0:
        raise ValueError("prefix_suffix_len must be non-negative")

    n = len(lst)
    pp = p + p                # prefix + suffix length, constant in the hot loop
    min_len = pp or 1         # max(2p, 1); "or 1" handles p == 0

    # Two non-overlapping occurrences need at least 2 * min_len elements.
    if n < min_len + min_len:
        return []

    # -- local aliases: avoid repeated attribute/global look-ups in the loop --
    bisect_right = bisect.bisect_right
    running_sum = accumulate

    best_len = 0
    best_start = 0

    # -- group starting positions by their exact p-element prefix -------------
    # Positions are appended 0 ... limit-1, so each group list is sorted.
    limit = n - min_len + 1

    if p:
        groups = defaultdict(list)
        for start in range(limit):
            groups[tuple(lst[start : start + p])].append(start)
        group_iter = groups.values()
    else:
        # p == 0: no prefix constraint; one implicit group over all positions.
        group_iter = [list(range(limit))]

    # -- main pair search ------------------------------------------------------
    for pos in group_iter:
        m = len(pos)
        if m < 2:
            continue

        for a in range(m - 1):
            i = pos[a]

            # Upper bound: best possible pattern length anchored at i is
            # floor((n - i) / 2). pos is sorted, so every later anchor has a
            # smaller bound and it is safe to stop scanning this group.
            if (n - i) >> 1 <= best_len:
                break

            for b in range(a + 1, m):
                j = pos[b]                      # j > i (positions are sorted)

                nj = n - j
                if nj <= best_len:
                    break                       # j grows, nj shrinks; no further j can help

                # Non-overlap + right-fit: max pattern length for this pair.
                gap = j - i
                max_L = gap if gap < nj else nj
                if max_L <= best_len:
                    continue                    # this j is too close; a larger j might still work

                mid_len = max_L - pp
                if mid_len < 0:
                    continue                    # pair too close to fit even a 2p-length pattern

                # -- zero-length middle (max_L == 2p exactly) ------------------
                if mid_len == 0:
                    # Only prefix + suffix exist; the suffix is lst[i+p : i+2p].
                    # max_L == 2p > best_len is already guaranteed here.
                    if lst[i + p : i + pp] == lst[j + p : j + pp]:
                        best_len = pp
                        best_start = i
                    continue

                # -- middle mismatch flags + cumulative sum (all C-level) ------
                # diff[k] = (lst[i+p+k] != lst[j+p+k])   for k = 0 ... mid_len-1
                # cum[k]  = sum(diff[0:k])               for k = 0 ... mid_len
                ip = i + p
                jp = j + p
                diff = list(map(_ne, lst[ip : ip + mid_len], lst[jp : jp + mid_len]))
                cum = list(running_sum(diff, initial=0))    # len == mid_len + 1

                # -- binary search: longest middle with <= threshold errors ----
                # cum is non-decreasing; bisect_right returns the insertion
                # point after the last value <= threshold, so subtracting 1
                # gives the largest admissible middle length.
                k = bisect_right(cum, threshold) - 1
                cand_L = k + pp
                if cand_L <= best_len:
                    continue

                if not p:
                    # p == 0: no suffix constraint; k is the answer directly.
                    best_len = cand_L
                    best_start = i
                    continue

                # -- suffix scan ----------------------------------------------
                # For any L <= cand_L the middle holds at most cum[k] <= threshold
                # mismatches, so only the exact suffix match needs verifying.
                for L in range(cand_L, best_len, -1):
                    if L < pp:
                        break                   # below minimum pattern length
                    lp = L - p
                    if lst[i + lp : i + L] == lst[j + lp : j + L]:
                        best_len = L
                        best_start = i
                        break

    return lst[best_start : best_start + best_len]
|
| 9334 |
+
|
| 9335 |
+
###################################################################################
|
| 9336 |
+
|
| 9337 |
def find_chunk_indexes(original_list, chunk, ignore_index=-1):
|
| 9338 |
|
| 9339 |
chunk_length = len(chunk)
|
|
|
|
| 9364 |
def escore_notes_lrno_pattern_fast(escore_notes,
|
| 9365 |
channels_index=3,
|
| 9366 |
pitches_index=4,
|
| 9367 |
+
zero_start_time=True,
|
| 9368 |
+
use_full_chords=True,
|
| 9369 |
+
skip_pitches=False,
|
| 9370 |
+
fuzzy_matching=False,
|
| 9371 |
+
fuzzy_thres=5,
|
| 9372 |
+
fuzzy_ps_len=3
|
| 9373 |
):
|
| 9374 |
+
|
| 9375 |
+
if use_full_chords:
|
| 9376 |
+
CHORDS = ALL_CHORDS_FULL
|
| 9377 |
+
|
| 9378 |
+
else:
|
| 9379 |
+
CHORDS = ALL_CHORDS_SORTED
|
| 9380 |
|
| 9381 |
cscore = chordify_score([1000, escore_notes])
|
| 9382 |
|
| 9383 |
score_chords = []
|
| 9384 |
|
| 9385 |
for c in cscore:
|
| 9386 |
+
|
| 9387 |
+
pitches = sorted(set([e[pitches_index] for e in c if e[channels_index] != 9]))
|
| 9388 |
+
|
| 9389 |
chord_tok = -1
|
| 9390 |
+
tchord = []
|
| 9391 |
+
|
| 9392 |
+
if (skip_pitches and len(pitches) > 1) or not skip_pitches:
|
| 9393 |
+
|
| 9394 |
+
tchord = sorted(set([p % 12 for p in pitches]))
|
| 9395 |
|
| 9396 |
if tchord:
|
| 9397 |
|
| 9398 |
+
if tchord not in ALL_CHORDS_SORTED:
|
| 9399 |
+
tchord = check_and_fix_tones_chord(tchord,
|
| 9400 |
+
use_full_chords=use_full_chords
|
| 9401 |
+
)
|
| 9402 |
|
| 9403 |
+
chord_tok = ALL_CHORDS_SORTED.index(tchord)
|
| 9404 |
|
| 9405 |
score_chords.append(chord_tok)
|
| 9406 |
|
| 9407 |
schords = [c for c in score_chords if c != -1]
|
| 9408 |
+
|
| 9409 |
+
if fuzzy_matching:
|
| 9410 |
+
lrno = find_fuzzy_lrno_pattern_fast(schords,
|
| 9411 |
+
fuzzy_thres,
|
| 9412 |
+
fuzzy_ps_len
|
| 9413 |
+
)
|
| 9414 |
+
else:
|
| 9415 |
+
lrno = find_lrno_pattern_fast(schords)
|
| 9416 |
|
| 9417 |
if lrno:
|
| 9418 |
|