File size: 908 Bytes
b46126b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | import numpy as np
import glob
# Lookup table: column index of a PWM row -> nucleotide letter.
idx_to_base = ['A', 'C', 'G', 'T']

# A position whose best probability is strictly below this value is printed
# as ".". NOTE: 0.25 is the uniform level for four bases, so if every row sums
# to 1 its maximum can never be below 0.25 and nothing is masked at this
# default; only unnormalized / all-low rows are. Pass a higher ``min_prob``
# (e.g. 0.4) to consensus_from_pwm() to actually hide weak positions.
MIN_CONFIDENCE = 0.25


def token_id(path):
    """Return the text after the first "token" in *path*, up to the next "_".

    For example ``"token12_pwm.npy"`` yields ``"12"``.
    """
    return path.split("token")[1].split("_")[0]


def token_sort_key(path):
    """Sort key that orders PWM files by numeric token ID.

    A plain string sort puts ``token10`` before ``token2``. Files whose ID is
    all decimal digits are ordered by integer value; any other file sorts
    after them, alphabetically by path.
    """
    tid = token_id(path)
    if tid.isdecimal():
        return (0, int(tid), path)
    return (1, 0, path)


def consensus_from_pwm(pwm, min_prob=MIN_CONFIDENCE):
    """Build the consensus string for a position weight matrix.

    Args:
        pwm: array-like of shape (positions, 4); each row holds
            [prob_A, prob_C, prob_G, prob_T].
        min_prob: rows whose largest value is strictly below this are
            rendered as "." (low confidence).

    Returns:
        A string with one character per row: the letter of the highest-valued
        column (first one wins on ties), or ".". Empty input gives "".

    Raises:
        ValueError: if a non-empty *pwm* is not 2-D with exactly 4 columns.
    """
    matrix = np.asarray(pwm)
    if matrix.ndim >= 1 and matrix.shape[0] == 0:
        return ""
    if matrix.ndim != 2 or matrix.shape[1] != len(idx_to_base):
        raise ValueError(
            f"expected a PWM of shape (positions, {len(idx_to_base)}), "
            f"got {matrix.shape}"
        )
    # One argmax/max over the column axis replaces the per-row Python loop.
    best_idx = matrix.argmax(axis=1)
    best_val = matrix.max(axis=1)
    return "".join(
        "." if val < min_prob else idx_to_base[idx]
        for idx, val in zip(best_idx, best_val)
    )


# Find the PWM files in the current directory, ordered by numeric token ID.
pwm_files = sorted(glob.glob("token*_pwm.npy"), key=token_sort_key)

print(f"{'Token ID':<10} | {'Consensus Sequence (50bp)':<55}")
print("-" * 70)
for pwm_file in pwm_files:
    tid = token_id(pwm_file)
    # Matrix is expected to be (50, 4) per the original script's note.
    pwm = np.load(pwm_file)
    seq_str = consensus_from_pwm(pwm)
    print(f"{tid:<10} | {seq_str}")
|