File size: 908 Bytes
b46126b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import numpy as np
import glob

# Column index -> nucleotide letter; PWM columns are read in this order.
idx_to_base = ['A', 'C', 'G', 'T']

# Positions whose best score is below this value are printed as ".".
# NOTE(review): if each row is a normalized probability distribution over the
# four bases, its maximum can never be below 0.25, so this default masks
# nothing. An earlier comment suggested ~0.4; raise it if masking is wanted.
MIN_CONFIDENCE = 0.25


def token_id(pwm_file):
    """Return the text between "token" and the next "_" in *pwm_file*."""
    return pwm_file.split("token")[1].split("_")[0]


def token_sort_key(pwm_file):
    """Sort key that orders numeric token ids by value (2 before 10).

    Files whose id is not purely numeric sort after the numeric ones, in
    plain lexicographic order.
    """
    tid = token_id(pwm_file)
    if tid.isdigit():
        return (0, int(tid), pwm_file)
    return (1, 0, pwm_file)


def consensus_from_pwm(pwm, min_prob=MIN_CONFIDENCE):
    """Build the consensus string for a position weight matrix.

    Args:
        pwm: 2-D array-like, one row per position and one column per base in
            ``idx_to_base`` order (the files are expected to be (50, 4) --
            TODO confirm against the producer).
        min_prob: rows whose largest value is strictly below this are
            rendered as "." instead of a base letter.

    Returns:
        A string with one character per row of *pwm*.

    Raises:
        ValueError: if *pwm* is non-empty and not two-dimensional.
    """
    pwm = np.asarray(pwm)
    if pwm.size == 0:
        return ""
    if pwm.ndim != 2:
        raise ValueError(f"expected a 2-D PWM, got shape {pwm.shape}")

    # One vectorized pass instead of a Python-level argmax per row.
    best_idx = pwm.argmax(axis=1)
    best_val = pwm[np.arange(pwm.shape[0]), best_idx]
    return "".join(
        "." if val < min_prob else idx_to_base[idx]
        for idx, val in zip(best_idx, best_val)
    )


def main():
    """Print a table of consensus sequences for every token*_pwm.npy file."""
    # Numeric-aware ordering: a plain sort would list token10 before token2.
    pwm_files = sorted(glob.glob("token*_pwm.npy"), key=token_sort_key)

    print(f"{'Token ID':<10} | {'Consensus Sequence (50bp)':<55}")
    print("-" * 70)

    for pwm_file in pwm_files:
        tid = token_id(pwm_file)
        seq_str = consensus_from_pwm(np.load(pwm_file))
        print(f"{tid:<10} | {seq_str}")


if __name__ == "__main__":
    main()