"""Print a consensus sequence for every ``token*_pwm.npy`` matrix found.

Each file is expected to hold a position weight matrix of shape
(positions, 4) whose columns are ordered A, C, G, T.  For every position the
highest-scoring base is reported; positions whose best score falls below
``MIN_PROB`` are printed as ``"."``.
"""
import glob

import numpy as np

# Helper to map a PWM column index to its nucleotide letter.
idx_to_base = ['A', 'C', 'G', 'T']

# Positions whose best base scores below this value are shown as ".".
# NOTE: when every row sums to 1 the maximum of four values can never be
# below 0.25, so this default only masks rows that are not normalised.
# Raise it (e.g. to 0.4) to hide weak, background-like positions.
MIN_PROB = 0.25


def token_id(pwm_file):
    """Return the token ID embedded in a ``token<ID>_pwm.npy`` file name."""
    return pwm_file.split("token")[1].split("_")[0]


def token_sort_key(pwm_file):
    """Sort key that orders numeric token IDs by value, not by text.

    A plain string sort would list ``token10`` before ``token2``.  IDs that
    are not purely numeric are placed after the numeric ones, sorted as text.
    """
    tid = token_id(pwm_file)
    if tid.isdigit():
        return (0, int(tid), "")
    return (1, 0, tid)


def pwm_to_consensus(pwm, min_prob=MIN_PROB):
    """Return the consensus string for a (positions, 4) weight matrix.

    Args:
        pwm: array-like of shape (positions, 4); columns are A, C, G, T.
        min_prob: positions whose best score is below this become ".".

    Raises:
        ValueError: if ``pwm`` is not 2-D with one column per base.
    """
    matrix = np.asarray(pwm)
    if matrix.ndim != 2 or matrix.shape[1] != len(idx_to_base):
        raise ValueError(
            f"expected a PWM of shape (N, {len(idx_to_base)}), "
            f"got {matrix.shape}"
        )
    best_idx = matrix.argmax(axis=1)
    best_val = matrix.max(axis=1)
    # Keep the "<" test (rather than ">=") so NaN scores behave as before.
    return "".join(
        "." if val < min_prob else idx_to_base[idx]
        for idx, val in zip(best_idx, best_val)
    )


# Find your files, ordered by numeric token ID.
pwm_files = sorted(glob.glob("token*_pwm.npy"), key=token_sort_key)

print(f"{'Token ID':<10} | {'Consensus Sequence (50bp)':<55}")
print("-" * 70)

for pwm_file in pwm_files:
    tid = token_id(pwm_file)
    seq_str = pwm_to_consensus(np.load(pwm_file))
    print(f"{tid:<10} | {seq_str}")