"""Print a consensus sequence for every ``token*_pwm.npy`` file in the CWD.

Each file is loaded with ``np.load`` and treated as a position weight matrix
with one row per position and one column per base (A, C, G, T).  For every
row the highest-scoring base is reported, or "." when that score is below
``MIN_CONFIDENCE``.
"""
import glob

import numpy as np

# Column order of a PWM row: column index -> base letter.
idx_to_base = ['A', 'C', 'G', 'T']

# Rows whose best score is strictly below this value are printed as ".".
# NOTE(review): for rows that are normalised probabilities over four bases
# the maximum can never drop below 0.25, so this cutoff only triggers on
# un-normalised rows.  An older comment mentioned 0.4 as an example cutoff;
# pass ``min_prob`` to ``consensus_from_pwm`` to use a stricter value.
MIN_CONFIDENCE = 0.25


def token_id_from_path(pwm_file):
    """Return the text between the first "token" and the following "_"."""
    return pwm_file.split("token")[1].split("_")[0]


def token_sort_key(pwm_file):
    """Return a sort key that orders numeric token IDs by value.

    Plain string sorting puts ``token10`` before ``token2``.  Files whose ID
    is all decimal digits are ordered numerically; any other ID sorts after
    them, ordered by file name.
    """
    tid = token_id_from_path(pwm_file)
    if tid.isdecimal():
        return (0, int(tid), pwm_file)
    return (1, 0, pwm_file)


def consensus_from_pwm(pwm, min_prob=MIN_CONFIDENCE):
    """Return the consensus string for a position weight matrix.

    Args:
        pwm: 2-D array-like with one row per position and exactly
            ``len(idx_to_base)`` columns, in the order given by
            ``idx_to_base``.
        min_prob: rows whose largest value is strictly below this are
            rendered as "." (low confidence).

    Returns:
        A string with one character per row of ``pwm``.

    Raises:
        ValueError: if ``pwm`` is not 2-D or has the wrong column count.
    """
    pwm = np.asarray(pwm)
    if pwm.ndim != 2 or pwm.shape[1] != len(idx_to_base):
        raise ValueError(
            f"expected a PWM of shape (N, {len(idx_to_base)}), "
            f"got {pwm.shape}"
        )

    best_idx = pwm.argmax(axis=1)
    best_val = pwm.max(axis=1)
    # Keep the "<" comparison (rather than ">=") so a value exactly equal to
    # the cutoff still yields a base, matching the original behaviour.
    return "".join(
        "." if val < min_prob else idx_to_base[idx]
        for idx, val in zip(best_idx, best_val)
    )


def main():
    """Print a table of token ID and consensus sequence for each PWM file."""
    pwm_files = sorted(glob.glob("token*_pwm.npy"), key=token_sort_key)

    print(f"{'Token ID':<10} | {'Consensus Sequence (50bp)':<55}")
    print("-" * 70)

    for pwm_file in pwm_files:
        tid = token_id_from_path(pwm_file)
        pwm = np.load(pwm_file)
        try:
            seq_str = consensus_from_pwm(pwm)
        except ValueError as err:
            # Name the offending file so a bad matrix is easy to locate.
            raise ValueError(f"{pwm_file}: {err}") from err
        print(f"{tid:<10} | {seq_str}")


if __name__ == "__main__":
    main()