# Source: Hugging Face repository nancyH/SparseAE, file consensus_string.py
# Commit b46126b (verified, about 2 months before this page was captured):
# "Upload folder using huggingface_hub". File size: 908 bytes.

	# Print one consensus DNA string per token*_pwm.npy matrix in the current directory.
import numpy as np
import glob

# Column order of every PWM row: column index -> nucleotide letter.
idx_to_base = ['A', 'C', 'G', 'T']

# Rows whose largest entry is below this value are printed as "." instead of
# a base.
# NOTE(review): if the rows are normalized probabilities over the four bases,
# the largest entry can never be below 0.25, so with this value "." is never
# emitted. The original comment mentioned 0.4 as an example cut-off; raise
# this constant if weak positions should be masked.
MIN_CONFIDENCE = 0.25


def token_id(pwm_file: str) -> str:
    """Return the ID embedded in a file name shaped like ``token<ID>_pwm.npy``.

    The ID is the text between the first occurrence of "token" and the next
    underscore.
    """
    return pwm_file.split("token")[1].split("_")[0]


def token_sort_key(pwm_file: str):
    """Sort key that orders files by numeric token ID (token2 before token10).

    Files whose ID is not a plain decimal number are placed after all numeric
    ones, ordered by file name.
    """
    tid = token_id(pwm_file)
    if tid.isdecimal():
        return (0, int(tid), pwm_file)
    return (1, 0, pwm_file)


def consensus_from_pwm(pwm, min_confidence: float = MIN_CONFIDENCE) -> str:
    """Return the consensus string for a matrix with one row per position.

    Each row is read as four values in the order A, C, G, T. The letter of
    the largest value is emitted for that position, or "." when that value is
    below ``min_confidence``. An empty matrix yields an empty string.
    """
    letters = []
    for row in pwm:
        max_idx = int(np.argmax(row))
        if row[max_idx] < min_confidence:
            letters.append(".")  # Low confidence
        else:
            letters.append(idx_to_base[max_idx])
    return "".join(letters)


# Find the PWM files; sort by numeric ID so the table reads 1, 2, ..., 10
# rather than the lexicographic 1, 10, 2.
pwm_files = sorted(glob.glob("token*_pwm.npy"), key=token_sort_key)

print(f"{'Token ID':<10} | {'Consensus Sequence (50bp)':<55}")
print("-" * 70)

for pwm_file in pwm_files:
    tid = token_id(pwm_file)

    # Load the matrix saved for this token (one row per sequence position).
    pwm = np.load(pwm_file)

    seq_str = consensus_from_pwm(pwm)
    print(f"{tid:<10} | {seq_str}")