Spaces:
Sleeping
Sleeping
def _read_fasta_records(fasta_file: str) -> list:
    """Return the ``(header, sequence)`` pairs found in *fasta_file*.

    A record starts at a line beginning with ``>``. Every following
    non-blank line, up to the next header, belongs to that record's
    sequence, so a sequence wrapped over several lines is joined back
    into one string. Lines that appear before the first header are ignored.
    """
    records = []
    header = None
    chunks = []
    with open(fasta_file, 'r') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                # A new header closes the record that was being collected.
                if header is not None:
                    records.append((header, ''.join(chunks)))
                header = line
                chunks = []
            elif header is not None:
                chunks.append(line)
    if header is not None:
        records.append((header, ''.join(chunks)))
    return records


def _parse_score(header: str) -> float:
    """Return the value of the ``score=`` field of a FASTA *header*.

    The key must be exactly ``score``: a substring test would also match
    ``global_score`` and pick the wrong field whenever it is listed first.

    Raises:
        ValueError: if there is no ``score`` field or its value is not a
            number.
    """
    for field in header.lstrip('>').split(','):
        key, separator, value = field.partition('=')
        if separator and key.strip() == 'score':
            return float(value)
    raise ValueError(f"No score field in header: {header}")


def extract_best_design(fasta_file: str, output_file: str) -> None:
    """Write the lowest-scoring sampled design of *fasta_file* to *output_file*.

    Only records whose header contains ``sample`` are considered; this skips
    the original native sequence. Records with a missing or unparsable
    ``score=`` field, or with an empty sequence, are ignored. On ties the
    first record wins. The chosen record is written as a header line
    followed by a single sequence line, and the parent directory of
    *output_file* is created if needed.

    Args:
        fasta_file: Path of the input FASTA file.
        output_file: Path of the FASTA file to write.

    Raises:
        FileNotFoundError: if *fasta_file* does not exist.
        ValueError: if no usable design is found in *fasta_file*.
    """
    import os

    if not os.path.exists(fasta_file):
        raise FileNotFoundError(f"Input FASTA file not found: {fasta_file}")

    best_score = float('inf')
    best_header = ""
    best_seq = ""
    for header, sequence in _read_fasta_records(fasta_file):
        # Skip the original native sequence and records without residues.
        if "sample" not in header or not sequence:
            continue
        try:
            score = _parse_score(header)
        except ValueError:
            # Malformed header: ignore this record, keep scanning the rest.
            continue
        if score < best_score:
            best_score = score
            best_header = header
            best_seq = sequence

    if not best_seq:
        raise ValueError(f"No valid designs found in {fasta_file}")

    # dirname() is empty for a bare file name; fall back to the current dir.
    os.makedirs(os.path.dirname(output_file) or '.', exist_ok=True)
    with open(output_file, 'w') as f:
        f.write(f"{best_header}\n{best_seq}\n")
    print(f"✅ Success! Best design (score={best_score:.4f}) saved to {output_file}")
if __name__ == "__main__":
    import argparse

    # Command-line entry point: both paths are optional and default to the
    # locations given below.
    cli = argparse.ArgumentParser(
        description="Extract best design from ProteinMPNN output"
    )
    cli.add_argument(
        "--input_fa",
        type=str,
        default="generated/3kas/seqs/3kas_clones.fa",
        help="Input FASTA file path (relative to project root)",
    )
    cli.add_argument(
        "--output_fa",
        type=str,
        default="generated/shuttle/best_shuttle.fa",
        help="Output FASTA file path (relative to project root)",
    )
    options = cli.parse_args()
    extract_best_design(options.input_fa, options.output_fa)