File size: 2,063 Bytes
32c275c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def _iter_fasta_records(handle):
    """Yield ``(header, sequence)`` pairs from an open FASTA text stream.

    A record starts at a line beginning with ``>``. Every following non-blank
    line up to the next ``>`` line is concatenated into that record's
    sequence, so wrapped (multi-line) sequences are handled. Non-blank lines
    that appear before the first header are ignored.
    """
    header = None
    chunks = []
    for raw in handle:
        line = raw.strip()
        if not line:
            continue
        if line.startswith(">"):
            if header is not None:
                yield header, "".join(chunks)
            header = line
            chunks = []
        elif header is not None:
            chunks.append(line)
    if header is not None:
        yield header, "".join(chunks)


def _parse_score(header):
    """Return the float value of the ``score`` field in *header*, or ``None``.

    The header is split on commas into ``key=value`` fields. The key must be
    exactly ``score`` so that fields such as ``global_score`` are never
    mistaken for it, whatever their order in the header.
    """
    for field in header.lstrip(">").split(","):
        key, sep, value = field.partition("=")
        if sep and key.strip() == "score":
            try:
                return float(value)
            except ValueError:
                return None
    return None


def extract_best_design(fasta_file: str, output_file: str) -> None:
    """Write the lowest-scoring sampled design from a FASTA file to a new file.

    Only records whose header contains ``"sample"`` are considered (the
    original code comments describe the remaining entry as the native
    sequence). Among those, the record with the smallest ``score=`` value is
    kept; on ties the first one encountered wins. The chosen record is written
    as two lines: the header, then the sequence on a single line.

    Args:
        fasta_file: Path of the input FASTA file.
        output_file: Path of the FASTA file to write. Missing parent
            directories are created.

    Raises:
        FileNotFoundError: If ``fasta_file`` does not exist.
        ValueError: If no sampled record with a parseable score and a
            non-empty sequence is found.
    """
    import os

    if not os.path.exists(fasta_file):
        raise FileNotFoundError(f"Input FASTA file not found: {fasta_file}")

    best_score = float('inf')
    best_header = ""
    best_seq = ""

    # Stream the file record by record instead of loading every line at once.
    with open(fasta_file, 'r', encoding="utf-8") as f:
        for header, sequence in _iter_fasta_records(f):
            # Skip non-sample entries and records that carry no sequence.
            if "sample" not in header or not sequence:
                continue

            score = _parse_score(header)
            if score is None:
                # Header has no usable "score=<float>" field.
                continue

            # Strict "<" keeps the earliest record when scores tie.
            if score < best_score:
                best_score = score
                best_header = header
                best_seq = sequence

    if not best_seq:
        raise ValueError(f"No valid designs found in {fasta_file}")

    # dirname is empty for a bare filename; fall back to the current directory.
    out_dir = os.path.dirname(output_file) or '.'
    os.makedirs(out_dir, exist_ok=True)
    with open(output_file, 'w', encoding="utf-8") as f:
        f.write(f"{best_header}\n{best_seq}\n")
    print(f"✅ Success! Best design (score={best_score:.4f}) saved to {output_file}")

if __name__ == "__main__":
    # Script entry point: read the input/output FASTA paths from the command
    # line (both have defaults) and run the extraction once.
    import argparse

    cli = argparse.ArgumentParser(
        description="Extract best design from ProteinMPNN output",
    )
    cli.add_argument(
        "--input_fa",
        type=str,
        default="generated/3kas/seqs/3kas_clones.fa",
        help="Input FASTA file path (relative to project root)",
    )
    cli.add_argument(
        "--output_fa",
        type=str,
        default="generated/shuttle/best_shuttle.fa",
        help="Output FASTA file path (relative to project root)",
    )

    options = cli.parse_args()
    extract_best_design(options.input_fa, options.output_fa)