Spaces:
Sleeping
Sleeping
| import os | |
| from Bio.PDB import PDBParser, Superimposer, PDBIO | |
def get_core_rmsd(reference_pdb, design_pdb, plddt_threshold=70.0):
    """
    Compute the CA RMSD between two structures over high-confidence residues.

    Residues of the reference and the design are paired positionally (in
    iteration order). A pair is kept when both residues have a CA atom and
    the design CA's B-factor is >= the confidence threshold. The B-factor is
    treated as pLDDT, following the ESMFold/AlphaFold convention of storing
    pLDDT in that column.

    Args:
        reference_pdb: Path (or handle) of the reference PDB file.
        design_pdb: Path (or handle) of the design PDB file whose B-factors
            are read as pLDDT.
        plddt_threshold: Minimum pLDDT on the 0-100 scale. It is divided by
            100 automatically when the design's B-factors look normalized
            (0-1 scale).

    Returns:
        tuple: (rmsd, n_atoms) where rmsd is the RMSD after optimal
        superposition of the selected CA pairs and n_atoms is the number of
        pairs used.

    Raises:
        ValueError: If no CA atom pairs can be formed at all.
    """
    parser = PDBParser(QUIET=True)
    ref_struct = parser.get_structure("ref", reference_pdb)
    des_struct = parser.get_structure("des", design_pdb)

    # Detect the B-factor scale from the MAXIMUM CA value rather than from
    # the first residue only: a single sample misclassifies a normalized
    # file whose first residue is exactly 1.0.
    max_bfactor = max(
        (res['CA'].get_bfactor() for res in des_struct.get_residues() if 'CA' in res),
        default=None,
    )
    is_normalized = max_bfactor is not None and max_bfactor <= 1.0

    # Normalized: 70 pLDDT == 0.70; raw pLDDT: use the threshold as-is.
    actual_threshold = plddt_threshold / 100.0 if is_normalized else plddt_threshold

    # Keep only CA pairs whose design-side confidence passes the threshold.
    ref_atoms = []
    des_atoms = []
    for ref_res, des_res in zip(ref_struct.get_residues(), des_struct.get_residues()):
        if 'CA' in des_res and 'CA' in ref_res:
            if des_res['CA'].get_bfactor() >= actual_threshold:
                ref_atoms.append(ref_res['CA'])
                des_atoms.append(des_res['CA'])

    if not ref_atoms:
        # Fallback to all CA atoms if no high-confidence ones were found;
        # truncate to the shorter list so the pairs stay one-to-one.
        ref_atoms = [a for a in ref_struct.get_atoms() if a.get_name() == 'CA']
        des_atoms = [a for a in des_struct.get_atoms() if a.get_name() == 'CA']
        min_len = min(len(ref_atoms), len(des_atoms))
        ref_atoms = ref_atoms[:min_len]
        des_atoms = des_atoms[:min_len]

    if not ref_atoms:
        # Fail with a clear message instead of an opaque numerical error
        # from the superposition step.
        raise ValueError(
            "No CA atom pairs available for superposition; "
            "check that both structures contain CA atoms."
        )

    # set_atoms() computes the optimal fit and its RMSD. The transform is not
    # applied because both structures are local to this function.
    super_imposer = Superimposer()
    super_imposer.set_atoms(ref_atoms, des_atoms)
    return super_imposer.rms, len(ref_atoms)
def polish_design(target_pdb_id, uploaded_file_path, plddt_threshold=70.0,
                  output_name="Refined_Shuttle.pdb"):
    """
    Align an uploaded design onto a target structure and report RMSD tiers.

    The design is superimposed onto the target using CA pairs whose design
    B-factor is >= plddt_threshold. The B-factor is treated as pLDDT,
    following the ESMFold/AlphaFold convention. Residues are paired
    positionally. The transformed design is written to `output_name`.

    Args:
        target_pdb_id: Target PDB ID; the file is read from
            data/<lowercased id>.pdb.
        uploaded_file_path: Path of the design PDB file.
        plddt_threshold: Minimum pLDDT (0-100 scale) for the alignment set.
            It is rescaled automatically when B-factors look normalized
            (0-1 scale).
        output_name: Path the aligned design is saved to. Defaults to the
            previously hard-coded "Refined_Shuttle.pdb".

    Returns:
        tuple: (output_name, global_rmsd, core_rmsd, high_conf_rmsd)
            - global_rmsd: RMSD of an independent optimal fit over all CA
              pairs (truncated to the shorter structure).
            - core_rmsd: RMSD of the fit over the plddt_threshold set; this
              is the fit whose transform is applied to the design.
            - high_conf_rmsd: RMSD of an independent optimal fit over pairs
              with pLDDT >= 80, or core_rmsd when there are none.

    Raises:
        ValueError: If no CA atom pairs can be formed at all.
    """
    # 1. Setup paths
    target_path = os.path.join("data", f"{target_pdb_id.lower()}.pdb")

    # 2. ALIGNMENT using core-scaffold RMSD (high-confidence residues only)
    parser = PDBParser(QUIET=True)
    target_struct = parser.get_structure("target", target_path)
    design_struct = parser.get_structure("design", uploaded_file_path)

    # Detect the B-factor scale from the MAXIMUM CA value rather than from
    # the first residue only: a single sample misclassifies a normalized
    # file whose first residue is exactly 1.0.
    max_bfactor = max(
        (res['CA'].get_bfactor() for res in design_struct.get_residues() if 'CA' in res),
        default=None,
    )
    is_normalized = max_bfactor is not None and max_bfactor <= 1.0
    actual_threshold = (plddt_threshold / 100.0) if is_normalized else plddt_threshold
    high_conf_threshold = (80.0 / 100.0) if is_normalized else 80.0

    # Collect CA pairs for the alignment (plddt_threshold) and, separately,
    # the stricter pLDDT >= 80 subset used for the detailed report.
    ref_atoms = []
    des_atoms = []
    ref_atoms_high_conf = []
    des_atoms_high_conf = []
    for ref_res, des_res in zip(target_struct.get_residues(), design_struct.get_residues()):
        if 'CA' in des_res and 'CA' in ref_res:
            plddt = des_res['CA'].get_bfactor()
            if plddt >= actual_threshold:
                ref_atoms.append(ref_res['CA'])
                des_atoms.append(des_res['CA'])
            if plddt >= high_conf_threshold:
                ref_atoms_high_conf.append(ref_res['CA'])
                des_atoms_high_conf.append(des_res['CA'])

    # Fallback to all CA atoms if no high-confidence ones were found
    if not ref_atoms:
        print(f"⚠️ No residues with pLDDT >= {plddt_threshold}. Using all residues.")
        ref_atoms = [a for a in target_struct.get_atoms() if a.get_name() == 'CA']
        des_atoms = [a for a in design_struct.get_atoms() if a.get_name() == 'CA']
        min_len = min(len(ref_atoms), len(des_atoms))
        ref_atoms = ref_atoms[:min_len]
        des_atoms = des_atoms[:min_len]

    if not ref_atoms:
        # Fail with a clear message instead of an opaque numerical error
        # from the superposition step.
        raise ValueError(
            "No CA atom pairs available for superposition; "
            "check that both structures contain CA atoms."
        )

    # Fit on the main-threshold pairs and move the whole design into the
    # target's coordinate frame.
    sup = Superimposer()
    sup.set_atoms(ref_atoms, des_atoms)
    sup.apply(design_struct.get_atoms())
    core_rmsd = sup.rms
    num_residues = len(ref_atoms)
    print(f"🎯 Core-Scaffold RMSD (pLDDT > {plddt_threshold}): {core_rmsd:.3f} Å ({num_residues} residues)")

    # Global RMSD over all CA atoms. NOTE: this is a fresh optimal fit over
    # all CA pairs, not the deviation measured in the core-aligned frame.
    # set_atoms() only computes the fit and does not move any atoms.
    all_ref_atoms = [a for a in target_struct.get_atoms() if a.get_name() == 'CA']
    all_des_atoms = [a for a in design_struct.get_atoms() if a.get_name() == 'CA']
    min_len = min(len(all_ref_atoms), len(all_des_atoms))
    all_ref_atoms = all_ref_atoms[:min_len]
    all_des_atoms = all_des_atoms[:min_len]
    sup_global = Superimposer()
    sup_global.set_atoms(all_ref_atoms, all_des_atoms)
    global_rmsd = sup_global.rms

    # High-confidence core RMSD (pLDDT >= 80), again as an independent fit.
    if ref_atoms_high_conf:
        sup_high_conf = Superimposer()
        sup_high_conf.set_atoms(ref_atoms_high_conf, des_atoms_high_conf)
        high_conf_rmsd = sup_high_conf.rms
    else:
        # If there are no high-confidence atoms, use core_rmsd as fallback
        high_conf_rmsd = core_rmsd

    # 3. EXPORT
    # Saves the design with the core-alignment transform applied, i.e. in
    # the target structure's coordinate space.
    io = PDBIO()
    io.set_structure(design_struct)
    io.save(output_name)
    return output_name, global_rmsd, core_rmsd, high_conf_rmsd
def process_results(target_pdb_id, result_pdb, global_rmsd, core_rmsd):
    """
    Build the text of a structural validation report with a tiered verdict.

    Args:
        target_pdb_id: Target PDB ID (shown upper-cased in the report).
        result_pdb: Path to the aligned result PDB (accepted for interface
            compatibility; not read by this function).
        global_rmsd: Global RMSD (all residues), in Å.
        core_rmsd: High-confidence core RMSD (pLDDT > 80), in Å; this value
            alone decides the design status.

    Returns:
        str: Formatted validation report
    """
    # Verdict tiers as (exclusive upper bound on core RMSD, emoji, label);
    # the first tier whose bound exceeds core_rmsd wins.
    verdict_tiers = (
        (1.0, "✅", "Success - High-Precision Core Match"),
        (2.0, "⚠️", "Good - Minor Core Deviation"),
    )
    # Default verdict when no tier matches (core RMSD of 2.0 Å or more).
    status_emoji, status = "❌", "Possible Fold Drift - Review Required"
    for upper_bound, emoji, label in verdict_tiers:
        if core_rmsd < upper_bound:
            status_emoji, status = emoji, label
            break

    report_lines = [
        "🔬 Structural Validation Report",
        f"Target: {target_pdb_id.upper()}",
        "RMSD Metrics:",
        f"• Global RMSD: {global_rmsd:.2f} Å (all residues)",
        f"• High-Confidence Core RMSD (pLDDT > 80): {core_rmsd:.2f} Å",
        f"Design Status: {status_emoji} {status}",
        "Interpretation:",
        "• Core RMSD < 1.0 Å: Excellent scaffold preservation",
        "• Core RMSD 1.0-2.0 Å: Good structural match",
        "• Core RMSD > 2.0 Å: Possible fold drift, review structure",
    ]
    # The report ends with a trailing newline after the last line.
    return "\n".join(report_lines) + "\n"