File size: 5,422 Bytes
714cf46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import argparse
import concurrent.futures
import subprocess
from pathlib import Path

from tqdm import tqdm

# Shell-script template for the polymer (protein / nucleic acid) comparison.
# It is rendered with ``str.format`` using the placeholders ``model_file``,
# ``reference_file``, ``output_path`` and ``mount`` and then handed to a shell.
# The script runs the ``compare-structures`` action inside the docker image
# named by IMAGE_NAME, as the calling user's uid/gid, with ``mount``
# bind-mounted at the same path inside the container.
# The literal is a raw string so the trailing backslashes reach the shell
# unchanged and act as line continuations inside the quoted ``command``.
# NOTE(review): ``$command`` is expanded unquoted, so paths containing
# whitespace would presumably be split into several arguments - confirm.
OST_COMPARE_STRUCTURE = r"""
#!/bin/bash
# https://openstructure.org/docs/2.7/actions/#ost-compare-structures

IMAGE_NAME=openstructure-0.2.8

command="compare-structures \
-m {model_file} \
-r {reference_file} \
--fault-tolerant \
--min-pep-length 4 \
--min-nuc-length 4 \
-o {output_path} \
--lddt --bb-lddt --qs-score --dockq \
--ics --ips --rigid-scores --patch-scores --tm-score"

sudo docker run -u $(id -u):$(id -g) --rm --volume {mount}:{mount} $IMAGE_NAME $command
"""


# Shell-script template for the ligand comparison.
# It is rendered with ``str.format`` using the placeholders ``model_file``,
# ``reference_file``, ``output_path`` and ``mount`` and then handed to a shell.
# The script runs the ``compare-ligand-structures`` action (requesting the
# ``--lddt-pli`` and ``--rmsd`` scores) inside the docker image named by
# IMAGE_NAME, as the calling user's uid/gid, with ``mount`` bind-mounted at
# the same path inside the container.
# NOTE(review): the URL embedded in the script points at the
# ``compare-structures`` section of the docs although the command executed is
# ``compare-ligand-structures`` - it is only a shell comment, left as is.
OST_COMPARE_LIGAND = r"""
#!/bin/bash
# https://openstructure.org/docs/2.7/actions/#ost-compare-structures

IMAGE_NAME=openstructure-0.2.8

command="compare-ligand-structures \
-m {model_file} \
-r {reference_file} \
--fault-tolerant \
--lddt-pli --rmsd \
--substructure-match \
-o {output_path}"

sudo docker run -u $(id -u):$(id -g) --rm --volume {mount}:{mount} $IMAGE_NAME $command
"""


def _run_ost_script(script: str, label: str, executable: str) -> None:
    """Run one rendered OpenStructure shell script and report a failing exit code."""
    result = subprocess.run(
        script,
        shell=True,  # noqa: S602
        check=False,
        executable=executable,
        capture_output=True,
    )
    # The output is captured and the exit status is not checked by
    # subprocess itself, so without this report a failing docker /
    # OpenStructure run would go completely unnoticed.
    if result.returncode != 0:
        stderr = result.stderr.decode(errors="replace").strip()
        print(  # noqa: T201
            f"OpenStructure {label} failed with exit code "
            f"{result.returncode}: {stderr}"
        )


def evaluate_structure(
    name: str,
    pred: Path,
    reference: Path,
    outdir: str,
    mount: str,
    executable: str = "/bin/bash",
) -> None:
    """Score one predicted structure against its reference with OpenStructure.

    Two comparisons are run, each through its own shell-script template:
    the polymer metrics are written to ``<outdir>/<name>.json`` and the
    ligand metrics to ``<outdir>/<name>_ligand.json``. A comparison whose
    output file already exists is skipped, so interrupted runs can be
    resumed. A comparison that exits with a non-zero status is reported on
    stdout instead of being silently ignored; it does not raise.

    Args:
        name: Base name of the output files.
        pred: Path of the predicted structure. It is interpolated with
            ``str()``, so plain strings are accepted as well.
        reference: Path of the reference structure (``str`` accepted too).
        outdir: Directory receiving the JSON outputs.
        mount: Value substituted for the ``mount`` placeholder of the
            script templates (the docker volume).
        executable: Shell used to interpret the rendered script.
    """
    for kind, suffix in (("protein", ""), ("ligand", "_ligand")):
        out_path = Path(outdir) / f"{name}{suffix}.json"

        if out_path.exists():
            print(  # noqa: T201
                f"Skipping recomputation of {name} as {kind} json file already exists"
            )
            continue

        # Pick the template only when it is really needed.
        template = OST_COMPARE_STRUCTURE if kind == "protein" else OST_COMPARE_LIGAND
        script = template.format(
            model_file=str(pred),
            reference_file=str(reference),
            output_path=str(out_path),
            mount=mount,
        )
        _run_ost_script(script, f"{kind} evaluation of {name}", executable)


def main(args):
    """Evaluate every prediction found in ``args.data`` against its reference.

    Each entry of ``args.data`` is treated as one target folder holding five
    models (ids 0-4). The location of a model file depends on ``args.format``
    (``af3``, ``chai`` or ``boltz``) and the location of the reference inside
    ``args.pdb`` depends on ``args.testset`` (``casp`` or ``test``). The
    first job is evaluated synchronously; all remaining jobs are dispatched
    to a thread pool of ``args.max_workers`` workers.

    Args:
        args: Namespace providing ``data``, ``pdb`` and ``outdir`` (paths),
            ``format``, ``testset``, ``mount``, ``executable`` and
            ``max_workers``.

    Raises:
        ValueError: If ``args.format`` or ``args.testset`` is not supported.
    """
    supported_formats = ("af3", "chai", "boltz")
    supported_testsets = ("casp", "test")
    # Fail early: an unknown value would otherwise leave the prediction or
    # reference path unbound (or stale from a previous iteration).
    if args.format not in supported_formats:
        raise ValueError(
            f"Unsupported format {args.format!r}, expected one of {supported_formats}"
        )
    if args.testset not in supported_testsets:
        raise ValueError(
            f"Unsupported testset {args.testset!r}, expected one of {supported_testsets}"
        )

    # Aggregate the predictions and references
    names = {f.stem.lower(): f for f in args.data.iterdir()}

    # Create the output directory
    args.outdir.mkdir(parents=True, exist_ok=True)

    jobs = []
    for name, folder in names.items():
        # CASP files use the target name with a capitalised first letter.
        casp_name = f"{name[:1].upper()}{name[1:]}"

        if args.testset == "casp":
            ref_path = args.pdb / f"{casp_name}.cif"
        else:
            ref_path = args.pdb / f"{name}.cif.gz"

        for model_id in range(5):
            if args.format == "af3":
                pred_path = folder / f"seed-1_sample-{model_id}" / "model.cif"
            elif args.format == "chai":
                pred_path = folder / f"pred.model_idx_{model_id}.cif"
            else:
                name_file = casp_name if args.testset == "casp" else name
                pred_path = folder / f"{name_file}_model_{model_id}.cif"

            jobs.append(
                {
                    "name": f"{name}_model_{model_id}",
                    "pred": str(pred_path),
                    "reference": str(ref_path),
                    "outdir": str(args.outdir),
                    "mount": args.mount,
                    "executable": args.executable,
                }
            )

    with concurrent.futures.ThreadPoolExecutor(args.max_workers) as executor:
        futures = {}
        for index, job in enumerate(jobs):
            if index == 0:
                # Evaluate the first item on its own before fanning out;
                # this ensures that the docker image is downloaded once.
                evaluate_structure(**job)
            else:
                futures[executor.submit(evaluate_structure, **job)] = job["name"]

        # Wait for all tasks to complete
        with tqdm(total=len(futures)) as pbar:
            for future in concurrent.futures.as_completed(futures):
                error = future.exception()
                if error is not None:
                    # Surface worker errors instead of dropping them.
                    print(  # noqa: T201
                        f"Evaluation of {futures[future]} failed: {error!r}"
                    )
                pbar.update(1)


if __name__ == "__main__":
    # Command-line entry point: parse the options and run the evaluation.
    parser = argparse.ArgumentParser(
        description="Evaluate predicted structures against references with OpenStructure."
    )
    parser.add_argument(
        "data", type=Path, help="Directory with one prediction folder per target."
    )
    parser.add_argument(
        "pdb", type=Path, help="Directory containing the reference structures."
    )
    parser.add_argument(
        "outdir", type=Path, help="Directory receiving the JSON score files."
    )
    # Restrict both selectors to the values main() knows how to handle, so a
    # typo is rejected by the parser instead of failing deep inside the run.
    parser.add_argument(
        "--format",
        type=str,
        default="af3",
        choices=["af3", "chai", "boltz"],
        help="Layout of the prediction folders (default: af3).",
    )
    parser.add_argument(
        "--testset",
        type=str,
        default="casp",
        choices=["casp", "test"],
        help="Naming scheme of the reference files (default: casp).",
    )
    # Without a mount the docker command would be rendered with
    # "--volume None:None", so the option is mandatory.
    parser.add_argument(
        "--mount",
        type=str,
        required=True,
        help="Host directory mounted into the container at the same path.",
    )
    parser.add_argument(
        "--executable",
        type=str,
        default="/bin/bash",
        help="Shell used to run the evaluation scripts (default: /bin/bash).",
    )
    parser.add_argument(
        "--max-workers",
        type=int,
        default=32,
        help="Number of evaluation threads (default: 32).",
    )
    args = parser.parse_args()
    main(args)