"""Module containing command-line scripts for the training and planning steps."""
| |
|
| | import os |
| | import warnings |
| | from pathlib import Path |
| |
|
| | import click |
| | import yaml |
| |
|
| | from synplan.chem.data.filtering import ReactionFilterConfig, filter_reactions_from_file |
| | from synplan.chem.data.standardizing import ( |
| | ReactionStandardizationConfig, |
| | standardize_reactions_from_file, |
| | ) |
| | from synplan.chem.reaction_rules.extraction import extract_rules_from_reactions |
| | from synplan.chem.reaction_routes.clustering import run_cluster_cli |
| | from synplan.chem.utils import standardize_building_blocks |
| | from synplan.mcts.search import run_search |
| | from synplan.ml.training.supervised import create_policy_dataset, run_policy_training |
| | from synplan.ml.training.reinforcement import run_updating |
| | from synplan.utils.config import ( |
| | PolicyNetworkConfig, |
| | RuleExtractionConfig, |
| | TreeConfig, |
| | TuningConfig, |
| | ValueNetworkConfig, |
| | ) |
| | from synplan.utils.loading import download_all_data |
| | from synplan.utils.visualisation import ( |
| | routes_clustering_report, |
| | routes_subclustering_report, |
| | ) |
| |
|
# Install a blanket "ignore" filter at import time so that no warning of any
# category is shown while the CLI commands below run.
warnings.filterwarnings("ignore")
| |
|
| |
|
# Root command group: the sub-commands defined in this module attach
# themselves to it through the ``@synplan.command(...)`` decorator.
@click.group(name="synplan")
def synplan() -> None:
    """SynPlanner command line interface."""
| |
|
| |
|
@synplan.command(name="download_all_data")
@click.option(
    "--save_to",
    "save_to",
    default=".",
    show_default=True,
    help="Path to the folder where downloaded data will be stored.",
)
def download_all_data_cli(save_to: str = ".") -> None:
    """Downloads all data for training, planning and benchmarking SynPlanner."""
    # The default is declared on the click option on purpose: click always
    # passes ``save_to`` to this callback explicitly (``None`` when the option
    # has no default), so the Python-level default alone is never used when the
    # command is run from the command line.
    download_all_data(save_to=save_to)
| |
|
| |
|
@synplan.command(name="building_blocks_standardizing")
@click.option(
    "--input",
    "input_file",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with building blocks to be standardized.",
)
@click.option(
    "--output",
    "output_file",
    type=click.Path(),
    required=True,
    help="Path to the file where standardized building blocks will be stored.",
)
def building_blocks_standardizing_cli(input_file: str, output_file: str) -> None:
    """Standardizes building blocks."""
    # Thin CLI wrapper: both paths are forwarded unchanged, by keyword.
    io_paths = {"input_file": input_file, "output_file": output_file}
    standardize_building_blocks(**io_paths)
| |
|
| |
|
@synplan.command(name="reaction_standardizing")
@click.option(
    "--config",
    "config_path",
    required=True,
    type=click.Path(exists=True),
    help="Path to the configuration file for reactions standardizing.",
)
@click.option(
    "--input",
    "input_file",
    required=True,
    type=click.Path(exists=True),
    help="Path to the file with reactions to be standardized.",
)
@click.option(
    "--output",
    "output_file",
    # Required like the output option of the sibling commands: without it
    # click would hand ``None`` to the callback and ``None`` would be
    # forwarded as the output path.
    required=True,
    type=click.Path(),
    help="Path to the file where standardized reactions will be stored.",
)
@click.option(
    "--num_cpus", default=4, type=int, help="The number of CPUs to use for processing."
)
def reaction_standardizing_cli(
    config_path: str, input_file: str, output_file: str, num_cpus: int
) -> None:
    """Standardizes reactions and removes duplicates."""
    # The standardization settings come from the YAML file given via --config.
    stand_config = ReactionStandardizationConfig.from_yaml(config_path)
    standardize_reactions_from_file(
        config=stand_config,
        input_reaction_data_path=input_file,
        standardized_reaction_data_path=output_file,
        num_cpus=num_cpus,
        batch_size=100,  # fixed here; not exposed as a CLI option
    )
| |
|
| |
|
@synplan.command(name="reaction_filtering")
@click.option(
    "--config",
    "config_path",
    required=True,
    type=click.Path(exists=True),
    help="Path to the configuration file for reactions filtering.",
)
@click.option(
    "--input",
    "input_file",
    required=True,
    type=click.Path(exists=True),
    help="Path to the file with reactions to be filtered.",
)
@click.option(
    "--output",
    "output_file",
    # NOTE(review): the default is a directory although the help text speaks
    # of a file -- left unchanged here, confirm what the filtering routine
    # expects.
    default=Path("./"),
    type=click.Path(),
    help="Path to the file where successfully filtered reactions will be stored.",
)
@click.option(
    "--num_cpus", default=4, type=int, help="The number of CPUs to use for processing."
)
def reaction_filtering_cli(
    config_path: str, input_file: str, output_file: str, num_cpus: int
) -> None:
    """Filters erroneous reactions."""
    # Load the config through the class, as every other command in this module
    # does, instead of building a throwaway default instance first.
    reaction_check_config = ReactionFilterConfig.from_yaml(config_path)
    filter_reactions_from_file(
        config=reaction_check_config,
        input_reaction_data_path=input_file,
        filtered_reaction_data_path=output_file,
        num_cpus=num_cpus,
        batch_size=100,  # fixed here; not exposed as a CLI option
    )
| |
|
| |
|
@synplan.command(name="rule_extracting")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for reaction rules extracting.",
)
@click.option(
    "--input",
    "input_file",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with reactions for reaction rules extraction.",
)
@click.option(
    "--output",
    "output_file",
    type=click.Path(),
    required=True,
    help="Path to the file where extracted reaction rules will be stored.",
)
@click.option(
    "--num_cpus",
    type=int,
    default=4,
    help="The number of CPUs to use for processing.",
)
def rule_extracting_cli(
    config_path: str, input_file: str, output_file: str, num_cpus: int
):
    """Reaction rules extraction."""
    # Extraction settings are read from the YAML file given via --config.
    extraction_config = RuleExtractionConfig.from_yaml(config_path)
    extract_rules_from_reactions(
        reaction_data_path=input_file,
        reaction_rules_path=output_file,
        config=extraction_config,
        batch_size=100,  # fixed here; not exposed as a CLI option
        num_cpus=num_cpus,
    )
| |
|
| |
|
@synplan.command(name="ranking_policy_training")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for ranking policy training.",
)
@click.option(
    "--reaction_data",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with reactions for ranking policy training.",
)
@click.option(
    "--reaction_rules",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with extracted reaction rules.",
)
@click.option(
    "--results_dir",
    type=click.Path(),
    default=Path("."),
    help="Path to the directory where the trained policy network will be stored.",
)
@click.option(
    "--num_cpus",
    type=int,
    default=4,
    help="The number of CPUs to use for training set preparation.",
)
def ranking_policy_training_cli(
    config_path: str,
    reaction_data: str,
    reaction_rules: str,
    results_dir: str,
    num_cpus: int,
) -> None:
    """Ranking policy network training."""
    # Whatever policy type the YAML file declares, this command trains the
    # "ranking" flavour, so the value is overwritten after loading.
    network_config = PolicyNetworkConfig.from_yaml(config_path)
    network_config.policy_type = "ranking"

    # The dataset file path, located inside the results directory, is handed
    # to the dataset builder as its output path.
    dataset_path = os.path.join(results_dir, "policy_dataset.dt")
    training_data = create_policy_dataset(
        dataset_type="ranking",
        molecules_or_reactions_path=reaction_data,
        reaction_rules_path=reaction_rules,
        output_path=dataset_path,
        num_cpus=num_cpus,
        batch_size=network_config.batch_size,
    )

    run_policy_training(
        training_data, config=network_config, results_path=results_dir
    )
| |
|
| |
|
@synplan.command(name="filtering_policy_training")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for filtering policy training.",
)
@click.option(
    "--molecule_data",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with molecules for filtering policy training.",
)
@click.option(
    "--reaction_rules",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with extracted reaction rules.",
)
@click.option(
    "--results_dir",
    type=click.Path(),
    default=Path("."),
    help="Path to the directory where the trained policy network will be stored.",
)
@click.option(
    "--num_cpus",
    type=int,
    default=8,
    help="The number of CPUs to use for training set preparation.",
)
def filtering_policy_training_cli(
    config_path: str,
    molecule_data: str,
    reaction_rules: str,
    results_dir: str,
    num_cpus: int,
):
    """Filtering policy network training."""
    # Whatever policy type the YAML file declares, this command trains the
    # "filtering" flavour, so the value is overwritten after loading.
    network_config = PolicyNetworkConfig.from_yaml(config_path)
    network_config.policy_type = "filtering"

    # The dataset file path, located inside the results directory, is handed
    # to the dataset builder as its output path.
    dataset_path = os.path.join(results_dir, "policy_dataset.ckpt")
    training_data = create_policy_dataset(
        dataset_type="filtering",
        molecules_or_reactions_path=molecule_data,
        reaction_rules_path=reaction_rules,
        output_path=dataset_path,
        num_cpus=num_cpus,
        batch_size=network_config.batch_size,
    )

    run_policy_training(
        training_data, config=network_config, results_path=results_dir
    )
| |
|
| |
|
@synplan.command(name="value_network_tuning")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for value network training.",
)
@click.option(
    "--targets",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with target molecules for planning simulations.",
)
@click.option(
    "--reaction_rules",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with extracted reaction rules. Needed for planning simulations.",
)
@click.option(
    "--building_blocks",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with building blocks. Needed for planning simulations.",
)
@click.option(
    "--policy_network",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with trained policy network. Needed for planning simulations.",
)
@click.option(
    "--value_network",
    type=click.Path(exists=True),
    default=None,
    help="Path to the file with trained value network. Needed in case of additional value network fine-tuning",
)
@click.option(
    "--results_dir",
    type=click.Path(exists=False),
    default=".",
    help="Path to the directory where the trained value network will be stored.",
)
def value_network_tuning_cli(
    config_path: str,
    targets: str,
    reaction_rules: str,
    building_blocks: str,
    policy_network: str,
    value_network: str,
    results_dir: str,
):
    """Value network tuning."""

    # One YAML file carries four sections: "node_expansion", "value_network",
    # "tree" and "tuning"; each is turned into its own config object below.
    with open(config_path, "r", encoding="utf-8") as stream:
        settings = yaml.safe_load(stream)

    # The policy weights always come from the command line.
    policy_config = PolicyNetworkConfig.from_dict(settings["node_expansion"])
    policy_config.weights_path = policy_network

    value_config = ValueNetworkConfig.from_dict(settings["value_network"])
    # NOTE(review): when --value_network IS given, its path is not forwarded
    # anywhere -- the weights path loaded from the YAML section stays in
    # effect. Confirm whether the option should override it.
    if value_network is None:
        default_weights = os.path.join(results_dir, "weights", "value_network.ckpt")
        value_config.weights_path = default_weights

    tree_config = TreeConfig.from_dict(settings["tree"])
    tuning_config = TuningConfig.from_dict(settings["tuning"])

    run_updating(
        targets_path=targets,
        reaction_rules_path=reaction_rules,
        building_blocks_path=building_blocks,
        tree_config=tree_config,
        policy_config=policy_config,
        value_config=value_config,
        reinforce_config=tuning_config,
        results_root=results_dir,
    )
| |
|
| |
|
@synplan.command(name="planning")
@click.option(
    "--config",
    "config_path",
    type=click.Path(exists=True),
    required=True,
    help="Path to the configuration file for retrosynthetic planning.",
)
@click.option(
    "--targets",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with target molecules for retrosynthetic planning.",
)
@click.option(
    "--reaction_rules",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with extracted reaction rules.",
)
@click.option(
    "--building_blocks",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with building blocks.",
)
@click.option(
    "--policy_network",
    type=click.Path(exists=True),
    required=True,
    help="Path to the file with trained policy network.",
)
@click.option(
    "--value_network",
    type=click.Path(exists=True),
    default=None,
    help="Path to the file with trained value network.",
)
@click.option(
    "--results_dir",
    type=click.Path(exists=False),
    default=".",
    help="Path to the file where retrosynthetic planning results will be stored.",
)
def planning_cli(
    config_path: str,
    targets: str,
    reaction_rules: str,
    building_blocks: str,
    policy_network: str,
    value_network: str,
    results_dir: str,
):
    """Retrosynthetic planning."""

    with open(config_path, "r", encoding="utf-8") as stream:
        settings = yaml.safe_load(stream)

    # The "tree" and "node_evaluation" sections are flattened into a single
    # mapping; on a key clash the "node_evaluation" value wins.
    tree_section = settings["tree"]
    evaluation_section = settings["node_evaluation"]
    search_config = {**tree_section, **evaluation_section}

    # The policy weights path from the command line overrides any
    # "weights_path" entry found in the "node_expansion" section.
    expansion_section = {**settings["node_expansion"], "weights_path": policy_network}
    policy_config = PolicyNetworkConfig.from_dict(expansion_section)

    run_search(
        targets_path=targets,
        reaction_rules_path=reaction_rules,
        building_blocks_path=building_blocks,
        search_config=search_config,
        policy_config=policy_config,
        value_network_path=value_network,
        results_root=results_dir,
    )
| |
|
| |
|
@synplan.command(name="clustering")
@click.option(
    "--targets",
    required=True,
    type=click.Path(exists=True),
    help="Path to the file with target molecules for retrosynthetic planning.",
)
@click.option(
    "--routes_file",
    default=".",
    type=click.Path(exists=False),
    help="Path to the file where the planning results are stored.",
)
@click.option(
    "--cluster_results_dir",
    default=".",
    type=click.Path(exists=False),
    help="Path to the directory where clustering results will be stored.",
)
@click.option(
    "--perform_subcluster",
    # Parsed as a boolean value (true/false, yes/no, 1/0, ...). Declared as a
    # path, any supplied value -- including "False" -- was a non-empty string
    # and therefore switched subclustering on.
    default=False,
    type=click.BOOL,
    help="Perform subclustering.",
)
@click.option(
    "--subcluster_results_dir",
    default=".",
    type=click.Path(exists=False),
    help="Path to the directory where subclustering results will be stored.",
)
def cluster_route_from_file_cli(
    targets: str,
    routes_file: str,
    cluster_results_dir: str,
    perform_subcluster: bool,
    subcluster_results_dir: str,
):
    """Clustering the routes from planning."""
    # NOTE(review): ``targets`` is a required option but is not used below;
    # it is kept so existing invocations keep working.
    run_cluster_cli(
        routes_file=routes_file,
        cluster_results_dir=cluster_results_dir,
        perform_subcluster=perform_subcluster,
        # The subclustering output location is only forwarded when
        # subclustering was actually requested.
        subcluster_results_dir=subcluster_results_dir if perform_subcluster else None,
    )
| |
|
| |
|
# Allow running the module directly as a script: dispatch to the root
# ``synplan`` click group, which parses the command line.
if __name__ == "__main__":
    synplan()
| |
|