| | import os |
| | import sys |
| |
|
| | import pandas as pd |
| | import torch |
| |
|
| | import numpy as np |
| |
|
| | import matplotlib.pyplot as plt |
| | import seaborn as sns |
| |
|
| | import linecache |
| | import re |
| |
|
| | from Bio.PDB import PDBParser, PDBIO |
| | import math |
| |
|
| | from Bio.PDB import PDBIO |
| | from Bio.PDB import PDBParser |
| | from Bio.PDB import Superimposer |
| | from Bio.PDB.vectors import calc_angle, calc_dihedral |
| | import Bio.PDB.vectors |
| | |
| | from Bio.PDB.DSSP import DSSP |
| | |
| |
|
# Three-letter -> one-letter amino-acid code map. Includes the CHARMM
# histidine protonation-state variants (HSD/HSE/HSP), all mapped to "H".
# NOTE: a narrower 20-entry duplicate of this table used to precede this
# definition and was immediately overwritten by it; the dead copy was removed.
resdict = {
    "ALA": "A", "ARG": "R", "ASN": "N", "ASP": "D", "CYS": "C",
    "GLN": "Q", "GLU": "E", "GLY": "G",
    "HIS": "H", "HSD": "H", "HSE": "H", "HSP": "H",
    "ILE": "I", "LYS": "K", "LEU": "L", "MET": "M", "PHE": "F",
    "PRO": "P", "SER": "S", "THR": "T", "TRP": "W", "TYR": "Y",
    "VAL": "V",
}


# Default SMD pulling velocity (per step). get_one_force_record reads the
# actual per-run value from box_dimension_after_eq.dat rather than using this.
SMD_Vel = 0.0001
| |
|
| | |
| |
|
def collect_geo_of_backbone(chain):
    """Collect backbone geometry for consecutive residue pairs of a Bio.PDB chain.

    Returns a dict of lists:
      - "AA": one-letter code of every recognized residue (one entry per
        residue, so this list is one longer than the geometry lists);
      - "Bond_CA_N", "Bond_CA_C", "Bond_N_C1": bond lengths (Bio.PDB
        Atom subtraction yields the inter-atom distance);
      - "Angl_CA1_C1_N", "Angl_C1_N_CA", "Angl_N_CA_C": backbone angles
        converted to DEGREES;
      - "Dihe_PHI", "Dihe_PSI", "Dihe_OME": backbone dihedrals left in
        RADIANS. NOTE(review): the degree/radian mix mirrors the original
        code -- confirm downstream consumers expect it.

    Residues whose 3-letter name is not in resdict are skipped entirely.
    """
    to_degrees = 180.0 / math.pi
    geo = {
        "AA": [],
        "Bond_CA_N": [], "Bond_CA_C": [], "Bond_N_C1": [],
        "Angl_CA1_C1_N": [], "Angl_C1_N_CA": [], "Angl_N_CA_C": [],
        "Dihe_PHI": [], "Dihe_PSI": [], "Dihe_OME": [],
    }

    prev_backbone = None  # (N, CA, C) atoms of the previously kept residue
    for residue in chain:
        name3 = residue.get_resname()
        if name3 not in resdict:
            continue

        geo["AA"].append(resdict[name3])

        if prev_backbone is None:
            # First recognized residue: nothing to measure against yet.
            prev_backbone = (residue["N"], residue["CA"], residue["C"])
            continue

        N_prev, CA_prev, C_prev = prev_backbone
        n1 = N_prev.get_vector()
        ca1 = CA_prev.get_vector()
        c1 = C_prev.get_vector()

        N_curr, CA_curr, C_curr = residue["N"], residue["CA"], residue["C"]
        n = N_curr.get_vector()
        ca = CA_curr.get_vector()
        c = C_curr.get_vector()

        # Bond lengths between current backbone atoms (and previous C).
        geo["Bond_CA_N"].append(CA_curr - N_curr)
        geo["Bond_CA_C"].append(CA_curr - C_curr)
        geo["Bond_N_C1"].append(N_curr - C_prev)

        # Backbone angles spanning the peptide bond, in degrees.
        geo["Angl_CA1_C1_N"].append(calc_angle(ca1, c1, n) * to_degrees)
        geo["Angl_C1_N_CA"].append(calc_angle(c1, n, ca) * to_degrees)
        geo["Angl_N_CA_C"].append(calc_angle(n, ca, c) * to_degrees)

        # Backbone dihedrals, in radians.
        geo["Dihe_PHI"].append(calc_dihedral(c1, n, ca, c))
        geo["Dihe_PSI"].append(calc_dihedral(n1, ca1, c1, n))
        geo["Dihe_OME"].append(calc_dihedral(ca1, c1, n, ca))

        prev_backbone = (N_curr, CA_curr, C_curr)

    return geo
| | |
def collect_multi_chain_AA_info(pdb_file):
    """Collect per-chain one-letter amino-acid sequences from a PDB file.

    Returns {"Chain": [chain ids in file order], "AA": {chain id: [letters]}}.
    Only the "AA" part of collect_geo_of_backbone's output is kept.
    """
    structure = PDBParser().get_structure("sample", pdb_file)
    info = {"Chain": [], "AA": {}}
    for chain in structure.get_chains():
        chain_id = chain.get_id()
        info["Chain"].append(chain_id)
        info["AA"][chain_id] = collect_geo_of_backbone(chain)["AA"]
    return info
| |
|
| |
|
| |
|
| | |
| |
|
| | |
| | |
def get_one_force_record(ii, resu_file_name_list):
    """Load one SMD force record from a run directory.

    Parameters
    ----------
    ii : int
        Row index into resu_file_name_list.
    resu_file_name_list : table-like with 'PDB_ID' and 'Path' columns
        (e.g. a pandas DataFrame or a dict of lists).

    Returns (disp_data, force_data, pdb_id, pull_data), where pull_data is
    the SMD pulling velocity (3rd token of line 4 of
    box_dimension_after_eq.dat) multiplied by the step column.
    """
    pdb_id = resu_file_name_list['PDB_ID'][ii]
    run_dir = resu_file_name_list['Path'][ii] + '/1_working_dir'

    data = np.genfromtxt(run_dir + '/collect_results/smd_resu.dat')
    step_data = data[:, 0]   # column 0: SMD step index
    disp_data = data[:, 1]   # column 1: displacement
    force_data = data[:, 7]  # column 7: force (units per SMD output -- TODO confirm)

    # Pulling velocity: 3rd whitespace token on line 4 of the box file.
    # NOTE(review): linecache caches file contents between calls.
    vel_line = linecache.getline(run_dir + '/box_dimension_after_eq.dat', 4)
    smd_vel = float(vel_line.split()[2])
    pull_data = smd_vel * step_data

    return disp_data, force_data, pdb_id, pull_data
| |
|
| | |
def get_one_AA_record(ii, resu_file_name_list):
    """Return the one-letter AA sequence of the first chain of one run's PDB.

    Reads TestProt_chain_0_after_psf.pdb under the run's 1_working_dir.
    """
    pdb_file = (resu_file_name_list['Path'][ii]
                + '/1_working_dir/TestProt_chain_0_after_psf.pdb')
    info = collect_multi_chain_AA_info(pdb_file)
    first_chain = info["Chain"][0]
    return ''.join(info["AA"][first_chain])
| |
|
| | |
def conv_one_record(force_data, kernel_size):
    """Smooth a force trace with a moving-average (box) filter.

    Convolves with a uniform kernel of length kernel_size using
    mode='same', so the output has the same length as the input.
    """
    averaging_kernel = np.full(kernel_size, 1.0 / kernel_size)
    return np.convolve(force_data, averaging_kernel, mode='same')
| |
|
from math import factorial

# scipy.ndimage.filters was a deprecated alias namespace and has been
# removed in modern SciPy releases; import from scipy.ndimage directly.
from scipy.ndimage import uniform_filter1d
| | |
| | |
def savitzky_golay(y, window_size, order, deriv=0, rate=1):
    """Smooth (and optionally differentiate) a 1-D signal with a Savitzky-Golay filter.

    Fits a polynomial of the given order over a sliding window of
    window_size points and evaluates its deriv-th derivative at the
    window center. The signal is mirror-padded at both ends so the output
    has the same length as the input.

    Parameters
    ----------
    y : 1-D array of signal values.
    window_size : odd int, number of points per fit window.
    order : int, polynomial order (< window_size - 1).
    deriv : int, derivative order to return (0 = smoothing).
    rate : sample spacing scale applied to the derivative.

    Raises
    ------
    ValueError if window_size/order are not int-convertible; TypeError if
    window_size is even, non-positive, or too small for the order.
    """
    try:
        window_size = np.abs(int(window_size))
        order = np.abs(int(order))
    except ValueError:
        raise ValueError("window_size and order have to be of type int")
    if window_size % 2 != 1 or window_size < 1:
        raise TypeError("window_size size must be a positive odd number")
    if window_size < order + 2:
        raise TypeError("window_size is too small for the polynomials order")

    order_range = range(order + 1)
    half_window = (window_size - 1) // 2

    # Vandermonde design matrix of the window offsets.
    # np.mat and its .A attribute are deprecated and removed in NumPy 2.0;
    # use a plain ndarray with np.linalg.pinv instead (same values).
    b = np.array([[k ** i for i in order_range]
                  for k in range(-half_window, half_window + 1)])
    m = np.linalg.pinv(b)[deriv] * rate ** deriv * factorial(deriv)

    # Mirror-pad both ends (reflected about the end values) to reduce
    # edge artefacts in the convolution.
    firstvals = y[0] - np.abs(y[1:half_window + 1][::-1] - y[0])
    lastvals = y[-1] + np.abs(y[-half_window - 1:-1][::-1] - y[-1])
    y = np.concatenate((firstvals, y, lastvals))

    return np.convolve(m[::-1], y, mode='valid')
| |
|
| | |
def read_gap_values_from_dat(file):
    """Read the initial and final gap values from a .dat file.

    Lines 2 and 3 must carry the gaps as their third whitespace-separated
    token. Returns (ini_gap, fin_gap) as floats.
    NOTE(review): linecache caches file contents, so a re-written file may
    yield stale lines within the same process.
    """
    second_line = linecache.getline(file, 2).split()
    third_line = linecache.getline(file, 3).split()
    return float(second_line[2]), float(third_line[2])
| |
|
| |
|
def read_one_array_from_df(one_record):
    """Parse a whitespace-separated string of numbers into a float ndarray.

    Uses str.split() (splits on any whitespace run) instead of
    split(" "): repeated, leading, or trailing spaces previously produced
    empty tokens and raised ValueError in float(); they now parse cleanly.
    Single-space-separated input behaves exactly as before.
    """
    return np.array([float(token) for token in one_record.split()])


def read_string_find_max(reco):
    """Return the maximum value encoded in a whitespace-separated number string.

    NOTE: this function was defined twice, identically, in the original
    file; the redundant second definition was removed.
    """
    return np.amax(read_one_array_from_df(reco))
| | |
def cal_seq_end_gap(x):
    """Gap trajectory from positional data.

    x is a mapping with 'posi_data' (array) and 'ini_gap' (scalar);
    returns ini_gap plus the displacement of each sample from the first.
    """
    displacement = x['posi_data'] - x['posi_data'][0]
    return x['ini_gap'] + displacement
| | |
def cal_pull_end_gap(x):
    """Gap trajectory from pulling data.

    x is a mapping with 'pull_data' (array of pull increments) and
    'ini_gap' (scalar); returns ini_gap offset by each increment.
    """
    return x['ini_gap'] + x['pull_data']
| |
|
| | |
| | |
| |
|
def simplify_NormPull_FORCEnF_rec(n_fold, this_seq_len, this_n_PullGap_arr, this_Force_arr):
    """Resample a force-vs-normalized-pull-gap record onto a uniform grid.

    The grid has this_seq_len * n_fold points in [0, 1) plus the endpoint
    1.0. Targets below the first recorded gap are assigned zero force;
    every other target takes the force at the nearest recorded gap.

    Returns a dict with keys 'sample_NormPullGap' and 'smaple_FORCE'.
    NOTE(review): the second key is misspelled, but callers may depend on
    it, so it is preserved as-is.
    """
    n_samples = this_seq_len * n_fold
    # Keep the original arithmetic (1/N * j) so float grid values are
    # bit-identical and nearest-neighbour picks cannot change.
    grid = [1. / n_samples * jj for jj in range(n_samples)]
    grid.append(1.)

    sampled_force = []
    for target in grid:
        if target < this_n_PullGap_arr[0]:
            sampled_force.append(0.)
        else:
            nearest = np.argmin(np.abs(this_n_PullGap_arr - target))
            sampled_force.append(this_Force_arr[nearest])

    return {
        'sample_NormPullGap': np.array(grid),
        'smaple_FORCE': np.array(sampled_force),
    }
| |
|
| | |
def read_input_model_A(file_path):
    """Parse every [...]-bracketed numeric group in a text file into an array.

    Each bracketed group becomes one row fed to np.loadtxt.
    NOTE(review): groups containing embedded newlines are passed through
    unmodified (unlike read_input_model_B, which strips them).
    """
    with open(file_path, 'r') as fh:
        content = fh.read()
    bracketed = re.findall(r'\[([^][]+)\]', content)
    return np.loadtxt(bracketed)
| |
|
def read_input_model_B(file_path):
    """Parse the FIRST [...]-bracketed numeric group of a text file into an array.

    Embedded newlines inside the group are stripped before parsing so a
    wrapped bracket still reads as one row.
    """
    with open(file_path, 'r') as fh:
        content = fh.read()
    bracketed = re.findall(r'\[([^][]+)\]', content)
    first_group = bracketed[0].replace('\n', '')
    return np.loadtxt([first_group])
| |
|
def read_one_input_arr_from_txt(file_path):
    """Read the first [...]-bracketed numeric group of a text file as an array.

    NOTE(review): functionally identical to read_input_model_B; both names
    are kept because other code may call either.
    """
    with open(file_path, 'r') as fh:
        text = fh.read()
    groups = re.findall(r'\[([^][]+)\]', text)
    one_line = groups[0].replace('\n', '')
    return np.loadtxt([one_line])
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
def recover_input_for_model_B_ver2(file_path, seq_len):
    """Return the first seq_len+1 values of the flat array stored in file_path."""
    full_arr = read_one_input_arr_from_txt(file_path)
    return full_arr[:seq_len + 1]
| |
|
| | |
def recover_input_for_model_B_ver3(file_path, seq_len):
    """Return a (seq_len+1)-vector: a leading zero followed by the first
    seq_len values stored in file_path."""
    stored = read_one_input_arr_from_txt(file_path)
    padded = np.zeros(seq_len + 1)
    padded[1:1 + seq_len] = stored[0:seq_len]
    return padded