Background Workflow |
Pyimport os ################# # SET VARIABLES # ################# subs = 'C' # may also have to change file1 - file3 below par_dir = '/mindstore/home33ext/kscopino/5JUP_PROTOCOL/PIGGYBACKING/COD1_' + subs + '2_1S_50PS/' data_dir = 'COD1_' + subs + '2_' expts = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27] frames_per_ns = 100 ################ # COLLECT DATA # ################ # change directory os.chdir(par_dir) # initialize list for all data dat_list1 = [] dat_list2 = [] dat_list3 = [] # keep track of longest expt max_expt_len = 0 # loop through experiments for e in expts: file1 = par_dir + data_dir + str(e) + '/DATA/rmsd_' + str(e) + '_local_bkbone.dat' file2 = par_dir + data_dir + str(e) + '/DATA/rmsd_' + str(e) + '_NOTrestr_bkbone.dat' file3 = par_dir + data_dir + str(e) + '/DATA/rmsd_' + str(e) + '_restr_bkbone.dat' ########## # FILE 1 # ########## handle1 = open(file1, 'r') # create list of lines lines1 = handle1.readlines() # remove header lines1_nh = lines1[1:] # split into list of frame #s and RMSD, convert to float vals1 = [] for line in lines1_nh: vals1 += line.split() vals1 = [float(v) for v in vals1] # separate RMSDs rmsds1 = [] for a1 in range(1,len(vals1),2): rmsds1.append(vals1[a1]) # number of whole nanoseconds in run and frames in whole nanoseconds num_bins = len(rmsds1) / frames_per_ns num_frames = num_bins * frames_per_ns # check if experiment longer than max if num_frames > max_expt_len: max_expt_len = num_frames # calculate average RMSD for each 1ns bin, add to list of averages avgs1 = [] for b1 in range(0, num_frames, frames_per_ns): bin_rmsds1 = [] for c1 in range(b1, b1 + frames_per_ns): bin_rmsds1.append(rmsds1[c1]) avgs1.append(sum(bin_rmsds1)/ len(bin_rmsds1)) # add to all data dat_list1.append(avgs1) handle1.close() ########## # FILE 2 # ########## handle2 = open(file2, 'r') # create list of lines lines2 = handle2.readlines() # remove header lines2_nh = lines2[1:] # split into list of frame #s and RMSD, convert to float vals2 = [] for line in lines2_nh: vals2 += line.split() vals2 = [float(v) for v in vals2] # separate RMSDs rmsds2 = [] for a2 in range(1,len(vals2),2): rmsds2.append(vals2[a2]) # number of whole nanoseconds in run, frames in whole nanoseconds num_bins = len(rmsds2) / frames_per_ns num_frames = num_bins * frames_per_ns # max frames calculated from file 1 # calculate average RMSD for each 1ns bin, add to list of averages avgs2 = [] for b2 in range(0, num_frames, frames_per_ns): bin_rmsds2 = [] for c2 in range(b2, b2 + frames_per_ns): bin_rmsds2.append(rmsds2[c2]) avgs2.append(sum(bin_rmsds2)/ len(bin_rmsds2)) # add to all data dat_list2.append(avgs2) handle2.close() ########## # FILE 3 # ########## handle3 = open(file3, 'r') # create list of lines lines3 = handle3.readlines() # remove header lines3_nh = lines3[1:] # split into list of frame #s and RMSD, convert to float vals3 = [] for line in lines3_nh: vals3 += line.split() vals3 = [float(v) for v in vals3] # separate RMSDs rmsds3 = [] for a3 in range(1,len(vals3),2): rmsds3.append(vals3[a3]) # number of whole nanoseconds in run, frames in whole nanoseconds num_bins = len(rmsds3) / frames_per_ns num_frames = num_bins * frames_per_ns # max frames calculated from file 1 # calculate average RMSD for each 1ns bin, add to list of averages avgs3 = [] for b3 in range(0, num_frames, frames_per_ns): bin_rmsds3 = [] for c3 in range(b3, b3 + frames_per_ns): bin_rmsds3.append(rmsds3[c3]) avgs3.append(sum(bin_rmsds3)/ len(bin_rmsds3)) # add to all data dat_list3.append(avgs3) handle3.close() ################### # MANIPULATE DATA # ################### # fill experiments shorter than the max with blank values for d1 in dat_list1: if len(d1) < (max_expt_len / frames_per_ns): for f1 in range(len(d1),(max_expt_len / frames_per_ns)): d1.append(None) for d2 in dat_list2: if len(d2) < (max_expt_len / frames_per_ns): for f2 in range(len(d2),(max_expt_len / frames_per_ns)): d2.append(None) for d3 in dat_list3: if len(d3) < (max_expt_len / frames_per_ns): for f3 in range(len(d3),(max_expt_len / frames_per_ns)): d3.append(None) ################# # FORMAT OUTPUT # ################# # ns of longest run ns = [] for g in range(0, max_expt_len, frames_per_ns): ns.append(float((g + (frames_per_ns/2)))/frames_per_ns) # file1 header print('bkbone_local') print('ns\t'), for h1 in expts: print('run_' + str(h1) + '\t'), print('avg') # file1 data for i1 in range(0,(max_expt_len / frames_per_ns)): enum1 = 0 prtline1 = [] prtline1.append(ns[i1]) for j1 in dat_list1: prtline1.append(dat_list1[enum1][i1]) enum1 += 1 avg1 = sum(filter(None, prtline1[1:])) / len(filter(None,prtline1[1:])) prtline1.append(avg1) for k1 in prtline1: print(str(k1)+'\t'), print print # file2 header print('bkbone_unrestr') print('ns\t'), for h2 in expts: print('run_' + str(h2) + '\t'), print('avg') # file2 data for i2 in range(0,(max_expt_len / frames_per_ns)): enum2 = 0 prtline2 = [] prtline2.append(ns[i2]) for j2 in dat_list2: prtline2.append(dat_list2[enum2][i2]) enum2 += 1 avg2 = sum(filter(None, prtline2[1:])) / len(filter(None,prtline2[1:])) prtline2.append(avg2) for k2 in prtline2: print(str(k2)+'\t'), print print # file3 header print('bkbone_unrestr') print('ns\t'), for h3 in expts: print('run_' + str(h3) + '\t'), print('avg') # file3 data for i3 in range(0,(max_expt_len / frames_per_ns)): enum3 = 0 prtline3 = [] prtline3.append(ns[i3]) for j3 in dat_list3: prtline3.append(dat_list3[enum3][i3]) enum3 += 1 avg3 = sum(filter(None, prtline3[1:])) / len(filter(None,prtline3[1:])) prtline3.append(avg3) for k3 in prtline3: print(str(k3)+'\t'), print |