import os
#################
# SET VARIABLES #
#################
subs = 'C'
# may also have to change file1 - file3 below
par_dir = '/mindstore/home33ext/kscopino/5JUP_PROTOCOL/PIGGYBACKING/COD1_' + subs + '2_1S_50PS/'
data_dir = 'COD1_' + subs + '2_'
expts = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27]
frames_per_ns = 100
################
# COLLECT DATA #
################
# change directory
os.chdir(par_dir)
# initialize list for all data
dat_list1 = []
dat_list2 = []
dat_list3 = []
# keep track of longest expt
max_expt_len = 0
# loop through experiments
for e in expts:
file1 = par_dir + data_dir + str(e) + '/DATA/rmsd_' + str(e) + '_local_bkbone.dat'
file2 = par_dir + data_dir + str(e) + '/DATA/rmsd_' + str(e) + '_NOTrestr_bkbone.dat'
file3 = par_dir + data_dir + str(e) + '/DATA/rmsd_' + str(e) + '_restr_bkbone.dat'
##########
# FILE 1 #
##########
handle1 = open(file1, 'r')
# create list of lines
lines1 = handle1.readlines()
# remove header
lines1_nh = lines1[1:]
# split into list of frame #s and RMSD, convert to float
vals1 = []
for line in lines1_nh:
vals1 += line.split()
vals1 = [float(v) for v in vals1]
# separate RMSDs
rmsds1 = []
for a1 in range(1,len(vals1),2):
rmsds1.append(vals1[a1])
# number of whole nanoseconds in run and frames in whole nanoseconds
num_bins = len(rmsds1) / frames_per_ns
num_frames = num_bins * frames_per_ns
# check if experiment longer than max
if num_frames > max_expt_len:
max_expt_len = num_frames
# calculate average RMSD for each 1ns bin, add to list of averages
avgs1 = []
for b1 in range(0, num_frames, frames_per_ns):
bin_rmsds1 = []
for c1 in range(b1, b1 + frames_per_ns):
bin_rmsds1.append(rmsds1[c1])
avgs1.append(sum(bin_rmsds1)/ len(bin_rmsds1))
# add to all data
dat_list1.append(avgs1)
handle1.close()
##########
# FILE 2 #
##########
handle2 = open(file2, 'r')
# create list of lines
lines2 = handle2.readlines()
# remove header
lines2_nh = lines2[1:]
# split into list of frame #s and RMSD, convert to float
vals2 = []
for line in lines2_nh:
vals2 += line.split()
vals2 = [float(v) for v in vals2]
# separate RMSDs
rmsds2 = []
for a2 in range(1,len(vals2),2):
rmsds2.append(vals2[a2])
# number of whole nanoseconds in run, frames in whole nanoseconds
num_bins = len(rmsds2) / frames_per_ns
num_frames = num_bins * frames_per_ns
# max frames calculated from file 1
# calculate average RMSD for each 1ns bin, add to list of averages
avgs2 = []
for b2 in range(0, num_frames, frames_per_ns):
bin_rmsds2 = []
for c2 in range(b2, b2 + frames_per_ns):
bin_rmsds2.append(rmsds2[c2])
avgs2.append(sum(bin_rmsds2)/ len(bin_rmsds2))
# add to all data
dat_list2.append(avgs2)
handle2.close()
##########
# FILE 3 #
##########
handle3 = open(file3, 'r')
# create list of lines
lines3 = handle3.readlines()
# remove header
lines3_nh = lines3[1:]
# split into list of frame #s and RMSD, convert to float
vals3 = []
for line in lines3_nh:
vals3 += line.split()
vals3 = [float(v) for v in vals3]
# separate RMSDs
rmsds3 = []
for a3 in range(1,len(vals3),2):
rmsds3.append(vals3[a3])
# number of whole nanoseconds in run, frames in whole nanoseconds
num_bins = len(rmsds3) / frames_per_ns
num_frames = num_bins * frames_per_ns
# max frames calculated from file 1
# calculate average RMSD for each 1ns bin, add to list of averages
avgs3 = []
for b3 in range(0, num_frames, frames_per_ns):
bin_rmsds3 = []
for c3 in range(b3, b3 + frames_per_ns):
bin_rmsds3.append(rmsds3[c3])
avgs3.append(sum(bin_rmsds3)/ len(bin_rmsds3))
# add to all data
dat_list3.append(avgs3)
handle3.close()
###################
# MANIPULATE DATA #
###################
# fill experiments shorter than the max with blank values
for d1 in dat_list1:
if len(d1) < (max_expt_len / frames_per_ns):
for f1 in range(len(d1),(max_expt_len / frames_per_ns)):
d1.append(None)
for d2 in dat_list2:
if len(d2) < (max_expt_len / frames_per_ns):
for f2 in range(len(d2),(max_expt_len / frames_per_ns)):
d2.append(None)
for d3 in dat_list3:
if len(d3) < (max_expt_len / frames_per_ns):
for f3 in range(len(d3),(max_expt_len / frames_per_ns)):
d3.append(None)
#################
# FORMAT OUTPUT #
#################
# ns of longest run
ns = []
for g in range(0, max_expt_len, frames_per_ns):
ns.append(float((g + (frames_per_ns/2)))/frames_per_ns)
# file1 header
print('bkbone_local')
print('ns\t'),
for h1 in expts:
print('run_' + str(h1) + '\t'),
print('avg')
# file1 data
for i1 in range(0,(max_expt_len / frames_per_ns)):
enum1 = 0
prtline1 = []
prtline1.append(ns[i1])
for j1 in dat_list1:
prtline1.append(dat_list1[enum1][i1])
enum1 += 1
avg1 = sum(filter(None, prtline1[1:])) / len(filter(None,prtline1[1:]))
prtline1.append(avg1)
for k1 in prtline1:
print(str(k1)+'\t'),
print
print
# file2 header
print('bkbone_unrestr')
print('ns\t'),
for h2 in expts:
print('run_' + str(h2) + '\t'),
print('avg')
# file2 data
for i2 in range(0,(max_expt_len / frames_per_ns)):
enum2 = 0
prtline2 = []
prtline2.append(ns[i2])
for j2 in dat_list2:
prtline2.append(dat_list2[enum2][i2])
enum2 += 1
avg2 = sum(filter(None, prtline2[1:])) / len(filter(None,prtline2[1:]))
prtline2.append(avg2)
for k2 in prtline2:
print(str(k2)+'\t'),
print
print
# file3 header
print('bkbone_unrestr')
print('ns\t'),
for h3 in expts:
print('run_' + str(h3) + '\t'),
print('avg')
# file3 data
for i3 in range(0,(max_expt_len / frames_per_ns)):
enum3 = 0
prtline3 = []
prtline3.append(ns[i3])
for j3 in dat_list3:
prtline3.append(dat_list3[enum3][i3])
enum3 += 1
avg3 = sum(filter(None, prtline3[1:])) / len(filter(None,prtline3[1:]))
prtline3.append(avg3)
for k3 in prtline3:
print(str(k3)+'\t'),
print