comparison data_manager/data_manager_snpsift_dbnsfp.py @ 2:1adeb79c05f8 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
author iuc
date Fri, 10 Jun 2016 08:25:27 -0400
parents 5f429f5faba5
children 5ebc15b0bf00
comparison
equal deleted inserted replaced
1:3cf653fe6c46 2:1adeb79c05f8
101 wtr = open(dbnsfp_tsv, 'w') 101 wtr = open(dbnsfp_tsv, 'w')
102 allfiles = [info.filename for info in my_zip.infolist()] 102 allfiles = [info.filename for info in my_zip.infolist()]
103 files = [f for f in allfiles if re.match(dbNSFP_file_pat, f)] 103 files = [f for f in allfiles if re.match(dbNSFP_file_pat, f)]
104 files = sorted(files, key=natural_sortkey) 104 files = sorted(files, key=natural_sortkey)
105 for j, file in enumerate(files): 105 for j, file in enumerate(files):
106 tempfiles = []
107 tempfiles.append(file + "_%d" % len(tempfiles))
108 tfh = open(tempfiles[-1], 'w')
109 lastpos = None
106 fh = my_zip.open(file, 'rU') 110 fh = my_zip.open(file, 'rU')
107 for i, line in enumerate(fh): 111 for i, line in enumerate(fh):
108 if j > 0 and i == 0: 112 if i == 0:
113 if j == 0:
114 wtr.write(line)
109 continue 115 continue
110 wtr.write(line) 116 else:
117 pos = int(line.split('\t')[1])
118 if lastpos and pos < lastpos:
119 tfh.close()
120 tempfiles.append(file + "_%d" % len(tempfiles))
121 tfh = open(tempfiles[-1], 'w')
122 print >> sys.stderr, "%s [%d] pos: %d < %d" % (file, i, pos, lastpos)
123 lastpos = pos
124 tfh.write(line)
125 tfh.close()
126 if len(tempfiles) == 1:
127 with open(tempfiles[0], 'r') as tfh:
128 wtr.writelines(tfh.readlines())
129 else:
130 tfha = [open(temp, 'r') for temp in tempfiles]
131 lines = [tfh.readline() for tfh in tfha]
132 curpos = [int(line.split('\t')[1]) for line in lines]
133 while len(tfha) > 0:
134 k = curpos.index(min(curpos))
135 wtr.write(lines[k])
136 line = tfha[k].readline()
137 if line:
138 lines[k] = line
139 curpos[k] = int(line.split('\t')[1])
140 else:
141 tfha[k].close()
142 del tfha[k]
143 del lines[k]
144 del curpos[k]
111 return dbnsfp_tsv 145 return dbnsfp_tsv
112 146
113 147
114 def main(): 148 def main():
115 # Parse Command Line 149 # Parse Command Line