Mercurial > repos > iuc > data_manager_snpsift_dbnsfp
comparison data_manager/data_manager_snpsift_dbnsfp.py @ 2:1adeb79c05f8 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
| author | iuc |
|---|---|
| date | Fri, 10 Jun 2016 08:25:27 -0400 |
| parents | 5f429f5faba5 |
| children | 5ebc15b0bf00 |
comparison
equal
deleted
inserted
replaced
| 1:3cf653fe6c46 | 2:1adeb79c05f8 |
|---|---|
| 101 wtr = open(dbnsfp_tsv, 'w') | 101 wtr = open(dbnsfp_tsv, 'w') |
| 102 allfiles = [info.filename for info in my_zip.infolist()] | 102 allfiles = [info.filename for info in my_zip.infolist()] |
| 103 files = [f for f in allfiles if re.match(dbNSFP_file_pat, f)] | 103 files = [f for f in allfiles if re.match(dbNSFP_file_pat, f)] |
| 104 files = sorted(files, key=natural_sortkey) | 104 files = sorted(files, key=natural_sortkey) |
| 105 for j, file in enumerate(files): | 105 for j, file in enumerate(files): |
| | 106 tempfiles = [] |
| | 107 tempfiles.append(file + "_%d" % len(tempfiles)) |
| | 108 tfh = open(tempfiles[-1], 'w') |
| | 109 lastpos = None |
| 106 fh = my_zip.open(file, 'rU') | 110 fh = my_zip.open(file, 'rU') |
| 107 for i, line in enumerate(fh): | 111 for i, line in enumerate(fh): |
| 108 if j > 0 and i == 0: | 112 if i == 0: |
| | 113 if j == 0: |
| | 114 wtr.write(line) |
| 109 continue | 115 continue |
| 110 wtr.write(line) | 116 else: |
| | 117 pos = int(line.split('\t')[1]) |
| | 118 if lastpos and pos < lastpos: |
| | 119 tfh.close() |
| | 120 tempfiles.append(file + "_%d" % len(tempfiles)) |
| | 121 tfh = open(tempfiles[-1], 'w') |
| | 122 print >> sys.stderr, "%s [%d] pos: %d < %d" % (file, i, pos, lastpos) |
| | 123 lastpos = pos |
| | 124 tfh.write(line) |
| | 125 tfh.close() |
| | 126 if len(tempfiles) == 1: |
| | 127 with open(tempfiles[0], 'r') as tfh: |
| | 128 wtr.writelines(tfh.readlines()) |
| | 129 else: |
| | 130 tfha = [open(temp, 'r') for temp in tempfiles] |
| | 131 lines = [tfh.readline() for tfh in tfha] |
| | 132 curpos = [int(line.split('\t')[1]) for line in lines] |
| | 133 while len(tfha) > 0: |
| | 134 k = curpos.index(min(curpos)) |
| | 135 wtr.write(lines[k]) |
| | 136 line = tfha[k].readline() |
| | 137 if line: |
| | 138 lines[k] = line |
| | 139 curpos[k] = int(line.split('\t')[1]) |
| | 140 else: |
| | 141 tfha[k].close() |
| | 142 del tfha[k] |
| | 143 del lines[k] |
| | 144 del curpos[k] |
| 111 return dbnsfp_tsv | 145 return dbnsfp_tsv |
| 112 | 146 |
| 113 | 147 |
| 114 def main(): | 148 def main(): |
| 115 # Parse Command Line | 149 # Parse Command Line |
