Mercurial > repos > yating-l > rename_tracks
comparison rename_tracks.py @ 0:dd6d3b009659 draft
planemo upload
author | yating-l |
---|---|
date | Fri, 22 Jun 2018 16:25:42 -0400 |
parents | |
children | 75d9357a0a53 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dd6d3b009659 |
---|---|
1 # -*- coding: utf8 -*- | |
2 | |
3 """ | |
4 Rename the custom evidence tracks so that the tracks use the same sequence names as the renamed reference | |
5 """ | |
6 import sys | |
7 import csv | |
8 import subprocess | |
9 | |
def rename_interval(inputFile, nameDict, renamedFile):
    """Copy an interval file, renaming the sequence name in the first column.

    Used for the whitespace-delimited formats dispatched by ``main``
    (bed, gff3, gtf), where the first token of every record is the
    sequence (scaffold) name.

    Args:
        inputFile: path of the track file to read.
        nameDict: mapping of old sequence name -> new sequence name.
        renamedFile: path of the file to write.

    Lines starting with ``#`` and blank lines are copied through unchanged.
    Records whose first token is not a key of ``nameDict`` are also copied
    unchanged.
    """
    with open(inputFile, 'r') as source, open(renamedFile, 'w') as writer:
        # Stream line by line; track files can be large.
        for line in source:
            if not line.startswith("#"):
                # Only the first token is needed; blank lines give [].
                tokens = line.split(None, 1)
                if tokens and tokens[0] in nameDict:
                    scaffold_name = tokens[0]
                    # count=1: the first occurrence is the first column
                    # itself. Later occurrences (e.g. inside a GFF3
                    # "ID=<scaffold>.gene1" attribute) must stay intact.
                    line = line.replace(
                        scaffold_name, nameDict[scaffold_name], 1)
            writer.write(line)
21 | |
def _rename_sq_lines(header_text, mapping):
    """Return ``header_text`` with @SQ ``SN:`` names replaced per ``mapping``."""
    renamed = []
    for line in header_text.splitlines(True):
        if line.startswith('@SQ'):
            body = line.rstrip('\r\n')
            ending = line[len(body):]
            fields = body.split('\t')
            for index, field in enumerate(fields):
                if field.startswith('SN:') and field[3:] in mapping:
                    fields[index] = 'SN:' + mapping[field[3:]]
            line = '\t'.join(fields) + ending
        renamed.append(line)
    return ''.join(renamed)


def rename_bam(inputFile, nameDict, renamedFile):
    """Write a copy of a BAM file whose header uses the renamed sequences.

    The header is read with ``samtools view -H``, the ``SN:`` field of each
    ``@SQ`` line is looked up in ``nameDict`` as an exact, whole-name match,
    and the edited header is fed on stdin to
    ``samtools reheader - <inputFile>``, whose output goes to ``renamedFile``.

    Args:
        inputFile: path of the BAM file to read.
        nameDict: mapping of old sequence name -> new sequence name.
        renamedFile: path of the BAM file to write.

    Raises:
        subprocess.CalledProcessError: if either samtools command exits
            with a non-zero status.
    """
    # Keys/values are stringified, as they were when building sed expressions.
    mapping = dict((str(old), str(new)) for old, new in nameDict.items())

    header_text = subprocess.check_output(
        ['samtools', 'view', '-H', inputFile], universal_newlines=True)

    # Exact lookups on the SN field avoid sed's pitfalls: substring hits
    # ("chr1" inside "chr10"), regex metacharacters in names, edits to
    # non-@SQ lines, and chained renames when a new name is also an old name.
    new_header = _rename_sq_lines(header_text, mapping)

    reheader_cmd = ['samtools', 'reheader', '-', inputFile]
    with open(renamedFile, 'wb') as out:
        reheader = subprocess.Popen(
            reheader_cmd, stdin=subprocess.PIPE, stdout=out,
            universal_newlines=True)
        # communicate() waits for samtools, so the output file is complete
        # before this function returns.
        reheader.communicate(new_header)
    if reheader.returncode != 0:
        raise subprocess.CalledProcessError(reheader.returncode, reheader_cmd)
32 | |
def getNameDict(nameMapping):
    """Load the old-name -> new-name mapping from a CSV file.

    Args:
        nameMapping: path of a CSV file whose first column is the original
            sequence name and whose second column is the new name. Any
            further columns are ignored.

    Returns:
        dict mapping each first-column value to its second-column value.
        If a name occurs more than once, the last row wins.

    Raises:
        IndexError: if a non-blank row has fewer than two columns.
    """
    nameDict = {}
    with open(nameMapping, 'r') as f:
        for row in csv.reader(f):
            # csv.reader yields [] for empty lines; skip blank or
            # whitespace-only rows (e.g. a trailing newline).
            if not any(field.strip() for field in row):
                continue
            nameDict[row[0]] = row[1]
    return nameDict
40 | |
def main():
    """Command-line entry point.

    Expected arguments, in order: input track file, CSV name-mapping file,
    input format (bed, gff3, gtf or bam), output file.

    Exits with a non-zero status and a message on stderr when arguments are
    missing or the format is not supported.
    """
    if len(sys.argv) < 5:
        sys.exit("Usage: %s <input> <name_mapping.csv> <bed|gff3|gtf|bam> "
                 "<output>" % sys.argv[0])
    inputFile, nameMapping, inputFormat, outputfile = sys.argv[1:5]

    nameDict = getNameDict(nameMapping)
    if inputFormat in ("bed", "gff3", "gtf"):
        rename_interval(inputFile, nameDict, outputfile)
    elif inputFormat == "bam":
        rename_bam(inputFile, nameDict, outputfile)
    else:
        # Fail loudly: returning silently would leave no output file while
        # still reporting success to the caller.
        sys.exit("Unsupported input format: %s "
                 "(expected bed, gff3, gtf or bam)" % inputFormat)


if __name__ == "__main__":
    main()