0
|
1 # -*- coding: utf8 -*-
|
|
2
|
|
3 """
|
|
Rename the sequences of custom evidence tracks (BED, GFF3, GTF or BAM) so that the tracks use the same sequence names as the renamed reference.
|
|
5 """
|
|
6 import sys
|
|
7 import csv
|
|
8 import subprocess
|
|
9
|
|
def rename_interval(inputFile, nameDict, renamedFile):
    """Copy an interval track, renaming the sequence in the first column.

    Lines starting with "#" and blank lines are copied unchanged.  For every
    other line the first whitespace-delimited field is looked up in
    ``nameDict``; when it is present, that field (and only that field) is
    replaced by the mapped name.

    inputFile   -- path of the track file to read
    nameDict    -- mapping of original sequence name -> new sequence name
    renamedFile -- path of the renamed track file to write
    """
    # Both handles are managed by ``with`` so they are closed even when an
    # error occurs half-way through the file.
    with open(inputFile, 'r') as reader, open(renamedFile, 'w') as writer:
        # Stream line by line instead of loading the whole track in memory.
        for line in reader:
            if not line.startswith("#"):
                fields = line.split()
                # ``fields`` is empty for blank lines; pass those through.
                if fields and fields[0] in nameDict:
                    scaffold_name = fields[0]
                    # count=1 restricts the substitution to the first column:
                    # the first occurrence of the name in the line is always
                    # the first field itself, so later columns that happen to
                    # contain the same text are left untouched.
                    line = line.replace(
                        scaffold_name, nameDict[scaffold_name], 1)
            writer.write(line)
|
|
21
|
|
def rename_bam(inputFile, nameDict, renamedFile):
    """Write a copy of a BAM file whose header uses the renamed sequences.

    The header is read with ``samtools view -H``, the ``SN:`` field of each
    ``@SQ`` line is replaced when its value is a key of ``nameDict``, and the
    edited header is attached to the alignments with ``samtools reheader``.

    inputFile   -- path of the BAM file to read
    nameDict    -- mapping of original sequence name -> new sequence name
    renamedFile -- path of the BAM file to write

    Raises subprocess.CalledProcessError when either samtools call fails.
    """
    header = subprocess.check_output(
        ['samtools', 'view', '-H', inputFile], universal_newlines=True)

    renamed_lines = []
    for line in header.split('\n'):
        # Only @SQ lines declare reference sequences; every other header
        # line (@HD, @RG, @PG, @CO) is passed through untouched.
        if line.startswith('@SQ'):
            fields = line.split('\t')
            for index, field in enumerate(fields):
                # Compare the whole name, so that renaming "chr1" can never
                # touch "chr10" and names are never interpreted as patterns.
                if field.startswith('SN:') and field[3:] in nameDict:
                    fields[index] = 'SN:' + str(nameDict[field[3:]])
            line = '\t'.join(fields)
        renamed_lines.append(line)
    new_header = '\n'.join(renamed_lines)

    reheader_cmd = ['samtools', 'reheader', '-', inputFile]
    # BAM is binary, so the output handle is opened in binary mode; the
    # ``with`` block closes it once samtools has finished writing.
    with open(renamedFile, 'wb') as out:
        reheader = subprocess.Popen(
            reheader_cmd, stdin=subprocess.PIPE, stdout=out,
            universal_newlines=True)
        # communicate() feeds the header, closes stdin and waits for exit,
        # so the output is complete when this function returns.
        reheader.communicate(new_header)
    if reheader.returncode != 0:
        raise subprocess.CalledProcessError(reheader.returncode, reheader_cmd)
|
|
32
|
|
def getNameDict(nameMapping):
    """Load the sequence-name mapping from a CSV file.

    The first column of each row is used as the key and the second column as
    the value.  Blank lines are skipped.  When the same key appears more than
    once, the last row wins.

    nameMapping -- path of the CSV file to read

    Returns a dict mapping first-column values to second-column values.
    Raises IndexError for a non-blank row with fewer than two columns.
    """
    nameDict = {}
    with open(nameMapping, 'r') as f:
        for row in csv.reader(f):
            # csv.reader yields an empty list for a blank line (typically a
            # trailing newline at the end of the file); indexing it would
            # raise IndexError, so skip it.
            if not row:
                continue
            nameDict[row[0]] = row[1]
    return nameDict
|
|
40
|
|
def main():
    """Command-line entry point.

    Expected arguments, in order:
        inputFile    track file to rename
        nameMapping  CSV file read by getNameDict
        inputFormat  one of "bed", "gff3", "gtf" or "bam"
        outputfile   path of the renamed track to write

    Exits with an error message when arguments are missing or when the
    format is not one of the supported values.
    """
    if len(sys.argv) < 5:
        sys.exit("Usage: %s <inputFile> <nameMapping> "
                 "<bed|gff3|gtf|bam> <outputFile>" % sys.argv[0])
    inputFile, nameMapping, inputFormat, outputfile = sys.argv[1:5]

    # Validate the format before reading any file, so that an unsupported
    # value is reported instead of silently producing no output.
    if inputFormat in ("bed", "gff3", "gtf"):
        rename = rename_interval
    elif inputFormat == "bam":
        rename = rename_bam
    else:
        sys.exit("Unsupported input format: %s "
                 "(expected bed, gff3, gtf or bam)" % inputFormat)

    nameDict = getNameDict(nameMapping)
    rename(inputFile, nameDict, outputfile)
|
|
51
|
|
# Run the renaming only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()