annotate extractHeaders.py @ 1:c1ebe32249b0 draft default tip

planemo upload commit 81955d80a45c04ad4874f19f43c36ca608b3735e
author yating-l
date Mon, 06 Nov 2017 15:38:14 -0500
parents 45cb248771d2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
1 #!/usr/bin/python
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
2 """ Extract headers from Fasta file and write the headers to a Tabular file """
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
3
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
4 import sys
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
5
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
def extractHeaders(fasta_file, tab_file):
    """Write the header of every FASTA record to a two-column tabular file.

    Each line of ``fasta_file`` that starts with '>' is a record header.
    Its first whitespace-delimited token (minus the leading '>') is the
    sequence name; the remaining tokens form the description.  One line
    ``name<TAB>description`` is written to ``tab_file`` per header, in the
    order the headers appear in the input.

    Args:
        fasta_file: Path of the FASTA file to read.
        tab_file: Path of the tabular file to create (overwritten if it
            already exists).

    Raises:
        IOError/OSError: If either file cannot be opened.
    """
    # Open the input first so that a missing FASTA file does not leave an
    # empty output file behind.
    with open(fasta_file, 'r') as fasta, open(tab_file, 'w') as out:
        # Iterate lazily: FASTA files can be far larger than memory.
        for line in fasta:
            # Only lines *beginning* with '>' are headers; a '>' elsewhere
            # in a line does not start a new record.
            if not line.startswith('>'):
                continue
            tokens = line.split()
            # Drop only the leading marker so a '>' inside the identifier
            # itself is preserved.
            name = tokens[0][1:]
            # Re-join with single spaces: keeps the words separated while
            # guaranteeing no stray tab ends up inside the description
            # column.
            desc = ' '.join(tokens[1:])
            out.write(name + '\t' + desc + '\n')
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
17
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
18
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
19
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
def main(argv):
    """Command-line entry point.

    ``argv`` follows the ``sys.argv`` layout: ``argv[1]`` is the path of the
    FASTA input and ``argv[2]`` is the path of the tabular output.
    """
    fasta_path, tabular_path = argv[1], argv[2]
    extractHeaders(fasta_path, tabular_path)
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
24
45cb248771d2 planemo upload
yating-l
parents:
diff changeset
# Run the extraction only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)