0
|
1 #! /bin/python
|
|
2
|
|
3
|
|
4 import sys
|
|
5 from os.path import basename
|
|
6
|
|
7 fasta = sys.argv[1]
|
|
8 linesize = int(sys.argv[2])
|
|
9
|
|
10 if len(sys.argv[1:])>=3:
|
|
11 outfastaname = sys.argv[3]
|
|
12 else:
|
|
13 outfastaname = "adjusted_%d_%s" % (linesize,basename(fasta))
|
|
14
|
|
15
|
|
16 if len(sys.argv[1:])>=4:
|
|
17 outplotname = sys.argv[4]
|
|
18 else:
|
|
19 outplotname = "%s_nt_counts.pdf" % (basename(fasta))
|
|
20
|
|
21
|
|
22 #fasta = "/Users/boris/Desktop/mouse/mouse_reference_mtDNA.fasta"
|
|
23 #linesize = 200
|
|
24
|
|
25 fastaheader = ">noname"
|
|
26 fastaseq = ""
|
|
27
|
|
28 with open(fasta) as fa:
|
|
29 for line in fa:
|
|
30 if line.strip().startswith(">"):
|
|
31 fastaheader = line.strip()
|
|
32 else:
|
|
33 fastaseq+= line.strip()
|
|
34
|
|
35
|
|
36 #outfastaname = "adjusted_%d_%s" % (linesize,basename(fasta))
|
|
37 outfile = open(outfastaname,"w+")
|
|
38
|
|
39 outfile.write(fastaheader+"\n")
|
|
40 for i in range(0,len(fastaseq),linesize):
|
|
41 outfile.write(fastaseq[i:i+linesize]+'\n')
|
|
42 outfile.close()
|
|
43
|
|
44 ############################################################################
|
|
45 import matplotlib.pyplot as plt
|
|
46 import numpy as np
|
|
47
|
|
48 bases=['A','C','G','T','N']
|
|
49 counts = np.array([fastaseq.upper().count(nt) for nt in bases])
|
|
50
|
|
51 index = np.array(range(len(counts)))
|
|
52 bar_width = 0.7
|
|
53 plt.bar(index,counts,bar_width,color=['red','green','orange','blue','grey'])
|
|
54
|
|
55 plt.axis([-1,5,0,max(counts)+1000])
|
|
56 plt.xlabel('Nucleotide')
|
|
57 plt.ylabel('Count')
|
|
58 plt.title('Fasta nucleotide content')
|
|
59 plt.xticks(index+bar_width/2, bases)
|
|
60
|
|
61 plt.savefig(outplotname,format="pdf")
|
|
62
|