comparison example2.py @ 0:47e1dc8b27f8 draft

Uploaded example2 first
author boris
date Wed, 02 Dec 2015 22:49:57 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:47e1dc8b27f8
1 #! /bin/python
2
3
4 import sys
5 from os.path import basename
6
7 fasta = sys.argv[1]
8 linesize = int(sys.argv[2])
9
10 if len(sys.argv[1:])>=3:
11 outfastaname = sys.argv[3]
12 else:
13 outfastaname = "adjusted_%d_%s" % (linesize,basename(fasta))
14
15
16 if len(sys.argv[1:])>=4:
17 outplotname = sys.argv[4]
18 else:
19 outplotname = "%s_nt_counts.pdf" % (basename(fasta))
20
21
22 #fasta = "/Users/boris/Desktop/mouse/mouse_reference_mtDNA.fasta"
23 #linesize = 200
24
25 fastaheader = ">noname"
26 fastaseq = ""
27
28 with open(fasta) as fa:
29 for line in fa:
30 if line.strip().startswith(">"):
31 fastaheader = line.strip()
32 else:
33 fastaseq+= line.strip()
34
35
36 #outfastaname = "adjusted_%d_%s" % (linesize,basename(fasta))
37 outfile = open(outfastaname,"w+")
38
39 outfile.write(fastaheader+"\n")
40 for i in range(0,len(fastaseq),linesize):
41 outfile.write(fastaseq[i:i+linesize]+'\n')
42 outfile.close()
43
44 ############################################################################
45 import matplotlib.pyplot as plt
46 import numpy as np
47
48 bases=['A','C','G','T','N']
49 counts = np.array([fastaseq.upper().count(nt) for nt in bases])
50
51 index = np.array(range(len(counts)))
52 bar_width = 0.7
53 plt.bar(index,counts,bar_width,color=['red','green','orange','blue','grey'])
54
55 plt.axis([-1,5,0,max(counts)+1000])
56 plt.xlabel('Nucleotide')
57 plt.ylabel('Count')
58 plt.title('Fasta nucleotide content')
59 plt.xticks(index+bar_width/2, bases)
60
61 plt.savefig(outplotname,format="pdf")
62