annotate example2.py @ 1:d1421e859b75 draft default tip

Deleted selected files
author boris
date Wed, 02 Dec 2015 22:50:23 -0500
parents 47e1dc8b27f8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
#! /bin/python
"""Re-wrap a FASTA file to a fixed line width and plot its nucleotide counts.

Usage:
    example2.py FASTA LINESIZE [OUT_FASTA] [OUT_PLOT]

FASTA      input FASTA file.
LINESIZE   number of sequence characters per output line (positive integer).
OUT_FASTA  optional output FASTA path; defaults to
           "adjusted_<LINESIZE>_<basename of FASTA>".
OUT_PLOT   optional output PDF path; defaults to
           "<basename of FASTA>_nt_counts.pdf".

The input is treated as a single record: if it contains several header
lines, the last one is kept and all sequence lines are concatenated.
"""
# Provenance: every line of the original script came from changeset
# 47e1dc8b27f8 ("Uploaded example2 first", author boris, no parents).

import sys
from os.path import basename

# Symbols counted for the plot, with one bar colour per symbol.
BASES = ['A', 'C', 'G', 'T', 'N']
BAR_COLORS = ['red', 'green', 'orange', 'blue', 'grey']
BAR_WIDTH = 0.7
# Fixed amount added above the tallest bar when setting the y-axis limit.
Y_HEADROOM = 1000


def read_fasta(path):
    """Read *path* and return ``(header, sequence)``.

    Lines are stripped of surrounding whitespace. A line starting with ">"
    replaces the current header (so the last header in the file wins); every
    other line is appended to the sequence. If the file has no header line
    the header is ">noname".
    """
    header = ">noname"
    chunks = []
    with open(path) as fa:
        for line in fa:
            line = line.strip()
            if line.startswith(">"):
                header = line
            else:
                chunks.append(line)
    # Join once instead of growing a string with += on every line.
    return header, "".join(chunks)


def write_fasta(path, header, seq, linesize):
    """Write *header* and *seq* to *path*, *linesize* characters per line.

    Raises ValueError if *linesize* is not positive: zero would make
    ``range`` fail and a negative step would silently write no sequence.
    """
    if linesize < 1:
        raise ValueError(
            "linesize must be a positive integer, got %d" % linesize)
    # The context manager closes the file even if a write fails.
    with open(path, "w") as outfile:
        outfile.write(header + "\n")
        for start in range(0, len(seq), linesize):
            outfile.write(seq[start:start + linesize] + '\n')


def count_bases(seq, bases=BASES):
    """Return the case-insensitive count of each symbol of *bases* in *seq*."""
    upper = seq.upper()  # uppercase once, not once per base
    return [upper.count(nt) for nt in bases]


def plot_counts(counts, outplotname, bases=BASES):
    """Save a bar chart of *counts* (one bar per entry of *bases*) as a PDF."""
    # Imported here so the FASTA file is already written before the
    # plotting dependency is loaded, as in the original script.
    import matplotlib.pyplot as plt

    positions = list(range(len(counts)))
    # Centre the bars explicitly and put the ticks at the same positions, so
    # labels line up whichever default alignment matplotlib uses.
    plt.bar(positions, counts, BAR_WIDTH, color=BAR_COLORS, align='center')

    plt.axis([-1, 5, 0, max(counts) + Y_HEADROOM])
    plt.xlabel('Nucleotide')
    plt.ylabel('Count')
    plt.title('Fasta nucleotide content')
    plt.xticks(positions, bases)

    plt.savefig(outplotname, format="pdf")


def main(argv=None):
    """Run the script on *argv* (defaults to ``sys.argv[1:]``)."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit("usage: %s FASTA LINESIZE [OUT_FASTA] [OUT_PLOT]"
                 % basename(sys.argv[0]))

    fasta = args[0]
    linesize = int(args[1])
    if linesize < 1:
        sys.exit("LINESIZE must be a positive integer, got %d" % linesize)

    if len(args) >= 3:
        outfastaname = args[2]
    else:
        outfastaname = "adjusted_%d_%s" % (linesize, basename(fasta))

    if len(args) >= 4:
        outplotname = args[3]
    else:
        outplotname = "%s_nt_counts.pdf" % (basename(fasta))

    fastaheader, fastaseq = read_fasta(fasta)
    write_fasta(outfastaname, fastaheader, fastaseq, linesize)
    plot_counts(count_bases(fastaseq), outplotname)


if __name__ == "__main__":
    main()