diff example2.py @ 0:47e1dc8b27f8 draft

Uploaded example2 first
author boris
date Wed, 02 Dec 2015 22:49:57 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example2.py	Wed Dec 02 22:49:57 2015 -0500
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+"""Re-wrap a FASTA file to a fixed line width and plot its nucleotide counts.
+
+Usage: example2.py <fasta> <linesize> [out_fasta] [out_plot.pdf]
+"""
+import sys
+from os.path import basename
+
+if len(sys.argv) < 3:
+    sys.exit("usage: %s <fasta> <linesize> [out_fasta] [out_plot.pdf]" % sys.argv[0])
+fasta = sys.argv[1]
+linesize = int(sys.argv[2])
+if linesize < 1:
+    # A zero step would crash range() later; a negative one would write no sequence.
+    sys.exit("linesize must be a positive integer")
+# Optional output names default to ones derived from the input file's basename.
+extra = sys.argv[3:]
+outfastaname = extra[0] if extra else "adjusted_%d_%s" % (linesize, basename(fasta))
+outplotname = extra[1] if len(extra) > 1 else "%s_nt_counts.pdf" % basename(fasta)
+
+
+# Read the FASTA file into a header line and one unwrapped sequence string.
+# With several records the last header wins and all sequences are concatenated.
+fastaheader = ">noname"  # used when the file has no header line
+seqchunks = []
+with open(fasta) as fa:
+    for line in fa:
+        line = line.strip()
+        if line.startswith(">"):
+            fastaheader = line
+        else:
+            seqchunks.append(line)
+fastaseq = "".join(seqchunks)  # one join instead of repeated string +=
+
+
+# Write the header, then the sequence re-wrapped at `linesize` characters per line.
+# `with` closes the file even if a write fails; plain "w" suffices, nothing is read back.
+with open(outfastaname, "w") as outfile:
+    outfile.write(fastaheader + "\n")
+    for i in range(0, len(fastaseq), linesize):
+        outfile.write(fastaseq[i:i + linesize] + "\n")
+
+
+############################################################################
+# Bar chart of how often each base occurs in the sequence (case-insensitive).
+import matplotlib.pyplot as plt
+import numpy as np
+bases = ['A', 'C', 'G', 'T', 'N']
+upperseq = fastaseq.upper()  # upper-case once instead of once per base
+counts = np.array([upperseq.count(nt) for nt in bases])
+index = np.arange(len(bases))
+bar_width = 0.7
+# Centre bars on their ticks explicitly; bar()'s default alignment changed in matplotlib 2.0.
+plt.bar(index, counts, bar_width, align='center',
+        color=['red', 'green', 'orange', 'blue', 'grey'])
+plt.axis([-1, 5, 0, max(counts) + 1000])  # headroom above the tallest bar
+plt.xlabel('Nucleotide')
+plt.ylabel('Count')
+plt.title('Fasta nucleotide content')
+plt.xticks(index, bases)
+plt.savefig(outplotname, format="pdf")
+