view example2.py @ 1:d1421e859b75 draft default tip

Deleted selected files
author boris
date Wed, 02 Dec 2015 22:50:23 -0500
parents 47e1dc8b27f8
children
line wrap: on
line source

#! /bin/python


import sys
from os.path import basename

# Command line:
#   <script> <fasta> <linesize> [out_fasta] [out_plot_pdf]
#
# Fail early with a usage message instead of an IndexError traceback when
# the two required arguments are missing.
if len(sys.argv) < 3:
    sys.exit("usage: %s <fasta> <linesize> [out_fasta] [out_plot_pdf]"
             % basename(sys.argv[0]))

# Path of the input FASTA file.
fasta = sys.argv[1]

# Number of sequence characters per line in the rewritten FASTA.
try:
    linesize = int(sys.argv[2])
except ValueError:
    sys.exit("linesize must be an integer, got %r" % sys.argv[2])

# linesize is used further down as the step of range(): a step of zero
# raises there (after the output file has already been truncated) and a
# negative step yields no sequence lines at all, so reject both here.
if linesize <= 0:
    sys.exit("linesize must be a positive integer, got %d" % linesize)

# Optional third argument: name of the rewritten FASTA file. The default
# is derived from the line size and the input file's base name.
if len(sys.argv) > 3:
    outfastaname = sys.argv[3]
else:
    outfastaname = "adjusted_%d_%s" % (linesize, basename(fasta))

# Optional fourth argument: name of the PDF plot. The default is derived
# from the input file's base name.
if len(sys.argv) > 4:
    outplotname = sys.argv[4]
else:
    outplotname = "%s_nt_counts.pdf" % (basename(fasta))


#fasta = "/Users/boris/Desktop/mouse/mouse_reference_mtDNA.fasta"
#linesize = 200

# Read the input FASTA into a header string and one contiguous sequence.
#
# fastaheader keeps this default when the file contains no ">" line.
fastaheader = ">noname"

# Sequence lines are collected in a list and joined once at the end;
# growing a string with "+=" inside the loop can cost quadratic time.
seq_chunks = []

with open(fasta) as fa:
    for line in fa:
        stripped = line.strip()
        if stripped.startswith(">"):
            # If the file has several header lines, the last one wins.
            fastaheader = stripped
        else:
            # Every non-header line is treated as sequence, so the
            # sequences of all records end up concatenated together.
            seq_chunks.append(stripped)

fastaseq = "".join(seq_chunks)


#outfastaname = "adjusted_%d_%s" % (linesize,basename(fasta))
# Write the header followed by the sequence re-wrapped to `linesize`
# characters per line. The with-block closes the file even if a write
# fails; plain "w" is enough because the file is never read back.
with open(outfastaname, "w") as outfile:
    outfile.write(fastaheader + "\n")
    for i in range(0, len(fastaseq), linesize):
        # Slicing past the end is safe: the final line is just shorter.
        outfile.write(fastaseq[i:i + linesize] + "\n")

############################################################################
import matplotlib.pyplot as plt
import numpy as np

# Bar chart of how often each nucleotide symbol occurs in the sequence.
# Counting is case-insensitive; characters other than A/C/G/T/N are not
# counted.
bases = ['A', 'C', 'G', 'T', 'N']
upper_seq = fastaseq.upper()  # upper-case once instead of once per base
counts = np.array([upper_seq.count(nt) for nt in bases])

index = np.arange(len(counts))
bar_width = 0.7

# Centre each bar on its index explicitly. The default alignment of
# plt.bar differs between Matplotlib releases, so relying on it (and
# shifting the tick labels by bar_width/2 to compensate) leaves the
# labels off-centre on versions that already centre the bars.
plt.bar(index, counts, width=bar_width, align='center',
        color=['red', 'green', 'orange', 'blue', 'grey'])

# x range leaves a margin on both sides of the five bars; the y range
# leaves headroom of 1000 counts above the tallest bar.
plt.axis([-1, 5, 0, max(counts) + 1000])
plt.xlabel('Nucleotide')
plt.ylabel('Count')
plt.title('Fasta nucleotide content')
plt.xticks(index, bases)

plt.savefig(outplotname, format="pdf")