view eden_cluster_motif_finder.py @ 0:95a776023fbc draft

Uploaded
author bgruening
date Thu, 12 Jun 2014 11:35:21 -0400
parents
children 295e085b8d60
line wrap: on
line source

#!/usr/bin/env python
import argparse
import sys
import os
import subprocess
from Bio import SeqIO
import random
import tempfile
import shlex
import shutil
import logging
from eden_wrapper import EDeNWrapper
import eden_iterative_motif_finder
from eden_utilities import log
from eden_utilities import create_params
import eden_cluster_splitter
    
def _collect_cluster_motifs(args):
    """Run the iterative motif finder on every cluster file and gather motifs.

    Walks ``args.output_dir_path`` and, for each file whose name starts with
    ``cluster_``, points ``args.fasta_file`` at it, opens
    ``<filename>.motif`` next to it as ``args.output_file_path`` and calls
    ``eden_iterative_motif_finder.main(args)``. The first whitespace-separated
    token of every non-blank line of the resulting ``.motif`` file is
    collected as a motif.

    Note: ``args.fasta_file`` and ``args.output_file_path`` are mutated; the
    caller is responsible for restoring them if it needs the old values.

    Returns:
        A set of motif strings.
    """
    motif_set = set()
    for root, dirs, files in os.walk(args.output_dir_path):
        for filename in files:
            if not filename.startswith('cluster_'):
                continue
            # '<cluster>.motif' files are outputs written below by a previous
            # run; they are not sequence input and must not be re-processed.
            if filename.endswith('.motif'):
                continue
            args.fasta_file = os.path.join(root, filename)
            out_filepath = os.path.join(root, '%s.motif' % filename)
            args.output_file_path = open(out_filepath, 'w+')
            try:
                eden_iterative_motif_finder.main(args)
            finally:
                # Always release the handle, even if the motif finder fails.
                args.output_file_path.close()
            with open(out_filepath) as motif_file:
                for line in motif_file:
                    fields = line.split()
                    if fields:  # tolerate blank lines
                        motif_set.add(fields[0])
    return motif_set


def main(args):
    """Split the input into clusters, find motifs per cluster, report hits.

    Steps:
      1. Run ``eden_cluster_splitter.main(args)`` (presumably this writes the
         ``cluster_*`` files below ``args.output_dir_path`` -- confirm against
         the splitter).
      2. Run the iterative motif finder on each ``cluster_*`` file and pool
         the motifs found.
      3. For every record of the input FASTA file, write one tab-separated
         line ``record id, motif id, motif, count`` to stdout for each pooled
         motif that occurs at least once in the normalised sequence.

    The motif id is the motif's position when enumerating the pooled set, so
    it is only meaningful within a single run.

    Args:
        args: parsed command-line namespace; ``fasta_file`` and
            ``output_dir_path`` are read here. ``output_file_path`` is
            overwritten while the per-cluster motif finder runs.
    """
    # run cluster split
    eden_cluster_splitter.main(args)
    # work in the output dir
    sys.stdout.write('start motif finder %s\n' % (args.output_dir_path,))

    # Remember the real input: the per-cluster loop repoints args.fasta_file
    # at each cluster file, and the final scan must not read just the last
    # cluster that happened to be visited.
    input_fasta = args.fasta_file
    try:
        motif_set = _collect_cluster_motifs(args)
    finally:
        args.fasta_file = input_fasta

    # Freeze the iteration order once so motif ids are consistent across
    # all records.
    motifs = list(motif_set)

    with open(input_fasta, 'r') as f:
        for record in SeqIO.parse(f, 'fasta'):
            # Normalisation does not depend on the motif: do it once per record.
            seq = eden_iterative_motif_finder.NormaliseSequenceToDNA(record.seq)
            for motif_id, motif in enumerate(motifs):
                motif_count = seq.count(motif)
                if motif_count > 0:
                    sys.stdout.write('%s\t%s\t%s\t%s\n' % (
                        record.id, motif_id, motif, motif_count))
    

if __name__ == '__main__':
    # Script entry point: build the command-line parser, let create_params
    # register the options for the 'eden_cluster_motif' tool, then hand the
    # parsed namespace to main().
    parser = create_params(
        argparse.ArgumentParser(
            description='Extract motifs patterns with EDeN.'),
        'eden_cluster_motif')
    args = parser.parse_args()
    main(args)