# HG changeset patch # User bgruening # Date 1428662862 14400 # Node ID 53c20f28562edc2896112e3604a4ea0e9dd9d848 # Parent 97ecd7154f6b6c0ed956bceb7098a40d5ed8de81 Uploaded diff -r 97ecd7154f6b -r 53c20f28562e .shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Fri Apr 10 06:47:42 2015 -0400 @@ -0,0 +1,3 @@ +# repository published to https://toolshed.g2.bx.psu.edu/repos/bgruening/find_subsequences +owner: bgruening +name: find_subsequences diff -r 97ecd7154f6b -r 53c20f28562e find_subsequences.py --- a/find_subsequences.py Fri Mar 20 06:21:30 2015 -0400 +++ b/find_subsequences.py Fri Apr 10 06:47:42 2015 -0400 @@ -10,7 +10,7 @@ choices = ['embl', 'fasta', 'fastq-sanger', 'fastq', 'fastq-solexa', 'fastq-illumina', 'genbank', 'gb'] -def find_pattern(seqs, pattern, outfile_path): +def find_pattern(seqs, pattern, outfile_path, strand): """ Finds all occurrences of a pattern in the a given sequence. Outputs sequence ID, start and end postion of the pattern. @@ -23,8 +23,10 @@ with open(outfile_path, 'w+') as outfile: for seq in seqs: - search_func(seq, pattern, outfile) - search_func(seq, rev_compl, outfile, '-') + if strand in ['both', 'forward']: + search_func(seq, pattern, outfile) + if strand in ['both', 'reverse']: + search_func(seq, rev_compl, outfile, '-') def simple_pattern_search(sequence, pattern, outfile, strand='+'): @@ -50,12 +52,13 @@ if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('-i', '--input' , required=True) - parser.add_argument('-o', '--output' , required=True) - parser.add_argument('-p', '--pattern' , required=True) + parser.add_argument('-i', '--input', required=True) + parser.add_argument('-o', '--output', required=True) + parser.add_argument('-p', '--pattern', required=True) + parser.add_argument('--strand', choices=['both', 'forward', 'reverse'], default='both') parser.add_argument('-f', '--format', default="fasta", choices=choices) args = parser.parse_args() with open(args.input) as handle: - find_pattern( SeqIO.parse(handle, args.format), args.pattern, args.output ) + find_pattern( SeqIO.parse(handle, args.format), args.pattern, args.output, args.strand ) diff -r 97ecd7154f6b -r 53c20f28562e find_subsequences.xml --- a/find_subsequences.xml Fri Mar 20 06:21:30 2015 -0400 +++ b/find_subsequences.xml Fri Apr 10 06:47:42 2015 -0400 @@ -6,14 +6,15 @@ @@ -248,6 +249,11 @@ + + + + + @@ -271,6 +277,20 @@ + + + + + + + + + + + + + +