# HG changeset patch
# User bgruening
# Date 1428662862 14400
# Node ID 53c20f28562edc2896112e3604a4ea0e9dd9d848
# Parent 97ecd7154f6b6c0ed956bceb7098a40d5ed8de81
Uploaded
diff -r 97ecd7154f6b -r 53c20f28562e .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Fri Apr 10 06:47:42 2015 -0400
@@ -0,0 +1,3 @@
+# repository published to https://toolshed.g2.bx.psu.edu/repos/bgruening/find_subsequences
+owner: bgruening
+name: find_subsequences
diff -r 97ecd7154f6b -r 53c20f28562e find_subsequences.py
--- a/find_subsequences.py Fri Mar 20 06:21:30 2015 -0400
+++ b/find_subsequences.py Fri Apr 10 06:47:42 2015 -0400
@@ -10,7 +10,7 @@
choices = ['embl', 'fasta', 'fastq-sanger', 'fastq', 'fastq-solexa', 'fastq-illumina', 'genbank', 'gb']
-def find_pattern(seqs, pattern, outfile_path):
+def find_pattern(seqs, pattern, outfile_path, strand):
"""
Finds all occurrences of a pattern in the a given sequence.
Outputs sequence ID, start and end postion of the pattern.
@@ -23,8 +23,10 @@
with open(outfile_path, 'w+') as outfile:
for seq in seqs:
- search_func(seq, pattern, outfile)
- search_func(seq, rev_compl, outfile, '-')
+ if strand in ['both', 'forward']:
+ search_func(seq, pattern, outfile)
+ if strand in ['both', 'reverse']:
+ search_func(seq, rev_compl, outfile, '-')
def simple_pattern_search(sequence, pattern, outfile, strand='+'):
@@ -50,12 +52,13 @@
if __name__ == "__main__":
parser = argparse.ArgumentParser()
- parser.add_argument('-i', '--input' , required=True)
- parser.add_argument('-o', '--output' , required=True)
- parser.add_argument('-p', '--pattern' , required=True)
+ parser.add_argument('-i', '--input', required=True)
+ parser.add_argument('-o', '--output', required=True)
+ parser.add_argument('-p', '--pattern', required=True)
+ parser.add_argument('--strand', choices=['both', 'forward', 'reverse'], default='both')
parser.add_argument('-f', '--format', default="fasta", choices=choices)
args = parser.parse_args()
with open(args.input) as handle:
- find_pattern( SeqIO.parse(handle, args.format), args.pattern, args.output )
+ find_pattern( SeqIO.parse(handle, args.format), args.pattern, args.output, args.strand )
diff -r 97ecd7154f6b -r 53c20f28562e find_subsequences.xml
--- a/find_subsequences.xml Fri Mar 20 06:21:30 2015 -0400
+++ b/find_subsequences.xml Fri Apr 10 06:47:42 2015 -0400
@@ -6,14 +6,15 @@
@@ -248,6 +249,11 @@
+
+
+
+
+
@@ -271,6 +277,20 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+