# HG changeset patch
# User devteam
# Date 1400511569 14400
# Node ID a0c8173dbdc95c961b7029e88a837af1e7f1697e
Imported from capsule None
diff -r 000000000000 -r a0c8173dbdc9 rmap_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rmap_wrapper.py Mon May 19 10:59:29 2014 -0400
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+
+import os, sys, tempfile
+
+assert sys.version_info[:2] >= (2.4)
+
+def stop_err( msg ):
+
+ sys.stderr.write( "%s\n" % msg )
+ sys.exit()
+
+
+def __main__():
+
+ # I/O
+ target_path = sys.argv[1]
+ infile = sys.argv[2]
+ read_len = sys.argv[3] # -w
+ align_len = sys.argv[4] # -h
+ mismatch = sys.argv[5] # -m
+ output_file = sys.argv[6]
+
+ # first guess the read length
+ guess_read_len = 0
+ seq = ''
+ for i, line in enumerate(open(infile)):
+ line = line.rstrip('\r\n')
+ if line.startswith('>'):
+ if seq:
+ guess_read_len = len(seq)
+ break
+ else:
+ seq += line
+
+ try:
+ test = int(read_len)
+ if test == 0:
+ read_len = str(guess_read_len)
+ else:
+ assert test >= 20 and test <= 64
+ except:
+ stop_err('Invalid value for read length. Must be between 20 and 64.')
+
+ try:
+ int(align_len)
+ except:
+ stop_err('Invalid value for minimal length of a hit.')
+
+ try:
+ int(mismatch)
+ #assert test >= 0 and test <= int(0.1*int(read_len))
+ except:
+ stop_err('Invalid value for mismatch numbers in an alignment.')
+
+ all_files = []
+ if os.path.isdir(target_path):
+
+ # check target genome
+ fa_files = os.listdir(target_path)
+
+ for file in fa_files:
+ file = "%s/%s" % ( target_path, file )
+ file = os.path.normpath(file)
+ all_files.append(file)
+ else:
+ stop_err("No sequences for %s are available for search, please report this error." %(target_path))
+
+ for detail_file_path in all_files:
+ output_tempfile = tempfile.NamedTemporaryFile().name
+ command = "rmap -h %s -w %s -m %s -c %s %s -o %s 2>&1" % ( align_len, read_len, mismatch, detail_file_path, infile, output_tempfile )
+ #print command
+ try:
+ os.system( command )
+ except Exception, e:
+ stop_err( str( e ) )
+
+ try:
+ os.system( 'cat %s >> %s' % ( output_tempfile, output_file ) )
+ except Exception, e:
+ stop_err( str( e ) )
+
+ try:
+ os.remove( output_tempfile )
+ except:
+ pass
+
+
+if __name__ == '__main__': __main__()
diff -r 000000000000 -r a0c8173dbdc9 rmap_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rmap_wrapper.xml Mon May 19 10:59:29 2014 -0400
@@ -0,0 +1,86 @@
+
+ for Solexa Short Reads Alignment
+
+ rmap
+
+
+ #if $trim.choice=="No":
+ rmap_wrapper.py $database $input_seq 0 $align_len $mismatch $output1
+ #else:
+ rmap_wrapper.py $database $input_seq $trim.read_len $align_len $mismatch $output1
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: warningmark
+
+ RMAP was developed for **Solexa** reads.
+
+.. class:: infomark
+
+**TIP**. The tool will guess the length of the reads, however, if you select to trim the reads, the *Reads length* must be between 20 and 64. Reads with lengths longer than the specified value will be trimmed at the 3'end.
+
+-----
+
+**What it does**
+
+This tool runs **rmap** (for more information, please see the reference below), mapping Solexa reads onto a genome build.
+
+-----
+
+**Parameters**
+
+- *Minimal Length of a Hit* (**-h**) : this is the seed length or the minimal exact match length
+- *Number of Mismatches Allowed* (**-m**) : the maximal number of mismatches allowed in an alignment
+- *Read Length* (**-w**) : maximal length of the reads; reads longer than the threshold will be truncated at 3' end.
+
+-----
+
+**Reference**
+
+ **RMAP** is developed by Dr. Andrew D Smith and Dr. Zhenyu Xuan at the Cold Spring Harbor Laboratory. Please see http://rulai.cshl.edu/rmap/
+
+
+
diff -r 000000000000 -r a0c8173dbdc9 test-data/rmap_wrapper_test1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rmap_wrapper_test1.bed Mon May 19 10:59:29 2014 -0400
@@ -0,0 +1,8 @@
+phix 360 396 seq1 1 -
+phix 4188 4224 seq2 1 +
+phix 4908 4944 seq4 0 -
+phix 2811 2847 seq5 2 +
+phix 3847 3883 seq6 0 -
+phix 91 127 seq7 0 +
+phix 2302 2338 seq8 2 +
+phix 2448 2484 seq9 0 +
diff -r 000000000000 -r a0c8173dbdc9 test-data/rmap_wrapper_test1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rmap_wrapper_test1.fasta Mon May 19 10:59:29 2014 -0400
@@ -0,0 +1,20 @@
+>seq1
+GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
+>seq2
+GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
+>seq3
+GTTGTCGATAGAACTTCATGTGCCTGTAAAACAAGT
+>seq4
+ACCAACCAGAACGTGAAAAAGCGTCCTGCGTGTAGC
+>seq5
+GTTTATGTTGGTTTCATGGTTTTGTCTAACTTTATC
+>seq6
+GCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCG
+>seq7
+GCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGC
+>seq8
+GTTATAACGCCGAAGCGGTAAAAATTTTTATTTTTT
+>seq9
+GTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCT
+>seq10
+GTGGCCTGTTGATTCTAAAGGTTAGTTTCTTCACGC
diff -r 000000000000 -r a0c8173dbdc9 tool-data/faseq.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/faseq.loc.sample Mon May 19 10:59:29 2014 -0400
@@ -0,0 +1,26 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use genome fasta sequence files. The faseq.loc file has this format
+#(white space characters are TAB characters):
+#
+#
+#
+# In the dir, each file is fasta format and contains only one sequence. So,
+#for example, if you had hg18 fasta sequences stored in /depot/data2/galaxy/faseq/hg18,
+#then your faseq.loc entry would look like this:
+#
+#hg18 /depot/data2/galaxy/faseq/hg18
+#
+#and your /depot/data2/galaxy/faseq/hg18 directory would contain all of
+#your fasta sequence files (e.g.):
+#
+#-rw-r--r-- 1 wychung galaxy 138082251 2008-04-16 11:57 chr10.fa
+#-rw-r--r-- 1 wychung galaxy 115564 2008-04-16 11:57 chr10_random.fa
+#-rw-r--r-- 1 wychung galaxy 137141451 2008-04-16 11:58 chr11.fa
+#...etc...
+#Your faseq.loc file should include an entry per line for each set of fasta
+#sequence files you have stored. For example:
+#
+#hg18 /depot/data2/galaxy/faseq/hg18
+#mm9 /depot/data2/galaxy/faseq/mm9
+#Arabidopsis /depot/data2/galaxy/faseq/Arabidopsis
+#...etc...
diff -r 000000000000 -r a0c8173dbdc9 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon May 19 10:59:29 2014 -0400
@@ -0,0 +1,6 @@
+
+
+
+
+
+