# HG changeset patch # User rnateam # Date 1381997249 14400 # Node ID 94926c35b6f37cda5756abdeeb1a0332ba6278da intial uploaded diff -r 000000000000 -r 94926c35b6f3 segemehl.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/segemehl.xml Thu Oct 17 04:07:29 2013 -0400 @@ -0,0 +1,151 @@ + + suffix arrays based short read aligner + + segemehl + + + ## prepare segemehl index if no reference genome is supplied + temp_index = `mktemp`; + #if $refGenomeSource.genomeSource == "history": + segemehl.x -x $temp_index -d $refGenomeSource.own_reference_genome; + #else: + $temp_index = ${refGenomeSource.index.fields.index_path} + #end if + + + ## execute segemehl + segemehl.x + + ## number of threads + -t 4 + + ## db file path + -d ${refGenomeSource.index.fields.db_path} + + -i $temp_index + + ## check for single/pair-end + #if str( $library.type ) == "single": + #set $query_list = list() + ## prepare inputs + #for $fastq in $library.reads: + $query_list.append('%s' %($fastq.input_query)) + #end for + -q "#echo ' '.join( $query_list )#" + #else + ## prepare inputs + + #set $mate1 = list() + #set $mate2 = list() + #for $mate_pair in $library.mate_list: + $mate1.append( str($mate_pair.first_strand_query) ) + $mate2.append( str($mate_pair.second_strand_query) ) + #end for + + -q #echo ','.join($mate1) + -p #echo ','.join($mate2) + + -I $library.maxinsertsize + #end if + -m $minsize + -A $accuracy + -H $hitstrategy + #if str( $prime5 ).strip(): + -P $prime5 + #end if + #if str( $prime3 ).strip(): + -Q $prime3 + #end if + $polyA + $autoclip + $hardclip + $order + -s + -o $segemehl_out + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**What it does** + +Segemelt is a short read mapper with gaps. + +segemehl is a software to map short sequencer reads to reference genomes. 
+Unlike other methods, segemehl is able to detect not only mismatches but also insertions and deletions. +Furthermore, segemehl is not limited to a specific read length and is able to map primer- or polyadenylation-contaminated reads correctly. +segemehl implements a matching strategy based on enhanced suffix arrays (ESA). Segemehl now supports the SAM format, +reads gzipped queries to save both disk and memory space and allows bisulfite sequencing mapping and split read mapping. + + + diff -r 000000000000 -r 94926c35b6f3 tool-data/segemehl_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/segemehl_indices.loc.sample Thu Oct 17 04:07:29 2013 -0400 @@ -0,0 +1,30 @@ +#This is a sample file that enables the segemehl mapper to find genome indices +#and database files. You will need to create these data files and then create +#a segemehl_indices.loc file similar to this one (store it in this directory) +#that points to the directories in which those files are stored. +#The segemehl_indices.loc file has this format (longer white space characters are TAB characters): +# +#<value>   <dbkey>   <name>   <db_path>   <index_path> +# +#So, for example, if you had an hg18 index stored in +#/data/0/galaxy/segemehl/hg18/, +#then the segemehl_indices.loc entry would look like this: +# +#hg18 hg18 hg18 /data/0/galaxy/segemehl/hg18/chromosomes.fa /data/0/galaxy/segemehl/hg18/chromosomes.idx +# +#and your /data/0/galaxy/segemehl/hg18/ directory +#would contain the chromosomes.fa and chromosomes.idx files: +# +#-rw-r--r-- 1 bag bag 539833 2013-10-13 10:12 chromosomes.idx +#-rw-r--r-- 1 bag bag 342562 2013-10-13 10:12 chromosomes.fa +#...etc... +# +#Your segemehl_indices.loc file should include an entry per line for each +#index set you have stored. Each entry gives the full paths of the reference +#FASTA file (db_path) and of its segemehl index file (index_path). 
For example: +# +#hg18canon hg18 hg18 Canonical /data/0/galaxy/segemehl/hg18/chromosomes_can.fa /data/0/galaxy/segemehl/hg18/chromosomes_can.idx +#hg18full hg18 hg18 Full /data/0/galaxy/segemehl/hg18/chromosomes_full.fa /data/0/galaxy/segemehl/hg18/chromosomes_full.idx +#/orig/path/hg19 hg19 hg19 /data/0/galaxy/segemehl/hg19/chromosomes.fa /data/0/galaxy/segemehl/hg19/chromosomes.idx +#...etc... +# diff -r 000000000000 -r 94926c35b6f3 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Oct 17 04:07:29 2013 -0400 @@ -0,0 +1,8 @@ + + + + + value, dbkey, name, db_path, index_path + +
+
diff -r 000000000000 -r 94926c35b6f3 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Oct 17 04:07:29 2013 -0400 @@ -0,0 +1,6 @@ + + + + + +