Mercurial > repos > rnateam > graphclust_preprocessing
view preprocessing.xml @ 7:8634e06ae642 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/GraphClust commit 4406735e44aba20859c252be39f4e99df28c7a92
author | rnateam |
---|---|
date | Sat, 27 Oct 2018 13:19:07 -0400 |
parents | e31c659be8bc |
children |
line wrap: on
line source
<!--
    Galaxy tool wrapper "Preprocessing" (id: preproc).
    Runs preprocessing.pl on a FASTA dataset with the chosen window size,
    window shift and minimum sequence length. When the optional SHAPE or
    alignment datasets are supplied, the bundled split scripts are run
    afterwards.
-->
<tool id="preproc" name="Preprocessing" version="0.5">
    <requirements>
        <requirement type="package" version="0.6.0">graphclust-wrappers</requirement>
        <requirement type="package" version="3.0">zip</requirement>
        <requirement type="package" version="1.70">biopython</requirement>
    </requirements>
    <!-- Any non-zero exit code of the command line marks the job as failed. -->
    <stdio>
        <exit_code range="1:" />
    </stdio>
    <!--
        The optional split steps are chained with "&&", so each one only runs
        if the preceding command succeeded. Cheetah directives are kept on
        their own lines.
    -->
    <command><![CDATA[
        preprocessing.pl
            '$fastaFile'
            $max_length
            $in_winShift
            $min_seq_length
        #if $SHAPEdata:
            && python '$__tool_directory__/splitSHAPE.py' '$SHAPEdata'
        #end if
        #if $AlignmentData:
            && python '$__tool_directory__/splitStockholm.py' '$AlignmentData'
        #end if
    ]]></command>
    <inputs>
        <param type="data" name="fastaFile" format="fasta" label="Input sequences (FASTA)"/>
        <!-- Optional inputs: each one enables the matching split step in the command. -->
        <param type="data" name="SHAPEdata" format="txt" optional="true" label="SHAPE data"/>
        <param type="data" name="AlignmentData" format="stockholm" optional="true" label="Alignments file"/>
        <param name="max_length" type="integer" value="10000" label="window size"/>
        <param name="in_winShift" type="integer" value="100" label="window shift in percent"/>
        <param name="min_seq_length" type="integer" value="5" label="minimum sequence length"/>
    </inputs>
    <!-- All outputs are collected from the job working directory via from_work_dir. -->
    <outputs>
        <data name="data.fasta" format="fasta" from_work_dir="FASTA/data.fasta" label="data.fasta"/>
        <data name="data.map" format="txt" from_work_dir="FASTA/data.map" label="data.map"/>
        <data name="data.names" format="txt" from_work_dir="FASTA/data.names" label="data.names"/>
        <data name="data.fasta.scan" format="fasta" from_work_dir="FASTA/data.fasta.scan" label="data.fasta.scan"/>
        <data name="FASTA" format="zip" from_work_dir="FASTA.zip" label="FASTA.ZIP"/>
        <!--
            The two outputs below belong to the optional split steps. They are
            filtered on the corresponding optional input so that no dataset is
            created for a step that did not run.
        -->
        <data name="shape_data_split" format="txt" from_work_dir="shape_data_split.react" label="SHAPE.data.split">
            <filter>SHAPEdata is not None</filter>
        </data>
        <data name="alignment_data_split" format="stockholm" from_work_dir="alignment_data_split.stk" label="alignments.data.stk">
            <filter>AlignmentData is not None</filter>
        </data>
    </outputs>
    <tests>
        <!-- FASTA only: checks the outputs written below FASTA/. -->
        <test>
            <param name="fastaFile" value="input.fa"/>
            <param name="max_length" value="10000"/>
            <param name="in_winShift" value="100"/>
            <param name="min_seq_length" value="5"/>
            <output name="data.fasta" file="FASTA/data.fasta"/>
            <output name="data.map" file="FASTA/data.map"/>
            <output name="data.names" file="FASTA/data.names"/>
            <output name="data.fasta.scan" file="FASTA/data.fasta.scan"/>
        </test>
        <!-- FASTA plus SHAPE data: checks the split SHAPE output. -->
        <test>
            <param name="fastaFile" value="sample_3.fa"/>
            <param name="SHAPEdata" value="sample_3.react"/>
            <param name="max_length" value="100"/>
            <param name="in_winShift" value="50"/>
            <param name="min_seq_length" value="5"/>
            <output name="shape_data_split" file="sample_3_shape_data_split.react"/>
        </test>
        <!-- FASTA plus alignments: checks the split Stockholm output. -->
        <test>
            <param name="fastaFile" value="sample_4_representatives.fa"/>
            <param name="AlignmentData" value="sample_4_all.stk"/>
            <param name="max_length" value="50"/>
            <param name="in_winShift" value="50"/>
            <param name="min_seq_length" value="5"/>
            <output name="alignment_data_split" file="sample_4_alignment_data_split.stk"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

The tool takes as input a set of sequences in Fasta format and creates the final input for GraphClust based on given parameters.

**Parameters**

+ **window size** : All input sequences are split into fragments of this length. The shift of the sliding window can be defined via option *window shift in percent*. This parameter reflects the expected length of signals to be found. Slightly larger windows are usually ok. Too small windows can disturb existing signals.

+ **window shift in percent** : Relative window size in % for window shift during input preprocessing. Please note that a small shift results in many more fragments for clustering. The benefit is that RNA motifs/structures are not destroyed by arbitrary split points. Smaller shifts usually increase the cluster quality. Too small shifts (<20) are not recommended as a dense center is "polluted" by overlapping fragments and no other occurrences in the dataset can be found.

+ **minimum sequence length** : Minimal length of input sequences. Every input sequence below that length is ignored completely during clustering.
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/bts224</citation>
    </citations>
</tool>