view convert_extract_sequence_file.xml @ 1:d7747e6f329f draft default tip

planemo upload commit 0a1bbb0870f6b776175151d4bc818c5546731ca8-dirty
author bebatut
date Thu, 28 Apr 2016 08:34:50 -0400
parents b4cf778be846
children
line wrap: on
line source

<tool id="convert_extract_sequence_file" name="Convert/ Extract information" version="1.0.0">

	<!-- Short description; it reads as the continuation of the tool's name attribute. -->
	<description>from a sequence file, with possible constraints</description>

    <!-- Reusable form fragments, expanded in both the fasta and the fastq branch of the inputs. -->
    <macros>
        <!-- Checkbox list of the pieces of information that can be extracted. -->
        <xml name="extraction_option">
            <param
                name="to_extract"
                type="select"
                multiple="true"
                display="checkboxes"
                label="Information to extract"
                help="">
                <option value="id">Identifiant</option>
                <option value="length">Length</option>
                <option value="description">Description</option>
                <!-- At least one checkbox has to be ticked. -->
                <validator type="no_options" message="Select at least one information to extract"/>
            </param>
        </xml>

        <!-- Yes/No switch used as the test parameter of the "extraction" conditional. -->
        <xml name="extraction_test">
            <param
                name="specific_extraction"
                type="select"
                label="Extract specific information?"
                help="If no is selected, a sequence file is generated. If yes, a text file containing the wanted information is generated">
                <option value="True">Yes</option>
                <option value="False" selected="true">No</option>
            </param>
        </xml>
    </macros>

	<!-- No software requirement is declared for this tool. -->
	<requirements>
  	</requirements>

    <!-- No exit-code or regex rule is defined here. NOTE(review): error detection presumably falls back to Galaxy's defaults; confirm. -->
    <stdio>
    </stdio>

    <!-- Empty: no command is given to report the version of the wrapped script. -->
    <version_command>
    </version_command>

  	<!--<command>-->
    <!--
        Cheetah template building the call to convert_extract_sequence_file.py.
        Exactly one output mode is selected: an information table, split fasta + quality
        files (fastq input only), or a single sequence file in the input format.
        One constraint argument is emitted per id constraint and per length sub-constraint.
    -->
    <command><![CDATA[
        python '$__tool_directory__/convert_extract_sequence_file.py'
            --input '$sequence_file_format.sequence_file'

            --custom_extraction_type $sequence_file_format.extraction.specific_extraction

            #if str($sequence_file_format.extraction.specific_extraction) == "True":
                ## NOTE(review): the braces below are emitted literally around the
                ## comma-separated selection; presumably the script expects them, confirm.
                --to_extract "{$sequence_file_format.extraction.to_extract}"
                --output_information '$information_file'
            #else if str($sequence_file_format.format) == "fastq":
                --split $sequence_file_format.extraction.split.split_test
                ## split_test is a select whose values are the strings "True" / "False".
                ## Both are non-empty, so the value has to be compared explicitly:
                ## a bare truthiness test would always take the split branch.
                #if str($sequence_file_format.extraction.split.split_test) == "True":
                    --quality_format $sequence_file_format.extraction.split.quality_format
                    --output_sequence '$fasta_sequence_file_from_fastq'
                    --output_quality '$quality_file'
                #else:
                    --output_sequence '$fastq_sequence_file'
                #end if
            #else:
                --output_sequence '$fasta_sequence_file'
            #end if

            #if str($constraints.constrained_extraction) == "True":
                #for $constrain in $constraints.constraint_definition
                    #set $info_to_constrain = str($constrain.constrained_information['info_to_constrain'])
                    ## Each constraint is passed as a single "information:type:value" argument,
                    ## kept on one line so that no indentation ends up inside the quotes.
                    #if $info_to_constrain == "id":
                        --constraint "${info_to_constrain}:${constrain.constrained_information.constraint_type.type}:${constrain.constrained_information.constraint_type.value}"
                    #else:
                        #for $sub_constrain in $constrain.constrained_information.constraint_definition
                            --constraint "${info_to_constrain}:${sub_constrain.type}:${sub_constrain.value}"
                        #end for
                    #end if
                #end for
            #end if

            --report '$report_filepath'
            --format $sequence_file_format.format
        ]]>
    </command>

  	<!--
        Tool form.
          * sequence_file_format: format of the input (fasta or fastq), the sequence file
            itself and what to produce from it (a table of information, or sequences;
            fastq sequences can additionally be split into fasta + quality files).
          * constraints: optional filters on the sequence identifier or length.
    -->
  	<inputs>
        <conditional name="sequence_file_format">
            <param name="format" type="select" display="radio"
                label="Format of the sequence file" help="">
                <option value="fasta">Fasta</option>
                <option value="fastq">FastQ</option>
            </param>

            <when value="fastq">
                <param name="sequence_file" type="data" format="fastq"
                    label="Sequence file" help=""/>
                <conditional name="extraction">
                    <expand macro="extraction_test"/>

                    <when value="True">
                        <expand macro="extraction_option"/>
                    </when>
                    <when value="False">
                        <conditional name="split">
                            <param name="split_test" type="select" label="Split file into sequence and quality files?" help="If yes is selected, a fasta and a quality file are generated. If no, a fastq file is generated">
                                <option value="True" selected="true">Yes</option>
                                <option value="False">No</option>
                            </param>

                            <when value="True">
                                <param name="quality_format" type="select" display="radio" label="Coding of quality scores?" help="">
                                    <option value="sanger" selected="true">Sanger (Phred+33)</option>
                                    <option value="solexa">Solexa (Solexa+64) </option>
                                    <option value="illumina_1_3">Illumina 1.3+ (Phred+64) </option>
                                    <option value="illumina_1_5">Illumina 1.5+ (Phred+64) </option>
                                    <option value="illumina_1_8">Illumina 1.8+ (Phred+33) </option>
                                </param>
                            </when>
                            <when value="False"/>
                        </conditional>
                    </when>
                </conditional>
            </when>

            <when value="fasta">
                <param name="sequence_file" type="data" format="fasta"
                    label="Sequence file" help=""/>
                <conditional name="extraction">
                    <expand macro="extraction_test"/>

                    <when value="True">
                        <expand macro="extraction_option"/>
                    </when>
                    <when value="False"/>
                </conditional>
            </when>
        </conditional>

        <conditional name="constraints">
            <param name="constrained_extraction" type="select" label="Constrain extraction?" help="">
                <option value="True">Yes</option>
                <option value="False" selected="true">No</option>
            </param>

            <when value="True">
                <!-- One entry per constrained piece of information; at least one is required. -->
                <repeat name="constraint_definition" title="Constraints on sequences" min="1">
                    <conditional name="constrained_information">
                        <param name="info_to_constrain" type="select" label="Information to constrain" help="">
                            <option value="id">Identifiant</option>
                            <option value="length">Length</option>
                        </param>

                        <when value="id">
                            <conditional name="constraint_type">
                                <param name="type" type="select" display="radio" label="Type of constraint" help="">
                                    <option value="equal">Equal a value</option>
                                    <option value="in">In a list</option>
                                    <option value="not_in">Not in a list</option>
                                </param>
                                <when value="equal">
                                    <param name="value" type="text" size="200" label="Equal to" help="">
                                        <!-- A validator only applies when it is a child of its param. -->
                                        <validator type="empty_field" message="Give a value"/>
                                    </param>
                                </when>
                                <!--
                                    The two dataset parameters below are not optional, so a file
                                    always has to be selected. No validator is attached: an
                                    "unspecified_build" validator checks the genome build of the
                                    dataset, not the presence of a file.
                                -->
                                <when value="in">
                                    <param name="value" type="data" format="txt" label="List of constraint" help="Text file with a value per line and nothing else"/>
                                </when>
                                <when value="not_in">
                                    <param name="value" type="data" format="txt" label="List of constraint" help="Text file with a value per line and nothing else"/>
                                </when>
                            </conditional>
                        </when>

                        <when value="length">
                            <!-- Several length bounds can be combined, e.g. a lower and an upper one. -->
                            <repeat name="constraint_definition" title="Constraint on sequence length" min="1">
                                <param name="type" type="select" label="Type of constraint" help="">
                                    <option value="equal">Equal to </option>
                                    <option value="lower">Lower than </option>
                                    <option value="strictly_lower">Strictly lower than </option>
                                    <option value="greater">Greater than </option>
                                    <option value="strictly_greater">Strictly greater than </option>
                                </param>
                                <param name="value" type="integer" min="0" max="3000" value="100" label="Value" help=""/>
                            </repeat>
                        </when>
                    </conditional>
                </repeat>
            </when>
            <when value="False"/>
        </conditional>
  	</inputs>

  	<!--
        Output datasets. Every dataset except the report is conditional: its filter
        decides from the chosen input parameters whether it is created.
    -->
  	<outputs>
        <!-- Table of extracted information: only when specific information is requested. -->
        <data name="information_file" format="tabular"
            label="${tool.name} on ${on_string}: Information">
            <filter>sequence_file_format['extraction']['specific_extraction'] == "True"</filter>
        </data>

        <!-- Fasta in, fasta out. -->
        <data name="fasta_sequence_file" format="fasta"
            label="${tool.name} on ${on_string}: Extracted sequences">
            <filter>sequence_file_format['format'] == 'fasta' and sequence_file_format['extraction']['specific_extraction'] != "True"</filter>
        </data>

        <!-- Fastq in, fastq out (no split). -->
        <data name="fastq_sequence_file" format="fastq"
            label="${tool.name} on ${on_string}: Extracted sequences">
            <filter>sequence_file_format['format'] == 'fastq' and sequence_file_format['extraction']['specific_extraction'] == "False" and sequence_file_format['extraction']['split']['split_test'] == "False"</filter>
        </data>

        <!-- Fastq in, split requested: quality half of the pair. -->
        <data name="quality_file" format="qual"
            label="${tool.name} on ${on_string}: Extracted quality">
            <filter>sequence_file_format['format'] == 'fastq' and sequence_file_format['extraction']['specific_extraction'] == "False" and sequence_file_format['extraction']['split']['split_test'] == "True"</filter>
        </data>

        <!-- Fastq in, split requested: sequence half of the pair. -->
        <data name="fasta_sequence_file_from_fastq" format="fasta"
            label="${tool.name} on ${on_string}: Extracted sequences">
            <filter>sequence_file_format['format'] == 'fastq' and sequence_file_format['extraction']['specific_extraction'] == "False" and sequence_file_format['extraction']['split']['split_test'] == "True"</filter>
        </data>

        <!-- The report has no filter and is therefore always created. -->
        <data name="report_filepath" format="txt"
            label="${tool.name} on ${on_string}: Report"/>
  	</outputs>

  	<!-- Functional tests; none of them enables constraints. -->
  	<tests>
        <!-- Fasta input: extract the sequence lengths into an information table. -->
        <test>
            <param name="format" value="fasta" />
            <param name="sequence_file" value="input_sequence_file.fasta" />
            <param name="specific_extraction" value="True" />
            <param name="to_extract" value="length" />
            <param name="constrained_extraction" value="False" />
            <output name="information_file" file="information_lenght_fasta_output.txt" />
            <output name="report_filepath" file="report_length_fasta_output.txt" />
        </test>

        <!-- Fastq input with Illumina 1.3+ qualities: split into a fasta and a quality file. -->
        <test>
            <param name="format" value="fastq" />
            <param name="sequence_file" value="input_sequence_file.fastq" />
            <param name="specific_extraction" value="False" />
            <param name="split_test" value="True" />
            <param name="quality_format" value="illumina_1_3" />
            <param name="constrained_extraction" value="False" />
            <output name="quality_file" file="extracted_quality_illumina_1_3_fastq_output.qual" />
            <output name="fasta_sequence_file_from_fastq" file="extracted_sequences_illumina_1_3_fastq_output.fasta" />
            <output name="report_filepath" file="report_illumina_1_3_fastq_output.txt" />
        </test>
  	</tests>

  	<help><![CDATA[

**What it does**

This tool extracts information (sequences, id, length, ...) from sequence files, or converts a FastQ file to a Fasta file.

Constraints can be added to the extraction/conversion. For example, only sequences longer than 30 bp can be extracted, or only sequences whose identifier is in a given list.

The input is a sequence file in fasta or fastq format. The tool generates different outputs given the chosen parameters.
]]>
  	</help>

    <!-- No citation is registered for this tool. -->
    <citations>
    </citations>
</tool>