changeset 0:97363f287de1 draft

planemo upload for repository https://github.com/CPGRZA/cpgr_ancestry commit 77798d7797f07f1f94f5cecd39195bed15987b56
author sanbi-uwc
date Fri, 14 Sep 2018 01:08:00 -0400
parents
children 497594b1d2f4
files shapeit_phase.xml
diffstat 1 files changed, 411 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shapeit_phase.xml	Fri Sep 14 01:08:00 2018 -0400
@@ -0,0 +1,411 @@
+<tool id="shapeit_phase" name="shapeit_phase" version="0.2.0">
+    <description>genomic data with or without reference panel</description>
+
+    <requirements>
+        <requirement type="package" version="v2.r790">shapeit</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+
+    <command><![CDATA[
+        #if str($use_ref_panel) == "use_ref_panel_yes" and str($use_ref_panel.ref_panel_from_table) == "ref_panel_from_table_yes" and str($input_files.input_type_select) == 'plink_bed_bim_fam_list':
+          # this is all expecting 1000GP-style reference data
+          PART=`head -1 myfile.bim | cut -f1` && 
+          REF_DIR='${use_genetic_map.map_from_table.fields.path}' &&
+          REF_PREFIX='${use_genetic_map.map_from_table.fields.reference_prefix}' &&
+          if [ -f "\${REF_DIR}/\${REF_PREFIX}_\${PART}.hap.gz" ] \; then
+            HAP="\${REF_DIR}/\${REF_PREFIX}_\${PART}.hap.gz" &&
+            LEGEND="\${REF_DIR}/\${REF_PREFIX}_\${PART}.legend.gz" &&
+            SAMPLE="\${REF_DIR}/\${REF_PREFIX}.sample" ;
+          fi &&
+          MAP_PREFIX='${use_genetic_map.map_from_table.fields.map_prefix}' &&
+          MAP=`ls \${REF_DIR}/\${MAP_PREFIX}_\${PART}_*` &&
+          if [ ! -f "\${MAP}" ] ; then
+            echo "Failed to find valid map file at \${MAP}" >&2 &&
+            exit 1 ; 
+          fi &&
+        # end 
+
+        shapeit
+
+        #if $input_files.input_type_select == "vcf":
+          --input-vcf ${input_files.vcf_file}
+        #else if $input_files.input_type_select == "plink_ped_map":
+          --input-ped ${input_files.plink_ped} ${input_files.plink_map}
+          #if $input_files.plink_missing_code != "0":
+            --missing_code $input_files.plink_missing_code
+          #end if
+        #else if $input_files.input_type_select == "plink_bed_bim_fam":
+          --input-bed ${input_files.plink_bed} ${input_files.plink_bim} ${input_files.plink_fam}
+        #else if $input_files.input_type_select == "plink_bed_bim_fam_list":
+          --input-bed '${input_files.plink_bed_etc_list}[0]' '${input_files.plink_bed_etc_list}[1]' '${input_files.plink_bed_etc_list}[2]'
+        #else if $input_files.inputy_type_select == "oxford_gen_sample":
+          --input-gen ${input_files.oxford_gen} ${input_files.oxford_sample}
+          #if $input_files.oxford_threshold != "0.9":
+            --input-thr $input_files.oxford_threshold
+          #end if
+        #end if
+
+        #if $use_genetic_map.use_genetic_map_select == "use_genetic_map_yes":
+          #if $use_genetic_map.genetic_map_from_table == "genetic_map_from_table_yes":
+            --input-map \$MAP
+          else:
+            --input-map $use_genetic_map.genetic_map
+          #end if
+        #else if $use_genetic_map.rho != "0.0004":
+          --rho $use_genetic_map.rho
+        #end if
+
+        #if $use_ref_panel.use_ref_panel_select == "use_ref_panel_yes":
+          #if $use_ref_panel.ref_panel_from_table == "ref_panel_from_table_yes":
+            --input-ref \$HAP \$LEGEND \$SAMPLE
+          #else
+            --input-ref $use_ref_panel.ref_panel_haps $use_ref_panel.ref_panel_legend $use_ref_panel.ref_panel_sample
+          #end if
+          #if $use_ref_panel.disable_mcmc_iterations == "disable_mcmc_iterations_yes":
+            --no-mcmc
+          #end if
+          #if $use_ref_panel.filter_groups == "exclude_groups":
+            --exclude-grp $use_ref_panel.filter_groups.exclude_groups_file
+          #else if $use_ref_panel.filter_groups == "include_groups":
+              --include-grp $use_ref_panel.filter_groups.include_groups_file
+          #end if
+        #end if
+
+        #if $use_filters.use_filters_select == "use_filters_yes":
+          #if $use_filters.use_input_from == "use_input_from_yes":
+            --input-from $input_from
+          #end if
+          #if $use_filters.use_input_to == "use_input_to_yes":
+            --input-to $input_to
+          #end if
+          #if $use_filters.use_output_from == "use_output_from_yes":
+            --output-from $output_from
+          #end if
+          #if $use_filters.use_output_to == "use_output_to_yes":
+            --output-to $output_to
+          #end if
+          #if $use_filters.filter_individuals == "exclude_individuals":
+            --exclude-ind $use_filters.exclude_inviduals_file
+          #else if $use_filters.filter_individuals == "include_individuals":
+            --include-ind $use_filters.include_inviduals_file
+          #end if
+          #if $use_filters.filter_individuals == "exclude_SNPs":
+            --exclude-snp $use_filters.exclude_SNPs_file
+          #else if $use_filters.filter_SNPs == "include_SNPs":
+            --include-snp $use_filters.include_SNPs_file
+          #end if
+        #end if
+
+        #if $use_advanced_parameters.use_advanced_parameters_select == "use_advanced_parameters_yes":
+          #if $use_advanced_parameters.chrX == "chrX_yes":
+            --chrX
+          #end if
+          #if $use_advanced_parameters.noped == "noped_yes":
+            --noped
+          #end if
+          #if $use_advanced_parameters.states != "100":
+            --states $use_advanced_parameters.states
+          #end if
+          #if $use_advanced_parameters.window != "2.0":
+            --window $use_advanced_parameters.window
+          #end if
+          #if $use_advanced_parameters.model_version1 == "use_v1_yes":
+            --model-version1
+          #end if
+          #if $use_advanced_parameters.eff_size.eff_size_select == "eff_size_european":
+            --effective-size 11418
+          #else if $use_advanced_parameters.eff_size.eff_size_select == "eff_size_african":
+            --effective-size 17469
+          #else if $use_advanced_parameters.eff_size.eff_size_select == "eff_size_asian":
+            --effective-size 14269
+          #else if $use_advanced_parameters.eff_size.eff_size_select == "eff_size_mixedpop":
+            --effective-size $use_advanced_parameters.eff_size.eff_size_mixed
+          #end if
+          #if $use_advanced_parameters.burn != "7":
+            --burn $use_advanced_parameters.burn
+          #end if
+          #if $use_advanced_parameters.prune != "8":
+            --prune $use_advanced_parameters.prune
+          #end if
+          #if $use_advanced_parameters.main != "20":
+            --main $use_advanced_parameters.main
+          #end if
+          #if $use_advanced_parameters.threads != "1":
+            --threads $use_advanced_parameters.threads
+          #end if
+          #if $use_advanced_parameters.set_rng_seed.set_rng_seed_select == "set_rng_seed_yes":
+            --seed $use_advanced_parameters.rng_seed
+          #end if
+        #end if
+        --output-max $out_haps $out_sample
+        --output-graph $out_graph
+        --output-log $out_log
+    ]]></command>
+
+    <inputs>
+      <conditional name="input_files">
+        <param name="input_type_select" type="select" label="Please specify your input files.">
+          <option value="vcf" selected="True">VCF Variant Call Format</option>
+          <option value="plink_ped_map">Plink PED/MAP format</option>
+          <option value="plink_bed_bim_fam">Plink BED/BIM/FAM format</option>
+          <option value="plink_bed_bim_fam_list">Plink BED/BIM/FAM format list</option>
+          <option value="oxford_gen_sample">Oxford GEN/SAMPLE format</option>
+        </param>
+        <when value="plink_ped_map">
+          <param name="plink_ped" type="data" format="txt" label="Specify text genotype file (.ped)"/>
+          <param name="plink_map" type="data" format="txt" label="Specify SNP map file (.map)"/>
+          <param name="plink_missing_code" type="text" label="Missing data labeled as" value="0"/>
+        </when>
+        <when value="plink_bed_bim_fam">
+          <param name="plink_bed" type="data" format="binary" label="Specify binary genotype file (.bed)"/>
+          <param name="plink_bim" type="data" format="txt" label="Specify SNP map file (.bim)"/>
+          <param name="plink_fam" type="data" format="txt" label="Specify individual information file (.fam)"/>
+        </when>
+        <when value="plink_bed_bim_fam_list">
+          <param name="plink_bed_etc_list" type="data_collection" collection_type="list" label="BED/BIM/FAM files as a collection (list)" />
+        </when>
+        <when value="oxford_gen_sample">
+          <param name="oxford_gen" type="data" format="txt" label="Specify text genotype file (.gen)"/>
+          <param name="oxford_sample" type="data" format="txt" label="Specify individual information file (.sample)"/>
+          <param name="oxford_threshold" type="float" label="SNP Probability threshold" value="0.9"/>
+        </when>
+        <when value="vcf">
+          <param name="vcf_file" type="data" format="vcf" label="Specify Variant Call Format file (.vcf)"/>
+        </when>
+      </conditional>
+
+      <conditional name="use_genetic_map">
+        <param name="use_genetic_map_select" type="select" label="Do you want to use a genetic map?">
+          <option value="use_genetic_map_yes" selected="True">Yes</option>
+          <option value="use_genetic_map_no">No</option>
+        </param>
+        <when value="use_genetic_map_yes">
+          <conditional name="genetic_map_from_table">
+            <param name="genetic_map_from_table_select" type="select" label="Use built in data?">
+              <option value="genetic_map_from_table_yes" selected="True">Yes</option>
+              <option value="genetic_map_from_table_no">No</option>
+            </param>
+            <when value="genetic_map_from_table_yes">
+              <param name="map_from_table" type="select" label="Select reference map">
+                <options from_data_table="shapeit_ref" />
+              </param>
+            </when>
+            <when value="genetic_map_from_table_no">
+              <param name="genetic_map" type="data" format="txt" label="Genetic map file (.gmap)" />
+            </when>
+          </conditional>
+        </when>
+        <when value="use_genetic_map_no">
+          <param name="rho" type="float" value="0.0004" label="Adjust recombination rate (--rho) for human data (0.001)"/>
+        </when>
+      </conditional>
+
+      <conditional name="use_ref_panel">
+        <param name="use_ref_panel_select" type="select" label="Do you want to use a reference panel?">
+          <option value="use_ref_panel_yes">Use a reference panel</option>
+          <option value="use_ref_panel_no" selected="True">Do not use reference panel</option>
+        </param>
+        <when value="use_ref_panel_yes">
+          <conditional name="ref_panel_from_table">
+            <param name="ref_panel_from_table_select" type="select" label="Use built in data?">
+              <option value="ref_panel_from_table_yes" selected="True">Yes</option>
+              <option value="ref_panel_from_table_no">No</option>
+            </param>
+            <when value="ref_panel_from_table_yes">
+              <param name="ref_from_table" type="select" label="Select reference panel">
+                <options from_data_table="shapeit_ref" />
+              </param>
+            </when>
+            <when value="ref_panel_from_table_no">            
+              <param name="ref_panel_haps" type="data" format="data" label="Specify reference haplotypes file (.haps)" />
+              <param name="ref_panel_legend" type="data" format="data" label="Specify SNP map file (.legend)" />
+              <param name="ref_panel_sample" type="data" format="data" label="Specify individual information file (.sample)" />
+            </when>
+          </conditional>
+          <conditional name="disable_mcmc_iterations">
+            <param name="disable_mcmc_iterations_select" type="select" label="Disable MCMC iterations">
+              <option value="disable_mcmc_iterations_yes">Yes</option>
+              <option value="disable_mcmc_iterations_no" selected="True">No</option>
+            </param>
+            <when value="disable_mcmc_iterations_yes"></when>
+            <when value="disable_mcmc_iterations_no"></when>
+          </conditional>
+
+          <conditional name="filter_groups">
+            <param name="filter_groups_select" type="select" label="Do you want to filter groups?">
+              <option value="no_individual_filter" selected="True">No filtering</option>
+              <option value="exclude_groups">Exclude list of groups</option>
+              <option value="include_groups">Limit list of groups</option>
+            </param>
+            <when value="no_individual_filter"></when>
+            <when value="exclude_groups">
+              <param name="exclude_inviduals_file" type="data" format="txt" label="Specify file with excluded groups (.exc)"/>
+            </when>
+            <when value="include_groups">
+              <param name="include_inviduals_file" type="data" format="txt" label="Specify file with limited groups (.inc)"/>
+            </when>
+          </conditional>
+
+        </when>
+        <when value="use_ref_panel_no">
+        </when>
+      </conditional>
+
+      <conditional name="use_advanced_parameters">
+        <param name="use_advanced_parameters_select" type="select" label="Set parameters">
+          <option value="use_advanced_parameters_yes">Use advanced parameters</option>
+          <option value="use_advanced_parameters_no" selected="True">Use standard parameters</option>
+        </param>
+
+        <when value="use_advanced_parameters_yes">
+          <param name="chrX" type="select" label="Use X chromosomal genotypes?">
+            <option value="chrX_yes">Yes</option>
+            <option value="chrX_no" selected="True">No</option>
+          </param>
+
+          <param name="noped" type="select" label="Discard pedigree/family information?">
+            <option value="noped_yes">Yes</option>
+            <option value="noped_no" selected="True">No</option>
+          </param>
+
+          <param name="states" type="integer" label="Number of conditioning haplotypes used during phasing (--states)" value="100"/>
+          <param name="window" type="float" label="Window size [Mb] in which conditioning haplotypes are defined" value="2.0"/>
+          <param name="model_version1" type="select" label="Use SHAPEIT v1 graphical model">
+            <option value="use_v1_yes">Yes</option>
+            <option value="use_v1_no" selected="True">No</option>
+          </param>
+
+          <conditional name="eff_size">
+            <param name="eff_size_select" type="select" label="Effective population size for phasing">
+              <option value="eff_size_european">European population (11418)</option>
+              <option value="eff_size_african">African population (17469)</option>
+              <option value="eff_size_asian">Asian population (14269)</option>
+              <option value="eff_size_mixed_option" selected="True">Mixed population</option>
+            </param>
+            <when value="eff_size_european"></when>
+            <when value="eff_size_african"></when>
+            <when value="eff_size_asian"></when>
+            <when value="eff_size_mixed_option">
+              <param name="eff_size_mixed" type="integer" label="Specify effective population size, depending on proportions [11418,17469]" value="14443"/>
+            </when>
+          </conditional>
+
+          <param name="burn" type="integer" value="7" label="Number of burn-in iterations to find good starting point"/>
+          <param name="prune" type="integer" value="8" label="Number of pruning iterations to find parsimonious graphs"/>
+          <param name="main" type="integer" value="20" label="Number of iterations to compute transition probabilities"/>
+          <param name="threads" type="integer" value="1" label="Specify number of threads"/>
+
+          <conditional name="set_rng_seed">
+            <param name="set_rng_seed_select" type="select" label="Set seed for random number generator (RNG)">
+              <option value="set_rng_seed_yes">Yes</option>
+              <option value="set_rng_seed_no" selected="True">No</option>
+            </param>
+            <when value="set_rng_seed_yes">
+              <param name="rng_seed" type="integer" value="18011981" label="RNG seed" />
+            </when>
+            <when value="set_rng_seed_no"></when>
+          </conditional>
+        </when>
+        <when value="use_advanced_parameters_no"></when>
+      </conditional>
+
+      <conditional name="use_filters">
+        <param name="use_filters_select" type="select" label="Apply filtering">
+          <option value="use_filters_yes">Yes</option>
+          <option value="use_filters_no" selected="True">No</option>
+        </param>
+        <when value="use_filters_yes">
+
+          <conditional name="use_input_from">
+            <param name="use_input_from_select" type="select" label="Only consider genotypes above a position?">
+              <option value="use_input_from_yes">Yes</option>
+              <option value="use_input_from_no" selected="True">No</option>
+            </param>
+            <when value="use_input_from_yes">
+              <param name="input_from" type="integer" value="1" label="Set position:"/>
+            </when>
+            <when value="use_input_from_no"></when>
+          </conditional>
+
+          <conditional name="use_input_to">
+            <param name="use_input_to_select" type="select" label="Only consider genotypes above a position?">
+              <option value="use_input_to_yes">Yes</option>
+              <option value="use_input_to_no" selected="True">No</option>
+            </param>
+            <when value="use_input_to_yes">
+              <param name="input_to" type="integer" value="1" label="Set position:"/>
+            </when>
+            <when value="use_input_to_no"></when>
+          </conditional>
+
+          <conditional name="use_output_from">
+            <param name="use_output_from_select" type="select" label="Only output genotypes above a position?">
+              <option value="use_output_from_yes">Yes</option>
+              <option value="use_output_from_no" selected="True">No</option>
+            </param>
+            <when value="use_output_from_yes">
+              <param name="output_from" type="integer" value="1" label="Set position:"/>
+            </when>
+            <when value="use_output_from_no"></when>
+          </conditional>
+
+          <conditional name="use_output_to">
+            <param name="use_output_to_select" type="select" label="Only output genotypes above a position?">
+              <option value="use_output_to_yes">Yes</option>
+              <option value="use_output_to_no" selected="True">No</option>
+            </param>
+            <when value="use_output_to_yes">
+              <param name="output_to" type="integer" value="1" label="Set position:"/>
+            </when>
+            <when value="use_output_to_no"></when>
+          </conditional>
+
+          <conditional name="filter_individuals">
+            <param name="filter_individuals_select" type="select" label="Do you want to filter individuals?">
+              <option value="no_individual_filter" selected="True">No filtering</option>
+              <option value="exclude_individuals">Exclude list of individuals</option>
+              <option value="include_individuals">Limit list of individuals</option>
+            </param>
+            <when value="no_individual_filter"></when>
+            <when value="exclude_individuals">
+              <param name="exclude_inviduals_file" type="data" format="txt" label="Specify file with excluded individuals (.exc)"/>
+            </when>
+            <when value="include_individuals">
+              <param name="include_inviduals_file" type="data" format="txt" label="Specify file with limited individuals (.inc)"/>
+            </when>
+          </conditional>
+
+          <conditional name="filter_SNPs">
+            <param name="filter_SNPs_select" type="select" label="Do you want to filter SNPs?">
+              <option value="no_individual_filter" selected="True">No filtering</option>
+              <option value="exclude_SNPs">Exclude list of SNPs</option>
+              <option value="include_SNPs">Limit list of SNPs</option>
+            </param>
+            <when value="no_individual_filter"></when>
+            <when value="exclude_SNPs">
+              <param name="exclude_inviduals_file" type="data" format="txt" label="Specify file with excluded SNPs (.exc)"/>
+            </when>
+            <when value="include_SNPs">
+              <param name="include_inviduals_file" type="data" format="txt" label="Specify file with limited SNPs (.inc)"/>
+            </when>
+          </conditional>
+        </when>
+        <when value="use_filters_no"></when>
+      </conditional>
+
+    </inputs>
+    <outputs>
+      <data name="out_haps" format="tabular" label="${tool.name} on ${on_string} haplotypes (.haps)"/>
+      <data name="out_sample" format="tabular" label="${tool.name} on ${on_string} additional information (.sample)"/>
+      <data name="out_graph" format="text" label="${tool.name} on ${on_string} haplotype graphs (.hgraph)"/>
+      <data name="out_log" format="text" label="${tool.name} on ${on_string} logfile (.log)"/>
+    </outputs>
+    <help><![CDATA[
+        TODO: Fill in help.
+    ]]></help>
+    <citations>
+      <citation type="doi">10.1038/nmeth.1785</citation>
+    </citations>
+</tool>