Mercurial > repos > iuc > samtools_bam_to_cram
changeset 0:f11c1dd0b4ae draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/bam_to_cram commit fdc0553f8451316c1f0b63488db3ef250e99b3ad
author | iuc |
---|---|
date | Fri, 05 May 2017 13:12:05 -0400 |
parents | |
children | 38fa15df86b2 |
files | macros.xml samtools_bam_to_cram.xml test-data/test.bam test-data/test.bed test-data/test.cram test-data/test.fa test-data/test.sam |
diffstat | 7 files changed, 215 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri May 05 13:12:05 2017 -0400
@@ -0,0 +1,75 @@
+<macros>
+    <!-- Shared samtools macros: package requirement, citations, version command, exit-code handling and a help-text token. -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="1.3">samtools</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{SAM_def,
+                title={Definition of SAM/BAM format},
+                url = {https://samtools.github.io/hts-specs/SAMv1.pdf},}
+            </citation>
+            <citation type="doi">10.1093/bioinformatics/btp352</citation>
+            <citation type="doi">10.1093/bioinformatics/btr076</citation>
+            <citation type="doi">10.1093/bioinformatics/btr509</citation>
+            <citation type="bibtex">
+                @misc{Danecek_et_al,
+                Author={Danecek, P., Schiffels, S., Durbin, R.},
+                title={Multiallelic calling model in bcftools (-m)},
+                url = {http://samtools.github.io/bcftools/call-m.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Durbin_VCQC,
+                Author={Durbin, R.},
+                title={Segregation based metric for variant call QC},
+                url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Li_SamMath,
+                Author={Li, H.},
+                title={Mathematical Notes on SAMtools Algorithms},
+                url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{SamTools_github,
+                title={SAMTools GitHub page},
+                url = {https://github.com/samtools/samtools},}
+            </citation>
+        </citations>
+    </xml>
+    <!-- The ampersand is escaped so the file stays well-formed XML; the shell receives "2>&1" (stderr merged into stdout before grep). -->
+    <xml name="version_command">
+        <version_command>samtools 2>&amp;1 | grep Version</version_command>
+    </xml>
+    <!-- Any exit code of 1 or above is reported as a fatal error. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" level="fatal" description="Error" />
+        </stdio>
+    </xml>
+    <!-- Help-text snippet (reStructuredText) describing how to re-detect metadata of a BAM dataset. -->
+    <token name="@no-chrom-options@">
+-----
+
+.. class:: warningmark
+
+**No options available? How to re-detect metadata**
+
+If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps:
+
+1. Click on the **pencil** icon adjacent to the dataset in the history
+2. A new menu will appear in the center pane of the interface
+3. Click **Datatype** tab
+4. Set **New Type** to **BAM**
+5. Click **Save**
+
+The metadata will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down.
+
+    </token>
+
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_bam_to_cram.xml Fri May 05 13:12:05 2017 -0400
@@ -0,0 +1,133 @@
+<tool id="samtools_bam_to_cram" name="samtools BAM to CRAM" version="1.2.0">
+    <description>convert BAM alignments to CRAM format</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+
+    <command><![CDATA[
+        ## When the input dataset carries a BAM index, link it to the input path with a .bai suffix.
+        #if str( $input_alignment.metadata.bam_index ) != "None":
+            ln
+                -f
+                -s
+                '${input_alignment.metadata.bam_index}'
+                '${input_alignment}.bai'
+            &&
+        #end if
+
+        samtools view
+            #if $parameter_regions.target_region == "regions_bed_file":
+                -L '${parameter_regions.regions_bed_file}'
+            #end if
+
+            -@\${GALAXY_SLOTS:-1}
+            -C
+            ## A cached reference is a data table entry: its value is an identifier, the FASTA location is held in the path field.
+            #if str( $reference_source.reference_source_selector ) == "cached":
+                -T '${reference_source.input_reference.fields.path}'
+            #else
+                -T '${reference_source.input_reference}'
+            #end if
+            -o '${output_alignment}'
+
+            '${input_alignment}'
+
+            #if $parameter_regions.target_region == "region":
+                '${parameter_regions.region_string}'
+            #end if
+    ]]></command>
+
+    <inputs>
+        <param name="input_alignment" type="data" format="bam,sam" label="BAM (or SAM) alignment file"/>
+        <conditional name="reference_source">
+            <param name="reference_source_selector" type="select" label="Load reference genome from">
+                <option value="cached">Local cache</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached">
+                <!-- Built-in genomes are listed from the all_fasta data table, filtered by the dbkey of the input alignment. -->
+                <param name="input_reference" type="select" label="Genome reference FASTA file">
+                    <options from_data_table="all_fasta">
+                        <filter type="data_meta" ref="input_alignment" key="dbkey" column="1" />
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="input_reference" type="data" format="fasta" label="Genome reference FASTA file"/>
+            </when>
+        </conditional>
+        <conditional name="parameter_regions">
+            <param name="target_region" type="select" label="Choose conversion within specific genomic region(s)">
+                <option value="entire_input_file">Entire BAM alignment file</option>
+                <option value="region">Specific region</option>
+                <option value="regions_bed_file">List of specific regions (BED file)</option>
+            </param>
+            <when value="entire_input_file" />
+            <when value="region">
+                <param name="region_string" type="text" label="Samtools: region to convert" help="e.g. chrX or chr:start-end. Requires an indexed BAM input." />
+            </when>
+            <when value="regions_bed_file">
+                <param name="regions_bed_file" type="data" format="bed"
+                       label="only include reads overlapping this BED FILE" argument="-L"/>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data format="cram" name="output_alignment" label="$tool.name on ${on_string}.cram"></data>
+    </outputs>
+
+    <tests>
+        <!-- Every test takes the reference from the history and compares the output by size only (sim_size, delta 250). -->
+        <test>
+            <param name="input_alignment" value="test.bam" ftype="bam" />
+            <param name="reference_source_selector" value="history" />
+            <param name="input_reference" value="test.fa" />
+            <param name="target_region" value="entire_input_file" />
+
+            <output name="output_alignment" file="test.cram" compare="sim_size" delta="250" />
+        </test>
+        <test>
+            <param name="input_alignment" value="test.sam" ftype="sam" />
+            <param name="reference_source_selector" value="history" />
+            <param name="input_reference" value="test.fa" />
+            <param name="target_region" value="entire_input_file" />
+
+            <output name="output_alignment" file="test.cram" compare="sim_size" delta="250" />
+        </test>
+        <test>
+            <param name="input_alignment" value="test.bam" ftype="bam" />
+            <param name="reference_source_selector" value="history" />
+            <param name="input_reference" value="test.fa" />
+            <param name="target_region" value="region" />
+            <param name="region_string" value="CHROMOSOME_I" />
+
+            <output name="output_alignment" file="test.cram" compare="sim_size" delta="250" />
+        </test>
+        <test>
+            <param name="input_alignment" value="test.bam" ftype="bam" />
+            <param name="reference_source_selector" value="history" />
+            <param name="input_reference" value="test.fa" />
+            <param name="target_region" value="regions_bed_file" />
+            <param name="regions_bed_file" value="test.bed" ftype="bed" />
+
+            <output name="output_alignment" file="test.cram" compare="sim_size" delta="250" />
+        </test>
+    </tests>
+
+    <help>
+
+**What this tool does**
+
+Converts alignments from the BAM format to the CRAM format using the SAMTools_ toolkit. The CRAM format does additional compression relative to the reference genome which makes the compression in terms of file size more efficient.
+
+.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
+    </help>
+
+    <expand macro="citations" />
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.bed Fri May 05 13:12:05 2017 -0400 @@ -0,0 +1,1 @@ +CHROMOSOME_I 1 120
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.fa Fri May 05 13:12:05 2017 -0400 @@ -0,0 +1,3 @@ +>CHROMOSOME_I +gcctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagc +ctaagcctaagcctaagcctaagcctaagcctaagcctaagcctaagcct
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.sam Fri May 05 13:12:05 2017 -0400 @@ -0,0 +1,15 @@ +@HD VN:1.4 SO:unsorted +@SQ SN:CHROMOSOME_I LN:100 +@RG ID:UNKNOWN SM:UNKNOWN +@PG ID:bowtie2 PN:bowtie2 VN:2.0.0-beta5 +@PG ID:0 CL:aaaaa/aaa/aaaaa/aaaaaa/aaaaaaaaa/aaa/iuc/package_aaaaaaaaa_x_y/aaaaaaaaaaaa/bin/aaaaaaaaaaaaaaaaa aaaaaaaaaa /aaaa/aaaaa/aaa/aaaaaaaaaaaaaaaaaaa/tools/aaaaaaaaa/test-data/test.cram aa /aaaa/aaaaa/aaa/aaaaaaaaaaaaaaaaaaa/tools/aaaaaaaaa/test-data/test.fa -O test PN:samtools VN:1.2 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:UNKNOWN XG:i:1 XM:i:5 XN:i:0 XO:i:1 AS:i:-18 XS:i:-18 YT:Z:UU +SRR065390.921023 16 CHROMOSOME_I 3 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 RG:Z:UNKNOWN XG:i:0 XM:i:3 XN:i:0 XO:i:0 AS:i:-6 XS:i:-13 YT:Z:UU +SRR065390.1871511 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA <?@<@A8>0:BB@>B<=B@???@=8@B>BB@CA@DACDCBBCCCA@CCCCACCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:UNKNOWN XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.3743423 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################?6@:7<=@3=@ABAAB>BDBBABADABDDDBDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:UNKNOWN XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.4251890 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA 
###########@BB=BCBBC?B>B;>B@@ADBBB@DBBBBDCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:UNKNOWN XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.5238868 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA @,=@@D8D;?BBB>;?BBB==BB@D;>D>BBB>BBDDB<DABADCACDCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:UNKNOWN XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.6023338 0 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAAGCTAC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCDDDBCCABB=DABBA?################ RG:Z:UNKNOWN XG:i:0 XM:i:3 XN:i:0 XO:i:0 AS:i:-6 XS:i:-6 YT:Z:UU +SRR065390.6815812 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############?@.@?B;B0B=;<DADB@@BDDBBDDBCBCBD@CCDCCCCCCCDCCCCCCCCACCCCCCCCCCBCCCCCCDCCCCCCCCCCCBCC RG:Z:UNKNOWN XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.6905811 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #######################BB@>A<BC>@@BCCB@=BACBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC RG:Z:UNKNOWN XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.8289592 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A?@C9@@BC=AABDD@A@DC@CB=@BA?6@CCAAC@+CCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCC RG:Z:UNKNOWN XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU