Mercurial > repos > yhoogstrate > crossmap
changeset 13:7e32d2a88c07 draft
planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/crossmap_galaxy_wrapper commit 853297e61ca5fecafe4f09ec449e2731515b32c1-dirty
author | yhoogstrate |
---|---|
date | Thu, 13 Aug 2015 07:43:24 -0400 |
parents | e83cdc4e56cd |
children | bdb0959ece9e |
files | crossmap.xml test-data/test_bam_01_input_a.sam test-data/test_bam_01_output_a.sam test-data/test_bam_01_output_a.unmap.sam test-data/test_gff_01_input_a.gtf test-data/test_gff_01_output_a__all.gtf test-data/test_gff_01_output_a__only-matches.gtf test-data/test_vcf_01.fasta test-data/test_vcf_01.over.chain test-data/test_vcf_01_input.vcf test-data/test_vcf_01_output.vcf test-data/test_vcf_01_output.vcf.unmap tool-data/all_fasta.loc.sample |
diffstat | 13 files changed, 337 insertions(+), 95 deletions(-) [+] |
line wrap: on
line diff
--- a/crossmap.xml Wed Aug 05 04:07:13 2015 -0400 +++ b/crossmap.xml Thu Aug 13 07:43:24 2015 -0400 @@ -1,18 +1,20 @@ -<tool id="crossmap" name="CrossMap" version="0.1.9"> +<tool id="crossmap" name="CrossMap" version="0.2.a"> <description>Convert genome coordinates or annotation files between genome assemblies</description> <requirements> - <requirement type="package" version="0.1.9">crossmap</requirement> + <requirement type="package" version="0.2">crossmap</requirement> </requirements> + <version_command>CrossMap.py 2>&1 | head -n 1 | grep -E --only-matching 'CrossMap.*'</version_command> + <stdio> <regex match="Usage: CrossMap.py" source="stdout" level="fatal"/> - <regex match=".*" source="both" level="log" description="tool progress"/> + <regex match=".*" source="both" level="log"/> </stdio> <command> CrossMap.py - $multiple.input_format + ${multiple.input_format.replace("sam","bam")} #if $multiple.input_format == "vcf" and $multiple.seq_source.index_source == "cached" <!-- This is the 2nd dbkey, and the corresponding value has to be looked up --> @@ -21,13 +23,11 @@ "$multiple.seq_source.input_chain" #end if -<!-- these arguments don't work - #if $multiple.input_format == "bam" + #if $multiple.input_format in ["bam", "sam"] -m $multiple.insert_size -s $multiple.insert_size_stdev -t $multiple.insert_size_fold #end if ---> "$multiple.seq_source.input" @@ -35,17 +35,26 @@ "$multiple.seq_source.input_fasta" #end if - #if $multiple.include_fails == "true" + #if str($multiple.include_fails) == "True" > #end if "$output" + + #if $multiple.input_format in ["bam", "sam"] + ; mv "${output}.${multiple.input_format}" "$output" + ; mv "${output}.unmap.${multiple.input_format}" "$output_unmapped" + #else if $multiple.input_format in ["vcf"] + ; mv "${output}" "$output" + ; mv "${output}.unmap" "$output_unmapped" + #end if </command> <inputs> <conditional name="multiple"> <param name="input_format" type="select" label="Convert a file of the following format"> - <option value="bam">BAM or SAM format</option> + <option value="bam">BAM</option> + <option value="sam">SAM</option> <option value="bed">BED or BED-like</option> <option value="bigwig">BigWig</option> <option value="gff">GFF or GTF</option> @@ -59,7 +68,7 @@ <option value="history">From History</option> </param> <when value="cached"> - <param type="data" format="bam,sam" name="input" label="BAM/SAM file"> + <param type="data" format="bam" name="input" label="BAM/SAM file"> <validator type="unspecified_build" /> <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="LiftOver mapping (chain file) is not available for the specified build." /> </param> @@ -78,11 +87,43 @@ </when> </conditional> -<!-- These parameters don't work <param name="insert_size" type="float" value="200.0" label="Insert size (-m)" help="Average insert size of pair-end sequencing (bp) [default=200.0]" /> <param name="insert_size_stdev" type="float" value="30.0" label="Insert size std. dev (-s)" help="Stanadard deviation of insert size. [default=30.0]" /> <param name="insert_size_fold" type="float" value="3.0" label="Insert size std. dev foldchange (-t)" help="A mapped pair is considered as 'proper pair' if both ends mapped to different strand and the distance between them is less then '-t' * stdev from the mean. [default=3.0]" /> ---> + + <param name="include_fails" type="hidden" tvalue="False" /> + </when> + <when value="sam"><!-- BAM and SAM are exactly the same conditions, but they need to be separate to get the proper output format --> + <conditional name="seq_source"> + <param name="index_source" type="select" label="Source for LiftOver Data (chain file)"> + <option value="cached">Local data (in galaxy)</option> + <option value="history">From History</option> + </param> + <when value="cached"> + <param type="data" format="sam" name="input" label="BAM/SAM file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="LiftOver mapping (chain file) is not available for the specified build." /> + </param> + <param name="input_chain" type="select" label="Lift Over To"> + <options from_file="liftOver.loc"> + <column name="name" index="1"/> + <column name="value" index="2"/> + <column name="dbkey" index="0"/> + <filter type="data_meta" ref="input" key="dbkey" column="0" /> + </options> + </param> + </when> + <when value="history"> + <param type="data" format="bam,sam" name="input" label="BAM/SAM file" /> + <param type="data" format="csv" name="input_chain" label="LiftOver chain file" /> + </when> + </conditional> + + <param name="insert_size" type="float" value="200.0" label="Insert size (-m)" help="Average insert size of pair-end sequencing (bp) [default=200.0]" /> + <param name="insert_size_stdev" type="float" value="30.0" label="Insert size std. dev (-s)" help="Stanadard deviation of insert size. [default=30.0]" /> + <param name="insert_size_fold" type="float" value="3.0" label="Insert size std. dev foldchange (-t)" help="A mapped pair is considered as 'proper pair' if both ends mapped to different strand and the distance between them is less then '-t' * stdev from the mean. [default=3.0]" /> + + <param name="include_fails" type="hidden" tvalue="False" /> </when> <when value="bed"> <conditional name="seq_source"> @@ -109,7 +150,8 @@ <param type="data" format="csv" name="input_chain" label="LiftOver chain file" /> </when> </conditional> - <param name="include_fails" type="boolean" truevalue="true" falsevalue="false" label="Include failed liftovers" help="If a coordinate can not be lift over, do you want to include it in the output (it is still being marked 'fail')" /> + + <param name="include_fails" type="boolean" truevalue="True" checked="false" falsevalue="False" label="Include failed liftovers" help="If a coordinate can not be lift over, do you want to include it in the output (it is still being marked 'fail')" /> </when> <when value="bigwig"> <conditional name="seq_source"> @@ -162,6 +204,8 @@ <param type="data" format="csv" name="input_chain" label="LiftOver chain file" /> </when> </conditional> + + <param name="include_fails" type="boolean" truevalue="True" checked="false" falsevalue="False" label="Include failed liftovers" help="If a coordinate can not be lift over, do you want to include it in the output (it is still being marked 'fail')" /> </when> <when value="vcf"> <conditional name="seq_source"> @@ -170,53 +214,56 @@ <option value="history_chain">Chain file from History</option> <option value="history_all">Chain & FASTA files from History</option> </param> - <when value="cached"> - <param type="data" format="vcf" name="input" label="VCF file"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="LiftOver mapping (chain file) is not available for the specified build." /> - </param> - - <!-- automatically fetch a FASTA file from the same DBKEY as the chain file --> - <param name="input_chain" type="select" label="Lift Over To (Chain file)"> - <options from_file="liftOver.loc"> - <column name="name" index="1" /> - <column name="value" index="1" /><!-- It is not possible to send the *.chain file as value, and obtain the 2nd dbkey as parameter via a filter --> - <column name="dbkey" index="0" /> - <filter type="data_meta" ref="input" key="dbkey" column="0" /> - </options> - </param> - - <param name="input_fasta" type="select" label="Lift Over To (FASTA file)" help="The FASTA file must be on the same build (dbkey) as the LiftOver chain file"> - <options from_file="all_fasta.loc"> - <column name="name" index="2"/> - <column name="value" index="3"/> - <column name="dbkey" index="1"/> - <filter type="param_value" ref="input_chain" column="1" /> - </options> - </param> - </when> + + <when value="cached"> + <param type="data" format="vcf" name="input" label="VCF file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="LiftOver mapping (chain file) is not available for the specified build." /> + </param> + + <!-- automatically fetch a FASTA file from the same DBKEY as the chain file --> + <param name="input_chain" type="select" label="Lift Over To (Chain file)"> + <options from_file="liftOver.loc"> + <column name="name" index="1" /> + <column name="value" index="1" /><!-- It is not possible to send the *.chain file as value, and obtain the 2nd dbkey as parameter via a filter --> + <column name="dbkey" index="0" /> + <filter type="data_meta" ref="input" key="dbkey" column="0" /> + </options> + </param> - <when value="history_chain"> - <param type="data" format="vcf" name="input" label="VCF file"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="all_fasta.loc" metadata_name="dbkey" metadata_column="1" message="LiftOver mapping (FASTA file) is not available for the specified build." /> - </param> - <param type="data" format="csv" name="input_chain" multiple="false" label="LiftOver chain file" /> - <param name="input_fasta" type="select" label="Lift Over To (FASTA file)" help="The FASTA file must be on the same build (dbkey) as the LiftOver chain file"> - <options from_file="all_fasta.loc"> - <column name="name" index="2"/> - <column name="value" index="3"/> - <column name="dbkey" index="1"/> - </options> - </param> - </when> - - <when value="history_all"> - <param type="data" format="vcf" name="input" label="VCF file" /> - <param type="data" format="csv" name="input_chain" multiple="false" label="LiftOver chain file" /> - <param type="data" format="fasta" name="input_fasta" multiple="false" label="Full genome FASTA file" /> - </when> + <param name="input_fasta" type="select" label="Lift Over To (FASTA file)" help="The FASTA file must be on the same build (dbkey) as the LiftOver chain file"> + <options from_file="all_fasta.loc"> + <column name="name" index="2"/> + <column name="value" index="3"/> + <column name="dbkey" index="1"/> + <filter type="param_value" ref="input_chain" column="1" /> + </options> + </param> + </when> + + <when value="history_chain"> + <param type="data" format="vcf" name="input" label="VCF file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="all_fasta.loc" metadata_name="dbkey" metadata_column="1" message="LiftOver mapping (FASTA file) is not available for the specified build." /> + </param> + <param type="data" format="csv" name="input_chain" multiple="false" label="LiftOver chain file" /> + <param name="input_fasta" type="select" label="Lift Over To (FASTA file)" help="The FASTA file must be on the same build (dbkey) as the LiftOver chain file"> + <options from_file="all_fasta.loc"> + <column name="name" index="2"/> + <column name="value" index="3"/> + <column name="dbkey" index="1"/> + </options> + </param> + </when> + + <when value="history_all"> + <param type="data" format="vcf" name="input" label="VCF file" /> + <param type="data" format="csv" name="input_chain" multiple="false" label="LiftOver chain file" /> + <param type="data" format="fasta" name="input_fasta" multiple="false" label="Full genome FASTA file" /> + </when> </conditional> + + <param name="include_fails" type="hidden" tvalue="False" /> </when> <when value="wig"> <conditional name="seq_source"> @@ -224,58 +271,93 @@ <option value="cached">Local data (in galaxy)</option> <option value="history">From History</option> </param> - <when value="cached"> - <param format="wig" name="input" type="data" label="Wiggle file"> - <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="LiftOver mapping (chain file) is not available for the specified build." /> - </param> - <param name="input_chain" type="select" label="Lift Over To"> - <options from_file="liftOver.loc"> - <column name="name" index="1"/> - <column name="value" index="2"/> - <column name="dbkey" index="0"/> - <filter type="data_meta" ref="input" key="dbkey" column="0" /> - </options> - </param> - </when> - <when value="history"> - <param format="wig" name="input" type="data" label="Wiggle file" /> - <param type="data" format="csv" name="input_chain" label="LiftOver chain file" /> - </when> + <when value="cached"> + <param format="wig" name="input" type="data" label="Wiggle file"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="LiftOver mapping (chain file) is not available for the specified build." /> + </param> + <param name="input_chain" type="select" label="Lift Over To"> + <options from_file="liftOver.loc"> + <column name="name" index="1"/> + <column name="value" index="2"/> + <column name="dbkey" index="0"/> + <filter type="data_meta" ref="input" key="dbkey" column="0" /> + </options> + </param> + </when> + <when value="history"> + <param format="wig" name="input" type="data" label="Wiggle file" /> + <param type="data" format="csv" name="input_chain" label="LiftOver chain file" /> + </when> </conditional> </when> </conditional> </inputs> <outputs> - <data format="text" name="output" label="${tool.name} on " /> + <data format="text" name="output" label="${tool.name} on "> + <change_format> + <when input="multiple.input_format" value="bam" format="bam" /> + <when input="multiple.input_format" value="sam" format="sam" /> + <when input="multiple.input_format" value="bed" format="bed" /> + <when input="multiple.input_format" value="bigwig" format="bigwig" /> + <when input="multiple.input_format" value="gff" format="gff" /> + <when input="multiple.input_format" value="vcf" format="vcf" /> + <when input="imultiple.nput_format" value="wig" format="wig" /> + </change_format> + </data> + + <data format="text" name="output_unmapped" label="${tool.name} unmapped on "> + <filter>input_format in ["bam" , "vcf"]</filter> + <change_format> + <when input="multiple.input_format" value="bam" format="bam" /> + <when input="multiple.input_format" value="sam" format="sam" /> + <when input="multiple.input_format" value="bigwig" format="bigwig" /> + <when input="multiple.input_format" value="gff" format="gff" /> + <when input="multiple.input_format" value="vcf" format="vcf" /> + <when input="multiple.input_format" value="wig" format="wig" /> + </change_format> + </data> </outputs> <tests> + <!-- BAM/SAM --> + <test> + <param name="input_format" value="sam"/> + <param name="index_source" value="history"/> + <param name="input" value="test_bam_01_input_a.sam" ftype="sam"/> + <param name="input_chain" value="aToB.over.chain" ftype="csv"/> + <param name="include_fails" value="False"/> + + <output name="output" file="test_bam_01_output_a.sam" compare="diff" lines_diff="4"/> + <output name="output_unmapped" file="test_bam_01_output_a.unmap.sam"/> + </test> + + <!-- BED --> + <test> + <param name="input_format" value="bed"/> + <param name="index_source" value="history"/> + <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/> + <param name="input_chain" value="aToB.over.chain" ftype="csv"/> + <param name="include_fails" value="False"/> + + <output name="output" file="test_bed_01_output_a__only-matches.bed"/> + </test> <test> <param name="input_format" value="bed"/> <param name="index_source" value="history"/> <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/> <param name="input_chain" value="aToB.over.chain" ftype="txt"/> - <param name="include_fails" value="false"/> + <param name="include_fails" value="True"/> - <output name="output" file="test_bed_01_output_a__only-matches.bed"/> - </test> - <!--<test> - <param name="input_format" value="bed"/> - <param name="index_source" value="history"/> - <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/> - <param name="input_chain" value="aToB.over.chain" ftype="txt"/> - <param name="include_fails" value="true"/> - - <output name="output" file="test_bed_02_output_a__all.bed"/> + <output name="output" file="test_bed_01_output_a__all.bed"/> </test> <test> <param name="input_format" value="bed"/> <param name="index_source" value="history"/> <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/> <param name="input_chain" value="aToB.over.chain" ftype="txt"/> - <param name="include_fails" value="false"/> + <param name="include_fails" value="False"/> <output name="output" file="test_bed_02_output_a__only-matches.bed"/> </test> @@ -284,10 +366,47 @@ <param name="index_source" value="history"/> <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/> <param name="input_chain" value="aToB.over.chain" ftype="txt"/> + <param name="include_fails" value="True"/> + + <output name="output" file="test_bed_02_output_a__all.bed"/> + </test> + + <!-- BigWig --> + + <!-- GFF --> + <test> + <param name="input_format" value="gff"/> + <param name="index_source" value="history"/> + <param name="input" value="test_gff_01_input_a.gtf" ftype="gtf"/> + <param name="input_chain" value="aToB.over.chain" ftype="csv"/> + <param name="include_fails" value="false"/> + + <output name="output" file="test_gff_01_output_a__only-matches.gtf"/> + </test> + <test> + <param name="input_format" value="gff"/> + <param name="index_source" value="history"/> + <param name="input" value="test_gff_01_input_a.gtf" ftype="gtf"/> + <param name="input_chain" value="aToB.over.chain" ftype="csv"/> <param name="include_fails" value="true"/> - <output name="output" file="test_bed_02_output_a__all.bed"/> - </test>--> + <output name="output" file="test_gff_01_output_a__all.gtf"/> + </test> + + <!-- VCF --> + <test> + <param name="input_format" value="vcf"/> + <param name="index_source" value="history_all"/> + <param name="input" value="test_vcf_01_input.vcf" ftype="vcf"/> + <param name="input_chain" value="test_vcf_01.over.chain" ftype="csv"/> + <param name="input_fasta" value="test_vcf_01.fasta" ftype="fasta"/> + <param name="include_fails" value="False"/> + + <output name="output" file="test_vcf_01_output.vcf" compare="diff" lines_diff="4"/> + <output name="output_unmapped" file="test_vcf_01_output.vcf.unmap"/> + </test> + + <!-- WIG --> </tests> <help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_bam_01_input_a.sam Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,27 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:1000051 +@SQ SN:chr2 LN:1000051 +@SQ SN:chr3 LN:1000051 +@SQ SN:chr4 LN:9250051 +@PG ID:- VN:1.0.0 CL:cmatrix +@CO Test data for CrossMap bam +read_001 0 chr1 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_002 0 chr2 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_003 0 chr3 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_004 0 chr4 9200000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_005 0 chr4 8940000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_006 0 chr1 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_007 0 chr2 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_008 0 chr3 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_009 0 chr4 9250000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_010 0 chr4 9000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_011 16 chr1 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_012 16 chr2 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_013 16 chr3 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_014 16 chr4 9200000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_015 16 chr4 8940000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_016 16 chr1 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_017 16 chr2 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_018 16 chr3 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_019 16 chr4 9250000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_020 16 chr4 9000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN *
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_bam_01_output_a.sam Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,23 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:247249719 +@SQ SN:chr10 LN:135374737 +@SQ SN:chr2 LN:242951149 +@SQ SN:chr3 LN:199501827 +@SQ SN:chr4 LN:191273063 +@PG ID:- VN:1.0.0 CL:cmatrix +@PG ID:CrossMap VN:0.1.10 +@CO Test data for CrossMap bam +@CO Liftover from original BAM/SAM file: +@CO Liftover is based on the chain file: +read_001 0 chr1 89863 60 50M = 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_002 0 chr2 90000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_003 0 chr3 75000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_006 0 chr1 989863 60 50M = 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_007 0 chr2 990000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_008 0 chr3 975000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_011 16 chr1 89863 60 50M = 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_012 16 chr2 90000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_013 16 chr3 75000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_016 16 chr1 989863 60 50M = 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_017 16 chr2 990000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_018 16 chr3 975000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN *
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_bam_01_output_a.unmap.sam Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,15 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:1000051 +@SQ SN:chr2 LN:1000051 +@SQ SN:chr3 LN:1000051 +@SQ SN:chr4 LN:9250051 +@PG ID:- VN:1.0.0 CL:cmatrix +@CO Test data for CrossMap bam +read_004 0 chr4 9200000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_005 0 chr4 8940000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_009 0 chr4 9250000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_010 0 chr4 9000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_014 16 chr4 9200000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_015 16 chr4 8940000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_019 16 chr4 9250000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_020 16 chr4 9000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN *
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_gff_01_input_a.gtf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,5 @@ +chr1 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr2 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr3 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr4 CrossMap_test_data CDS 9200000 9250000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr5 CrossMap_test_data CDS 8940000 9000000 0.000000 - 0 gene_id "TEST_GENE_1";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_gff_01_output_a__all.gtf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,5 @@ +chr1 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; fail (multpile match to target assembly) +chr2 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; -> chr2 CrossMap_test_data CDS 90000 990000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr3 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; -> chr3 CrossMap_test_data CDS 75000 975000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr4 CrossMap_test_data CDS 9200000 9250000 0.000000 - 0 gene_id "TEST_GENE_1"; fail (multpile match to target assembly) +chr5 CrossMap_test_data CDS 8940000 9000000 0.000000 - 0 gene_id "TEST_GENE_1"; fail (no match to target assembly)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_gff_01_output_a__only-matches.gtf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,2 @@ +chr2 CrossMap_test_data CDS 90000 990000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr3 CrossMap_test_data CDS 75000 975000 0.000000 - 0 gene_id "TEST_GENE_1";
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01.fasta Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,10 @@ +>chr1 +CAAAAAAGCAGTTGACAGTTGTTCGGGCTTGTTACGCATCTTAAGATCGAATAAGGAGAGAGGGTCTAAACG +AGGCGCACCCCGCCTATGGGTGATCGAGGTACTAGGGGTTGGTCGCAGGTCTGTATTACCGTTAGCGGTGCA +AGGGGATCTGATCGAGTGATTCACCTACTCATGTGGCGAGCACGCCGACGAAATACTCCTGGTCGTGTTATA +AAGCCCTGGTTTTCCTTTCC +>chr2 +CACAAAATGCACGTGGATGCAGGCATTTATCCAACCCACACTATTACGTTCACCAAATGTGTGGACCAACTG +CGGGACTAGGTAAGCTTGTCCTCAATGAGCGAAATTGATATTTCTCTACCGACTTGGGGTCGACTGGACGAG +TCAGCTGTGCAACAGCTCAGCCGGTTTCGATAAACCGAAACCTTGAATGTTTGGACTTGCGTCATGGCGAAC +AAAGATCGTTCATGTCGCA \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01.over.chain Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,11 @@ +chain 4900 chr1 236 + 1 236 chr2 236 - 1 237 1 + 9 1 0 + 10 0 5 + 61 4 0 + 16 0 4 + 42 3 0 + 16 0 8 + 14 1 0 + 3 7 0 + 48 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01_input.vcf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,8 @@ +##fileformat=VCFv4.2 +##FORMAT=<ID=GT,Number=1,Type=Integer,Description="Genotype"> +##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype Probabilities"> +##FORMAT=<ID=PL,Number=G,Type=Float,Description="Phred-scaled Genotype Likelihoods"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP001 SAMP002 +1 10 rs11449 G A . PASS . GT 0/0 0/1 +1 100 rs84825 C C . PASS . GT:GP 0/1:. 0/1:0.03,0.97,0 +1 200 rs84823 T G . PASS . GT:PL ./.:. 1/1:10,5,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01_output.vcf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,12 @@ +##fileformat=VCFv4.2 +##FORMAT=<ID=GT,Number=1,Type=Integer,Description="Genotype"> +##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype Probabilities"> +##FORMAT=<ID=PL,Number=G,Type=Float,Description="Phred-scaled Genotype Likelihoods"> +##liftOverProgram=CrossMap(https://sourceforge.net/projects/crossmap/) +##liftOverFile=test_vcf_01.over.chain +##new_reference_genome=test_vcf_01.fasta +##liftOverTime=August13,2015 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP001 SAMP002 +2 227 rs11449 C A . PASS . GT 0/0 0/1 +2 137 rs84825 T C . PASS . GT:GP 0/1:. 0/1:0.03,0.97,0 +2 36 rs84823 C G . PASS . GT:PL ./.:. 1/1:10,5,0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01_output.vcf.unmap Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,5 @@ +##fileformat=VCFv4.2 +##FORMAT=<ID=GT,Number=1,Type=Integer,Description="Genotype"> +##FORMAT=<ID=GP,Number=G,Type=Float,Description="Genotype Probabilities"> +##FORMAT=<ID=PL,Number=G,Type=Float,Description="Phred-scaled Genotype Likelihoods"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP001 SAMP002
--- a/tool-data/all_fasta.loc.sample Wed Aug 05 04:07:13 2015 -0400 +++ b/tool-data/all_fasta.loc.sample Thu Aug 13 07:43:24 2015 -0400 @@ -4,13 +4,13 @@ #all_fasta.loc. This file has the format (white space characters are #TAB characters): # -#<unique_build_id> <dbkey> <display_name> <file_path> +#<unique_build_id> <dbkey> <display_name> <file_path> # #So, all_fasta.loc could look something like this: # -#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa -#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa -#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa # #Your all_fasta.loc file should contain an entry for each individual #fasta file. So there will be multiple fasta files for each build,