# HG changeset patch # User yhoogstrate # Date 1439466204 14400 # Node ID 7e32d2a88c07ceb3cf9fd7a02da4f59fe7b09ab3 # Parent e83cdc4e56cd1a53a6f6c4d8759c36da81faa4bd planemo upload for repository https://github.com/ErasmusMC-Bioinformatics/crossmap_galaxy_wrapper commit 853297e61ca5fecafe4f09ec449e2731515b32c1-dirty diff -r e83cdc4e56cd -r 7e32d2a88c07 crossmap.xml --- a/crossmap.xml Wed Aug 05 04:07:13 2015 -0400 +++ b/crossmap.xml Thu Aug 13 07:43:24 2015 -0400 @@ -1,18 +1,20 @@ - + Convert genome coordinates or annotation files between genome assemblies - crossmap + crossmap + CrossMap.py 2>&1 | head -n 1 | grep -E --only-matching 'CrossMap.*' + - + CrossMap.py - $multiple.input_format + ${multiple.input_format.replace("sam","bam")} #if $multiple.input_format == "vcf" and $multiple.seq_source.index_source == "cached" @@ -21,13 +23,11 @@ "$multiple.seq_source.input_chain" #end if - "$multiple.seq_source.input" @@ -35,17 +35,26 @@ "$multiple.seq_source.input_fasta" #end if - #if $multiple.include_fails == "true" + #if str($multiple.include_fails) == "True" > #end if "$output" + + #if $multiple.input_format in ["bam", "sam"] + ; mv "${output}.${multiple.input_format}" "$output" + ; mv "${output}.unmap.${multiple.input_format}" "$output_unmapped" + #else if $multiple.input_format in ["vcf"] + ; mv "${output}" "$output" + ; mv "${output}.unmap" "$output_unmapped" + #end if - + + @@ -59,7 +68,7 @@ - + @@ -78,11 +87,43 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -109,7 +150,8 @@ - + + @@ -162,6 +204,8 @@ + + @@ -170,53 +214,56 @@ - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -224,58 +271,93 @@ - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + - + + + + + + + + + + + + + + input_format in ["bam" , "vcf"] + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - - - + + + + + + + + + + + + + + + + - - --> + + + + + + + + + + + + + + + + + diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_bam_01_input_a.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_bam_01_input_a.sam Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,27 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:1000051 +@SQ SN:chr2 LN:1000051 +@SQ SN:chr3 LN:1000051 +@SQ SN:chr4 LN:9250051 +@PG ID:- VN:1.0.0 CL:cmatrix +@CO Test data for CrossMap bam +read_001 0 chr1 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_002 0 chr2 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_003 0 chr3 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_004 0 chr4 9200000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_005 0 chr4 8940000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_006 0 chr1 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_007 0 chr2 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_008 0 chr3 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_009 0 chr4 9250000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_010 0 chr4 9000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_011 16 chr1 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_012 16 chr2 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_013 16 chr3 100000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_014 16 chr4 9200000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_015 16 chr4 8940000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_016 16 chr1 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_017 16 chr2 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_018 16 chr3 1000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_019 16 chr4 9250000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_020 16 chr4 9000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_bam_01_output_a.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_bam_01_output_a.sam Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,23 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:247249719 +@SQ SN:chr10 LN:135374737 +@SQ SN:chr2 LN:242951149 +@SQ SN:chr3 LN:199501827 +@SQ SN:chr4 LN:191273063 +@PG ID:- VN:1.0.0 CL:cmatrix +@PG ID:CrossMap VN:0.1.10 +@CO Test data for CrossMap bam +@CO Liftover from original BAM/SAM file: +@CO Liftover is based on the chain file: +read_001 0 chr1 89863 60 50M = 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_002 0 chr2 90000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_003 0 chr3 75000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_006 0 chr1 989863 60 50M = 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_007 0 chr2 990000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_008 0 chr3 975000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_011 16 chr1 89863 60 50M = 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_012 16 chr2 90000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_013 16 chr3 75000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_016 16 chr1 989863 60 50M = 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_017 16 chr2 990000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_018 16 chr3 975000 60 50M chr1 1 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_bam_01_output_a.unmap.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_bam_01_output_a.unmap.sam Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,15 @@ +@HD VN:1.0 SO:coordinate +@SQ SN:chr1 LN:1000051 +@SQ SN:chr2 LN:1000051 +@SQ SN:chr3 LN:1000051 +@SQ SN:chr4 LN:9250051 +@PG ID:- VN:1.0.0 CL:cmatrix +@CO Test data for CrossMap bam +read_004 0 chr4 9200000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_005 0 chr4 8940000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_009 0 chr4 9250000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_010 0 chr4 9000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_014 16 chr4 9200000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_015 16 chr4 8940000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_019 16 chr4 9250000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * +read_020 16 chr4 9000000 60 50M * 0 0 NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN * diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_gff_01_input_a.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_gff_01_input_a.gtf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,5 @@ +chr1 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr2 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr3 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr4 CrossMap_test_data CDS 9200000 9250000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr5 CrossMap_test_data CDS 8940000 9000000 0.000000 - 0 gene_id "TEST_GENE_1"; diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_gff_01_output_a__all.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_gff_01_output_a__all.gtf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,5 @@ +chr1 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; fail (multpile match to target assembly) +chr2 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; -> chr2 CrossMap_test_data CDS 90000 990000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr3 CrossMap_test_data CDS 100000 1000000 0.000000 - 0 gene_id "TEST_GENE_1"; -> chr3 CrossMap_test_data CDS 75000 975000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr4 CrossMap_test_data CDS 9200000 9250000 0.000000 - 0 gene_id "TEST_GENE_1"; fail (multpile match to target assembly) +chr5 CrossMap_test_data CDS 8940000 9000000 0.000000 - 0 gene_id "TEST_GENE_1"; fail (no match to target assembly) diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_gff_01_output_a__only-matches.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_gff_01_output_a__only-matches.gtf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,2 @@ +chr2 CrossMap_test_data CDS 90000 990000 0.000000 - 0 gene_id "TEST_GENE_1"; +chr3 CrossMap_test_data CDS 75000 975000 0.000000 - 0 gene_id "TEST_GENE_1"; diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_vcf_01.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01.fasta Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,10 @@ +>chr1 +CAAAAAAGCAGTTGACAGTTGTTCGGGCTTGTTACGCATCTTAAGATCGAATAAGGAGAGAGGGTCTAAACG +AGGCGCACCCCGCCTATGGGTGATCGAGGTACTAGGGGTTGGTCGCAGGTCTGTATTACCGTTAGCGGTGCA +AGGGGATCTGATCGAGTGATTCACCTACTCATGTGGCGAGCACGCCGACGAAATACTCCTGGTCGTGTTATA +AAGCCCTGGTTTTCCTTTCC +>chr2 +CACAAAATGCACGTGGATGCAGGCATTTATCCAACCCACACTATTACGTTCACCAAATGTGTGGACCAACTG +CGGGACTAGGTAAGCTTGTCCTCAATGAGCGAAATTGATATTTCTCTACCGACTTGGGGTCGACTGGACGAG +TCAGCTGTGCAACAGCTCAGCCGGTTTCGATAAACCGAAACCTTGAATGTTTGGACTTGCGTCATGGCGAAC +AAAGATCGTTCATGTCGCA \ No newline at end of file diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_vcf_01.over.chain --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01.over.chain Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,11 @@ +chain 4900 chr1 236 + 1 236 chr2 236 - 1 237 1 + 9 1 0 + 10 0 5 + 61 4 0 + 16 0 4 + 42 3 0 + 16 0 8 + 14 1 0 + 3 7 0 + 48 + diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_vcf_01_input.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01_input.vcf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,8 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP001 SAMP002 +1 10 rs11449 G A . PASS . GT 0/0 0/1 +1 100 rs84825 C C . PASS . GT:GP 0/1:. 0/1:0.03,0.97,0 +1 200 rs84823 T G . PASS . GT:PL ./.:. 1/1:10,5,0 diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_vcf_01_output.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01_output.vcf Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,12 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +##liftOverProgram=CrossMap(https://sourceforge.net/projects/crossmap/) +##liftOverFile=test_vcf_01.over.chain +##new_reference_genome=test_vcf_01.fasta +##liftOverTime=August13,2015 +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP001 SAMP002 +2 227 rs11449 C A . PASS . GT 0/0 0/1 +2 137 rs84825 T C . PASS . GT:GP 0/1:. 0/1:0.03,0.97,0 +2 36 rs84823 C G . PASS . GT:PL ./.:. 1/1:10,5,0 diff -r e83cdc4e56cd -r 7e32d2a88c07 test-data/test_vcf_01_output.vcf.unmap --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_vcf_01_output.vcf.unmap Thu Aug 13 07:43:24 2015 -0400 @@ -0,0 +1,5 @@ +##fileformat=VCFv4.2 +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMP001 SAMP002 diff -r e83cdc4e56cd -r 7e32d2a88c07 tool-data/all_fasta.loc.sample --- a/tool-data/all_fasta.loc.sample Wed Aug 05 04:07:13 2015 -0400 +++ b/tool-data/all_fasta.loc.sample Thu Aug 13 07:43:24 2015 -0400 @@ -4,13 +4,13 @@ #all_fasta.loc. This file has the format (white space characters are #TAB characters): # -# +# # #So, all_fasta.loc could look something like this: # -#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa -#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa -#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa # #Your all_fasta.loc file should contain an entry for each individual #fasta file. So there will be multiple fasta files for each build,