Mercurial > repos > iuc > bbtools_bbmerge
changeset 2:16db2a5b73c5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/bbtools commit cd59ba2c349865259b92302a1d70e103b8a5e3cb
| author | iuc |
|---|---|
| date | Tue, 27 Aug 2024 10:13:32 +0000 |
| parents | 03646ca53c91 |
| children | 1f6a6b1af62b |
| files | bbmerge.xml macros.xml test-data/cv_output.gff test-data/cv_output.txt test-data/cv_output.vcf test-data/cv_qualityhist_output.tabular test-data/cv_scorehist_output.tabular test-data/cv_zygosityhist_output.tabular |
| diffstat | 8 files changed, 71 insertions(+), 68 deletions(-) [+] |
line wrap: on
line diff
--- a/bbmerge.xml Tue May 30 09:01:10 2023 +0000 +++ b/bbmerge.xml Tue Aug 27 10:13:32 2024 +0000 @@ -4,11 +4,9 @@ <import>macros.xml</import> </macros> <expand macro="edam_ontology"/> + <expand macro="bio.tools"/> <expand macro="requirements"/> <command detect_errors="exit_code"><![CDATA[ -#import os -#import re - #if str($input_type_cond.input_type) in ['single', 'pair']: #set read1 = $input_type_cond.read1 ## bbmerge uses the file extension to determine the input format. @@ -25,21 +23,24 @@ #end if #else: #set read1 = $input_type_cond.reads_collection['forward'] - #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) ## bbmap uses the file extension to determine the input format. - #set ext = $read1_identifier + '.fastq' + #set ext = '.fastq' #if $read1.ext.endswith('.gz'): #set ext = $ext + '.gz' #end if - #set read1_file = $read1_identifier + $ext + #set read1_file = 'forward' + $ext ln -s '${read1}' '${read1_file}' && #set read2 = $input_type_cond.reads_collection['reverse'] - #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) - #set read2_file = $read2_identifier + $ext + #set read2_file = 'reverse' + $ext ln -s '${read2}' '${read2_file}' && #end if -bbmerge.sh +if [[ "\${_JAVA_OPTIONS}" != *-Xmx* && "\${JAVA_TOOL_OPTIONS}" != *-Xmx* ]]; then + export _JAVA_OPTIONS="\${_JAVA_OPTIONS} -Xmx\${GALAXY_MEMORY_MB:-4096}m -Xms256m"; +fi && + +bbmerge.sh tmpdir="\$TMPDIR" t="\${GALAXY_SLOTS:-2}" + #### Input parameters #if str($input_type_cond.input_type) == 'single': in='${read1_file}' @@ -97,7 +98,6 @@ ]]></command> <inputs> <expand macro="input_type_cond"/> - <section name="qt_options" title="Quality and trimming options"> <param name="qtrim" type="select" label="Select option for quality trimming ends before mapping"> <option value="f" selected="true">No trimming</option> @@ -109,7 +109,6 @@ <param argument="minlength_after_trim" type="integer" value="60" label="Don't trim reads to be shorter than this value"/> <param argument="usequality" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Use quality scores when determining which read kmers to use as seeds?"/> </section> - <section name="merge_options" title="Merging parameters"> <param name="strictness" type="select" label="Select option for quality trimming ends before mapping"> <option value="xstrict">max strict</option> @@ -127,16 +126,13 @@ <param argument="trimnonoverlapping" type="boolean" truevalue="t" falsevalue="f" checked="false" label="Trim all non-overlapping portions, leaving only consensus sequence. By default, only sequence to the right of the overlap (adapter sequence) is trimmed."/> <param argument="mininsert" type="integer" value="35" label="Minimum insert size to merge reads"/> <param argument="minoverlap" type="integer" value="12" label="Minimum number of overlapping bases to allow merging"/> - <param argument="minq" type="integer" value="9" label="Ignore bases with quality below this"/> <param argument="maxq" type="integer" value="41" label="Cap output quality scores at this"/> - <param argument="entropy" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Increase the minimum overlap requirement for low-complexity reads"/> <param argument="efilter" type="integer" value="6" label="Ban overlaps with over this many times the expected number of errors." help="Lower is more strict, -1 disables."/> <param argument="pfilter" type="float" value="0.00004" label="Probability filter to disallow improbable overlaps." help="Higher is stricter. 0 will disable the filter; 1 will allow only perfect overlaps."/> <param argument="kfilter" type="integer" value="41" label="Ban overlaps that create kmers with count below this value" help="Requires good coverage, 0 disables."/> <param argument="usequality" type="boolean" truevalue="t" falsevalue="f" checked="true" label="Take quality factors into account" help="If disabled, quality values are completely ignored, both for overlap detection and filtering. May be useful for data with inaccurate quality values."/> - <conditional name="adapters"> <param name="selector" type="select" label="Provide adapter sequences to improve accuracy?"> <option value="wout_adapters" selected="true">No</option> @@ -148,7 +144,6 @@ <param argument="adapter2" type="text" value="" label="Right adapter sequence"/> </when> </conditional> - <conditional name="merge_mode"> <param name="selector" type="select" label="Evaluate overlaps via..." help="In the ratio mode, overlaps are decided based on the ratio of matching to mismatching bases. Flat mode scores overlaps based on the total number of mismatching bases only."> <option value="Ratio mode" selected="true">Ratio mode</option> @@ -252,4 +247,3 @@ </help> <expand macro="citations"/> </tool> -
--- a/macros.xml Tue May 30 09:01:10 2023 +0000 +++ b/macros.xml Tue Aug 27 10:13:32 2024 +0000 @@ -1,21 +1,31 @@ <macros> - <token name="@TOOL_VERSION@">39.01</token> + <token name="@TOOL_VERSION@">39.08</token> <token name="@VERSION_SUFFIX@">0</token> <token name="@PROFILE@">22.01</token> <xml name="edam_ontology"> <edam_topics> - <edam_topic>topic_0622</edam_topic> <!-- Genomics --> - <edam_topic>topic_0091</edam_topic> <!-- Bioinformatics --> + <edam_topic>topic_0622</edam_topic> + <!-- Genomics --> + <edam_topic>topic_0091</edam_topic> + <!-- Bioinformatics --> </edam_topics> <edam_operations> - <edam_operation>operation_0496</edam_operation> <!-- Global alignment --> - <edam_operation>operation_0491</edam_operation> <!-- Pairwise sequence alignment --> + <edam_operation>operation_0496</edam_operation> + <!-- Global alignment --> + <edam_operation>operation_0491</edam_operation> + <!-- Pairwise sequence alignment --> </edam_operations> </xml> + <xml name="bio.tools"> + <xrefs> + <xref type="bio.tools">bbtools</xref> + </xrefs> + </xml> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">bbmap</requirement> - <requirement type="package" version="1.16.1">samtools</requirement> <!-- automatic solving installs 1.6 in some cases, instead --> + <requirement type="package" version="1.20">samtools</requirement> + <!-- automatic solving installs 1.6 in some cases, instead --> </requirements> </xml> <macro name="dbKeyActionsBBMap"> @@ -57,7 +67,7 @@ <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Reverse reads fastq file"/> </when> <when value="paired"> - <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> + <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" label="Collection of fastqsanger paired read files" collection_type="paired"/> </when> </conditional> </macro> @@ -82,18 +92,18 @@ </macro> <macro name="ktrim_cond"> <conditional name="ktrim_cond"> - <param name="ktrim_select" type="select" label="Trim reads to remove bases matching reference kmers?"> + <param name="ktrim_select" type="select" label="Trim reads to remove bases matching reference kmers?"> <option value="no" selected="true">No</option> <option value="yes">Yes</option> </param> <when value="no"/> <when value="yes"> - <param argument="ktrim" type="select" label="Select trimming position"> + <param argument="ktrim" type="select" label="Select trimming position"> <option value="r">Trim to the right</option> <option value="l">Trim to the left</option> </param> <param argument="minlength" type="integer" value="10" label="Minimum read length" help="Trimmed reads shorter than this will be discarded, pairs will be discarded if both are shorter."/> - </when> + </when> </conditional> </macro> <xml name="citations"> @@ -104,4 +114,3 @@ </citations> </xml> </macros> -
--- a/test-data/cv_output.gff Tue May 30 09:01:10 2023 +0000 +++ b/test-data/cv_output.gff Tue Aug 27 10:13:32 2024 +0000 @@ -1,18 +1,18 @@ -##gff-version -#BBMapVersion +##gff-version 3 +#BBMapVersion 39.08 #ploidy 2 #rarity 1.00000 #minAlleleFraction 0.1000 #reads 2207 #pairedReads 0 #properlyPairedReads 0 -#readLengthAvg +#readLengthAvg 145.46 #properPairRate 0.0000 -#totalQualityAvg -#mapqAvg -#reference +#totalQualityAvg 36.5800 +#mapqAvg 43.35 +#reference /home/bag/projects/code/tools-iuc/tools/bbtools/test-data/NC_002945v4.fasta #seqid source type start end score strand phase attributes -NC_002945.4 . sequence_variant_obs 976327 976327 -NC_002945.4 . sequence_variant_obs 1501932 1501932 -NC_002945.4 . sequence_variant_obs 3380579 3380579 -NC_002945.4 . sequence_variant_obs 3646293 3646293 +NC_002945.4 . sequence_variant_obs 976327 976327 0.87 + . ID=SUB T +NC_002945.4 . sequence_variant_obs 1501932 1501932 0.92 + . ID=SUB C +NC_002945.4 . sequence_variant_obs 3380579 3380579 0.92 + . ID=SUB G +NC_002945.4 . sequence_variant_obs 3646293 3646293 0.89 + . ID=SUB A
--- a/test-data/cv_output.txt Tue May 30 09:01:10 2023 +0000 +++ b/test-data/cv_output.txt Tue Aug 27 10:13:32 2024 +0000 @@ -1,18 +1,18 @@ -#fileformat -#BBMapVersion +#fileformat Var_1.3 +#BBMapVersion 39.08 #ploidy 2 #rarity 1.00000 #minAlleleFraction 0.1000 #reads 2207 #pairedReads 0 #properlyPairedReads 0 -#readLengthAvg -#properPairRate -#totalQualityAvg -#mapqAvg -#reference +#readLengthAvg 145.46 +#properPairRate 0.0000 +#totalQualityAvg 36.5800 +#mapqAvg 43.35 +#reference /home/bag/projects/code/tools-iuc/tools/bbtools/test-data/NC_002945v4.fasta #scaf start stop type call r1p r1m r2p r2m paired lengthSum mapq mapqMax baseq baseqMax edist edistMax id idMax cov minusCov nearbyVarCount flagged contigEndDist phredScore readCount alleleFraction revisedAF strandRatio baseqAvg mapqAvg edistAvg identityAvg edistScore identityScore qualityScore pairedScore biasScore coverageScore homopolymerScore score -0 976326 976327 SUB T -0 1501931 1501932 SUB C -0 3380578 3380579 SUB G -0 3646292 3646293 SUB A +0 976326 976327 SUB T 2 0 0 0 0 292 77 40 48 24 111 65 1930 972 0 -1 0 0 0 22.21 2 1 1 0.5000 24 38.50 55.50 965 0.9972 0.9932 0.7489 0.9800 0.9737 0.7143 1 0.8725 +0 1501931 1501932 SUB C 0 2 0 0 0 292 88 44 64 39 74 38 1986 993 0 -1 0 0 0 27.20 2 1 1 0.5000 32 44 37 993 0.9851 0.9994 0.9831 0.9800 0.9737 0.7143 1 0.9202 +0 3380578 3380579 SUB G 0 2 0 0 0 289 87 44 75 38 105 63 1986 993 0 -1 0 0 0 27.85 2 1 1 0.5000 37.50 43.50 52.50 993 0.9968 0.9994 0.9972 0.9800 0.9737 0.7143 1 0.9250 +0 3646292 3646293 SUB A 1 1 0 0 0 291 86 44 76 38 44 31 1986 993 0 -1 0 0 0 23.34 2 1 1 1 38 43 22 993 0.7996 0.9994 0.9975 0.9800 0.9747 0.7143 1 0.8853
--- a/test-data/cv_output.vcf Tue May 30 09:01:10 2023 +0000 +++ b/test-data/cv_output.vcf Tue Aug 27 10:13:32 2024 +0000 @@ -1,16 +1,16 @@ ##fileformat=VCFv4.2 -##BBMapVersion= +##BBMapVersion=39.08 ##ploidy=2 ##rarity=1.00000 ##minallelefraction=0.10000 ##reads=2207 ##pairedReads=0 ##properlyPairedReads=0 -##readLengthAvg= +##readLengthAvg=145.462 ##properPairRate=0.00000 -##totalQualityAvg= +##totalQualityAvg=36.580 ##mapqAvg=43.348 -##reference= +##reference=/home/bag/projects/code/tools-iuc/tools/bbtools/test-data/NC_002945v4.fasta ##contig=<ID=NC_002945.4,length=7000> ##FILTER=<ID=FAIL,Description="Fail"> ##FILTER=<ID=PASS,Description="Pass"> @@ -53,8 +53,8 @@ ##FORMAT=<ID=SB,Number=1,Type=Float,Description="Strand Bias"> ##FORMAT=<ID=SC,Number=1,Type=Float,Description="Score"> ##FORMAT=<ID=PF,Number=1,Type=String,Description="Pass Filter"> -#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT -NC_002945.4 976327 . N T 22.21 PASS SN=0;STA=976326;STO=976327;TYP=SUB;R1P=2;R1M=0;R2P=0;R2M=0;AD=2;DP=2;MCOV=-1;PPC=0;AF=1.0000;RAF=1.0000;LS=292;MQS=77;MQM=40;BQS=48;BQM=24;EDS=111;EDM=65;IDS=1930;IDM=972;NVC=0;FLG=0;CED=0;HMP=0;SB=0.9980 GT:DP:AD:AF:RAF:NVC:FLG:SB:SC:PF 1/1:2:2:1.0000:1.0000:0:0:0.9980:22.21:PASS -NC_002945.4 1501932 . N C 27.20 PASS SN=0;STA=1501931;STO=1501932;TYP=SUB;R1P=0;R1M=2;R2P=0;R2M=0;AD=2;DP=2;MCOV=-1;PPC=0;AF=1.0000;RAF=1.0000;LS=292;MQS=88;MQM=44;BQS=64;BQM=39;EDS=74;EDM=38;IDS=1986;IDM=993;NVC=0;FLG=0;CED=0;HMP=0;SB=0.9980 GT:DP:AD:AF:RAF:NVC:FLG:SB:SC:PF 1/1:2:2:1.0000:1.0000:0:0:0.9980:27.20:PASS -NC_002945.4 3380579 . N G 27.85 PASS SN=0;STA=3380578;STO=3380579;TYP=SUB;R1P=0;R1M=2;R2P=0;R2M=0;AD=2;DP=2;MCOV=-1;PPC=0;AF=1.0000;RAF=1.0000;LS=289;MQS=87;MQM=44;BQS=75;BQM=38;EDS=105;EDM=63;IDS=1986;IDM=993;NVC=0;FLG=0;CED=0;HMP=0;SB=0.9980 GT:DP:AD:AF:RAF:NVC:FLG:SB:SC:PF 1/1:2:2:1.0000:1.0000:0:0:0.9980:27.85:PASS -NC_002945.4 3646293 . N A 23.34 PASS SN=0;STA=3646292;STO=3646293;TYP=SUB;R1P=1;R1M=1;R2P=0;R2M=0;AD=2;DP=2;MCOV=-1;PPC=0;AF=1.0000;RAF=1.0000;LS=291;MQS=86;MQM=44;BQS=76;BQM=38;EDS=44;EDM=31;IDS=1986;IDM=993;NVC=0;FLG=0;CED=0;HMP=0;SB=1.0000 GT:DP:AD:AF:RAF:NVC:FLG:SB:SC:PF 1/1:2:2:1.0000:1.0000:0:0:1.0000:23.34:PASS +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT dataset_4f1db038-76fe-46bc-a222-dfc4894a8ff2.dat +NC_002945.4 976327 . N T 22.21 PASS SN=0;STA=976326;STO=976327;TYP=SUB;R1P=2;R1M=0;R2P=0;R2M=0;AD=2;DP=2;MCOV=-1;PPC=0;AF=1;RAF=1;LS=292;MQS=77;MQM=40;BQS=48;BQM=24;EDS=111;EDM=65;IDS=1930;IDM=972;NVC=0;FLG=0;CED=0;HMP=0;SB=0.9980 GT:DP:AD:AF:RAF:NVC:FLG:SB:SC:PF 1/1:2:2:1:1:0:0:0.9980:22.21:PASS +NC_002945.4 1501932 . N C 27.20 PASS SN=0;STA=1501931;STO=1501932;TYP=SUB;R1P=0;R1M=2;R2P=0;R2M=0;AD=2;DP=2;MCOV=-1;PPC=0;AF=1;RAF=1;LS=292;MQS=88;MQM=44;BQS=64;BQM=39;EDS=74;EDM=38;IDS=1986;IDM=993;NVC=0;FLG=0;CED=0;HMP=0;SB=0.9980 GT:DP:AD:AF:RAF:NVC:FLG:SB:SC:PF 1/1:2:2:1:1:0:0:0.9980:27.20:PASS +NC_002945.4 3380579 . N G 27.85 PASS SN=0;STA=3380578;STO=3380579;TYP=SUB;R1P=0;R1M=2;R2P=0;R2M=0;AD=2;DP=2;MCOV=-1;PPC=0;AF=1;RAF=1;LS=289;MQS=87;MQM=44;BQS=75;BQM=38;EDS=105;EDM=63;IDS=1986;IDM=993;NVC=0;FLG=0;CED=0;HMP=0;SB=0.9980 GT:DP:AD:AF:RAF:NVC:FLG:SB:SC:PF 1/1:2:2:1:1:0:0:0.9980:27.85:PASS +NC_002945.4 3646293 . N A 23.34 PASS SN=0;STA=3646292;STO=3646293;TYP=SUB;R1P=1;R1M=1;R2P=0;R2M=0;AD=2;DP=2;MCOV=-1;PPC=0;AF=1;RAF=1;LS=291;MQS=86;MQM=44;BQS=76;BQM=38;EDS=44;EDM=31;IDS=1986;IDM=993;NVC=0;FLG=0;CED=0;HMP=0;SB=1 GT:DP:AD:AF:RAF:NVC:FLG:SB:SC:PF 1/1:2:2:1:1:0:0:1:23.34:PASS
--- a/test-data/cv_qualityhist_output.tabular Tue May 30 09:01:10 2023 +0000 +++ b/test-data/cv_qualityhist_output.tabular Tue Aug 27 10:13:32 2024 +0000 @@ -1,8 +1,8 @@ #BaseQualityHist -#Vars -#Mean -#Median -#Mode +#Vars 4 +#Mean 32.75 +#Median 32 +#Mode 32 #Quality AvgCount MaxCount 0 0 0 1 0 0
