Mercurial > repos > dvanzessen > bcbio_nextgen_emc
changeset 0:2ed60a09d6b6 draft
Uploaded
author | dvanzessen |
---|---|
date | Mon, 15 Jul 2019 05:11:46 -0400 |
parents | |
children | fd8fe1448616 |
files | LICENSE.md README.md bcbio-nextgen.xml bcbio_system.yaml make_html.py script.py |
diffstat | 6 files changed, 310 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE.md Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,22 @@ + +The MIT License (MIT) + +Copyright (c) 2019 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,1 @@ +bcbio-nextgen
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bcbio-nextgen.xml Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,116 @@ +<tool id="bcbio-nextgen" name="BCBio" version="0.1.0"> + <command detect_errors="exit_code"><![CDATA[ + PATH="/home/bioinf/bcbio_test/tool_dir/bin:/home/bioinf/bcbio_test/anaconda/bin:\$PATH" && + mkdir $everything_else.files_path && + cd $everything_else.files_path && + python $__tool_directory__/script.py --workdir `pwd` --output-dir `pwd`/output --template $main --output-conf `pwd`/config.yaml + #if str( $region_file.has_region_file_select ) == "yes": + --bed ${region_file.bed} + #end if + #for $i, $sample in enumerate( $samples ) + --input $sample.forward:$sample.reverse:$sample.phenotype + #end for + && bcbio_nextgen.py /home/bioinf/bcbio_test/galaxy/bcbio_system.yaml `pwd`/config.yaml && + zcat final/*/Batch1-ensemble-annotated.vcf.gz > $output_vcf && + python $__tool_directory__/make_html.py --input-dir $everything_else.files_path --root-html $everything_else + ]]></command> + <configfiles> + <configfile name='main'> +details: +{% for sample in samples -%} +- algorithm: + aligner: $aligner + mark_duplicates: ${mark_duplicates} + remove_lcr: ${remove_lcr} + variantcaller: [$variantcallers] + {% if bed_file_path -%} variant_regions: {{ bed_file_path }}{% endif %} + ensemble: + numpass: 2 + align_split_size: false + analysis: $analysis + lane: {{ loop.index }} + description: {{ sample['description'] }} + files: [{{ sample['forward'] }}, {{ sample['reverse'] }}] + genome_build: $build + metadata: + phenotype: {{ sample['phenotype'] }} + batch: Batch1 + upload: + dir: ./final +{% endfor %} + </configfile> + </configfiles> + <inputs> + <param name="build" type="select" label="Genome Build"> + <option value="GRCh37">GRCh37</option> + <option value="hg19">hg19</option> + <option value="GRCm38">GRCm38</option> + <option value="mm10">mm10</option> + </param> + <param name="analysis" type="select" label="Analysis"> + <option 
value="variant2">variant2</option> + <option value="RNA-seq">RNA-seq</option> + <option value="smallRNA-seq">smallRNA-seq</option> + </param> + <param name="aligner" type="select" label="Aligner"> + <option value="bwa">bwa</option> + <option value="bowtie">bowtie</option> + <option value="bowtie2">bowtie2</option> + <option value="hisat2">hisat2</option> + <option value="minimap2">minimap2</option> + <option value="novoalign">novoalign</option> + <option value="snap">snap</option> + <option value="star">star</option> + <option value="tophat2">tophat2</option> + <option value="false">false</option> + </param> + <param name="variantcallers" type="select" multiple="true" label="Variantcallers"> + <option value="false">false</option> + <option value="freebayes">freebayes</option> + <option value="gatk-haplotype">gatk-haplotype</option> + <option value="haplotyper">haplotyper</option> + <option value="platypus">platypus</option> + <!--<option value="mutect">mutect</option>--> + <option value="mutect2">mutect2</option> + <option value="scalpel">scalpel</option> + <option value="tnhaplotyper">tnhaplotyper</option> + <option value="tnscope">tnscope</option> + <option value="vardict">vardict</option> + <option value="varscan">varscan</option> + <option value="samtools">samtools</option> + <option value="gatk">gatk</option> + </param> + <param name="mark_duplicates" type="select"> + <option value="true" selected="true">true</option> + <option value="false">false</option> + </param> + <param name="remove_lcr" type="select"> + <option value="true" selected="true">true</option> + <option value="false">false</option> + </param> + <repeat name="samples" title="Samples" min="1"> + <param name="phenotype" type="select" label="Phenotype"> + <option value="normal">Normal</option> + <option value="tumor">Tumor</option> + </param> + <param type="data" name="forward" format="fastq,fastq.gz,fastqsanger.gz" /> + <param type="data" name="reverse" format="fastq,fastq.gz,fastqsanger.gz" /> + 
</repeat> + <conditional name="region_file"> + <param name="has_region_file_select" type="select" label="Do you have a region file"> + <option value="yes">Yes</option> + <option value="no" selected="true">No</option> + </param> + <when value="yes"> + <param type="data" name="bed" format="bed" /> + </when> + </conditional> + </inputs> + <outputs> + <data name="output_vcf" format="vcf"/> + <data name="everything_else" format="html"/> + </outputs> + <help><![CDATA[ + TODO: Fill in help. + ]]></help> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bcbio_system.yaml Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,58 @@ +--- +# Configuration file specifying system details for running an analysis pipeline +# These settings apply generally across multiple projects. Adjust them in sample +# specific configuration files when needed. + +# -- Base setup + +# Define resources to be used for individual programs on multicore machines. +# These can be defined specifically for memory and processor availability. +# - memory: Specify usage for memory intensive programs. The indicated value +# specifies the wanted *per core* usage. +# - cores: Define cores that can be used for multicore programs. The indicated +# value is the maximum cores that should be allocated for a program. +# - jvm_opts: Specify options passed to the Java virtual machine for Java-based +# programs (for example, initial and maximum heap size). +resources: + # default options, used if other items below are not present + # avoids needing to configure/adjust for every program + default: + memory: 3G + cores: 16 + jvm_opts: ["-Xms750m", "-Xmx3500m"] + gatk: + jvm_opts: ["-Xms500m", "-Xmx3500m"] + snpeff: + jvm_opts: ["-Xms750m", "-Xmx3g"] + qualimap: + memory: 4g + express: + memory: 8g + dexseq: + memory: 10g + macs2: + memory: 8g + seqcluster: + memory: 8g + +# Location of galaxy configuration file, which has pointers to reference data +# https://bcbio-nextgen.readthedocs.org/en/latest/contents/configuration.html#reference-genome-files +galaxy_config: universe_wsgi.ini + + +# -- Additional options for specific integration, not required for standalone usage. + +# Galaxy integration. Required for retrieving information from Galaxy LIMS. +#galaxy_url: http://your/galaxy/url +#galaxy_api_key: your_galaxy_api_key + +# Details for hooking automated processing to a sequencer machine. +# Not required if running standalone pipelines. +# analysis: +# # Can specify a different remote host to initiate +# # the copy from. This is useful for NFS shared filesystems +# # where you want to manage the copy from the base machine. 
+# copy_user: +# copy_host: +# store_dir: /store4/solexadata +# base_dir: /array0/projects/Sequencing +# worker_program: nextgen_analysis_server.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_html.py Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,33 @@
"""Write an HTML page listing every file found beneath a directory."""
import os
import argparse
from xml.sax.saxutils import escape


def _relative_file_paths(input_dir):
    """Return every file under *input_dir* as a sorted list of relative paths.

    Symlinked directories are followed (``os.walk(..., followlinks=True)``).
    """
    relative_paths = []
    for root, _dirs, files in os.walk(input_dir, followlinks=True):
        for file_name in files:
            # relpath removes exactly the input_dir prefix and keeps every
            # remaining path component intact, whether or not input_dir was
            # given with a trailing slash.
            relative_paths.append(
                os.path.relpath(os.path.join(root, file_name), input_dir)
            )
    # os.walk yields entries in arbitrary filesystem order; sort so the page
    # is reproducible between runs.
    return sorted(relative_paths)


def main(argv=None):
    """Parse the command line and write the HTML file listing.

    Args:
        argv: Optional list of command-line arguments. Defaults to
            ``sys.argv[1:]`` when ``None`` (argparse's standard behaviour).

    Command-line options:
        --input-dir / -d: directory whose files are listed (required).
        --root-html / -o: path of the HTML file to write (required).

    The output is a single ``<ol>`` element containing one ``<li>`` per file,
    holding the file's path relative to the input directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input-dir", "-d", required=True)
    parser.add_argument("--root-html", "-o", required=True)
    args = parser.parse_args(argv)

    # File names may contain characters that are special in HTML (&, <, >).
    items = "".join(
        "<li>{0}</li>".format(escape(relative_path))
        for relative_path in _relative_file_paths(args.input_dir)
    )

    with open(args.root_html, 'w') as root_html_handle:
        root_html_handle.write("<ol>{0}</ol>".format(items))


if __name__ == "__main__":
    main()
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/script.py Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,80 @@
"""Stage paired FASTQ inputs under predictable names and render a sample config.

Each ``--input`` is a ``forward:reverse:phenotype`` triple. The two read files
are symlinked into the work directory as ``<phenotype>_<n>_R1.fastq.gz`` and
``<phenotype>_<n>_R2.fastq.gz``, and the resulting sample list is handed to a
Jinja2 template that is rendered to the output configuration file.
"""
import os
import argparse
from collections import Counter
from jinja2 import Template


def _link_into(workdir, source_path, link_name):
    """Symlink *source_path* as *link_name* inside *workdir*; return the link path."""
    link_path = os.path.join(workdir, link_name)
    os.symlink(source_path, link_path)
    return link_path


def _stage_samples(input_specs, workdir):
    """Symlink every sample's read files into *workdir* and describe them.

    Args:
        input_specs: iterable of ``forward:reverse:phenotype`` strings.
        workdir: directory in which the symlinks are created.

    Returns:
        A list of dicts with the keys ``forward``, ``reverse``,
        ``description`` and ``phenotype``, one per input spec, in order.

    Raises:
        ValueError: if a spec does not consist of exactly three non-empty,
            colon-separated fields.
        OSError: if a symlink cannot be created (e.g. the name already exists).
    """
    samples = []
    phenotype_counter = Counter()
    for spec in input_specs:
        parts = spec.split(":")
        if len(parts) != 3 or not all(parts):
            raise ValueError(
                "--input must be forward:reverse:phenotype, got {0!r}".format(spec)
            )
        forward_file, reverse_file, phenotype = parts

        # Count the phenotype itself. Counter.update() with a bare string
        # would count its individual characters and leave this key at 0,
        # giving every sample of a phenotype the same link name.
        phenotype_counter[phenotype] += 1
        phenotype_count = phenotype_counter[phenotype]

        # NOTE(review): links are always named *.fastq.gz, although the tool
        # definition also accepts uncompressed fastq inputs - confirm that
        # downstream tooling copes with that.
        forward_new_file_path = _link_into(
            workdir,
            forward_file,
            "{phenotype}_{phenotype_count}_R1.fastq.gz".format(
                phenotype=phenotype, phenotype_count=phenotype_count
            ),
        )
        reverse_new_file_path = _link_into(
            workdir,
            reverse_file,
            "{phenotype}_{phenotype_count}_R2.fastq.gz".format(
                phenotype=phenotype, phenotype_count=phenotype_count
            ),
        )

        samples.append(
            {
                "forward": forward_new_file_path,
                "reverse": reverse_new_file_path,
                "description": "{phenotype}_{phenotype_index}".format(
                    phenotype=phenotype, phenotype_index=phenotype_count
                ),
                "phenotype": phenotype,
            }
        )
    return samples


def main(argv=None):
    """Parse the command line, stage the inputs and write the rendered config.

    Args:
        argv: Optional list of command-line arguments. Defaults to
            ``sys.argv[1:]`` when ``None`` (argparse's standard behaviour).

    Command-line options:
        --workdir / -w: directory that receives the symlinks (required).
        --output-dir / -o: passed to the template as ``output_dir`` (required).
        --template / -t: path of the Jinja2 template to render (required).
        --output-conf / -c: path of the config file to write (required).
        --input / -i: ``forward:reverse:phenotype``; repeatable (required).
        --bed / -b: optional region file; symlinked into the work directory as
            ``bed_file.bed`` and passed to the template as ``bed_file_path``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--workdir", "-w", required=True)
    parser.add_argument("--output-dir", "-o", required=True)
    parser.add_argument("--template", "-t", required=True)
    parser.add_argument("--output-conf", "-c", required=True)
    parser.add_argument("--input", "-i", action="append", required=True)
    parser.add_argument("--bed", "-b", default=None)

    args = parser.parse_args(argv)

    bed_file_path = args.bed
    if bed_file_path:
        # Give the region file a stable name with a .bed extension.
        bed_file_path = _link_into(args.workdir, bed_file_path, "bed_file.bed")

    input_files = _stage_samples(args.input, args.workdir)

    with open(args.template, 'r') as template_file_handle:
        template = Template(template_file_handle.read())

    with open(args.output_conf, 'w') as config_file_handle:
        config_file_handle.write(template.render(
            samples=input_files,
            output_dir=args.output_dir,
            bed_file_path=bed_file_path
        ))


if __name__ == "__main__":
    main()