# HG changeset patch # User dvanzessen # Date 1563181906 14400 # Node ID 2ed60a09d6b6aae5b07bd540ca3439f8b47d4daa Uploaded diff -r 000000000000 -r 2ed60a09d6b6 LICENSE.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE.md Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,22 @@ + +The MIT License (MIT) + +Copyright (c) 2019 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff -r 000000000000 -r 2ed60a09d6b6 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,1 @@ +bcbio-nextgen diff -r 000000000000 -r 2ed60a09d6b6 bcbio-nextgen.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bcbio-nextgen.xml Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,116 @@ + + $output_vcf && + python $__tool_directory__/make_html.py --input-dir $everything_else.files_path --root-html $everything_else + ]]> + + +details: +{% for sample in samples -%} +- algorithm: + aligner: $aligner + mark_duplicates: ${mark_duplicates} + remove_lcr: ${remove_lcr} + variantcaller: [$variantcallers] + {% if bed_file_path -%} variant_regions: {{ bed_file_path }}{% endif %} + ensemble: + numpass: 2 + align_split_size: false + analysis: $analysis + lane: {{ loop.index }} + description: {{ sample['description'] }} + files: [{{ sample['forward'] }}, {{ sample['reverse'] }}] + genome_build: $build + metadata: + phenotype: {{ sample['phenotype'] }} + batch: Batch1 + upload: + dir: ./final +{% endfor %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 000000000000 -r 2ed60a09d6b6 bcbio_system.yaml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bcbio_system.yaml Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,58 @@ +--- +# Configuration file specifying system details for running an analysis pipeline +# These pipeline apply generally across multiple projects. Adjust them in sample +# specific configuration files when needed. + +# -- Base setup + +# Define resources to be used for individual programs on multicore machines. +# These can be defined specifically for memory and processor availability. +# - memory: Specify usage for memory intensive programs. The indicated value +# specifies the wanted *per core* usage. +# - cores: Define cores that can be used for multicore programs. 
The indicated +# value is the maximum number of cores that should be allocated to a program. +# - jvm_opts: JVM command-line options (e.g. heap size flags such as -Xms/-Xmx) passed to Java-based programs. +resources: + # default options, used if other items below are not present + # avoids needing to configure/adjust for every program + default: + memory: 3G + cores: 16 + jvm_opts: ["-Xms750m", "-Xmx3500m"] + gatk: + jvm_opts: ["-Xms500m", "-Xmx3500m"] + snpeff: + jvm_opts: ["-Xms750m", "-Xmx3g"] + qualimap: + memory: 4g + express: + memory: 8g + dexseq: + memory: 10g + macs2: + memory: 8g + seqcluster: + memory: 8g + +# Location of the Galaxy configuration file, which has pointers to reference data +# https://bcbio-nextgen.readthedocs.org/en/latest/contents/configuration.html#reference-genome-files +galaxy_config: universe_wsgi.ini + + +# -- Additional options for specific integration, not required for standalone usage. + +# Galaxy integration. Required for retrieving information from Galaxy LIMS. +#galaxy_url: http://your/galaxy/url +#galaxy_api_key: your_galaxy_api_key + +# Details for hooking automated processing to a sequencer machine. +# Not required if running standalone pipelines. +# analysis: +# # Can specify a different remote host to initiate +# # the copy from. This is useful for NFS shared filesystems +# # where you want to manage the copy from the base machine. 
+# copy_user: +# copy_host: +# store_dir: /store4/solexadata +# base_dir: /array0/projects/Sequencing +# worker_program: nextgen_analysis_server.py diff -r 000000000000 -r 2ed60a09d6b6 make_html.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_html.py Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,33 @@ +import os +import argparse +from jinja2 import Template + + +def main(): + # --workdir `pwd` --output-dir `pwd`/output --input + parser = argparse.ArgumentParser() + + parser.add_argument("--input-dir", "-d", required=True) + parser.add_argument("--root-html", "-o", required=True) + + args = parser.parse_args() + + input_dir = args.input_dir + root_html = args.root_html + + with open(root_html, 'w') as root_html_handle: + root_html_handle.write("
    ") + for root, dirs, files in os.walk(input_dir, followlinks=True): + print(root, dirs, files) + relative_root = root.replace(input_dir, "")[:-1] + print(relative_root) + for f in files: + f = "{0}/{1}".format(relative_root, f) + if f.startswith("/"): + f = f[1:] + root_html_handle.write("
  1. {0}
  2. ".format( + f + )) + +if __name__ == "__main__": + main() \ No newline at end of file diff -r 000000000000 -r 2ed60a09d6b6 script.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/script.py Mon Jul 15 05:11:46 2019 -0400 @@ -0,0 +1,80 @@ +import os +import argparse +from collections import Counter +from jinja2 import Template + + +def main(): + # --workdir `pwd` --output-dir `pwd`/output --input + parser = argparse.ArgumentParser() + + parser.add_argument("--workdir", "-w", required=True) + parser.add_argument("--output-dir", "-o", required=True) + parser.add_argument("--template", "-t", required=True) + parser.add_argument("--output-conf", "-c", required=True) + parser.add_argument("--input", "-i", action="append", required=True) + parser.add_argument("--bed", "-b", default=None) + + + + args = parser.parse_args() + + workdir = args.workdir + output_dir = args.output_dir + input_files_raw = args.input + template_file_path = args.template + output_config_path = args.output_conf + bed_file_path = args.bed + + + if bed_file_path: + bed_new_name = "bed_file.bed" + bed_new_file_path = os.path.join( + workdir, + bed_new_name + ) + os.symlink(bed_file_path, bed_new_file_path) + bed_file_path = bed_new_file_path + + input_files = [] + phenotype_counter = Counter() + for input_file in input_files_raw: + if input_file.find(":"): + forward_file, reverse_file, phenotype = input_file.split(":") + phenotype_counter.update(phenotype) + phenotype_count = phenotype_counter[phenotype] + + forward_new_name = "{phenotype}_{phenotype_count}_R1.fastq.gz".format(phenotype=phenotype, phenotype_count=phenotype_count) + forward_new_file_path = os.path.join( + workdir, + forward_new_name + ) + os.symlink(forward_file, forward_new_file_path) + + reverse_new_name = "{phenotype}_{phenotype_count}_R2.fastq.gz".format(phenotype=phenotype, phenotype_count=phenotype_count) + reverse_new_file_path = os.path.join( + workdir, + reverse_new_name + ) + os.symlink(reverse_file, 
reverse_new_file_path) + + input_files.append( + { + "forward": forward_new_file_path, + "reverse": reverse_new_file_path, + "description": "{phenotype}_{phenotype_index}".format(phenotype=phenotype, phenotype_index=phenotype_count), + "phenotype": phenotype + } + ) + + with open(output_config_path, 'w') as config_file_handle, open(template_file_path, 'r') as template_file_handle: + template = Template(template_file_handle.read()) + config_file_handle.write(template.render( + samples=input_files, + output_dir=output_dir, + bed_file_path=bed_file_path + )) + + +if __name__ == "__main__": + main() \ No newline at end of file