changeset 0:2ed60a09d6b6 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:11:46 -0400
parents
children fd8fe1448616
files LICENSE.md README.md bcbio-nextgen.xml bcbio_system.yaml make_html.py script.py
diffstat 6 files changed, 310 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE.md	Mon Jul 15 05:11:46 2019 -0400
@@ -0,0 +1,22 @@
+
+The MIT License (MIT)
+
+Copyright (c) 2019 
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Mon Jul 15 05:11:46 2019 -0400
@@ -0,0 +1,1 @@
+bcbio-nextgen
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bcbio-nextgen.xml	Mon Jul 15 05:11:46 2019 -0400
@@ -0,0 +1,116 @@
+<tool id="bcbio-nextgen" name="BCBio" version="0.1.0">
+    <command detect_errors="exit_code"><![CDATA[
+        PATH="/home/bioinf/bcbio_test/tool_dir/bin:/home/bioinf/bcbio_test/anaconda/bin:\$PATH" && 
+        mkdir $everything_else.files_path && 
+        cd $everything_else.files_path &&
+        python $__tool_directory__/script.py --workdir `pwd` --output-dir `pwd`/output --template $main --output-conf `pwd`/config.yaml
+        #if str( $region_file.has_region_file_select ) == "yes":
+          --bed ${region_file.bed} 
+        #end if
+        #for $i, $sample in enumerate( $samples )
+            --input $sample.forward:$sample.reverse:$sample.phenotype
+        #end for
+        && bcbio_nextgen.py /home/bioinf/bcbio_test/galaxy/bcbio_system.yaml `pwd`/config.yaml && 
+        zcat final/*/Batch1-ensemble-annotated.vcf.gz > $output_vcf &&
+        python $__tool_directory__/make_html.py --input-dir $everything_else.files_path --root-html $everything_else
+    ]]></command>
+    <configfiles>
+        <configfile name='main'>
+details:
+{% for sample in samples -%}
+- algorithm:
+    aligner: $aligner
+    mark_duplicates: ${mark_duplicates}
+    remove_lcr: ${remove_lcr}
+    variantcaller: [$variantcallers]
+    {% if bed_file_path -%}  variant_regions: {{ bed_file_path }}{% endif %}
+    ensemble:
+      numpass: 2
+    align_split_size: false
+  analysis: $analysis
+  lane: {{ loop.index }}
+  description: {{ sample['description'] }}
+  files: [{{ sample['forward'] }}, {{ sample['reverse'] }}]
+  genome_build: $build
+  metadata:
+    phenotype: {{ sample['phenotype'] }}
+    batch: Batch1
+  upload:
+    dir: ./final
+{% endfor %}
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="build" type="select" label="Genome Build">
+            <option value="GRCh37">GRCh37</option>
+            <option value="hg19">hg19</option>
+            <option value="GRCm38">GRCm38</option>
+            <option value="mm10">mm10</option>
+        </param>
+        <param name="analysis" type="select" label="Analysis">
+            <option value="variant2">variant2</option>
+            <option value="RNA-seq">RNA-seq</option>
+            <option value="smallRNA-seq">smallRNA-seq</option>
+        </param>
+        <param name="aligner" type="select" label="Aligner">
+            <option value="bwa">bwa</option>
+            <option value="bowtie">bowtie</option>
+            <option value="bowtie2">bowtie2</option>
+            <option value="hisat2">hisat2</option>
+            <option value="minimap2">minimap2</option>
+            <option value="novoalign">novoalign</option>
+            <option value="snap">snap</option>
+            <option value="star">star</option>
+            <option value="tophat2">tophat2</option>
+            <option value="false">false</option>
+        </param>
+        <param name="variantcallers" type="select" multiple="true" label="Variantcallers">
+            <option value="false">false</option>
+            <option value="freebayes">freebayes</option>
+            <option value="gatk-haplotype">gatk-haplotype</option>
+            <option value="haplotyper">haplotyper</option>
+            <option value="platypus">platypus</option>
+            <!--<option value="mutect">mutect</option>-->
+            <option value="mutect2">mutect2</option>
+            <option value="scalpel">scalpel</option>
+            <option value="tnhaplotyper">tnhaplotyper</option>
+            <option value="tnscope">tnscope</option>
+            <option value="vardict">vardict</option>
+            <option value="varscan">varscan</option>
+            <option value="samtools">samtools</option>
+            <option value="gatk">gatk</option>
+        </param>
+        <param name="mark_duplicates" type="select">
+            <option value="true" selected="true">true</option>
+            <option value="false">false</option>
+        </param>
+        <param name="remove_lcr" type="select">
+            <option value="true" selected="true">true</option>
+            <option value="false">false</option>
+        </param>
+        <repeat name="samples" title="Samples" min="1">
+            <param name="phenotype" type="select" label="Phenotype">
+                <option value="normal">Normal</option>
+                <option value="tumor">Tumor</option>
+            </param>
+            <param type="data" name="forward" format="fastq,fastq.gz,fastqsanger.gz" />
+            <param type="data" name="reverse" format="fastq,fastq.gz,fastqsanger.gz" />
+        </repeat>
+        <conditional name="region_file">
+            <param name="has_region_file_select" type="select" label="Do you have a region file">
+                <option value="yes">Yes</option>
+                <option value="no" selected="true">No</option>
+            </param>
+            <when value="yes">
+                <param type="data" name="bed" format="bed" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output_vcf" format="vcf"/>
+        <data name="everything_else" format="html"/>
+    </outputs>
+    <help><![CDATA[
+        TODO: Fill in help.
+    ]]></help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bcbio_system.yaml	Mon Jul 15 05:11:46 2019 -0400
@@ -0,0 +1,58 @@
+---
+# Configuration file specifying system details for running an analysis pipeline
+# These settings apply generally across multiple projects. Adjust them in sample
+# specific configuration files when needed.
+
+# -- Base setup
+
+# Define resources to be used for individual programs on multicore machines.
+# These can be defined specifically for memory and processor availability.
+# - memory: Specify usage for memory intensive programs. The indicated value
+#           specifies the wanted *per core* usage.
+# - cores: Define cores that can be used for multicore programs. The indicated
+#          value is the maximum cores that should be allocated for a program.
+# - jvm_opts: Options passed to the Java virtual machine (e.g. -Xms/-Xmx heap sizes).
+resources:
+  # default options, used if other items below are not present
+  # avoids needing to configure/adjust for every program
+  default:
+    memory: 3G
+    cores: 16
+    jvm_opts: ["-Xms750m", "-Xmx3500m"]
+  gatk:
+    jvm_opts: ["-Xms500m", "-Xmx3500m"]
+  snpeff:
+    jvm_opts: ["-Xms750m", "-Xmx3g"]
+  qualimap:
+    memory: 4g
+  express:
+    memory: 8g
+  dexseq:
+    memory: 10g
+  macs2:
+    memory: 8g
+  seqcluster:
+    memory: 8g
+
+# Location of galaxy configuration file, which has pointers to reference data
+# https://bcbio-nextgen.readthedocs.org/en/latest/contents/configuration.html#reference-genome-files
+galaxy_config: universe_wsgi.ini
+
+
+# -- Additional options for specific integration, not required for standalone usage.
+
+# Galaxy integration. Required for retrieving information from Galaxy LIMS.
+#galaxy_url: http://your/galaxy/url
+#galaxy_api_key: your_galaxy_api_key
+
+# Details for hooking automated processing to a sequencer machine.
+# Not required if running standalone pipelines.
+# analysis:
+#   # Can specify a different remote host to initiate
+#   # the copy from. This is useful for NFS shared filesystems
+#   # where you want to manage the copy from the base machine.
+#   copy_user:
+#   copy_host:
+#   store_dir: /store4/solexadata
+#   base_dir: /array0/projects/Sequencing
+#   worker_program: nextgen_analysis_server.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make_html.py	Mon Jul 15 05:11:46 2019 -0400
@@ -0,0 +1,33 @@
+import os
+import argparse
+from jinja2 import Template
+
+
+def main():
+    # --workdir `pwd` --output-dir `pwd`/output --input 
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--input-dir", "-d", required=True)
+    parser.add_argument("--root-html", "-o", required=True)
+
+    args = parser.parse_args()
+
+    input_dir = args.input_dir
+    root_html = args.root_html
+
+    with open(root_html, 'w') as root_html_handle:
+        root_html_handle.write("<ol>")
+        for root, dirs, files in os.walk(input_dir, followlinks=True):
+            print(root, dirs, files)
+            relative_root = root.replace(input_dir, "")[:-1]
+            print(relative_root)
+            for f in files:
+                f = "{0}/{1}".format(relative_root, f)
+                if f.startswith("/"):
+                    f = f[1:]
+                root_html_handle.write("<li>{0}</li>".format(
+                    f
+                ))
+                
+if __name__ == "__main__":
+    main()
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script.py	Mon Jul 15 05:11:46 2019 -0400
@@ -0,0 +1,80 @@
+import os
+import argparse
+from collections import Counter
+from jinja2 import Template
+
+
+def main():
+    # --workdir `pwd` --output-dir `pwd`/output --input 
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--workdir", "-w", required=True)
+    parser.add_argument("--output-dir", "-o", required=True)
+    parser.add_argument("--template", "-t", required=True)
+    parser.add_argument("--output-conf", "-c", required=True)
+    parser.add_argument("--input", "-i", action="append", required=True)
+    parser.add_argument("--bed", "-b", default=None)
+
+    
+
+    args = parser.parse_args()
+
+    workdir = args.workdir
+    output_dir = args.output_dir
+    input_files_raw = args.input
+    template_file_path = args.template
+    output_config_path = args.output_conf
+    bed_file_path = args.bed
+    
+    
+    if bed_file_path:
+        bed_new_name = "bed_file.bed"
+        bed_new_file_path = os.path.join(
+            workdir,
+            bed_new_name
+        )
+        os.symlink(bed_file_path, bed_new_file_path)
+        bed_file_path = bed_new_file_path
+    
+    input_files = []
+    phenotype_counter = Counter()
+    for input_file in input_files_raw:
+        if input_file.find(":"):
+            forward_file, reverse_file, phenotype = input_file.split(":")
+            phenotype_counter.update(phenotype)
+            phenotype_count = phenotype_counter[phenotype]
+
+            forward_new_name = "{phenotype}_{phenotype_count}_R1.fastq.gz".format(phenotype=phenotype, phenotype_count=phenotype_count)
+            forward_new_file_path = os.path.join(
+                workdir,
+                forward_new_name
+            )
+            os.symlink(forward_file, forward_new_file_path)
+
+            reverse_new_name = "{phenotype}_{phenotype_count}_R2.fastq.gz".format(phenotype=phenotype, phenotype_count=phenotype_count)
+            reverse_new_file_path = os.path.join(
+                workdir,
+                reverse_new_name
+            )
+            os.symlink(reverse_file, reverse_new_file_path)
+            
+            input_files.append(
+                {
+                    "forward": forward_new_file_path, 
+                    "reverse": reverse_new_file_path, 
+                    "description": "{phenotype}_{phenotype_index}".format(phenotype=phenotype, phenotype_index=phenotype_count),
+                    "phenotype": phenotype
+                }
+            )
+        
+    with open(output_config_path, 'w') as config_file_handle, open(template_file_path, 'r') as template_file_handle:
+        template = Template(template_file_handle.read())
+        config_file_handle.write(template.render(
+            samples=input_files,
+            output_dir=output_dir,
+            bed_file_path=bed_file_path
+        ))
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file