Mercurial > repos > sanbi-uwc > novo_align
changeset 4:905996402f3c draft
planemo upload for repository https://github.com/zipho/novo_align commit 09e64885cb6efcc9aaacd6a4fd897ba928981359
author | sanbi-uwc |
---|---|
date | Thu, 10 Mar 2016 15:53:44 -0500 |
parents | 2cadfddb73cd |
children | d51c5af7a8fe |
files | novo_align.py novo_align.xml novo_sort.py novo_sort.xml |
diffstat | 4 files changed, 66 insertions(+), 84 deletions(-) [+] |
line wrap: on
line diff
--- a/novo_align.py Wed Mar 09 05:42:36 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ -#!/usr/bin/env python - -from __future__ import print_function -import argparse -from subprocess import check_call, CalledProcessError -from json import load, dump, dumps -from os import environ, mkdir, makedirs, path -from os.path import isdir, exists -import shlex -import sys -import logging -log = logging.getLogger( __name__ ) - -def novo_align(output_filename, index_filename, fwd_file, rev_file ): - #novoalign -c 8 -k -d /cip0/research/ajayi/RNA-seq_Analysis_Project_Case_Study/reference/Homo_Sapiens/out/TB_H37Rv.nix - # -f X165_820L8_.R1_val_1.fq X165_820L8_.R2_val_2.fq -i PE 250,100 - # -o SAM '@RG\tID:readgroup\tPU:platform unit\tLB:library' | samtools view -bS - > `pwd`/out/X165_820L8.bam - #output_filename = path.join(output_directory, fwd_file.split(".")[0] + ".bam") - param = r'@RG\tID:readgroup\tPU:platform unit\tLB:library' - cmdline_str = "novoalign -c 8 -k -d {} -f {} {} -i PE 250, 100 -o SAM '{}' | samtools view -bS - > {}".format( - index_filename, - fwd_file, - rev_file, - param, - output_filename) - cmdline = newSplit(cmdline_str) - try: - check_call(cmdline) - except CalledProcessError: - print("Error running the nova-align", file=sys.stderr) - -def newSplit(value): - lex = shlex.shlex(value) - lex.quotes = '"' - lex.whitespace_split = True - lex.commenters = '' - return list(lex) - -def main(): - parser = argparse.ArgumentParser(description="Generate a BAM file from the Novo Align tool") - parser.add_argument('output_filename') - parser.add_argument('--index_filename') - parser.add_argument('--forward_filename') - parser.add_argument('--reverse_filename') - args = parser.parse_args() - - #a dirty way of referencing the file - index_file_path = args.index_filename + "/" + args.index_filename.split("/")[-1] - - novo_align(args.output_filename, index_file_path, args.forward_filename, args.reverse_filename) - -if __name__ == "__main__": main()
--- a/novo_align.xml Wed Mar 09 05:42:36 2016 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -<?xml version="1.0" encoding="utf-8" ?> -<tool id="novo_align" name="NOVO ALIGN" version="0.0.1"> - <description>Tool to run the Novo Align mapping tool</description> - <requirements> - <requirement type="package" version="1.2">samtools</requirement> - </requirements> - <stdio> - <exit_code range=":-1" /> - <exit_code range="1:" /> - </stdio> - <command interpreter="python"> - novo_align.py '${out_file}' --index_file ${index1.fields.path} --forward_file ${fastq_input1} --reverse_file ${fastq_input2} - </command> - <inputs> - <param name="fastq_input1" type="data" format="fasta, fastq, fastsanger" label="Select first set of reads" help="Specify dataset with forward reads"/> - <param name="fastq_input2" type="data" format="fasta, fastq, fastsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/> - <param name="index1" type="select" label="Novo-Align Index(FASTA format)"> - <options from_data_table="novocraft_index"/> - </param> - </inputs> - <outputs> - <data format="bam" name="out_file" /> - </outputs> - <help>Help!</help> - <citations> - <citation></citation> - </citations> - <tests> - <test> - </test> - </tests> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/novo_sort.py Thu Mar 10 15:53:44 2016 -0500 @@ -0,0 +1,38 @@ +#!/usr/bin/env python + +from __future__ import print_function +import argparse +from subprocess import check_call, CalledProcessError +from json import load, dump, dumps +from os import environ, mkdir, makedirs, path +from os.path import isdir, exists +import shlex +import sys +import logging +log = logging.getLogger( __name__ ) + +def novo_sort( bam_filename ): + #novosort -c 8 -m 8G -s-f $i > $i".sorted"; done + cmdline_str = "novosort -c 8 -m 8G -s -f {}".format( bam_filename ) + cmdline = newSplit(cmdline_str) + try: + check_call(cmdline) + except CalledProcessError: + print("Error running the nova-align", file=sys.stderr) + +def newSplit(value): + lex = shlex.shlex(value) + lex.quotes = '"' + lex.whitespace_split = True + lex.commenters = '' + return list(lex) + +def main(): + parser = argparse.ArgumentParser(description="Generate a BAM file from the Novo Align tool") + parser.add_argument('output_filename') + parser.add_argument('--bam_filename') + args = parser.parse_args() + + novo_sort(args.bam_filename) + +if __name__ == "__main__": main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/novo_sort.xml Thu Mar 10 15:53:44 2016 -0500 @@ -0,0 +1,28 @@ +<?xml version="1.0" encoding="utf-8" ?> +<tool id="novo_sort" name="NOVO SORT" version="0.0.1"> + <description>Tool to run the Novo Sort mapping tool</description> + <requirements> + <requirement type="package" version="1.2">samtools</requirement> + </requirements> + <stdio> + <exit_code range=":-1" /> + <exit_code range="1:" /> + </stdio> + <command interpreter="python"> + novo_sort.py '${out_file}' --bam_filename ${input1} + </command> + <inputs> + <param name="input1" type="data" format="bam" label="Select first set of reads" help="Specify dataset with forward reads"/> + </inputs> + <outputs> + <data format="bam" name="out_file" /> + </outputs> + <help>Help!</help> + <citations> + <citation></citation> + </citations> + <tests> + <test> + </test> + </tests> +</tool>