changeset 3:2cadfddb73cd draft

planemo upload for repository https://github.com/zipho/novo_align commit 97b06b10c9981b652ec9c1e0abfc11777222a726
author sanbi-uwc
date Wed, 09 Mar 2016 05:42:36 -0500
parents a0c086e61358
children 905996402f3c
files novo_align.py novo_align.xml
diffstat 2 files changed, 24 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/novo_align.py	Mon Mar 07 08:52:24 2016 -0500
+++ b/novo_align.py	Wed Mar 09 05:42:36 2016 -0500
@@ -8,31 +8,34 @@
 from os.path import isdir, exists
 import shlex
 import sys
+import logging
+log = logging.getLogger( __name__ )
 
-def novo_align(output_directory, index_filename, fwd_file, rev_file ):
-    if exists(output_directory) and not isdir(output_directory):
-        print("Output directory path already exists but is not a directory: {}".format(output_directory),
-              file=sys.stderr)
-    elif not exists(output_directory):
-        mkdir(output_directory)
-
+def novo_align(output_filename, index_filename, fwd_file, rev_file ):
     #novoalign -c 8 -k -d /cip0/research/ajayi/RNA-seq_Analysis_Project_Case_Study/reference/Homo_Sapiens/out/TB_H37Rv.nix
     #         -f X165_820L8_.R1_val_1.fq  X165_820L8_.R2_val_2.fq -i PE 250,100
     #         -o SAM '@RG\tID:readgroup\tPU:platform unit\tLB:library' | samtools view -bS - > `pwd`/out/X165_820L8.bam
-
-    output_filename = path.join(output_directory, fwd_file.split(".")[0] + ".bam")
-
-    cmdline_str = "novoalign -c 8 -k -d {} -f {} {} -i PE 250, 100 -o SAM '@RG\tID:readgroup\tPU:platform unit\tLB:library' | samtools view -bS - > {}".format(
+    #output_filename = path.join(output_directory, fwd_file.split(".")[0] + ".bam")
+    param = r'@RG\tID:readgroup\tPU:platform unit\tLB:library'
+    cmdline_str = "novoalign -c 8 -k -d {} -f {} {} -i PE 250, 100 -o SAM '{}' | samtools view -bS - > {}".format(
         index_filename,
         fwd_file,
         rev_file,
+        param,
         output_filename)
-    cmdline = shlex.split(cmdline_str)
+    cmdline = newSplit(cmdline_str)
     try:
         check_call(cmdline)
     except CalledProcessError:
         print("Error running the nova-align", file=sys.stderr)
 
+def newSplit(value):
+    lex = shlex.shlex(value)
+    lex.quotes = '"'
+    lex.whitespace_split = True
+    lex.commenters = ''
+    return list(lex)
+
 def main():
     parser = argparse.ArgumentParser(description="Generate a BAM file from the Novo Align tool")
     parser.add_argument('output_filename')
@@ -40,15 +43,10 @@
     parser.add_argument('--forward_filename')
     parser.add_argument('--reverse_filename')
     args = parser.parse_args()
-
-    filename = args.output_filename
-    print("=============================")
-    print(args.__dict__)
-
-    params = load(open(filename, 'rb'))
-    output_directory = params['output_data'][0]['extra_files_path']
-    makedirs(output_directory)
-
-    novo_align(output_directory, args.index_filename, args.forward_filename, args.reverse_filename)
+   
+    #a dirty way of referencing the file
+    index_file_path = args.index_filename + "/" + args.index_filename.split("/")[-1]
+    
+    novo_align(args.output_filename, index_file_path, args.forward_filename, args.reverse_filename)
 
 if __name__ == "__main__": main()
--- a/novo_align.xml	Mon Mar 07 08:52:24 2016 -0500
+++ b/novo_align.xml	Wed Mar 09 05:42:36 2016 -0500
@@ -9,17 +9,17 @@
         <exit_code range="1:" />
     </stdio>
     <command interpreter="python">
-        novo_align.py "${out_file}" --index_file ${index1.fields.path} --forward_file ${fastq_input1} --reverse_file ${fastq_input2}
+        novo_align.py '${out_file}' --index_file ${index1.fields.path} --forward_file ${fastq_input1} --reverse_file ${fastq_input2}
     </command>
     <inputs>
-        <param name="fastq_input1" type="data" format="fasta" label="Select first set of reads" help="Specify dataset with forward reads"/>
-        <param name="fastq_input2" type="data" format="fasta" label="Select second set of reads" help="Specify dataset with reverse reads"/>
+        <param name="fastq_input1" type="data" format="fasta, fastq, fastsanger" label="Select first set of reads" help="Specify dataset with forward reads"/>
+        <param name="fastq_input2" type="data" format="fasta, fastq, fastsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/>
         <param name="index1" type="select" label="Novo-Align Index(FASTA format)">
             <options from_data_table="novocraft_index"/>
         </param>
     </inputs>
     <outputs>
-        <data name="out_file" format="data_manager_json" />
+        <data format="bam" name="out_file" />
     </outputs>
     <help>Help!</help>
     <citations>