diff novo_align.py @ 3:2cadfddb73cd draft

planemo upload for repository https://github.com/zipho/novo_align commit 97b06b10c9981b652ec9c1e0abfc11777222a726
author sanbi-uwc
date Wed, 09 Mar 2016 05:42:36 -0500
parents 7e0286bd9405
children
line wrap: on
line diff
--- a/novo_align.py	Mon Mar 07 08:52:24 2016 -0500
+++ b/novo_align.py	Wed Mar 09 05:42:36 2016 -0500
@@ -8,31 +8,34 @@
 from os.path import isdir, exists
 import shlex
 import sys
+import logging
+log = logging.getLogger( __name__ )
 
-def novo_align(output_directory, index_filename, fwd_file, rev_file ):
-    if exists(output_directory) and not isdir(output_directory):
-        print("Output directory path already exists but is not a directory: {}".format(output_directory),
-              file=sys.stderr)
-    elif not exists(output_directory):
-        mkdir(output_directory)
-
+def novo_align(output_filename, index_filename, fwd_file, rev_file ):
     #novoalign -c 8 -k -d /cip0/research/ajayi/RNA-seq_Analysis_Project_Case_Study/reference/Homo_Sapiens/out/TB_H37Rv.nix
     #         -f X165_820L8_.R1_val_1.fq  X165_820L8_.R2_val_2.fq -i PE 250,100
     #         -o SAM '@RG\tID:readgroup\tPU:platform unit\tLB:library' | samtools view -bS - > `pwd`/out/X165_820L8.bam
-
-    output_filename = path.join(output_directory, fwd_file.split(".")[0] + ".bam")
-
-    cmdline_str = "novoalign -c 8 -k -d {} -f {} {} -i PE 250, 100 -o SAM '@RG\tID:readgroup\tPU:platform unit\tLB:library' | samtools view -bS - > {}".format(
+    #output_filename = path.join(output_directory, fwd_file.split(".")[0] + ".bam")
+    param = r'@RG\tID:readgroup\tPU:platform unit\tLB:library'
+    cmdline_str = "novoalign -c 8 -k -d {} -f {} {} -i PE 250, 100 -o SAM '{}' | samtools view -bS - > {}".format(
         index_filename,
         fwd_file,
         rev_file,
+        param,
         output_filename)
-    cmdline = shlex.split(cmdline_str)
+    cmdline = newSplit(cmdline_str)
     try:
         check_call(cmdline)
     except CalledProcessError:
         print("Error running the nova-align", file=sys.stderr)
 
+def newSplit(value):
+    lex = shlex.shlex(value)
+    lex.quotes = '"'
+    lex.whitespace_split = True
+    lex.commenters = ''
+    return list(lex)
+
 def main():
     parser = argparse.ArgumentParser(description="Generate a BAM file from the Novo Align tool")
     parser.add_argument('output_filename')
@@ -40,15 +43,10 @@
     parser.add_argument('--forward_filename')
     parser.add_argument('--reverse_filename')
     args = parser.parse_args()
-
-    filename = args.output_filename
-    print("=============================")
-    print(args.__dict__)
-
-    params = load(open(filename, 'rb'))
-    output_directory = params['output_data'][0]['extra_files_path']
-    makedirs(output_directory)
-
-    novo_align(output_directory, args.index_filename, args.forward_filename, args.reverse_filename)
+   
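+    # HACK: build "<index_filename>/<last path component of index_filename>", i.e. assume the index argument is a directory holding a same-named file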
+    index_file_path = args.index_filename + "/" + args.index_filename.split("/")[-1]
+    
+    novo_align(args.output_filename, index_file_path, args.forward_filename, args.reverse_filename)
 
 if __name__ == "__main__": main()