diff sRbowtieParser.py @ 2:70193ce0540e draft

planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author mvdbeek
date Thu, 11 Jun 2015 10:15:20 -0400
parents 3a510730e3fc
children
line wrap: on
line diff
--- a/sRbowtieParser.py	Sun Mar 29 11:27:33 2015 -0400
+++ b/sRbowtieParser.py	Thu Jun 11 10:15:20 2015 -0400
@@ -1,11 +1,15 @@
 #!/usr/bin/python
 # python parser module to analyse sRbowtie alignments
-# version 1.0.0 - argparse implementation
+# version 1.0.2 - argparse implementation
 # Usage sRbowtieParser.py  <1:index source> <2:extraction directive> <3:outputL> <4:polarity> <5:6:7 filePath:FileExt:FileLabel> <.. ad  lib>
 
 import sys, argparse
 from smRtools import *
 
+def masterListGenerator(data_source):  # generator: yields one HandleSmRNAwindows object per entry of data_source, built only when requested
+  for filePath, FileExt, FileLabel in data_source:  # each entry must unpack to exactly three items; FileLabel is not used in this function
+    yield HandleSmRNAwindows (filePath, FileExt, IndexSource, genomeRefFormat)  # IndexSource and genomeRefFormat are not parameters; they are looked up outside the function when it runs
+
 def Parser():
   the_parser = argparse.ArgumentParser()
   the_parser.add_argument('--IndexSource', action="store", type=str, help="Path to the index source")
@@ -24,22 +28,30 @@
 genomeRefFormat = args.ExtractDirective
 Output = args.output
 Polarity = args.polarity
-MasterListOfGenomes = {}
+header = ["gene"]
+
+
+FileLabelList=[label for label in args.alignmentLabel]  # copy of the labels given on the command line
+header.extend(FileLabelList)  # labels become the column headings that follow "gene" in the output header row
+assert (len(FileLabelList)==len(set(FileLabelList))),"You have supplied a non-unique label. Please make sure that your input files have unique names"  # NOTE(review): assert statements are skipped under python -O, so duplicate labels would then pass unnoticed
 
-for filePath, FileExt, FileLabel in zip (args.alignmentSource, args.alignmentFormat, args.alignmentLabel):
-  MasterListOfGenomes[FileLabel] = HandleSmRNAwindows (filePath, FileExt, IndexSource, genomeRefFormat) 
+data_source=zip (args.alignmentSource, args.alignmentFormat, args.alignmentLabel)  # (path, format, label) triples; zip stops at the shortest of the three argument lists
+master_generator=masterListGenerator(data_source)  # creating the generator builds no HandleSmRNAwindows object yet; each one is made as the loop below advances
 
-header = ["gene"]
-for filePath, FileExt, FileLabel in zip (args.alignmentSource, args.alignmentFormat, args.alignmentLabel):
-  header.append(FileLabel)
+for i,window in enumerate(master_generator):  # builds gene_count_dict: gene -> list of read counts (as strings), one per input in input order
+  window=window  # NOTE(review): self-assignment, has no effect
+  if i==0:
+    gene_count_dict={gene:[str(item.readcount(polarity=Polarity))] for gene,item in window.instanceDict.items()}  # the first input defines which genes (rows) exist
+  else:
+    [gene_count_dict[gene].append(str(item.readcount(polarity=Polarity))) for gene,item in window.instanceDict.items()]  # list is built only for the append side effect; NOTE(review): KeyError if this input has a gene the first lacked, and a gene missing here leaves that row one column short
+
 
 F = open (args.output, "w")
 # print >>F, args
 print >> F, "\t".join(header)
-for item in sorted (MasterListOfGenomes[header[1]].instanceDict.keys() ):
+
+for item in sorted(gene_count_dict.keys()):  # one output row per gene, in sorted gene-name order; NOTE(review): gene_count_dict is undefined here if the generator yielded nothing
   line=[item]
-  for sample in header[1:]:
-    count = str (MasterListOfGenomes[sample].instanceDict[item].readcount(polarity=Polarity))
-    line.append(count)
+  line.extend(gene_count_dict[item])  # counts were appended in input order, the same order in which the labels were added to header
   print >> F,  "\t".join(line )
 F.close()