| 1 | 1 #!/usr/bin/python | 
|  | 2 # python parser module for pre-mir and mature miRNAs, guided by mirbase.org GFF3 | 
|  | 3 # version 0.0.9 (1-6-2014) | 
|  | 4 # Usage MirParser.py  <1:index source> <2:extraction directive> <3:output pre-mir> <4: output mature miRs> <5:mirbase GFF3> | 
|  | 5 #                     <6:pathToLatticeDataframe or "dummy_dataframe_path"> <7:Rcode or "dummy_plotCode"> <8:latticePDF or "dummy_latticePDF"> | 
|  | 6 #                     <9:10:11 filePath:FileExt:FileLabel> <.. ad  lib> | 
|  | 7 | 
|  | 8 import sys, subprocess | 
|  | 9 from smRtools import * | 
|  | 10 | 
# Positional command-line arguments (see the usage header at the top of this file).
IndexSource = sys.argv[1]
ExtractionDirective = sys.argv[2]
# Translate the extraction directive into the genomeRefFormat value that is
# later handed to HandleSmRNAwindows.
if ExtractionDirective == "--do_not_extract_index":
  genomeRefFormat = "fastaSource"
elif ExtractionDirective == "--extract_index":
  genomeRefFormat = "bowtieIndex"
else:
  # Stop here with an explicit message; otherwise genomeRefFormat stays unbound
  # and the script only fails later with a NameError.
  sys.exit("Unknown extraction directive %r: expected --do_not_extract_index or --extract_index" % ExtractionDirective)
OutputPre_mirs = sys.argv[3]
OutputMature_Mirs = sys.argv[4]
GFF3_file = sys.argv[5]
lattice = sys.argv[6]
Rcode = sys.argv[7]
latticePDF = sys.argv[8]
# Everything from the 10th argument on is read as consecutive
# (filePath, FileExt, FileLabel) triplets, one per alignment file.
triplet_args = sys.argv[9:]
if len(triplet_args) % 3 != 0:
  # An incomplete triplet cannot be unpacked into path/format/label later on.
  sys.exit("Alignment arguments must be given as filePath FileExt FileLabel triplets (got %d trailing arguments)" % len(triplet_args))
Triplets = [triplet_args[i:i+3] for i in range(0, len(triplet_args), 3)]
# Maps each FileLabel to the object built from its alignment file.
MasterListOfGenomes = {}
|  | 25 | 
# Build one HandleSmRNAwindows object per alignment file, stored under its
# sample label, and collect the labels as the column names of the count table.
# NOTE(review): HandleSmRNAwindows comes from the smRtools star import; its
# behaviour is not visible from this file.
header = ["gene"]
for filePath, FileExt, FileLabel in Triplets:
  sys.stdout.write("%s\n" % FileLabel)  # echo the label of the sample being loaded
  MasterListOfGenomes[FileLabel] = HandleSmRNAwindows(
    alignmentFile=filePath,
    alignmentFileFormat=FileExt,
    genomeRefFile=IndexSource,
    genomeRefFormat=genomeRefFormat,
    biosample=FileLabel,
  )
  header.append(FileLabel)

# First row of the table: "gene" followed by one tab-separated column per sample.
hit_table = ["\t".join(header)]
|  | 35 | 
## read GFF3 to subinstantiate
# For every GFF3 feature line, ask each sample for its readcount() over the
# feature coordinates and append one tab-separated row to hit_table. When a
# lattice dataframe path was given, also collect readcoverage() output for
# features whose name contains neither "5p" nor "3p".
lattice_dataframe = []
with open(GFF3_file, "r") as gff3:  # closed even if a lookup below raises
  for line in gff3:
    # Skip comment lines and blank lines (a blank line has no coordinate fields).
    if line[0] == "#" or not line.strip(): continue
    # Strip only the line terminator: the last line may lack one, and a CRLF
    # file would otherwise leave "\r" attached to the final field.
    gff_fields = line.rstrip("\r\n").split("\t")
    chrom = gff_fields[0]
    gff_name = gff_fields[-1].split("Name=")[-1].split(";")[0] # to isolate the GFF Name
    item_upstream_coordinate = int(gff_fields[3])
    item_downstream_coordinate = int(gff_fields[4])
    # Any strand value other than "+" is treated as reverse.
    if gff_fields[6] == "+":
      item_polarity = "forward"
    else:
      item_polarity = "reverse"
    # This test does not depend on the sample, so evaluate it once per feature.
    wants_coverage = (lattice != "dummy_dataframe_path") and ("5p" not in gff_name) and ("3p" not in gff_name)
    item_line = [gff_name]
    for sample in header[1:]:
      chrom_instance = MasterListOfGenomes[sample].instanceDict[chrom]
      count = chrom_instance.readcount(upstream_coord=item_upstream_coordinate, downstream_coord=item_downstream_coordinate, polarity=item_polarity)
      item_line.append(str(count))
      ## subtreatement for lattice
      if wants_coverage:
        lattice_dataframe.append(chrom_instance.readcoverage(upstream_coord=item_upstream_coordinate, downstream_coord=item_downstream_coordinate, windowName=gff_name+"_"+sample))
      ## end of subtreatement for lattice
    hit_table.append("\t".join(item_line))
|  | 61 | 
# Write the two count tables. Both start with the header row; the data rows are
# sorted once and then split on whether the row text contains "5p" or "3p".
sorted_rows = sorted(hit_table[1:])

# Pre-mir table: rows containing neither "5p" nor "3p".
finalPreList = [i for i in sorted_rows if ("5p" not in i) and ("3p" not in i)]
with open(OutputPre_mirs, "w") as Fpremirs:  # closed even if a write fails
  Fpremirs.write(hit_table[0] + "\n")
  Fpremirs.write("\n".join(finalPreList) + "\n")

# Mature miR table: rows containing "5p" or "3p".
finalMatureList = [i for i in sorted_rows if ("5p" in i) or ("3p" in i)]
with open(OutputMature_Mirs, "w") as Fmaturemires:
  Fmaturemires.write(hit_table[0] + "\n")
  Fmaturemires.write("\n".join(finalMatureList) + "\n")
|  | 73 | 
# Optional lattice output: write the collected coverage rows under a fixed
# header line, then run the R script given on the command line.
if lattice != "dummy_dataframe_path":
  with open(lattice, "w") as Flattice:  # closed before Rscript is started
    Flattice.write("%s\t%s\t%s\t%s\t%s\t%s\t%s" % ("sample", "mir", "offset", "offsetNorm", "counts","countsNorm",  "polarity") + "\n")
    Flattice.write("\n".join(lattice_dataframe) + "\n")
  # Pass the command as an argument list: splitting a concatenated string on
  # whitespace would break an R script path that contains spaces.
  R_command = ["Rscript", Rcode]
  process = subprocess.Popen(R_command)
  process.wait()