| 0 | 1 #!/usr/bin/env python | 
|  | 2 | 
"""
Condenses pileup format into ranges of bases.

usage: %prog [options]
   -i, --input=i: Input pileup file
   -o, --output=o: Output pileup
   -c, --coverage=c: Coverage
   -f, --format=f: Pileup format
   -b, --base=b: Base to select
   -s, --seq_column=s: Sequence column
   -l, --loc_column=l: Base location column
   -r, --base_column=r: Reference base column
   -C, --cvrg_column=C: Coverage column
"""
|  | 17 | 
|  | 18 from galaxy import eggs | 
|  | 19 import pkg_resources; pkg_resources.require( "bx-python" ) | 
|  | 20 from bx.cookbook import doc_optparse | 
|  | 21 import sys | 
|  | 22 | 
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    msg is written to standard error exactly as given (no newline is
    appended), then SystemExit is raised with status 1 so that the calling
    process sees a failure instead of a successful (zero) exit.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would exit with status 0, i.e. report success.
    sys.exit( 1 )
|  | 26 | 
def _column_indexes( options ):
    """Return the zero-based ( seq, loc, base, cov ) column indexes for the selected pileup format."""
    if options.format == 'six':
        return 0, 1, 2, 3
    if options.format == 'ten':
        # In the ten-column layout the base is taken from column 3 or 4,
        # depending on the --base option.
        if options.base == 'first':
            return 0, 1, 2, 7
        return 0, 1, 3, 7
    # Any other format: the user supplies one-based column numbers.
    return ( int( options.seq_column ) - 1,
             int( options.loc_column ) - 1,
             int( options.base_column ) - 1,
             int( options.cvrg_column ) - 1 )


def _parse_line( inLine, indexes, pileup_format ):
    """Split one pileup line into ( seq, loc, base, cov ); abort through stop_err if a column is missing or not numeric."""
    seqIndex, locIndex, baseIndex, covIndex = indexes
    # Drop the line terminator first so it cannot leak into the last field.
    lineParts = inLine.rstrip( '\r\n' ).split( '\t' )
    try:
        return lineParts[seqIndex], int( lineParts[locIndex] ), lineParts[baseIndex], int( lineParts[covIndex] )
    except IndexError as ei:
        if pileup_format == 'ten':
            stop_err( 'It appears that you have selected 10 columns while your file has 6. Make sure that the number of columns you specify matches the number in your file.\n' + str( ei ) )
        else:
            stop_err( 'There appears to be something wrong with your column index values.\n' + str( ei ) )
    except ValueError as ev:
        if pileup_format == 'six':
            stop_err( 'It appears that you have selected 6 columns while your file has 10. Make sure that the number of columns you specify matches the number in your file.\n' + str( ev ) )
        else:
            stop_err( 'There appears to be something wrong with your column index values.\n' + str( ev ) )


def _write_interval( fout, seq, startLoc, lastLoc, bases ):
    """Write the pending run of bases, if there is one, as a single tab-separated line."""
    if len( bases ) > 0:
        # The start is reported as the first location minus one; the end is
        # the last location of the run.
        fout.write( '%s\t%s\t%s\t%s\n' % ( seq, startLoc - 1, lastLoc, ''.join( bases ) ) )


def __main__():
    """
    Condense a pileup file into intervals of consecutive, well-covered bases.

    Options are parsed from the command line according to the module
    docstring.  Input lines are read until the first blank line (or end of
    file).  Lines whose coverage is at least --coverage, that share a
    sequence name and whose locations increase by exactly one are merged
    into a run; each run is written to the output file as
    "sequence, start - 1, end, concatenated bases" separated by tabs.

    A line with a missing column or a non-integer location/coverage ends the
    script through stop_err.
    """
    #Parse Command Line
    options, args = doc_optparse.parse( __doc__ )
    coverage = int( options.coverage )
    # Context managers close both files even when stop_err aborts the run.
    with open( options.input, 'r' ) as fin:
        with open( options.output, 'w' ) as fout:
            indexes = _column_indexes( options )
            lastSeq = ''    # '' means no line has been processed yet
            lastLoc = -1    # -1 means no line has been processed yet
            startLoc = -1   # -1 means no run is currently open
            bases = []
            for inLine in fin:
                if inLine.strip() == '':
                    # A blank line ends processing.
                    break
                seq, loc, base, cov = _parse_line( inLine, indexes, options.format )
                adjacent = loc == lastLoc + 1 or lastLoc == -1
                sameSeq = seq == lastSeq or lastSeq == ''
                if cov >= coverage and adjacent and sameSeq:
                    # Extend the open run, or open one at this location.
                    if startLoc == -1:
                        startLoc = loc
                    bases.append( base )
                else:
                    # The run is broken: flush it under the previous sequence
                    # name, then restart from this line only if it is covered.
                    _write_interval( fout, lastSeq, startLoc, lastLoc, bases )
                    if cov >= coverage:
                        startLoc = loc
                        bases = [ base ]
                    else:
                        startLoc = -1
                        bases = []
                lastSeq = seq
                lastLoc = loc
            # Flush whatever run is still open after the last line.
            _write_interval( fout, lastSeq, startLoc, lastLoc, bases )
|  | 116 | 
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()