comparison mzsqlite_psm_align.py @ 5:af5f22779a8e draft

planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/mzsqlite_psm_align commit d65efea03eb2db8a43e64599a4f899ead1a252ba-dirty
author jjohnson
date Tue, 10 Apr 2018 11:52:04 -0400
parents 492f98d89e26
children
comparison
equal deleted inserted replaced
4:a96b2754171c 5:af5f22779a8e
407 help='Genome reference sequence in 2bit format') 407 help='Genome reference sequence in 2bit format')
408 parser.add_argument( 408 parser.add_argument(
409 '-r', '--reads_bam', default=None, 409 '-r', '--reads_bam', default=None,
410 help='reads alignment bam path') 410 help='reads alignment bam path')
411 parser.add_argument( 411 parser.add_argument(
412 '-g', '--gffutils_file', default=None, 412 '-g', '--gffutils_sqlite', default=None,
413 help='gffutils GTF sqlite DB') 413 help='gffutils GTF sqlite DB')
414 parser.add_argument( 414 parser.add_argument(
415 '-B', '--probed', default=None, 415 '-B', '--probed', default=None,
416 help='proBed path') 416 help='proBed path')
417 parser.add_argument( 417 parser.add_argument(
441 samfile = pysam.AlignmentFile(args.reads_bam, "rb" ) if args.reads_bam else None 441 samfile = pysam.AlignmentFile(args.reads_bam, "rb" ) if args.reads_bam else None
442 seqlens = twobit.sequence_sizes() 442 seqlens = twobit.sequence_sizes()
443 443
444 probed = open(args.probed,'w') if args.probed else sys.stdout 444 probed = open(args.probed,'w') if args.probed else sys.stdout
445 445
446 gff_cursor = get_connection(args.gffutils_file).cursor() if args.gffutils_file else None 446 gff_cursor = get_connection(args.gffutils_sqlite).cursor() if args.gffutils_sqlite else None
447 map_cursor = get_connection(args.genomic_mapping_sqlite).cursor() 447 map_cursor = get_connection(args.genomic_mapping_sqlite).cursor()
448 mz_cursor = get_connection(args.mzsqlite_file).cursor() 448 mz_cursor = get_connection(args.mzsqlite).cursor()
449 449
450 unmapped_accs = set() 450 unmapped_accs = set()
451 timings = dict() 451 timings = dict()
452 def add_time(name,elapsed): 452 def add_time(name,elapsed):
453 if name in timings: 453 if name in timings:
654 locations.add('%s:%s-%s:%s' % (start_chrom,start_pos,end_chrom,end_pos)) 654 locations.add('%s:%s-%s:%s' % (start_chrom,start_pos,end_chrom,end_pos))
655 te = time() 655 te = time()
656 add_time('GENOMIC_POS_QUERY',te - ts) 656 add_time('GENOMIC_POS_QUERY',te - ts)
657 except: 657 except:
658 unmapped_accs.add(acc) 658 unmapped_accs.add(acc)
659 print('Unmapped: %s' % acc, file=sys.stderr) 659 if args.debug:
660 print('Unmapped: %s' % acc, file=sys.stderr)
660 return len(locations) 661 return len(locations)
661 return -1 662 return -1
662 663
663 def spectrum_peptide_count(spectrum_id): 664 def spectrum_peptide_count(spectrum_id):
664 ts = time() 665 ts = time()
727 for i in range(3): 728 for i in range(3):
728 if i < ao: 729 if i < ao:
729 bases.append(list(set([c[i] for c in aa_codon_map[aa]]))) 730 bases.append(list(set([c[i] for c in aa_codon_map[aa]])))
730 else: 731 else:
731 bases.append([b for b, cnt in reversed(sorted(pileup[idx + i]['bases'].iteritems(), key=lambda (k,v): (v,k)))]) 732 bases.append([b for b, cnt in reversed(sorted(pileup[idx + i]['bases'].iteritems(), key=lambda (k,v): (v,k)))])
732 print('%s' % bases) 733 if args.debug:
734 print('%s' % bases,file=sys.stderr)
733 for b0 in bases[0]: 735 for b0 in bases[0]:
734 if b0 not in aa_na_map[aa]: 736 if b0 not in aa_na_map[aa]:
735 continue 737 continue
736 for b1 in bases[1]: 738 for b1 in bases[1]:
737 if b1 not in aa_na_map[aa][b0]: 739 if b1 not in aa_na_map[aa][b0]:
810 'XT' : 'i', #Enzyme specificity 812 'XT' : 'i', #Enzyme specificity
811 'XE' : 'i', #Enzyme used in the experiment 813 'XE' : 'i', #Enzyme used in the experiment
812 'XG' : 'A', #Peptide type 814 'XG' : 'A', #Peptide type
813 'XU' : 'Z', #URI 815 'XU' : 'Z', #URI
814 """ 816 """
815 psm_cursor = get_connection(args.mzsqlite_file).cursor() 817 psm_cursor = get_connection(args.mzsqlite).cursor()
816 ts = time() 818 ts = time()
817 psms = psm_cursor.execute(PSM_QUERY) 819 psms = psm_cursor.execute(PSM_QUERY)
818 te = time() 820 te = time()
819 add_time('PSM_QUERY',te - ts) 821 add_time('PSM_QUERY',te - ts)
820 proBAM = ProBAM(species=None,assembly=args.genomeReference,seqlens=seqlens,comments=[]) 822 proBAM = ProBAM(species=None,assembly=args.genomeReference,seqlens=seqlens,comments=[])