Mercurial > repos > drosofff > msp_blastparser_and_hits
comparison BlastParser_and_hits.py @ 11:87d250071bdc draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_blastparser_and_hits commit 3048cdbea989bc7d28326bf9479fc3010ff8b33c
| author | drosofff |
|---|---|
| date | Tue, 02 Feb 2016 11:37:20 -0500 |
| parents | cf78a10bcfcf |
| children | 75dbe4f77699 |
comparison
equal
deleted
inserted
replaced
| 10:cf78a10bcfcf | 11:87d250071bdc |
|---|---|
| 19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number") | 19 the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out mean BitScores below the specified float number") |
| 20 the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list") | 20 the_parser.add_argument('--filter_term_in', action="store", type=str, default="", help="select the specified term in the subject list") |
| 21 the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list") | 21 the_parser.add_argument('--filter_term_out', action="store", type=str, default="", help="exclude the specified term from the subject list") |
| 22 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") | 22 the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") |
| 23 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") | 23 the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") |
| 24 the_parser.add_argument('--dataset_name', action="store", type=str, default="", help="the name of the dataset that has been parsed, to be reported in the output") | |
| 24 args = the_parser.parse_args() | 25 args = the_parser.parse_args() |
| 25 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): | 26 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): |
| 26 the_parser.error('argument(s) missing, call the -h option of the script') | 27 the_parser.error('argument(s) missing, call the -h option of the script') |
| 27 if not args.flanking: | 28 if not args.flanking: |
| 28 args.flanking = 0 | 29 args.flanking = 0 |
| 127 leftCoordinate -= FlankingValue | 128 leftCoordinate -= FlankingValue |
| 128 else: | 129 else: |
| 129 leftCoordinate = 1 | 130 leftCoordinate = 1 |
| 130 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) | 131 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) |
| 131 | 132 |
| 132 def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"): | 133 def outputParsing (dataset_name, F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out="", mode="verbose"): |
| 133 def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""): | 134 def filter_results (results, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, filter_term_in="", filter_term_out=""): |
| 134 for subject in results.keys(): | 135 for subject in results.keys(): |
| 135 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov: | 136 if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov: |
| 136 del results[subject] | 137 del results[subject] |
| 137 continue | 138 continue |
| 158 for subject in results: | 159 for subject in results: |
| 159 for transcript in Xblastdict[subject]: | 160 for transcript in Xblastdict[subject]: |
| 160 blasted_transcripts.append(transcript) | 161 blasted_transcripts.append(transcript) |
| 161 blasted_transcripts = list( set( blasted_transcripts)) | 162 blasted_transcripts = list( set( blasted_transcripts)) |
| 162 if mode == "verbose": | 163 if mode == "verbose": |
| 163 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" | 164 print >>F, "--- %s ---" % (dataset_name) |
| 165 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore" | |
| 164 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 166 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
| 165 print >> F, "#\n# %s" % subject | 167 print >> F, " \n# %s" % subject |
| 166 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) | 168 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) |
| 167 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) | 169 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) |
| 168 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) | 170 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) |
| 169 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"]) | 171 print >> F, "# Best Bit Score: %s" % (results[subject]["maxBitScores"]) |
| 170 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"]) | 172 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"]) |
| 179 info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore] | 181 info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore] |
| 180 info = [str(i) for i in info] | 182 info = [str(i) for i in info] |
| 181 info = "\t".join(info) | 183 info = "\t".join(info) |
| 182 print >> F, info | 184 print >> F, info |
| 183 else: | 185 else: |
| 186 print >>F, "--- %s ---" % (dataset_name) | |
| 184 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" | 187 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tBest Bit Score\tMean Bit Score" |
| 185 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): | 188 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
| 186 line = [] | 189 line = [] |
| 187 line.append(subject) | 190 line.append(subject) |
| 188 line.append(results[subject]["subjectLength"]) | 191 line.append(results[subject]["subjectLength"]) |
| 217 fastadict = getfasta (args.sequences) | 220 fastadict = getfasta (args.sequences) |
| 218 Xblastdict = getblast (args.blast) | 221 Xblastdict = getblast (args.blast) |
| 219 results = defaultdict(dict) | 222 results = defaultdict(dict) |
| 220 for subject in Xblastdict: | 223 for subject in Xblastdict: |
| 221 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) | 224 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) |
| 222 blasted_transcripts = outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, | 225 blasted_transcripts = outputParsing (args.dataset_name, args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, |
| 223 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, | 226 filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, |
| 224 filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in, | 227 filter_meanScore=args.filter_meanScore, filter_term_in=args.filter_term_in, |
| 225 filter_term_out=args.filter_term_out, mode=args.mode) | 228 filter_term_out=args.filter_term_out, mode=args.mode) |
| 226 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) | 229 dispatch_sequences (fastadict, blasted_transcripts, args.al_sequences, args.un_sequences) |
| 227 | 230 |
