Mercurial > repos > yating-l > regtools_junctions_extract
Comparison: validator.py @ 3:24fc8a8efe19 (draft, default, tip)
planemo upload commit ca69686dfafcabb815c93fd46d3c4dfe57459e39-dirty
| author | yating-l |
|---|---|
| date | Tue, 20 Dec 2016 17:15:16 -0500 |
| parents | cf258ca024ff |
| children |
Comparison legend (row states used in the table below):
- equal
- deleted
- inserted
- replaced
| 2:cf258ca024ff | 3:24fc8a8efe19 |
|---|---|
| 5 create a column and move the score column to that column. | 5 create a column and move the score column to that column. |
| 6 """ | 6 """ |
| 7 def checkAndFixBed(bedfile, revised_file): | 7 def checkAndFixBed(bedfile, revised_file): |
| 8 # Store the lines that have been removed | 8 # Store the lines that have been removed |
| 9 removedLines = [] | 9 removedLines = [] |
| 10 scoreLines = [] | |
| 11 # Remove the lines with invalid strand, create a score column to store the original scores and change scores in the original score column all to 1000 | 10 # Remove the lines with invalid strand, create a score column to store the original scores and change scores in the original score column all to 1000 |
| 12 with open(revised_file, 'w') as tmp: | 11 with open(revised_file, 'w') as tmp: |
| 13 with open(bedfile, 'r') as f: | 12 with open(bedfile, 'r') as f: |
| 14 lines = f.readlines() | 13 lines = f.readlines() |
| 15 i = 1 | 14 i = 1 |
| 16 for line in lines: | 15 for line in lines: |
| 17 fields = line.split() | 16 fields = line.split() |
| 18 strand = fields[5] | 17 strand = fields[5] |
| 19 score = fields[4] | 18 score = fields[4] |
| 20 if (int(fields[4]) > 1000): | 19 fields[4] = '1000' |
| 21 scoreLines.append("line" + str(i) + ":" + line) | 20 fields.append(score) |
| 22 fields[4] = '1000' | |
| 23 if (strand == '+' or strand == '-'): | 21 if (strand == '+' or strand == '-'): |
| 24 tmp.write('\t'.join(map(str, fields))) | 22 tmp.write('\t'.join(map(str, fields))) |
| 25 tmp.write("\n") | 23 tmp.write("\n") |
| 26 else: | 24 else: |
| 27 removedLines.append("line" + str(i) + ": " + line) | 25 removedLines.append("line" + str(i) + ": " + line) |
| 28 i = i+1 | 26 i = i+1 |
| 29 | 27 |
| 30 return removedLines, scoreLines | 28 return removedLines |
| 31 | 29 |
| 32 def main(): | 30 def main(): |
| 33 inputfile = str(sys.argv[1]) | 31 inputfile = str(sys.argv[1]) |
| 34 outputfile = str(sys.argv[2]) | 32 outputfile = str(sys.argv[2]) |
| 35 removed, changed = checkAndFixBed(inputfile, outputfile) | 33 removed = checkAndFixBed(inputfile, outputfile) |
| 36 if (removed != []): | 34 if (removed != []): |
| 37 print "\nRemoved invalid lines: \n" | 35 print "\nRemoved invalid lines: \n" |
| 38 print "\n".join(removed) | 36 print "\n".join(removed) |
| 39 if (changed != []): | |
| 40 print "\nThe following lines have scores > 1000, so they are changed to 1000:\n" | |
| 41 print "\n".join(changed) | |
| 42 | 37 |
| 43 if __name__ == "__main__": | 38 if __name__ == "__main__": |
| 44 main() | 39 main() |
