comparison validator.py @ 2:cf258ca024ff draft

planemo upload commit ca69686dfafcabb815c93fd46d3c4dfe57459e39-dirty
author yating-l
date Mon, 19 Dec 2016 14:49:37 -0500
parents 2626c5b4c665
children 24fc8a8efe19
comparison
equal deleted inserted replaced
1:e9dcea52d079 2:cf258ca024ff
5 create a column and move the score column to that column. 5 create a column and move the score column to that column.
6 """ 6 """
7 def checkAndFixBed(bedfile, revised_file): 7 def checkAndFixBed(bedfile, revised_file):
8 # Store the lines that have been removed 8 # Store the lines that have been removed
9 removedLines = [] 9 removedLines = []
10 scoreLines = []
10 # Remove the lines with invalid strand, create a score column to store the original scores and change scores in the original score column all to 1000 11 # Remove the lines with invalid strand, create a score column to store the original scores and change scores in the original score column all to 1000
11 with open(revised_file, 'w') as tmp: 12 with open(revised_file, 'w') as tmp:
12 with open(bedfile, 'r') as f: 13 with open(bedfile, 'r') as f:
13 lines = f.readlines() 14 lines = f.readlines()
14 i = 1 15 i = 1
15 for line in lines: 16 for line in lines:
16 fields = line.split() 17 fields = line.split()
17 strand = fields[5] 18 strand = fields[5]
18 score = fields[4] 19 score = fields[4]
19 fields[4] = '1000' 20 if (int(fields[4]) > 1000):
20 fields.append(score) 21 scoreLines.append("line" + str(i) + ":" + line)
22 fields[4] = '1000'
21 if (strand == '+' or strand == '-'): 23 if (strand == '+' or strand == '-'):
22 tmp.write('\t'.join(map(str, fields))) 24 tmp.write('\t'.join(map(str, fields)))
23 tmp.write("\n") 25 tmp.write("\n")
24 else: 26 else:
25 removedLines.append("line" + str(i) + ": " + line) 27 removedLines.append("line" + str(i) + ": " + line)
26 i = i+1 28 i = i+1
27 29
28 return removedLines 30 return removedLines, scoreLines
29 31
30 def main(): 32 def main():
31 inputfile = str(sys.argv[1]) 33 inputfile = str(sys.argv[1])
32 outputfile = str(sys.argv[2]) 34 outputfile = str(sys.argv[2])
33 removed = checkAndFixBed(inputfile, outputfile) 35 removed, changed = checkAndFixBed(inputfile, outputfile)
34 if (removed != []): 36 if (removed != []):
35 print "\nRemoved invalid lines: \n" 37 print "\nRemoved invalid lines: \n"
36 print "\n".join(removed) 38 print "\n".join(removed)
39 if (changed != []):
40 print "\nThe following lines have scores > 1000, so they are changed to 1000:\n"
41 print "\n".join(changed)
37 42
38 if __name__ == "__main__": 43 if __name__ == "__main__":
39 main() 44 main()