view fcfunc.py @ 0:97bd5bb4204c default tip

commit
author ryo_tas <yamanaka@genome.rcast.u-tokyo.ac.jp>
date Tue, 30 Dec 2014 18:45:34 +0900
parents
children
line wrap: on
line source

#!/usr/bin/env python

import sys

def _bedError(message):
    """Report a BED format problem on stdout and stop the program.

    ``message`` is appended to the common ``'Error with BED file format: '``
    prefix and followed by a newline.  ``sys.exit()`` is called without an
    argument, so the exit status is the interpreter default (0); the only
    failure signal is the text written to stdout.
    """
    sys.stdout.write('Error with BED file format: %s\n' % message)
    sys.exit()


def checkFormat(file):
    """Validate the leading lines of a BED file.

    Only lines 1-19 of the file are inspected.  Blank lines and lines that
    contain the text ``track`` or ``browse`` anywhere are skipped, although
    they still count towards the line number.  Every remaining line is split
    on whitespace and checked, in this order:

    * it has at least 3 fields;
    * field 3 is not smaller than field 2;
    * it has the same number of fields as the previously checked line;
    * fields 2, 3, 7, 8 and 10 consist of digits only;
    * field 6 is ``+`` or ``-``;
    * field 9 is exactly ``0``;
    * fields 11 and 12 are comma-separated lists of digit-only items whose
      length equals the block count given in field 10.

    Args:
        file: path of the BED file to read (passed straight to ``open``).

    Returns:
        None when no check fails.

    Raises:
        SystemExit: on the first failed check, after a one-line message
            starting with ``Error with BED file format:`` has been written
            to stdout.
    """
    lineLimit = 20                  # lines numbered >= this are not inspected
    digitFields = (1, 2, 6, 7, 9)   # 0-based indexes of digit-only fields
    listFields = (10, 11)           # 0-based indexes of the block lists
    oldlen = 0                      # field count of the previous data line
    blockCount = 0                  # value of field 10 on the current line

    # The context manager guarantees the handle is closed, including when a
    # failed check leaves the function through SystemExit.
    with open(file) as bed:
        for n, line in enumerate(bed, 1):
            if n >= lineLimit:
                # Nothing past this point is validated, so stop reading.
                break

            line = line.rstrip('\n\t ')
            if not line or 'track' in line or 'browse' in line:
                continue

            row = line.split()
            rowlen = len(row)

            if rowlen < 3:
                _bedError('need atleast 3 fields per line')

            # A coordinate that is not an integer cannot be ordered here; it
            # is reported by the digit check below instead of surfacing as an
            # uncaught ValueError.
            try:
                endBeforeStart = int(row[2]) < int(row[1])
            except ValueError:
                endBeforeStart = False
            if endBeforeStart:
                _bedError('field 3 needs to be greater than row 2')

            # The field count is a property of the row, so compare it once
            # per row rather than once per field.
            if oldlen != 0 and rowlen != oldlen:
                _bedError('number of fields do not match')
            oldlen = rowlen

            for m, field in enumerate(row):
                where = (n, m + 1)   # 1-based line and field for messages

                if m in digitFields:
                    # isdigit() already rejects a leading '-', so no separate
                    # negativity test is needed.
                    if not field.isdigit():
                        _bedError('line%d,field%d need a number' % where)
                    if m == 9:
                        blockCount = int(field)
                elif m == 5:
                    if field not in ('+', '-'):
                        _bedError('need +/- in line%d,field%d' % where)
                elif m == 8:
                    if field != '0':
                        _bedError('line%d,field%d is always 0' % where)
                elif m in listFields:
                    if ',' not in field:
                        _bedError('need comma separated list at '
                                  'line%d,field%d' % where)
                    # Leading/trailing commas do not count as list items.
                    col = field.strip(',').split(',')
                    if len(col) != blockCount:
                        _bedError('block count does not match list length '
                                  'at line%d,field%d' % where)
                    for item in col:
                        if not item.isdigit():
                            _bedError('need list of numbers at '
                                      'line%d,field%d' % where)
if __name__ == '__main__':
    # Script entry point: validate the BED file named by the first
    # command-line argument.  checkFormat() terminates the process itself
    # when a format check fails, so 'passed' is written only when every
    # check succeeded.
    bedPath = sys.argv[1]
    checkFormat(bedPath)
    sys.stdout.write('passed')