# HG changeset patch # User jingchunzhu # Date 1442527245 25200 # Node ID 3a036a34c362340a18a70d578b5bc8cc5d67b0d8 # Parent 728eda331f071d5e891fceb0b0d13a5a0bf766a7 better handle of input file diff -r 728eda331f07 -r 3a036a34c362 seg2matrix/CGData/BaseTable.py --- a/seg2matrix/CGData/BaseTable.py Thu Sep 17 14:33:21 2015 -0700 +++ b/seg2matrix/CGData/BaseTable.py Thu Sep 17 15:00:45 2015 -0700 @@ -63,24 +63,31 @@ linenum = 0 for row in read: linenum += 1 + if linenum ==1: #ignore header line + continue r = self.__row_class__() if (comment is None or not row[0].startswith(comment)) and not row[0].startswith("#"): for i, col in enumerate(cols): + skip =0 isOptional = False if 'columnDef' in self['cgformat'] and col in self['cgformat']['columnDef'] and 'optional' in self['cgformat']['columnDef'][col]: isOptional = self['cgformat']['columnDef'][col]['optional'] if len(row) > i: try: setattr(r, col, colType[col](row[i])) - except ValueError: + except ValueError: raise ValueError( "col invalid type %s on line %d" % (row[i], linenum)) else: if isOptional: setattr(r, col, None) else: print row + skip =1 # ignore bad lines + break raise InvalidFormat("missing colum " + col) - + if skip: + continue + if not self.groupKey: if self.secondKey is not None: key1 = getattr(r, self.firstKey ) diff -r 728eda331f07 -r 3a036a34c362 segToMatrix.xml --- a/segToMatrix.xml Thu Sep 17 14:33:21 2015 -0700 +++ b/segToMatrix.xml Thu Sep 17 15:00:45 2015 -0700 @@ -26,11 +26,23 @@ - **Given a segmented copy number data file, convert it into xena ready matrix file.** +**Given a segmented copy number data file, convert it into xena ready matrix file.** + +1. Input data file format: tab-delimited - Output File no 1. matrix file + ======= ===== ======= ===== ====== ====== + sample chr start end strand value + ======= ===== ======= ===== ====== ====== + sample1 chr1 1 100 . 0.5 + sample2 chr1 101 1000 . 1.5 + sample3 chr1 1000 2000 . -0.5 + ... ... ... ... ... ... 
+ ======= ===== ======= ===== ====== ====== - Output File no 2. probeMap file -- probe to gene mapping file (is in the same genome assembly as the input segmented copy number data) + +2. Output File no 1. matrix file + + Output File no 2. probeMap file -- probe to gene mapping file (is in the same genome assembly as the input segmented copy number data)