changeset 20:914bc8ee6222

Debugged the merge mutation data tool
author melissacline
date Fri, 20 Mar 2015 15:50:22 -0700
parents 371579dd9bc6
children 3a259686f0fc
files mergeGenomicFiles.xml mergeMutationDatasets.xml mergeXenaMutation.py
diffstat 3 files changed, 67 insertions(+), 65 deletions(-) [+]
line wrap: on
line diff
--- a/mergeGenomicFiles.xml	Fri Mar 20 18:09:15 2015 -0400
+++ b/mergeGenomicFiles.xml	Fri Mar 20 15:50:22 2015 -0700
@@ -1,4 +1,4 @@
-<tool id="mergeGenomicFiles" description="Merge two genomic datasets into a new dataset" name="mergeGenomicFiles" version="0.0.1">
+<tool id="mergeGenomicFiles" description="Merge two genomic datasets into a new dataset" name="Merge Genomic Datasets" version="0.0.1">
   <description>
     Given two genomic datasets, merge them to create a third dataset with the row and column identifiers from both datasets.
   </description>
--- a/mergeMutationDatasets.xml	Fri Mar 20 18:09:15 2015 -0400
+++ b/mergeMutationDatasets.xml	Fri Mar 20 15:50:22 2015 -0700
@@ -1,4 +1,4 @@
-<tool id="mergeMutationDatasets" description="Merge two mutation datasets into a new dataset" name="mergeMutationData" version="0.0.1">
+<tool id="mergeMutationDatasets" description="Merge two mutation datasets into a new dataset" name="Merge Mutation Data" version="0.0.1">
   <description>
     Given two mutation datasets, merge them to create a larger dataset with the mutations from both datasets. Output this larger dataset, along with a 2-column matrix indicating the source of each mutation
   </description>
@@ -18,9 +18,9 @@
     <param type="text" name="labelForDatasetB"  label="Dataset B Label (optional)" optional="true"/>
  </inputs>
   <outputs>
-    <data name="outputC" format="tabular" label="Merged Mutation Data"/>
+    <data name="errorLog" format="data" label="Execution Log"/>
     <data name="outputSourceMatrix" format="tabular" label="Mutation Data Sources"/> 
-    <data name="errorLog" format="data" label="Execution Log"/>
+    <data name="outputC" format="tabular" label="Merged Mutation Data"/>
   </outputs>
   <help>
     ***Merge Xena Mutation Datasets***
--- a/mergeXenaMutation.py	Fri Mar 20 18:09:15 2015 -0400
+++ b/mergeXenaMutation.py	Fri Mar 20 15:50:22 2015 -0700
@@ -64,7 +64,7 @@
 
   return columnDic
 
-def summarizeColumns(infiles, fileColumn, allCols, ferror):
+def summarizeColumns(inFiles, fileColumn, allCols, ferror):
   for infile in inFiles:
     columnDic = header (infile, ferror)
     fileColumn [infile] = columnDic
@@ -138,74 +138,76 @@
     print "this is merging data A+B=C for mutation by position type of data\n"
     sys.exit(1)
 
-    #
-    # The input files to this script are two or more matrices, in which
-    # columns represent samples and rows represent genes or measurements.
-    # There are two output files: outMergedData contains the input data merged
-    # into a single matrix, and outSourceMatrix is a two-column matrix
-    # indicating which file each sample (or column label) came from.  This
-    # assumes that each sample came from at most one file.
-    #
-    parser = argparse.ArgumentParser()
-    parser.add_argument("inFileA", type=str, help="First input file")
-    parser.add_argument("inFileB", type=str, help="Second input file")
-    parser.add_argument("outMergedData", type=str,
-                        help="Filename for the merged dataset")
-    parser.add_argument("outSourceMatrix", type=str,
-                        help="""Filename for a Nx2 matrix that indicates
+  #
+  # The input files to this script are two or more matrices, in which
+  # columns represent samples and rows represent genes or measurements.
+  # There are two output files: outMergedData contains the input data merged
+  # into a single matrix, and outSourceMatrix is a two-column matrix
+  # indicating which file each sample (or column label) came from.  This
+  # assumes that each sample came from at most one file.
+  #
+  parser = argparse.ArgumentParser()
+  parser.add_argument("outMergedData", type=str,
+                      help="Filename for the merged dataset")
+  parser.add_argument("outSourceMatrix", type=str,
+                      help="""Filename for a Nx2 matrix that indicates
                                 the source file of each column""")
-    parser.add_argument("errorLog", type=str,
-                        help="""Error log""")
-    parser.add_argument("--aLabel", type=str, default=None,
-                        help="User-friendly label for the first input file")
-    parser.add_argument("--bLabel", type=str, default=None,
-                        help="User-friendly label for the second input file")
-    args = parser.parse_args()
+  parser.add_argument("errorLog", type=str,
+                      help="""Error log""")
+  parser.add_argument("inFileA", type=str, help="First input file")
+  parser.add_argument("inFileB", type=str, help="Second input file")
+  parser.add_argument("--aLabel", type=str, default=None,
+                      help="User-friendly label for the first input file")
+  parser.add_argument("--bLabel", type=str, default=None,
+                      help="User-friendly label for the second input file")
+  args = parser.parse_args()
 
                                     
-    #inFiles = sys.argv[4:]
-    print inFiles
-    errofile = args.errorLog
-    outfile = args.outMergedData
-    print outfile
-    outPhenotypeFile = args.outSourceMatrix
-    print outPhenotypeFile
-    
-    ferror = open(errofile,'w')
+  #inFiles = sys.argv[4:]
+  inFiles = list()
+  inFiles.append(args.inFileA)
+  inFiles.append(args.inFileB)
+  errofile = args.errorLog
+  outfile = args.outMergedData
+  #print outfile
+  outPhenotypeFile = args.outSourceMatrix
+  #print outPhenotypeFile
     
-    #get all the columns, build fileColumn dictionary
-    fileColumn={}
-    allCols =[]
-    summarizeColumns(inFiles, fileColumn, allCols, ferror)
-    ferror.close()
-
-    #output header line
-    fout = open(outfile,'w')
-    outputHeader (requiredCOLs,allCols,fout)
+  ferror = open(errofile,'w')
+    
+  #get all the columns, build fileColumn dictionary
+  fileColumn={}
+  allCols =[]
+  summarizeColumns(inFiles, fileColumn, allCols, ferror)
+  ferror.close()
 
-    #process and output combined mutationXena file
-    fout = open(outfile,'a')
-
-    columnDic = fileColumn[args.inFileA]
-    processAndOutput(args.inFileA,requiredCOLs,allCols,columnDic,fout)
-    columnDic = fileColumn[args.inFileB]
-    processAndOutput(args.inFileB,requiredCOLs,allCols,columnDic,fout)
-    fout.close()
+  #output header line
+  fout = open(outfile,'w')
+  outputHeader (requiredCOLs,allCols,fout)
+  
+  #process and output combined mutationXena file
+  fout = open(outfile,'a')
 
-    #collect sample from source information
-    sampleDic ={}
-    if args.aLabel is None:
-      collectSource(args.inFileA, args.inFileA, sampleDic)
-    else:
-      collectSource(args.inFileA, args.aLabel, sampleDic
-    if args.bLabel is None:
-      collectSource(args.inFileB, args.inFileB, sampleDic)
-    else:
-      collectSource(args.inFileB, args.bLabel, sampleDic
+  columnDic = fileColumn[args.inFileA]
+  processAndOutput(args.inFileA,requiredCOLs,allCols,columnDic,fout)
+  columnDic = fileColumn[args.inFileB]
+  processAndOutput(args.inFileB,requiredCOLs,allCols,columnDic,fout)
+  fout.close()
+  
+  #collect sample from source information
+  sampleDic ={}
+  if args.aLabel is None:
+    collectSource(args.inFileA, args.inFileA, sampleDic)
+  else:
+    collectSource(args.inFileA, args.aLabel, sampleDic)
+  if args.bLabel is None:
+    collectSource(args.inFileB, args.inFileB, sampleDic)
+  else:
+    collectSource(args.inFileB, args.bLabel, sampleDic)
 
 
-    #output sample source information as phenotype matrix
-    outputSampleDic (sampleDic, outPhenotypeFile)
+  #output sample source information as phenotype matrix
+  outputSampleDic (sampleDic, outPhenotypeFile)