annotate src/breadcrumbs/scripts/scriptManipulateTable.py @ 32:041787cd0d31 draft default tip

Modified from StringIO import StringIO ## for Python 2 to from io import StringIO ## for Python 3
author george-weingart
date Wed, 23 Jun 2021 20:52:58 +0000
parents d589875b8125
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
2 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
3 Author: Timothy Tickle
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
4 Description: Performs common manipulations on tables
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
5 """
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
6
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
7 __author__ = "Timothy Tickle"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
8 __copyright__ = "Copyright 2012"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
9 __credits__ = ["Timothy Tickle"]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
10 __license__ = ""
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
11 __version__ = ""
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
12 __maintainer__ = "Timothy Tickle"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
13 __email__ = "ttickle@sph.harvard.edu"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
14 __status__ = "Development"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
15
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
16 import argparse
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
17 import csv
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
18 import sys
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
19 import re
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
20 import os
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
21 import numpy as np
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
22 from src.AbundanceTable import AbundanceTable
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
23 #from src.PCA import PCA
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
24 from src.ValidateData import ValidateData
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
25
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
26 #Set up arguments reader
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
27 argp = argparse.ArgumentParser( prog = "scriptManipulateTable.py",
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
28 description = """Performs common manipulations on tables.\nExample: python scriptManipulateTable.py -i TID -l STSite Test.pcl""" )
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
29
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
30 #Arguments
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
31 #Describe table
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
32 argp.add_argument("-i","--id", dest="sIDName", default="ID", help="Abundance Table ID")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
33 argp.add_argument("-l","--meta", dest="sLastMetadataName", help="Last metadata name")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
34 argp.add_argument("-d","--fileDelim", dest= "cFileDelimiter", action= "store", default="\t", help="File delimiter, default tab")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
35 argp.add_argument("-f","--featureDelim", dest= "cFeatureDelimiter", action= "store", default="|", help="Feature (eg. bug or function) delimiter, default '|'")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
36
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
37 #Checked x 2
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
38 argp.add_argument("-n","--doNorm", dest="fNormalize", action="store_true", default=False, help="Flag to turn on normalization")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
39 argp.add_argument("-s","--doSum", dest="fSum", action="store_true", default=False, help="Flag to turn on summation")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
40
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
41 #Unsupervised filtering
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
42 argp.add_argument("-A","--doFilterAbundance", dest="strFilterAbundance", action="store", default=None, help="Turns on filtering by abundance (remove features that do not have a minimum abundance in a minimum number of samples); Should be a real number and an integer in the form 'minAbundance,minSamples', (should be performed on a normalized file).")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
43 argp.add_argument("-P","--doFilterPercentile", dest="strFilterPercentile", action="store", default=None, help="Turns on filtering by percentile Should be two numbers between 0 and 1 in the form 'percentile,percentage'. (should be performed on a normalized file).")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
44 argp.add_argument("-O","--doFilterOccurrence", dest="strFilterOccurence", action="store", default=None, help="Turns on filtering by occurrence. Should be two integers in the form 'minSequence,minSample' (should NOT be performed on a normalized file).")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
45 #argp.add_argument("-D","--doFilterDeviation", dest="dCuttOff", action="store", type=float, default=None, help="Flag to turn on filtering by standard deviation (should NOT be performed on a normalized file).")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
46
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
47 #Change bug membership
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
48 argp.add_argument("-t","--makeTerminal", dest="fMakeTerminal", action="store_true", default=False, help="Works reduces the file to teminal features in the original file.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
49 argp.add_argument("-u","--reduceOTUs", dest="fRemoveOTUs", action="store_true", default=False, help="Remove otu entries from file.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
50 argp.add_argument("-c","--reduceToClade", dest="iClade", action="store", type=int, default=None, help="Specify a level of clade to reduce to [].")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
51 argp.add_argument("-b","--reduceToFeatures", dest="strFeatures", action="store", default=None, help="Reduce measurements to certain features (bugs or functions). This can be a comma delimited string (of atleast 2 bugs) or a file.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
52
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
53 #Manipulate based on metadata
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
54 #Checked
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
55 argp.add_argument("-y","--stratifyBy", dest="strStratifyBy", action="store", default=None, help="Metadata to stratify tables by.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
56 argp.add_argument("-r","--removeMetadata", dest="strRemoveMetadata", action="store", default=None, help="Remove samples of this metadata and value (format comma delimited string with metadata id first and the values to remove after 'id,lvalue1,value2').")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
57
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
58 #Manipulate lineage
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
59 #Checked
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
60 argp.add_argument("-x","--doPrefixClades", dest="fPrefixClades", action="store_true", default=False, help="Flag to turn on adding prefixes to clades to better identify them, for example s__ will be placed infront of each species.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
61
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
62 #Combine tables
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
63 #argp.add_argument("-m","--combineIntersect", dest="fCombineIntersect", action="store_true", default=False, help="Combine two tables including only common features/metadata (intersection).")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
64 #argp.add_argument("-e","--combineUnion", dest="fCombineUnion", action="store_true", default=False, help="Combine two tables (union).")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
65
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
66 #Dimensionality Reduction
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
67 #argp.add_argument("-p","--doPCA", dest="fDoPCA",action="store_true", default=False, help="Flag to turn on adding metabugs and metametadata by performing PCA on each of bug relative abundance and continuous metadata and add the resulting components")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
68
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
69 #Checked
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
70 argp.add_argument("-o","--output", dest="strOutFile", action="store", default=None, help="Indicate output pcl file.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
71 argp.add_argument("strFileAbund", help ="Input data file")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
72
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
73 args = argp.parse_args( )
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
74
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
75 # Create output file if needed.
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
76 if not args.strOutFile:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
77 args.strOutFile = os.path.splitext(args.strFileAbund)[0]+"-mod.pcl"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
78 lsPieces = os.path.splitext(args.strOutFile)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
79
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
80 #List of abundance tables
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
81 lsTables = []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
82
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
83 #Read in abundance table
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
84 abndTable = AbundanceTable.funcMakeFromFile(xInputFile=args.strFileAbund,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
85 cDelimiter = args.cFileDelimiter,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
86 sMetadataID = args.sIDName,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
87 sLastMetadata = args.sLastMetadataName,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
88 lOccurenceFilter = None,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
89 cFeatureNameDelimiter=args.cFeatureDelimiter,
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
90 xOutputFile = args.strOutFile)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
91
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
92 #TODO Check filter combinations; some filters cannot be used together
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
93
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
94 # Make feature list
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
95 lsFeatures = []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
96 if args.strFeatures:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
97 print "Get features not completed"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
98 # if "," in args.strFeatures:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
99 # lsFeatures = args.strFeatures.split(",")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
100 # print "ManipulateTable::Reading in feature list "+str(len(lsFeatures))+"."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
101 # else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
102 # csvr = csv.reader(open(args.strFeatures, "rU"))
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
103 # print "ManipulateTable::Reading in feature file "+args.strFeatures+"."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
104 # for lsLine in csvr:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
105 # lsFeatures.extend(lsLine)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
106
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
107 lsTables.append(abndTable)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
108
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
109 # Do summing
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
110 #Sum if needed
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
111 if args.fSum:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
112 for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
113 print "ManipulateTable::"+abndTable.funcGetName()+" had "+str(len(abndTable.funcGetFeatureNames()))+" features before summing."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
114 fResult = abndTable.funcSumClades()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
115 if fResult:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
116 print "ManipulateTable::"+abndTable.funcGetName()+" was summed."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
117 print "ManipulateTable::"+abndTable.funcGetName()+" has "+str(len(abndTable.funcGetFeatureNames()))+" features after summing."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
118 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
119 print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" was NOT summed."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
120
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
121 # Filter on counts
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
122 if args.strFilterOccurence:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
123 iMinimumSequence,iMinimumSample = args.strFilterOccurence.split(",")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
124 for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
125 if abndTable.funcIsNormalized():
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
126 print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" is normalized and can not be filtered by occurence. This filter needs counts."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
127 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
128 fResult = abndTable.funcFilterAbundanceBySequenceOccurence(iMinSequence = int(iMinimumSequence), iMinSamples = int(iMinimumSample))
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
129 if fResult:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
130 print "ManipulateTable::"+abndTable.funcGetName()+" was filtered by occurence and now has "+str(len(abndTable.funcGetFeatureNames()))+" features."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
131 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
132 print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" was NOT filtered by occurence."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
133
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
134 # Change bug membership
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
135 if args.fMakeTerminal:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
136 lsTerminalTables = []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
137 for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
138 print "ManipulateTable::"+abndTable.funcGetName()+" had "+str(len(abndTable.funcGetFeatureNames()))+" features before making terminal."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
139 abndTable = abndTable.funcGetFeatureAbundanceTable(abndTable.funcGetTerminalNodes())
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
140 if abndTable:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
141 print "ManipulateTable::"+abndTable.funcGetName()+" has "+str(len(abndTable.funcGetFeatureNames()))+" terminal features."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
142 lsTerminalTables.append(abndTable)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
143 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
144 print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" was not made terminal."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
145 lsTables = lsTerminalTables
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
146
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
147 if args.fRemoveOTUs:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
148 lsNotOTUs = []
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
149 for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
150 print "ManipulateTable::"+abndTable.funcGetName()+" had "+str(len(abndTable.funcGetFeatureNames()))+" features before removing OTUs."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
151 abndTable = abndTable.funcGetWithoutOTUs()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
152 if abndTable:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
153 print "ManipulateTable::"+abndTable.funcGetName()+" had OTUs removed and now has "+str(len(abndTable.funcGetFeatureNames()))+" features."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
154 lsNotOTUs.append(abndTable)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
155 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
156 print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" OTUs were not removed."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
157 lsTables = lsNotOTUs
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
158
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
159 if args.iClade:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
160 for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
161 fResult = abndTable.funcReduceFeaturesToCladeLevel(args.iClade)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
162 if fResult:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
163 print "ManipulateTable::"+abndTable.funcGetName()+" was reduced to clade level "+str(args.iClade)+"."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
164 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
165 print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" was NOT reduced in clade levels."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
166
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
167 if args.strFeatures:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
168 for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
169 fResult = abndTable.funcGetFeatureAbundanceTable(lsFeatures)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
170 if fResult:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
171 print "ManipulateTable::"+abndTable.funcGetName()+" has been reduced to given features and now has "+str(len(abndTable.funcGetFeatureNames()))+" features."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
172 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
173 print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced to the given list."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
174
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
175 if args.strRemoveMetadata:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
176 lsMetadata = args.strRemoveMetadata.split(",")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
177 for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
178 fResult = abndTable.funcRemoveSamplesByMetadata(sMetadata=lsMetadata[0], lValuesToRemove=lsMetadata[1:])
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
179 if fResult:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
180 print "ManipulateTable::"+abndTable.funcGetName()+" has had samples removed and now has "+str(len(abndTable.funcGetSampleNames()))+" samples."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
181 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
182 print "ManipulateTable::ERROR. Could not remove samples from "+abndTable.funcGetName()+"."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
183
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
184 # Normalize if needed
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
185 if args.fNormalize:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
186 for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
187 fResult = abndTable.funcNormalize()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
188 if fResult:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
189 print "ManipulateTable::"+abndTable.funcGetName()+" was normalized."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
190 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
191 print "ManipulateTable::"+abndTable.funcGetName()+" was NOT normalized."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
192
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
193 # Filter on percentile
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
194 if args.strFilterPercentile:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
195 dPercentile,dPercentage = args.strFilterPercentile.split(",")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
196 for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
197 if abndTable.funcIsNormalized():
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
198 fResult = abndTable.funcFilterAbundanceByPercentile(dPercentileCutOff = float(dPercentile), dPercentageAbovePercentile = float(dPercentage))
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
199 if fResult:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
200 print "ManipulateTable::"+abndTable.funcGetName()+" has been reduced by percentile and now has "+str(len(abndTable.funcGetFeatureNames()))+" features."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
201 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
202 print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced by percentile."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
203 else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
204 print "ManipulateTable::"+abndTable.funcGetName()+" was NOT normalized and so the percentile filter is invalid, please indicate to normalize the table."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
205
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
# Filter on abundance. This should run after the percentile filter because the
# percentile filter needs the full distribution of features in a sample.
if args.strFilterAbundance:
    # The option value is two comma-separated fields: the minimum abundance
    # (parsed as a float) and the minimum number of samples (parsed as an int).
    strMinAbundance, strMinSamples = args.strFilterAbundance.split(",")
    dAbundance = float(strMinAbundance)
    iMinSamples = int(strMinSamples)
    for abndTable in lsTables:
        # The filter is only applied to tables that report being normalized.
        if abndTable.funcIsNormalized():
            fResult = abndTable.funcFilterAbundanceByMinValue(dMinAbundance=dAbundance, iMinSamples=iMinSamples)
            # print is called with one parenthesised string so the statement
            # is valid on both Python 2 and Python 3.
            if fResult:
                print("ManipulateTable::"+abndTable.funcGetName()+" has been reduced by minimum relative abundance value and now has "+str(len(abndTable.funcGetFeatureNames()))+" features.")
            else:
                # Report the failure of this filter (the message previously
                # named the percentile filter by mistake).
                print("ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced by minimum relative abundance value.")
        else:
            print("ManipulateTable::"+abndTable.funcGetName()+" was NOT normalized and so the abundance filter is invalid, please indicate to normalize the table.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
221
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
222 #if args.dCuttOff:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
223 # print "Standard deviation filtering not completed"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
224 # for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
225 # abndTable.funcFilterFeatureBySD(dMinSDCuttOff=args.dCuttOff)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
226 # if fResult:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
227 # print "ManipulateTable::"+abndTable.funcGetName()+" has been reduced by standard deviation and now has "+str(len(abndTable.funcGetFeatureNames()))+" features."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
228 # else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
229 # print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced by standard deviation."
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
230
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
# Normalize again after abundance data filtering, because removing features
# breaks the normalization. Normalization therefore happens twice: it is also
# required beforehand to make the abundance data that the filters work on.
if args.fNormalize:
    for abndTable in lsTables:
        # Only a successful normalization is reported; a falsy result is
        # passed over silently, as before.
        if abndTable.funcNormalize():
            # Single parenthesised argument keeps print valid on Python 2 and 3.
            print("ManipulateTable::"+abndTable.funcGetName()+" was normalized after filtering on abundance data.")
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
239
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
# Manipulate lineage: ask every table to add the clade prefix to its features
# and report per table whether that succeeded.
if args.fPrefixClades:
    for abndTable in lsTables:
        # Single parenthesised argument keeps print valid on Python 2 and 3.
        if abndTable.funcAddCladePrefixToFeatures():
            print("ManipulateTable::Clade Prefix was added to "+abndTable.funcGetName())
        else:
            print("ManipulateTable::ERROR. Clade Prefix was NOT added to "+abndTable.funcGetName())
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
248
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
249 # Under development
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
250 # Reduce dimensionality
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
251 #if args.fDoPCA:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
252 # pcaCur = PCA()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
253 # for abndTable in lsTables:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
254 #
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
255 # # Add data features
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
256 # # Make data components and add to abundance table
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
257 # pcaCur.loadData(abndTable,True)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
258 # pcaCur.run(fASTransform=True)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
259 # ldVariance = pcaCur.getVariance()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
260 # lldComponents = pcaCur.getComponents()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
261 # # Make Names
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
262 # lsNamesData = ["Data_PC"+str((tpleVariance[0]+1))+"_"+re.sub("[\.|-]","_",str(tpleVariance[1])) for tpleVariance in enumerate(ldVariance)]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
263 # abndTable.funcAddDataFeature(lsNamesData,lldComponents)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
264 #
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
265 # # Add metadata features
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
266 # # Convert metadata to an input for PCA
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
267 # pcaCur.loadData(pcaCur.convertMetadataForPCA(abndTable),False)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
268 # fSuccessful = pcaCur.run(fASTransform=False)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
269 # if(fSuccessful):
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
270 # ldVariance = pcaCur.getVariance()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
271 # lldComponents = pcaCur.getComponents()
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
272 # # Make Names
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
273 # lsNamesMetadata = ["Metadata_PC"+str((tpleVariance[0]+1))+"_"+re.sub("[\.|-]","_",str(tpleVariance[1])) for tpleVariance in enumerate(ldVariance)]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
274 # # Make metadata components and add to abundance
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
275 # llsMetadata = [list(npdRow) for npdRow in lldComponents]
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
276 # abndTable.funcAddMetadataFeature(lsNamesMetadata, llsMetadata)
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
277 # else:
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
278 # print "ManipulateTable::No metadata to PCA, no PCA components added to file based on metadata"
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
279
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
# Manipulate based on metadata: stratify every table by the requested metadata
# and carry on with the stratified tables in place of the originals.
if args.strStratifyBy:
    labndStratifiedTables = []
    for abndTable in lsTables:
        labndResult = abndTable.funcStratifyByMetadata(strMetadata=args.strStratifyBy)
        # Single parenthesised argument keeps print valid on Python 2 and 3.
        print("ManipulateTable::"+abndTable.funcGetName()+" was stratified by "+args.strStratifyBy+" in to "+str(len(labndResult))+" tables.")
        labndStratifiedTables.extend(labndResult)
    # From here on the stratified tables are the working set.
    lsTables = labndStratifiedTables
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
288
d589875b8125 First version of micropita in this repository
george-weingart
parents:
diff changeset
# Write the results. A single table goes straight to the requested output
# file; otherwise each table gets its own file named
# lsPieces[0] + <1-based index> + lsPieces[1].
# NOTE(review): lsPieces is defined outside this section - presumably the
# output file name split around its extension; confirm.
if len(lsTables) == 1:
    lsTables[0].funcWriteToFile(args.strOutFile)
else:
    for iIndex, abndManTable in enumerate(lsTables, 1):
        abndManTable.funcWriteToFile(lsPieces[0]+str(iIndex)+lsPieces[1])