Mercurial > repos > george-weingart > micropita
comparison src/ConstantsMicropita.py @ 0:d589875b8125
First version of micropita in this repository
| author | george-weingart |
|---|---|
| date | Wed, 30 Apr 2014 21:35:07 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d589875b8125 |
|---|---|
| 1 """ | |
| 2 Author: Timothy Tickle | |
| 3 Description: Constants. | |
| 4 """ | |
| 5 | |
| 6 ##################################################################################### | |
| 7 #Copyright (C) <2012> | |
| 8 # | |
| 9 #Permission is hereby granted, free of charge, to any person obtaining a copy of | |
| 10 #this software and associated documentation files (the "Software"), to deal in the | |
| 11 #Software without restriction, including without limitation the rights to use, copy, | |
| 12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
| 13 #and to permit persons to whom the Software is furnished to do so, subject to | |
| 14 #the following conditions: | |
| 15 # | |
| 16 #The above copyright notice and this permission notice shall be included in all copies | |
| 17 #or substantial portions of the Software. | |
| 18 # | |
| 19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | |
| 20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | |
| 21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
| 22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
| 23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
| 24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
| 25 ##################################################################################### | |
| 26 | |
| 27 __author__ = "Timothy Tickle" | |
| 28 __copyright__ = "Copyright 2012" | |
| 29 __credits__ = ["Timothy Tickle"] | |
| 30 __license__ = "MIT" | |
| 31 __maintainer__ = "Timothy Tickle" | |
| 32 __email__ = "ttickle@sph.harvard.edu" | |
| 33 __status__ = "Development" | |
| 34 | |
| 35 from breadcrumbs.src.Metric import Metric | |
| 36 | |
| 37 class ConstantsMicropita(): | |
| 38 """ | |
| 39 Class to hold project constants. | |
| 40 """ | |
| 41 | |
| 42 #Character Constants | |
| 43 COLON = ":" | |
| 44 COMMA = "," | |
| 45 FASTA_ID_LINE_START = ">" | |
| 46 QUOTE = "\"" | |
| 47 TAB = '\t' | |
| 48 WHITE_SPACE = " " | |
| 49 PIPE = "|" | |
| 50 c_outputFileDelim = '\t' | |
| 51 | |
| 52 c_sEmptyPredictFileValue = 'NA' | |
| 53 | |
| 54 #Used to stop divide by zero errors | |
| 55 c_smallNumber = 0.00000000001 | |
| 56 | |
| 57 #SVM related | |
| 58 c_COST_RANGE_KEY = "range" | |
| 59 c_lCostRange = [-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10] | |
| 60 c_SCALED_FILE_EXT = ".scaled" | |
| 61 c_intScaleLowerBound = 0 | |
| 62 #LIBSVM file extensions | |
| 63 c_SCALING_PARAMETERS = ".range" | |
| 64 c_CV_FILE_EXT = ".cv.out" | |
| 65 c_CV_IMAGE_EXT = ".cv.png" | |
| 66 c_MODEL_FILE_EXT = ".model" | |
| 67 c_PREDICT_FILE_EXT = ".predict" | |
| 68 c_fProbabilitistic = True | |
| 69 c_SCALED_FOR_PREDICTION_FILE_EXT = ".scaledForpredict" | |
| 70 | |
| 71 #SVM output Dictionary keywords for files | |
| 72 c_strKeywordInputFile = "INPUT" | |
| 73 c_strKeywordScaledFile = "SCALED" | |
| 74 c_strKeywordRangeFile = "RANGE" | |
| 75 c_strKeywordCVOutFile = "CV_OUT" | |
| 76 c_strKeywordModelFile = "MODEL" | |
| 77 c_strKeywordScaledPredFile = "SCALED_FOR_PREDICTION" | |
| 78 c_strKeywordPredFile = "PREDICTION" | |
| 79 c_strKeywordCostValue = "C" | |
| 80 c_strKeywordAccuracy = "ACCURACY" | |
| 81 | |
| 82 #Default values for missing data in the Abundance Table | |
| 83 c_strEmptyAbundanceData = "0" | |
| 84 c_strEmptyDataMetadata = "NA" | |
| 85 lNAs = list(set(["NA","na","Na","nA",c_strEmptyDataMetadata])) | |
| 86 | |
| 87 #Occurrence filter [min abundance, min samples occurring in] | |
| 88 #To turn off make == [0,0] | |
| 89 c_liOccurenceFilter = [0,0] | |
| 90 | |
| 91 #Break ties in targeted feature with diversity | |
| 92 c_fBreakRankTiesByDiversity = False | |
| 93 | |
| 94 ####Commandline arguments | |
| 95 #a Custom alpha diversity metrics found in cogent | |
| 96 c_strCustomAlphaDiversityHelp = "A key word for any PyCogent supplied alpha diversity metric (Richness, evenness, or diversity). Please supply an unnormalized (counts) abundance table for these metrics. Metrics include "+" ".join(Metric.setAlphaDiversities)+"." | |
| 97 | |
| 98 #b Custom beta diversity metrics found in cogent | |
| 99 c_strCustomBetaDiversityHelp = "A key word for any PyCogent supplied beta diversity metric. Metrics include "+" ".join(list(Metric.setBetaDiversities)+[Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted])+"." | |
| 100 | |
| 101 #c,checked Checked abundance file | |
| 102 c_strCheckedAbundanceFileArgument = "--checked" | |
| 103 c_strCheckedAbundanceFileHelp = "Before analysis, abundance files are checked and a new file results, on which the analysis is performed. The name of the checked file can be specified or the default will be used (appending -Checked to the end of the file name)." | |
| 104 | |
| 105 #d,id Name of the sample id row | |
| 106 c_strIDNameArgument = "--id" | |
| 107 c_strIDNameHelp = "The row in the abundance file that is the sample name/id row. Should be the sample name/Id in first column of the row." | |
| 108 | |
| 109 #e,label Supervised Label | |
| 110 c_strSupervisedLabelArgument = "--label" | |
| 111 c_strSupervisedLabelHelp = "The name of the metadata on which to perform supervised methods" | |
| 112 | |
| 113 #f, invertDiversity | |
| 114 c_strInvertDiversityHelp = "".join(["When using this flag, the diversity will be inverted (multiplicative inverse) before ranking in the highest diversity method. ", | |
| 115 "Recommended to use with dominance, menhinick, reciprocal_simpson, berger_parker_d, mcintosh_e, simpson_e, strong and any metric where 0 indicates most diverse."]) | |
| 116 | |
| 117 #g,logging Path of the logging file | |
| 118 c_strLoggingFileArgument = "--logfile" | |
| 119 c_strLoggingFileHelp = "File path to save the logging file." | |
| 120 | |
| 121 #h help | |
| 122 | |
| 123 #i,tree | |
| 124 c_strCustomEnvironmentFileHelp = "File describing the sample environments; for use with Unifrac distance metrics." | |
| 125 | |
| 126 #j,delim File delimiter | |
| 127 c_strFileDelimiterArgument = "--delim" | |
| 128 c_strFileDelimiterHelp = "The delimiter for the abundance table (default = TAB)" | |
| 129 | |
| 130 #k,featdelim Feature delimiter | |
| 131 c_strFeatureNameDelimiterArgument = "--featdelim" | |
| 132 c_strFeatureNameDelimiterHelp = "The delimiter for a feature name if it contains a consensus sequence." | |
| 133 | |
| 134 #l,lastmeta The name of the last metadata | |
| 135 c_strLastMetadataNameArgument = "--lastmeta" | |
| 136 c_strLastMetadataNameHelp = "The row in the abundance file that is the last metadata row. Should be the metadata name/Id in first column of the metadata row." | |
| 137 | |
| 138 #m,method | |
| 139 c_strSelectionTechniquesHelp = "Select techniques listed one after another." | |
| 140 | |
| 141 #n,num The Number of unsupervised sample selection | |
| 142 c_strCountArgument = "-n" | |
| 143 c_strCountHelp = "The number of samples to select with unsupervised methodology. (An integer greater than 0.)." | |
| 144 | |
| 145 #o,tree | |
| 146 c_strCustomPhylogeneticTreeHelp = "Phylogenetic tree to use when selecting custom beta-diversities in the representative sampling criteria." | |
| 147 | |
| 148 #p,suppredfile File path for the predict file for the supervised methods | |
| 149 c_strSupervisedPredictedFile = "--suppredfile" | |
| 150 c_strSupervisedPredictedFileHelp = "The file path for the predict file." | |
| 151 | |
| 152 #q,alphameta | |
| 153 c_strCustomAlphaDiversityMetadataHelp = "Metric in the pcl file which has custom alpha diversity measurements to use with the highest diversity sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most diverse." | |
| 154 | |
| 155 #r,targetmethod Taxa selection method | |
| 156 c_strTargetedFeatureMethodArgument = "--feature_method" | |
| 157 c_strTargetedFeatureMethodHelp = "The ranking method used to select targeted features." | |
| 158 | |
| 159 #s,stratify Unsupervised stratify metadata | |
| 160 c_strUnsupervisedStratifyMetadataArgument = "--stratify" | |
| 161 c_strUnsupervisedStratifyMetadataHelp = "The metadata to stratify unsupervised analysis." | |
| 162 | |
| 163 #t,target Targeted feature file | |
| 164 c_strTargetedSelectionFileArgument = "--targets" | |
| 165 c_strTargetedSelectionFileHelp = "A file containing taxa/OTUs/clades to be used in targeted feature sampling criteria." | |
| 166 | |
| 167 #u,supinputfile File path for the input file for the supervised methods | |
| 168 c_strSupervisedInputFile = "--supinputfile" | |
| 169 c_strSupervisedInputFileHelp = "The file path for the input file for supervised methods." | |
| 170 | |
| 171 #v,logging String for logging level | |
| 172 c_strLoggingArgument = "--logging" | |
| 173 c_strLoggingHelp = "".join(["Logging level which will be logged to a .log file with the", | |
| 174 " same name as the strOutFile (but with a .log extension). Valid values are DEBUG, INFO, WARNING, ERROR, or CRITICAL."]) | |
| 175 c_lsLoggingChoices = ["DEBUG","INFO","WARNING","ERROR","CRITICAL"] | |
| 176 | |
| 177 #w, Last Feature Metadata indicator | |
| 178 c_strFeatureMetadataArgument = "--lastFeatureMetadata" | |
| 179 c_strFeatureMetadataHelp = "The last metadata describing a (bug) feature (not sample). Not all studies have feature metadata, if so this can be ignored and not used. See doc for PCL-Description.txt" | |
| 180 | |
| 181 #x,betamatrix | |
| 182 c_strCustomBetaDiversityMatrixHelp = "Precalculated beta-diversity matrix to be used in the representative sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most dissimilar." | |
| 183 | |
| 184 #Order is important, the first is the default | |
| 185 c_strTargetedRanked = "rank" | |
| 186 c_strTargetedAbundance = "abundance" | |
| 187 lsTargetedFeatureMethodValues = [c_strTargetedRanked, c_strTargetedAbundance] | |
| 188 | |
| 189 #Selection methods | |
| 190 c_strDiversity = "diverse" | |
| 191 c_strExtreme = "extreme" | |
| 192 c_strDiscriminant = "discriminant" | |
| 193 c_strDistinct = "distinct" | |
| 194 c_strRandom = "random" | |
| 195 c_strRepresentative = "representative" | |
| 196 c_strFeature = "features" | |
| 197 c_custom = "custom" | |
| 198 c_lsAllUnsupervisedMethods = [c_strRepresentative,c_strDiversity,c_strExtreme,c_strFeature,c_strRandom] | |
| 199 c_lsAllSupervisedMethods = [c_strDiscriminant,c_strDistinct] | |
| 200 c_lsAllMethods = c_lsAllUnsupervisedMethods + c_lsAllSupervisedMethods | |
| 201 | |
| 202 #Technique Names | |
| 203 c_strDiversity2 = c_strDiversity+"_C" | |
| 204 | |
| 205 #################################### | |
| 206 #Arguments without commandline flags | |
| 207 c_strAbundanceFileHelp = "Input file as either a PCL or Biome file." | |
| 208 c_strGenericOutputDataFileHelp = "The generated output data file." |
