Mercurial > repos > deepakjadmin > mayatool3_test2
diff docs/scripts/man1/CalculatePhysicochemicalProperties.1 @ 0:4816e4a8ae95 draft default tip
Uploaded
author | deepakjadmin |
---|---|
date | Wed, 20 Jan 2016 09:23:18 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docs/scripts/man1/CalculatePhysicochemicalProperties.1 Wed Jan 20 09:23:18 2016 -0500 @@ -0,0 +1,802 @@ +.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is turned on, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.ie \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. nr % 0 +. rr F +.\} +.el \{\ +. de IX +.. +.\} +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. 
\" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "CALCULATEPHYSICOCHEMICALPROPERTIES 1" +.TH CALCULATEPHYSICOCHEMICALPROPERTIES 1 "2015-03-29" "perl v5.14.2" "MayaChemTools" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. 
+.if n .ad l +.nh +.SH "NAME" +CalculatePhysicochemicalProperties.pl \- Calculate physicochemical properties for SD files +.SH "SYNOPSIS" +.IX Header "SYNOPSIS" +CalculatePhysicochemicalProperties.pl SDFile(s)... +.PP +CalculatePhysicochemicalProperties.pl [\fB\-\-AromaticityModel\fR \fIAromaticityModelType\fR] +[\fB\-\-CompoundID\fR DataFieldName or LabelPrefixString] +[\fB\-\-CompoundIDLabel\fR text] [\fB\-\-CompoundIDMode\fR] [\fB\-\-DataFields\fR \*(L"FieldLabel1, FieldLabel2,...\*(R"] +[\fB\-d, \-\-DataFieldsMode\fR All | Common | Specify | CompoundID] [\fB\-f, \-\-Filter\fR Yes | No] [\fB\-h, \-\-help\fR] +[\fB\-\-HydrogenBonds\fR HBondsType1 | HBondsType2] [\fB\-k, \-\-KeepLargestComponent\fR Yes | No] +[\fB\-m, \-\-mode\fR All | RuleOf5 | RuleOf3 | \*(L"name1, [name2,...]\*(R"] +[\fB\-\-MolecularComplexity\fR \fIName,Value, [Name,Value,...]\fR] +[\fB\-\-OutDelim\fR comma | tab | semicolon] [\fB\-\-output\fR \s-1SD\s0 | text | both] [\fB\-o, \-\-overwrite\fR] +[\fB\-\-Precision\fR Name,Number,[Name,Number,..]] [\fB\-\-RotatableBonds\fR Name,Value, [Name,Value,...]] +[\fB\-\-RuleOf3Violations\fR Yes | No] [\fB\-\-RuleOf5Violations\fR Yes | No] +[\fB\-q, \-\-quote\fR Yes | No] [\fB\-r, \-\-root\fR RootName] +[\fB\-w, \-\-WorkingDir\fR dirname] SDFile(s)... +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +Calculate physicochemical properties for \fISDFile(s)\fR and create appropriate \s-1SD\s0 or \s-1CSV/TSV\s0 +text file(s) containing calculated properties. 
+.PP +The current release of MayaChemTools supports the calculation of these physicochemical +properties: +.PP +.Vb 7 +\& MolecularWeight, ExactMass, HeavyAtoms, Rings, AromaticRings, +\& van der Waals MolecularVolume [ Ref 93 ], RotatableBonds, +\& HydrogenBondDonors, HydrogenBondAcceptors, LogP and +\& Molar Refractivity (SLogP and SMR) [ Ref 89 ], Topological Polar +\& Surface Area (TPSA) [ Ref 90 ], Fraction of SP3 carbons (Fsp3Carbons) +\& and SP3 carbons (Sp3Carbons) [ Ref 115\-116, Ref 119 ], +\& MolecularComplexity [ Ref 117\-119 ] +.Ve +.PP +Multiple SDFile names are separated by spaces. The valid file extensions are \fI.sdf\fR +and \fI.sd\fR. All other file names are ignored. All the \s-1SD\s0 files in a current directory +can be specified either by \fI*.sdf\fR or the current directory name. +.PP +The calculation of molecular complexity using \fIMolecularComplexityType\fR parameter +corresponds to the number of bits-set or unique keys [ Ref 117\-119 ] in molecular fingerprints. +Default value for \fIMolecularComplexityType\fR: \fIMACCSKeys\fR of size 166. The calculation +of MACCSKeys is relatively expensive and can take rather substantial amount of time. +.SH "OPTIONS" +.IX Header "OPTIONS" +.IP "\fB\-\-AromaticityModel\fR \fIMDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel\fR" 4 +.IX Item "--AromaticityModel MDLAromaticityModel | TriposAromaticityModel | MMFFAromaticityModel | ChemAxonBasicAromaticityModel | ChemAxonGeneralAromaticityModel | DaylightAromaticityModel | MayaChemToolsAromaticityModel" +Specify aromaticity model to use during detection of aromaticity. Possible values in the current +release are: \fIMDLAromaticityModel, TriposAromaticityModel, MMFFAromaticityModel, +ChemAxonBasicAromaticityModel, ChemAxonGeneralAromaticityModel, DaylightAromaticityModel +or MayaChemToolsAromaticityModel\fR. 
Default value: \fIMayaChemToolsAromaticityModel\fR. +.Sp +The supported aromaticity model names along with model specific control parameters +are defined in \fBAromaticityModelsData.csv\fR, which is distributed with the current release +and is available under \fBlib/data\fR directory. \fBMolecule.pm\fR module retrieves data from +this file during class instantiation and makes it available to method \fBDetectAromaticity\fR +for detecting aromaticity corresponding to a specific model. +.IP "\fB\-\-CompoundID\fR \fIDataFieldName or LabelPrefixString\fR" 4 +.IX Item "--CompoundID DataFieldName or LabelPrefixString" +This value is \fB\-\-CompoundIDMode\fR specific and indicates how compound \s-1ID\s0 is generated. +.Sp +For \fIDataField\fR value of \fB\-\-CompoundIDMode\fR option, it corresponds to datafield label name +whose value is used as compound \s-1ID\s0; otherwise, it's a prefix string used for generating compound +IDs like LabelPrefixString<Number>. Default value, \fICmpd\fR, generates compound IDs which +look like Cmpd<Number>. +.Sp +Examples for \fIDataField\fR value of \fB\-\-CompoundIDMode\fR: +.Sp +.Vb 2 +\& MolID +\& ExtReg +.Ve +.Sp +Examples for \fILabelPrefix\fR or \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR: +.Sp +.Vb 1 +\& Compound +.Ve +.Sp +The value specified above generates compound IDs which correspond to Compound<Number> +instead of default value of Cmpd<Number>. +.IP "\fB\-\-CompoundIDLabel\fR \fItext\fR" 4 +.IX Item "--CompoundIDLabel text" +Specify compound \s-1ID\s0 column label for \s-1CSV/TSV\s0 text file(s) used during \fICompoundID\fR value +of \fB\-\-DataFieldsMode\fR option. Default value: \fICompoundID\fR. 
+.IP "\fB\-\-CompoundIDMode\fR \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR" 4 +.IX Item "--CompoundIDMode DataField | MolName | LabelPrefix | MolNameOrLabelPrefix" +Specify how to generate compound IDs and write to \s-1CSV/TSV\s0 text file(s) along with calculated +physicochemical properties for \fItext | both\fR values of \fB\-\-output\fR option: use a \fISDFile(s)\fR +datafield value; use molname line from \fISDFile(s)\fR; generate a sequential \s-1ID\s0 with specific prefix; +use combination of both MolName and LabelPrefix with usage of LabelPrefix values for empty +molname lines. +.Sp +Possible values: \fIDataField | MolName | LabelPrefix | MolNameOrLabelPrefix\fR. +Default value: \fILabelPrefix\fR. +.Sp +For \fIMolNameOrLabelPrefix\fR value of \fB\-\-CompoundIDMode\fR, molname line in \fISDFile(s)\fR takes +precedence over sequential compound IDs generated using \fILabelPrefix\fR and only empty molname +values are replaced with sequential compound IDs. +.Sp +This is only used for \fICompoundID\fR value of \fB\-\-DataFieldsMode\fR option. +.ie n .IP "\fB\-\-DataFields\fR \fI""FieldLabel1,FieldLabel2,...""\fR" 4 +.el .IP "\fB\-\-DataFields\fR \fI``FieldLabel1,FieldLabel2,...''\fR" 4 +.IX Item "--DataFields FieldLabel1,FieldLabel2,..." +Comma delimited list of \fISDFile(s)\fR data fields to extract and write to \s-1CSV/TSV\s0 text file(s) along +with calculated physicochemical properties for \fItext | both\fR values of \fB\-\-output\fR option. +.Sp +This is only used for \fISpecify\fR value of \fB\-\-DataFieldsMode\fR option. 
+.Sp +Examples: +.Sp +.Vb 2 +\& Extreg +\& MolID,CompoundName +.Ve +.IP "\fB\-d, \-\-DataFieldsMode\fR \fIAll | Common | Specify | CompoundID\fR" 4 +.IX Item "-d, --DataFieldsMode All | Common | Specify | CompoundID" +Specify how data fields in \fISDFile(s)\fR are transferred to output \s-1CSV/TSV\s0 text file(s) along +with calculated physicochemical properties for \fItext | both\fR values of \fB\-\-output\fR option: +transfer all \s-1SD\s0 data fields; transfer \s-1SD\s0 data fields common to all compounds; extract specified +data fields; generate a compound \s-1ID\s0 using molname line, a compound prefix, or a combination +of both. Possible values: \fIAll | Common | Specify | CompoundID\fR. Default value: \fICompoundID\fR. +.IP "\fB\-f, \-\-Filter\fR \fIYes | No\fR" 4 +.IX Item "-f, --Filter Yes | No" +Specify whether to check and filter compound data in SDFile(s). Possible values: \fIYes or No\fR. +Default value: \fIYes\fR. +.Sp +By default, compound data is checked before calculating physicochemical properties and compounds +containing atom data corresponding to non-element symbols or no atom data are ignored. +.IP "\fB\-h, \-\-help\fR" 4 +.IX Item "-h, --help" +Print this help message. +.IP "\fB\-\-HydrogenBonds\fR \fIHBondsType1 | HBondsType2\fR" 4 +.IX Item "--HydrogenBonds HBondsType1 | HBondsType2" +Parameters to control calculation of hydrogen bond donors and acceptors. Possible values: +\&\fIHBondsType1, HydrogenBondsType1, HBondsType2, HydrogenBondsType2\fR. Default value: +\&\fIHBondsType2\fR which corresponds to \fBRuleOf5\fR definition for number of hydrogen bond +donors and acceptors. +.Sp +The current release of MayaChemTools supports identification of two types of hydrogen bond +donor and acceptor atoms with these names: +.Sp +.Vb 2 +\& HBondsType1 or HydrogenBondsType1 +\& HBondsType2 or HydrogenBondsType2 +.Ve +.Sp +The names of these hydrogen bond types are rather arbitrary. 
However, their definitions have +specific meaning and are as follows: +.Sp +.Vb 1 +\& HydrogenBondsType1 [ Ref 60\-61, Ref 65\-66 ]: +\& +\& Donor: NH, NH2, OH \- Any N and O with available H +\& Acceptor: N[!H], O \- Any N without available H and any O +\& +\& HydrogenBondsType2 [ Ref 91 ]: +\& +\& Donor: NH, NH2, OH \- N and O with available H +\& Acceptor: N, O \- Any N and O +.Ve +.IP "\fB\-k, \-\-KeepLargestComponent\fR \fIYes | No\fR" 4 +.IX Item "-k, --KeepLargestComponent Yes | No" +Calculate physicochemical properties for only the largest component in molecule. Possible values: +\&\fIYes or No\fR. Default value: \fIYes\fR. +.Sp +For molecules containing multiple connected components, physicochemical properties can be +calculated in two different ways: use all connected components or just the largest connected +component. By default, all atoms except for the largest connected component are +deleted before calculation of physicochemical properties. +.ie n .IP "\fB\-m, \-\-mode\fR \fIAll | RuleOf5 | RuleOf3 | ""name1, [name2,...]""\fR" 4 +.el .IP "\fB\-m, \-\-mode\fR \fIAll | RuleOf5 | RuleOf3 | ``name1, [name2,...]''\fR" 4 +.IX Item "-m, --mode All | RuleOf5 | RuleOf3 | name1, [name2,...]" +Specify physicochemical properties to calculate for SDFile(s): calculate all available physical +chemical properties; calculate properties corresponding to Rule of 5; or use a comma delimited +list of supported physicochemical properties. Possible values: \fIAll | RuleOf5 | RuleOf3 | +\&\*(L"name1, [name2,...]\*(R"\fR. +.Sp +Default value: \fIMolecularWeight, HeavyAtoms, MolecularVolume, RotatableBonds, HydrogenBondDonors, +HydrogenBondAcceptors, SLogP, \s-1TPSA\s0\fR. These properties are calculated by default. +.Sp +\&\fIRuleOf5\fR [ Ref 91 ] includes these properties: \fIMolecularWeight, HydrogenBondDonors, HydrogenBondAcceptors, +SLogP\fR. \fIRuleOf5\fR states: MolecularWeight <= 500, HydrogenBondDonors <= 5, HydrogenBondAcceptors <= 10, and +logP <= 5. 
+.Sp +\&\fIRuleOf3\fR [ Ref 92 ] includes these properties: \fIMolecularWeight, RotatableBonds, HydrogenBondDonors, +HydrogenBondAcceptors, SLogP, \s-1TPSA\s0\fR. \fIRuleOf3\fR states: MolecularWeight <= 300, RotatableBonds <= 3, +HydrogenBondDonors <= 3, HydrogenBondAcceptors <= 3, logP <= 3, and \s-1TPSA\s0 <= 60. +.Sp +\&\fIAll\fR calculates all supported physicochemical properties: \fIMolecularWeight, ExactMass, +HeavyAtoms, Rings, AromaticRings, MolecularVolume, RotatableBonds, HydrogenBondDonors, +HydrogenBondAcceptors, SLogP, \s-1SMR\s0, \s-1TPSA\s0, Fsp3Carbons, Sp3Carbons, MolecularComplexity\fR. +.IP "\fB\-\-MolecularComplexity\fR \fIName,Value, [Name,Value,...]\fR" 4 +.IX Item "--MolecularComplexity Name,Value, [Name,Value,...]" +Parameters to control calculation of molecular complexity: it's a comma delimited list of parameter +name and value pairs. +.Sp +Possible parameter names: \fIMolecularComplexityType, AtomIdentifierType, +AtomicInvariantsToUse, FunctionalClassesToUse, MACCSKeysSize, NeighborhoodRadius, +MinPathLength, MaxPathLength, UseBondSymbols, MinDistance, MaxDistance, +UseTriangleInequality, DistanceBinSize, NormalizationMethodology\fR. +.Sp +The valid parameter values for each parameter name are described in the following sections. +.Sp +The current release of MayaChemTools supports calculation of molecular complexity using +\&\fIMolecularComplexityType\fR parameter corresponding to the number of bits-set or unique +keys [ Ref 117\-119 ] in molecular fingerprints. The valid values for \fIMolecularComplexityType\fR +are: +.Sp +.Vb 9 +\& AtomTypesFingerprints +\& ExtendedConnectivityFingerprints +\& MACCSKeys +\& PathLengthFingerprints +\& TopologicalAtomPairsFingerprints +\& TopologicalAtomTripletsFingerprints +\& TopologicalAtomTorsionsFingerprints +\& TopologicalPharmacophoreAtomPairsFingerprints +\& TopologicalPharmacophoreAtomTripletsFingerprints +.Ve +.Sp +Default value for \fIMolecularComplexityType\fR: \fIMACCSKeys\fR. 
+.Sp +\&\fIAtomIdentifierType\fR parameter name corresponds to atom types used during generation of +fingerprints. The valid values for \fIAtomIdentifierType\fR are: \fIAtomicInvariantsAtomTypes, +DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, +SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\fR. \fIAtomicInvariantsAtomTypes\fR +is not supported during the following values of \fIMolecularComplexityType\fR: \fIMACCSKeys, +TopologicalPharmacophoreAtomPairsFingerprints, TopologicalPharmacophoreAtomTripletsFingerprints\fR. +\&\fIFunctionalClassAtomTypes\fR is the only valid value for \fIAtomIdentifierType\fR for topological +pharmacophore fingerprints. +.Sp +Default value for \fIAtomIdentifierType\fR: \fIAtomicInvariantsAtomTypes\fR +for all except topological pharmacophore fingerprints where it is \fIFunctionalClassAtomTypes\fR. +.Sp +\&\fIAtomicInvariantsToUse\fR parameter name and values are used during \fIAtomicInvariantsAtomTypes\fR +value of parameter \fIAtomIdentifierType\fR. It's a list of space separated valid atomic invariant atom types. +.Sp +Possible values for atomic invariants are: \fI\s-1AS\s0, X, \s-1BO\s0, \s-1LBO\s0, \s-1SB\s0, \s-1DB\s0, \s-1TB\s0, H, Ar, \s-1RA\s0, \s-1FC\s0, \s-1MN\s0, \s-1SM\s0\fR. 
+Default value for \fIAtomicInvariantsToUse\fR parameter are set differently for different fingerprints +using \fIMolecularComplexityType\fR parameter as shown below: +.Sp +.Vb 1 +\& MolecularComplexityType AtomicInvariantsToUse +\& +\& AtomTypesFingerprints AS X BO H FC +\& TopologicalAtomPairsFingerprints AS X BO H FC +\& TopologicalAtomTripletsFingerprints AS X BO H FC +\& TopologicalAtomTorsionsFingerprints AS X BO H FC +\& +\& ExtendedConnectivityFingerprints AS X BO H FC MN +\& PathLengthFingerprints AS +.Ve +.Sp +The atomic invariants abbreviations correspond to: +.Sp +.Vb 1 +\& AS = Atom symbol corresponding to element symbol +\& +\& X<n> = Number of non\-hydrogen atom neighbors or heavy atoms +\& BO<n> = Sum of bond orders to non\-hydrogen atom neighbors or heavy atoms +\& LBO<n> = Largest bond order of non\-hydrogen atom neighbors or heavy atoms +\& SB<n> = Number of single bonds to non\-hydrogen atom neighbors or heavy atoms +\& DB<n> = Number of double bonds to non\-hydrogen atom neighbors or heavy atoms +\& TB<n> = Number of triple bonds to non\-hydrogen atom neighbors or heavy atoms +\& H<n> = Number of implicit and explicit hydrogens for atom +\& Ar = Aromatic annotation indicating whether atom is aromatic +\& RA = Ring atom annotation indicating whether atom is a ring +\& FC<+n/\-n> = Formal charge assigned to atom +\& MN<n> = Mass number indicating isotope other than most abundant isotope +\& SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or +\& 3 (triplet) +.Ve +.Sp +Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to: +.Sp +.Vb 1 +\& AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/\-n>.MN<n>.SM<n> +.Ve +.Sp +Except for \s-1AS\s0 which is a required atomic invariant in atom types, all other atomic invariants are +optional. Atom type specification doesn't include atomic invariants with zero or undefined values. 
+.Sp +In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words +are also allowed: +.Sp +.Vb 12 +\& X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors +\& BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms +\& LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms +\& SB : NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms +\& DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms +\& TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms +\& H : NumOfImplicitAndExplicitHydrogens +\& Ar : Aromatic +\& RA : RingAtom +\& FC : FormalCharge +\& MN : MassNumber +\& SM : SpinMultiplicity +.Ve +.Sp +\&\fIAtomTypes::AtomicInvariantsAtomTypes\fR module is used to assign atomic invariant +atom types. +.Sp +\&\fIFunctionalClassesToUse\fR parameter name and values are used during \fIFunctionalClassAtomTypes\fR +value of parameter \fIAtomIdentifierType\fR. It's a list of space separated valid atomic invariant atom types. +.Sp +Possible values for atom functional classes are: \fIAr, \s-1CA\s0, H, \s-1HBA\s0, \s-1HBD\s0, Hal, \s-1NI\s0, \s-1PI\s0, \s-1RA\s0\fR. 
+.Sp +Default value for \fIFunctionalClassesToUse\fR parameter is set to: +.Sp +.Vb 1 +\& HBD HBA PI NI Ar Hal +.Ve +.Sp +for all fingerprints except for the following two \fIMolecularComplexityType\fR fingerprints: +.Sp +.Vb 1 +\& MolecularComplexityType FunctionalClassesToUse +\& +\& TopologicalPharmacophoreAtomPairsFingerprints HBD HBA PI NI H +\& TopologicalPharmacophoreAtomTripletsFingerprints HBD HBA PI NI H Ar +.Ve +.Sp +The functional class abbreviations correspond to: +.Sp +.Vb 9 +\& HBD: HydrogenBondDonor +\& HBA: HydrogenBondAcceptor +\& PI : PositivelyIonizable +\& NI : NegativelyIonizable +\& Ar : Aromatic +\& Hal : Halogen +\& H : Hydrophobic +\& RA : RingAtom +\& CA : ChainAtom +\& +\& Functional class atom type specification for an atom corresponds to: +\& +\& Ar.CA.H.HBA.HBD.Hal.NI.PI.RA +.Ve +.Sp +\&\fIAtomTypes::FunctionalClassAtomTypes\fR module is used to assign functional class atom +types. It uses following definitions [ Ref 60\-61, Ref 65\-66 ]: +.Sp +.Vb 4 +\& HydrogenBondDonor: NH, NH2, OH +\& HydrogenBondAcceptor: N[!H], O +\& PositivelyIonizable: +, NH2 +\& NegativelyIonizable: \-, C(=O)OH, S(=O)OH, P(=O)OH +.Ve +.Sp +\&\fIMACCSKeysSize\fR parameter name is only used during \fIMACCSKeys\fR value of +\&\fIMolecularComplexityType\fR and corresponds to the size of \s-1MACCS\s0 key set. Possible +values: \fI166 or 322\fR. Default value: \fI166\fR. +.Sp +\&\fINeighborhoodRadius\fR parameter name is only used during \fIExtendedConnectivityFingerprints\fR +value of \fIMolecularComplexityType\fR and corresponds to atomic neighborhoods radius for +generating extended connectivity fingerprints. Possible values: positive integer. Default value: +\&\fI2\fR. +.Sp +\&\fIMinPathLength\fR and \fIMaxPathLength\fR parameters are only used during \fIPathLengthFingerprints\fR +value of \fIMolecularComplexityType\fR and correspond to minimum and maximum path lengths to use +for generating path length fingerprints. Possible values: positive integers. 
Default value: \fIMinPathLength \- 1\fR; +\&\fIMaxPathLength \- 8\fR. +.Sp +\&\fIUseBondSymbols\fR parameter is only used during \fIPathLengthFingerprints\fR value of +\&\fIMolecularComplexityType\fR and indicates whether bond symbols are included in atom path +strings used to generate path length fingerprints. Possible value: \fIYes or No\fR. Default value: +\&\fIYes\fR. +.Sp +\&\fIMinDistance\fR and \fIMaxDistance\fR parameters are only used during \fITopologicalAtomPairsFingerprints\fR +and \fITopologicalAtomTripletsFingerprints\fR values of \fIMolecularComplexityType\fR and correspond to +minimum and maximum bond distance between atom pairs during topological pharmacophore fingerprints. +Possible values: positive integers. Default value: \fIMinDistance \- 1\fR; \fIMaxDistance \- 10\fR. +.Sp +\&\fIUseTriangleInequality\fR parameter is used during these values for \fIMolecularComplexityType\fR: +\&\fITopologicalAtomTripletsFingerprints\fR and \fITopologicalPharmacophoreAtomTripletsFingerprints\fR. +Possible values: \fIYes or No\fR. It determines whether to apply triangle inequality to distance triplets. +Default value: \fITopologicalAtomTripletsFingerprints \- No\fR; +\&\fITopologicalPharmacophoreAtomTripletsFingerprints \- Yes\fR. +.Sp +\&\fIDistanceBinSize\fR parameter is used during \fITopologicalPharmacophoreAtomTripletsFingerprints\fR +value of \fIMolecularComplexityType\fR and corresponds to distance bin size used for binning +distances during generation of topological pharmacophore atom triplets fingerprints. Possible +value: positive integer. Default value: \fI2\fR. +.Sp +\&\fINormalizationMethodology\fR is only used for these values for \fIMolecularComplexityType\fR: +\&\fIExtendedConnectivityFingerprints\fR, \fITopologicalPharmacophoreAtomPairsFingerprints\fR +and \fITopologicalPharmacophoreAtomTripletsFingerprints\fR. It corresponds to normalization +methodology to use for scaling the number of bits-set or unique keys during generation of +fingerprints. 
Possible values during \fIExtendedConnectivityFingerprints\fR: \fINone or +ByHeavyAtomsCount\fR; Default value: \fINone\fR. Possible values during topological +pharmacophore atom pairs and triplets fingerprints: \fINone or ByPossibleKeysCount\fR; +Default value: \fINone\fR. \fIByPossibleKeysCount\fR corresponds to total number of +possible topological pharmacophore atom pairs or triplets in a molecule. +.Sp +Examples of \fIMolecularComplexity\fR name and value parameters: +.Sp +.Vb 2 +\& MolecularComplexityType,AtomTypesFingerprints,AtomIdentifierType, +\& AtomicInvariantsAtomTypes,AtomicInvariantsToUse,AS X BO H FC +\& +\& MolecularComplexityType,ExtendedConnectivityFingerprints, +\& AtomIdentifierType,AtomicInvariantsAtomTypes, +\& AtomicInvariantsToUse,AS X BO H FC MN,NeighborhoodRadius,2, +\& NormalizationMethodology,None +\& +\& MolecularComplexityType,MACCSKeys,MACCSKeysSize,166 +\& +\& MolecularComplexityType,PathLengthFingerprints,AtomIdentifierType, +\& AtomicInvariantsAtomTypes,AtomicInvariantsToUse,AS,MinPathLength, +\& 1,MaxPathLength,8,UseBondSymbols,Yes +\& +\& MolecularComplexityType,TopologicalAtomPairsFingerprints, +\& AtomIdentifierType,AtomicInvariantsAtomTypes,AtomicInvariantsToUse, +\& AS X BO H FC,MinDistance,1,MaxDistance,10 +\& +\& MolecularComplexityType,TopologicalAtomTripletsFingerprints, +\& AtomIdentifierType,AtomicInvariantsAtomTypes,AtomicInvariantsToUse, +\& AS X BO H FC,MinDistance,1,MaxDistance,10,UseTriangleInequality,No +\& +\& MolecularComplexityType,TopologicalAtomTorsionsFingerprints, +\& AtomIdentifierType,AtomicInvariantsAtomTypes,AtomicInvariantsToUse, +\& AS X BO H FC +\& +\& MolecularComplexityType,TopologicalPharmacophoreAtomPairsFingerprints, +\& AtomIdentifierType,FunctionalClassAtomTypes,FunctionalClassesToUse, +\& HBD HBA PI NI H,MinDistance,1,MaxDistance,10,NormalizationMethodology, +\& None +\& +\& MolecularComplexityType,TopologicalPharmacophoreAtomTripletsFingerprints, +\& 
AtomIdentifierType,FunctionalClassAtomTypes,FunctionalClassesToUse, +\& HBD HBA PI NI H Ar,MinDistance,1,MaxDistance,10,NormalizationMethodology, +\& None,UseTriangleInequality,Yes,NormalizationMethodology,None, +\& DistanceBinSize,2 +.Ve +.IP "\fB\-\-OutDelim\fR \fIcomma | tab | semicolon\fR" 4 +.IX Item "--OutDelim comma | tab | semicolon" +Delimiter for output \s-1CSV/TSV\s0 text file(s). Possible values: \fIcomma, tab, or semicolon\fR. +Default value: \fIcomma\fR. +.IP "\fB\-\-output\fR \fI\s-1SD\s0 | text | both\fR" 4 +.IX Item "--output SD | text | both" +Type of output files to generate. Possible values: \fI\s-1SD\s0, text, or both\fR. Default value: \fItext\fR. +.IP "\fB\-o, \-\-overwrite\fR" 4 +.IX Item "-o, --overwrite" +Overwrite existing files. +.IP "\fB\-\-Precision\fR \fIName,Number,[Name,Number,..]\fR" 4 +.IX Item "--Precision Name,Number,[Name,Number,..]" +Precision of calculated property values in the output file: it's a comma delimited list of +property name and precision value pairs. Possible property names: \fIMolecularWeight, +ExactMass\fR. Possible values: positive integers. Default value: \fIMolecularWeight,2, +ExactMass,4\fR. +.Sp +Examples: +.Sp +.Vb 2 +\& ExactMass,3 +\& MolecularWeight,1,ExactMass,2 +.Ve +.IP "\fB\-q, \-\-quote\fR \fIYes | No\fR" 4 +.IX Item "-q, --quote Yes | No" +Put quote around column values in output \s-1CSV/TSV\s0 text file(s). Possible values: +\&\fIYes or No\fR. Default value: \fIYes\fR. +.IP "\fB\-r, \-\-root\fR \fIRootName\fR" 4 +.IX Item "-r, --root RootName" +New file name is generated using the root: <Root>.<Ext>. Default for new file names: +<SDFileName><PhysicochemicalProperties>.<Ext>. The file type determines <Ext> value. +The sdf, csv, and tsv <Ext> values are used for \s-1SD\s0, comma/semicolon, and tab +delimited text files, respectively. This option is ignored for multiple input files. 
+.IP "\fB\-\-RotatableBonds\fR \fIName,Value, [Name,Value,...]\fR" 4 +.IX Item "--RotatableBonds Name,Value, [Name,Value,...]" +Parameters to control calculation of rotatable bonds [ Ref 92 ]: it's a comma delimited list of parameter +name and value pairs. Possible parameter names: \fIIgnoreTerminalBonds, IgnoreBondsToTripleBonds, +IgnoreAmideBonds, IgnoreThioamideBonds, IgnoreSulfonamideBonds\fR. Possible parameter values: +\&\fIYes or No\fR. By default, value of all parameters is set to \fIYes\fR. +.IP "\fB\-\-RuleOf3Violations\fR \fIYes | No\fR" 4 +.IX Item "--RuleOf3Violations Yes | No" +Specify whether to calculate \fBRuleOf3Violations\fR for SDFile(s). Possible values: \fIYes or No\fR. +Default value: \fINo\fR. +.Sp +For \fIYes\fR value of \fBRuleOf3Violations\fR, in addition to calculating total number of \fBRuleOf3\fR violations, +individual violations for compounds are also written to output files. +.Sp +\&\fBRuleOf3\fR [ Ref 92 ] states: MolecularWeight <= 300, RotatableBonds <= 3, HydrogenBondDonors <= 3, +HydrogenBondAcceptors <= 3, logP <= 3, and \s-1TPSA\s0 <= 60. +.IP "\fB\-\-RuleOf5Violations\fR \fIYes | No\fR" 4 +.IX Item "--RuleOf5Violations Yes | No" +Specify whether to calculate \fBRuleOf5Violations\fR for SDFile(s). Possible values: \fIYes or No\fR. +Default value: \fINo\fR. +.Sp +For \fIYes\fR value of \fBRuleOf5Violations\fR, in addition to calculating total number of \fBRuleOf5\fR violations, +individual violations for compounds are also written to output files. +.Sp +\&\fBRuleOf5\fR [ Ref 91 ] states: MolecularWeight <= 500, HydrogenBondDonors <= 5, HydrogenBondAcceptors <= 10, +and logP <= 5. +.IP "\fB\-\-TPSA\fR \fIName,Value, [Name,Value,...]\fR" 4 +.IX Item "--TPSA Name,Value, [Name,Value,...]" +Parameters to control calculation of \s-1TPSA:\s0 it's a comma delimited list of parameter name and value +pairs. Possible parameter names: \fIIgnorePhosphorus, IgnoreSulfur\fR. Possible parameter values: +\&\fIYes or No\fR. 
By default, value of all parameters is set to \fIYes\fR. +.Sp +By default, \s-1TPSA\s0 atom contributions from Phosphorus and Sulfur atoms are not included during +\&\s-1TPSA\s0 calculations. [ Ref 91 ] +.IP "\fB\-w, \-\-WorkingDir\fR \fIDirName\fR" 4 +.IX Item "-w, --WorkingDir DirName" +Location of working directory. Default value: current directory. +.SH "EXAMPLES" +.IX Header "EXAMPLES" +To calculate default set of physicochemical properties \- MolecularWeight, HeavyAtoms, +MolecularVolume, RotatableBonds, HydrogenBondDonor, HydrogenBondAcceptors, SLogP, +\&\s-1TPSA\s0 \- and generate a SamplePhysicochemicalProperties.csv file containing sequential +compound IDs along with properties data, type: +.PP +.Vb 1 +\& % CalculatePhysicochemicalProperties.pl \-o Sample.sdf +.Ve +.PP +To calculate all available physicochemical properties and generate both SampleAllProperties.csv +and SampleAllProperties.sdf files containing sequential compound IDs in \s-1CSV\s0 file along with +properties data, type: +.PP +.Vb 2 +\& % CalculatePhysicochemicalProperties.pl \-m All \-\-output both +\& \-r SampleAllProperties \-o Sample.sdf +.Ve +.PP +To calculate RuleOf5 physicochemical properties and generate a SampleRuleOf5Properties.csv file +containing sequential compound IDs along with properties data, type: +.PP +.Vb 2 +\& % CalculatePhysicochemicalProperties.pl \-m RuleOf5 +\& \-r SampleRuleOf5Properties \-o Sample.sdf +.Ve +.PP +To calculate RuleOf5 physicochemical properties along with counting RuleOf5 violations and generate +a SampleRuleOf5Properties.csv file containing sequential compound IDs along with properties data, type: +.PP +.Vb 2 +\& % CalculatePhysicochemicalProperties.pl \-m RuleOf5 \-\-RuleOf5Violations Yes +\& \-r SampleRuleOf5Properties \-o Sample.sdf +.Ve +.PP +To calculate RuleOf3 physicochemical properties and generate a SampleRuleOf3Properties.csv file +containing sequential compound IDs along with properties data, type: +.PP +.Vb 2 +\& % 
CalculatePhysicochemicalProperties.pl \-m RuleOf3 +\& \-r SampleRuleOf3Properties \-o Sample.sdf +.Ve +.PP +To calculate RuleOf3 physicochemical properties along with counting RuleOf3 violations and generate +a SampleRuleOf3Properties.csv file containing sequential compound IDs along with properties data, type: +.PP +.Vb 2 +\& % CalculatePhysicochemicalProperties.pl \-m RuleOf3 \-\-RuleOf3Violations Yes +\& \-r SampleRuleOf3Properties \-o Sample.sdf +.Ve +.PP +To calculate a specific set of physicochemical properties and generate a SampleProperties.csv file +containing sequential compound IDs along with properties data, type: +.PP +.Vb 2 +\& % CalculatePhysicochemicalProperties.pl \-m "Rings,AromaticRings" +\& \-r SampleProperties \-o Sample.sdf +.Ve +.PP +To calculate HydrogenBondDonors and HydrogenBondAcceptors using HydrogenBondsType1 definition +and generate a SampleProperties.csv file containing sequential compound IDs along with properties +data, type: +.PP +.Vb 2 +\& % CalculatePhysicochemicalProperties.pl \-m "HydrogenBondDonors,HydrogenBondAcceptors" +\& \-\-HydrogenBonds HBondsType1 \-r SampleProperties \-o Sample.sdf +.Ve +.PP +To calculate \s-1TPSA\s0 using sulfur and phosphorus atoms along with nitrogen and oxygen atoms and +generate a SampleProperties.csv file containing sequential compound IDs along with properties +data, type: +.PP +.Vb 2 +\& % CalculatePhysicochemicalProperties.pl \-m "TPSA" \-\-TPSA "IgnorePhosphorus,No, +\& IgnoreSulfur,No" \-r SampleProperties \-o Sample.sdf +.Ve +.PP +To calculate MolecularComplexity using extended connectivity fingerprints corresponding +to atom neighborhood radius of 2 with atomic invariant atom types without any scaling and +generate a SampleProperties.csv file containing sequential compound IDs along with properties +data, type: +.PP +.Vb 5 +\& % CalculatePhysicochemicalProperties.pl \-m MolecularComplexity \-\-MolecularComplexity +\&
"MolecularComplexityType,ExtendedConnectivityFingerprints,NeighborhoodRadius,2, +\& AtomIdentifierType, AtomicInvariantsAtomTypes, +\& AtomicInvariantsToUse,AS X BO H FC MN,NormalizationMethodology,None" +\& \-r SampleProperties \-o Sample.sdf +.Ve +.PP +To calculate RuleOf5 physicochemical properties along with counting RuleOf5 violations and generate +a SampleRuleOf5Properties.csv file containing compound IDs from molecule name line along with +properties data, type: +.PP +.Vb 3 +\& % CalculatePhysicochemicalProperties.pl \-m RuleOf5 \-\-RuleOf5Violations Yes +\& \-\-DataFieldsMode CompoundID \-\-CompoundIDMode MolName +\& \-r SampleRuleOf5Properties \-o Sample.sdf +.Ve +.PP +To calculate all available physicochemical properties and generate a SampleAllProperties.csv +file containing compound \s-1ID\s0 using specified data field along with properties data, +type: +.PP +.Vb 3 +\& % CalculatePhysicochemicalProperties.pl \-m All +\& \-\-DataFieldsMode CompoundID \-\-CompoundIDMode DataField \-\-CompoundID Mol_ID +\& \-r SampleAllProperties \-o Sample.sdf +.Ve +.PP +To calculate all available physicochemical properties and generate a SampleAllProperties.csv +file containing compound \s-1ID\s0 using combination of molecule name line and an explicit compound +prefix along with properties data, type: +.PP +.Vb 4 +\& % CalculatePhysicochemicalProperties.pl \-m All +\& \-\-DataFieldsMode CompoundID \-\-CompoundIDMode MolnameOrLabelPrefix +\& \-\-CompoundID Cmpd \-\-CompoundIDLabel MolID \-r SampleAllProperties +\& \-o Sample.sdf +.Ve +.PP +To calculate all available physicochemical properties and generate a SampleAllProperties.csv +file containing specific data fields columns along with properties data, type: +.PP +.Vb 3 +\& % CalculatePhysicochemicalProperties.pl \-m All +\& \-\-DataFieldsMode Specify \-\-DataFields Mol_ID \-r SampleAllProperties +\& \-o Sample.sdf +.Ve +.PP +To calculate all available physicochemical properties and generate a
SampleAllProperties.csv +file containing common data fields columns along with properties data, type: +.PP +.Vb 2 +\& % CalculatePhysicochemicalProperties.pl \-m All +\& \-\-DataFieldsMode Common \-r SampleAllProperties \-o Sample.sdf +.Ve +.PP +To calculate all available physicochemical properties and generate both SampleAllProperties.csv +and SampleAllProperties.sdf files containing all data fields columns in \s-1CSV\s0 file along with properties data, type: +.PP +.Vb 3 +\& % CalculatePhysicochemicalProperties.pl \-m All +\& \-\-DataFieldsMode All \-\-output both \-r SampleAllProperties +\& \-o Sample.sdf +.Ve +.SH "AUTHOR" +.IX Header "AUTHOR" +Manish Sud <msud@san.rr.com> +.SH "SEE ALSO" +.IX Header "SEE ALSO" +ExtractFromSDFiles.pl, ExtractFromTextFiles.pl, InfoSDFiles.pl, InfoTextFiles.pl +.SH "COPYRIGHT" +.IX Header "COPYRIGHT" +Copyright (C) 2015 Manish Sud. All rights reserved. +.PP +This file is part of MayaChemTools. +.PP +MayaChemTools is free software; you can redistribute it and/or modify it under +the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version.