Mercurial > repos > deepakjadmin > mayatool3_test3

.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.ie \nF \{\
.    de IX
.    tm Index:\\$1\t\\n%\t"\\$2"
..
.    nr % 0
.    rr F
.\}
.el \{\
.    de IX
..
.\}
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "ANALYZESEQUENCEFILESDATA 1"
.TH ANALYZESEQUENCEFILESDATA 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
AnalyzeSequenceFilesData.pl \- Analyze sequence and alignment files
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
AnalyzeSequenceFilesData.pl SequenceFile(s) AlignmentFile(s)...
.PP
AnalyzeSequenceFilesData.pl [\fB\-h, \-\-help\fR] [\fB\-i, \-\-IgnoreGaps\fR yes | no]
[\fB\-m, \-\-mode\fR PercentIdentityMatrix | ResidueFrequencyAnalysis | All]
[\fB\-\-outdelim\fR comma | tab | semicolon] [\fB\-o, \-\-overwrite\fR] [\fB\-p, \-\-precision\fR number] [\fB\-q, \-\-quote\fR yes | no]
[\fB\-\-ReferenceSequence\fR SequenceID | UseFirstSequenceID]
[\fB\-\-region\fR \*(L"StartResNum, EndResNum, [StartResNum, EndResNum...]\*(R" | UseCompleteSequence]
[\fB\-\-RegionResiduesMode\fR AminoAcids | NucleicAcids | None]
[\fB\-r, \-\-root\fR rootname] [\fB\-w, \-\-WorkingDir\fR dirname] SequenceFile(s) AlignmentFile(s)...
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
Analyze \fISequenceFile(s) and AlignmentFile(s)\fR data: calculate pairwise percent identity matrix or
calculate percent occurrence of various residues in specified sequence regions. All the sequences
in the input file must have the same sequence lengths; otherwise, the sequence file is ignored.
.PP
The file names are separated by spaces. All the sequence files in the current directory can
be specified by \fI*.aln\fR, \fI*.msf\fR, \fI*.fasta\fR, \fI*.fta\fR, \fI*.pir\fR or any other supported
format; additionally, \fIDirName\fR corresponds to all the sequence files in the current directory
with any of the supported file extensions: \fI.aln, .msf, .fasta, .fta, and .pir\fR.
.PP
Supported sequence formats are: \fIALN/ClustalW\fR, \fI\s-1GCG/MSF\s0\fR, \fI\s-1PILEUP/MSF\s0\fR, \fIPearson/FASTA\fR,
and \fI\s-1NBRF/PIR\s0\fR. Instead of using file extensions, file formats are detected by parsing the contents
of \fISequenceFile(s) and AlignmentFile(s)\fR.
.SH "OPTIONS"
.IX Header "OPTIONS"
.IP "\fB\-h, \-\-help\fR" 4
.IX Item "-h, --help"
Print this help message.
.IP "\fB\-i, \-\-IgnoreGaps\fR \fIyes | no\fR" 4
.IX Item "-i, --IgnoreGaps yes | no"
Ignore gaps during calculation of sequence lengths and specification of regions during residue
frequency analysis. Possible values: \fIyes or no\fR. Default value: \fIyes\fR.
.IP "\fB\-m, \-\-mode\fR \fIPercentIdentityMatrix | ResidueFrequencyAnalysis | All\fR" 4
.IX Item "-m, --mode PercentIdentityMatrix | ResidueFrequencyAnalysis | All"
Specify how to analyze data in sequence files: calculate percent identity matrix or calculate
frequency of occurrence of residues in specific regions. For the \fIResidueFrequencyAnalysis\fR value
of the \fB\-m, \-\-mode\fR option, output files are generated for both the residue count and percent residue
count. Possible values: \fIPercentIdentityMatrix, ResidueFrequencyAnalysis, or All\fR. Default value:
\&\fIPercentIdentityMatrix\fR.
.IP "\fB\-\-outdelim\fR \fIcomma | tab | semicolon\fR" 4
.IX Item "--outdelim comma | tab | semicolon"
Output text file delimiter. Possible values: \fIcomma, tab, or semicolon\fR.
Default value: \fIcomma\fR.
.IP "\fB\-o, \-\-overwrite\fR" 4
.IX Item "-o, --overwrite"
Overwrite existing files.
.IP "\fB\-p, \-\-precision\fR \fInumber\fR" 4
.IX Item "-p, --precision number"
Precision of calculated values in the output file. Default: up to \fI2\fR decimal places.
Valid values: positive integers.
.IP "\fB\-q, \-\-quote\fR \fIyes | no\fR" 4
.IX Item "-q, --quote yes | no"
Put quotes around column values in output text file. Possible values: \fIyes or
no\fR. Default value: \fIyes\fR.
.IP "\fB\-\-ReferenceSequence\fR \fISequenceID | UseFirstSequenceID\fR" 4
.IX Item "--ReferenceSequence SequenceID | UseFirstSequenceID"
Specify reference sequence \s-1ID\s0 to identify regions for performing \fIResidueFrequencyAnalysis\fR specified
using \fB\-m, \-\-mode\fR option. Default: \fIUseFirstSequenceID\fR.
.IP "\fB\-\-region\fR \fIStartResNum,EndResNum,[StartResNum,EndResNum...] | UseCompleteSequence\fR" 4
.IX Item "--region StartResNum,EndResNum,[StartResNum,EndResNum...] | UseCompleteSequence"
Specify how to perform frequency of occurrence analysis for residues: use specific regions
indicated by starting and ending residue numbers in reference sequence or use the whole reference
sequence as one region. Default: \fIUseCompleteSequence\fR.
.Sp
Based on the value of \fB\-i, \-\-IgnoreGaps\fR option, specified residue numbers \fIStartResNum,EndResNum\fR
correspond to the positions in the reference sequence without gaps or with gaps.
.Sp
For residue numbers corresponding to the reference sequence including gaps, percent occurrence
of various residues corresponding to gap position in reference sequence is also calculated.
.IP "\fB\-\-RegionResiduesMode\fR \fIAminoAcids | NucleicAcids | None\fR" 4
.IX Item "--RegionResiduesMode AminoAcids | NucleicAcids | None"
Specify how to process residues in the regions specified using \fB\-\-region\fR option during
\&\fIResidueFrequencyAnalysis\fR calculation: categorize residues as amino acids, nucleic acids, or simply
ignore residue category during the calculation. Possible values: \fIAminoAcids, NucleicAcids or None\fR.
Default value: \fINone\fR.
.Sp
For \fIAminoAcids\fR or \fINucleicAcids\fR values of \fB\-\-RegionResiduesMode\fR option, all the standard amino
acids or nucleic acids are listed in the output file for each region; any gaps and other non-standard residues
are added to the list as encountered.
.Sp
For \fINone\fR value of \fB\-\-RegionResiduesMode\fR option, no assumption is made about the type of residues.
Residues and gaps are added to the list as encountered.
.IP "\fB\-r, \-\-root\fR \fIrootname\fR" 4
.IX Item "-r, --root rootname"
New sequence file name is generated using the root: <Root><Mode>.<Ext> and
<Root><Mode><RegionNum>.<Ext>. Default new file
name: <SequenceFileName><Mode>.<Ext> for \fIPercentIdentityMatrix\fR value of \fB\-m, \-\-mode\fR option
and <SequenceFileName><Mode><RegionNum>.<Ext>  for \fIResidueFrequencyAnalysis\fR.
The csv, and tsv <Ext> values are used for comma/semicolon, and tab delimited text
files respectively. This option is ignored for multiple input files.
.IP "\fB\-w, \-\-WorkingDir\fR \fIdirname\fR" 4
.IX Item "-w, --WorkingDir dirname"
Location of working directory. Default: current directory.
.SH "EXAMPLES"
.IX Header "EXAMPLES"
To calculate percent identity matrix for all sequences in Sample1.msf file and generate
Sample1PercentIdentityMatrix.csv, type:
.PP
.Vb 1
\&    % AnalyzeSequenceFilesData.pl Sample1.msf
.Ve
.PP
To perform residue frequency analysis for all sequences in Sample1.aln file corresponding to
non-gap positions in the first sequence and generate Sample1ResidueFrequencyAnalysisRegion1.csv
and Sample1PercentResidueFrequencyAnalysisRegion1.csv files, type:
.PP
.Vb 2
\&    % AnalyzeSequenceFilesData.pl \-m ResidueFrequencyAnalysis \-o
\&      Sample1.aln
.Ve
.PP
To perform residue frequency analysis for all sequences in Sample1.aln file corresponding to
all positions in the first sequence and generate TestResidueFrequencyAnalysisRegion1.csv
and TestPercentResidueFrequencyAnalysisRegion1.csv files, type:
.PP
.Vb 2
\&    % AnalyzeSequenceFilesData.pl \-m ResidueFrequencyAnalysis \-\-IgnoreGaps
\&      No \-o \-r Test Sample1.aln
.Ve
.PP
To perform residue frequency analysis for all sequences in Sample1.msf file corresponding to
non-gap residue positions 5 to 15, and 30 to 40 in sequence \s-1ACHE_BOVIN\s0 and generate
Sample1ResidueFrequencyAnalysisRegion1.csv, Sample1ResidueFrequencyAnalysisRegion2.csv,
Sample1PercentResidueFrequencyAnalysisRegion1.csv, and
Sample1PercentResidueFrequencyAnalysisRegion2.csv files, type:
.PP
.Vb 2
\&    % AnalyzeSequenceFilesData.pl \-m ResidueFrequencyAnalysis
\&      \-\-ReferenceSequence ACHE_BOVIN \-\-region "5,15,30,40" \-o Sample1.msf
.Ve
.SH "AUTHOR"
.IX Header "AUTHOR"
Manish Sud <msud@san.rr.com>
.SH "SEE ALSO"
.IX Header "SEE ALSO"
ExtractFromSequenceFiles.pl, InfoSequenceFiles.pl
.SH "COPYRIGHT"
.IX Header "COPYRIGHT"
Copyright (C) 2015 Manish Sud. All rights reserved.
.PP
This file is part of MayaChemTools.
.PP
MayaChemTools is free software; you can redistribute it and/or modify it under
the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
author	deepakjadmin
date	Thu, 15 Dec 2016 14:04:29 -0500
parents	73ae111cf86f
children