view docs/scripts/man1/ExtractFromSequenceFiles.1 @ 0:4816e4a8ae95 draft default tip

Uploaded
author deepakjadmin
date Wed, 20 Jan 2016 09:23:18 -0500
parents
children
line wrap: on
line source

.\" Automatically generated by Pod::Man 2.25 (Pod::Simple 3.22)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings.  \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
.    ds -- \(*W-
.    ds PI pi
.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
.    ds L" ""
.    ds R" ""
.    ds C` ""
.    ds C' ""
'br\}
.el\{\
.    ds -- \|\(em\|
.    ds PI \(*p
.    ds L" ``
.    ds R" ''
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el       .ds Aq '
.\"
.\" If the F register is turned on, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD.  Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.ie \nF \{\
.    de IX
.    tm Index:\\$1\t\\n%\t"\\$2"
..
.    nr % 0
.    rr F
.\}
.el \{\
.    de IX
..
.\}
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
.    \" fudge factors for nroff and troff
.if n \{\
.    ds #H 0
.    ds #V .8m
.    ds #F .3m
.    ds #[ \f1
.    ds #] \fP
.\}
.if t \{\
.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
.    ds #V .6m
.    ds #F 0
.    ds #[ \&
.    ds #] \&
.\}
.    \" simple accents for nroff and troff
.if n \{\
.    ds ' \&
.    ds ` \&
.    ds ^ \&
.    ds , \&
.    ds ~ ~
.    ds /
.\}
.if t \{\
.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
.    \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
.    \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
.    \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
.    ds : e
.    ds 8 ss
.    ds o a
.    ds d- d\h'-1'\(ga
.    ds D- D\h'-1'\(hy
.    ds th \o'bp'
.    ds Th \o'LP'
.    ds ae ae
.    ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "EXTRACTFROMSEQUENCEFILES 1"
.TH EXTRACTFROMSEQUENCEFILES 1 "2015-03-29" "perl v5.14.2" "MayaChemTools"
.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
ExtractFromSequenceFiles.pl \- Extract data from sequence and alignment files
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
ExtractFromSequenceFiles.pl SequenceFile(s) AlignmentFile(s)...
.PP
ExtractFromSequenceFiles.pl [\fB\-h, \-\-help\fR] [\fB\-i, \-\-IgnoreGaps\fR yes | no]
[\fB\-m, \-\-mode\fR SequenceID | SequenceNum | SequenceNumRange] [\fB\-o, \-\-overwrite\fR]
[\fB\-r, \-\-root\fR rootname] [\fB\-s, \-\-Sequences\fR \*(L"SequenceID, [SequenceID,...]\*(R" | \*(L"SequenceNum, [SequenceNum,...]\*(R" |
\&\*(L"StartingSeqNum, EndingSeqNum\*(R"] [\fB\-\-SequenceIDMatch\fR Exact | Relaxed] [\fB\-\-SequenceLength\fR number]
[\fB\-w, \-\-WorkingDir\fR dirname] SequenceFile(s) AlignmentFile(s)...
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
Extract specific data from \fISequenceFile(s) and AlignmentFile(s)\fR and generate
\&\s-1FASTA\s0 files. You can extract sequences using sequence IDs or sequence numbers.
.PP
The file names are separated by spaces. All the sequence files in the current directory can
be specified by \fI*.aln\fR, \fI*.msf\fR, \fI*.fasta\fR, \fI*.fta\fR, \fI*.pir\fR or any other supported
format; additionally, \fIDirName\fR corresponds to all the sequence files in the current directory
with any of the supported file extensions: \fI.aln, .msf, .fasta, .fta, and .pir\fR.
.PP
Supported sequence formats are: \fIALN/ClustalW\fR, \fI\s-1GCG/MSF\s0\fR, \fI\s-1PILEUP/MSF\s0\fR, \fIPearson/FASTA\fR,
and \fI\s-1NBRF/PIR\s0\fR. Instead of using file extensions, file formats are detected by parsing the contents
of \fISequenceFile(s) and AlignmentFile(s)\fR.
.SH "OPTIONS"
.IX Header "OPTIONS"
.IP "\fB\-h, \-\-help\fR" 4
.IX Item "-h, --help"
Print this help message.
.IP "\fB\-i, \-\-IgnoreGaps\fR \fIyes | no\fR" 4
.IX Item "-i, --IgnoreGaps yes | no"
Ignore gaps or gap columns during generation of new sequence or alignment file(s).
Possible values: \fIyes or no\fR. Default value: \fIyes\fR.
.Sp
In order to remove gap columns, the length of all the sequences must be the same; otherwise,
this option is ignored.
.IP "\fB\-m, \-\-mode\fR \fISequenceID | SequenceNum | SequenceNumRange\fR" 4
.IX Item "-m, --mode SequenceID | SequenceNum | SequenceNumRange"
Specify how to extract data from sequence files: extract sequences using sequence
IDs or sequence numbers. Possible values: \fISequenceID | SequenceNum
| SequenceNumRange\fR. Default: \fISequenceNum\fR with value of 1.
.Sp
The sequence numbers correspond to position of sequences starting from 1 for first sequence
in \fISequenceFile(s) and AlignmentFile(s)\fR.
.IP "\fB\-o, \-\-overwrite\fR" 4
.IX Item "-o, --overwrite"
Overwrite existing files.
.IP "\fB\-r, \-\-root\fR \fIrootname\fR" 4
.IX Item "-r, --root rootname"
New sequence file name is generated using the root: <Root><Mode>.<Ext>. Default new file:
<SequenceFileName><Mode>.<Ext>. This option is ignored for multiple input files.
.ie n .IP "\fB\-s, \-\-Sequences\fR \fI""SequenceID,[SequenceID,...]"" | ""SequenceNum,[SequenceNum,...]"" | ""StartingSeqNum,EndingSeqNum""\fR" 4
.el .IP "\fB\-s, \-\-Sequences\fR \fI``SequenceID,[SequenceID,...]'' | ``SequenceNum,[SequenceNum,...]'' | ``StartingSeqNum,EndingSeqNum''\fR" 4
.IX Item "-s, --Sequences SequenceID,[SequenceID,...] | SequenceNum,[SequenceNum,...] | StartingSeqNum,EndingSeqNum"
This value is \fB\-m, \-\-mode\fR specific. In general, it's a comma delimited list of sequence IDs or sequence
numbers.
.Sp
For \fISequenceID\fR value of \fB\-m, \-\-mode\fR option, input value format is: \fISequenceID,...\fR. Examples:
.Sp
.Vb 2
\&    ACHE_BOVIN
\&    ACHE_BOVIN,ACHE_HUMAN
.Ve
.Sp
For \fISequenceNum\fR value of \fB\-m, \-\-mode\fR option, input value format is: \fISequenceNum,...\fR. Examples:
.Sp
.Vb 2
\&    2
\&    1,5
.Ve
.Sp
For \fISequenceNumRange\fR value of \fB\-m, \-\-mode\fR option, input value format is: \fIStartingSeqNum,EndingSeqNum\fR. Examples:
.Sp
.Vb 1
\&    2,4
.Ve
.IP "\fB\-\-SequenceIDMatch\fR \fIExact | Relaxed\fR" 4
.IX Item "--SequenceIDMatch Exact | Relaxed"
Sequence IDs matching criterion during \fISequenceID\fR value of \fB\-m, \-\-mode\fR option: match
specified sequence \s-1ID\s0 exactly or as a substring against sequence IDs in the files. Possible
values: \fIExact | Relaxed\fR. Default: \fIRelaxed\fR. Sequence \s-1ID\s0 match is case insensitive
during both options.
.IP "\fB\-\-SequenceLength\fR \fInumber\fR" 4
.IX Item "--SequenceLength number"
Maximum sequence length per line in sequence file(s). Default: \fI80\fR.
.IP "\fB\-w, \-\-WorkingDir\fR \fIdirname\fR" 4
.IX Item "-w, --WorkingDir dirname"
Location of working directory. Default: current directory.
.SH "EXAMPLES"
.IX Header "EXAMPLES"
To extract first sequence from Sample1.fasta sequence file and generate Sample1SequenceNum.fasta
sequence file, type:
.PP
.Vb 1
\&    % ExtractFromSequenceFiles.pl \-o Sample1.fasta
.Ve
.PP
To extract first sequence from Sample1.aln alignment file and generate Sample1SequenceNum.fasta
sequence file without any column gaps, type:
.PP
.Vb 1
\&    % ExtractFromSequenceFiles.pl \-o Sample1.aln
.Ve
.PP
To extract first sequence from Sample1.aln alignment file and generate Sample1SequenceNum.fasta
sequence file with column gaps, type:
.PP
.Vb 1
\&    % ExtractFromSequenceFiles.pl \-\-IgnoreGaps No \-o Sample1.aln
.Ve
.PP
To extract sequence number 1 and 4 from Sample1.fasta sequence file and generate
Sample1SequenceNum.fasta sequence file, type:
.PP
.Vb 2
\&    % ExtractFromSequenceFiles.pl \-m SequenceNum \-\-Sequences 1,4
\&      \-o Sample1.fasta
.Ve
.PP
To extract sequences from sequence number 1 to 4 from Sample1.fasta sequence file and generate
Sample1SequenceNumRange.fasta sequence file, type:
.PP
.Vb 2
\&    % ExtractFromSequenceFiles.pl \-m SequenceNumRange \-\-Sequences
\&      1,4 \-o Sample1.fasta
.Ve
.PP
To extract sequence \s-1ID\s0 \*(L"Q9P993/104\-387\*(R" from Sample1.fasta sequence file and generate
Sample1SequenceID.fasta sequence file, type:
.PP
.Vb 2
\&    % ExtractFromSequenceFiles.pl \-m SequenceID \-\-Sequences
\&      "Q9P993/104\-387" \-\-SequenceIDMatch Exact \-o Sample1.fasta
.Ve
.SH "AUTHOR"
.IX Header "AUTHOR"
Manish Sud <msud@san.rr.com>
.SH "SEE ALSO"
.IX Header "SEE ALSO"
AnalyzeSequenceFilesData.pl, InfoSequenceFiles.pl
.SH "COPYRIGHT"
.IX Header "COPYRIGHT"
Copyright (C) 2015 Manish Sud. All rights reserved.
.PP
This file is part of MayaChemTools.
.PP
MayaChemTools is free software; you can redistribute it and/or modify it under
the terms of the \s-1GNU\s0 Lesser General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.