Previous changeset 18:e4d75f9efb90 (2017-02-02) Next changeset 20:f800869c6070 (2017-02-02) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/dunovo commit b'4303231da9e48b2719b4429a29b72421d24310f4\n'-dirty |
added:
align_families.xml dunovo.xml make_families.xml tool_dependencies.xml |
removed:
0notes.txt LICENSE.txt Makefile README.md __pycache__/swalign.cpython-35.pyc align.c align_families.py align_families.pyc baralign.sh consensus-time.py consensus.c consensus.py consensus.pyc correct.py correct.sh disttbfast.pyc dunovo.py galaxy/align_families.xml galaxy/dunovo.xml galaxy/make_families.xml galaxy/tool_dependencies.xml libalign.so libconsensus.so libseqtools.so libswalign.so loeb-2.0.sh mafft/core/DNA.h mafft/core/Falign.c mafft/core/Falign_localhom.c mafft/core/Galign11.c mafft/core/JTT.c mafft/core/Lalign11.c mafft/core/Lalignmm.c mafft/core/MSalign11.c mafft/core/MSalignmm.c mafft/core/Makefile mafft/core/Makefile.sos mafft/core/SAalignmm.c mafft/core/Salignmm.c mafft/core/addfunctions.c mafft/core/addsingle.c mafft/core/blosum.c mafft/core/blosum.h mafft/core/constants.c mafft/core/contrafoldwrap.c mafft/core/countlen.c mafft/core/defs.c mafft/core/disttbfast.c mafft/core/dndblast.c mafft/core/dndfast4.c mafft/core/dndfast7.c mafft/core/dndpre.c mafft/core/dndpre_score.c mafft/core/dp.h mafft/core/dvtditr.c mafft/core/f2cl.c mafft/core/fft.c mafft/core/fft.h mafft/core/fftFunctions.c mafft/core/functions.h mafft/core/genalign11.c mafft/core/getlag.c mafft/core/interface.c mafft/core/io.c mafft/core/iteration.c mafft/core/mafft-distance.c mafft/core/mafft-homologs.1 mafft/core/mafft-homologs.tmpl mafft/core/mafft-profile.c mafft/core/mafft.1 mafft/core/mafft.bat mafft/core/mafft.h mafft/core/mafft.tmpl mafft/core/mafftash_premafft.tmpl mafft/core/makedirectionlist.c mafft/core/makemergetable.rb mafft/core/mccaskillwrap.c mafft/core/mingw64mingw32 mafft/core/mingw64mingw32dll mafft/core/miyata.h mafft/core/miyata5.h mafft/core/mltaln.h mafft/core/mltaln9.c mafft/core/mtxutl.c mafft/core/mtxutl.h mafft/core/multi2hat3s.c mafft/core/newick2mafft.rb mafft/core/nj.c mafft/core/pair2hat3s.c mafft/core/pairash.c mafft/core/pairlocalalign.c mafft/core/partSalignmm.c mafft/core/regionalrealignment.rb mafft/core/regtable2seq.c mafft/core/replaceu.c 
mafft/core/restoreu.c mafft/core/rna.c mafft/core/rnatest.c mafft/core/score.c mafft/core/seekquencer_premafft.tmpl mafft/core/seq2regtable.c mafft/core/setcore.c mafft/core/setdirection.c mafft/core/sextet5.c mafft/core/share.h mafft/core/splittbfast.c mafft/core/suboptalign11.c mafft/core/tbfast.c mafft/core/tddis.c mafft/core/tditeration.c mafft/core/treeOperation.c mafft/core/univscript.tmpl mafft/core/version.c mafft/license mafft/readme mafft/test/sample mafft/test/sample.dpparttree mafft/test/sample.fftns2 mafft/test/sample.fftnsi mafft/test/sample.gins1 mafft/test/sample.ginsi mafft/test/sample.lins1 mafft/test/sample.linsi mafft/test/sample.parttree mafft/test/samplerna mafft/test/samplerna.qinsi mafft/test/samplerna.xinsi make-barcodes.awk misc/00README.txt misc/ACCGACACAGACTAGGGATCAAAG.msa.qual.tsv misc/ACCGACACAGACTAGGGATCAAAG.msa.tsv misc/ACCGACACAGACTAGGGATCAAAG.tsv misc/bug1/CTGCGACACAATATTGGGCTCCCC.ab.2.family.msa.tsv misc/bug1/CTGCGACACAATATTGGGCTCCCC.ab.2.sscs.after.fa misc/bug1/CTGCGACACAATATTGGGCTCCCC.ab.2.sscs.before.fa misc/bug1/GAGAACTGAAACAGCAACTATCCG.ba.2.family.msa.tsv misc/bug1/GAGAACTGAAACAGCAACTATCCG.ba.2.sscs.after.fa misc/bug1/GAGAACTGAAACAGCAACTATCCG.ba.2.sscs.before.fa misc/bug1/GCCTGAAATGACGGTTGTTACATT.ab.1.family.msa.tsv misc/bug1/GCCTGAAATGACGGTTGTTACATT.ab.1.sscs.after.fa misc/bug1/GCCTGAAATGACGGTTGTTACATT.ab.1.sscs.before.fa misc/bug1/TAATACGATGACATTTCGCACCGA.ab.2.family.msa.tsv misc/bug1/TAATACGATGACATTTCGCACCGA.ab.2.sscs.after.fa misc/bug1/TAATACGATGACATTTCGCACCGA.ab.2.sscs.before.fa misc/bug1/TTTTAAGCGAAATTTACCCGTTAA.ab.2.family.msa.tsv misc/bug1/TTTTAAGCGAAATTTACCCGTTAA.ab.2.sscs.after.fa misc/bug1/TTTTAAGCGAAATTTACCCGTTAA.ab.2.sscs.before.fa misc/bug1/cmp.sh misc/bug1/cmp.txt misc/bug1/diff.family.msa.tsv misc/bug1/diff.sscs.after.fa misc/bug1/diff.sscs.before.fa misc/bug1/tmp.family.msa.tsv misc/bug1/tmp.sscs.after.fa misc/bug1/tmp.sscs.before.fa misc/family.align.fa misc/family.cons.fa misc/family.fa misc/family.msa.tsv 
misc/family2.align.fa misc/family2.cons.fa misc/family2.fa misc/family3.align.fa misc/family3.cons.fa misc/msa_sscs_matcher.py misc/read.fa misc/sscs_diff.py pipeline.sh planemo-template/cat.xml planemo-template/random_lines_two_pass.py planemo-template/randomlines.xml planemo-template/test-data/1.bed planemo-template/test-data/1_bed_random_lines_1_seed_asdf_out.bed seqtools.c seqtools.py seqtools.pyc swalign.c swalign.h swalign.py swalign.pyc test-msa.fa test.fa test.fq test.py test.sam test.sscs.fa test2.fa test_1.fa test_2.fa tests/families.cons.fa tests/families.cons.incl-sscs.fa tests/families.cons_1.fa tests/families.cons_2.fa tests/families.in.tsv tests/families.raw_1.fq tests/families.raw_2.fq tests/families.shuf.in.tsv tests/families.sort.tsv tests/families.unequal.fa tests/families.unequal.msa.tsv tests/families.unequal.sscs.fa tests/families.unequal.tsv tests/gapqual.cons.fa tests/gapqual.msa.tsv tests/gaps-diffs.out.tsv tests/gaps.msa.tsv tests/qual.cons.fa tests/qual.msa.tsv tests/quirks.msa.tsv tests/run.sh tests/smoke.families.aligned.tsv tests/smoke.families.i0.tsv tests/smoke.families.tsv tests/smoke_1.fq tests/smoke_2.fq utils/bars.tsv utils/chrM-alt.fa utils/correct-simple.py utils/fastareader.py utils/fastareader.pyc utils/fastqreader.py utils/fastqreader.pyc utils/frags.fq utils/frags1.fq utils/fuzzy-match.py utils/get_msa.py utils/getreads.py utils/getreads.pyc utils/msa2fa.awk utils/muts.genome.tsv utils/muts.old.tsv utils/muts.tsv utils/muts.wgsim.tsv utils/outconv.awk utils/outconv.py utils/precheck.py utils/query.fa utils/read.fq utils/sim-check.py utils/sim-genome.py utils/sim-label.py utils/sim.py utils/sim.pyc utils/stats.py utils/subsample.py utils/test_1.fq.gz utils/test_2.fq.gz |
b |
diff -r e4d75f9efb90 -r 675a8370675b 0notes.txt --- a/0notes.txt Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,97 +0,0 @@ -============================ -Reverse engineering of mafft -============================ -For reference, the source code I'm working with is at ~/bx/src/mafft-7.221-without-extensions. - ------------------ -mafft bash script ------------------ -Note: At this point, a faster way to reverse engineer this is probably by running it with debug prints added at various points to check variable values. - -align_families.py executes the mafft command "mafft --nuc --quiet $tempfile". -"mafft" is actually a bash script at "scripts/mafft" in the source. But it's so insane it's essentially obfuscated (it doesn't help that the only comments are in Japanese). It's clear, though, that the bash script decides which executable to run, based on your arguments. - -Approaching from another direction, I can see that when I execute the mafft command, it always executes the same exact sub-command (even for different input read lengths): - disttbfast -q 0 -E 2 -V -1.53 -s 0.0 -W 6 -O -C 0 -D -b 62 -f -1.53 -Q 100.0 -h 0 -F -X 0.1 -Searching the bash script, this occurs on line 2060. The input comes from stdin, which is fed $TMPFILE/infile. That resolves to /tmp/mafft.*/infile (lines 826 & 829). This seems to simply be the input file I give to the mafft command, processed a little: \r is converted to \n (line 849), and a newline is added to the end (line 850). HOWEVER, there are many points where the input file may be additionally processed before disttbfast gets to it. -The output is piped to $TMPFILE/pre. This appears to be the aligned FASTA. -It is potentially altered by several executables before final output: - line command executed? condition - 2068 splittbfast false [ $distance = "parttree" ] - 2083 setcore false [ $coreout -eq 1 ] - 2086 restoreu false ! [ $coreout -eq 1 ] && [ $anysymbol -eq 1 ] - 2181 f2cl false [ "$outputfile" = "" ] && ! [ "$outputopt" = "null" ] - 2187 f2cl false ! [ "$outputfile" = "" ] && ! 
[ "$outputopt" = "null" ] - -Here's a dissection of the invocation of disttbfast, correlating variables in line 2060 with their values, as seen in the actual executed command. I resolved all of them by looking at the mafft bash script. -value variable - "$prefix/disttbfast" - -q -0 $npickup - -E -2 $cycledisttbfast - -V --1.53 "-"$gopdist - -s -0.0 $unalignlevel - $legacygapopt - $mergearg - -W -6 $tuplesize - -O $termgapopt - $outnum - $addarg - -C -0 $numthreads - $memopt - $weightopt - $treeinopt - $treeoutopt - $distoutopt - -D $seqtype --b 62 $model - -f --1.53 "-"$gop - -Q -100.0 $spfactor - -h -0 $aof - -F $param_fft - $algopt - -X 0.1 $treealg - $scoreoutarg - --------- -Makefile --------- -The commands it executes when you run "make disttbfast" (in a directory lacking only disttbfast and disttbfast.o) are: -$ gcc -Denablemultithread -O3 -c disttbfast.c -$ gcc -o disttbfast mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o disttbfast.o defs.o fft.o fftFunctions.o addfunctions.o -Denablemultithread -O3 -lm -lpthread -The first command uses the -c option to stop gcc from doing the linking, producing only disttfast.o. The second command links it with all its dependencies and produces the final binary. -The long list of .o files is apparently stored in $(OBJDISTTBFAST). In order to create disttbfast.so, it seems you'll have to recompile all its dependencies as .so files. 
- -Update 1: -I looked at Makefile.sos, and used their CFLAGS to try compiling disttbfast.so: -$ gcc -Denablemultithread -O0 -fPIC -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 disttbfast.c -o disttbfast.so mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o disttbfast.o defs.o fft.o fftFunctions.o addfunctions.o -lm -lpthread -It looks like it would've worked, except for many functions which are defined in multiple source files (including main()). I guess I could edit the source files to remove those definitions, but I'm guessing that's not right. But I don't know how to compile it correctly when functions are defined in both disttbfast.c and its dependencies. - -Update 2: -Okay, or all I need to do is use their Makefiles.sos' libdisttbfast.so directive: -[Sat Mar 19] me@yoga: ~/bx/code/mafft/core -$ make -f Makefile.sos libdisttbfast.so -gcc -shared -o libdisttbfast.so mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o disttbfast.o defs.o fft.o fftFunctions.o addfunctions.o -Denablemultithread -fPIC -O0 -fPIC -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 -lm -lpthread - - ------------- -disttbfast.c ------------- -There's a lot that'll need modification: -- Assuming the cwd is the temporary directory. - - Reading from or writing to filenames (search "fopen", FILE variables) -- Calling external commands like line 2477: system( "cp infile.tree GuideTree" ); -- Printing to stderr (I'd like to be able to silence that). -- Calling exit(); - -It looks like the writing of results might happen wherever writeData_pointer() is called. -- Defined on line 2425 of io.c. -- Could just replace with a return statement. \ No newline at end of file |
b |
diff -r e4d75f9efb90 -r 675a8370675b LICENSE.txt --- a/LICENSE.txt Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,31 +0,0 @@ -Copyright (c) 2015, Pennsylvania State University -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -================================================================================ -Note: That's a BSD license (https://opensource.org/licenses/BSD-2-Clause). - -The following files contain software governed by a different license, -which can be found in those files: -swalign.c -swalign.h |
b |
diff -r e4d75f9efb90 -r 675a8370675b Makefile --- a/Makefile Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ -CFLAGS=-Wall - -all: - gcc -Wall -shared -fPIC align.c -o libalign.so - gcc -Wall -shared -fPIC swalign.c -o libswalign.so -lm - gcc -Wall -shared -fPIC seqtools.c -o libseqtools.so - gcc -Wall -shared -fPIC consensus.c -o libconsensus.so - |
b |
diff -r e4d75f9efb90 -r 675a8370675b README.md --- a/README.md Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,110 +0,0 @@ -# _Du Novo_ - -This is a pipeline for processing of duplex sequencing data without the use of a reference genome. - -The pipeline was designed for use with the duplex method described in [Kennedy *et al.* 2014](https://dx.doi.org/10.1038/nprot.2014.170), but the assumptions are relatively minimal, so you should be able to apply it to variants of the protocol. - -The majority of _Du Novo_ is released under a BSD license, except for some portions governed by the MIT license. See `LICENSE.txt` for details. - - -## Running _Du Novo_ from Galaxy - -We created a comprehensive [tutorial](https://github.com/galaxyproject/dunovo/wiki) explaining all aspects of interactive use of _Du Novo_ from within [Galaxy](http://usegalaxy.org). - - -## Running _Du Novo_ on the command line - - -### Requirements - -The pipeline requires a Unix command line, and it must be able to find the `mafft` command on your [`PATH`](https://en.wikipedia.org/wiki/Search_path). - -All known requirements are below. Version numbers in parentheses are what the development environment uses. Version numbers in **bold** are known to be required. - -* [MAFFT](http://mafft.cbrc.jp/alignment/software/) (v7.123b) -* [Python](https://www.python.org/) (**2.7**) -* And standard unix tools: - - [gcc](https://gcc.gnu.org/) (4.8.4) - - [make](https://www.gnu.org/software/make/) (3.81) - - [bash](https://www.gnu.org/software/bash/bash.html) (4.0) - - [awk](https://www.gnu.org/software/gawk/) (4.0.1) - - [paste](https://www.gnu.org/software/coreutils/coreutils.html) (8.21) - - [sort](https://www.gnu.org/software/coreutils/coreutils.html) (8.21) - - -### Installation - - $ git clone git@github.com:galaxyproject/dunovo.git - $ cd dunovo - $ make - -Instead of the `git` command, you can just click the "Clone or download" button on this page, then "Download ZIP", unzip it, and `cd` to the "dunovo-master" directory. - -The `make` command is needed to compile the C modules, which are required. 
You need to be in the root source directory (where the file `Makefile` is) before running the command. - - -### Usage - -This example shows how to go from raw duplex sequencing data to the final duplex consensus sequences. - -Your raw reads should be in `reads_1.fastq` and `reads_2.fastq`. And the scripts `align_families.py` and `dunovo.py` should be on your `PATH`. Also, in the following command, replace `make-barcodes.awk` with the actual path to that script (included in this pipeline). - -1. Sort the reads into families based on their barcodes and split the barcodes from the sequence. - ```bash - $ paste reads_1.fastq reads_2.fastq \ - | paste - - - - \ - | awk -f make-barcodes.awk \ - | sort > families.tsv - ``` - -2. Do multiple sequence alignments of the read families. -`$ align_families.py families.tsv > families.msa.tsv` - -3. Build duplex consensus sequences from the aligned families. -`$ dunovo.py families.msa.tsv > duplex.fa` - -See all options for a given command by giving it the `-h` flag. - - -### Details - -#### 1. Sort the reads into families based on their barcodes and split the barcodes from the sequence. - - $ paste reads_1.fastq reads_2.fastq \ - | paste - - - - \ - | awk -f make-barcodes.awk \ - | sort > families.tsv - -This command pipeline will transform each pair of reads into a one-line record, split the 12bp barcodes off them, and sort by their combined barcode. The end result is a file (named `families.tsv` above) listing read pairs, grouped by barcode. See `make-barcodes.awk` for the details on the formation of the barcodes and the format. - -Note: This step requires your FASTQ files to have exactly 4 lines per read (no multi-line sequences). Also, in the output, the read sequence does not include the barcode or the 5bp constant sequence after it. You can customize the length of the barcode or constant sequence by setting the awk constants `TAG_LEN` and `INVARIANT` (i.e. `awk -v TAG_LEN=10 -f make-barcodes.awk`). - - -#### 2. 
Do multiple sequence alignments of the read families. - -`$ align_families.py families.tsv > families.msa.tsv` - -This step aligns each family of reads, but it processes each strand separately. It can be parallelized with the `-p` option. - - -#### 3. Build duplex consensus sequences from the aligned families. - -`$ dunovo.py families.msa.tsv > duplex.fa` - -This calls a consensus sequence from the multiple sequence alignments of the previous step. It does this in two steps: First, single-strand consensus sequences (SSCSs) are called from the family alignments, then duplex consensus sequences are called from pairs of SSCSs. - -When calling SSCSs, by default 3 reads are required to successfully create a consensus from each strand (change this with `-r`). Quality filtering is done at this step by excluding bases below a quality threshold. By default, no base with a PHRED quality less than 20 will contribute to the consensus (change this with `-q`). If no base passes the threshold or there is no majority base, `N` will be used. - -The duplex consensus sequences are created by comparing the two SSCSs. For each base, if they agree, that base will be inserted. If they disagree, the IUPAC ambiguity code for the two bases will be used. Note that a disagreement between a base and a gap will result in an `N`. - -The output of this step is the duplex consensus sequences in FASTA format. By default, it will only include full duplex consensuses, meaning if one of the two SSCSs is missing, that sequence will be omitted. Include these with the `--incl-sscs` option. - -The reads will be printed in one, interleaved file, with the naming format: -`>{barcode}.{mate} {# reads in strand 1 family}/{# reads in strand 2 family}` -e.g. -`>TTGCGCCAGGGCGAGGAAAATACT.1 8/13` - -But this isn't easy to work with. 
A better output is in development, but for now you can use the script `outconv.py` to convert the interleaved output file into two standard forward/reverse paired files with a standard naming convention: - - $ python utils/outconv.py duplex.fa -1 duplex_1.fa -2 duplex_2.fa - $ python utils/outconv.py sscs.fa -1 sscs_1.fa -2 sscs_2.fa |
b |
diff -r e4d75f9efb90 -r 675a8370675b __pycache__/swalign.cpython-35.pyc |
b |
Binary file __pycache__/swalign.cpython-35.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b align.c --- a/align.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,192 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define NAIVE_TEST_WINDOW 6 -#define NAIVE_TEST_THRES 0.80 -#define NAIVE_TEST_MIN 2 -#define NAIVE_WINDOW 10 -#define NAIVE_THRES 0.80 - -typedef struct Gap { - int seq; - int coord; - int length; - struct Gap *next; -} Gap; - -typedef struct Gaps { - int length; - struct Gap *root; - struct Gap *tip; -} Gaps; - -int _test_match(char *seq1, int start1, char *seq2, int start2); -void add_gap(Gaps *gaps, int seq, int coord, int length); -Gaps *make_gaps(); -char *insert_gaps(Gaps *gaps, char *seq, int seq_num); - - -// A naive algorithm for aligning two sequences which are expected to be very similar to each other -// and already nearly aligned. -void naive2(char *seq1, char *seq2) { - Gaps *gaps = make_gaps(); - int i = 0; - int j = 0; - int matches = 0; - while (seq1[i] != 0 && seq2[j] != 0) { - // Match? - printf("%c %c | i %d j %d\n", seq1[i], seq2[j], i, j); - if (seq1[i] == seq2[j]) { - matches++; - i++; - j++; - continue; - } - printf("mismatch!\n"); - // Mismatch. Start adding gaps until the mismatches go away. - int new_i = i; - int new_j = j; - int gap_seq = 0; - int success; - while (1) { - if (seq1[new_i] == 0 && seq2[new_j] == 0) { - break; - } - success = _test_match(seq1, new_i, seq2, j); - if (success) { - gap_seq = 2; - break; - } - if (seq1[new_i] != 0) { - new_i++; - } - success = _test_match(seq1, i, seq2, new_j); - if (success) { - gap_seq = 1; - break; - } - if (seq2[new_j] != 0) { - new_j++; - } - } - // Which sequence are we putting the gap in? - if (gap_seq == 0) { - printf("No good gap found. new_i: %d, new_j: %d\n", new_i, new_j); - // No good gap found. 
- } else if (i == new_i && j == new_j) { - printf("No gap required.\n"); - } else if (gap_seq == 1) { - printf("%dbp gap in seq1 at base %d.\n", new_j-j, j); - add_gap(gaps, 1, j, new_j-j); - j = new_j; - } else if (gap_seq == 2) { - printf("%dbp gap in seq2 at base %d.\n", new_i-i, i); - add_gap(gaps, 2, i, new_i-i); - i = new_i; - } - i++; - j++; - } - - char *new_seq1 = insert_gaps(gaps, seq1, 1); - char *new_seq2 = insert_gaps(gaps, seq2, 2); - printf("alignment:\n%s\n%s\n", new_seq1, new_seq2); -} - -// Check if the few bases starting at start1 and start2 in seq1 and seq2, respectively, align with -// few mismatches. The number of bases checked is NAIVE_TEST_WINDOW, and they must have a match -// percentage greater than NAIVE_TEST_THRES. Also, the amount of sequence left to compare must be -// more than NAIVE_TEST_MIN. -int _test_match(char *seq1, int start1, char *seq2, int start2) { - int matches = 0; - int total = 0; - char base1, base2; - int i; - for (i = 0; i < NAIVE_TEST_WINDOW-1; i++) { - base1 = seq1[start1+i]; - base2 = seq2[start2+i]; - if (base1 == 0 || base2 == 0) { - break; - } - if (base1 == base2) { - matches++; - } - total++; - } - return total > NAIVE_TEST_MIN && (double)matches/total > NAIVE_TEST_THRES; -} - -Gaps *make_gaps() { - Gaps *gaps = malloc(sizeof(Gaps)); - gaps->root = 0; - gaps->tip = 0; - gaps->length = 0; - return gaps; -} - -void add_gap(Gaps *gaps, int seq, int coord, int length) { - Gap *gap = malloc(sizeof(Gap)); - gap->next = 0; - gap->seq = seq; - gap->coord = coord; - gap->length = length; - if (gaps->root == 0) { - gaps->root = gap; - } else { - gaps->tip->next = gap; - } - gaps->tip = gap; - gaps->length++; -} - -// Take gap information from the aligner and put them into the sequence string as "-" characters. -char *insert_gaps(Gaps *gaps, char *seq, int seq_num) { - if (gaps->root == 0) { - return seq; - } - - // How long should the new sequence be? 
- int extra_len = 0; - Gap *gap = gaps->root; - while (gap) { - if (gap->seq == seq_num) { - extra_len += gap->length; - } - gap = gap->next; - } - - //TODO: Handle a situation with no gaps. - int new_len = extra_len + strlen(seq) + 1; - char *new_seq = malloc(sizeof(char) * new_len); - int i = 0; - int j = 0; - gap = gaps->root; - while (gap) { - // Check that it's a gap in our sequence. - if (gap->seq != seq_num) { - gap = gap->next; - continue; - } - // Copy verbatim all the sequence until the gap. - while (i <= gap->coord) { - new_seq[j] = seq[i]; - i++; - j++; - } - // Add -'s the whole length of the gap. - while (j < gap->coord + gap->length + 1) { - new_seq[j] = '-'; - j++; - } - gap = gap->next; - } - // Fill in the end sequence. - while (seq[i]) { - new_seq[j] = seq[i]; - i++; - j++; - } - new_seq[new_len-1] = 0; - return new_seq; -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b align_families.py --- a/align_families.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,281 +0,0 @@\n-#!/usr/bin/env python\n-from __future__ import division\n-import os\n-import sys\n-import time\n-import tempfile\n-import argparse\n-import subprocess\n-import collections\n-import multiprocessing\n-import distutils.spawn\n-import seqtools\n-\n-#TODO: Warn if it looks like the two input FASTQ files are the same (i.e. the _1 file was given\n-# twice). Can tell by whether the alpha and beta (first and last 12bp) portions of the barcodes\n-# are always identical. This would be a good thing to warn about, since it\'s an easy mistake\n-# to make, but it\'s not obvious that it happened. The pipeline won\'t fail, but will just\n-# produce pretty weird results.\n-\n-REQUIRED_COMMANDS = [\'mafft\']\n-OPT_DEFAULTS = {\'processes\':1}\n-DESCRIPTION = """Read in sorted FASTQ data and do multiple sequence alignments of each family."""\n-\n-\n-def main(argv):\n-\n- parser = argparse.ArgumentParser(description=DESCRIPTION)\n- parser.set_defaults(**OPT_DEFAULTS)\n-\n- parser.add_argument(\'infile\', metavar=\'read-families.tsv\', nargs=\'?\',\n- help=\'The input reads, sorted into families. One line per read pair, 8 tab-delimited columns: \'\n- \'1. canonical barcode, 2. barcode order ("ab" for alpha+beta, "ba" for beta-alpha) 3. \'\n- \'read 1 name, 4. read 1 sequence, 5. read 1 quality scores, 6. read 2 name, 7. read 2 \'\n- \'sequence, 8. read 2 quality scores.\')\n- parser.add_argument(\'-p\', \'--processes\', type=int,\n- help=\'Number of worker subprocesses to use. Must be at least 1. 
Default: %(default)s.\')\n-\n- args = parser.parse_args(argv[1:])\n-\n- assert args.processes > 0, \'-p must be greater than zero\'\n-\n- # Check for required commands.\n- missing_commands = []\n- for command in REQUIRED_COMMANDS:\n- if not distutils.spawn.find_executable(command):\n- missing_commands.append(command)\n- if missing_commands:\n- fail(\'Error: Missing commands: "\'+\'", "\'.join(missing_commands)+\'".\')\n-\n- if args.infile:\n- infile = open(args.infile)\n- else:\n- infile = sys.stdin\n-\n- # Open all the worker processes.\n- workers = open_workers(args.processes)\n-\n- # Main loop.\n- # This processes whole duplexes (pairs of strands) at a time for a future option to align the\n- # whole duplex at a time.\n- stats = {\'duplexes\':0, \'time\':0, \'pairs\':0, \'runs\':0, \'aligned_pairs\':0}\n- current_worker_i = 0\n- duplex = collections.OrderedDict()\n- family = []\n- barcode = None\n- order = None\n- for line in infile:\n- fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n- if len(fields) != 8:\n- continue\n- (this_barcode, this_order, name1, seq1, qual1, name2, seq2, qual2) = fields\n- # If the barcode or order has changed, we\'re in a new family.\n- # Process the reads we\'ve previously gathered as one family and start a new family.\n- if this_barcode != barcode or this_order != order:\n- duplex[order] = family\n- # If the barcode is different, we\'re at the end of the whole duplex. Process the it and start\n- # a new one. 
If the barcode is the same, we\'re in the same duplex, but we\'ve switched strands.\n- if this_barcode != barcode:\n- # sys.stderr.write(\'processing {}: {} orders ({})\\n\'.format(barcode, len(duplex),\n- # \'/\'.join([str(len(duplex[order])) for order in duplex])))\n- output, run_stats, current_worker_i = delegate(workers, stats, duplex, barcode)\n- process_results(output, run_stats, stats)\n- duplex = collections.OrderedDict()\n- barcode = this_barcode\n- order = this_order\n- family = []\n- pair = {\'name1\': name1, \'seq1\':seq1, \'qual1\':qual1, \'name2\':name2, \'seq2\':seq2, \'qual2\':qual2}\n- family.append(pair)\n- stats[\'pairs\'] += 1\n- # Process the last family.\n- duplex[order] = family\n- # sys.stderr.write(\'processing {}: {} orders ({}) [last]\\n\'.format(barcode, len(duplex),\n- # \'/\'.join([str(len(duplex[order])) for order in duplex])))\n- output, run_stats, current_worker_i = delegate(workers, stats, duplex, barcode)\n- '..b'art\n- pairs = len(family)\n- #logging.info(\'{} sec for {} read pairs.\'.format(elapsed, pairs))\n- if pairs > 1:\n- run_stats[\'time\'] += elapsed\n- run_stats[\'runs\'] += 1\n- run_stats[\'aligned_pairs\'] += pairs\n- if alignment is None:\n- pass #logging.warning(\'Error aligning family {}/{} (read {}).\'.format(barcode, order, mate))\n- else:\n- output += format_msa(alignment, barcode, order, mate)\n- return output, run_stats\n-\n-\n-def align_family(family, mate):\n- """Do a multiple sequence alignment of the reads in a family and their quality scores."""\n- mate = str(mate)\n- assert mate == \'1\' or mate == \'2\'\n- # Do the multiple sequence alignment.\n- seq_alignment = make_msa(family, mate)\n- if seq_alignment is None:\n- return None\n- # Transfer the alignment to the quality scores.\n- seqs = [read[\'seq\'] for read in seq_alignment]\n- quals_raw = [pair[\'qual\'+mate] for pair in family]\n- qual_alignment = seqtools.transfer_gaps_multi(quals_raw, seqs, gap_char_out=\' \')\n- # Package them up in the output 
data structure.\n- alignment = []\n- for aligned_seq, aligned_qual in zip(seq_alignment, qual_alignment):\n- alignment.append({\'name\':aligned_seq[\'name\'], \'seq\':aligned_seq[\'seq\'], \'qual\':aligned_qual})\n- return alignment\n-\n-\n-def make_msa(family, mate):\n- """Perform a multiple sequence alignment on a set of sequences and parse the result.\n- Uses MAFFT."""\n- mate = str(mate)\n- assert mate == \'1\' or mate == \'2\'\n- if len(family) == 0:\n- return None\n- elif len(family) == 1:\n- # If there\'s only one read pair, there\'s no alignment to be done (and MAFFT won\'t accept it).\n- return [{\'name\':family[0][\'name\'+mate], \'seq\':family[0][\'seq\'+mate]}]\n- #TODO: Replace with tempfile.mkstemp()?\n- with tempfile.NamedTemporaryFile(\'w\', delete=False, prefix=\'align.msa.\') as family_file:\n- for pair in family:\n- name = pair[\'name\'+mate]\n- seq = pair[\'seq\'+mate]\n- family_file.write(\'>\'+name+\'\\n\')\n- family_file.write(seq+\'\\n\')\n- with open(os.devnull, \'w\') as devnull:\n- try:\n- command = [\'mafft\', \'--nuc\', \'--quiet\', family_file.name]\n- output = subprocess.check_output(command, stderr=devnull)\n- except (OSError, subprocess.CalledProcessError):\n- return None\n- os.remove(family_file.name)\n- return read_fasta(output, is_file=False, upper=True)\n-\n-\n-def read_fasta(fasta, is_file=True, upper=False):\n- """Quick and dirty FASTA parser. Return the sequences and their names.\n- Returns a list of sequences. 
Each is a dict of \'name\' and \'seq\'.\n- Warning: Reads the entire contents of the file into memory at once."""\n- sequences = []\n- sequence = \'\'\n- seq_name = None\n- if is_file:\n- with open(fasta) as fasta_file:\n- fasta_lines = fasta_file.readlines()\n- else:\n- fasta_lines = fasta.splitlines()\n- for line in fasta_lines:\n- if line.startswith(\'>\'):\n- if upper:\n- sequence = sequence.upper()\n- if sequence:\n- sequences.append({\'name\':seq_name, \'seq\':sequence})\n- sequence = \'\'\n- seq_name = line.rstrip(\'\\r\\n\')[1:]\n- continue\n- sequence += line.strip()\n- if upper:\n- sequence = sequence.upper()\n- if sequence:\n- sequences.append({\'name\':seq_name, \'seq\':sequence})\n- return sequences\n-\n-\n-def format_msa(align, barcode, order, mate, outfile=sys.stdout):\n- output = \'\'\n- for sequence in align:\n- output += \'{bar}\\t{order}\\t{mate}\\t{name}\\t{seq}\\t{qual}\\n\'.format(bar=barcode, order=order,\n- mate=mate, **sequence)\n- return output\n-\n-\n-def process_results(output, run_stats, stats):\n- """Process the outcome of a duplex run.\n- Print the aligned output and sum the stats from the run with the running totals."""\n- for key, value in run_stats.items():\n- stats[key] += value\n- if output:\n- sys.stdout.write(output)\n-\n-\n-def fail(message):\n- sys.stderr.write(message+"\\n")\n- sys.exit(1)\n-\n-if __name__ == \'__main__\':\n- sys.exit(main(sys.argv))\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b align_families.pyc |
b |
Binary file align_families.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b align_families.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/align_families.xml Thu Feb 02 19:14:13 2017 -0500 |
b |
@@ -0,0 +1,65 @@ +<?xml version="1.0"?> +<tool id="align_families" name="Du Novo: Align families" version="0.5"> + <description>of duplex sequencing reads</description> + <requirements> + <requirement type="package" version="7.221">mafft</requirement> + <requirement type="package" version="0.5">duplex</requirement> + <requirement type="set_environment">DUPLEX_DIR</requirement> + <!-- TODO: require Python 2.7 --> + </requirements> + <command detect_errors="exit_code">python "\$DUPLEX_DIR/align_families.py" -p \${GALAXY_SLOTS:-1} '$input' > '$output' + </command> + <inputs> + <param name="input" type="data" format="tabular" label="Input reads" help="with barcodes, grouped by family"/> + </inputs> + <outputs> + <data name="output" format="tabular"/> + </outputs> + <tests> + <test> + <param name="input" value="smoke.families.tsv"/> + <output name="output" file="smoke.families.aligned.tsv"/> + </test> + <test> + <param name="input" value="families.in.tsv"/> + <output name="output" file="families.sort.tsv"/> + </test> + </tests> + <help> + +**What it does** + +This is for processing duplex sequencing data. It does a multiple sequence alignment on each (single-stranded) family of reads. + +----- + +**Input** + +This expects the output format of the "Make families" tool. + +----- + +**Output** + +The output is a tabular file where each line corresponds to a (single) read. + +The columns are:: + + 1: barcode (both tags) + 2: tag order in barcode ("ab" or "ba") + 3: read mate ("1" or "2") + 4: read name + 5: read sequence, aligned ("-" for gaps) + 6: read quality scores, aligned (" " for gaps) + +----- + +**Alignments** + +The alignments are done using MAFFT, specifically the command +:: + + $ mafft --nuc --quiet family.fa > family.aligned.fa + + </help> +</tool> |
b |
diff -r e4d75f9efb90 -r 675a8370675b baralign.sh --- a/baralign.sh Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,115 +0,0 @@ -#!/usr/bin/env bash -if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then - echo "Error: Must use bash version 4+." >&2 - exit 1 -fi -set -ue - -Usage="Usage: \$ $(basename $0) [-R] families_file ref_dir out_file -families_file: The families.tsv produced by make-barcodes.awk and sorted. -ref_dir: The directory to put the reference file (\"barcodes.fa\") and its index - files in. -out_file: The path to put the output alignment BAM file at. --R: Don't include reversed barcodes (alpha+beta -> beta+alpha) in the alignment target." - -function main { - - # Read in arguments and check them. - - reverse=true - while getopts ":rh" opt; do - case "$opt" in - r) reverse='';; - h) echo "$USAGE" - exit;; - esac - done - # Get positional arguments. - families=${@:$OPTIND:1} - refdir=${@:$OPTIND+1:1} - outfile=${@:$OPTIND+2:1} - - if ! [[ -f $families ]]; then - fail "Error: families_file \"$families\" not found." - fi - if ! [[ -d $refdir ]]; then - echo "Info: ref_dir \"$refdir\" not found. Creating.." >&2 - mkdir $refdir - fi - outbase=$(echo $outfile | sed -E 's/\.bam$//') - if [[ $outbase == $outfile ]]; then - fail "Error: out_file \"$outfile\" does not end in .bam." - fi - if [[ -e $outfile ]] || [[ -e $outbase.sam ]] || [[ -e $outbase.tmp.sam ]]; then - fail "Error: out_file \"$outfile\" conflicts with existing filename(s)." - fi - - for cmd in bowtie2 bowtie2-build samtools awk; do - if ! which $cmd >/dev/null 2>/dev/null; then - fail "Error: command \"$cmd\" not found." - fi - done - - echo " -families: $families -refdir: $refdir -outfile: $outfile -outbase: $outbase" - - # Create FASTA with barcodes as "reads" for alignment. - awk '$1 != last { - count++ - print ">" count - print $1 - } - { - last = $1 - }' $families > $refdir/barcodes.fa - - # Create "reference" to align the barcodes to. 
- if [[ $reverse ]]; then - # If we're including reversed barcodes, create a new FASTA which includes reversed barcodes - # as well as their forward versions. - awk ' - $1 != last { - count++ - bar = $1 - print ">" count - print bar - print ">" count ":rev" - print swap_halves(bar) - } - { - last = $1 - } - function swap_halves(str) { - half = length(str)/2 - alpha = substr(str, 1, half) - beta = substr(str, half+1) - return beta alpha - }' $families > $refdir/barcodes-ref.fa - else - # If we're not including reversed barcodes, the original FASTA is all we need. Just link to it. - ln -s $refdir/barcodes.fa $refdir/barcodes-ref.fa - fi - - # Perform alignment. - bowtie2-build --packed $refdir/barcodes-ref.fa $refdir/barcodes-ref >/dev/null - bowtie2 -a -x $refdir/barcodes-ref -f -U $refdir/barcodes.fa -S $outbase.sam - samtools view -Sb $outbase.sam > $outbase.tmp.bam - samtools sort $outbase.tmp.bam $outbase - if [[ -s $outfile ]]; then - samtools index $outbase.bam - rm $outbase.sam $outbase.tmp.bam - echo "Success. Output located in \"$outfile\"." >&2 - else - echo "Warning: No output file \"$outfile\" found." >&2 - fi -} - -function fail { - echo "$@" >&2 - exit 1 -} - -main "$@" |
b |
diff -r e4d75f9efb90 -r 675a8370675b consensus-time.py --- a/consensus-time.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,33 +0,0 @@ -#!/usr/bin/env python -import sys -import time -import ctypes - -first = None -times = [] -for i in range(10000): - py_seqs = [] - seq_len = 0 - with open(sys.argv[1]) as infile: - for line in infile: - if line.startswith('>'): - continue - seq = line.strip() - if len(seq) > seq_len: - seq_len = len(seq) - py_seqs.append(seq) - - seqs = (ctypes.c_char_p * len(py_seqs))() - for j, seq in enumerate(py_seqs): - seqs[j] = ctypes.c_char_p(seq) - - consensus = ctypes.cdll.LoadLibrary('./consensus.so') - start = time.time() - consensus.get_votes(seqs, len(seqs), seq_len) - elapsed = 1000 * 1000 * (time.time() - start) - if i == 0: - first = elapsed - else: - times.append(elapsed) - -print '{:0.1f}\t{:0.1f}'.format(first, sum(times)/len(times)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b consensus.c --- a/consensus.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,614 +0,0 @@\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <ctype.h>\n-#include <limits.h>\n-\n-// N.B. This defines the valid bases, but it\'s also effectively defined in the switches in\n-// get_votes_simple(), get_votes_qual(), and get_base_prime(), and in the constant IUPAC_BASES.\n-#define N_BASES 6\n-const char *BASES = "ACGTN-";\n-/* A C G T N - A: 2 Compute IUPAC ambiguous base character by representing each base\n-A 4 6 10 14 22 26 C: 3 with a prime and multiplying. Then use a lookup table (an array\n-C 9 15 21 33 39 G: 5 where the index is the product of the two primes).\n-G 25 35 55 65 T: 7\n-T 49 77 91 N: 11\n-N 121 143 -: 13 1 2 3 4 5 6 7\n-- 169 01234567890123456789012345678901234567890123456789012345678901234567890*/\n-const char *IUPAC_BASES = "N...A.M..CR...WS.....YN..GN......N.K...N.........T.....N.........N....."\n-// 8 9 10 11 12 13 14\n- "......N.............N.............................N..................."\n-// 15 16 17\n- "..N.........................-";\n-#define THRES_DEFAULT 0.5\n-#define WIN_LEN 4\n-#define GAP_CHAR \' \'\n-\n-int **get_votes_simple(char *align[], int n_seqs, int seq_len);\n-int **get_votes_qual(char *align[], char *quals[], int n_seqs, int seq_len, char thres);\n-int init_gap_qual_window(int *window, char *quals, int seq_len);\n-char get_gap_qual(int *window);\n-int push_qual(int *window, int win_edge, char *quals, int seq_len);\n-void print_window(int *window, int win_edge);\n-int **init_votes(int seq_len);\n-void free_votes(int *votes[], int seq_len);\n-void print_votes(char *consensus, int *votes[], int seq_len);\n-char *rm_gaps(char *consensus, int cons_len);\n-char *build_consensus(int *votes[], int seq_len, double thres);\n-char *build_consensus_duplex(int *votes1[], int *votes2[], int seq_len, double thres);\n-char *build_consensus_duplex_simple(char *cons1, char *cons2, int gapped);\n-int get_base_prime(char base);\n-char *get_consensus(char *align[], char *quals[], int 
n_seqs, int seq_len, double thres,\n- char qual_thres, int gapped);\n-char *get_consensus_duplex(char *align1[], char *align2[], char *quals1[], char *quals2[],\n- int n_seqs1, int n_seqs2, int seq_len, double cons_thres,\n- char qual_thres, int gapped, char *method);\n-\n-\n-// Tally the different bases at each position in an alignment.\n-// Returns an array of arrays: for each position in the alignment, an array of the number of times\n-// each base occurs at that position. The order of bases is as in the "BASES" constant.\n-int **get_votes_simple(char *align[], int n_seqs, int seq_len) {\n- int **votes = init_votes(seq_len);\n-\n- // Tally votes for each base.\n- int i, j;\n- for (i = 0; i < n_seqs; i++) {\n- for (j = 0; j < seq_len; j++) {\n- // N.B.: Could write this without hardcoded literals, but it\'s about 40% slower.\n- switch (toupper(align[i][j])) {\n- case \'A\':\n- votes[j][0]++;\n- break;\n- case \'C\':\n- votes[j][1]++;\n- break;\n- case \'G\':\n- votes[j][2]++;\n- break;\n- case \'T\':\n- votes[j][3]++;\n- break;\n- case \'N\':\n- votes[j][4]++;\n- break;\n- case \'-\':\n- votes[j][5]++;\n- break;\n- }\n- }\n- }\n-\n- return votes;\n-}\n-\n-\n-// Tally votes for each base, ignoring bases with a quality score below "thres".\n-int **get_votes_qual(char *align[], char *quals[], int n_seqs, int seq_len, char thres) {\n- int **votes = init_votes(seq_len);\n- int *window = malloc(sizeof(int) * WIN_LEN * 2);\n- int win_edge;\n-\n- // Tally votes for each base.\n- char qual;\n- int i, j'..b'ase_prime1, base_prime2;\n- while (cons1[i] != \'\\0\' && cons2[i] != \'\\0\') {\n- base_prime1 = get_base_prime(cons1[i]);\n- base_prime2 = get_base_prime(cons2[i]);\n- cons[i] = IUPAC_BASES[base_prime1*base_prime2];\n- i++;\n- }\n- cons[seq_len] = \'\\0\';\n- if (gapped) {\n- return cons;\n- } else {\n- return rm_gaps(cons, seq_len);\n- }\n-}\n-\n-\n-int get_base_prime(char base) {\n- switch (base) {\n- case \'A\':\n- return 2;\n- case \'C\':\n- return 3;\n- case 
\'G\':\n- return 5;\n- case \'T\':\n- return 7;\n- case \'N\':\n- return 11;\n- case \'-\':\n- return 13;\n- default:\n- return 0;\n- }\n-}\n-\n-\n-// Convenience function to create a consensus in one step.\n-// Give 0 as "quals" to not use quality scores, and -1.0 as "cons_thres" to use the default\n-// consensus threshold when evaluating base votes.\n-char *get_consensus(char *align[], char *quals[], int n_seqs, int seq_len, double cons_thres,\n- char qual_thres, int gapped) {\n- if (cons_thres == -1.0) {\n- cons_thres = THRES_DEFAULT;\n- }\n- int **votes;\n- if (quals == 0) {\n- votes = get_votes_simple(align, n_seqs, seq_len);\n- } else {\n- votes = get_votes_qual(align, quals, n_seqs, seq_len, qual_thres);\n- }\n- char *consensus_gapped = build_consensus(votes, seq_len, cons_thres);\n- char *consensus;\n- if (gapped) {\n- consensus = consensus_gapped;\n- } else {\n- consensus = rm_gaps(consensus_gapped, seq_len);\n- }\n- free_votes(votes, seq_len);\n- return consensus;\n-}\n-\n-\n-char *get_consensus_duplex(char *align1[], char *align2[], char *quals1[], char *quals2[],\n- int n_seqs1, int n_seqs2, int seq_len, double cons_thres,\n- char qual_thres, int gapped, char *method) {\n- if (cons_thres == -1.0) {\n- cons_thres = THRES_DEFAULT;\n- }\n- int **votes1;\n- int **votes2;\n- if (quals1 == 0 || quals2 == 0) {\n- votes1 = get_votes_simple(align1, n_seqs1, seq_len);\n- votes2 = get_votes_simple(align2, n_seqs2, seq_len);\n- } else {\n- votes1 = get_votes_qual(align1, quals1, n_seqs1, seq_len, qual_thres);\n- votes2 = get_votes_qual(align2, quals2, n_seqs2, seq_len, qual_thres);\n- }\n- char *consensus_gapped;\n- if (!strncmp(method, "freq", 4)) {\n- consensus_gapped = build_consensus_duplex(votes1, votes2, seq_len, cons_thres);\n- } else if (!strncmp(method, "iupac", 5)) {\n- char *cons1 = build_consensus(votes1, seq_len, cons_thres);\n- char *cons2 = build_consensus(votes2, seq_len, cons_thres);\n- consensus_gapped = build_consensus_duplex_simple(cons1, cons2, 
1);\n- } else {\n- return "";\n- }\n- char *consensus;\n- if (gapped) {\n- consensus = consensus_gapped;\n- } else {\n- consensus = rm_gaps(consensus_gapped, seq_len);\n- }\n- free_votes(votes1, seq_len);\n- free_votes(votes2, seq_len);\n- return consensus;\n-}\n-\n-\n-void get_gap_quals(char *quals) {\n- int seq_len = strlen(quals);\n- int *window = malloc(sizeof(int) * WIN_LEN * 2);\n- int win_edge = init_gap_qual_window(window, quals, seq_len);\n- print_window(window, win_edge);\n-\n- int i;\n- char gap_qual;\n- for (i = 0; i < seq_len; i++) {\n- if (quals[i] == GAP_CHAR) {\n- gap_qual = get_gap_qual(window);\n- printf("gap %2d: %2d\\n", i, gap_qual);\n- } else {\n- win_edge = push_qual(window, win_edge, quals, seq_len);\n- print_window(window, win_edge);\n- }\n- }\n-}\n-\n-\n-int main(int argc, char *argv[]) {\n- char **align = malloc(sizeof(char *) * (argc-1));\n-\n- int seq_len = INT_MAX;\n- int i;\n- for (i = 1; i < argc; i++) {\n- if (strlen(argv[i]) < seq_len) {\n- seq_len = strlen(argv[i]);\n- }\n- align[i-1] = argv[i];\n- }\n-\n- if (argc <= 1) {\n- return 1;\n- }\n-\n- get_gap_quals(align[0]);\n- return 0;\n-\n- int **votes = get_votes_simple(align, argc-1, seq_len);\n- char *consensus = build_consensus(votes, seq_len, THRES_DEFAULT);\n- print_votes(consensus, votes, seq_len);\n- printf("%s\\n", consensus);\n- free_votes(votes, seq_len);\n-\n- return 0;\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b consensus.py --- a/consensus.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,117 +0,0 @@ -#!/usr/bin/env python -import os -import sys -import ctypes -import argparse - -script_dir = os.path.dirname(os.path.realpath(__file__)) -consensus = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'libconsensus.so')) -consensus.get_consensus.restype = ctypes.c_char_p -consensus.get_consensus_duplex.restype = ctypes.c_char_p -consensus.build_consensus_duplex_simple.restype = ctypes.c_char_p - -ARG_DEFAULTS = {'alignment':sys.stdin} -DESCRIPTION = "Get the consensus of a set of aligned sequences." - - -def make_argparser(): - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**ARG_DEFAULTS) - parser.add_argument('alignment', type=argparse.FileType('r'), - help='The aligned sequences, in FASTA format (but no multi-line sequences).') - return parser - - -def main(argv): - parser = make_argparser() - args = parser.parse_args(argv[1:]) - sequences = [] - line_num = 0 - for line in args.alignment: - line_num += 1 - if line_num % 2 == 0: - sequences.append(line.rstrip('\r\n')) - cons = get_consensus(sequences) - print(cons) - - -# N.B.: The quality scores must be aligned with their accompanying sequences. -def get_consensus(align, quals=[], cons_thres=-1.0, qual_thres=' ', gapped=False): - cons_thres_c = ctypes.c_double(cons_thres) - qual_thres_c = ctypes.c_char(qual_thres) - n_seqs = len(align) - if gapped: - gapped_c = 1 - else: - gapped_c = 0 - assert not quals or len(quals) == n_seqs, 'Different number of sequences and quals.' 
- seq_len = None - for seq in (align + quals): - if seq_len is None: - seq_len = len(seq) - else: - assert seq_len == len(seq), ('All sequences in the alignment must be the same length: ' - '{}bp != {}bp.\nAlignment:\n{}'.format(seq_len, len(seq), - '\n'.join(align))) - align_c = (ctypes.c_char_p * n_seqs)() - for i, seq in enumerate(align): - align_c[i] = ctypes.c_char_p(seq) - quals_c = (ctypes.c_char_p * n_seqs)() - for i, qual in enumerate(quals): - quals_c[i] = ctypes.c_char_p(qual) - if not quals: - quals_c = 0 - return consensus.get_consensus(align_c, quals_c, n_seqs, seq_len, cons_thres_c, qual_thres_c, - gapped_c) - - -# N.B.: The quality scores must be aligned with their accompanying sequences. -def get_consensus_duplex(align1, align2, quals1=[], quals2=[], cons_thres=-1.0, qual_thres=' ', - method='iupac'): - assert method in ('iupac', 'freq') - cons_thres_c = ctypes.c_double(cons_thres) - qual_thres_c = ctypes.c_char(qual_thres) - n_seqs1 = len(align1) - n_seqs2 = len(align2) - assert (not quals1 and not quals2) or (quals1 and quals2) - assert not quals1 or len(quals1) == n_seqs1 - assert not quals2 or len(quals2) == n_seqs2 - seq_len = None - for seq in (align1 + align2 + quals1 + quals2): - if seq_len is None: - seq_len = len(seq) - else: - assert seq_len == len(seq), 'All sequences in the alignment must be the same length.' 
- align1_c = (ctypes.c_char_p * n_seqs1)() - for i, seq in enumerate(align1): - align1_c[i] = ctypes.c_char_p(seq) - align2_c = (ctypes.c_char_p * n_seqs1)() - for i, seq in enumerate(align2): - align2_c[i] = ctypes.c_char_p(seq) - quals1_c = (ctypes.c_char_p * n_seqs1)() - for i, seq in enumerate(quals1): - quals1_c[i] = ctypes.c_char_p(seq) - quals2_c = (ctypes.c_char_p * n_seqs1)() - for i, seq in enumerate(quals2): - quals2_c[i] = ctypes.c_char_p(seq) - if not quals1: - quals1_c = 0 - if not quals2: - quals2_c = 0 - return consensus.get_consensus_duplex(align1_c, align2_c, quals1_c, quals2_c, n_seqs1, n_seqs2, - seq_len, cons_thres_c, qual_thres_c, method) - - -def build_consensus_duplex_simple(cons1, cons2, gapped=False): - assert len(cons1) == len(cons2) - cons1_c = ctypes.c_char_p(cons1) - cons2_c = ctypes.c_char_p(cons2) - if gapped: - gapped_c = 1 - else: - gapped_c = 0 - return consensus.build_consensus_duplex_simple(cons1_c, cons2_c, gapped_c) - - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b consensus.pyc |
b |
Binary file consensus.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b correct.py --- a/correct.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,608 +0,0 @@\n-#!/usr/bin/env python\n-from __future__ import division\n-from __future__ import print_function\n-import os\n-import sys\n-import gzip\n-import logging\n-import argparse\n-import resource\n-import subprocess\n-import networkx\n-import swalign\n-\n-VERBOSE = (logging.DEBUG+logging.INFO)//2\n-ARG_DEFAULTS = {\'sam\':sys.stdin, \'qual\':20, \'pos\':2, \'dist\':1, \'choose_by\':\'reads\', \'output\':True,\n- \'visualize\':0, \'viz_format\':\'png\', \'log\':sys.stderr, \'volume\':logging.WARNING}\n-USAGE = "%(prog)s [options]"\n-DESCRIPTION = """Correct barcodes using an alignment of all barcodes to themselves. Reads the\n-alignment in SAM format and corrects the barcodes in an input "families" file (the output of\n-make-barcodes.awk). It will print the "families" file to stdout with barcodes (and orders)\n-corrected."""\n-\n-\n-def main(argv):\n-\n- parser = argparse.ArgumentParser(description=DESCRIPTION)\n- parser.set_defaults(**ARG_DEFAULTS)\n-\n- parser.add_argument(\'families\', type=open_as_text_or_gzip,\n- help=\'The sorted output of make-barcodes.awk. The important part is that it\\\'s a tab-delimited \'\n- \'file with at least 2 columns: the barcode sequence and order, and it must be sorted in \'\n- \'the same order as the "reads" in the SAM file.\')\n- parser.add_argument(\'reads\', type=open_as_text_or_gzip,\n- help=\'The fasta/q file given to the aligner. Used to get barcode sequences from read names.\')\n- parser.add_argument(\'sam\', type=argparse.FileType(\'r\'), nargs=\'?\',\n- help=\'Barcode alignment, in SAM format. Omit to read from stdin. The read names must be \'\n- \'integers, representing the (1-based) order they appear in the families file.\')\n- parser.add_argument(\'-P\', \'--prepend\', action=\'store_true\',\n- help=\'Prepend the corrected barcodes and orders to the original columns.\')\n- parser.add_argument(\'-d\', \'--dist\', type=int,\n- help=\'NM edit distance threshold. 
Default: %(default)s\')\n- parser.add_argument(\'-m\', \'--mapq\', type=int,\n- help=\'MAPQ threshold. Default: %(default)s\')\n- parser.add_argument(\'-p\', \'--pos\', type=int,\n- help=\'POS tolerance. Alignments will be ignored if abs(POS - 1) is greater than this value. \'\n- \'Set to greater than the barcode length for no threshold. Default: %(default)s\')\n- parser.add_argument(\'-t\', \'--tag-len\', type=int,\n- help=\'Length of each half of the barcode. If not given, it will be determined from the first \'\n- \'barcode in the families file.\')\n- parser.add_argument(\'-c\', \'--choose-by\', choices=(\'reads\', \'connectivity\'))\n- parser.add_argument(\'--limit\', type=int,\n- help=\'Limit the number of lines that will be read from each input file, for testing purposes.\')\n- parser.add_argument(\'-S\', \'--structures\', action=\'store_true\',\n- help=\'Print a list of the unique isoforms\')\n- parser.add_argument(\'--struct-human\', action=\'store_true\')\n- parser.add_argument(\'-V\', \'--visualize\', nargs=\'?\',\n- help=\'Produce a visualization of the unique structures write the image to this file. \'\n- \'If you omit a filename, it will be displayed in a window.\')\n- parser.add_argument(\'-F\', \'--viz-format\', choices=(\'dot\', \'graphviz\', \'png\'))\n- parser.add_argument(\'-n\', \'--no-output\', dest=\'output\', action=\'store_false\')\n- parser.add_argument(\'-l\', \'--log\', type=argparse.FileType(\'w\'),\n- help=\'Print log messages to this file instead of to stderr. 
Warning: Will overwrite the file.\')\n- parser.add_argument(\'-q\', \'--quiet\', dest=\'volume\', action=\'store_const\', const=logging.CRITICAL)\n- parser.add_argument(\'-i\', \'--info\', dest=\'volume\', action=\'store_const\', const=logging.INFO)\n- parser.add_argument(\'-v\', \'--verbose\', dest=\'volume\', action=\'store_const\', const=VERBOSE)\n- parser.add_argument(\'-D\', \'--debug\', dest=\'volume\', action=\'store_const\', const=logging.DEBUG,\n- help=\'Print debug messages (very verbose).\')\n-\n- args = parser.parse_args(argv[1:])\n-\n- logging.basicConfig(stream=args.log, level=args.volume, format=\'%(message)s\')\n- tone_down_logger()\n'..b' size = structure[\'size\']\n- graph = structure[\'graph\']\n- if size == last_size:\n- i += 1\n- else:\n- i = 0\n- if width is None:\n- width = str(len(str(structure[\'count\'])))\n- letters = num_to_letters(i)\n- degrees = sorted(graph.degree().values(), reverse=True)\n- if human:\n- degrees_str = \' \'.join(map(str, degrees))\n- else:\n- degrees_str = \',\'.join(map(str, degrees))\n- if human:\n- format_str = \'{:2d}{:<3s} {count:<\'+width+\'d} {central:<\'+width+\'d} {}\'\n- print(format_str.format(size, letters+\':\', degrees_str, **structure))\n- else:\n- print(size, letters, structure[\'count\'], structure[\'central\'], degrees_str, sep=\'\\t\')\n- last_size = size\n-\n-\n-def num_to_letters(i):\n- """Translate numbers to letters, e.g. 
1 -> A, 10 -> J, 100 -> CV"""\n- letters = \'\'\n- while i > 0:\n- n = (i-1) % 26\n- i = i // 26\n- if n == 25:\n- i -= 1\n- letters = chr(65+n) + letters\n- return letters\n-\n-\n-def visualize(graphs, viz_path, args_viz_format):\n- import matplotlib\n- from networkx.drawing.nx_agraph import graphviz_layout\n- meta_graph = networkx.Graph()\n- for graph in graphs:\n- add_graph(meta_graph, graph)\n- pos = graphviz_layout(meta_graph)\n- networkx.draw(meta_graph, pos)\n- if viz_path:\n- ext = os.path.splitext(viz_path)[1]\n- if ext == \'.dot\':\n- viz_format = \'graphviz\'\n- elif ext == \'.png\':\n- viz_format = \'png\'\n- else:\n- viz_format = args_viz_format\n- if viz_format == \'graphviz\':\n- from networkx.drawing.nx_pydot import write_dot\n- assert viz_path is not None, \'Must provide a filename to --visualize if using --viz-format "graphviz".\'\n- base_path = os.path.splitext(viz_path)\n- write_dot(meta_graph, base_path+\'.dot\')\n- run_command(\'dot\', \'-T\', \'png\', \'-o\', base_path+\'.png\', base_path+\'.dot\')\n- logging.info(\'Wrote image of graph to \'+base_path+\'.dot\')\n- elif viz_format == \'png\':\n- if viz_path is None:\n- matplotlib.pyplot.show()\n- else:\n- matplotlib.pyplot.savefig(viz_path)\n-\n-\n-def add_graph(graph, subgraph):\n- # I\'m sure there\'s a function in the library for this, but just cause I need it quick..\n- for node in subgraph.nodes():\n- graph.add_node(node)\n- for edge in subgraph.edges():\n- graph.add_edge(*edge)\n- return graph\n-\n-\n-def open_as_text_or_gzip(path):\n- """Return an open file-like object reading the path as a text file or a gzip file, depending on\n- which it looks like."""\n- if detect_gzip(path):\n- return gzip.open(path, \'r\')\n- else:\n- return open(path, \'rU\')\n-\n-\n-def detect_gzip(path):\n- """Return True if the file looks like a gzip file: ends with .gz or contains non-ASCII bytes."""\n- ext = os.path.splitext(path)[1]\n- if ext == \'.gz\':\n- return True\n- elif ext in (\'.txt\', \'.tsv\', 
\'.csv\'):\n- return False\n- with open(path) as fh:\n- is_not_ascii = detect_non_ascii(fh.read(100))\n- if is_not_ascii:\n- return True\n-\n-\n-def detect_non_ascii(bytes, max_test=100):\n- """Return True if any of the first "max_test" bytes are non-ASCII (the high bit set to 1).\n- Return False otherwise."""\n- for i, char in enumerate(bytes):\n- # Is the high bit a 1?\n- if ord(char) & 128:\n- return True\n- if i >= max_test:\n- return False\n- return False\n-\n-\n-def run_command(*command):\n- try:\n- exit_status = subprocess.call(command)\n- except subprocess.CalledProcessError as cpe:\n- exit_status = cpe.returncode\n- except OSError:\n- exit_status = None\n- return exit_status\n-\n-\n-def tone_down_logger():\n- """Change the logging level names from all-caps to capitalized lowercase.\n- E.g. "WARNING" -> "Warning" (turn down the volume a bit in your log files)"""\n- for level in (logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG):\n- level_name = logging.getLevelName(level)\n- logging.addLevelName(level, level_name.capitalize())\n-\n-\n-if __name__ == \'__main__\':\n- sys.exit(main(sys.argv))\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b correct.sh --- a/correct.sh Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,39 +0,0 @@ -#!/usr/bin/env bash -if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then - echo "Error: Must use bash version 4+." >&2 - exit 1 -fi -set -ue - -INITIAL_BARCODES_DEFAULT=20 - -Usage="Usage: \$ $(basename $0) [barcodes to try]" - -function main { - if [[ $# -ge 1 ]] && [[ $1 == '-h' ]]; then - fail "$Usage" - fi - - initial_barcodes="$INITIAL_BARCODES_DEFAULT" - if [[ $# -ge 1 ]]; then - initial_barcodes=$1 - fi - - cat border-families.txt | paste - - | shuf --random-source=border-families.txt \ - | head -n $initial_barcodes | while read count1 barcode order count2 rest; do - echo -ne "$barcode\t$count1\t$count2\t" - read_name=$(grep -B 1 $barcode barcodes.fq | head -n 1 | tail -c +2) - echo "$read_name" - samtools view -f 256 barcodes.bam | awk '$1 == '$read_name' && $5 > 25 {print $3}' | while read read_name2; do - barcode=$(grep -A 1 -E '^@'$read_name2'$' barcodes.fq | tail -n 1) - done - done - -} - -function fail { - echo "$@" >&2 - exit 1 -} - -main "$@" |
b |
diff -r e4d75f9efb90 -r 675a8370675b disttbfast.pyc |
b |
Binary file disttbfast.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b dunovo.py --- a/dunovo.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,340 +0,0 @@\n-#!/usr/bin/env python\n-from __future__ import division\n-import os\n-import sys\n-import time\n-import logging\n-import tempfile\n-import argparse\n-import subprocess\n-import collections\n-import consensus\n-import swalign\n-\n-SANGER_START = 33\n-SOLEXA_START = 64\n-OPT_DEFAULTS = {\'min_reads\':3, \'processes\':1, \'qual\':20, \'qual_format\':\'sanger\'}\n-USAGE = "%(prog)s [options]"\n-DESCRIPTION = """Build consensus sequences from read aligned families. Prints duplex consensus\n-sequences in FASTA to stdout. The sequence ids are BARCODE.MATE, e.g. "CTCAGATAACATACCTTATATGCA.1",\n-where "BARCODE" is the input barcode, and "MATE" is "1" or "2" as an arbitrary designation of the\n-two reads in the pair. The id is followed by the count of the number of reads in the two families\n-(one from each strand) that make up the duplex, in the format READS1/READS2. If the duplex is\n-actually a single-strand consensus because the matching strand is missing, only one number is\n-listed.\n-Rules for consensus building: Single-strand consensus sequences are made by counting how many of\n-each base are at a given position. Bases with a PHRED quality score below the --qual threshold are\n-not counted. If a majority of the reads (that pass the --qual threshold at that position) have one\n-base at that position, then that base is used as the consensus base. If no base has a majority, then\n-an N is used. Duplex consensus sequences are made by aligning pairs of single-strand consensuses,\n-and comparing bases at each position. If they agree, that base is used in the consensus. Otherwise,\n-the IUPAC ambiguity code for both bases is used (N + anything and gap + non-gap result in an N)."""\n-\n-\n-def main(argv):\n-\n- parser = argparse.ArgumentParser(description=DESCRIPTION)\n- parser.set_defaults(**OPT_DEFAULTS)\n-\n- parser.add_argument(\'infile\', metavar=\'read-families.tsv\', nargs=\'?\',\n- help=\'The output of align_families.py. 6 columns: 1. 
(canonical) barcode. 2. order ("ab" or \'\n- \'"ba"). 3. mate ("1" or "2"). 4. read name. 5. aligned sequence. 6. aligned quality \'\n- \'scores.\')\n- parser.add_argument(\'-r\', \'--min-reads\', type=int,\n- help=\'The minimum number of reads (from each strand) required to form a single-strand \'\n- \'consensus. Strands with fewer reads will be skipped. Default: %(default)s.\')\n- parser.add_argument(\'-q\', \'--qual\', type=int,\n- help=\'Base quality threshold. Bases below this quality will not be counted. \'\n- \'Default: %(default)s.\')\n- parser.add_argument(\'-F\', \'--qual-format\', choices=(\'sanger\', \'solexa\'),\n- help=\'FASTQ quality score format. Sanger scores are assumed to begin at \\\'{}\\\' ({}). Default: \'\n- \'%(default)s.\'.format(SANGER_START, chr(SANGER_START)))\n- parser.add_argument(\'--incl-sscs\', action=\'store_true\',\n- help=\'When outputting duplex consensus sequences, include reads without a full duplex (missing \'\n- \'one strand). The result will just be the single-strand consensus of the remaining read.\')\n- parser.add_argument(\'-s\', \'--sscs-file\',\n- help=\'Save single-strand consensus sequences in this file (FASTA format). Currently does not \'\n- \'work when in parallel mode.\')\n- parser.add_argument(\'-f\', \'--family-log\', type=argparse.FileType(\'w\'),\n- help=\'Save a record of the reads in each family to this file.\')\n- parser.add_argument(\'-l\', \'--log\', metavar=\'LOG_FILE\', dest=\'stats_file\',\n- help=\'Print statistics on the run to this file. Use "-" to print to stderr.\')\n- parser.add_argument(\'-p\', \'--processes\', type=int,\n- help=\'Number of processes to use. If > 1, launches this many worker subprocesses. Note: if \'\n- \'this option is used, no output will be generated until the end of the entire run, so no \'\n- \'streaming is possible. 
Default: %(default)s.\')\n-\n- args = parser.parse_args(argv[1:])\n-\n- assert args.processes > 0, \'-p must be greater than zero\'\n- # Make dict of process_family() parameters that don\'t change between families.\n- static = {}'..b'ait()\n- with open(worker[\'outfile\'].name, \'r\') as outfile:\n- for line in outfile:\n- sys.stdout.write(line)\n-\n-\n-def delete_tempfiles(workers):\n- for worker in workers:\n- os.remove(worker[\'outfile\'].name)\n- if worker[\'stats\']:\n- os.remove(worker[\'stats\'])\n-\n-\n-def log_family(family_log, duplex, barcode, min_reads):\n- """Write a record of the reads in this family."""\n- if not family_log:\n- return\n- for (order, mate), family in duplex.items():\n- if len(family) < min_reads:\n- continue\n- \n-\n-\n-def process_duplex(duplex, barcode, workers=None, stats=None, incl_sscs=False, sscs_fh=None,\n- processes=1, min_reads=1, qual_thres=\' \'):\n- stats[\'families\'] += 1\n- # Are we the controller process or a worker?\n- if processes > 1:\n- i = stats[\'families\'] % len(workers)\n- worker = workers[i]\n- delegate(worker, duplex, barcode)\n- return\n- # We\'re a worker. Actually process the family.\n- start = time.time()\n- consensi = []\n- reads_per_strand = []\n- duplex_mate = None\n- for (order, mate), family in duplex.items():\n- reads = len(family)\n- if reads < min_reads:\n- continue\n- # The mate number for the duplex consensus. It\'s arbitrary, but all that matters is that the\n- # two mates have different numbers. 
This system ensures that:\n- # Mate 1 is from the consensus of ab/1 and ba/2 families, while mate 2 is from ba/1 and ab/2.\n- if (order == \'ab\' and mate == 1) or (order == \'ba\' and mate == 2):\n- duplex_mate = 1\n- else:\n- duplex_mate = 2\n- seqs = [read[\'seq\'] for read in family]\n- quals = [read[\'qual\'] for read in family]\n- consensi.append(consensus.get_consensus(seqs, quals, qual_thres=qual_thres))\n- reads_per_strand.append(reads)\n- assert len(consensi) <= 2\n- if sscs_fh:\n- for cons, (order, mate), reads in zip(consensi, duplex.keys(), reads_per_strand):\n- sscs_fh.write(\'>{bar}.{order}.{mate} {reads}\\n\'.format(bar=barcode, order=order, mate=mate,\n- reads=reads))\n- sscs_fh.write(cons+\'\\n\')\n- if len(consensi) == 1 and incl_sscs:\n- print_duplex(consensi[0], barcode, duplex_mate, reads_per_strand)\n- elif len(consensi) == 2:\n- align = swalign.smith_waterman(*consensi)\n- #TODO: log error & return if len(align.target) != len(align.query)\n- cons = consensus.build_consensus_duplex_simple(align.target, align.query)\n- print_duplex(cons, barcode, duplex_mate, reads_per_strand)\n- elapsed = time.time() - start\n- logging.info(\'{} sec for {} reads.\'.format(elapsed, sum(reads_per_strand)))\n- if stats and len(consensi) > 0:\n- stats[\'time\'] += elapsed\n- stats[\'reads\'] += sum(reads_per_strand)\n- stats[\'runs\'] += 1\n-\n-\n-def print_duplex(cons, barcode, mate, reads_per_strand, outfile=sys.stdout):\n- header = \'>{bar}.{mate} {reads}\'.format(bar=barcode, mate=mate,\n- reads=\'-\'.join(map(str, reads_per_strand)))\n- outfile.write(header+\'\\n\')\n- outfile.write(cons+\'\\n\')\n-\n-\n-def read_fasta(fasta, is_file=True):\n- """Quick and dirty FASTA parser. Return the sequences and their names.\n- Returns a list of sequences. 
Each is a dict of \'name\' and \'seq\'.\n- Warning: Reads the entire contents of the file into memory at once."""\n- sequences = []\n- seq_lines = []\n- seq_name = None\n- if is_file:\n- with open(fasta) as fasta_file:\n- fasta_lines = fasta_file.readlines()\n- else:\n- fasta_lines = fasta.splitlines()\n- for line in fasta_lines:\n- if line.startswith(\'>\'):\n- if seq_lines:\n- sequences.append({\'name\':seq_name, \'seq\':\'\'.join(seq_lines)})\n- seq_lines = []\n- seq_name = line.rstrip(\'\\r\\n\')[1:]\n- continue\n- seq_lines.append(line.strip())\n- if seq_lines:\n- sequences.append({\'name\':seq_name, \'seq\':\'\'.join(seq_lines)})\n- return sequences\n-\n-\n-def fail(message):\n- sys.stderr.write(message+"\\n")\n- sys.exit(1)\n-\n-if __name__ == \'__main__\':\n- sys.exit(main(sys.argv))\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b dunovo.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dunovo.xml Thu Feb 02 19:14:13 2017 -0500 |
[ |
@@ -0,0 +1,67 @@ +<?xml version="1.0"?> +<tool id="duplex" name="Du Novo: Make consensus reads" version="0.5"> + <description>from duplex sequencing alignments</description> + <requirements> + <requirement type="package" version="0.5">duplex</requirement> + <requirement type="set_environment">DUPLEX_DIR</requirement> + <!-- TODO: require Python 2.7 --> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + python "\$DUPLEX_DIR/dunovo.py" -r $min_reads -q $qual_thres -F $qual_format '$input' + #if $keep_sscs: + --sscs-file sscs.fa + #end if + > duplex.fa + && python "\$DUPLEX_DIR/utils/outconv.py" duplex.fa -1 '$dcs1' -2 '$dcs2' + #if $keep_sscs: + && python "\$DUPLEX_DIR/utils/outconv.py" sscs.fa -1 '$sscs1' -2 '$sscs2' + #end if + ]]> + </command> + <inputs> + <param name="input" type="data" format="tabular" label="Aligned input reads" /> + <param name="min_reads" type="integer" value="3" min="1" label="Minimum reads per family" help="Single-strand families with fewer than this many reads will be skipped."/> + <param name="qual_thres" type="integer" value="25" min="1" label="Minimum base quality" help="Bases with a PHRED score less than this will not be counted in the consensus making."/> + <param name="qual_format" type="select" label="FASTQ format" help="Solexa should also work for Illumina 1.3+ and 1.5+, and Sanger should work for Illumina 1.8+"> + <option value="sanger" selected="true">Sanger (PHRED 0 = "!")</option> + <option value="solexa">Solexa (PHRED 0 = "@")</option> + </param> + <param name="keep_sscs" type="boolean" truevalue="true" falsevalue="" label="Output single-strand consensus sequences as well" /> + </inputs> + <outputs> + <data name="dcs1" format="fasta" label="$tool.name on $on_string (mate 1)"/> + <data name="dcs2" format="fasta" label="$tool.name on $on_string (mate 2)"/> + <data name="sscs1" format="fasta" label="$tool.name on $on_string (SSCS mate 1)"> + <filter>keep_sscs</filter> + </data> + <data name="sscs2" format="fasta" 
label="$tool.name on $on_string (SSCS mate 2)"> + <filter>keep_sscs</filter> + </data> + </outputs> + <tests> + <test> + <param name="input" value="families.msa.tsv"/> + <output name="dcs1" file="families.cons_1.fa"/> + <output name="dcs2" file="families.cons_2.fa"/> + </test> + </tests> + <help> + +**What it does** + +This is for processing duplex sequencing data. It creates single-strand and duplex consensus reads from aligned read families. + +----- + +**Input** + +This expects the output format of the "Align families" tool. + +----- + +**Output** + +This will output final, duplex consensus reads in two FASTA files (first and second reads in the pairs). Optionally, you can save the single-strand reads too, in a separate FASTA file. + + </help> +</tool> |
b |
diff -r e4d75f9efb90 -r 675a8370675b galaxy/align_families.xml --- a/galaxy/align_families.xml Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,65 +0,0 @@ -<?xml version="1.0"?> -<tool id="align_families" name="Du Novo: Align families" version="0.5"> - <description>of duplex sequencing reads</description> - <requirements> - <requirement type="package" version="7.221">mafft</requirement> - <requirement type="package" version="0.5">duplex</requirement> - <requirement type="set_environment">DUPLEX_DIR</requirement> - <!-- TODO: require Python 2.7 --> - </requirements> - <command detect_errors="exit_code">python "\$DUPLEX_DIR/align_families.py" -p \${GALAXY_SLOTS:-1} '$input' > '$output' - </command> - <inputs> - <param name="input" type="data" format="tabular" label="Input reads" help="with barcodes, grouped by family"/> - </inputs> - <outputs> - <data name="output" format="tabular"/> - </outputs> - <tests> - <test> - <param name="input" value="smoke.families.tsv"/> - <output name="output" file="smoke.families.aligned.tsv"/> - </test> - <test> - <param name="input" value="families.in.tsv"/> - <output name="output" file="families.sort.tsv"/> - </test> - </tests> - <help> - -**What it does** - -This is for processing duplex sequencing data. It does a multiple sequence alignment on each (single-stranded) family of reads. - ------ - -**Input** - -This expects the output format of the "Make families" tool. - ------ - -**Output** - -The output is a tabular file where each line corresponds to a (single) read. - -The columns are:: - - 1: barcode (both tags) - 2: tag order in barcode ("ab" or "ba") - 3: read mate ("1" or "2") - 4: read name - 5: read sequence, aligned ("-" for gaps) - 6: read quality scores, aligned (" " for gaps) - ------ - -**Alignments** - -The alignments are done using MAFFT, specifically the command -:: - - $ mafft --nuc --quiet family.fa > family.aligned.fa - - </help> -</tool> |
b |
diff -r e4d75f9efb90 -r 675a8370675b galaxy/dunovo.xml --- a/galaxy/dunovo.xml Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,67 +0,0 @@ -<?xml version="1.0"?> -<tool id="duplex" name="Du Novo: Make consensus reads" version="0.5"> - <description>from duplex sequencing alignments</description> - <requirements> - <requirement type="package" version="0.5">duplex</requirement> - <requirement type="set_environment">DUPLEX_DIR</requirement> - <!-- TODO: require Python 2.7 --> - </requirements> - <command detect_errors="exit_code"><![CDATA[ - python "\$DUPLEX_DIR/dunovo.py" -r $min_reads -q $qual_thres -F $qual_format '$input' - #if $keep_sscs: - --sscs-file sscs.fa - #end if - > duplex.fa - && python "\$DUPLEX_DIR/utils/outconv.py" duplex.fa -1 '$dcs1' -2 '$dcs2' - #if $keep_sscs: - && python "\$DUPLEX_DIR/utils/outconv.py" sscs.fa -1 '$sscs1' -2 '$sscs2' - #end if - ]]> - </command> - <inputs> - <param name="input" type="data" format="tabular" label="Aligned input reads" /> - <param name="min_reads" type="integer" value="3" min="1" label="Minimum reads per family" help="Single-strand families with fewer than this many reads will be skipped."/> - <param name="qual_thres" type="integer" value="25" min="1" label="Minimum base quality" help="Bases with a PHRED score less than this will not be counted in the consensus making."/> - <param name="qual_format" type="select" label="FASTQ format" help="Solexa should also work for Illumina 1.3+ and 1.5+, and Sanger should work for Illumina 1.8+"> - <option value="sanger" selected="true">Sanger (PHRED 0 = "!")</option> - <option value="solexa">Solexa (PHRED 0 = "@")</option> - </param> - <param name="keep_sscs" type="boolean" truevalue="true" falsevalue="" label="Output single-strand consensus sequences as well" /> - </inputs> - <outputs> - <data name="dcs1" format="fasta" label="$tool.name on $on_string (mate 1)"/> - <data name="dcs2" format="fasta" label="$tool.name on $on_string (mate 2)"/> - <data name="sscs1" format="fasta" label="$tool.name on $on_string (SSCS mate 1)"> - <filter>keep_sscs</filter> - </data> - <data name="sscs2" format="fasta" 
label="$tool.name on $on_string (SSCS mate 2)"> - <filter>keep_sscs</filter> - </data> - </outputs> - <tests> - <test> - <param name="input" value="families.msa.tsv"/> - <output name="dcs1" file="families.cons_1.fa"/> - <output name="dcs2" file="families.cons_2.fa"/> - </test> - </tests> - <help> - -**What it does** - -This is for processing duplex sequencing data. It creates single-strand and duplex consensus reads from aligned read families. - ------ - -**Input** - -This expects the output format of the "Align families" tool. - ------ - -**Output** - -This will output final, duplex consensus reads in two FASTA files (first and second reads in the pairs). Optionally, you can save the single-strand reads too, in a separate FASTA file. - - </help> -</tool> |
b |
diff -r e4d75f9efb90 -r 675a8370675b galaxy/make_families.xml --- a/galaxy/make_families.xml Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,84 +0,0 @@ -<?xml version="1.0"?> -<tool id="make_families" name="Du Novo: Make families" version="0.5"> - <description>of duplex sequencing reads</description> - <requirements> - <requirement type="package" version="0.5">duplex</requirement> - <requirement type="set_environment">DUPLEX_DIR</requirement> - </requirements> - <!-- TODO: Add dependency on coreutils to get paste? --> - <command>paste '$fastq1' '$fastq2' - | paste - - - - - | awk -f "\$DUPLEX_DIR/make-barcodes.awk" -v TAG_LEN=$taglen -v INVARIANT=$invariant - | sort - > '$output' - </command> - <inputs> - <param name="fastq1" type="data" format="fastq" label="Sequencing reads, mate 1"/> - <param name="fastq2" type="data" format="fastq" label="Sequencing reads, mate 2"/> - <param name="taglen" type="integer" value="12" min="0" label="Tag length" help="length of each random barcode on the ends of the fragments"/> - <param name="invariant" type="integer" value="5" min="0" label="Invariant sequence length" help="length of the sequence between the tag and actual sample sequence (the restriction site, normally)"/> - </inputs> - <outputs> - <data name="output" format="tabular"/> - </outputs> - <tests> - <test> - <param name="fastq1" value="smoke_1.fq"/> - <param name="fastq2" value="smoke_2.fq"/> - <param name="taglen" value="5"/> - <param name="invariant" value="1"/> - <output name="output" file="smoke.families.tsv"/> - </test> - <test> - <param name="fastq1" value="smoke_1.fq"/> - <param name="fastq2" value="smoke_2.fq"/> - <param name="taglen" value="5"/> - <param name="invariant" value="0"/> - <output name="output" file="smoke.families.i0.tsv"/> - </test> - </tests> - <help> - -**What it does** - -This tool is for processing raw duplex sequencing data, removing the barcodes and grouping by them into families of reads from the same fragment. - ------ - -**Output** - -The output will be a tabular file where each line corresponds to a pair of input reads. 
- -The columns are:: - - 1: barcode (both tags joined and ordered) - 2: tag order in barcode ("ab" or "ba") - 3: read1 name - 4: read1 sequence (minus the tag and invariant sequences) - 5: read1 quality scores (minus the same tag and invariant) - 6: read2 name - 7: read2 sequence (minus the tag and invariant sequences) - 8: read2 quality scores (minus the same tag and invariant) - ------ - -**Barcode creation** - -For each pair, the tool will remove the tag at the beginning of each read and create a barcode by concatenating the two tags. The order of the tags is determined by a string comparison so that it will make an identical barcode from pairs of either order. The original tag order will be noted in the second column. - -Since pairs from opposite strands will have the same tags, but in the reverse order, this produces the same barcode for reads from the same fragment, regardless of strand. Then a simple sort will group all reads from the same strand together, separated into strands by the different "order" values. - -Examples:: - - +---------------+-----------------+ - | input tags | output | - +-------+-------+-------+---------+ - | read1 | read2 | order | barcode | - +-------+-------+-------+---------+ - | ATG | CCT | ab | ATGCCT | - +-------+-------+-------+---------+ - | CCT | ATG | ba | ATGCCT | - +-------+-------+-------+---------+ - - </help> -</tool> |
b |
diff -r e4d75f9efb90 -r 675a8370675b galaxy/tool_dependencies.xml --- a/galaxy/tool_dependencies.xml Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,22 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="mafft" version="7.221"> - <repository changeset_revision="dd4a533a0e3c" name="mafft" owner="rnateam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> - <package name="duplex" version="0.5"> - <install version="1.0"> - <actions> - <action type="download_by_url">https://github.com/galaxyproject/dunovo/archive/v0.5.tar.gz</action> - <action type="shell_command">make</action> - <action type="move_directory_files"> - <source_directory>.</source_directory> - <destination_directory>$INSTALL_DIR</destination_directory> - </action> - <action type="set_environment"> - <environment_variable action="set_to" name="DUPLEX_DIR">$INSTALL_DIR</environment_variable> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable> - </action> - </actions> - </install> - </package> -</tool_dependency> |
b |
diff -r e4d75f9efb90 -r 675a8370675b libalign.so |
b |
Binary file libalign.so has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b libconsensus.so |
b |
Binary file libconsensus.so has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b libseqtools.so |
b |
Binary file libseqtools.so has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b libswalign.so |
b |
Binary file libswalign.so has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b loeb-2.0.sh --- a/loeb-2.0.sh Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,357 +0,0 @@\n-#!/usr/bin/env bash\n-if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then\n- echo "Error: Must use bash version 4+." >&2\n- exit 1\n-fi\n-set -ue\n-\n-Start=${Start:=}\n-BarcodeLen=${BarcodeLen:=12}\n-SpacerLen=${SpacerLen:=5}\n-StartClip=${StartClip:=5}\n-EndClip=${EndClip:=5}\n-BwaCmd=${BwaCmd:="bwa"}\n-SamtoolsCmd=${SamtoolsCmd:="samtools"}\n-PythonCmd=${PythonCmd:="python"}\n-JavaCmd=${JavaCmd:="java"}\n-PicardDir=${PicardDir:="$HOME/src/picard-tools-1.100"}\n-GatkDir=${GatkDir:="$HOME/src/GenomeAnalysisTK"}\n-\n-Usage="Usage: \\$ $(basename $0) [-d|-c|-a] ref.fa reads_1.fq reads_2.fq readlen [outdir]\n-Run the Loeb pipeline as it was published in the Kennedy et al. 2014 paper\n-(release 2.0). If -d (\\"duplex\\") is given, it will stop after producing the\n-final duplex reads (step 62). This is the default. If -c (\\"cleanup\\") is given,\n-it will skip producing the duplex reads, assuming it\'s already been done, and\n-just do the filtering, realignment, and trimming (steps 63-71). If -a (\\"all\\")\n-is given, it will do the whole pipeline (both halves). If it\'s not doing the\n-second part, Picard and GATK are not required. 
Otherwise, provide the paths to\n-the directories containing their .jars by setting \\$PicardDir and \\$GatkDir.\n-Dependencies:\n-Python >= 2.7 (and < 3.0)\n-BWA <= 0.6.2\n-Samtools <= 0.1.18\n-BioPython 1.62\n-PySAM 0.7.5\n-Picard 1.107\n-GATK 2.4-9\n-To just check your dependency versions, run \\$ $(basename $0) -v"\n-\n-function main {\n-\n- script_dir=$(real_dir)\n-\n- duplex=true\n- cleanup=\'\'\n- if [[ $# -ge 1 ]]; then\n- if [[ $1 == \'-v\' ]]; then\n- print_versions $script_dir\n- exit\n- elif [[ $1 == \'-d\' ]]; then\n- duplex=true\n- cleanup=\'\'\n- elif [[ $1 == \'-c\' ]]; then\n- duplex=\'\'\n- cleanup=true\n- elif [[ $1 == \'-a\' ]]; then\n- duplex=true\n- cleanup=true\n- fi\n- shift\n- fi\n- if [[ $# -lt 4 ]] || [[ $1 == \'-h\' ]]; then\n- fail "$Usage"\n- else\n- ref="$1"\n- fastq1="$2"\n- fastq2="$3"\n- readlen="$4"\n- fi\n- if [[ $# -ge 5 ]]; then\n- outdir="$5"\n- else\n- outdir=.\n- fi\n-\n- if ! echo "$readlen" | grep -qE \'^[0-9]+$\'; then\n- fail "ERROR: Invalid read length \\"$readlen\\"."\n- fi\n- if ! [[ -d $outdir ]]; then\n- fail "ERROR: Invalid output directory \\"$outdir\\"."\n- fi\n-\n- print_versions $script_dir\n-\n- echo "\n-Parameters:\n-ref: $ref\n-fastq1: $fastq1\n-fastq2: $fastq2\n-readlen: $readlen\n-"\n-\n- refdict=$(echo "$ref" | sed -E \'s/\\.fa(sta)?$//\').dict\n- rlenreal=$((readlen-BarcodeLen-SpacerLen))\n- end_clip_start=$((rlenreal-EndClip+1))\n- start=$(echo $Start | cut -d . -f 1)\n-\n- if [[ $duplex ]]; then\n- if ! [[ $start ]] || [[ $start -le 56 ]]; then\n- echo \'===== 56 =====\' && echo \'===== 56 =====\' >&2\n- # Concatenate the 12-nt tag sequences from the paired reads and evaluate for tag quality\n- $PythonCmd $script_dir/tag_to_header.py --infile1 $fastq1 --infile2 $fastq2 \\\n- --outfile1 $outdir/read_1.fq.smi --outfile2 $outdir/read_2.fq.smi \\\n- --barcode_length $BarcodeLen --spacer_length $SpacerLen\n- fi\n- if ! 
[[ $start ]] || [[ $start -le 57 ]]; then\n- # Allow specifying 57.2 so only the second half is executed.\n- if ! [[ $Start ]] || [[ $Start != 57.2 ]]; then\n- echo \'===== 57 =====\' && echo \'===== 57 =====\' >&2\n- # Align each read to the reference genome\n- $BwaCmd aln $ref $outdir/read_1.fq.smi > $outdir/read_1.aln\n- fi\n- echo \'===== 57.2 =====\' && echo \'===== 57.2 =====\' >&2\n- $BwaCmd aln $ref $outdir/read_2.fq.smi > $outdir/read_2.aln\n- fi\n- if ! [[ $start ]] || [[ $start -le 58 ]]; then\n- echo \'===== 58 =====\' && echo \'===== 58 =====\' >&2\n- # Make a single paired-end .sam file\n- $BwaCmd sampe -s $ref $outdir/read_1.aln $outdir/read_2.aln \\\n- $outdir/read_1.fq.smi $outdir/read_2.fq.smi > $outdir/PE_reads.sam\n- fi\n- if ! [[ $start ]] || [[ $start -le 59 ]]; then\n- echo \'===='..b'rim "1-$StartClip,$end_clip_start-$rlenreal" --clipRepresentation SOFTCLIP_BASES\n- fi\n- fi\n- echo \'===== DONE =====\' && echo \'===== DONE =====\' >&2\n-}\n-\n-\n-# Get the script\'s actual directory path\n-function real_dir {\n- # Does readlink -f work? 
(It doesn\'t on BSD.)\n- if readlink -f dummy >/dev/null 2>/dev/null; then\n- dirname $(readlink -f ${BASH_SOURCE[0]})\n- else\n- # If readlink -f doesn\'t work (like on BSD).\n- # Read the link destination from the output of ls -l and cd to it.\n- # Have to cd to the link\'s directory first, to handle relative links.\n- # With help from https://stackoverflow.com/a/246128/726773\n- unset CDPATH\n- local source="${BASH_SOURCE[0]}"\n- while [[ -h "$source" ]]; do\n- local dir="$(cd -P $(dirname "$source") && pwd)"\n- local link="$(ls -l "$source" | awk \'{print $NF}\')"\n- # absolute or relative path?\n- if [[ "$link" == /* ]]; then\n- source="$link"\n- else\n- source="$dir/$link"\n- fi\n- done\n- dir="$(cd -P $(dirname "$source") && pwd)"\n- echo "$dir"\n- fi\n-}\n-\n-\n-function print_versions {\n- script_dir="$1"\n- echo -e \'VERSIONS\\trecommended\\tpresent\'\n- # pipeline\n- echo -en \'pipeline:\\te0897da\\t\\t\'\n- if ! [[ -d $script_dir ]]; then\n- echo \'MISSING\'\n- elif ! which git >/dev/null 2>/dev/null; then\n- echo \'ERROR 1\'\n- else\n- unset CDPATH\n- cd $script_dir\n- if ! 
git log >/dev/null 2>/dev/null; then\n- echo \'ERROR 2\'\n- else\n- git log --oneline -n 1 | grep --color=never -Eo \'^\\S+\'\n- fi\n- cd - >/dev/null\n- fi\n- # Python\n- echo -en \'Python:\\t\\t2.7\\t\\t\'\n- if which $PythonCmd >/dev/null 2>/dev/null; then\n- $PythonCmd --version 2>&1 | sed -E \'s/python\\s//I\'\n- else\n- echo \'MISSING\'\n- fi\n- # BWA\n- echo -en \'BWA:\\t\\t0.6.2\\t\\t\'\n- if which $BwaCmd >/dev/null 2>/dev/null; then\n- $BwaCmd 2>&1 | sed -En \'s/^.*version.*\\s([0-9].*)$/\\1/Ip\'\n- else\n- echo \'MISSING\'\n- fi\n- # Samtools\n- echo -en \'Samtools:\\t0.1.18\\t\\t\'\n- echo $SamtoolsCmd\n- if which $SamtoolsCmd >/dev/null 2>/dev/null; then\n- $SamtoolsCmd 2>&1 | sed -En \'s/^.*version.*\\s([0-9].*)$/\\1/Ip\'\n- else\n- echo \'MISSING\'\n- fi\n- # PySAM\n- echo -en \'PySAM:\\t\\t0.7.5\\t\\t\'\n- if $PythonCmd -c \'import pysam\' 2>/dev/null; then\n- $PythonCmd -c \'import pysam; print pysam.__version__\'\n- elif which $PythonCmd >/dev/null 2>/dev/null; then\n- echo \'MISSING\'\n- else\n- echo \'ERROR 1\'\n- fi\n- # BioPython\n- echo -en \'BioPython:\\t1.62\\t\\t\'\n- if $PythonCmd -c \'import Bio\' 2>/dev/null; then\n- $PythonCmd -c \'import Bio; print Bio.__version__\'\n- elif which $PythonCmd >/dev/null 2>/dev/null; then\n- echo \'MISSING\'\n- else\n- echo \'ERROR 1\'\n- fi\n- if ! which $JavaCmd 2>/dev/null >/dev/null; then\n- echo "ERROR: Java command \\"$JavaCmd\\" not found." >&2\n- return\n- fi\n- # Picard\n- echo -en \'Picard:\\t\\t1.107\\t\\t\'\n- if [[ -f $PicardDir/picard.jar ]]; then\n- $JavaCmd -jar $PicardDir/picard.jar AddOrReplaceReadGroups --version 2>&1 >/dev/null | sed -E \'s/\\(.*\\)//\'\n- elif ! 
[[ -f $PicardDir/AddOrReplaceReadGroups.jar ]]; then\n- echo \'MISSING\'\n- elif [[ $($JavaCmd -jar $PicardDir/AddOrReplaceReadGroups.jar 2>&1 >/dev/null | sed -En \'s/^Version:?\\s//ip\') ]]; then\n- $JavaCmd -jar $PicardDir/AddOrReplaceReadGroups.jar 2>&1 >/dev/null | sed -En \'s/^Version:?\\s//ip\'\n- else\n- echo \'ERROR 1\'\n- fi\n- # GATK\n- echo -en \'GATK:\\t\\t2.4-9\\t\\t\'\n- if ! [[ -f $GatkDir/GenomeAnalysisTK.jar ]]; then\n- echo \'MISSING\'\n- else\n- set +e\n- version=$($JavaCmd -jar $GatkDir/GenomeAnalysisTK.jar --version 2>/dev/null)\n- exit_code=$?\n- set -e\n- if [[ $exit_code == 0 ]]; then\n- echo $version\n- else\n- version=$($JavaCmd -jar $GatkDir/GenomeAnalysisTK.jar 2>&1 >/dev/null | sed -En \'s/^.*version\\s([0-9.-]+[0-9.]).*$/\\1/p\')\n- if [[ $version ]]; then\n- echo $version\n- else\n- echo \'ERROR 1\'\n- fi\n- fi\n- fi\n-}\n-\n-\n-function fail {\n- echo "$@" >&2\n- exit 1\n-}\n-\n-\n-main "$@"\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/DNA.h --- a/mafft/core/DNA.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,210 +0,0 @@\n-#define DEFAULTGOP_N -1530\n-#define DEFAULTGEP_N 0\n-#define DEFAULTOFS_N -369 \n-#define DEFAULTPAMN 200\n-\n-#define DEFAULTRNAGOP_N -1530\n-#define DEFAULTRNAGEP_N 0\n-#define DEFAULTRNATHR_N 0\n-\n-// -h 0.11150 -> all positive\n-\n-double ribosum4[4][4] = \n-{\n-// a g c t \n-{ 2.22, -1.46, -1.86, -1.39, }, // a\n-{ -1.46, 1.03, -2.48, -1.74, }, // g\n-{ -1.86, -2.48, 1.16, -1.05, }, // c\n-{ -1.39, -1.74, -1.05, 1.65, }, // t\n-};\n-\n-double ribosum16[16][16] = \n-{\n-// aa ag ac at ga gg gc gt ca cg cc ct ta tg tc tt \n-{ -2.49, -8.24, -7.04, -4.32, -6.86, -8.39, -5.03, -5.84, -8.84, -4.68, -14.37, -12.64, -4.01, -6.16, -11.32, -9.05, }, // aa\n-{ -8.24, -0.80, -8.89, -5.13, -8.61, -5.38, -5.77, -6.60, -10.41, -4.57, -14.53, -10.14, -5.43, -5.94, -8.87, -11.07, }, // ag\n-{ -7.04, -8.89, -2.11, -2.04, -9.73, -11.05, -3.81, -4.72, -9.37, -5.86, -9.08, -10.45, -5.33, -6.93, -8.67, -7.83, }, // ac\n-{ -4.32, -5.13, -2.04, 4.49, -5.33, -5.61, 2.70, 0.59, -5.56, 1.67, -6.71, -5.17, 1.61, -0.51, -4.81, -2.98, }, // at\n-{ -6.86, -8.61, -9.73, -5.33, -1.05, -8.67, -4.88, -6.10, -7.98, -6.00, -12.43, -7.71, -5.85, -7.55, -6.63, -11.54, }, // ga\n-{ -8.39, -5.38, -11.05, -5.61, -8.67, -1.98, -4.13, -5.77, -11.36, -4.66, -12.58, -13.69, -5.75, -4.27, -12.01, -10.79, }, // gg\n-{ -5.03, -5.77, -3.81, 2.70, -4.88, -4.13, 5.62, 1.21, -5.95, 2.11, -3.70, -5.84, 1.60, -0.08, -4.49, -3.90, }, // gc\n-{ -5.84, -6.60, -4.72, 0.59, -6.10, -5.77, 1.21, 3.47, -7.93, -0.27, -7.88, -5.61, -0.57, -2.09, -5.30, -4.45, }, // gt\n-{ -8.84, -10.41, -9.37, -5.56, -7.98, -11.36, -5.95, -7.93, -5.13, -3.57, -10.45, -8.49, -2.42, -5.63, -7.08, -8.39, }, // ca\n-{ -4.68, -4.57, -5.86, 1.67, -6.00, -4.66, 2.11, -0.27, -3.57, 5.36, -5.71, -4.96, 2.75, 1.32, -4.91, -3.67, }, // cg\n-{ -14.37, -14.53, -9.08, -6.71, -12.43, -12.58, -3.70, -7.88, -10.45, -5.71, -3.59, -5.77, -6.88, -8.41, -7.40, -5.41, }, // cc\n-{ -12.64, -10.14, -10.45, -5.17, -7.71, -13.69, -5.84, 
-5.61, -8.49, -4.96, -5.77, -2.28, -4.72, -7.36, -3.83, -5.21, }, // ct\n-{ -4.01, -5.43, -5.33, 1.61, -5.85, -5.75, 1.60, -0.57, -2.42, 2.75, -6.88, -4.72, 4.97, 1.14, -2.98, -3.39, }, // ta\n-{ -6.16, -5.94, -6.93, -0.51, -7.55, -4.27, -0.08, -2.09, -5.63, 1.32, -8.41, -7.36, 1.14, 3.36, -4.76, -4.28, }, // tg\n-{ -11.32, -8.87, -8.67, -4.81, -6.63, -12.01, -4.49, -5.30, -7.08, -4.91, -7.40, -3.83, -2.98, -4.76, -3.21, -5.97, }, // tc\n-{ -9.05, -11.07, -7.83, -2.98, -11.54, -10.79, -3.90, -4.45, -8.39, -3.67, -5.41, -5.21, -3.39, -4.28, -5.97, -0.02, }, // tt\n-};\n-\n-int locpenaltyn = -1750;\n-char locaminon[] = "agctuAGCTUnNbdhkmnrsvwyx-O";\n-char locgrpn[] = \n-{\n-\t0, 1, 2, 3, 3, 0, 1, 2, 3, 3,\n-\t4, 4, 5, 5, 5, 5, 5, 5, 5, 5, \n-\t5, 5, 5, 5, 5, 5 \n-};\n-int exgpn = +00;\n-int locn_disn[26][26] = \n-/* u ha constants.c no nakade shori */\n-/* 0 - 4 dake yomareru. */\n- {\n-\t\t{\n- 1000, 600, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 600, 1000, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 1000, 600, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 600, 1000, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- '..b' 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 500, 500, 0, 0, 0, 500, 500, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, -500,\n-\t\t},\n-\n-\t\t{\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n- 0, 0, 0, 0, 0, 0,\n-\t\t},\n-\n-\t\t{\n- -500, -500, -500, -500, -500, -500, -500, -500, -500, -500,\n- -500, -500, -500, -500, -500, -500, -500, -500, -500, -500,\n- -500, -500, -500, -500, 0, 500,\n-\t\t},\n- };\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/Falign.c --- a/mafft/core/Falign.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,2429 +0,0 @@\n-#include "mltaln.h"\n-\n-#if 0\n-static FILE *fftfp;\n-#endif\n-static TLS int n20or4or2;\n-\n-#define KEIKA 0\n-#define RND 0\n-#define DEBUG 0\n-\n-#if RND // by D.Mathog\n-static void generateRndSeq( char *seq, int len )\n-{\n-\twhile( len-- )\n-#if 1\n-\t\t*seq++ = (int)( rnd() * n20or4or2 );\n-#else\n-\t\t*seq++ = (int)1;\n-#endif\n-}\n-#endif\n-\n-static void vec_init( Fukusosuu *result, int nlen )\n-{\n-\twhile( nlen-- )\n-\t{\n-\t\tresult->R = result->I = 0.0;\n-\t\tresult++;\n-\t}\n-}\n-\n-#if 0 // by D.Mathog\n-static void vec_init2( Fukusosuu **result, char *seq, double eff, int st, int ed )\n-{\n-\tint i;\n-\tfor( i=st; i<ed; i++ )\n-\t\tresult[(int)*seq++][i].R += eff;\n-}\n-#endif\n-\n-static void seq_vec_2( Fukusosuu *result, double *score, double incr, char *seq )\n-{\n-\tstatic TLS int n;\n-\tfor( ; *seq; result++ )\n-\t{\n-\t\tn = amino_n[(int)*seq++];\n-\t\tif( n < 20 && n >= 0 ) result->R += incr * score[n];\n-#if 0\n-\t\tfprintf( stderr, "n=%d, score=%f, inc=%f R=%f\\n",n, score[n], incr * score[n], result->R );\n-#endif\n-\t}\n-}\n-\n-static void seq_vec_3( Fukusosuu **result, double incr, char *seq )\n-{\n-\tint i;\n-\tint n;\n-\tfor( i=0; *seq; i++ )\n-\t{\n-\t\tn = amino_n[(int)*seq++];\n-\t\tif( n < n20or4or2 && n >= 0 ) result[n][i].R += incr;\n-\t}\n-}\n-\n-static void seq_vec_5( Fukusosuu *result, double *score1, double *score2, double incr, char *seq )\n-{\n-\tint n;\n-\tfor( ; *seq; result++ )\n-\t{\n-\t\tn = amino_n[(int)*seq++];\n-\t\tif( n > 20 ) continue;\n-\t\tresult->R += incr * score1[n];\n-\t\tresult->I += incr * score2[n];\n-#if 0\n-\t\tfprintf( stderr, "n=%d, score=%f, inc=%f R=%f\\n",n, score[n], incr * score[n], result->R );\n-#endif\n-\t}\n-}\n-\n-\n-static void seq_vec_4( Fukusosuu *result, double incr, char *seq )\n-{\n-\tchar s;\n-\tfor( ; *seq; result++ )\n-\t{\n-\t\ts = *seq++;\n-\t\tif( s == \'a\' )\n-\t\t\tresult->R += incr;\n-\t\telse if( s == \'t\' )\n-\t\t\tresult->R -= incr;\n-\t\telse 
if( s == \'g\' )\n-\t\t\tresult->I += incr;\n-\t\telse if( s == \'c\' )\n-\t\t\tresult->I -= incr;\n-\t}\n-}\n-\n-#if 0 // by D.Mathog\n-static void seq_vec( Fukusosuu *result, char query, double incr, char *seq )\n-{\n-#if 0\n-\tint bk = nlen;\n-#endif\n-\twhile( *seq )\n-\t{\n-\t\tif( *seq++ == query ) result->R += incr;\n-\t\tresult++;\n-#if 0\n-fprintf( stderr, "i = %d result->R = %f\\n", bk-nlen, (result-1)->R );\n-#endif\n-\t}\n-}\n-\n-static int checkRepeat( int num, int *cutpos )\n-{\n-\tint tmp, buf;\n-\n-\tbuf = *cutpos;\n-\twhile( num-- )\n-\t{\n-\t\tif( ( tmp = *cutpos++ ) < buf ) return( 1 );\n-\t\tbuf = tmp;\n-\t}\n-\treturn( 0 );\n-}\n-\n-static int segcmp( void *ptr1, void *ptr2 )\n-{\n-\tint diff;\n-\tSegment **seg1 = (Segment **)ptr1;\n-\tSegment **seg2 = (Segment **)ptr2;\n-#if 0\n-\treturn( (*seg1)->center - (*seg2)->center );\n-#else\n-\tdiff = (*seg1)->center - (*seg2)->center;\n-\tif( diff ) return( diff );\n-\n-\tdiff = (*seg1)->start - (*seg2)->start;\n-\tif( diff ) return( diff );\n-\n-\tdiff = (*seg1)->end - (*seg2)->end;\n-\tif( diff ) return( diff );\n-\n-\tfprintf( stderr, "USE STABLE SORT !!\\n" );\n-\texit( 1 );\n-\treturn( 0 );\n-#endif\n-}\n-#endif\n-\n-\n-static void mymergesort( int first, int last, Segment **seg )\n-{\n-\tint middle;\n-\tstatic TLS int i, j, k, p;\n-\tstatic TLS int allo = 0;\n-\tstatic TLS Segment **work = NULL;\n-\n-\tif( seg == NULL )\n-\t{\n-\t\tif( work ) free( work ); \n-\t\twork = NULL;\n-\t\tallo = 0;\n-\t\treturn;\n-\t}\n-\n-\tif( last > allo )\n-\t{\n-\t\tallo = last;\n-\t\tif( work ) free( work );\n-\t\twork = (Segment **)calloc( allo / 2 + 1, sizeof( Segment *) );\n-\t}\n-\n-\tif( first < last )\n-\t{\n-\t\tmiddle = ( first + last ) / 2;\n-\t\tmymergesort( first, middle, seg );\n-\t\tmymergesort( middle+1, last, seg );\n-\t\tp = 0;\n-\t\tfor( i=first; i<=middle; i++ ) work[p++] = seg[i];\n-\t\ti = middle + 1; j = 0; k = first;\n-\t\twhile( i <= last && j < p )\n-\t\t{\n-\t\t\tif( work[j]->center <= 
seg[i]->center ) \n-\t\t\t\tseg[k++] = work[j++];\n-\t\t\telse\n-\t\t\t\tseg[k++] = seg[i++];\n-\t\t}\n-\t\twhile( j < p ) seg[k++] = work[j++];\n-\t}\n-}\n-\n-\n-double Fgetlag( \n-\t\t\t\tdouble **n_dynamicmtx, \n-\t\t\t\tchar **seq1, char **seq2, \n-\t\t\t double *eff1, double *eff2, \n-\t\t\t int clus1, int clus2,\n-\t\t\t int alloclen )\n-{\n-\tint i, j, k, l, m;\n-\tint nlen, n'..b'p2, seq2, cut2[i]-1, clus2 );\n-\t\t\tgetkyokaigap( sgap1, tmpres1, nlen-1, clus1 );\n-\t\t\tgetkyokaigap( sgap2, tmpres2, nlen-1, clus2 );\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tfor( j=0; j<clus1; j++ ) sgap1[j] = \'o\';\n-\t\t\tfor( j=0; j<clus2; j++ ) sgap2[j] = \'o\';\n-\t\t}\n-\t\tif( cut1[i+1] != len1 )\n-\t\t{ \n-\t\t\tgetkyokaigap( egap1, seq1, cut1[i+1], clus1 );\n-\t\t\tgetkyokaigap( egap2, seq2, cut2[i+1], clus2 );\n-\t\t} \n-\t\telse \n-\t\t{ \n-\t\t\tfor( j=0; j<clus1; j++ ) egap1[j] = \'o\';\n-\t\t\tfor( j=0; j<clus2; j++ ) egap2[j] = \'o\';\n-\t\t}\n-#if DEBUG\n-\t\tfprintf( stderr, "DP %03d / %03d %4d to ", i+1, count-1, totallen );\n-#else\n-#if 1\n-\t\tfprintf( stderr, "DP %05d / %05d \\b\\b\\b\\b\\b\\b\\b\\b\\b\\b\\b\\b\\b\\b\\b\\b\\b", i+1, count-1 );\n-#endif\n-#endif\n-\t\tfor( j=0; j<clus1; j++ )\n-\t\t{\n-\t\t\tstrncpy( tmpres1[j], seq1[j]+cut1[i], cut1[i+1]-cut1[i] );\n-\t\t\ttmpres1[j][cut1[i+1]-cut1[i]] = 0;\n-\t\t}\n-\t\tif( kobetsubunkatsu && fftkeika ) commongappick( clus1, tmpres1 ); //dvtditr \x1b$B$K8F$P$l$?$H$-\x1b(B fftkeika=1\n-//\t\tif( kobetsubunkatsu ) commongappick( clus1, tmpres1 );\n-\t\tfor( j=0; j<clus2; j++ )\n-\t\t{\n-//\t\t\tfprintf( stderr, "### cut2[i+1]-cut2[i] = %d\\n", cut2[i+1]-cut2[i] );\n-\t\t\tif( cut2[i+1]-cut2[i] <= 0 )\n-\t\t\t\tfprintf( stderr, "### cut2[i+1]=%d, cut2[i]=%d\\n", cut2[i+1], cut2[i] );\n-\t\t\tstrncpy( tmpres2[j], seq2[j]+cut2[i], cut2[i+1]-cut2[i] );\n-\t\t\ttmpres2[j][cut2[i+1]-cut2[i]] = 0;\n-\t\t}\n-\t\tif( kobetsubunkatsu && fftkeika ) commongappick( clus2, tmpres2 ); //dvtditr \x1b$B$K8F$P$l$?$H$-\x1b(B 
fftkeika=1\n-//\t\tif( kobetsubunkatsu ) commongappick( clus2, tmpres2 );\n-\n-\t\tif( constraint )\n-\t\t{\n-\t\t\tfprintf( stderr, "Not supported\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-#if 0\n-\t\tfprintf( stderr, "i=%d, before alignment", i );\n-\t\tfprintf( stderr, "%4d\\n", totallen );\n-\t\tfprintf( stderr, "\\n\\n" );\n-\t\tfor( j=0; j<clus1; j++ ) \n-\t\t{\n-\t\t\tfprintf( stderr, "%s\\n", tmpres1[j] );\n-\t\t}\n-\t\tfprintf( stderr, "-------\\n" );\n-\t\tfor( j=0; j<clus2; j++ ) \n-\t\t{\n-\t\t\tfprintf( stderr, "%s\\n", tmpres2[j] );\n-\t\t}\n-#endif\n-\n-#if 0\n-\t\tfprintf( stdout, "writing input\\n" );\n-\t\tfor( j=0; j<clus1; j++ )\n-\t\t{\n-\t\t\tfprintf( stdout, ">%d of GROUP1\\n", j );\n-\t\t\tfprintf( stdout, "%s\\n", tmpres1[j] );\n-\t\t}\n-\t\tfor( j=0; j<clus2; j++ )\n-\t\t{\n-\t\t\tfprintf( stdout, ">%d of GROUP2\\n", j );\n-\t\t\tfprintf( stdout, "%s\\n", tmpres2[j] );\n-\t\t}\n-\t\tfflush( stdout );\n-#endif\n-\t\tswitch( alg )\n-\t\t{\n-\t\t\tcase( \'M\' ):\n-\t\t\t\t\tif( scoringmatrices ) // called by tditeration.c\n-\t\t\t\t\t\ttotalscore += MSalignmm_variousdist( NULL, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, NULL, 0, NULL, headgp, tailgp );\n-\t\t\t\t\telse\n-\t\t\t\t\t\ttotalscore += MSalignmm( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, NULL, 0, NULL, headgp, tailgp );\n-\t\t\t\tbreak;\n-\t\t\tdefault:\n-\t\t\t\tfprintf( stderr, "alg = %c\\n", alg );\n-\t\t\t\tErrorExit( "ERROR IN SOURCE FILE Falign.c" );\n-\t\t\t\tbreak;\n-\t\t}\n-\n-\t\tnlen = strlen( tmpres1[0] );\n-\t\tif( totallen + nlen > alloclen )\n-\t\t{\n-\t\t\tfprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\\n", totallen, nlen, alloclen );\n-\t\t\tErrorExit( "LENGTH OVER in Falign\\n " );\n-\t\t}\n-\t\tfor( j=0; j<clus1; j++ ) strcat( result1[j], tmpres1[j] );\n-\t\tfor( j=0; j<clus2; j++ ) strcat( result2[j], tmpres2[j] );\n-\t\ttotallen 
+= nlen;\n-#if 0\n-\t\tfprintf( stderr, "i=%d", i );\n-\t\tfprintf( stderr, "%4d\\n", totallen );\n-\t\tfprintf( stderr, "\\n\\n" );\n-\t\tfor( j=0; j<clus1; j++ ) \n-\t\t{\n-\t\t\tfprintf( stderr, "%s\\n", tmpres1[j] );\n-\t\t}\n-\t\tfprintf( stderr, "-------\\n" );\n-\t\tfor( j=0; j<clus2; j++ ) \n-\t\t{\n-\t\t\tfprintf( stderr, "%s\\n", tmpres2[j] );\n-\t\t}\n-#endif\n-\t}\n-#if KEIKA\n-\tfprintf( stderr, "DP ... done \\n" );\n-#endif\n-\n-\tfor( j=0; j<clus1; j++ ) strcpy( seq1[j], result1[j] );\n-\tfor( j=0; j<clus2; j++ ) strcpy( seq2[j], result2[j] );\n-#if 0\n-\tfor( j=0; j<clus1; j++ ) \n-\t{\n-\t\tfprintf( stderr, "%s\\n", result1[j] );\n-\t}\n-\tfprintf( stderr, "- - - - - - - - - - -\\n" );\n-\tfor( j=0; j<clus2; j++ ) \n-\t{\n-\t\tfprintf( stderr, "%s\\n", result2[j] );\n-\t}\n-#endif\n-\treturn( totalscore );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/Falign_localhom.c --- a/mafft/core/Falign_localhom.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,875 +0,0 @@\n-#include "mltaln.h"\n-\n-//static FILE *fftfp;\n-static TLS int n20or4or2;\n-\n-#define KEIKA 0\n-#define RND 0\n-#define DEBUG 0\n-\n-extern int fft( int, Fukusosuu *, int );\n-\n-\n-#if 0\n-static void generateRndSeq( char *seq, int len )\n-{\n-\twhile( len-- )\n-#if 1\n-\t\t*seq++ = (int)( rnd() * n20or4or2 );\n-#else\n-\t\t*seq++ = (int)1;\n-#endif\n-}\n-#endif\n-\n-static void vec_init( Fukusosuu *result, int nlen )\n-{\n-\twhile( nlen-- )\n-\t{\n-\t\tresult->R = result->I = 0.0;\n-\t\tresult++;\n-\t}\n-}\n-\n-#if 0\n-static void vec_init2( Fukusosuu **result, char *seq, double eff, int st, int ed )\n-{\n-\tint i;\n-\tfor( i=st; i<ed; i++ )\n-\t\tresult[(int)*seq++][i].R += eff;\n-}\n-#endif\n-\n-static void seq_vec_2( Fukusosuu *result, double *score, double incr, char *seq )\n-{\n-\tstatic TLS int n;\n-\tfor( ; *seq; result++ )\n-\t{\n-\t\tn = amino_n[(int)*seq++];\n-\t\tif( n < 20 && n >= 0 ) result->R += incr * score[n];\n-#if 0\n-\t\tfprintf( stderr, "n=%d, score=%f, inc=%f R=%f\\n",n, score[n], incr * score[n], result->R );\n-#endif\n-\t}\n-}\n-\n-static void seq_vec_3( Fukusosuu **result, double incr, char *seq )\n-{\n-\tint i;\n-\tint n;\n-\tfor( i=0; *seq; i++ )\n-\t{\n-\t\tn = amino_n[(int)*seq++];\n-\t\tif( n < n20or4or2 && n >= 0 ) result[n][i].R += incr;\n-\t}\n-}\n-\n-\t\n-#if 0\n-static void seq_vec( Fukusosuu *result, char query, double incr, char *seq )\n-{\n-#if 0\n-\tint bk = nlen;\n-#endif\n-\twhile( *seq )\n-\t{\n-\t\tif( *seq++ == query ) result->R += incr;\n-\t\tresult++;\n-#if 0\n-fprintf( stderr, "i = %d result->R = %f\\n", bk-nlen, (result-1)->R );\n-#endif\n-\t}\n-}\n-\n-static int checkRepeat( int num, int *cutpos )\n-{\n-\tint tmp, buf;\n-\n-\tbuf = *cutpos;\n-\twhile( num-- )\n-\t{\n-\t\tif( ( tmp = *cutpos++ ) < buf ) return( 1 );\n-\t\tbuf = tmp;\n-\t}\n-\treturn( 0 );\n-}\n-\n-static int segcmp( void *ptr1, void *ptr2 )\n-{\n-\tint diff;\n-\tSegment **seg1 = (Segment **)ptr1;\n-\tSegment **seg2 = (Segment 
**)ptr2;\n-#if 0\n-\treturn( (*seg1)->center - (*seg2)->center );\n-#else\n-\tdiff = (*seg1)->center - (*seg2)->center;\n-\tif( diff ) return( diff );\n-\n-\tdiff = (*seg1)->start - (*seg2)->start;\n-\tif( diff ) return( diff );\n-\n-\tdiff = (*seg1)->end - (*seg2)->end;\n-\tif( diff ) return( diff );\n-\n-\tfprintf( stderr, "USE STABLE SORT !!\\n" );\n-\texit( 1 );\n-\treturn( 0 );\n-#endif\n-}\n-\n-#endif\n-\n-\n-static void mymergesort( int first, int last, Segment **seg )\n-{\n-\tint middle;\n-\tstatic TLS int i, j, k, p;\n-\tstatic TLS int allo = 0;\n-\tstatic TLS Segment **work = NULL;\n-\n-\tif( seg == NULL )\n-\t{\n-\t\tfree( work ); work = NULL;\n-\t\treturn;\n-\t}\n-\n-\tif( last > allo )\n-\t{\n-\t\tallo = last;\n-\t\tif( work ) free( work );\n-\t\twork = (Segment **)calloc( allo / 2 + 1, sizeof( Segment *) );\n-\t}\n-\n-\tif( first < last )\n-\t{\n-\t\tmiddle = ( first + last ) / 2;\n-\t\tmymergesort( first, middle, seg );\n-\t\tmymergesort( middle+1, last, seg );\n-\t\tp = 0;\n-\t\tfor( i=first; i<=middle; i++ ) work[p++] = seg[i];\n-\t\ti = middle + 1; j = 0; k = first;\n-\t\twhile( i <= last && j < p )\n-\t\t{\n-\t\t\tif( work[j]->center <= seg[i]->center ) \n-\t\t\t\tseg[k++] = work[j++];\n-\t\t\telse\n-\t\t\t\tseg[k++] = seg[i++];\n-\t\t}\n-\t\twhile( j < p ) seg[k++] = work[j++];\n-\t}\n-}\n-\n-\n-float Falign_localhom( double **offsetmtx, double ***scoringmatrices, double **n_dynamicmtx,\n-\t\t\t char **seq1, char **seq2, \n-\t\t\t double *eff1, double *eff2, \n-\t\t\t double **eff1s, double **eff2s,\n-\t\t\t int clus1, int clus2,\n-\t\t\t int alloclen, \n-\t\t\t LocalHom ***localhom, float *totalimpmatch,\n-\t\t\t int *gapmap1, int *gapmap2,\n-\t\t\t\tint *chudanpt, int chudanref, int *chudanres )\n-{\n- // tditeration.c deha alloclen ha huhen nanode\n- // prevalloclen ha iranai.\n-\tint i, j, k, l, m, maxk;\n-\tint nlen, nlen2, nlen4;\n-\tstatic TLS int crossscoresize = 0;\n-\tstatic TLS char **tmpseq1 = NULL;\n-\tstatic TLS char **tmpseq2 = 
NULL;\n-\tstatic TLS char **tmpptr1 = NULL;\n-\tstatic TLS char **tmpptr2 = NULL;\n-\tstatic TLS char **tmpres1 = NULL;\n-\tstatic TLS char **tmpres2 = NULL;\n-\tstatic TLS char **result1 = NULL;\n-\tstatic TLS char **result2 = NULL;\n-#if RND\n-\tstatic TLS char **rndseq1 = NULL;\n-\tstatic TLS char **rndseq2 = NULL;\n-#endif\n-\tstat'..b'ort:\\n" );\n-\tfor( l=0; l<count; l++ )\n-\t{\n-\t\tfprintf( fftfp, "cut : %d %d\\n", cut1[l], cut2[l] );\n-\t}\n-\tfclose( fftfp );\n-#endif\n-\n-#if KEIKA\n-\tfprintf( trap_g, "Devided to %d segments\\n", count-1 );\n-\tfprintf( trap_g, "%d %d forg\\n", MIN( clus1, clus2 ), count-1 );\n-#endif\n-\n-\ttotallen = 0;\n-\tfor( j=0; j<clus1; j++ ) result1[j][0] = 0;\n-\tfor( j=0; j<clus2; j++ ) result2[j][0] = 0;\n-\ttotalscore = 0.0;\n-\t*totalimpmatch = 0.0;\n-\tfor( i=0; i<count-1; i++ )\n-\t{\n-#if DEBUG\n-\t\tfprintf( stderr, "DP %03d / %03d %4d to ", i+1, count-1, totallen );\n-#else\n-#if KEIKA\n-\t\tfprintf( stderr, "DP %03d / %03d\\r", i+1, count-1 );\n-#endif\n-#endif\n-\n-\t\tif( cut1[i] ) \n-\t\t{\n-\t\t\tgetkyokaigap( sgap1, seq1, cut1[i]-1, clus1 );\n-\t\t\tgetkyokaigap( sgap2, seq2, cut2[i]-1, clus2 );\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tfor( j=0; j<clus1; j++ ) sgap1[j] = \'o\';\n-\t\t\tfor( j=0; j<clus2; j++ ) sgap2[j] = \'o\';\n-\t\t}\n-\t\tif( cut1[i+1] != len1 )\n-\t\t{\n-\t\t\tgetkyokaigap( egap1, seq1, cut1[i+1], clus1 );\n-\t\t\tgetkyokaigap( egap2, seq2, cut2[i+1], clus2 );\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tfor( j=0; j<clus1; j++ ) egap1[j] = \'o\';\n-\t\t\tfor( j=0; j<clus2; j++ ) egap2[j] = \'o\';\n-\t\t}\n-\n-\t\tfor( j=0; j<clus1; j++ )\n-\t\t{\n-\t\t\tstrncpy( tmpres1[j], seq1[j]+cut1[i], cut1[i+1]-cut1[i] );\n-\t\t\ttmpres1[j][cut1[i+1]-cut1[i]] = 0;\n-\t\t}\n-\t\tif( kobetsubunkatsu ) commongappick_record( clus1, tmpres1, gapmap1 );\n-\t\tfor( j=0; j<clus2; j++ )\n-\t\t{\n-\t\t\tstrncpy( tmpres2[j], seq2[j]+cut2[i], cut2[i+1]-cut2[i] );\n-\t\t\ttmpres2[j][cut2[i+1]-cut2[i]] = 0;\n-\t\t}\n-\t\tif( 
kobetsubunkatsu ) commongappick_record( clus2, tmpres2, gapmap2 );\n-\n-#if 0\n-\t\tfprintf( stderr, "count = %d\\n", count );\n-\t\tfprintf( stderr, "### reg1 = %d-%d\\n", cut1[i], cut1[i+1]-1 );\n-\t\tfprintf( stderr, "### reg2 = %d-%d\\n", cut2[i], cut2[i+1]-1 );\n-#endif\n-\n-\t\tswitch( alg )\n-\t\t{\n-\t\t\tcase( \'a\' ):\n-\t\t\t\ttotalscore += Aalign( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen );\n-\t\t\t\tbreak;\n-\t\t\tcase( \'A\' ):\n-\t\t\t\tif( scoringmatrices ) // called by tditeration.c \n-\t\t\t\t{\n-\t\t\t\t\ttotalscore += partA__align_variousdist( NULL, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres );\n-\t\t\t\t}\n-\t\t\t\telse\n-\t\t\t\t\ttotalscore += partA__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres );\n-\t\t\t\t*totalimpmatch += impmatch;\n-//\t\t\t\tfprintf( stderr, "*totalimpmatch in Falign_localhom = %f\\n", *totalimpmatch );\n-\n-\n-\t\t\t\tbreak;\n-\t\t\tdefault:\n-\t\t\t\tfprintf( stderr, "alg = %c\\n", alg );\n-\t\t\t\tErrorExit( "ERROR IN SOURCE FILE Falign.c" );\n-\t\t\t\tbreak;\n-\t\t}\n-#ifdef enablemultithread\n-\t\tif( chudanres && *chudanres )\n-\t\t{\n-//\t\t\tfprintf( stderr, "\\n\\n## CHUUDAN!!! 
at Falign_localhom\\n" );\n-\t\t\treturn( -1.0 );\n-\t\t}\n-#endif\n-\n-\t\tnlen = strlen( tmpres1[0] );\n-\t\tif( totallen + nlen > alloclen )\n-\t\t{\n-\t\t\tfprintf( stderr, "totallen=%d + nlen=%d > alloclen = %d\\n", totallen, nlen, alloclen );\n-\t\t\tErrorExit( "LENGTH OVER in Falign\\n " );\n-\t\t}\n-\t\tfor( j=0; j<clus1; j++ ) strcat( result1[j], tmpres1[j] );\n-\t\tfor( j=0; j<clus2; j++ ) strcat( result2[j], tmpres2[j] );\n-\t\ttotallen += nlen;\n-#if 0\n-\t\tfprintf( stderr, "%4d\\r", totallen );\n-\t\tfprintf( stderr, "\\n\\n" );\n-\t\tfor( j=0; j<clus1; j++ ) \n-\t\t{\n-\t\t\tfprintf( stderr, "%s\\n", tmpres1[j] );\n-\t\t}\n-\t\tfprintf( stderr, "-------\\n" );\n-\t\tfor( j=0; j<clus2; j++ ) \n-\t\t{\n-\t\t\tfprintf( stderr, "%s\\n", tmpres2[j] );\n-\t\t}\n-#endif\n-\t}\n-#if KEIKA\n-\tfprintf( stderr, "DP ... done \\n" );\n-#endif\n-\n-\tfor( j=0; j<clus1; j++ ) strcpy( seq1[j], result1[j] );\n-\tfor( j=0; j<clus2; j++ ) strcpy( seq2[j], result2[j] );\n-#if 0\n-\tfor( j=0; j<clus1; j++ ) \n-\t{\n-\t\tfprintf( stderr, "%s\\n", result1[j] );\n-\t}\n-\tfprintf( stderr, "- - - - - - - - - - -\\n" );\n-\tfor( j=0; j<clus2; j++ ) \n-\t{\n-\t\tfprintf( stderr, "%s\\n", result2[j] );\n-\t}\n-#endif\n-\treturn( totalscore );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/Galign11.c --- a/mafft/core/Galign11.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,960 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define DEBUG 0\n-#define XXXXXXX 0\n-#define USE_PENALTY_EX 1\n-\n-#if 1\n-static void match_calc_mtx( double **mtx, float *match, char **s1, char **s2, int i1, int lgth2 ) \n-{\n-\tchar *seq2 = s2[0];\n-\tdouble *doubleptr = mtx[(int)s1[0][i1]];\n-\n-\twhile( lgth2-- )\n-\t\t*match++ = doubleptr[(int)*seq2++];\n-}\n-#else\n-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )\n-{\n-\tint j;\n-\n-\tfor( j=0; j<lgth2; j++ )\n-\t\tmatch[j] = amino_dis[(*s1)[i1]][(*s2)[j]];\n-}\n-#endif\n-\n-static float Atracking( float *lasthorizontalw, float *lastverticalw, \n-\t\t\t\t\t\tchar **seq1, char **seq2, \n- char **mseq1, char **mseq2, \n- int **ijp,\n-\t\t\t\t\t\tint tailgp,\n-\t\t\t\t\t\tint *warpis, int *warpjs, int warpbase )\n-{\n-\tint i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;\n-//\tchar gap[] = "-";\n-\tchar *gap;\n-\tgap = newgapstr;\n-\tlgth1 = strlen( seq1[0] );\n-\tlgth2 = strlen( seq2[0] );\n-\tfloat wm;\n-\n-\n-#if 0\n-\tfor( i=0; i<lgth1; i++ ) \n-\t{\n-\t\tfprintf( stderr, "lastverticalw[%d] = %f\\n", i, lastverticalw[i] );\n-\t}\n-#endif\n- \n- for( i=0; i<lgth1+1; i++ ) \n- {\n- ijp[i][0] = i + 1;\n- }\n- for( j=0; j<lgth2+1; j++ ) \n- {\n- ijp[0][j] = -( j + 1 );\n- }\n-\n-//\tif( tailgp == 1 || ijp[lgth1][lgth2] >= warpbase )\n-\tif( tailgp == 1 )\n-\t\t;\n-\telse\n-\t{\n-\t\twm = lastverticalw[0];\n-\t\tfor( i=0; i<lgth1; i++ )\n-\t\t{\n-\t\t\tif( lastverticalw[i] >= wm )\n-\t\t\t{\n-\t\t\t\twm = lastverticalw[i];\n-\t\t\t\tiin = i; jin = lgth2-1;\n-\t\t\t\tijp[lgth1][lgth2] = +( lgth1 - i );\n-\t\t\t}\n-\t\t}\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tif( lasthorizontalw[j] >= wm )\n-\t\t\t{\n-\t\t\t\twm = lasthorizontalw[j];\n-\t\t\t\tiin = lgth1-1; jin = j;\n-\t\t\t\tijp[lgth1][lgth2] = -( lgth2 - j );\n-\t\t\t}\n-\t\t}\n-\t}\n-\n-\n-\n-\tmseq1[0] += lgth1+lgth2;\n-\t*mseq1[0] = 0;\n-\tmseq2[0] += lgth1+lgth2;\n-\t*mseq2[0] = 
0;\n-\n-\n-\n-\tiin = lgth1; jin = lgth2;\n-\tlimk = lgth1+lgth2 + 1;\n-\tfor( k=0; k<limk; k++ ) \n-\t{\n-\t\tif( ijp[iin][jin] >= warpbase )\n-\t\t{\n-//\t\t\tfprintf( stderr, "WARP!\\n" );\n-\t\t\tifi = warpis[ijp[iin][jin]-warpbase]; \n-\t\t\tjfi = warpjs[ijp[iin][jin]-warpbase];\n-\t\t}\n-\t\telse if( ijp[iin][jin] < 0 ) \n-\t\t{\n-\t\t\tifi = iin-1; jfi = jin+ijp[iin][jin];\n-\t\t}\n-\t\telse if( ijp[iin][jin] > 0 )\n-\t\t{\n-\t\t\tifi = iin-ijp[iin][jin]; jfi = jin-1;\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tifi = iin-1; jfi = jin-1;\n-\t\t}\n-\n-\t\tif( ifi == -warpbase && jfi == -warpbase )\n-\t\t{\n-\t\t\tl = iin;\n-\t\t\twhile( --l >= 0 ) \n-\t\t\t{\n-\t\t\t\t*--mseq1[0] = seq1[0][l];\n-\t\t\t\t*--mseq2[0] = *gap;\n-\t\t\t\tk++;\n-\t\t\t}\n-\t\t\tl= jin;\n-\t\t\twhile( --l >= 0 )\n-\t\t\t{\n-\t\t\t\t*--mseq1[0] = *gap;\n-\t\t\t\t*--mseq2[0] = seq2[0][l];\n-\t\t\t\tk++;\n-\t\t\t}\n-\t\t\tbreak;\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tl = iin - ifi;\n-\t\t\twhile( --l > 0 ) \n-\t\t\t{\n-\t\t\t\t*--mseq1[0] = seq1[0][ifi+l];\n-\t\t\t\t*--mseq2[0] = *gap;\n-\t\t\t\tk++;\n-\t\t\t}\n-\t\t\tl= jin - jfi;\n-\t\t\twhile( --l > 0 )\n-\t\t\t{\n-\t\t\t\t*--mseq1[0] = *gap;\n-\t\t\t\t*--mseq2[0] = seq2[0][jfi+l];\n-\t\t\t\tk++;\n-\t\t\t}\n-\t\t}\n-\t\tif( iin <= 0 || jin <= 0 ) break;\n-\t\t*--mseq1[0] = seq1[0][ifi];\n-\t\t*--mseq2[0] = seq2[0][jfi];\n-\t\tk++;\n-\t\tiin = ifi; jin = jfi;\n-\t}\n-\n-//\tfprintf( stderr, "%s\\n", mseq1[0] );\n-//\tfprintf( stderr, "%s\\n", mseq2[0] );\n-\treturn( 0.0 );\n-}\n-\n-\n-float G__align11( double **n_dynamicmtx, char **seq1, char **seq2, int alloclen, int headgp, int tailgp )\n-{\n-//\tint k;\n-\tregister int i, j;\n-\tint lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */\n-\tint lastj;\n-\tint lgth1, lgth2;\n-\tint resultlen;\n-\tfloat wm; /* int ?????? 
*/\n-\tfloat g;\n-\tfloat *currentw, *previousw;\n-\tfloat fpenalty = (float)penalty;\n-\tfloat fpenalty_shift = (float)penalty_shift;\n-\tfloat fpenalty_tmp;\n-#if USE_PENALTY_EX\n-\tfloat fpenalty_ex = (float)penalty_ex;\n-#endif\n-#if 1\n-\tfloat *wtmp;\n-\tint *ijppt;\n-\tfloat *mjpt, *prept, *curpt;\n-\tint *mpjpt;\n-#endif\n-\tstatic TLS float mi = 0.0;\n-\tstatic TLS float *m = NULL;\n-\tstatic TLS int **ijp = NULL;\n-\tstatic TLS int mpi = 0;\n-\tstatic TLS int *mp = NULL;\n-\tstatic TLS float *w1 = NULL;\n-\tstatic TLS float *w2 = NULL;\n-\tstatic TLS float *match = NULL;\n-\tst'..b' ll1, ll2;\n-\n-\t\tif( orlgth1 > 0 && orlgth2 > 0 )\n-\t\t{\n-\t\t\tFreeFloatVec( w1 );\n-\t\t\tFreeFloatVec( w2 );\n-\t\t\tFreeFloatVec( match );\n-\t\t\tFreeFloatVec( initverticalw );\n-\t\t\tFreeFloatVec( lastverticalw );\n-\n-\t\t\tFreeFloatVec( m );\n-\n-\n-\n-\n-\t\t\tFreeFloatMtx( floatwork );\n-\t\t\tFreeIntMtx( intwork );\n-\n-\t\t\tFreeDoubleMtx( amino_dynamicmtx );\n-\t\t}\n-\n-\t\tll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;\n-\t\tll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;\n-\n-#if DEBUG\n-\t\tfprintf( stderr, "\\ntrying to allocate (%d+%d)xn matrices ... 
", ll1, ll2 );\n-#endif\n-\n-\t\tw1 = AllocateFloatVec( ll2+2 );\n-\t\tw2 = AllocateFloatVec( ll2+2 );\n-\t\tmatch = AllocateFloatVec( ll2+2 );\n-\n-\t\tinitverticalw = AllocateFloatVec( ll1+2 );\n-\t\tlastverticalw = AllocateFloatVec( ll1+2 );\n-\n-\t\tm = AllocateFloatVec( ll2+2 );\n-\n-\n-\n-\t\tfloatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); \n-\t\tintwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); \n-\n-\n-\t\tamino_dynamicmtx = AllocateDoubleMtx( 0x80, 0x80 );\n-#if DEBUG\n-\t\tfprintf( stderr, "succeeded\\n" );\n-#endif\n-\n-\t\torlgth1 = ll1 - 100;\n-\t\torlgth2 = ll2 - 100;\n-\t}\n-\n-\n- for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )\n-\t\tamino_dynamicmtx[(int)amino[i]][(int)amino[j]] = (double)n_dynamicmtx[i][j];\n-\n-\n-\n-\n-#if 0\n-\tfor( i=0; i<lgth1; i++ ) \n-\t\tfprintf( stderr, "ogcp1[%d]=%f\\n", i, ogcp1[i] );\n-#endif\n-\n-\tcurrentw = w1;\n-\tpreviousw = w2;\n-\n-\n-\tmatch_calc_mtx( amino_dynamicmtx, initverticalw, seq2, seq1, 0, lgth1 );\n-\n-\n-\tmatch_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 );\n-\n-\tif( 1 ) // tsuneni outgap-1\n-\t{\n-\t\tfor( i=1; i<lgth1+1; i++ )\n-\t\t{\n-\t\t\tinitverticalw[i] += fpenalty;\n-\t\t}\n-\t\tfor( j=1; j<lgth2+1; j++ )\n-\t\t{\n-\t\t\tcurrentw[j] += fpenalty;\n-\t\t}\n-\t}\n-\n-\tfor( j=1; j<lgth2+1; ++j ) \n-\t{\n-\t\tm[j] = currentw[j-1];\n-\t}\n-\n-\tif( lgth2 == 0 )\n-\t\tlastverticalw[0] = 0.0; // lgth2==0 no toki error\n-\telse\n-\t\tlastverticalw[0] = currentw[lgth2-1]; // lgth2==0 no toki error\n-\n-\tif( 1 ) lasti = lgth1+1; else lasti = lgth1; // tsuneni outgap-1\n-\n-#if XXXXXXX\n-fprintf( stderr, "currentw = \\n" );\n-for( i=0; i<lgth1+1; i++ )\n-{\n-\tfprintf( stderr, "%5.2f ", currentw[i] );\n-}\n-fprintf( stderr, "\\n" );\n-fprintf( stderr, "initverticalw = \\n" );\n-for( i=0; i<lgth2+1; i++ )\n-{\n-\tfprintf( stderr, "%5.2f ", initverticalw[i] );\n-}\n-fprintf( stderr, "\\n" );\n-#endif\n-\n-\tfor( i=1; i<lasti; i++ )\n-\t{\n-\t\twtmp = 
previousw; \n-\t\tpreviousw = currentw;\n-\t\tcurrentw = wtmp;\n-\n-\t\tpreviousw[0] = initverticalw[i-1];\n-\n-\t\tmatch_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 );\n-#if XXXXXXX\n-fprintf( stderr, "\\n" );\n-fprintf( stderr, "i=%d\\n", i );\n-fprintf( stderr, "currentw = \\n" );\n-for( j=0; j<lgth2; j++ )\n-{\n-\tfprintf( stderr, "%5.2f ", currentw[j] );\n-}\n-fprintf( stderr, "\\n" );\n-#endif\n-#if XXXXXXX\n-fprintf( stderr, "\\n" );\n-fprintf( stderr, "i=%d\\n", i );\n-fprintf( stderr, "currentw = \\n" );\n-for( j=0; j<lgth2; j++ )\n-{\n-\tfprintf( stderr, "%5.2f ", currentw[j] );\n-}\n-fprintf( stderr, "\\n" );\n-#endif\n-\t\tcurrentw[0] = initverticalw[i];\n-\n-\t\tmi = previousw[0];\n-\n-\t\tmjpt = m + 1;\n-\t\tprept = previousw;\n-\t\tcurpt = currentw + 1;\n-\t\tfor( j=1; j<lgth2+1; j++ )\n-\t\t{\n-\t\t\twm = *prept;\n-\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f->", wm );\n-#endif\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f?", g );\n-#endif\n-\t\t\tif( (g=mi+fpenalty) > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-\t\t\t}\n-\t\t\tif( (g=*prept) >= mi )\n-\t\t\t{\n-\t\t\t\tmi = g;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\tmi += fpenalty_ex;\n-#endif\n-\n-#if 0 \n-\t\t\tfprintf( stderr, "%5.0f?", g );\n-#endif\n-\t\t\tif( (g=*mjpt + fpenalty) > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-\t\t\t}\n-\t\t\tif( (g=*prept) >= *mjpt )\n-\t\t\t{\n-\t\t\t\t*mjpt = g;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\tm[j] += fpenalty_ex;\n-#endif\n-\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f ", wm );\n-#endif\n-\t\t\t*curpt++ += wm;\n-\t\t\tmjpt++;\n-\t\t\tprept++;\n-\t\t}\n-\t\tlastverticalw[i] = currentw[lgth2-1]; // lgth2==0 no toki error\n-\t}\n-\n-#if 0\n-\tfprintf( stderr, "\\n" );\n-\tfprintf( stderr, ">\\n%s\\n", mseq1[0] );\n-\tfprintf( stderr, ">\\n%s\\n", mseq2[0] );\n-\tfprintf( stderr, "wm = %f\\n", wm );\n-#endif\n-\n-\treturn( wm );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/JTT.c --- a/mafft/core/JTT.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,244 +0,0 @@\n-#if 0\n-#include "mltaln.h"\n-#endif\n-#define DEFAULTGOP_J -1530\n-#define DEFAULTGEP_J -00 \n-#define DEFAULTOFS_J -123 /* +10 -- -50 teido ka ? */\n-#define DEFAULTPAMN 200\n-\n-void JTTmtx( double **rsr, double *freq, char locamino[26], char locgrp[26], int isTM )\n-{\n-\tint i, j;\n-\tdouble r[20][20];\n-//\tchar locamino0[] = "ARNDCQEGHILKMFPSTWYVBZX.-U";\n-\tchar locamino0[] = "ARNDCQEGHILKMFPSTWYVBZX.-J";\n-\tchar locgrp0[] = \n-\t{\n-\t\t0, 3, 2, 2, 5, 2, 2, 0, 3, 1, 1, 3, 1, 4, 0, 0, 0, 4, 4, 1, 2, 2,\n-\t\t6, 6, 6, 1, \n-\t};\n-\n-\tdouble freq0[20] = \n-\t{\n-\t\t0.077,\n-\t\t0.051,\n-\t\t0.043,\n-\t\t0.052,\n-\t\t0.020,\n-\t\t0.041,\n-\t\t0.062,\n-\t\t0.074,\n-\t\t0.023,\n-\t\t0.052,\n-\t\t0.091,\n-\t\t0.059,\n-\t\t0.024,\n-\t\t0.040,\n-\t\t0.051,\n-\t\t0.069,\n-\t\t0.059,\n-\t\t0.014,\n-\t\t0.032,\n-\t\t0.066,\n-\t};\n-\tdouble freq0_TM[20] = \n-\t{\n-\t\t 0.1051,\n-\t\t 0.0157,\n-\t\t 0.0185,\n-\t\t 0.0089,\n-\t\t 0.0219,\n-\t\t 0.0141,\n-\t\t 0.0097,\n-\t\t 0.0758,\n-\t\t 0.0168,\n-\t\t 0.1188,\n-\t\t 0.1635,\n-\t\t 0.0112,\n-\t\t 0.0333,\n-\t\t 0.0777,\n-\t\t 0.0260,\n-\t\t 0.0568,\n-\t\t 0.0523,\n-\t\t 0.0223,\n-\t\t 0.0324,\n-\t\t 0.1195,\n-\t};\n-\n- /* Lower triangular is JTT\'s Accepted point mutations */\n- r[ 1][ 0]= 247;\n-\n- r[ 2][ 0]= 216; r[ 2][ 1]= 116;\n-\n- r[ 3][ 0]= 386; r[ 3][ 1]= 48; r[ 3][ 2]= 1433;\n-\n- r[ 4][ 0]= 106; r[ 4][ 1]= 125; r[ 4][ 2]= 32; r[ 4][ 3]= 13;\n-\n- r[ 5][ 0]= 208; r[ 5][ 1]= 750; r[ 5][ 2]= 159; r[ 5][ 3]= 130;\n- r[ 5][ 4]= 9;\n-\n- r[ 6][ 0]= 600; r[ 6][ 1]= 119; r[ 6][ 2]= 180; r[ 6][ 3]= 2914;\n- r[ 6][ 4]= 8; r[ 6][ 5]= 1027;\n-\n- r[ 7][ 0]= 1183; r[ 7][ 1]= 614; r[ 7][ 2]= 291; r[ 7][ 3]= 577;\n- r[ 7][ 4]= 98; r[ 7][ 5]= 84; r[ 7][ 6]= 610;\n-\n- r[ 8][ 0]= 46; r[ 8][ 1]= 446; r[ 8][ 2]= 466; r[ 8][ 3]= 144;\n- r[ 8][ 4]= 40; r[ 8][ 5]= 635; r[ 8][ 6]= 41; r[ 8][ 7]= 41;\n-\n- r[ 9][ 0]= 173; r[ 9][ 1]= 76; r[ 9][ 2]= 130; r[ 9][ 3]= 37;\n- r[ 9][ 4]= 19; r[ 9][ 5]= 20; r[ 
9][ 6]= 43; r[ 9][ 7]= 25;\n- r[ 9][ 8]= 26;\n-\n- r[10][ 0]= 257; r[10][ 1]= 205; r[10][ 2]= 63; r[10][ 3]= 34;\n- r[10][ 4]= 36; r[10][ 5]= 314; r[10][ 6]= 65; r[10][ 7]= 56;\n- r[10][ 8]= 134; r[10][ 9]= 1324;\n-\n- r[11][ 0]= 200; r[11][ 1]= 2348; r[11][ 2]= 758; r[11][ 3]= 102;\n- r[11][ 4]= 7; r[11][ 5]= 858; r[11][ 6]= 754; r[11][ 7]= 142;\n- r[11][ 8]= 85; r[11][ 9]= 75; r[11][10]= 94;\n-\n- r[12][ 0]= 100; r[12][ 1]= 61; r[12][ 2]= 39; r[12][ 3]= 27;\n- r[12][ 4]= 23; r[12][ 5]= 52; r[12][ 6]= 30; r[12][ 7]= 27;\n- r[12][ 8]= 21; r[12][ 9]= 704; r[12][10]= 974; r[12][11]= 103;\n-\n- r[13][ 0]= 51; r[13][ 1]= 16; r[13][ 2]= 15; r[13][ 3]= 8;\n- r[13][ 4]= 66; r[13][ 5]= 9; r[13][ 6]= 13; r[13][ 7]= 18;\n- r[13][ 8]= 50; r[13][ 9]= 196; r[13][10]= 1093; r[13][11]= 7;\n- r[13][12]= 49;\n-\n- r[14][ 0]= 901; r[14][ 1]= 217; r[14][ 2]= 31; r[14][ 3]= 39;\n- r[14][ 4]= 15; r[14][ 5]= 395; r[14][ 6]= 71; r[14][ 7]= 93;\n- r[14][ 8]= 157; r[14][ 9]= 31; r[14][10]= 578; r[14][11]= 77;\n- r[14][12]= 23; r[14][13]= 36;\n-\n- r[15][ 0]= 2413; r[15][ 1]= 413; r[15][ 2]= 1738; r[15][ 3]= 244;\n- r[15][ 4]= 353; r[15][ 5]= 182; r[15][ 6]= 156; r[15][ 7]= 1131;\n- r[15][ 8]= 138; r[15][ 9]= 172; r[15][10]= 436; r[15][11]= 228;\n- r[15][12]= 54; r[15][13]= 309; r[15][14]= 1138;\n-\n- r[16][ 0]= 2440; r[16][ 1]= 230; r[16][ 2]= 693; r[16][ 3]= 151;\n- r[16][ 4]= 66; r[16][ 5]= 149; r[16][ 6]= 142; r[16][ 7]= 164;\n- r[16][ 8]= 76; r[16][ 9]= 930; r[16][10]= 172; r[16][11]= 398;\n- r[16][12]= 343; r[16][13]= 39; r[16][14]= 412; r[16][15]= 2258;\n-\n- r[17][ 0]= 11; r[17][ 1]= 109; r[17][ 2]= 2; r[17][ 3]= 5;\n- r[17][ 4]= 38; r[17][ 5]= 12; r[17][ 6]= 12; r[17][ 7]= 69;\n- r[17][ 8]= 5; r[17][ 9]= 12; r[17][10]= 82; r[17][11]= 9;\n- r[17][12]= 8; r[17][13]= 37; r[17][14]= 6; r[17][15]= 36;\n- r[17][16]= 8;\n-\n- r[18][ 0]= 41; r[18][ '..b'r[ 0][ 2]= 2; r[ 0][ 3]= 7; r[ 0][ 4]= 13;\n- r[ 0][ 5]= 4; r[ 0][ 6]= 6; r[ 0][ 7]= 160; r[ 0][ 8]= 6;\n- r[ 0][ 9]= 44; r[ 0][10]= 43; 
r[ 0][11]= 5; r[ 0][12]= 10;\n- r[ 0][13]= 21; r[ 0][14]= 34; r[ 0][15]= 198; r[ 0][16]= 202;\n- r[ 0][17]= 0; r[ 0][18]= 1; r[ 0][19]= 292; \n- \n- r[ 1][ 2]= 0; r[ 1][ 3]= 1; r[ 1][ 4]= 2; r[ 1][ 5]= 21;\n- r[ 1][ 6]= 3; r[ 1][ 7]= 22; r[ 1][ 8]= 21; r[ 1][ 9]= 4;\n- r[ 1][10]= 8; r[ 1][11]= 53; r[ 1][12]= 19; r[ 1][13]= 0;\n- r[ 1][14]= 1; r[ 1][15]= 5; r[ 1][16]= 5; r[ 1][17]= 28;\n- r[ 1][18]= 0; r[ 1][19]= 0; \n- \n- r[ 2][ 3]= 14; r[ 2][ 4]= 1; r[ 2][ 5]= 7; r[ 2][ 6]= 0;\n- r[ 2][ 7]= 0; r[ 2][ 8]= 8; r[ 2][ 9]= 4; r[ 2][10]= 5;\n- r[ 2][11]= 11; r[ 2][12]= 3; r[ 2][13]= 1; r[ 2][14]= 2;\n- r[ 2][15]= 32; r[ 2][16]= 19; r[ 2][17]= 1; r[ 2][18]= 1;\n- r[ 2][19]= 2; \n- \n- r[ 3][ 4]= 0; r[ 3][ 5]= 0; r[ 3][ 6]= 12; r[ 3][ 7]= 15;\n- r[ 3][ 8]= 4; r[ 3][ 9]= 1; r[ 3][10]= 0; r[ 3][11]= 2;\n- r[ 3][12]= 1; r[ 3][13]= 0; r[ 3][14]= 1; r[ 3][15]= 0;\n- r[ 3][16]= 6; r[ 3][17]= 0; r[ 3][18]= 1; r[ 3][19]= 4;\n- \n- r[ 4][ 5]= 0; r[ 4][ 6]= 0; r[ 4][ 7]= 13; r[ 4][ 8]= 2;\n- r[ 4][ 9]= 4; r[ 4][10]= 11; r[ 4][11]= 0; r[ 4][12]= 1;\n- r[ 4][13]= 34; r[ 4][14]= 0; r[ 4][15]= 48; r[ 4][16]= 13;\n- r[ 4][17]= 8; r[ 4][18]= 23; r[ 4][19]= 47; \n- \n- r[ 5][ 6]= 16; r[ 5][ 7]= 1; r[ 5][ 8]= 26; r[ 5][ 9]= 1;\n- r[ 5][10]= 16; r[ 5][11]= 6; r[ 5][12]= 3; r[ 5][13]= 0;\n- r[ 5][14]= 5; r[ 5][15]= 7; r[ 5][16]= 2; r[ 5][17]= 0;\n- r[ 5][18]= 0; r[ 5][19]= 0; \n- \n- r[ 6][ 7]= 21; r[ 6][ 8]= 0; r[ 6][ 9]= 0; r[ 6][10]= 0;\n- r[ 6][11]= 0; r[ 6][12]= 0; r[ 6][13]= 0; r[ 6][14]= 0;\n- r[ 6][15]= 4; r[ 6][16]= 2; r[ 6][17]= 0; r[ 6][18]= 0;\n- r[ 6][19]= 7; \n- \n- r[ 7][ 8]= 1; r[ 7][ 9]= 10; r[ 7][10]= 0; r[ 7][11]= 0;\n- r[ 7][12]= 3; r[ 7][13]= 4; r[ 7][14]= 7; r[ 7][15]= 64;\n- r[ 7][16]= 12; r[ 7][17]= 5; r[ 7][18]= 0; r[ 7][19]= 53;\n- \n- r[ 8][ 9]= 3; r[ 8][10]= 2; r[ 8][11]= 0; r[ 8][12]= 1;\n- r[ 8][13]= 0; r[ 8][14]= 0; r[ 8][15]= 0; r[ 8][16]= 4;\n- r[ 8][17]= 0; r[ 8][18]= 29; r[ 8][19]= 2;\n-\n- r[ 9][10]= 273; r[ 9][11]= 0; r[ 9][12]= 161; r[ 9][13]= 66;\n- r[ 
9][14]= 4; r[ 9][15]= 22; r[ 9][16]= 150; r[ 9][17]= 1;\n- r[ 9][18]= 4; r[ 9][19]= 883;\n-\n- r[10][11]= 1; r[10][12]= 153; r[10][13]= 251; r[10][14]= 37;\n- r[10][15]= 43; r[10][16]= 26; r[10][17]= 20; r[10][18]= 6;\n- r[10][19]= 255;\n-\n- r[11][12]= 4; r[11][13]= 0; r[11][14]= 0; r[11][15]= 1;\n- r[11][16]= 2; r[11][17]= 0; r[11][18]= 5; r[11][19]= 1;\n-\n- r[12][13]= 8; r[12][14]= 0; r[12][15]= 1; r[12][16]= 32;\n- r[12][17]= 1; r[12][18]= 5; r[12][19]= 89;\n-\n- r[13][14]= 0; r[13][15]= 32; r[13][16]= 9; r[13][17]= 2;\n- r[13][18]= 54; r[13][19]= 37;\n-\n- r[14][15]= 9; r[14][16]= 10; r[14][17]= 0; r[14][18]= 1;\n- r[14][19]= 1;\n-\n- r[15][16]= 134; r[15][17]= 1; r[15][18]= 22; r[15][19]= 13;\n-\n- r[16][17]= 1; r[16][18]= 3; r[16][19]= 48;\n-\n- r[17][18]= 2; r[17][19]= 18;\n-\n- r[18][19]= 2;\n-\n-\n-\n-\tfor (i = 0; i < 20; i++) r[i][i] = 0.0;\n-\tif( isTM )\n-\t{\n-\t\tfor (i = 1; i < 20; i++) for (j = 0; j < i; j++)\n-\t\t{\n-\t\t\tr[j][i] /= 400.0 * freq0_TM[i] * freq0_TM[j];\n-\t\t\tr[i][j] = r[j][i];\n-\t\t}\n-\t\tfor( i=0; i<20; i++ ) freq[i] = freq0_TM[i];\n-\t}\n-\telse\n-\t{\n-\t\tfor (i = 1; i < 20; i++) for (j = 0; j < i; j++)\n-\t\t{\n-\t\t\tr[i][j] /= 400.0 * freq0[i] * freq0[j];\n-\t\t\tr[j][i] = r[i][j];\n-\t\t}\n-\t\tfor( i=0; i<20; i++ ) freq[i] = freq0[i];\n-\t}\n-\n-\tfor( i=0; i<26; i++ ) locamino[i] = locamino0[i];\n-\tfor( i=0; i<26; i++ ) locgrp[(int)locamino[i]] = locgrp0[i];\n-\tfor( i=0; i<20; i++ ) for( j=0; j<20; j++ ) rsr[i][j] = r[i][j];\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/Lalign11.c --- a/mafft/core/Lalign11.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1150 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define DEBUG 0\n-#define DEBUG2 0\n-#define XXXXXXX 0\n-#define USE_PENALTY_EX 1\n-\n-\n-static TLS int localstop; // 060910\n-\n-#if 1\n-static void match_calc_mtx( double **mtx, float *match, char **s1, char **s2, int i1, int lgth2 ) \n-{\n-\tchar *seq2 = s2[0];\n-\tdouble *doubleptr = mtx[(int)s1[0][i1]];\n-\n-\twhile( lgth2-- )\n-\t\t*match++ = doubleptr[(int)*seq2++];\n-}\n-#else\n-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )\n-{\n-\tint j;\n-\n-\tfor( j=0; j<lgth2; j++ )\n-\t\tmatch[j] = amino_dis[(*s1)[i1]][(*s2)[j]];\n-}\n-#endif\n-\n-#if 0\n-static void match_calc_bk( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )\n-{\n-\tint j, k, l;\n-\tfloat scarr[nalphabets];\n-\tfloat **cpmxpd = floatwork;\n-\tint **cpmxpdn = intwork;\n-\tint count = 0;\n-\n-\tif( initialize )\n-\t{\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tcount = 0;\n-\t\t\tfor( l=0; l<nalphabets; l++ )\n-\t\t\t{\n-\t\t\t\tif( cpmx2[l][j] )\n-\t\t\t\t{\n-\t\t\t\t\tcpmxpd[count][j] = cpmx2[l][j];\n-\t\t\t\t\tcpmxpdn[count][j] = l;\n-\t\t\t\t\tcount++;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tcpmxpdn[count][j] = -1;\n-\t\t}\n-\t}\n-\n-\tfor( l=0; l<nalphabets; l++ )\n-\t{\n-\t\tscarr[l] = 0.0;\n-\t\tfor( k=0; k<nalphabets; k++ )\n-\t\t\tscarr[l] += n_dis[k][l] * cpmx1[k][i1];\n-\t}\n-#if 0 /* \xa4\xb3\xa4\xec\xa4\xf2\xbb\xc8\xa4\xa6\xa4\xc8\xa4\xad\xa4\xcffloatwork\xa4\xce\xa5\xa2\xa5\xed\xa5\xb1\xa1\xbc\xa5\xc8\xa4\xf2\xb5\xd5\xa4\xcb\xa4\xb9\xa4\xeb */\n-\t{\n-\t\tfloat *fpt, **fptpt, *fpt2;\n-\t\tint *ipt, **iptpt;\n-\t\tfpt2 = match;\n-\t\tiptpt = cpmxpdn;\n-\t\tfptpt = cpmxpd;\n-\t\twhile( lgth2-- )\n-\t\t{\n-\t\t\t*fpt2 = 0.0;\n-\t\t\tipt=*iptpt,fpt=*fptpt;\n-\t\t\twhile( *ipt > -1 )\n-\t\t\t\t*fpt2 += scarr[*ipt++] * *fpt++;\n-\t\t\tfpt2++,iptpt++,fptpt++;\n-\t\t} \n-\t}\n-#else\n-\tfor( j=0; j<lgth2; j++ )\n-\t{\n-\t\tmatch[j] = 
0.0;\n-\t\tfor( k=0; cpmxpdn[k][j]>-1; k++ )\n-\t\t\tmatch[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];\n-\t} \n-#endif\n-}\n-#endif\n-\n-static float Ltracking( float *lasthorizontalw, float *lastverticalw, \n-\t\t\t\t\t\tchar **seq1, char **seq2, \n- char **mseq1, char **mseq2, \n- int **ijp, int *off1pt, int *off2pt, int endi, int endj,\n-\t\t\t\t\t\tint *warpis, int *warpjs, int warpbase )\n-{\n-\tint i, j, l, iin, jin, lgth1, lgth2, k, limk;\n-\tint ifi=0, jfi=0; // by D.Mathog, a guess\n-//\tchar gap[] = "-";\n-\tchar *gap;\n-\tgap = newgapstr;\n-\tlgth1 = strlen( seq1[0] );\n-\tlgth2 = strlen( seq2[0] );\n-\n-#if 0\n-\tfor( i=0; i<lgth1; i++ ) \n-\t{\n-\t\tfprintf( stderr, "lastverticalw[%d] = %f\\n", i, lastverticalw[i] );\n-\t}\n-#endif\n- \n- for( i=0; i<lgth1+1; i++ ) \n- {\n- ijp[i][0] = localstop;\n- }\n- for( j=0; j<lgth2+1; j++ ) \n- {\n- ijp[0][j] = localstop;\n- }\n-\n-\tmseq1[0] += lgth1+lgth2;\n-\t*mseq1[0] = 0;\n-\tmseq2[0] += lgth1+lgth2;\n-\t*mseq2[0] = 0;\n-\tiin = endi; jin = endj;\n-\tlimk = lgth1+lgth2;\n-\tfor( k=0; k<=limk; k++ ) \n-\t{\n-\t\tif( ijp[iin][jin] >= warpbase )\n-\t\t{\n-//\t\t\tfprintf( stderr, "WARP!\\n" );\n-\t\t\tifi = warpis[ijp[iin][jin]-warpbase];\n-\t\t\tjfi = warpjs[ijp[iin][jin]-warpbase];\n-\t\t}\n-\t\telse if( ijp[iin][jin] < 0 ) \n-\t\t{\n-\t\t\tifi = iin-1; jfi = jin+ijp[iin][jin];\n-\t\t}\n-\t\telse if( ijp[iin][jin] > 0 )\n-\t\t{\n-\t\t\tifi = iin-ijp[iin][jin]; jfi = jin-1;\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tifi = iin-1; jfi = jin-1;\n-\t\t}\n-\n-\n-#if 1 // sentou de warp?\n-\t\tif( ifi == -warpbase && jfi == -warpbase )\n-\t\t{\n-\t\t\tl = iin;\n-\t\t\twhile( --l >= 0 ) \n-\t\t\t{\n-\t\t\t\t*--mseq1[0] = seq1[0][l];\n-\t\t\t\t*--mseq2[0] = *gap;\n-\t\t\t\tk++;\n-\t\t\t}\n-\t\t\tl= jin;\n-\t\t\twhile( --l >= 0 )\n-\t\t\t{\n-\t\t\t\t*--mseq1[0] = *gap;\n-\t\t\t\t*--mseq2[0] = seq2[0][l];\n-\t\t\t\tk++;\n-\t\t\t}\n-\t\t\tbreak;\n-\t\t}\n-\t\telse\n-#endif\n-\t\t{\n-\t\t\tl = iin - ifi;\n-\t\t\twhile( --l > 0 ) 
\n-\t\t\t{\n-\t\t\t\t*--mseq1[0] = seq1[0][ifi+l];\n-\t\t\t\t*--mseq2[0] = *gap;\n-\t\t\t\tk++;\n-\t\t\t}\n-\t\t\tl= jin - jfi;\n-\t\t\twhile( --l > 0 )\n-\t\t\t{\n-\t\t\t\t*--mseq1[0] = *gap;\n-\t\t\t\t*--mseq2[0] = seq2[0][jfi+l];\n-\t\t\t\tk++;\n-\t\t\t}\n-\t\t}\n-\n-\n-\t\tif( iin <= 0 || jin <= 0 ) break;\n-\t\t*--mseq1[0] = seq1[0][ifi];\n-\t\t*--mseq2[0] = seq2[0][jfi];\n-\t\tif( ijp[ifi][jfi] == localstop ) break;\n-\t\tk++;\n-\t\tiin = ifi; jin = jfi;\n-\t}\n-\tif( ifi == -1 ) *off1pt = 0; else *off1pt = ifi;\n-\tif( jfi == -1 ) *off2pt = 0;'..b' i<lgth2+1; i++ )\n-{\n-\tfprintf( stderr, "%5.2f ", initverticalw[i] );\n-}\n-fprintf( stderr, "\\n" );\n-#endif\n-#if DEBUG2\n-\tfprintf( stderr, "\\n" );\n-\tfprintf( stderr, " " );\n-\tfor( j=0; j<lgth2; j++ )\n-\t\tfprintf( stderr, "%c ", seq2[0][j] );\n-\tfprintf( stderr, "\\n" );\n-#endif\n-\n-\tlocalstop = lgth1+lgth2+1;\n-\tmaxwm = -999999999.9;\n-#if DEBUG2\n-\tfprintf( stderr, "\\n" );\n-\tfprintf( stderr, "%c ", seq1[0][0] );\n-\n-\tfor( j=0; j<lgth2+1; j++ )\n-\t\tfprintf( stderr, "%5.0f ", currentw[j] );\n-\tfprintf( stderr, "\\n" );\n-#endif\n-\n-\tfor( i=1; i<lasti; i++ )\n-\t{\n-\t\twtmp = previousw; \n-\t\tpreviousw = currentw;\n-\t\tcurrentw = wtmp;\n-\n-\t\tpreviousw[0] = initverticalw[i-1];\n-\n-\t\tmatch_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 );\n-#if DEBUG2\n-\t\tfprintf( stderr, "%c ", seq1[0][i] );\n-\t\tfprintf( stderr, "%5.0f ", currentw[0] );\n-#endif\n-\n-#if XXXXXXX\n-fprintf( stderr, "\\n" );\n-fprintf( stderr, "i=%d\\n", i );\n-fprintf( stderr, "currentw = \\n" );\n-for( j=0; j<lgth2; j++ )\n-{\n-\tfprintf( stderr, "%5.2f ", currentw[j] );\n-}\n-fprintf( stderr, "\\n" );\n-#endif\n-#if XXXXXXX\n-fprintf( stderr, "\\n" );\n-fprintf( stderr, "i=%d\\n", i );\n-fprintf( stderr, "currentw = \\n" );\n-for( j=0; j<lgth2; j++ )\n-{\n-\tfprintf( stderr, "%5.2f ", currentw[j] );\n-}\n-fprintf( stderr, "\\n" );\n-#endif\n-\t\tcurrentw[0] = initverticalw[i];\n-\n-\t\tmi = 
previousw[0]; \n-//\t\tmpi = 0;\n-\n-#if 0\n-\t\tif( mi < localthr ) mi = localthr2;\n-#endif\n-\n-//\t\tijppt = ijp[i] + 1;\n-\t\tmjpt = m + 1;\n-\t\tprept = previousw;\n-\t\tcurpt = currentw + 1;\n-//\t\tmpjpt = mp + 1;\n-\t\tlastj = lgth2+1;\n-\t\tfor( j=1; j<lastj; j++ )\n-\t\t{\n-\t\t\twm = *prept;\n-//\t\t\t*ijppt = 0;\n-\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f->", wm );\n-#endif\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f?", g );\n-#endif\n-\t\t\tif( (g=mi+fpenalty) > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-//\t\t\t\t*ijppt = -( j - mpi );\n-\t\t\t}\n-\t\t\tif( *prept > mi )\n-\t\t\t{\n-\t\t\t\tmi = *prept;\n-//\t\t\t\tmpi = j-1;\n-\t\t\t}\n-\n-#if USE_PENALTY_EX\n-\t\t\tmi += fpenalty_ex;\n-#endif\n-\n-#if 0 \n-\t\t\tfprintf( stderr, "%5.0f?", g );\n-#endif\n-\t\t\tif( (g=*mjpt+fpenalty) > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-//\t\t\t\t*ijppt = +( i - *mpjpt );\n-\t\t\t}\n-\t\t\tif( *prept > *mjpt )\n-\t\t\t{\n-\t\t\t\t*mjpt = *prept;\n-//\t\t\t\t*mpjpt = i-1;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\t*mjpt += fpenalty_ex;\n-#endif\n-\n-\t\t\tif( maxwm < wm )\n-\t\t\t{\n-\t\t\t\tmaxwm = wm;\n-//\t\t\t\tendali = i;\n-//\t\t\t\tendalj = j;\n-\t\t\t}\n-#if 1\n-\t\t\tif( wm < localthr )\n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "stop i=%d, j=%d, curpt=%f\\n", i, j, *curpt );\n-//\t\t\t\t*ijppt = localstop;\n-\t\t\t\twm = localthr2;\n-\t\t\t}\n-#endif\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f ", *curpt );\n-#endif\n-#if DEBUG2\n-\t\t\tfprintf( stderr, "%5.0f ", wm );\n-//\t\t\tfprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\\n", seq1[0][i], seq2[0][j], *ijppt, localstop );\n-#endif\n-\n-\t\t\t*curpt++ += wm;\n-//\t\t\tijppt++;\n-\t\t\tmjpt++;\n-\t\t\tprept++;\n-//\t\t\tmpjpt++;\n-\t\t}\n-#if DEBUG2\n-\t\tfprintf( stderr, "\\n" );\n-#endif\n-\n-\t\tlastverticalw[i] = currentw[lgth2-1];\n-\t}\n-\n-\n-#if 0\n-\tfprintf( stderr, "maxwm = %f\\n", maxwm );\n-\tfprintf( stderr, "endali = %d\\n", endali );\n-\tfprintf( stderr, "endalj = %d\\n", endalj );\n-#endif\n-\n-\n-#if 0 // 
IRUKAMO!!!!\n-\tif( ijp[endali][endalj] == localstop )\n-\t{\n-\t\tstrcpy( seq1[0], "" );\n-\t\tstrcpy( seq2[0], "" );\n-\t\t*off1pt = *off2pt = 0;\n-\t\tfprintf( stderr, "maxwm <- 0.0 \\n" );\n-\t\treturn( 0.0 );\n-\t}\n-#else\n-\tif( maxwm < localthr )\n-\t{\n-\t\tfprintf( stderr, "maxwm <- 0.0 \\n" );\n-\t\treturn( 0.0 );\n-\t}\n-#endif\n-\t\t\n-//\tLtracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, off1pt, off2pt, endali, endalj );\n-\n-\n-//\tresultlen = strlen( mseq1[0] );\n-//\tif( alloclen < resultlen || resultlen > N )\n-//\t{\n-//\t\tfprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\\n", alloclen, resultlen, N );\n-//\t\tErrorExit( "LENGTH OVER!\\n" );\n-//\t}\n-\n-\n-//\tstrcpy( seq1[0], mseq1[0] );\n-//\tstrcpy( seq2[0], mseq2[0] );\n-\n-#if 0\n-\tfprintf( stderr, "wm=%f\\n", wm );\n-\tfprintf( stderr, ">\\n%s\\n", mseq1[0] );\n-\tfprintf( stderr, ">\\n%s\\n", mseq2[0] );\n-\n-\tfprintf( stderr, "maxwm = %f\\n", maxwm );\n-\tfprintf( stderr, " wm = %f\\n", wm );\n-#endif\n-\n-\treturn( maxwm );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/Lalignmm.c --- a/mafft/core/Lalignmm.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,2563 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define MEMSAVE 1\n-\n-#define DEBUG 0\n-#define USE_PENALTY_EX 0\n-#define STOREWM 1\n-\n-#define DPTANNI 10\n-\n-#define LOCAL 0\n-\n-static int reccycle = 0;\n-\n-static float localthr;\n-\n-static void match_ribosum( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )\n-{\n-\tint j, k, l;\n-\tfloat scarr[38];\n-\tfloat **cpmxpd = floatwork;\n-\tint **cpmxpdn = intwork;\n-\tint count = 0;\n-\tfloat *matchpt;\n-\tfloat **cpmxpdpt;\n-\tint **cpmxpdnpt;\n-\tint cpkd;\n-\n-\tif( initialize )\n-\t{\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tcount = 0;\n-\t\t\tfor( l=0; l<37; l++ )\n-\t\t\t{\n-\t\t\t\tif( cpmx2[j][l] )\n-\t\t\t\t{\n-\t\t\t\t\tcpmxpd[j][count] = cpmx2[j][l];\n-\t\t\t\t\tcpmxpdn[j][count] = l;\n-\t\t\t\t\tcount++;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tcpmxpdn[j][count] = -1;\n-\t\t}\n-\t}\n-\n-\tfor( l=0; l<37; l++ )\n-\t{\n-\t\tscarr[l] = 0.0;\n-\t\tfor( k=0; k<37; k++ )\n-\t\t{\n-\t\t\tscarr[l] += ribosumdis[k][l] * cpmx1[i1][k];\n-\t\t}\n-\t}\n-#if 0 /* \xa4\xb3\xa4\xec\xa4\xf2\xbb\xc8\xa4\xa6\xa4\xc8\xa4\xad\xa4\xcffloatwork\xa4\xce\xa5\xa2\xa5\xed\xa5\xb1\xa1\xbc\xa5\xc8\xa4\xf2\xb5\xd5\xa4\xcb\xa4\xb9\xa4\xeb */\n-\t{\n-\t\tfloat *fpt, **fptpt, *fpt2;\n-\t\tint *ipt, **iptpt;\n-\t\tfpt2 = match;\n-\t\tiptpt = cpmxpdn;\n-\t\tfptpt = cpmxpd;\n-\t\twhile( lgth2-- )\n-\t\t{\n-\t\t\t*fpt2 = 0.0;\n-\t\t\tipt=*iptpt,fpt=*fptpt;\n-\t\t\twhile( *ipt > -1 )\n-\t\t\t\t*fpt2 += scarr[*ipt++] * *fpt++;\n-\t\t\tfpt2++,iptpt++,fptpt++;\n-\t\t} \n-\t}\n-\tfor( j=0; j<lgth2; j++ )\n-\t{\n-\t\tmatch[j] = 0.0;\n-\t\tfor( k=0; cpmxpdn[j][k]>-1; k++ )\n-\t\t\tmatch[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k];\n-\t} \n-#else\n-\tmatchpt = match;\n-\tcpmxpdnpt = cpmxpdn;\n-\tcpmxpdpt = cpmxpd;\n-\twhile( lgth2-- )\n-\t{\n-\t\t*matchpt = 0.0;\n-\t\tfor( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ )\n-\t\t\t*matchpt += scarr[cpkd] * 
(*cpmxpdpt)[k];\n-\t\tmatchpt++;\n-\t\tcpmxpdnpt++;\n-\t\tcpmxpdpt++;\n-\t}\n-#endif\n-}\n-\n-static void match_calc( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )\n-{\n-\tint j, k, l;\n-//\tfloat scarr[26];\n-\tfloat **cpmxpd = floatwork;\n-\tint **cpmxpdn = intwork;\n-\tint count = 0;\n-\tfloat *matchpt;\n-\tfloat **cpmxpdpt;\n-\tint **cpmxpdnpt;\n-\tint cpkd;\n-\tfloat *scarr;\n-\tscarr = calloc( nalphabets, sizeof( float ) );\n-\n-\tif( initialize )\n-\t{\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tcount = 0;\n-\t\t\tfor( l=0; l<nalphabets; l++ )\n-\t\t\t{\n-\t\t\t\tif( cpmx2[j][l] )\n-\t\t\t\t{\n-\t\t\t\t\tcpmxpd[j][count] = cpmx2[j][l];\n-\t\t\t\t\tcpmxpdn[j][count] = l;\n-\t\t\t\t\tcount++;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tcpmxpdn[j][count] = -1;\n-\t\t}\n-\t}\n-\n-\tfor( l=0; l<nalphabets; l++ )\n-\t{\n-\t\tscarr[l] = 0.0;\n-\t\tfor( k=0; k<nalphabets; k++ )\n-\t\t{\n-\t\t\tscarr[l] += (n_dis[k][l]-RNAthr) * cpmx1[i1][k];\n-\t\t}\n-\t}\n-#if 0 /* \xa4\xb3\xa4\xec\xa4\xf2\xbb\xc8\xa4\xa6\xa4\xc8\xa4\xad\xa4\xcffloatwork\xa4\xce\xa5\xa2\xa5\xed\xa5\xb1\xa1\xbc\xa5\xc8\xa4\xf2\xb5\xd5\xa4\xcb\xa4\xb9\xa4\xeb */\n-\t{\n-\t\tfloat *fpt, **fptpt, *fpt2;\n-\t\tint *ipt, **iptpt;\n-\t\tfpt2 = match;\n-\t\tiptpt = cpmxpdn;\n-\t\tfptpt = cpmxpd;\n-\t\twhile( lgth2-- )\n-\t\t{\n-\t\t\t*fpt2 = 0.0;\n-\t\t\tipt=*iptpt,fpt=*fptpt;\n-\t\t\twhile( *ipt > -1 )\n-\t\t\t\t*fpt2 += scarr[*ipt++] * *fpt++;\n-\t\t\tfpt2++,iptpt++,fptpt++;\n-\t\t} \n-\t}\n-\tfor( j=0; j<lgth2; j++ )\n-\t{\n-\t\tmatch[j] = 0.0;\n-\t\tfor( k=0; cpmxpdn[j][k]>-1; k++ )\n-\t\t\tmatch[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k];\n-\t} \n-#else\n-\tmatchpt = match;\n-\tcpmxpdnpt = cpmxpdn;\n-\tcpmxpdpt = cpmxpd;\n-\twhile( lgth2-- )\n-\t{\n-\t\t*matchpt = 0.0;\n-\t\tfor( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ )\n-\t\t\t*matchpt += scarr[cpkd] * (*cpmxpdpt)[k];\n-\t\tmatchpt++;\n-\t\tcpmxpdnpt++;\n-\t\tcpmxpdpt++;\n-\t}\n-#endif\n-\tfree( 
scarr );\n-}\n-\n-#if 0\n-static void match_add( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )\n-{\n-\tint j, k, l;\n-\tfloat scarr[nalphabets];\n-\tfloat **cpmxpd = floatwork;\n-\tint **cpmxpdn = intwork;\n-\tint count = 0;\n-\tfloat *matchpt;\n-\tfloat **cpmxpdpt;\n-\tint **cpmxpdnpt;\n-\tint cpkd;\n-\n-\n-\tif( initialize )\n-\t{\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tcount = 0;\n-\t\t\tfor( l=0; l<nalphabets; l++ )\n-\t\t\t{\n-\t\t\t\tif( cpmx2[j][l] )\n-\t\t\t\t{\n-\t\t\t\t\tcpmxpd[j][count] = cpmx2[j][l];\n-\t\t\t\t\tcpmxpdn[j][count] = l;\n-\t\t\t\t\tcount++;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tcpmxpdn[j][count] = -1;\n-\t\t}\n-\t}\n-\n-\tfor( l=0; l<nalphabets; l++ )\n-\t{\n-\t\tscarr[l] = 0.0;\n-\t\tfor( k=0; k<nalphabets'..b'cateFloatVec( ll2+2 );\n-\n-\n-\tcpmx1 = AllocateFloatMtx( ll1+2, 39 );\n-\tcpmx2 = AllocateFloatMtx( ll2+2, 39 );\n-\n-\tfor( i=0; i<icyc; i++ ) \n-\t{\n-\t\tif( strlen( seq1[i] ) != lgth1 )\n-\t\t{\n-\t\t\tfprintf( stderr, "i = %d / %d\\n", i, icyc );\n-\t\t\tfprintf( stderr, "bug! hairetsu ga kowareta!\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\tfor( j=0; j<jcyc; j++ )\n-\t{\n-\t\tif( strlen( seq2[j] ) != lgth2 )\n-\t\t{\n-\t\t\tfprintf( stderr, "j = %d / %d\\n", j, jcyc );\n-\t\t\tfprintf( stderr, "bug! 
hairetsu ga kowareta!\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\n-#if 0\n-\tMScpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc );\n-\tMScpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc );\n-#else\n-\tcpmx_ribosum( seq1, seq1r, dir1, cpmx1, eff1, lgth1, icyc );\n-\tcpmx_ribosum( seq2, seq2r, dir2, cpmx2, eff2, lgth2, jcyc );\n-#endif\n-\n-\n-#if 1\n-\n-\tif( sgap1 )\n-\t{\n-\t\tnew_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1, sgap1 );\n-\t\tnew_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2, sgap2 );\n-\t\tnew_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1, egap2 );\n-\t\tnew_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2, egap2 );\n-\t}\n-\telse\n-\t{\n-\t\tst_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1 );\n-\t\tst_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 );\n-\t\tst_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 );\n-\t\tst_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 );\n-\t}\n-\n-#if 1\n-\tfor( i=0; i<lgth1; i++ ) \n-\t{\n-\t\togcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty;\n-\t\tfgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty;\n-//\t\tfprintf( stderr, "fgcp1[%d] = %f\\n", i, fgcp1[i] );\n-\t}\n-\tfor( i=0; i<lgth2; i++ ) \n-\t{\n-\t\togcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty;\n-\t\tfgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty;\n-//\t\tfprintf( stderr, "fgcp2[%d] = %f\\n", i, fgcp2[i] );\n-\t}\n-#else\n-\tfor( i=0; i<lgth1; i++ ) \n-\t{\n-\t\togcp1[i] = 0.5 * fpenalty;\n-\t\tfgcp1[i] = 0.5 * fpenalty;\n-\t}\n-\tfor( i=0; i<lgth2; i++ ) \n-\t{\n-\t\togcp2[i] = 0.5 * fpenalty;\n-\t\tfgcp2[i] = 0.5 * fpenalty;\n-\t}\n-#endif\n-\n-\tgapinfo[0] = ogcp1;\n-\tgapinfo[1] = fgcp1;\n-\tgapinfo[2] = ogcp2;\n-\tgapinfo[3] = fgcp2;\n-#endif\n-\n-#if 0\n-\tfprintf( stdout, "in MSalignmm.c\\n" );\n-\tfor( i=0; i<icyc; i++ )\n-\t{\n-\t\tfprintf( stdout, ">%d of GROUP1\\n", i );\n-\t\tfprintf( stdout, "%s\\n", seq1[i] );\n-\t}\n-\tfor( i=0; i<jcyc; i++ )\n-\t{\n-\t\tfprintf( stdout, ">%d of GROUP2\\n", i );\n-\t\tfprintf( stdout, "%s\\n", seq2[i] 
);\n-\t}\n-\tfflush( stdout );\n-#endif\n-\n-\twm = MSalign2m2m_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, 0, lgth1-1, 0, lgth2-1, alloclen, mseq1, mseq2, 0, gapinfo, map );\n-#if DEBUG\n-\t\tfprintf( stderr, " seq1[0] = %s\\n", seq1[0] );\n-\t\tfprintf( stderr, " seq2[0] = %s\\n", seq2[0] );\n-\t\tfprintf( stderr, "mseq1[0] = %s\\n", mseq1[0] );\n-\t\tfprintf( stderr, "mseq2[0] = %s\\n", mseq2[0] );\n-#endif\n-\n-//\tfprintf( stderr, "wm = %f\\n", wm );\n-\n-#if 0\n-\n-\tfor( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );\n-\tfor( i=0; i<jcyc; i++ ) strcpy( seq2[i], mseq2[i] );\n-\n-\tif( seqlen( seq1[0] ) != nglen1 )\n-\t{\n-\t\tfprintf( stderr, "bug! hairetsu ga kowareta! (nglen1) seqlen(seq1[0])=%d but nglen1=%d\\n", seqlen( seq1[0] ), nglen1 );\n-\t\tfprintf( stderr, "seq1[0] = %s\\n", seq1[0] );\n-\t\texit( 1 );\n-\t}\n-\tif( seqlen( seq2[0] ) != nglen2 )\n-\t{\n-\t\tfprintf( stderr, "bug! hairetsu ga kowareta! (nglen2) seqlen(seq2[0])=%d but nglen2=%d\\n", seqlen( seq2[0] ), nglen2 );\n-\t\texit( 1 );\n-\t}\n-#endif\n-\n-\tFreeFloatVec( ogcp1 );\n-\tFreeFloatVec( ogcp2 );\n-\tFreeFloatVec( fgcp1 );\n-\tFreeFloatVec( fgcp2 );\n-\tFreeFloatMtx( cpmx1 );\n-\tFreeFloatMtx( cpmx2 );\n-\tfree( (void *)gapinfo );\n-\n-\tFreeCharMtx( mseq1 );\n-\tFreeCharMtx( mseq2 );\n-\n-\tlgth1 = strlen( seq1[0] );\n-\tlgth2 = strlen( seq2[0] );\n-\tfor( i=0; i<icyc; i++ ) \n-\t{\n-\t\tif( strlen( seq1[i] ) != lgth1 )\n-\t\t{\n-\t\t\tfprintf( stderr, "i = %d / %d\\n", i, icyc );\n-\t\t\tfprintf( stderr, "hairetsu ga kowareta (end of MSalignmm) !\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\tfor( j=0; j<jcyc; j++ )\n-\t{\n-\t\tif( strlen( seq2[j] ) != lgth2 )\n-\t\t{\n-\t\t\tfprintf( stderr, "j = %d / %d\\n", j, jcyc );\n-\t\t\tfprintf( stderr, "hairetsu ga kowareta (end of MSalignmm) !\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\n-\treturn( wm );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/MSalign11.c --- a/mafft/core/MSalign11.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,665 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define DEBUG 0\n-#define XXXXXXX 0\n-#define USE_PENALTY_EX 0\n-\n-static void extendmseq( char **mseq1, char **mseq2, char **seq1, char **seq2, int i, int j, int prevhiti, int prevhitj )\n-{\n-//\tchar gap[] = "-";\n-\tchar *gap;\n-\tgap = newgapstr;\n-\tint l;\n-\n-\tfprintf( stderr, "i=%d, prevhiti=%d\\n", i, prevhiti );\n-\tfprintf( stderr, "j=%d, prevhitj=%d\\n", j, prevhitj );\n-\tl = prevhiti - i - 1;\n-\tfprintf( stderr, "l=%d\\n", l );\n-\twhile( l>0 ) \n-\t{\n-\t\t*--mseq1[0] = seq1[0][i+l--];\n-\t\t*--mseq2[0] = *gap;\n-\t}\n-\tl= prevhitj - j - 1;\n-\tfprintf( stderr, "l=%d\\n", l );\n-\twhile( l>0 )\n-\t{\n-\t\t*--mseq1[0] = *gap;\n-\t\t*--mseq2[0] = seq2[0][j+l--];\n-\t}\n-\tif( i < 0 || j < 0 ) return;\n-\t*--mseq1[0] = seq1[0][i];\n-\t*--mseq2[0] = seq2[0][j];\n-\tfprintf( stderr, "added %c to mseq1, mseq1 = %s \\n", seq1[0][i], mseq1[0] );\n-\tfprintf( stderr, "added %c to mseq2, mseq2 = %s \\n", seq2[0][j], mseq2[0] );\n-}\n-\n-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )\n-{\n-\tchar tmpc = s1[0][i1];\n-\tchar *seq2 = s2[0];\n-\n-\twhile( lgth2-- )\n-\t\t*match++ = amino_dis[(int)tmpc][(int)*seq2++];\n-}\n-\n-static float Atracking( float *lasthorizontalw, float *lastverticalw, \n-\t\t\t\t\t\tchar **seq1, char **seq2, \n- char **mseq1, char **mseq2, \n- float **cpmx1, float **cpmx2, \n- int **ijp )\n-{\n-\tint i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;\n-//\tchar gap[] = "-";\n-\tchar *gap;\n-\tgap = newgapstr;\n-\tlgth1 = strlen( seq1[0] );\n-\tlgth2 = strlen( seq2[0] );\n-\n-\n-#if 0\n-\tfor( i=0; i<lgth1; i++ ) \n-\t{\n-\t\tfprintf( stderr, "lastverticalw[%d] = %f\\n", i, lastverticalw[i] );\n-\t}\n-#endif\n- \n- for( i=0; i<lgth1+1; i++ ) \n- {\n- ijp[i][0] = i + 1;\n- }\n- for( j=0; j<lgth2+1; j++ ) \n- {\n- ijp[0][j] = -( j + 1 );\n- }\n-\n-\n-\tmseq1[0] += lgth1+lgth2;\n-\t*mseq1[0] = 0;\n-\tmseq2[0] += lgth1+lgth2;\n-\t*mseq2[0] = 
0;\n-\tiin = lgth1; jin = lgth2;\n-\tlimk = lgth1+lgth2 + 1;\n-\tfor( k=0; k<limk; k++ ) \n-\t{\n-\t\tif( ijp[iin][jin] < 0 ) \n-\t\t{\n-\t\t\tifi = iin-1; jfi = jin+ijp[iin][jin];\n-\t\t}\n-\t\telse if( ijp[iin][jin] > 0 )\n-\t\t{\n-\t\t\tifi = iin-ijp[iin][jin]; jfi = jin-1;\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tifi = iin-1; jfi = jin-1;\n-\t\t}\n-\t\tl = iin - ifi;\n-\t\twhile( --l ) \n-\t\t{\n-\t\t\t*--mseq1[0] = seq1[0][ifi+l];\n-\t\t\t*--mseq2[0] = *gap;\n-\t\t\tk++;\n-\t\t}\n-\t\tl= jin - jfi;\n-\t\twhile( --l )\n-\t\t{\n-\t\t\t*--mseq1[0] = *gap;\n-\t\t\t*--mseq2[0] = seq2[0][jfi+l];\n-\t\t\tk++;\n-\t\t}\n-\t\tif( iin <= 0 || jin <= 0 ) break;\n-\t\t*--mseq1[0] = seq1[0][ifi];\n-\t\t*--mseq2[0] = seq2[0][jfi];\n-\t\tk++;\n-\t\tiin = ifi; jin = jfi;\n-\t}\n-\treturn( 0.0 );\n-}\n-\n-void backdp( float **WMMTX, float wmmax, float *maxinw, float *maxinh, int lgth1, int lgth2, int alloclen, float *w1, float *w2, float *initverticalw, float *m, int *mp, int iin, int jin, char **seq1, char **seq2, char **mseq1, char **mseq2 )\n-{\n-\tregister int i, j;\n-\tint prevhiti, prevhitj;\n-//\tint lasti, lastj; \n-\tfloat g;\n-\tfloat fpenalty = (float)penalty;\n-#if USE_PENALTY_EX\n-\tfloat fpenalty_ex = (float)penalty_ex;\n-#endif\n-\tfloat *currentw, *previousw, *wtmp;\n-\tfloat mi;\n-\tint mpi;\n-\tint *mpjpt;\n-\tfloat *mjpt, *prept, *curpt;\n-\tfloat wm = 0.0;\n-\tfloat forwwm;\n-\n-\tcurrentw = w1;\n-\tpreviousw = w2;\n-\n-\tmatch_calc( initverticalw, seq2, seq1, lgth2-1, lgth1 );\n-\tmatch_calc( currentw, seq1, seq2, lgth1-1, lgth2 );\n-\n-\n-\tprevhiti = iin;\n-\tprevhitj = jin;\n-\tfprintf( stderr, "prevhiti = %d, lgth1 = %d\\n", prevhiti, lgth1 );\n-\tfprintf( stderr, "prevhitj = %d, lgth2 = %d\\n", prevhitj, lgth2 );\n-\textendmseq( mseq1, mseq2, seq1, seq2, prevhiti, prevhitj, lgth1, lgth2 );\n-\n-\tfor( i=0; i<lgth1-1; i++ )\n-\t{\n-\t\tinitverticalw[i] += fpenalty;\n-\t\tWMMTX[i][lgth2-1] += fpenalty;\n-\t}\n-\tfor( j=0; j<lgth2-1; j++ 
)\n-\t{\n-\t\tcurrentw[j] += fpenalty;\n-\t\tWMMTX[lgth1-1][j] += fpenalty;\n-\t}\n-\n-\n-#if 0\n-\tfprintf( stderr, "initverticalw = \\n" );\n-\tfor( i=0; i<lgth1; i++ )\n-\t\tfprintf( stderr, "% 8.2f", initverticalw[i] );\n-\tfprintf( stderr, "\\n" );\n-\tfprintf( stderr,'..b' initverticalw[i] ) maxinh[0] = initverticalw[i];\n-\t}\n-\tmaxinw[0] = currentw[0];\n-\tfor( j=1; j<lgth2+1; j++ )\n-\t{\n-\t\tcurrentw[j] += fpenalty;\n-\t\tWMMTX[0][j] = currentw[j];\n-\t\tif( maxinw[0] < currentw[j] ) maxinw[0] = currentw[j];\n-\t}\n-\n-\tfor( j=1; j<lgth2+1; ++j ) \n-\t{\n-\t\tm[j] = currentw[j-1]; mp[j] = 0;\n-\t}\n-\n-\tlastverticalw[0] = currentw[lgth2-1];\n-\n-\tlasti = lgth1+1;\n-\n-\tfor( i=1; i<lasti; i++ )\n-\t{\n-\t\twtmp = previousw; \n-\t\tpreviousw = currentw;\n-\t\tcurrentw = wtmp;\n-\n-\t\tpreviousw[0] = initverticalw[i-1];\n-\n-\t\tmatch_calc( currentw, seq1, seq2, i, lgth2 );\n-\n-\t\tcurrentw[0] = initverticalw[i];\n-\n-\t\tmi = previousw[0]; mpi = 0;\n-\n-\t\tmaxinwpt = maxinw + i;\n-\t\t*maxinwpt = currentw[0];\n-\n-\t\tfprintf( stderr, "currentw[0] = %f, *maxinwpt = %f\\n", currentw[0], maxinw[i] );\n-\n-\t\tijppt = ijp[i] + 1;\n-\t\tmjpt = m + 1;\n-\t\tprept = previousw;\n-\t\tcurpt = currentw + 1;\n-\t\tmpjpt = mp + 1;\n-\t\tlastj = lgth2+1;\n-\n-\t\tfor( j=1; j<lastj; j++ )\n-\t\t{\n-\t\t\twm = *prept;\n-\t\t\t*ijppt = 0;\n-\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f->", wm );\n-#endif\n-\t\t\tg = mi + fpenalty;\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f?", g );\n-#endif\n-\t\t\tif( g > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-\t\t\t\t*ijppt = -( j - mpi );\n-\t\t\t}\n-\t\t\tg = *prept;\n-\t\t\tif( g >= mi )\n-\t\t\t{\n-\t\t\t\tmi = g;\n-\t\t\t\tmpi = j-1;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\tmi += fpenalty_ex;\n-#endif\n-\n-\t\t\tg = *mjpt + fpenalty;\n-#if 0 \n-\t\t\tfprintf( stderr, "%5.0f?", g );\n-#endif\n-\t\t\tif( g > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-\t\t\t\t*ijppt = +( i - *mpjpt );\n-\t\t\t}\n-\t\t\tg = *prept;\n-\t\t\tif( g >= *mjpt 
)\n-\t\t\t{\n-\t\t\t\t*mjpt = g;\n-\t\t\t\t*mpjpt = i-1;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\tm[j] += fpenalty_ex;\n-#endif\n-\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f ", wm );\n-#endif\n-\t\t\t*curpt += wm;\n-\n-\t\t\tWMMTX[i][j] = *curpt;\n-\n-\n-\t\t\tif( j<lgth2 && *maxinwpt < *curpt ) *maxinwpt = *curpt;\n-\t\t\tif( j<lgth2 && maxinh[j] < *curpt ) maxinh[j] = *curpt;\n-//\t\t\tfprintf( stderr, "maxintwpt = %f\\n", *maxinwpt );\n-\n-\t\t\tijppt++;\n-\t\t\tmjpt++;\n-\t\t\tprept++;\n-\t\t\tmpjpt++;\n-\t\t\tcurpt++;\n-\t\t}\n-\t\tlastverticalw[i] = currentw[lgth2-1];\n-\t}\n-\n-\twmmax = -999.9;\n-\tfor( i=0; i<lgth1; i++ )\n-\t{\n-\t\tg = lastverticalw[i];\n-\t\tif( g > wmmax ) \n-\t\t{\n-\t\t\twmmax = g;\n-\t\t\tiin = i;\n-\t\t\tjin = lgth2-1;\n-\t\t}\n-\t}\n-\tfor( j=0; j<lgth2; j++ )\n-\t{\n-\t\tg = currentw[j];\n-\t\tif( g > wmmax )\n-\t\t{\n-\t\t\twmmax = g;\n-\t\t\tiin = lgth1-1;\n-\t\t\tjin = j;\n-\t\t}\n-\t}\n-\n-\tfor( i=0; i<lgth1; i++ )\n-\t\tfprintf( stderr, "maxinw[%d] = %f\\n", i, maxinw[i] );\n-\tfor( j=0; j<lgth2; j++ )\n-\t\tfprintf( stderr, "maxinh[%d] = %f\\n", j, maxinh[j] );\n-\n-\tfprintf( stderr, "wmmax = %f (%d,%d)\\n", wmmax, iin, jin );\n-\tif( iin == lgth1 - 1 && jin == lgth2 - 1 )\n-\t\t;\n-\telse\n-\t\twmmax += fpenalty;\n-\n-\tfprintf( stderr, "wmmax = %f\\n", wmmax );\n-\n-#if 0\n-\tfor( i=0; i<lgth1; i++ )\n-\t{\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tfprintf( stderr, "% 10.2f ", WMMTX[i][j] );\n-\t\t}\n-\t\tfprintf( stderr, "\\n" );\n-\t}\n-#endif\n-\n-\tmseq1[0] += lgth1+lgth2;\n-\t*mseq1[0] = 0;\n-\tmseq2[0] += lgth1+lgth2;\n-\t*mseq2[0] = 0;\n-\n-\tbackdp( WMMTX, wmmax, maxinw, maxinh, lgth1, lgth2, alloclen, w1, w2, initverticalw, m, mp, iin, jin, seq1, seq2, mseq1, mseq2 );\n-\n-\tfprintf( stderr, "\\n" );\n-#if 1\n-\tfprintf( stderr, "\\n" );\n-\tfprintf( stderr, ">MSres\\n%s\\n", mseq1[0] );\n-\tfprintf( stderr, ">MSres\\n%s\\n", mseq2[0] );\n-#endif\n-\n-#if 0\n-\tfor( i=0; i<lgth1; i++ )\n-\t{\n-\t\tfor( j=0; 
j<lgth2; j++ )\n-\t\t{\n-\t\t\tfprintf( stderr, "% 10.2f ", WMMTX[i][j] );\n-\t\t}\n-\t\tfprintf( stderr, "\\n" );\n-\t}\n-#endif\n-\n-\tmseq1[0] = mseq[0];\n-\tmseq2[0] = mseq[1];\n-\tmseq1[0] += lgth1+lgth2;\n-\t*mseq1[0] = 0;\n-\tmseq2[0] += lgth1+lgth2;\n-\t*mseq2[0] = 0;\n-\n-\tAtracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp );\n-\n-\n-\tresultlen = strlen( mseq1[0] );\n-\tif( alloclen < resultlen || resultlen > N )\n-\t{\n-\t\tfprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\\n", alloclen, resultlen, N );\n-\t\tErrorExit( "LENGTH OVER!\\n" );\n-\t}\n-\n-\n-\tstrcpy( seq1[0], mseq1[0] );\n-\tstrcpy( seq2[0], mseq2[0] );\n-#if 1\n-\tfprintf( stderr, "\\n" );\n-\tfprintf( stderr, ">\\n%s\\n", mseq1[0] );\n-\tfprintf( stderr, ">\\n%s\\n", mseq2[0] );\n-#endif\n-\n-\n-\treturn( wm );\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/MSalignmm.c --- a/mafft/core/MSalignmm.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,3561 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define MEMSAVE 1\n-\n-#define DEBUG 0\n-#define USE_PENALTY_EX 0\n-#define STOREWM 0\n-\n-#define DPTANNI 100\n-\n-\n-static TLS int reccycle = 0;\n-\n-\n-static void match_calc_add( double **scoringmtx, float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )\n-{\n-\tint j, k, l;\n-//\tfloat scarr[26];\n-\tfloat **cpmxpd = floatwork;\n-\tint **cpmxpdn = intwork;\n-\tint count = 0;\n-\tfloat *matchpt;\n-\tfloat **cpmxpdpt;\n-\tint **cpmxpdnpt;\n-\tint cpkd;\n-\tfloat *scarr;\n-\tscarr = calloc( nalphabets, sizeof( float ) );\n-\tif( initialize )\n-\t{\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tcount = 0;\n-\t\t\tfor( l=0; l<nalphabets; l++ )\n-\t\t\t{\n-\t\t\t\tif( cpmx2[j][l] )\n-\t\t\t\t{\n-\t\t\t\t\tcpmxpd[j][count] = cpmx2[j][l];\n-\t\t\t\t\tcpmxpdn[j][count] = l;\n-\t\t\t\t\tcount++;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tcpmxpdn[j][count] = -1;\n-\t\t}\n-\t}\n-\n-\tfor( l=0; l<nalphabets; l++ )\n-\t{\n-\t\tscarr[l] = 0.0;\n-\t\tfor( k=0; k<nalphabets; k++ )\n-\t\t{\n-//\t\t\tscarr[l] += n_dis[k][l] * cpmx1[i1][k];\n-\t\t\tscarr[l] += scoringmtx[k][l] * cpmx1[i1][k];\n-\t\t}\n-\t}\n-#if 0 /* \xa4\xb3\xa4\xec\xa4\xf2\xbb\xc8\xa4\xa6\xa4\xc8\xa4\xad\xa4\xcffloatwork\xa4\xce\xa5\xa2\xa5\xed\xa5\xb1\xa1\xbc\xa5\xc8\xa4\xf2\xb5\xd5\xa4\xcb\xa4\xb9\xa4\xeb */\n-\t{\n-\t\tfloat *fpt, **fptpt, *fpt2;\n-\t\tint *ipt, **iptpt;\n-\t\tfpt2 = match;\n-\t\tiptpt = cpmxpdn;\n-\t\tfptpt = cpmxpd;\n-\t\twhile( lgth2-- )\n-\t\t{\n-\t\t\t*fpt2 = 0.0;\n-\t\t\tipt=*iptpt,fpt=*fptpt;\n-\t\t\twhile( *ipt > -1 )\n-\t\t\t\t*fpt2 += scarr[*ipt++] * *fpt++;\n-\t\t\tfpt2++,iptpt++,fptpt++;\n-\t\t} \n-\t}\n-\tfor( j=0; j<lgth2; j++ )\n-\t{\n-\t\tmatch[j] = 0.0;\n-\t\tfor( k=0; cpmxpdn[j][k]>-1; k++ )\n-\t\t\tmatch[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k];\n-\t} \n-#else\n-\tmatchpt = match;\n-\tcpmxpdnpt = cpmxpdn;\n-\tcpmxpdpt = cpmxpd;\n-\twhile( lgth2-- 
)\n-\t{\n-//\t\t*matchpt = 0.0; // add dakara\n-\t\tfor( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ )\n-\t\t\t*matchpt += scarr[cpkd] * (*cpmxpdpt)[k];\n-\t\tmatchpt++;\n-\t\tcpmxpdnpt++;\n-\t\tcpmxpdpt++;\n-\t}\n-#endif\n-\tfree( scarr );\n-}\n-\n-static void match_calc( double **n_dynamicmtx, float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )\n-{\n-\tint j, k, l;\n-//\tfloat scarr[26];\n-\tfloat **cpmxpd = floatwork;\n-\tint **cpmxpdn = intwork;\n-\tint count = 0;\n-\tfloat *matchpt;\n-\tfloat **cpmxpdpt;\n-\tint **cpmxpdnpt;\n-\tint cpkd;\n-\tfloat *scarr;\n-\tscarr = calloc( nalphabets, sizeof( float ) );\n-\tif( initialize )\n-\t{\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tcount = 0;\n-\t\t\tfor( l=0; l<nalphabets; l++ )\n-\t\t\t{\n-\t\t\t\tif( cpmx2[j][l] )\n-\t\t\t\t{\n-\t\t\t\t\tcpmxpd[j][count] = cpmx2[j][l];\n-\t\t\t\t\tcpmxpdn[j][count] = l;\n-\t\t\t\t\tcount++;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tcpmxpdn[j][count] = -1;\n-\t\t}\n-\t}\n-\n-\tfor( l=0; l<nalphabets; l++ )\n-\t{\n-\t\tscarr[l] = 0.0;\n-\t\tfor( k=0; k<nalphabets; k++ )\n-\t\t{\n-//\t\t\tscarr[l] += n_dis[k][l] * cpmx1[i1][k];\n-\t\t\tscarr[l] += n_dynamicmtx[k][l] * cpmx1[i1][k];\n-\t\t}\n-\t}\n-#if 0 /* \xa4\xb3\xa4\xec\xa4\xf2\xbb\xc8\xa4\xa6\xa4\xc8\xa4\xad\xa4\xcffloatwork\xa4\xce\xa5\xa2\xa5\xed\xa5\xb1\xa1\xbc\xa5\xc8\xa4\xf2\xb5\xd5\xa4\xcb\xa4\xb9\xa4\xeb */\n-\t{\n-\t\tfloat *fpt, **fptpt, *fpt2;\n-\t\tint *ipt, **iptpt;\n-\t\tfpt2 = match;\n-\t\tiptpt = cpmxpdn;\n-\t\tfptpt = cpmxpd;\n-\t\twhile( lgth2-- )\n-\t\t{\n-\t\t\t*fpt2 = 0.0;\n-\t\t\tipt=*iptpt,fpt=*fptpt;\n-\t\t\twhile( *ipt > -1 )\n-\t\t\t\t*fpt2 += scarr[*ipt++] * *fpt++;\n-\t\t\tfpt2++,iptpt++,fptpt++;\n-\t\t} \n-\t}\n-\tfor( j=0; j<lgth2; j++ )\n-\t{\n-\t\tmatch[j] = 0.0;\n-\t\tfor( k=0; cpmxpdn[j][k]>-1; k++ )\n-\t\t\tmatch[j] += scarr[cpmxpdn[j][k]] * cpmxpd[j][k];\n-\t} \n-#else\n-\tmatchpt = match;\n-\tcpmxpdnpt = cpmxpdn;\n-\tcpmxpdpt = cpmxpd;\n-\twhile( 
lgth2-- )\n-\t{\n-\t\t*matchpt = 0.0;\n-\t\tfor( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ )\n-\t\t\t*matchpt += scarr[cpkd] * (*cpmxpdpt)[k];\n-\t\tmatchpt++;\n-\t\tcpmxpdnpt++;\n-\t\tcpmxpdpt++;\n-\t}\n-#endif\n-\tfree( scarr );\n-}\n-\n-static float Atracking( float *lasthorizontalw, float *lastverticalw,\n-\t\t\t\t\t\tchar **seq1, char **seq2, \n- char **mseq1, char **mseq2, \n- int **ijp, int icyc, int jcyc,\n-\t\t\t\t\t\tint ist, int ien, int jst, int jen, \n-\t\t\t\t\t\tint fulllen1, int fulllen2, int tailgp )\n-{\n-\tint i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, klim;\n-\tchar *gaptable1, *gt1bk;\n-\tchar *gaptable2, *gt2bk;\n-\tfloat wm;\n-\tlgth1 = ien-ist+1;\n-\tlgth2 = jen-jst+'..b', jcyc, seq2, eff2, lgth2 );\n-\t\theadgapfreq1 = 0.0;\n-\t\theadgapfreq2 = 0.0;\n-\t\tgapfreq1f[lgth1] = 0.0;\n-\t\tgapfreq2f[lgth2] = 0.0;\n-\t}\n-\n-\tif( legacygapcost == 0 )\n-\t{\n-\t\tgapcountf( gapfreq1f, seq1, icyc, eff1, lgth1 );\n-\t\tgapcountf( gapfreq2f, seq2, jcyc, eff2, lgth2 );\n-\t\tfor( i=0; i<lgth1+1; i++ ) gapfreq1f[i] = 1.0 - gapfreq1f[i];\n-\t\tfor( i=0; i<lgth2+1; i++ ) gapfreq2f[i] = 1.0 - gapfreq2f[i];\n-\t\theadgapfreq1 = 1.0 - headgapfreq1;\n-\t\theadgapfreq2 = 1.0 - headgapfreq2;\n-\t}\n-\telse\n-\t{\n-\t\tfor( i=0; i<lgth1+1; i++ ) gapfreq1f[i] = 1.0;\n-\t\tfor( i=0; i<lgth2+1; i++ ) gapfreq2f[i] = 1.0;\n-\t\theadgapfreq1 = 1.0;\n-\t\theadgapfreq2 = 1.0;\n-\t}\n-\n-#if 1\n-\tfor( i=0; i<lgth1; i++ ) \n-\t{\n-\t\togcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty * ( gapfreq1f[i] );\n-\t\tfgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty * ( gapfreq1f[i] );\n-//\t\tfprintf( stderr, "fgcp1[%d] = %f\\n", i, fgcp1[i] );\n-\t}\n-\tfor( i=0; i<lgth2; i++ ) \n-\t{\n-\t\togcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty * ( gapfreq2f[i] );\n-\t\tfgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty * ( gapfreq2f[i] );\n-//\t\tfprintf( stderr, "fgcp2[%d] = %f\\n", i, fgcp2[i] );\n-\t}\n-#else\n-\tfor( i=0; i<lgth1; i++ ) \n-\t{\n-\t\togcp1[i] = 0.5 * fpenalty;\n-\t\tfgcp1[i] 
= 0.5 * fpenalty;\n-\t}\n-\tfor( i=0; i<lgth2; i++ ) \n-\t{\n-\t\togcp2[i] = 0.5 * fpenalty;\n-\t\tfgcp2[i] = 0.5 * fpenalty;\n-\t}\n-#endif\n-\n-\tgapinfo[0] = ogcp1;\n-\tgapinfo[1] = fgcp1;\n-\tgapinfo[2] = ogcp2;\n-\tgapinfo[3] = fgcp2;\n-\tgapinfo[4] = gapfreq1f;\n-\tgapinfo[5] = gapfreq2f;\n-#endif\n-\n-#if 0\n-\tfprintf( stdout, "in MSalignmm.c\\n" );\n-\tfor( i=0; i<icyc; i++ )\n-\t{\n-\t\tfprintf( stdout, ">%d of GROUP1\\n", i );\n-\t\tfprintf( stdout, "%s\\n", seq1[i] );\n-\t}\n-\tfor( i=0; i<jcyc; i++ )\n-\t{\n-\t\tfprintf( stdout, ">%d of GROUP2\\n", i );\n-\t\tfprintf( stdout, "%s\\n", seq2[i] );\n-\t}\n-\tfflush( stdout );\n-#endif\n-\n-\twm = MSalignmm_rec_variousdist( matrices, icyc, jcyc, seq1, seq2, cpmx1s, cpmx2s, 0, lgth1-1, 0, lgth2-1, alloclen, lgth1, lgth2, mseq1, mseq2, 0, gapinfo, chudanpt, chudanref, chudanres, headgp, tailgp, headgapfreq1, headgapfreq2 );\n-#ifdef enablemultithread\n-\tif( chudanres && *chudanres ) \n-\t{\n-//\t\tfprintf( stderr, "\\n\\n## CHUUDAN!!! relay\\n" );\n-\t\t*chudanres = 1;\n-\t\tfreearrays_variousdist( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1s, cpmx2s, gapfreq1f, gapfreq2f, gapinfo, mseq1, mseq2 );\n-\t\treturn( -1.0 );\n-\t}\n-#endif\n-\n-#if 0\n-\t\tfprintf( stderr, "\\n" );\n-\t\tfprintf( stderr, " seq1[0] = %s\\n", seq1[0] );\n-\t\tfprintf( stderr, " seq2[0] = %s\\n", seq2[0] );\n-\t\tfprintf( stderr, "mseq1[0] = %s\\n", mseq1[0] );\n-\t\tfprintf( stderr, "mseq2[0] = %s\\n", mseq2[0] );\n-\t\tfprintf( stderr, "\\n" );\n-#endif\n-\n-//\tfprintf( stderr, "wm = %f\\n", wm );\n-\n-\n-\tfor( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );\n-\tfor( i=0; i<jcyc; i++ ) strcpy( seq2[i], mseq2[i] );\n-\n-\tif( seqlen( seq1[0] ) != nglen1 )\n-\t{\n-\t\tfprintf( stderr, "bug! hairetsu ga kowareta! (nglen1) seqlen(seq1[0])=%d but nglen1=%d\\n", seqlen( seq1[0] ), nglen1 );\n-\t\tfprintf( stderr, "seq1[0] = %s\\n", seq1[0] );\n-\t\texit( 1 );\n-\t}\n-\tif( seqlen( seq2[0] ) != nglen2 )\n-\t{\n-\t\tfprintf( stderr, "bug! 
hairetsu ga kowareta! (nglen2) seqlen(seq2[0])=%d but nglen2=%d\\n", seqlen( seq2[0] ), nglen2 );\n-\t\texit( 1 );\n-\t}\n-\n-\n-\tfreearrays_variousdist( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1s, cpmx2s, gapfreq1f, gapfreq2f, gapinfo, mseq1, mseq2 );\n-\n-\tlgth1 = strlen( seq1[0] );\n-\tlgth2 = strlen( seq2[0] );\n-\tfor( i=0; i<icyc; i++ ) \n-\t{\n-\t\tif( strlen( seq1[i] ) != lgth1 )\n-\t\t{\n-\t\t\tfprintf( stderr, "i = %d / %d\\n", i, icyc );\n-\t\t\tfprintf( stderr, "hairetsu ga kowareta (end of MSalignmm) !\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\tfor( j=0; j<jcyc; j++ )\n-\t{\n-\t\tif( strlen( seq2[j] ) != lgth2 )\n-\t\t{\n-\t\t\tfprintf( stderr, "j = %d / %d\\n", j, jcyc );\n-\t\t\tfprintf( stderr, "hairetsu ga kowareta (end of MSalignmm) !\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\n-#if 0\n-\tfprintf( stderr, "\\n" );\n-\tfor( i=0; i<icyc; i++ ) fprintf( stderr, " seq1[i] = %s\\n", seq1[i] );\n-\tfor( j=0; j<jcyc; j++ ) fprintf( stderr, " seq2[j] = %s\\n", seq2[j] );\n-\tfprintf( stderr, "\\n" );\n-#endif\n-\n-\treturn( wm );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/Makefile --- a/mafft/core/Makefile Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,551 +0,0 @@\n-PREFIX = /usr/local\n-LIBDIR = $(PREFIX)/libexec/mafft\n-BINDIR = $(PREFIX)/bin\n-MANDIR = $(PREFIX)/share/man/man1\n-DESTDIR = \n-\n-#MNO_CYGWIN = -mno-cygwin\n-\n-ENABLE_MULTITHREAD = -Denablemultithread\n-# Comment out the above line if your compiler \n-# does not support TLS (thread-local strage).\n-\n-CC = gcc\n-CFLAGS = -O3\n-#CFLAGS = -O3 -fPIC\n-# add -fPIC when building .so files\n-\n-#CC = icc\n-#CFLAGS = -fast\n-# if you have icc, use this.\n-\n-#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3\n-#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3 -fprofile-arcs -ftest-coverage \n-#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 # for shark, valgrind\n-\n-\n-MYCFLAGS = $(MNO_CYGWIN) $(ENABLE_MULTITHREAD) $(CFLAGS)\n-\n-ifdef ENABLE_MULTITHREAD\n-LIBS = -lm -lpthread\n-else\n-LIBS = -lm\n-endif\n-\n-INSTALL = install\n-\n-PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \\\n-\t\tpair2hat3s multi2hat3s pairash addsingle \\\n- splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \\\n-\t\tseq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version\n-SOS = libdisttbfast.so\n-DLLS = libdisttbfast.dll\n-DYLIBS = libdisttbfast.dylib\n-\n-PERLPROGS = mafftash_premafft.pl seekquencer_premafft.pl\n-SCRIPTS = mafft mafft-homologs.rb\n-OBJSETDIRECTION = mtxutl.o io.o setdirection.o defs.o mltaln9.o\n-OBJREPLACEU = mtxutl.o io.o replaceu.o defs.o mltaln9.o\n-OBJRESTOREU = mtxutl.o io.o restoreu.o defs.o mltaln9.o\n-OBJREGTABLE2SEQ = mtxutl.o io.o regtable2seq.o defs.o mltaln9.o\n-OBJSEQ2REGTABLE = mtxutl.o io.o seq2regtable.o defs.o\n-OBJCOUNTLEN = mtxutl.o io.o countlen.o defs.o\n-OBJF2CL = mtxutl.o io.o f2cl.o constants.o defs.o\n-OBJMCCASKILLWRAP = mtxutl.o io.o mccaskillwrap.o constants.o defs.o mltaln9.o\n-OBJCONTRAFOLDWRAP = mtxutl.o io.o contrafoldwrap.o constants.o defs.o 
mltaln9.o\n-OBJMULTI2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \\\n-\t\t\t\t multi2hat3s.o defs.o fft.o fftFunctions.o\n-OBJPAIR2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \\\n-\t\t\t\tpair2hat3s.o defs.o fft.o fftFunctions.o\n-OBJPAIRASH = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t\t Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \\\n-\t\t\t pairash.o defs.o fft.o fftFunctions.o\n-OBJPAIRLOCALALIGN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \\\n-\t\t\t\t\tpairlocalalign.o defs.o fft.o fftFunctions.o\n-OBJDUMMY = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tdisttbfast_dummy.o dummy.o defs.o fft.o fftFunctions.o\n-OBJSPLITFROMALN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tLalign11.o splitfromaln.o defs.o fft.o fftFunctions.o\n-OBJSPLITTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tLalign11.o splittbfast.o defs.o fft.o fftFunctions.o\n-OBJSPLITTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tLalign11.o splittbfast2.o defs.o fft.o fftFunctions.o\n-OBJSPLITTBFASTP = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tLalign11.o defs.o fft.o fftFunctions.o\n-OBJDISTTBFAST = 
mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localh'..b"dblast.o : dndblast.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c dndblast.c\n-\n-dndfast3.o : dndfast3.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c dndfast3.c\n-\n-dndpre.o : dndpre.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c dndpre.c\n-\n-countlen.o : countlen.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c countlen.c\n-\n-seq2regtable.o : seq2regtable.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c seq2regtable.c\n-\n-regtable2seq.o : regtable2seq.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c regtable2seq.c\n-\n-f2cl.o : f2cl.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c f2cl.c\n-\n-setdirection.o : setdirection.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c setdirection.c\n-\n-replaceu.o : replaceu.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c replaceu.c\n-\n-restoreu.o : restoreu.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c restoreu.c\n-\n-mccaskillwrap.o : mccaskillwrap.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c mccaskillwrap.c\n-\n-contrafoldwrap.o : contrafoldwrap.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c contrafoldwrap.c\n-\n-pairlocalalign.o : pairlocalalign.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c pairlocalalign.c\n-\n-pairash.o : pairash.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c pairash.c\n-\n-multi2hat3s.o : multi2hat3s.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c multi2hat3s.c\n-\n-pair2hat3s.o : pair2hat3s.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c pair2hat3s.c\n-\n-io.o : io.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c io.c\n-\n-nj.o : nj.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c nj.c\n-\n-treeOperation.o : treeOperation.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c treeOperation.c\n-\n-sextet5.o : sextet5.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c sextet5.c\n-\n-mafft-distance.o : mafft-distance.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c mafft-distance.c\n-\n-triplet5.o : triplet5.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c triplet5.c\n-\n-triplet6.o : 
triplet6.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c triplet6.c\n-\n-fft.o : fft.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c fft.c \n-\n-fftFunctions.o : fftFunctions.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c fftFunctions.c\n-\n-Falign.o : Falign.c $(HEADER) $(FFTHEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c Falign.c\n-\n-Falign_localhom.o : Falign_localhom.c $(HEADER) $(FFTHEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c Falign_localhom.c\n-\n-mtxutl.o : mtxutl.c \n-\t$(CC) $(MYCFLAGS) -c mtxutl.c\n-\n-addfunctions.o : addfunctions.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c addfunctions.c\n-\n-score.o : score.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c score.c\n-\n-clean :\n-\trm -f *.o *.a *.exe *~ $(PERLPROGS) $(PROGS) $(SCRIPTS) $(SOS) $(DYLIBS) $(DLLS) *.gcda *.gcno\n-#\trm -f ../binaries/* ../scripts/*\n-\n-install : all\n-\tmkdir -p $(DESTDIR)$(LIBDIR)\n-\tchmod 755 $(DESTDIR)$(LIBDIR)\n-\tmkdir -p $(DESTDIR)$(BINDIR)\n-\tchmod 755 $(DESTDIR)$(BINDIR)\n-\tchmod 755 $(SCRIPTS)\n-\t$(INSTALL) $(SCRIPTS) $(DESTDIR)$(BINDIR)\n-\tchmod 755 $(PROGS) ||: # in MinGW, it's ok if this fails\n-\t$(INSTALL) -s $(PROGS) $(DESTDIR)$(LIBDIR)\n-\t$(INSTALL) $(PERLPROGS) $(DESTDIR)$(LIBDIR)\n-\t$(INSTALL) -m 644 $(MANPAGES) $(DESTDIR)$(LIBDIR)\n-\n-\t( cd $(DESTDIR)$(BINDIR); \\\n-rm -f linsi ginsi einsi fftns fftnsi nwns nwnsi xinsi qinsi; \\\n-rm -f mafft-linsi mafft-ginsi mafft-einsi mafft-fftns mafft-fftnsi mafft-nwns mafft-nwnsi mafft-xinsi mafft-qinsi mafft-randomcore.rb ; \\\n-ln -s mafft linsi; ln -s mafft ginsi; ln -s mafft fftns; \\\n-ln -s mafft fftnsi; ln -s mafft nwns; ln -s mafft nwnsi; \\\n-ln -s mafft einsi; \\\n-ln -s mafft mafft-linsi; ln -s mafft mafft-ginsi; ln -s mafft mafft-fftns; \\\n-ln -s mafft mafft-fftnsi; ln -s mafft mafft-nwns; ln -s mafft mafft-nwnsi; \\\n-ln -s mafft mafft-einsi; ln -s mafft mafft-xinsi; ln -s mafft mafft-qinsi;\\\n-rm -f mafft-profile mafft-profile.exe; ln -s $(LIBDIR)/mafft-profile .; \\\n-rm -f mafft-distance 
mafft-distance.exe; ln -s $(LIBDIR)/mafft-distance . )\n-\n-\tmkdir -p $(DESTDIR)$(MANDIR)\n-\tchmod 755 $(DESTDIR)$(MANDIR)\n-\t$(INSTALL) -m 644 $(MANPAGES) $(DESTDIR)$(MANDIR)\n-# remove incorrectly installed manpages by previous versions\n-#\trm -f /usr/local/man/man1/mafft.1 /usr/local/man/man1/mafft-homologs.1 \n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/Makefile.sos --- a/mafft/core/Makefile.sos Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,545 +0,0 @@\n-PREFIX = /usr/local\n-LIBDIR = $(PREFIX)/libexec/mafft\n-BINDIR = $(PREFIX)/bin\n-MANDIR = $(PREFIX)/share/man/man1\n-\n-#MNO_CYGWIN = -mno-cygwin\n-\n-ENABLE_MULTITHREAD = -Denablemultithread\n-# Comment out the above line if your compiler \n-# does not support TLS (thread-local strage).\n-\n-CC = gcc\n-#CFLAGS = -O3\n-#CFLAGS = -O3 -fPIC\n-# add -fPIC when building .so files\n-\n-#CC = icc\n-#CFLAGS = -fast\n-# if you have icc, use this.\n-\n-#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3\n-CFLAGS = -fPIC -O0 -fPIC -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 # for shark, valgrind\n-\n-\n-MYCFLAGS = $(MNO_CYGWIN) $(ENABLE_MULTITHREAD) $(CFLAGS)\n-\n-ifdef ENABLE_MULTITHREAD\n-LIBS = -lm -lpthread\n-else\n-LIBS = -lm\n-endif\n-\n-INSTALL = install\n-\n-PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \\\n-\t\tpair2hat3s multi2hat3s pairash addsingle \\\n- splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \\\n-\t\tseq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version\n-SOS = libdisttbfast.so\n-DLLS = libdisttbfast.dll\n-DYLIBS = libdisttbfast.dylib\n-\n-PERLPROGS = mafftash_premafft.pl seekquencer_premafft.pl\n-SCRIPTS = mafft mafft-homologs.rb\n-OBJSETDIRECTION = mtxutl.o io.o setdirection.o defs.o mltaln9.o\n-OBJREPLACEU = mtxutl.o io.o replaceu.o defs.o mltaln9.o\n-OBJRESTOREU = mtxutl.o io.o restoreu.o defs.o mltaln9.o\n-OBJREGTABLE2SEQ = mtxutl.o io.o regtable2seq.o defs.o mltaln9.o\n-OBJSEQ2REGTABLE = mtxutl.o io.o seq2regtable.o defs.o\n-OBJCOUNTLEN = mtxutl.o io.o countlen.o defs.o\n-OBJF2CL = mtxutl.o io.o f2cl.o constants.o defs.o\n-OBJMCCASKILLWRAP = mtxutl.o io.o mccaskillwrap.o constants.o defs.o mltaln9.o\n-OBJCONTRAFOLDWRAP = mtxutl.o io.o contrafoldwrap.o constants.o defs.o mltaln9.o\n-OBJMULTI2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \\\n-\t\t\t\t multi2hat3s.o defs.o fft.o 
fftFunctions.o\n-OBJPAIR2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \\\n-\t\t\t\tpair2hat3s.o defs.o fft.o fftFunctions.o\n-OBJPAIRASH = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t\t Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \\\n-\t\t\t pairash.o defs.o fft.o fftFunctions.o\n-OBJPAIRLOCALALIGN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \\\n-\t\t\t\t\tpairlocalalign.o defs.o fft.o fftFunctions.o\n-OBJDUMMY = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tdisttbfast_dummy.o dummy.o defs.o fft.o fftFunctions.o\n-OBJSPLITFROMALN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tLalign11.o splitfromaln.o defs.o fft.o fftFunctions.o\n-OBJSPLITTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tLalign11.o splittbfast.o defs.o fft.o fftFunctions.o\n-OBJSPLITTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tLalign11.o splittbfast2.o defs.o fft.o fftFunctions.o\n-OBJSPLITTBFASTP = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tLalign11.o defs.o fft.o fftFunctions.o\n-OBJDISTTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \\\n-\t\t Falign.o 
Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \\\n-\t\t\tdisttbfast.o defs.o fft.o fftFunctions.o addfunctions.o\n-OBJMAKEDI'..b'MYCFLAGS) -c dndfast5.c\n-\n-dndfast6.o : dndfast6.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c dndfast6.c\n-\n-dndfast7.o : dndfast7.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c dndfast7.c\n-\n-dndblast.o : dndblast.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c dndblast.c\n-\n-dndfast3.o : dndfast3.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c dndfast3.c\n-\n-dndpre.o : dndpre.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c dndpre.c\n-\n-countlen.o : countlen.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c countlen.c\n-\n-seq2regtable.o : seq2regtable.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c seq2regtable.c\n-\n-regtable2seq.o : regtable2seq.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c regtable2seq.c\n-\n-f2cl.o : f2cl.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c f2cl.c\n-\n-setdirection.o : setdirection.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c setdirection.c\n-\n-replaceu.o : replaceu.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c replaceu.c\n-\n-restoreu.o : restoreu.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c restoreu.c\n-\n-mccaskillwrap.o : mccaskillwrap.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c mccaskillwrap.c\n-\n-contrafoldwrap.o : contrafoldwrap.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c contrafoldwrap.c\n-\n-pairlocalalign.o : pairlocalalign.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c pairlocalalign.c\n-\n-pairash.o : pairash.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c pairash.c\n-\n-multi2hat3s.o : multi2hat3s.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c multi2hat3s.c\n-\n-pair2hat3s.o : pair2hat3s.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c pair2hat3s.c\n-\n-io.o : io.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c io.c\n-\n-nj.o : nj.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c nj.c\n-\n-treeOperation.o : treeOperation.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c treeOperation.c\n-\n-sextet5.o : sextet5.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c 
sextet5.c\n-\n-mafft-distance.o : mafft-distance.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c mafft-distance.c\n-\n-triplet5.o : triplet5.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c triplet5.c\n-\n-triplet6.o : triplet6.c $(HEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c triplet6.c\n-\n-fft.o : fft.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c fft.c \n-\n-fftFunctions.o : fftFunctions.c $(HEADER) $(FFTHEADER)\n-\t$(CC) $(MYCFLAGS) -c fftFunctions.c\n-\n-Falign.o : Falign.c $(HEADER) $(FFTHEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c Falign.c\n-\n-Falign_localhom.o : Falign_localhom.c $(HEADER) $(FFTHEADER) $(MTXHEADER)\n-\t$(CC) $(MYCFLAGS) -c Falign_localhom.c\n-\n-mtxutl.o : mtxutl.c \n-\t$(CC) $(MYCFLAGS) -c mtxutl.c\n-\n-addfunctions.o : addfunctions.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c addfunctions.c\n-\n-score.o : score.c $(HEADER)\n-\t$(CC) $(MYCFLAGS) -c score.c\n-\n-clean :\n-\trm -f *.o *.a *.exe *~ $(PERLPROGS) $(PROGS) $(SCRIPTS) $(SOS) $(DYLIBS) $(DLLS)\n-#\trm -f ../binaries/* ../scripts/*\n-\n-install : all\n-\tmkdir -p $(LIBDIR)\n-\tchmod 755 $(LIBDIR)\n-\tmkdir -p $(BINDIR)\n-\tchmod 755 $(BINDIR)\n-\tchmod 755 $(SCRIPTS)\n-\t$(INSTALL) $(SCRIPTS) $(BINDIR)\n-\tchmod 755 $(PROGS)\n-\t$(INSTALL) -s $(PROGS) $(LIBDIR)\n-\t$(INSTALL) $(PERLPROGS) $(LIBDIR)\n-\t$(INSTALL) -m 644 $(MANPAGES) $(LIBDIR)\n-\n-\t( cd $(BINDIR); \\\n-rm -f linsi ginsi einsi fftns fftnsi nwns nwnsi xinsi qinsi; \\\n-rm -f mafft-linsi mafft-ginsi mafft-einsi mafft-fftns mafft-fftnsi mafft-nwns mafft-nwnsi mafft-xinsi mafft-qinsi; \\\n-ln -s mafft linsi; ln -s mafft ginsi; ln -s mafft fftns; \\\n-ln -s mafft fftnsi; ln -s mafft nwns; ln -s mafft nwnsi; \\\n-ln -s mafft einsi; \\\n-ln -s mafft mafft-linsi; ln -s mafft mafft-ginsi; ln -s mafft mafft-fftns; \\\n-ln -s mafft mafft-fftnsi; ln -s mafft mafft-nwns; ln -s mafft mafft-nwnsi; \\\n-ln -s mafft mafft-einsi; ln -s mafft mafft-xinsi; ln -s mafft mafft-qinsi;\\\n-rm -f mafft-profile mafft-profile.exe; ln -s 
$(LIBDIR)/mafft-profile .; \\\n-rm -f mafft-distance mafft-distance.exe; ln -s $(LIBDIR)/mafft-distance . )\n-\n-\tmkdir -p $(MANDIR)\n-\tchmod 755 $(MANDIR)\n-\t$(INSTALL) -m 644 $(MANPAGES) $(MANDIR)\n-# remove incorrectly installed manpages by previous versions\n-#\trm -f /usr/local/man/man1/mafft.1 /usr/local/man/man1/mafft-homologs.1 \n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/SAalignmm.c --- a/mafft/core/SAalignmm.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,381 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define DEBUG 0\n-\n-static void match_calc( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )\n-{\n-\tint j, k, l;\n-//\tfloat scarr[26];\n-\tfloat **cpmxpd = floatwork;\n-\tint **cpmxpdn = intwork;\n-\tint count = 0;\n-\tfloat *scarr;\n-\tscarr = calloc( nalphabets, sizeof( float ) );\n-\n-\tif( initialize )\n-\t{\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tcount = 0;\n-\t\t\tfor( l=0; l<nalphabets; l++ )\n-\t\t\t{\n-\t\t\t\tif( cpmx2[l][j] )\n-\t\t\t\t{\n-\t\t\t\t\tcpmxpd[count][j] = cpmx2[l][j];\n-\t\t\t\t\tcpmxpdn[count][j] = l;\n-\t\t\t\t\tcount++;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tcpmxpdn[count][j] = -1;\n-\t\t}\n-\t}\n-\n-\tfor( l=0; l<nalphabets; l++ )\n-\t{\n-\t\tscarr[l] = 0.0;\n-\t\tfor( k=0; k<nalphabets; k++ )\n-\t\t\tscarr[l] += n_dis[k][l] * cpmx1[k][i1];\n-\t}\n-\tfor( j=0; j<lgth2; j++ )\n-\t{\n-\t\tmatch[j] = 0;\n-\t\tfor( k=0; cpmxpdn[k][j] > -1; k++ )\n-\t\t\tmatch[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];\n-\t} \n-\tfree( scarr );\n-}\n-\n-static float Atracking( float *lasthorizontalw, float *lastverticalw, \n-\t\t\t\t\t\tchar **seq1, char **seq2, \n- char **mseq1, char **mseq2, \n- float **cpmx1, float **cpmx2, \n- int **ijp, int icyc, int jcyc )\n-{\n-\tint i, j, k, l, iin, jin, ifi, jfi, lgth1, lgth2;\n-//\tchar gap[] = "-";\n-\tchar *gap;\n-\tfloat wm;\n-\tgap = newgapstr;\n-\tlgth1 = strlen( seq1[0] );\n-\tlgth2 = strlen( seq2[0] );\n-\n-#if DEBUG\n-\tfor( i=0; i<lgth1; i++ ) \n-\t{\n-\t\tfprintf( stderr, "lastverticalw[%d] = %f\\n", i, lastverticalw[i] );\n-\t}\n-#endif\n- \n-\tif( outgap == 1 )\n-\t\t;\n-\telse\n-\t{\n-\t\twm = lastverticalw[0];\n-\t\tfor( i=0; i<lgth1; i++ )\n-\t\t{\n-\t\t\tif( lastverticalw[i] >= wm )\n-\t\t\t{\n-\t\t\t\twm = lastverticalw[i];\n-\t\t\t\tiin = i; jin = lgth2-1;\n-\t\t\t\tijp[lgth1][lgth2] = +( lgth1 - i );\n-\t\t\t}\n-\t\t}\n-\t\tfor( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\tif( 
lasthorizontalw[j] >= wm )\n-\t\t\t{\n-\t\t\t\twm = lasthorizontalw[j];\n-\t\t\t\tiin = lgth1-1; jin = j;\n-\t\t\t\tijp[lgth1][lgth2] = -( lgth2 - j );\n-\t\t\t}\n-\t\t}\n-\t}\n-\n- for( i=0; i<lgth1+1; i++ ) \n- {\n- ijp[i][0] = i + 1;\n- }\n- for( j=0; j<lgth2+1; j++ ) \n- {\n- ijp[0][j] = -( j + 1 );\n- }\n-\n-\tfor( i=0; i<icyc; i++ )\n-\t{\n-\t\tmseq1[i] += lgth1+lgth2;\n-\t\t*mseq1[i] = 0;\n-\t}\n-\tfor( j=0; j<jcyc; j++ )\n-\t{\n-\t\tmseq2[j] += lgth1+lgth2;\n-\t\t*mseq2[j] = 0;\n-\t}\n-\tiin = lgth1; jin = lgth2;\n-\tfor( k=0; k<=lgth1+lgth2; k++ ) \n-\t{\n-\t\tif( ijp[iin][jin] < 0 ) \n-\t\t{\n-\t\t\tifi = iin-1; jfi = jin+ijp[iin][jin];\n-\t\t}\n-\t\telse if( ijp[iin][jin] > 0 )\n-\t\t{\n-\t\t\tifi = iin-ijp[iin][jin]; jfi = jin-1;\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tifi = iin-1; jfi = jin-1;\n-\t\t}\n-\t\tl = iin - ifi;\n-\t\twhile( --l ) \n-\t\t{\n-\t\t\tfor( i=0; i<icyc; i++ )\n-\t\t\t\t*--mseq1[i] = seq1[i][ifi+l];\n-\t\t\tfor( j=0; j<jcyc; j++ ) \n-\t\t\t\t*--mseq2[j] = *gap;\n-\t\t\tk++;\n-\t\t}\n-\t\tl= jin - jfi;\n-\t\twhile( --l )\n-\t\t{\n-\t\t\tfor( i=0; i<icyc; i++ ) \n-\t\t\t\t*--mseq1[i] = *gap;\n-\t\t\tfor( j=0; j<jcyc; j++ ) \n-\t\t\t\t*--mseq2[j] = seq2[j][jfi+l];\n-\t\t\tk++;\n-\t\t}\n-\t\tif( iin <= 0 || jin <= 0 ) break;\n-\t\tfor( i=0; i<icyc; i++ ) \n-\t\t\t*--mseq1[i] = seq1[i][ifi];\n-\t\tfor( j=0; j<jcyc; j++ ) \n-\t\t\t*--mseq2[j] = seq2[j][jfi];\n-\t\tk++;\n-\t\tiin = ifi; jin = jfi;\n-\t}\n-\treturn( 0.0 );\n-}\n-\n-\n-float Aalign( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen )\n-/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */\n-{\n-\tregister int i, j;\n-\tint lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */\n-\tint lgth1, lgth2;\n-\tint resultlen;\n-\tfloat wm = 0.0; /* int ?????? 
*/\n-\tfloat g;\n-\tfloat x;\n-\tstatic TLS float mi, *m;\n-\tstatic TLS int **ijp;\n-\tstatic TLS int mpi, *mp;\n-\tstatic TLS float *currentw;\n-\tstatic TLS float *previousw;\n-\tstatic TLS float *match;\n-\tstatic TLS float *initverticalw; /* kufuu sureba iranai */\n-\tstatic TLS float *lastverticalw; /* kufuu sureba iranai */\n-\tstatic TLS char **mseq1;\n-\tstatic TLS char **mseq2;\n-\tstatic TLS char **mseq;\n-\tstatic TLS float **cpmx1;\n-\tstatic TLS float **cpmx2;\n-\tstatic TLS int **intwork;\n-\tstatic TLS'..b'-\t\t\tFreeFloatMtx( cpmx2 );\n-\n-\t\t\tFreeFloatMtx( floatwork );\n-\t\t\tFreeIntMtx( intwork );\n-\t\t}\n-\n-\t\tll1 = MAX( (int)(1.1*lgth1), orlgth1 ) + 100;\n-\t\tll2 = MAX( (int)(1.1*lgth2), orlgth2 ) + 100;\n-\n-\t\tfprintf( stderr, "\\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );\n-\n-\t\tcurrentw = AllocateFloatVec( ll2+2 );\n-\t\tpreviousw = AllocateFloatVec( ll2+2 );\n-\t\tmatch = AllocateFloatVec( ll2+2 );\n-\n-\t\tinitverticalw = AllocateFloatVec( ll1+2 );\n-\t\tlastverticalw = AllocateFloatVec( ll1+2 );\n-\n-\t\tm = AllocateFloatVec( ll2+2 );\n-\t\tmp = AllocateIntVec( ll2+2 );\n-\n-\t\tmseq = AllocateCharMtx( njob, ll1+ll2 );\n-\n-\t\tcpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );\n-\t\tcpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );\n-\n-\t\tfloatwork = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 ); \n-\t\tintwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 ); \n-\n-\t\tfprintf( stderr, "succeeded\\n" );\n-\n-\t\torlgth1 = ll1;\n-\t\torlgth2 = ll2;\n-\t}\n-\n-\tfor( i=0; i<icyc; i++ ) mseq1[i] = mseq[i];\n-\tfor( j=0; j<jcyc; j++ ) mseq2[j] = mseq[icyc+j];\n-\n-\n-\tif( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )\n-\t{\n-\t\tint ll1, ll2;\n-\n-\t\tif( commonAlloc1 && commonAlloc2 )\n-\t\t{\n-\t\t\tFreeIntMtx( commonIP );\n-\t\t}\n-\n-\t\tll1 = MAX( orlgth1, commonAlloc1 );\n-\t\tll2 = MAX( orlgth2, commonAlloc2 );\n-\n-\t\tfprintf( stderr, "\\n\\ntrying to allocate %dx%d matrices ... 
", ll1+1, ll2+1 );\n-\n-\t\tcommonIP = AllocateIntMtx( ll1+10, ll2+10 );\n-\n-\t\tfprintf( stderr, "succeeded\\n\\n" );\n-\n-\t\tcommonAlloc1 = ll1;\n-\t\tcommonAlloc2 = ll2;\n-\t}\n-\tijp = commonIP;\n-\n-\tcpmx_calc( seq1, cpmx1, eff1, strlen( seq1[0] ), icyc );\n-\tcpmx_calc( seq2, cpmx2, eff2, strlen( seq2[0] ), jcyc );\n-\n-\tmatch_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, floatwork, intwork, 1 );\n-\tmatch_calc( currentw, cpmx1, cpmx2, 0, lgth2, floatwork, intwork, 1 );\n-\n-\tif( outgap == 1 )\n-\t{\n-\t\tfor( i=1; i<lgth1+1; i++ )\n-\t\t{\n-\t\t\tinitverticalw[i] += penalty * 0.5;\n-\t\t}\n-\t\tfor( j=1; j<lgth2+1; j++ )\n-\t\t{\n-\t\t\tcurrentw[j] += penalty * 0.5;\n-\t\t}\n-\t}\n-\n-\tfor( j=0; j<lgth2+1; ++j ) \n-\t{\n-\t\tm[j] = currentw[j-1] + penalty * 0.5; mp[j] = 0;\n-\t}\n-\n-\tlastverticalw[0] = currentw[lgth2-1];\n-\n-\tif( outgap ) lasti = lgth1+1; else lasti = lgth1;\n-\n-\tfor( i=1; i<lasti; i++ )\n-\t{\n-\n-\t\tfloatncpy( previousw, currentw, lgth2+1 );\n-\t\tpreviousw[0] = initverticalw[i-1];\n-\n-\t\tmatch_calc( currentw, cpmx1, cpmx2, i, lgth2, floatwork, intwork, 0 );\n-\t\tcurrentw[0] = initverticalw[i];\n-\n-\t\tmi = previousw[0] + penalty * 0.5; mpi = 0;\n-\t\tfor( j=1; j<lgth2+1; j++ )\n-\t\t{\n-\t\t\twm = previousw[j-1];\n-\t\t\tijp[i][j] = 0;\n-\n-\t\t\tg = penalty * 0.5;\n-\t\t\tx = mi + g;\n-\t\t\tif( x > wm )\n-\t\t\t{\n-\t\t\t\twm = x;\n-\t\t\t\tijp[i][j] = -( j - mpi );\n-\t\t\t}\n-\t\t\tg = penalty * 0.5;\n-\t\t\tx = previousw[j-1] + g;\n-\t\t\tif( mi <= x )\n-\t\t\t{\n-\t\t\t\tmi = x;\n-\t\t\t\tmpi = j-1;\n-\t\t\t}\n-\n-\t\t\tg = penalty * 0.5;\n-\t\t\tx = m[j] + g;\n-\t\t\tif( x > wm )\n-\t\t\t{\n-\t\t\t\twm = x;\n-\t\t\t\tijp[i][j] = +( i - mp[j] );\n-\t\t\t}\n-\t\t\tg = penalty * 0.5;\n-\t\t\tx = previousw[j-1] + g;\n-\t\t\tif( m[j] <= x )\n-\t\t\t{\n-\t\t\t\tm[j] = x;\n-\t\t\t\tmp[j] = i-1;\n-\t\t\t}\n-\t\t\tcurrentw[j] += wm;\n-\t\t}\n-\t\tlastverticalw[i] = currentw[lgth2-1];\n-\t}\n-\t/*\n-\tfprintf( stderr, "\\n" 
);\n-\tfor( i=0; i<icyc; i++ ) fprintf( stderr,"%s\\n", seq1[i] );\n-\tfprintf( stderr, "#####\\n" );\n-\tfor( j=0; j<jcyc; j++ ) fprintf( stderr,"%s\\n", seq2[j] );\n-\tfprintf( stderr, "====>" );\n-\tfor( i=0; i<icyc; i++ ) strcpy( mseq1[i], seq1[i] );\n-\tfor( j=0; j<jcyc; j++ ) strcpy( mseq2[j], seq2[j] );\n-\t*/\n-\tAtracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp, icyc, jcyc );\n-\n-\tresultlen = strlen( mseq1[0] );\n-\tif( alloclen < resultlen || resultlen > N )\n-\t{\n-\t\tfprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\\n", alloclen, resultlen, N );\n-\t\tErrorExit( "LENGTH OVER!\\n" );\n-\t}\n-\n-\tfor( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );\n-\tfor( j=0; j<jcyc; j++ ) strcpy( seq2[j], mseq2[j] );\n-\t/*\n-\tfprintf( stderr, "\\n" );\n-\tfor( i=0; i<icyc; i++ ) fprintf( stderr, "%s\\n", mseq1[i] );\n-\tfprintf( stderr, "#####\\n" );\n-\tfor( j=0; j<jcyc; j++ ) fprintf( stderr, "%s\\n", mseq2[j] );\n-\t*/\n-\treturn( wm );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/Salignmm.c --- a/mafft/core/Salignmm.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,2532 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define MACHIGAI 0\n-#define OUTGAP0TRY 0\n-#define DEBUG 0\n-#define XXXXXXX 0\n-#define USE_PENALTY_EX 1\n-#define FASTMATCHCALC 1\n-#define MCD 0\n-\n-\n-static TLS float **impmtx = NULL;\n-static TLS int impalloclen = 0;\n-float imp_match_out_sc( int i1, int j1 )\n-{\n-//\tfprintf( stderr, "imp+match = %f\\n", impmtx[i1][j1] * fastathreshold );\n-//\tfprintf( stderr, "val = %f\\n", impmtx[i1][j1] );\n-\treturn( impmtx[i1][j1] );\n-}\n-\n-static void imp_match_out_vead_gapmap( float *imp, int i1, int lgth2, int *gapmap2 )\n-{\n-#if FASTMATCHCALC\n-\tfloat *pt = impmtx[i1];\n-\tint *gapmappt = gapmap2;\n-\twhile( lgth2-- )\n-\t\t*imp++ += pt[*gapmappt++];\n-#else\n-\tint j;\n-\tfloat *pt = impmtx[i1];\n-\tfor( j=0; j<lgth2; j++ )\n-\t\t*imp++ += pt[gapmap2[j]];\n-#endif\n-}\n-\n-\n-static void imp_match_out_vead_tate_gapmap( float *imp, int j1, int lgth1, int *gapmap1 )\n-{\n-#if FASTMATCHCALC\n-\tint *gapmappt = gapmap1;\n-\twhile( lgth1-- )\n-\t\t*imp++ += impmtx[*gapmappt++][j1];\n-#else\n-\tint i;\n-\tfor( i=0; i<lgth1; i++ )\n-\t\t*imp++ += impmtx[gapmap1[i]][j1];\n-#endif\n-}\n-\n-static void imp_match_out_vead( float *imp, int i1, int lgth2 )\n-{\n-#if FASTMATCHCALC \n-\tfloat *pt = impmtx[i1];\n-\twhile( lgth2-- )\n-\t\t*imp++ += *pt++;\n-#else\n-\tint j;\n-\tfloat *pt = impmtx[i1];\n-\tfor( j=0; j<lgth2; j++ )\n-\t\t*imp++ += pt[j];\n-#endif\n-}\n-static void imp_match_out_vead_tate( float *imp, int j1, int lgth1 )\n-{\n-\tint i;\n-\tfor( i=0; i<lgth1; i++ )\n-\t\t*imp++ += impmtx[i][j1];\n-}\n-\n-void imp_rna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***grouprna1, RNApair ***grouprna2, int *gapmap1, int *gapmap2, RNApair *pair )\n-{\n-\tfoldrna( nseq1, nseq2, seq1, seq2, eff1, eff2, grouprna1, grouprna2, impmtx, gapmap1, gapmap2, pair );\n-}\n-\n-void imp_match_init_strict( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char 
**seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, int forscore )\n-{\n-\tint i, j, k1, k2, tmpint, start1, start2, end1, end2;\n-\tfloat effij;\n-\tfloat effij_kozo;\n-\tdouble effijx;\n-\tchar *pt, *pt1, *pt2;\n-\tstatic TLS char *nocount1 = NULL;\n-\tstatic TLS char *nocount2 = NULL;\n-\tLocalHom *tmpptr;\n-\n-\tif( seq1 == NULL )\n-\t{\n-\t\tif( impmtx ) FreeFloatMtx( impmtx );\n-\t\timpmtx = NULL;\n-\t\tif( nocount1 ) free( nocount1 );\n-\t\tnocount1 = NULL;\n-\t\tif( nocount2 ) free( nocount2 );\n-\t\tnocount2 = NULL;\n-\t\t\n-\t\treturn;\n-\t}\n-\n-\tif( impalloclen < lgth1 + 2 || impalloclen < lgth2 + 2 )\n-\t{\n-\t\tif( impmtx ) FreeFloatMtx( impmtx );\n-\t\tif( nocount1 ) free( nocount1 );\n-\t\tif( nocount2 ) free( nocount2 );\n-\t\timpalloclen = MAX( lgth1, lgth2 ) + 2;\n-\t\timpmtx = AllocateFloatMtx( impalloclen, impalloclen );\n-\t\tnocount1 = AllocateCharVec( impalloclen );\n-\t\tnocount2 = AllocateCharVec( impalloclen );\n-\t}\n-\n-\tfor( i=0; i<lgth1; i++ )\n-\t{\n-\t\tfor( j=0; j<clus1; j++ )\n-\t\t\tif( seq1[j][i] == \'-\' ) break;\n-\t\tif( j != clus1 ) nocount1[i] = 1; \n-\t\telse\t\t\t nocount1[i] = 0;\n-\t}\n-\tfor( i=0; i<lgth2; i++ )\n-\t{\n-\t\tfor( j=0; j<clus2; j++ )\n-\t\t\tif( seq2[j][i] == \'-\' ) break;\n-\t\tif( j != clus2 ) nocount2[i] = 1;\n-\t\telse\t\t\t nocount2[i] = 0;\n-\t}\n-\n-#if 0\n-fprintf( stderr, "nocount2 =\\n" );\n-for( i = 0; i<impalloclen; i++ )\n-{\n-\tfprintf( stderr, "nocount2[%d] = %d (%c)\\n", i, nocount2[i], seq2[0][i] );\n-}\n-#endif\n-\n-\n-\n-#if 0\n-\tfprintf( stderr, "eff1 in _init_strict = \\n" );\n-\tfor( i=0; i<clus1; i++ )\n-\t\tfprintf( stderr, "eff1[] = %f\\n", eff1[i] );\n-\tfor( i=0; i<clus2; i++ )\n-\t\tfprintf( stderr, "eff2[] = %f\\n", eff2[i] );\n-#endif\n-\n-\tfor( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )\n-\t\timpmtx[i][j] = 0.0;\n-\teffijx = fastathreshold;\n-\tfor( i=0; i<clus1; i++ )\n-\t{\n-\t\tfor( j=0; j<clus2; j++ 
)\n-\t\t{\n-\t\t\teffij = (float)( eff1[i] * eff2[j] * effijx );\n-\t\t\teffij_kozo = (float)( eff1_kozo[i] * eff2_kozo[j] * effijx );\n-\t\t\ttmpptr = localhom[i][j];\n-\t\t\twhile( tmpptr )\n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "start1 = %d\\n", tmpptr->start1 );\n-//\t\t\t\tfprintf( stderr, "end1 = %d\\n", tmpptr->end1 );\n-//\t'..b'ppt = -( j - mpi );\n-//\t\t\t\tfprintf( stderr, "Jump to %d (%c)!", mpi, seq2[0][mpi] );\n-\t\t\t}\n-\t\t\tif( (g=*prept+*ogcp2pt*gf1vapre) >= mi )\n-\t\t\t{\n-\t\t\t\tmi = g;\n-\t\t\t\tmpi = j-1;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\tmi += fpenalty_ex;\n-#endif\n-\n-#if 0 \n-\t\t\tfprintf( stderr, "%5.0f->", wm );\n-\t\t\tfprintf( stderr, "%5.0f? (penal=%5.2f)", g=*mjpt+fgcp1va*(1.0-gapfreq2[j]), fgcp1va*(1.0-gapfreq2[j]) );\n-#endif\n-\t\t\tif( (g=*mjpt+ fgcp1va* *gf2pt) > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-\t\t\t\t*ijppt = +( i - *mpjpt );\n-//\t\t\t\tfprintf( stderr, "Jump to %d (%c)!", *mpjpt, seq1[0][*mpjpt] );\n-\t\t\t}\n-\t\t\tif( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt )\n-\t\t\t{\n-\t\t\t\t*mjpt = g;\n-\t\t\t\t*mpjpt = i-1;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\tm[j] += fpenalty_ex;\n-#endif\n-\t\t\tif( trywarp )\n-\t\t\t{\n-#if USE_PENALTY_EX\n-\t\t\t\tif( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai\n-#else\n-\t\t\t\tif( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai\n-#endif\n-\t\t\t\t{\n-\t\t\t\t\tif( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )\n-\t\t\t\t\t{\n-\t\t\t\t\t\t*ijppt = warpbase + warpn - 1;\n-\t\t\t\t\t}\n-\t\t\t\t\telse\n-\t\t\t\t\t{\n-\t\t\t\t\t\t*ijppt = warpbase + warpn;\n-\t\t\t\t\t\twarpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );\n-\t\t\t\t\t\twarpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );\n-\t\t\t\t\t\twarpis[warpn] = prevwarpi[j-1];\n-\t\t\t\t\t\twarpjs[warpn] = 
prevwarpj[j-1];\n-\t\t\t\t\t\twarpn++;\n-\t\t\t\t\t}\n-\t\t\t\t\twm = g;\n-\t\t\t\t}\n-\t\t\t\tcurm = *curpt + wm;\n-\n-\t\t\t\tif( *wmrecords1pt > *wmrecordspt )\n-\t\t\t\t{\n-\t\t\t\t\t*wmrecordspt = *wmrecords1pt;\n-\t\t\t\t\t*warpipt = *(warpipt-1);\n-\t\t\t\t\t*warpjpt = *(warpjpt-1);\n-\t\t\t\t}\n-\t\t\t\tif( curm > *wmrecordspt )\n-\t\t\t\t{\n-\t\t\t\t\t*wmrecordspt = curm;\n-\t\t\t\t\t*warpipt = i;\n-\t\t\t\t\t*warpjpt = j;\n-\t\t\t\t}\n-\t\t\t\twmrecordspt++;\n-\t\t\t\twmrecords1pt++;\n-\t\t\t\twarpipt++;\n-\t\t\t\twarpjpt++;\n-\t\t\t}\n-\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f ", wm );\n-#endif\n-\t\t\t*curpt++ += wm;\n-\t\t\tijppt++;\n-\t\t\tmjpt++;\n-\t\t\tprept++;\n-\t\t\tmpjpt++;\n-\t\t\tfgcp2pt++;\n-\t\t\togcp2pt++;\n-\t\t\tgf2ptpre++;\n-\t\t\tgf2pt++;\n-\n-\t\t}\n-\t\tlastverticalw[i] = currentw[lgth2-1];\n-\n-\t\tif( trywarp )\n-\t\t{\n-\t\t\tfltncpy( prevwmrecords, wmrecords, lastj );\n-\t\t\tintncpy( prevwarpi, warpi, lastj );\n-\t\t\tintncpy( prevwarpj, warpj, lastj );\n-\t\t}\n-\t}\n-\tif( trywarp )\n-\t{\n-//\t\tfprintf( stderr, "wm = %f\\n", wm );\n-//\t\tfprintf( stderr, "warpn = %d\\n", warpn );\n-\t\tfree( wmrecords );\n-\t\tfree( prevwmrecords );\n-\t\tfree( warpi );\n-\t\tfree( warpj );\n-\t\tfree( prevwarpi );\n-\t\tfree( prevwarpj );\n-\t}\n-\n-\n-#if OUTGAP0TRY\n-\tif( !outgap )\n-\t{\n-\t\tfor( j=1; j<lgth2+1; j++ )\n-\t\t\tcurrentw[j] -= offset * ( lgth2 - j ) / 2.0;\n-\t\tfor( i=1; i<lgth1+1; i++ )\n-\t\t\tlastverticalw[i] -= offset * ( lgth1 - i / 2.0);\n-\t}\n-#endif\n-\t\t\n-\t/*\n-\tfprintf( stderr, "\\n" );\n-\tfor( i=0; i<icyc; i++ ) fprintf( stderr,"%s\\n", seq1[i] );\n-\tfprintf( stderr, "#####\\n" );\n-\tfor( j=0; j<jcyc; j++ ) fprintf( stderr,"%s\\n", seq2[j] );\n-\tfprintf( stderr, "====>" );\n-\tfor( i=0; i<icyc; i++ ) strcpy( mseq1[i], seq1[i] );\n-\tfor( j=0; j<jcyc; j++ ) strcpy( mseq2[j], seq2[j] );\n-\t*/\n-\tif( localhom )\n-\t{\n-\t\tAtracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, 
ijp, icyc, jcyc, warpis, warpjs, warpbase );\n-\t}\n-\telse\n-\t\tAtracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, tailgp, warpis, warpjs, warpbase );\n-\n-\tif( warpis ) free( warpis );\n-\tif( warpjs ) free( warpjs );\n-\n-//\tfprintf( stderr, "### impmatch = %f\\n", *impmatch );\n-\n-\tresultlen = strlen( mseq1[0] );\n-\tif( alloclen < resultlen || resultlen > N )\n-\t{\n-\t\tfprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\\n", alloclen, resultlen, N );\n-\t\tErrorExit( "LENGTH OVER!\\n" );\n-\t}\n-\n-\n-\tfor( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );\n-\tfor( j=0; j<jcyc; j++ ) strcpy( seq2[j], mseq2[j] );\n-#if 0\n-\tfprintf( stderr, "\\n" );\n-\tfor( i=0; i<icyc; i++ ) fprintf( stderr, "%s\\n", mseq1[i] );\n-\tfprintf( stderr, "#####\\n" );\n-\tfor( j=0; j<jcyc; j++ ) fprintf( stderr, "%s\\n", mseq2[j] );\n-#endif\n-\n-//\tfprintf( stderr, "wm = %f\\n", wm );\n-\n-\treturn( wm );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/addfunctions.c --- a/mafft/core/addfunctions.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,571 +0,0 @@\n-#include "mltaln.h"\n-\n-\n-void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg ) // n1 ha allgap\n-{\n-\tint i, newlen;\n-\tdouble *effarr0, *effarr2;\n-\tfloat dumfl;\n-\tdouble eff;\n-\teffarr0 = AllocateDoubleVec( n0 );\n-\teffarr2 = AllocateDoubleVec( n2 );\n-\n-//\treporterr( "profilealignment!\\n" );\n-\n-\tcommongappick( n0, aln0 );\n-\tcommongappick( n2, aln2 );\n-\n-\teff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;\n-\teff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;\n-\n-\tnewgapstr = "-";\n-\tif( alg == \'M\' )\n-\t\tMSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1\n-\telse\n-\t\tA__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1\n-\n-\tnewlen = strlen( aln0[0] );\n-\n-#if 0 // tabun hitsuyou\n-\tfor( j=0; j<newlen; j++ )\n-\t{\n-//\t\tfprintf( stderr, "j=%d\\n", j );\n-\t\tfor( i=0; i<n0; i++ )\n-\t\t{\n-\t\t\tif( aln0[i][j] != \'-\' ) break;\n-\t\t}\n-\t\tif( i == n0 ) \n-\t\t{\n-\t\t\tfor( i=0; i<n1; i++ ) \n-\t\t\t{\n-\t\t\t\tif( aln1[i][j] != \'-\' ) break;\n-\t\t\t}\n-\t\t}\n-\t\telse i = -1;\n-\n-\t\tif( i == n1 ) \n-\t\t{\n-\t\t\tfor( i=0; i<n1; i++ ) aln1[i][j] = \'=\';\n-\t\t}\n-\t}\n-\tfprintf( stderr, "in profilealignment,\\n" );\n-\tfor( i=0; i<n0; i++ ) fprintf( stderr, "\\n>aln0[%d] = \\n%s\\n", i, aln0[i] );\n-\tfor( i=0; i<n1; i++ ) fprintf( stderr, "\\n>aln1[%d] = \\n%s\\n", i, aln1[i] );\n-\tfor( i=0; i<n2; i++ ) fprintf( stderr, "\\n>aln2[%d] = \\n%s\\n", i, aln2[i] );\n-#endif\n-\n-\tfree( effarr0 );\n-\tfree( effarr2 );\n-}\n-\n-void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap\n-{\n-\tint i, j, newlen;\n-\tdouble *effarr0, *effarr2;\n-\tfloat 
dumfl;\n-\tdouble eff;\n-\teffarr0 = AllocateDoubleVec( n0 );\n-\teffarr2 = AllocateDoubleVec( n2 );\n-\n-\n-//\treporterr( "profilealignment!\\n" );\n-\n-\tcommongappick( n0, aln0 );\n-\tcommongappick( n2, aln2 );\n-\n-\teff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;\n-\teff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;\n-\n-\tnewgapstr = "-";\n-\tif( alg == \'M\' )\n-\t\tMSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1\n-\telse\n-\t\tA__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1\n-\n-\tnewlen = strlen( aln0[0] );\n-\n-\tfor( i=0; i<newlen; i++ ) aln1[0][i] = \'-\';\n-\taln1[0][i] = 0;\n-\tfor( i=1; i<n1; i++ ) strcpy( aln1[i], aln1[0] );\n-\n-\tfor( j=0; j<newlen; j++ )\n-\t{\n-//\t\tfprintf( stderr, "j=%d\\n", j );\n-\t\tfor( i=0; i<n0; i++ )\n-\t\t{\n-\t\t\tif( aln0[i][j] != \'-\' ) break;\n-\t\t}\n-\t\tif( i == n0 ) \n-\t\t{\n-\t\t\tfor( i=0; i<n1; i++ ) \n-\t\t\t{\n-\t\t\t\tif( aln1[i][j] != \'-\' ) break;\n-\t\t\t}\n-\t\t}\n-\t\telse i = -1;\n-\n-\t\tif( i == n1 ) \n-\t\t{\n-\t\t\tfor( i=0; i<n1; i++ ) aln1[i][j] = \'=\';\n-\t\t}\n-\t}\n-#if 0\n-\tfprintf( stderr, "in profilealignment,\\n" );\n-\tfor( i=0; i<n0; i++ ) fprintf( stderr, "\\n>aln0[%d] = \\n%s\\n", i, aln0[i] );\n-\tfor( i=0; i<n1; i++ ) fprintf( stderr, "\\n>aln1[%d] = \\n%s\\n", i, aln1[i] );\n-\tfor( i=0; i<n2; i++ ) fprintf( stderr, "\\n>aln2[%d] = \\n%s\\n", i, aln2[i] );\n-#endif\n-\n-\tfree( effarr0 );\n-\tfree( effarr2 );\n-}\n-\n-void eq2dashmatomete( char **s, int n )\n-{\n-\tint i, j;\n-\tchar sj;\n-\n-\tfor( j=0; (sj=s[0][j]); j++ )\n-\t{\n-\t\tif( sj == \'=\' )\n-\t\t{\n-\t\t\tfor( i=0; i<n; i++ )\n-\t\t\t{\n-\t\t\t\ts[i][j] = \'-\';\n-\t\t\t}\n-\t\t}\n-\t}\n-}\n-\n-void eq2dashmatometehayaku( char **s, int n )\n-{\n-\tint i, j, 
c;\n-\tint *tobechanged;\n-\tint len = strlen( s[0] );\n-\n-\ttobechanged = calloc( len, sizeof( int ) );\n-\tc = 0;\n-\tfor( j=0; j<len; j++ )\n-\t{\n-\t\tif( s[0][j] == \'=\' ) tobechanged[c++] = j;\n-\t}\n-\ttobechanged[c] = -1;\n-\n-\tfor( i=0; i<n; i++ )\n-\t{\n-\t\tfor( c=0; (j=tobechanged[c])!=-1; c++ )\n-\t\t\ts[i][j] = \'-\';\n-\t}\n-\tfree( tobechanged );\n-}\n-\n-void eq2dash( char *s )\n-{\n-\twhile('..b'q2[i] );\n-\n-//\t\t\tfprintf( stderr, "gapshift = %d\\n", gapshift );\n-\t\t}\n-\t\tblocklen = 1 + countnogaplen( gaplen+j+1, gaplen+len0 );\n-//\t\tfprintf( stderr, "\\nj=%d, blocklen=%d, len0=%d\\n", j, blocklen, len0 );\n-//\t\tblocklen = 1;\n-//\t\tif( tmptmptmpmark ) exit( 1 );\n-\n-\t\tnewpos = strlen( aseq[rep] );\n-\n-#if 0\n-\t\tfor( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos] = seq[list0[i]][j];\n-\t\tfor( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos] = seq[list1[i]][posin12];\n-\t\tfor( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos] = seq[list2[i]][posin12];\n-\t\tfor( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos+1] = 0;\n-\t\tfor( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos+1] = 0;\n-\t\tfor( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos+1] = 0;\n-#else\n-\n-\t\tfor( i=0; i<ngroup0; i++ )\n-\t\t{\n-\t\t\tlp = list0[i];\n-\t\t\tnewchar = aseq[lp] + newpos;\n-\t\t\tstrncpy0( newchar, seq[lp]+j, blocklen );\n-\t\t}\n-\t\tfor( i=0; i<ngroup1; i++ )\n-\t\t{\n-\t\t\tlp = list1[i];\n-\t\t\tnewchar = aseq[lp] + newpos;\n-\t\t\tstrncpy0( newchar, seq[lp]+posin12, blocklen );\n-\t\t}\n-\t\tfor( i=0; i<ngroup2; i++ )\n-\t\t{\n-\t\t\tlp = list2[i];\n-\t\t\tnewchar = aseq[lp] + newpos;\n-\t\t\tstrncpy0( newchar, seq[lp]+posin12, blocklen );\n-\t\t}\n-//\t\tfprintf( stderr, "### aseq[l0] = %s\\n", aseq[list0[0]] );\n-//\t\tfprintf( stderr, "### aseq[l1] = %s\\n", aseq[list1[0]] );\n-//\t\tfprintf( stderr, "### aseq[l2] = %s\\n", aseq[list2[0]] );\n-//\t\texit( 1 );\n-#endif\n-\n-//\t\tfprintf( stderr, "j=%d -> %d\\n", j, j+blocklen-1 );\n-\t\tj += 
(blocklen-1);\n-\n-\n-\t\tposin12 += (blocklen-1);\n-\n-\n-\t\tposin12++;\n-\t}\n-#if 0\n-\tfprintf( stderr, "\\n" );\n-\tfprintf( stderr, " seq[l0] = %s\\n", seq[list0[0]] );\n-\tfprintf( stderr, " seq[l1] = %s\\n", seq[list1[0]] );\n-\tfprintf( stderr, " seq[l2] = %s\\n", seq[list2[0]] );\n-\tfprintf( stderr, "=====>\\n" );\n-\tfprintf( stderr, "aseq[l0] = %s\\n", aseq[list0[0]] );\n-\tfprintf( stderr, "aseq[l1] = %s\\n", aseq[list1[0]] );\n-\tfprintf( stderr, "aseq[l2] = %s\\n", aseq[list2[0]] );\n-//if( tmptmptmpmark ) exit( 1 );\n-#endif\n-\n-//\tfor( i=0; i<njob; i++ ) if( mar[i] != 3 ) strcpy( seq[i], aseq[i] );\n-\tfor( i=0; i<ngroup0; i++ ) strcpy( seq[list0[i]], aseq[list0[i]] );\n-\tfor( i=0; i<ngroup1; i++ ) strcpy( seq[list1[i]], aseq[list1[i]] );\n-\tfor( i=0; i<ngroup2; i++ ) strcpy( seq[list2[i]], aseq[list2[i]] );\n-\n-\n-\tfree( mar );\n-\tfree( gaps );\n-\tfree( list0 );\n-\tfree( list1 );\n-\tfree( list2 );\n-\tFreeCharMtx( mseq2 );\n-\tFreeCharMtx( mseq1 ); // ? added 2012/02/12\n-\tFreeCharMtx( mseq0 );\n-\tFreeCharMtx( aseq ); // ? 
added 2012/02/12\n-}\n-\n-\n-void restorecommongaps( int njob, char **seq, int *ex1, int *ex2, int *gaplen, int alloclen, char gapchar )\n-{\n-\tint *mar;\n-\tchar *tmpseq;\n-\tchar *cptr;\n-\tint *iptr;\n-\tint *tmpgaplen;\n-\tint i, j, k, len, rep, len1, klim;\n-\n-\tmar = calloc( njob, sizeof( int ) );\n-\ttmpseq = calloc( alloclen, sizeof( char ) );\n-\ttmpgaplen = calloc( alloclen, sizeof( int ) );\n-//\ttmpseq = calloc( alloclen+2, sizeof( char ) );\n-//\ttmpgaplen = calloc( alloclen+2, sizeof( int ) );\n-\n-\n-\tfor( i=0; i<njob; i++ ) mar[i] = 0;\n-\tfor( i=0; (k=ex1[i])>-1; i++ ) \n-\t{\n-\t\tmar[k] = 1;\n-//\t\tfprintf( stderr, "excluding %d\\n", ex1[i] );\n-\t}\n-\tfor( i=0; (k=ex2[i])>-1; i++ ) \n-\t{\n-\t\tmar[k] = 1;\n-//\t\tfprintf( stderr, "excluding %d\\n", ex2[i] );\n-\t}\n-\n-\tfor( i=0; i<njob; i++ )\n-\t\tif( mar[i] ) break;\n-\n-\tif( i == njob )\n-\t{\n-//\t\tfprintf( stderr, "Nothing to do\\n" );\n-\t\tfree( mar );\n-\t\tfree( tmpseq );\n-\t\tfree( tmpgaplen );\n-\t\treturn;\n-\t}\n-\trep = i;\n-\tlen = strlen( seq[rep] );\n-\tlen1 = len+1;\n-\n-\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tif( mar[i] == 0 ) continue;\n-\t\tcptr = tmpseq;\n-\t\tfor( j=0; j<len1; j++ )\n-\t\t{\n-\t\t\tklim = gaplen[j];\n-//\t\t\tfor( k=0; k<gaplen[j]; k++ )\n-\t\t\twhile( klim-- )\n-\t\t\t\t*(cptr++) = gapchar; // ???\n-\t\t\t*(cptr++) = seq[i][j];\n-\t\t}\n-\t\t*cptr = 0;\n-\t\tstrcpy( seq[i], tmpseq );\n-\t}\n-\n-\tiptr = tmpgaplen;\n-\tfor( j=0; j<len1; j++ )\n-\t{\n-\t\t*(iptr++) = gaplen[j];\n-\t\tfor( k=0; k<gaplen[j]; k++ )\n-\t\t\t*(iptr++) = 0;\n-\t}\n-\t*iptr = -1;\n-\n-\tiptr = tmpgaplen;\n-\twhile( *iptr != -1 ) *gaplen++ = *iptr++;\n-\n-\tfree( mar );\n-\tfree( tmpseq );\n-\tfree( tmpgaplen );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/addsingle.c --- a/mafft/core/addsingle.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,3409 +0,0 @@\n-#include "mltaln.h"\n-\n-#define SMALLMEMORY 1\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 0\n-\n-static int nadd;\n-static int treein;\n-static int topin;\n-static int treeout;\n-static int distout;\n-static int noalign;\n-static int multidist;\n-static int maxdist = 2; // scale -> 2bai\n-static int allowlongadds;\n-\n-static float lenfaca, lenfacb, lenfacc, lenfacd;\n-static int tuplesize;\n-\n-#define PLENFACA 0.01\n-#define PLENFACB 10000\n-#define PLENFACC 10000\n-#define PLENFACD 0.1\n-#define D6LENFACA 0.01\n-#define D6LENFACB 2500\n-#define D6LENFACC 2500\n-#define D6LENFACD 0.1\n-#define D10LENFACA 0.01\n-#define D10LENFACB 1000000\n-#define D10LENFACC 1000000\n-#define D10LENFACD 0.0\n-\n-typedef struct _thread_arg\n-{\n-\tint njob; \n-\tint nadd; \n-\tint *nlen; \n-\tint *follows; \n-\tchar **name; \n-\tchar **seq; \n-\tLocalHom **localhomtable; \n-\tfloat **iscore; \n-\tfloat **nscore; \n-\tint *istherenewgap; \n-\tint **newgaplist;\n-\tRNApair ***singlerna; \n-\tdouble *eff_kozo_mapped; \n-\tint alloclen;\n-\tTreedep *dep;\n-\tint ***topol;\n-\tfloat **len;\n-\tAddtree *addtree;\n-#ifdef enablemultithread\n-\tint *iaddshare;\n-\tint thread_no;\n-\tpthread_mutex_t *mutex_counter;\n-#endif\n-} thread_arg_t;\n-\n-\n-#ifdef enablemultithread\n-typedef struct _gaplist2alnxthread_arg\n-{\n-//\tint thread_no;\n-\tint ncycle;\n-\tint *jobpospt;\n-\tint tmpseqlen;\n-\tint lenfull;\n-\tchar **seq;\n-\tint *newgaplist;\n-\tint *posmap;\n-\tpthread_mutex_t *mutex;\n-} gaplist2alnxthread_arg_t;\n-\n-typedef struct _distancematrixthread_arg\n-{\n-\tint thread_no;\n-\tint njob;\n-\tint norg;\n-\tint *jobpospt;\n-\tint **pointt;\n-\tint *nogaplen;\n-\tfloat **imtx;\n-\tfloat **nmtx;\n-\tfloat *selfscore;\n-\tpthread_mutex_t *mutex;\n-} distancematrixthread_arg_t;\n-\n-typedef struct _jobtable2d\n-{\n- int i; \n- int j; \n-} Jobtable2d;\n-\n-typedef struct _dndprethread_arg\n-{\n-\tint njob;\n-\tint thread_no;\n-\tfloat 
*selfscore;\n-\tfloat **mtx;\n-\tchar **seq;\n-\tJobtable2d *jobpospt;\n-\tpthread_mutex_t *mutex;\n-} dndprethread_arg_t;\n-\n-#endif\n-\n-typedef struct _blocktorealign\n-{\n-\tint start;\n-\tint end;\n-\tint nnewres;\n-} Blocktorealign;\n-\n-static void cnctintvec( int *res, int *o1, int *o2 )\n-{\n-\twhile( *o1 != -1 ) *res++ = *o1++;\n-\twhile( *o2 != -1 ) *res++ = *o2++;\n-\t*res = -1;\n-}\n-\n-static void countnewres( int len, Blocktorealign *realign, int *posmap, int *gaplist )\n-{\n-\tint i, regstart, regend, len1;\n-\tregstart = 0;\n-\tlen1 = len+1;\n-\tfor( i=0; i<len1; i++ )\n-\t{\n-\t\tif( realign[i].nnewres || gaplist[i] )\n-\t\t{\n-\t\t\tregend = posmap[i]-1;\n-\t\t\trealign[i].start = regstart;\n-\t\t\trealign[i].end = regend;\n-\t\t}\n-\t\tif( gaplist[i] )\n-\t\t{\n-\t\t\trealign[i].nnewres++;\n-//\t\t\tfprintf( stderr, "hit? reg = %d-%d\\n", regstart, regend );\n-\t\t}\n-\t\tregstart = posmap[i]+1;\n-\t}\n-}\n-static void fillgap( char *s, int len )\n-{\n-\tint orilen = strlen( s );\n-\ts += orilen;\n-\tlen -= orilen;\n-\twhile( len-- )\n-\t\t*s++ = \'-\';\n-\t*s = 0;\n-}\n-\n-static int lencomp( const void *a, const void *b ) // osoikamo\n-{\n-\tchar **ast = (char **)a;\n-\tchar **bst = (char **)b;\n-\tint lena = strlen( *ast );\n-\tint lenb = strlen( *bst );\n-//\tfprintf( stderr, "a=%s, b=%s\\n", *ast, *bst );\n-//\tfprintf( stderr, "lena=%d, lenb=%d\\n", lena, lenb );\n-\tif( lena > lenb ) return -1;\n-\telse if( lena < lenb ) return 1;\n-\telse return( 0 );\n-}\n-\n-static int dorealignment_tree( Blocktorealign *block, char **fullseq, int *fullseqlenpt, int norg, int ***topol, int *follows )\n-{\n-\tint i, j, k, posinold, newlen, *nmem;\n-\tint n0, n1, localloclen, nhit, hit1, hit2;\n-\tint *pickhistory;\n-\tint nprof1, nprof2, pos, zure;\n-\tchar **prof1, **prof2;\n-\tint *iinf0, *iinf1;\n-\tint *group, *nearest, *g2n, ngroup;\n-\tchar ***mem;\n-\tstatic char **tmpaln0 = NULL;\n-\tstatic char **tmpaln1 = NULL;\n-\tstatic char 
**tmpseq;\n-\tint ***topolpick;\n-\tint *tmpint;\n-\tint *intptr, *intptrx;\n-\tchar *tmpseq0, *cptr, **cptrptr;\n-\n-\n-\tlocalloclen = 4 * ( block->end - block->start + 1 );\t // ookisugi?\n-\ttmpaln0 = AllocateCharMtx( njob, localloclen );\n-\ttmpaln1 = AllocateCharMtx( njob, localloclen );\n-\ttmpseq = AllocateCharMtx( 1, *fullseql'..b' );\n-\t\t\t\thandle = calloc( nthread, sizeof( pthread_t ) );\n-\t\t\t\tpthread_mutex_init( &mutex, NULL );\n-\t\t\t\tjobpos = 1;\n-\t\t\t\tfor( i=0; i<nthread; i++ )\n-\t\t\t\t{\n-//\t\t\t\t\ttarg[i].thread_no = i;\n-\t\t\t\t\ttarg[i].ncycle = ien;\n-\t\t\t\t\ttarg[i].jobpospt = &jobpos;\n-\t\t\t\t\ttarg[i].tmpseqlen = tmpseqlen;\n-\t\t\t\t\ttarg[i].lenfull = lenfull;\n-\t\t\t\t\ttarg[i].seq = seq;\n-//\t\t\t\t\ttarg[i].newgaplist = newgaplist_o[iadd];\n-\t\t\t\t\ttarg[i].newgaplist = newgaplist_compact;\n-\t\t\t\t\ttarg[i].posmap = posmap;\n-\t\t\t\t\ttarg[i].mutex = &mutex;\n-\n-\t\t\t\t\tpthread_create( handle+i, NULL, gaplist2alnxthread, (void *)(targ+i) );\n-\t\t\t\t}\n-\t\t\t\tfor( i=0; i<nthread; i++ )\n-\t\t\t\t{\n-\t\t\t\t\tpthread_join( handle[i], NULL );\n-\t\t\t\t}\n-\t\t\t\tpthread_mutex_destroy( &mutex );\n-\t\t\t\tfree( handle );\n-\t\t\t\tfree( targ );\n-\t\t\t}\n-\t\t\telse\n-#endif\n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "%d / %d\\r", iadd, nadd );\n-\t\t\t\tfor( i=1; i<ien; i++ )\n-\t\t\t\t{\n-\t\t\t\t\ttmpseq1 = tmpseq[0];\n-\t\t\t\t\tif( i == 1 ) fprintf( stderr, " %d / %d\\r", iadd, nadd );\n-// \t\t\t\t\tgaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist_o[iadd], posmap, tmpseqlen );\n- \t\t\t\t\tgaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist_compact, posmap, tmpseqlen );\n-//\t\t\t\t\tfprintf( stderr, ">%s (iadd=%d)\\n%s\\n", name[i], iadd, tmpseq1 );\n-\t\t\t\t\tstrcpy( seq[i], tmpseq1 );\n-\t\t\t\t}\n-\t\t\t}\n-\t\t}\n-\t\ttmpseq1 = tmpseq[0];\n-//\t\tinsertgapsbyotherfragments_simple( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap );\n-\t\tinsertgapsbyotherfragments_compact( lenfull, 
tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap );\n-//\t\tfprintf( stderr, "%d = %s\\n", iadd, tmpseq1 );\n-\t\teq2dash( tmpseq1 );\n-\t\tstrcpy( seq[norg+iadd], tmpseq1 );\n-\n-//\t\tadjustposmap( lenfull, posmap, newgaplist_o[iadd] );\n-\t\tadjustposmap( lenfull, posmap, newgaplist_compact );\n-\t\tcountnewres( lenfull, realign, posmap, newgaplist_o[iadd] ); // muda?\n-//\t\tcountnewres( lenfull, realign, posmap, newgaplist_compact ); // muda?\n-\n-\t}\n-\tfprintf( stderr, "\\r done. \\n\\n" );\n-\n-#if 0\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tfprintf( stdout, ">%s\\n", name[i] );\n-\t\tfprintf( stdout, "%s\\n", seq[i] );\n-\t}\n-#endif\n-\n-#if 0\n-\tfprintf( stderr, "realign[].nnewres = " );\n-\tfor( i=0; i<lenfull+1; i++ )\n-\t{\n-\t\tfprintf( stderr, "%d ", realign[i].nnewres );\n-\t}\n-\tfprintf( stderr, "\\n" );\n-#endif\n-\n-\tfor( i=0; i<lenfull+1; i++ )\n-\t{\n-\t\tif( realign[i].nnewres > 1 ) \n-\t\t{\n-//\t\t\tfprintf( stderr, "i=%d: %d-%d\\n", i, realign[i].start, realign[i].end );\n-\t\t\tfprintf( stderr, "\\rRealigning %d/%d \\r", i, lenfull );\n-//\t\t\tzure = dorealignment_compact( realign+i, seq, &fullseqlen, norg );\n-//\t\t\tzure = dorealignment_order( realign+i, seq, &fullseqlen, norg, ordertable, follows );\n-\t\t\tzure = dorealignment_tree( realign+i, seq, &fullseqlen, norg, topol, follows );\n-#if 0\n-\t\t\tgappick0( check1, seq[0] );\n-\t\t\tfprintf( stderr, "check1 = %s\\n", check1 );\n-\t\t\tif( strcmp( check1, check2 ) )\n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "CHANGED!!!!!\\n" );\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-#endif\n-\t\t\tfor( j=i+1; j<lenfull+1; j++ )\n-\t\t\t{\n-\t\t\t\tif( realign[j].nnewres )\n-\t\t\t\t{\n-\t\t\t\t\trealign[j].start -= zure;\n-\t\t\t\t\trealign[j].end -= zure;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t}\n-\t}\n-\tFreeIntCub( topol );\n-\tfprintf( stderr, "\\r done. 
\\n\\n" );\n-\n-\tfflush( stderr );\n-\n-\n-\tFreeIntMtx( newgaplist_o );\n-\tFreeIntVec( newgaplist_compact );\n-\tFreeIntVec( posmap );\n-\tfree( realign );\n-\tfree( istherenewgap );\n-\tFreeIntMtx( follower );\n-\tfree( follows );\n-\tfree( ordertable );\n-\tFreeCharMtx( tmpseq );\n-\n-\n-\twriteData_pointer( prep_g, njob, name, nlen, seq );\n-#if 0\n-\twriteData( stdout, njob, name, nlen, bseq );\n-\twritePre( njob, name, nlen, bseq, !contin );\n-\twriteData_pointer( prep_g, njob, name, nlen, aseq );\n-#endif\n-#if IODEBUG\n-\tfprintf( stderr, "OSHIMAI\\n" );\n-#endif\n-\n-#if SMALLMEMORY\n-\tif( multidist )\n-\t{\n-//\t\tif( constraint ) FreeLocalHomTable_two( localhomtable, norg, nadd );\n-\t\tif( constraint ) FreeLocalHomTable_one( localhomtable, norg, nadd );\n-\t}\n-\telse\n-#endif\n-\t{\n-\t\tif( constraint ) FreeLocalHomTable( localhomtable, njob );\n-\t}\n-\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/blosum.c --- a/mafft/core/blosum.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,308 +0,0 @@\n-#define DEFAULTGOP_B -1530\n-#define DEFAULTGEP_B -00 \n-#define DEFAULTOFS_B -123 /* +10 -- -50 teido ka ? */\n-\n-\n-void BLOSUMmtx( int n, double **matrix, double *freq, char *amino, char *amino_grp )\n-{\n-\t/*\n-\tchar locaminod[26] = "GASTPLIMVDNEQFYWKRHCXXX.-U";\n-\t*/\n-//\tchar locaminod[] = "ARNDCQEGHILKMFPSTWYVBZX.-U";\n-\tchar locaminod[] = "ARNDCQEGHILKMFPSTWYVBZX.-J";\n-\tchar locgrpd[] = \n-\t{\n-\t\t0, 3, 2, 2, 5, 2, 2, 0, 3, 1, 1, 3, 1, 4, 0, 0, 0, 4, 4, 1, 2, 2,\n-\t\t6, 6, 6, 1,\n-\t};\n-\tdouble freqd[20] = \n-\t{\n-\t 0.077,\n-\t 0.051,\n-\t 0.043,\n-\t 0.052,\n-\t 0.020,\n-\t 0.041,\n-\t 0.062,\n-\t 0.074,\n-\t 0.023,\n-\t 0.052,\n-\t 0.091,\n-\t 0.059,\n-\t 0.024,\n-\t 0.040,\n-\t 0.051,\n-\t 0.069,\n-\t 0.059,\n-\t 0.014,\n-\t 0.032,\n-\t 0.066,\n-\t};\n-\n-\tdouble tmpmtx30[] = \n-\t{\n- 4,\n- -1, 8,\n- 0, -2, 8,\n- 0, -1, 1, 9,\n- -3, -2, -1, -3, 17,\n- 1, 3, -1, -1, -2, 8,\n- 0, -1, -1, 1, 1, 2, 6,\n- 0, -2, 0, -1, -4, -2, -2, 8,\n- -2, -1, -1, -2, -5, 0, 0, -3, 14,\n- 0, -3, 0, -4, -2, -2, -3, -1, -2, 6,\n- -1, -2, -2, -1, 0, -2, -1, -2, -1, 2, 4,\n- 0, 1, 0, 0, -3, 0, 2, -1, -2, -2, -2, 4,\n- 1, 0, 0, -3, -2, -1, -1, -2, 2, 1, 2, 2, 6,\n- -2, -1, -1, -5, -3, -3, -4, -3, -3, 0, 2, -1, -2, 10,\n- -1, -1, -3, -1, -3, 0, 1, -1, 1, -3, -3, 1, -4, -4, 11,\n- 1, -1, 0, 0, -2, -1, 0, 0, -1, -1, -2, 0, -2, -1, -1, 4,\n- 1, -3, 1, -1, -2, 0, -2, -2, -2, 0, 0, -1, 0, -2, 0, 2, 5,\n- -5, 0, -7, -4, -2, -1, -1, 1, -5, -3, -2, -2, -3, 1, -3, -3, -5, 20,\n- -4, 0, -4, -1, -6, -1, -2, -3, 0, -1, 3, -1, -1, 3, -2, -2, -1, 5, 9,\n- 1, -1, -2, -2, -2, -3, -3, -3, -3, 4, 1, -2, 0, 1, -4, -1, 1, -3, 1, 5,\n- 0, -2, 4, 5, -2, -1, 0, 0, -2, -2, -1, 0, -2, -3, -2, 0, 0, -5, -3, -2, 5,\n- 0, 0, -1, 0, 0, 4, 5, -2, 0, -3, -1, 1, -1, -4, 0, -1, -1, -1, -2, -3, 0, 4,\n- 0, -1, 0, -1, -2, 0, -1, -1, -1, 0, 0, 0, 0, -1, -1, 0, 0, -2, -1, 0, -1, 0, -1,\n-\t};\n-\t\n-\tdouble tmpmtx45[] = \n-\t{\n- 5,\n- -2, 7,\n- -1, 0, 6,\n- -2, -1, 2, 7,\n- 
-1, -3, -2, -3, 12,\n- -1, 1, 0, 0, -3, 6,\n- -1, 0, 0, 2, -3, 2, 6,\n- 0, -2, 0, -1, -3, -2, -2, 7,\n- -2, 0, 1, 0, -3, 1, 0, -2, 10,\n- -1, -3, -2, -4, -3, -2, -3, -4, -3, 5,\n- -1, -2, -3, -3, -2, -2, -2, -3, -2, 2, 5,\n- -1, 3, 0, 0, -3, 1, 1, -2, -1, -3, -3, 5,\n- -1, -1, -2, -3, -2, 0, -2, -2, 0, 2, 2, -1, 6,\n- -2, -2, -2, -4, -2, -4, -3, -3, -2, 0, 1, -3, 0, 8,\n- -1, -2, -2, -1, -4, -1, 0, -2, -2, -2, -3, -1, -2, -3, 9,\n- 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -3, -1, -2, -2, -1, 4,\n- 0, -1, 0, -1, -1, -1, -1, '..b'.0, -1.5, -1.6, -2.8, -4.4, -1.9, 2.8, 4.0,\n- -0.4, 2.7, 0.8, 0.5, -2.8, 1.5, 1.2, -1.1, 0.6, -2.1, -2.1, 3.2,\n- -0.7, -1.7, -2.2, -3.0, -0.9, -1.0, -2.0, -3.5, -1.3, 2.5, 2.8, -1.4, 4.3,\n- -2.3, -3.2, -3.1, -4.5, -0.8, -2.6, -3.9, -5.2, -0.1, 1.0, 2.0, -3.3, 1.6, 7.0,\n- 0.3, -0.9, -0.9, -0.7, -3.1, -0.2, -0.5, -1.6, -1.1, -2.6, -2.3, -0.6, -2.4, -3.8, 7.6,\n- 1.1, -0.2, 0.9, 0.5, 0.1, 0.2, 0.2, 0.4, -0.2, -1.8, -2.1, 0.1, -1.4, -2.8, 0.4, 2.2,\n- 0.6, -0.2, 0.5, 0.0, -0.5, 0.0, -0.1, -1.1, -0.3, -0.6, -1.3, 0.1, -0.6, -2.2, 0.1, 1.5, 2.5,\n- -3.6, -1.6, -3.6, -5.2, -1.0, -2.7, -4.3, -4.0, -0.8, -1.8, -0.7, -3.5, -1.0, 3.6, -5.0, -3.3, -3.5, 14.2,\n- -2.2, -1.8, -1.4, -2.8, -0.5, -1.7, -2.7, -4.0, 2.2, -0.7, 0.0, -2.1, -0.2, 5.1, -3.1, -1.9, -1.9, 4.1, 7.8,\n- 0.1, -2.0, -2.2, -2.9, 0.0, -1.5, -1.9, -3.3, -2.0, 3.1, 1.8, -1.7, 1.6, 0.1, -1.8, -1.0, 0.0, -2.6, -1.1, 3.4,\n-\t};\n-\n-\tint i, j, count;\n-\tdouble av;\n-\tdouble *tmpmtx;\n-\n-\tif( n == 30 ) tmpmtx = tmpmtx30;\n-\telse if( n == 45 ) tmpmtx = tmpmtx45;\n-\telse if( n == 50 ) tmpmtx = tmpmtx50;\n-\telse if( n == 62 ) tmpmtx = tmpmtx62;\n-\telse if( n == 80 ) tmpmtx = tmpmtx80;\n-\telse if( n == 0 ) tmpmtx = tmpmtx0;\n-\telse if( n == -1 ) tmpmtx = loadaamtx();\n-\telse\n-\t{\n-\t\tfprintf( stderr, "blosum %d ?\\n", n );\n-\t\texit( 1 );\n-\t}\n-\n-\tcount = 0;\n-\tfor( i=0; i<20; i++ )\n-\t{\n-\t\tfor( j=0; j<=i; j++ )\n-\t\t{\n-\t\t\tmatrix[i][j] = matrix[j][i] = 
(double)tmpmtx[count++];\n-\t\t}\n-\t}\n-\tif( n == -1 && tmpmtx[400] != -1.0 ) \n-\t{\n-\t\tfor( i=0; i<20; i++ ) freq[i] = tmpmtx[400+i];\n-\t\tav = 0.0;\n-\t\tfor( i=0; i<20; i++ ) av += freq[i];\n-\t\tfor( i=0; i<20; i++ ) freq[i] /= av;\n-\t}\n-\telse\n-\t\tfor( i=0; i<20; i++ ) freq[i] = freqd[i];\n-\n-#if 0\n-\tav = 0.0;\n-\tfor( i=0; i<20; i++ )\n-\t\tav += matrix[i][i];\n-\tav /= 20;\n-\tfprintf( stdout, "av = %f\\n", av );\n-\n-\tfor( i=0; i<20; i++ ) for( j=0; j<20; j++ )\n-\t\tmatrix[i][j] /= av;\n-\n-\tav = wav = 0;\n-\tcount = 0;\n-\twcount = 0.0;\n-\ttmptmp = 0.0;\n-\tfor( i=0; i<20; i++ )\n-\t{\n-\t\tfprintf( stdout, "freq[%d] = %f\\n", i, freq[i] );\n-\t\ttmptmp += freq[i];\n-\t\tfor( j=0; j<20; j++ )\n-\t\t{\n-\t\t\tav += matrix[i][j];\n-\t\t\twav += freq[i] * freq[j] * matrix[i][j];\n-\t\t\tcount++;\n-\t\t\twcount += freq[i] * freq[j];\n-\t\t}\n-\t}\n-\n-\tav /= count;\n-\twav /= wcount;\n-\tfprintf( stdout, "av = %f\\n", av );\n-\tfprintf( stdout, "wav = %f\\n", wav );\n-\tfprintf( stdout, "wcount = %f\\n", wcount );\n-\tfprintf( stdout, "tmptmp = %f\\n", tmptmp );\n-\n-\tfor( i=0; i<20; i++ )\n-\t{\n-\t\tfor( j=0; j<=i; j++ )\n-\t\t{\n-\t\t\tfprintf( stderr, "## %d-%d, %f\\n", i, j, matrix[i][j] );\n-\t\t}\n-\t}\n-\n-\texit( 1 );\n-#endif\n-\n- for( i=0; i<26; i++ ) amino[i] = locaminod[i];\n- for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i];\n-}\n-\n-void extendedmtx( double **matrix, double *freq, char *amino, char *amino_grp )\n-{\n-\tint i;\n-\tint j;\n-\n-\tfor( i=0; i<nalphabets; i++ ) \n-\t{\n-//\t\tfprintf( stderr, "i=%d, i=%c\\n", i, i );\n-\t\tamino[i] = (char)i;\n-\t}\n-\tfor( i=0; i<nalphabets; i++ ) amino_grp[(int)amino[i]] = i % 6;\n-\tfor( i=0; i<nalphabets; i++ ) freq[i] = 1.0/nalphabets;\n-\n-\tfor( i=0; i<nalphabets; i++ )\n-\t{\n-\t\tfor( j=0; j<=i; j++ )\n-\t\t{\n-\t\t\tmatrix[i][j] = matrix[j][i] = (double)-1.0;\n-\t\t}\n-\t}\n-\tfor( i=0; i<nalphabets; i++ )\n-\t\tmatrix[i][i] = matrix[i][i] = (double)1.0;\n-#if 
0 // user-defined matrix no toki fukkatsu saseru.\n-\tif( tmpmtx[400] != -1.0 ) \n-\t{\n-\t\tfor( i=0; i<20; i++ ) freq[i] = tmpmtx[400+i];\n-\t\tav = 0.0;\n-\t\tfor( i=0; i<20; i++ ) av += freq[i];\n-\t\tfor( i=0; i<20; i++ ) freq[i] /= av;\n-\t}\n-\telse\n-\t\tfor( i=0; i<20; i++ ) freq[i] = freqd[i];\n-#endif\n-#if 0\n-\tfor( i=0; i<nalphabets; i++ )\n-\t{\n-\t\tfprintf( stderr, "%d: %c, %d, %f\\n", i, amino[i], amino_grp[amino[i]], freq[i] );\n-\t}\n-#endif\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/blosum.h --- a/mafft/core/blosum.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,11 +0,0 @@ -/* -int locpenaltyd = -2400; -int locoffsetd = -60; -char locaminod[26] = "GASTPLIMVDNEQFYWKRHCXXX.-U"; -char locaminod[] = "ARNDCQEGHILKMFPSTWYVBZX.-U"; -char locgrpd[] = -{ - 0, 3, 2, 2, 5, 2, 2, 0, 3, 1, 1, 3, 1, 4, 0, 0, 0, 4, 4, 1, 2, 2, - 6, 6, 6, 6, -}; -*/ |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/constants.c --- a/mafft/core/constants.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1395 +0,0 @@\n-#include "mltaln.h"\n-#include "miyata.h"\n-#include "miyata5.h"\n-#include "DNA.h"\n-\n-#include "JTT.c"\n-#include "blosum.c"\n-\n-#define DEBUG 0\n-#define TEST 0\n-\n-#define NORMALIZE1 1\n-\n-\n-static int shishagonyuu( double in )\n-{\n-\tint out;\n-\tif ( in > 0.0 ) out = ( (int)( in + 0.5 ) );\n-\telse if( in == 0.0 ) out = ( 0 );\n-\telse if( in < 0.0 ) out = ( (int)( in - 0.5 ) );\n-\telse out = 0;\n-\treturn( out );\n-}\n-\n-static void ambiguousscore( int *amino_n, int **n_dis )\n-{\n-\tint i;\n-\tfor( i=0; i<26; i++ )\n-\t{\n-\t\tn_dis[i][amino_n[\'r\']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'a\']][i] + n_dis[amino_n[\'g\']][i] ) );\n-\t\tn_dis[i][amino_n[\'y\']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'c\']][i] + n_dis[amino_n[\'t\']][i] ) );\n-\t\tn_dis[i][amino_n[\'k\']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'g\']][i] + n_dis[amino_n[\'t\']][i] ) );\n-\t\tn_dis[i][amino_n[\'m\']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'a\']][i] + n_dis[amino_n[\'c\']][i] ) );\n-\t\tn_dis[i][amino_n[\'s\']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'g\']][i] + n_dis[amino_n[\'c\']][i] ) );\n-\t\tn_dis[i][amino_n[\'w\']] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'a\']][i] + n_dis[amino_n[\'t\']][i] ) );\n-\t\tn_dis[i][amino_n[\'b\']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n[\'c\']][i] + n_dis[amino_n[\'g\']][i] + n_dis[amino_n[\'t\']][i] ) );\n-\t\tn_dis[i][amino_n[\'d\']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n[\'a\']][i] + n_dis[amino_n[\'g\']][i] + n_dis[amino_n[\'t\']][i] ) );\n-\t\tn_dis[i][amino_n[\'h\']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n[\'a\']][i] + n_dis[amino_n[\'c\']][i] + n_dis[amino_n[\'t\']][i] ) );\n-\t\tn_dis[i][amino_n[\'v\']] = shishagonyuu( (double)1/3 * ( n_dis[amino_n[\'a\']][i] + n_dis[amino_n[\'c\']][i] + n_dis[amino_n[\'g\']][i] ) );\n-\n-\t\tn_dis[amino_n[\'r\']][i] = n_dis[i][amino_n[\'r\']];\n-\t\tn_dis[amino_n[\'y\']][i] = 
n_dis[i][amino_n[\'y\']];\n-\t\tn_dis[amino_n[\'k\']][i] = n_dis[i][amino_n[\'k\']];\n-\t\tn_dis[amino_n[\'m\']][i] = n_dis[i][amino_n[\'m\']];\n-\t\tn_dis[amino_n[\'s\']][i] = n_dis[i][amino_n[\'s\']];\n-\t\tn_dis[amino_n[\'w\']][i] = n_dis[i][amino_n[\'w\']];\n-\t\tn_dis[amino_n[\'b\']][i] = n_dis[i][amino_n[\'b\']];\n-\t\tn_dis[amino_n[\'d\']][i] = n_dis[i][amino_n[\'d\']];\n-\t\tn_dis[amino_n[\'h\']][i] = n_dis[i][amino_n[\'h\']];\n-\t\tn_dis[amino_n[\'v\']][i] = n_dis[i][amino_n[\'v\']];\n-\t}\n-\n-\ti = amino_n[\'r\']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'a\']][amino_n[\'a\']] + n_dis[amino_n[\'g\']][amino_n[\'g\']] ) );\n-\ti = amino_n[\'y\']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'c\']][amino_n[\'c\']] + n_dis[amino_n[\'t\']][amino_n[\'t\']] ) );\n-\ti = amino_n[\'k\']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'g\']][amino_n[\'g\']] + n_dis[amino_n[\'t\']][amino_n[\'t\']] ) );\n-\ti = amino_n[\'m\']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'a\']][amino_n[\'a\']] + n_dis[amino_n[\'c\']][amino_n[\'c\']] ) );\n-\ti = amino_n[\'s\']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'g\']][amino_n[\'g\']] + n_dis[amino_n[\'c\']][amino_n[\'c\']] ) );\n-\ti = amino_n[\'w\']; n_dis[i][i] = shishagonyuu( (double)1/2 * ( n_dis[amino_n[\'a\']][amino_n[\'a\']] + n_dis[amino_n[\'t\']][amino_n[\'t\']] ) );\n-\ti = amino_n[\'b\']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n[\'c\']][amino_n[\'c\']] + n_dis[amino_n[\'g\']][amino_n[\'g\']] + n_dis[amino_n[\'t\']][amino_n[\'t\']] ) );\n-\ti = amino_n[\'d\']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n[\'a\']][amino_n[\'a\']] + n_dis[amino_n[\'g\']][amino_n[\'g\']] + n_dis[amino_n[\'t\']][amino_n[\'t\']] ) );\n-\ti = amino_n[\'h\']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n[\'a\']][amino_n[\'a\']] + n_dis[amino_n[\'c\']][amino_n[\'c\']] + n_dis[amino_n[\'t\']][amino_n[\'t\']] ) );\n-\ti = 
amino_n[\'v\']; n_dis[i][i] = shishagonyuu( (double)1/3 * ( n_dis[amino_n[\'a\']][amino_n[\'a\']] + n_dis[amino_n[\'c\']][amino_n[\'c\']] + n_dis[amino_n[\'g\']][amino_n[\'g\']] ) );\n-}\n-\n-\n-static void calcfreq_nuc( int nseq, char **seq, double *datafreq )\n-{\n-\tint i, j, l;\n-\tint aan;\n-\tdouble total;\n-\tfor( i=0; i<4; i++ )\n-\t\tdatafreq[i] = 0.0;\n-\ttotal = 0.0;'..b'alphabets; i++) amino_n[(int)amino[i]] = i;\n- for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_dis[i][j] = 0;\n- for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) n_disLN[i][j] = 0;\n- for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_dis_consweight_multi[i][j] = 0.0;\n-\n-\tn_dis_consweight_multi = AllocateDoubleMtx( nalphabets, nalphabets );\n-\tn_disFFT = AllocateIntMtx( nalphabets, nalphabets );\n- for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )\n-\t{\n- amino_dis[(int)amino[i]][(int)amino[j]] = n_dis[i][j];\n-\t\tn_dis_consweight_multi[i][j] = (float)n_dis[i][j] * consweight_multi;\n-\t\tamino_dis_consweight_multi[(int)amino[i]][(int)amino[j]] = (double)n_dis[i][j] * consweight_multi;\n-\t}\n-\n-\tif( dorp == \'d\' ) /* DNA */\n-\t{\n-#if 0 // ???\n-\t for( i=0; i<5; i++) for( j=0; j<5; j++ )\n- \tn_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;\n-\t for( i=5; i<10; i++) for( j=5; j<10; j++ )\n- \tn_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;\n-\t for( i=0; i<5; i++) for( j=0; j<5; j++ )\n- \tn_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;\n-\t for( i=5; i<10; i++) for( j=5; j<10; j++ )\n- \tn_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;\n-#else\n-\t for( i=0; i<10; i++) for( j=0; j<10; j++ )\n- \tn_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;\n-\t for( i=0; i<10; i++) for( j=0; j<10; j++ )\n- \tn_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;\n-#endif\n-\t}\n-\telse // protein\n-\t{\n-\t for( i=0; i<20; i++) for( j=0; j<20; j++ )\n- \tn_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;\n-\t for( i=0; i<20; 
i++) for( j=0; j<20; j++ )\n- \tn_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;\n-\t}\n-\n-#if 0\n-\t\treporterr( "amino_dis (offset = %d): \\n", offset );\n-\t\tfor( i=0; i<20; i++ )\n-\t\t{\n-\t\t\tfor( j=0; j<20; j++ ) \n-\t\t\t{\n-\t\t\t\treporterr( "%5d", amino_dis[(int)amino[i]][(int)amino[j]] );\n-\t\t\t}\n-\t\t\treporterr( "\\n" );\n-\t\t}\n-\n-\t\treporterr( "amino_disLN (offsetLN = %d): \\n", offsetLN );\n-\t\tfor( i=0; i<20; i++ )\n-\t\t{\n-\t\t\tfor( j=0; j<20; j++ ) \n-\t\t\t{\n-\t\t\t\treporterr( "%5d", amino_disLN[(int)amino[i]][(int)amino[j]] );\n-\t\t\t}\n-\t\t\treporterr( "\\n" );\n-\t\t}\n-\n-\t\treporterr( "n_dis (offset = %d): \\n", offset );\n-\t\tfor( i=0; i<26; i++ )\n-\t\t{\n-\t\t\tfor( j=0; j<26; j++ ) \n-\t\t\t{\n-\t\t\t\treporterr( "%5d", n_dis[i][j] );\n-\t\t\t}\n-\t\t\treporterr( "\\n" );\n-\t\t}\n-\n-\t\treporterr( "n_disFFT (offsetFFT = %d): \\n", offsetFFT );\n-\t\tfor( i=0; i<26; i++ )\n-\t\t{\n-\t\t\tfor( j=0; j<26; j++ ) \n-\t\t\t{\n-\t\t\t\treporterr( "%5d", n_disFFT[i][j] );\n-\t\t\t}\n-\t\t\treporterr( "\\n" );\n-\t\t}\n-exit( 1 );\n-#endif\n-\n-\n-\tppid = 0;\n-\n-\n-\tif( fftThreshold == NOTSPECIFIED )\n-\t{\n-\t\tfftThreshold = FFT_THRESHOLD;\n-\t}\n-\tif( fftWinSize == NOTSPECIFIED )\n-\t{\n-\t\tif( dorp == \'d\' ) \n-\t\t\tfftWinSize = FFT_WINSIZE_D;\n-\t\telse \n-\t\t\tfftWinSize = FFT_WINSIZE_P;\n-\t}\n-\n-\n-\tif( fftscore )\n-\t{\n-\t\tdouble av, sd;\n-\n-\t\tfor( i=0; i<20; i++ ) polarity[i] = polarity_[i];\n-\t\tfor( av=0.0, i=0; i<20; i++ ) av += polarity[i];\n-\t\tav /= 20.0;\n-\t\tfor( sd=0.0, i=0; i<20; i++ ) sd += ( polarity[i]-av ) * ( polarity[i]-av );\n-\t\tsd /= 20.0; sd = sqrt( sd );\n-\t\tfor( i=0; i<20; i++ ) polarity[i] -= av;\n-\t\tfor( i=0; i<20; i++ ) polarity[i] /= sd;\n-\t\n-\t\tfor( i=0; i<20; i++ ) volume[i] = volume_[i];\n-\t\tfor( av=0.0, i=0; i<20; i++ ) av += volume[i];\n-\t\tav /= 20.0;\n-\t\tfor( sd=0.0, i=0; i<20; i++ ) sd += ( volume[i]-av ) * ( volume[i]-av );\n-\t\tsd /= 20.0; sd = 
sqrt( sd );\n-\t\tfor( i=0; i<20; i++ ) volume[i] -= av;\n-\t\tfor( i=0; i<20; i++ ) volume[i] /= sd;\n-\n-#if 0\n-\t\tfor( i=0; i<20; i++ ) fprintf( stdout, "amino=%c, pol = %f<-%f, vol = %f<-%f\\n", amino[i], polarity[i], polarity_[i], volume[i], volume_[i] );\n-\t\tfor( i=0; i<20; i++ ) fprintf( stdout, "%c %+5.3f %+5.3f\\n", amino[i], volume[i], polarity[i] );\n-#endif\n-\t}\n-}\n-\n-void freeconstants()\n-{\n-\tFreeDoubleMtx( n_disLN );\n-\tFreeIntMtx( n_dis );\n-\tFreeIntMtx( n_disFFT );\n-\tFreeDoubleMtx( n_dis_consweight_multi );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/contrafoldwrap.c --- a/mafft/core/contrafoldwrap.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,312 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-\n-static char *whereiscontrafold;\n-\n-void unknown_n( char *out, char *in )\n-{\n-\twhile( *in )\n-\t{\n-\t\tif( *in == \'a\' || *in == \'A\' )\n-\t\t\t*out = \'A\';\n-\t\telse if( *in == \'t\' || *in == \'T\' || *in == \'u\' || *in == \'U\' )\n-\t\t\t*out = \'U\';\n-\t\telse if( *in == \'g\' || *in == \'G\' )\n-\t\t\t*out = \'G\';\n-\t\telse if( *in == \'c\' || *in == \'C\' )\n-\t\t\t*out = \'C\';\n-\t\telse if( *in == \'-\' )\n-\t\t\t*out = \'-\';\n-\t\telse\n-\t\t\t*out = \'N\';\n-\n-\t\tout++;\n-\t\tin++;\n-\t}\n-\t*out = 0;\n-}\n-\n-void outcontrafold( FILE *fp, RNApair **pairprob, int length )\n-{\n-\tint i;\n-\tRNApair *pt;\n-\tfor( i=0; i<length; i++ ) for( pt=pairprob[i]; pt->bestpos!=-1; pt++ )\n-\t{\n-\t\tif( pt->bestpos > i ) \n-\t\t\tfprintf( fp, "%d %d %f\\n", i, pt->bestpos, pt->bestscore );\n-\t}\n-}\n-\n-#if 1\n-static void readcontrafold( FILE *fp, RNApair **pairprob, int length )\n-{\n-\tchar gett[10000];\n-\tint *pairnum;\n-\tchar *pt;\n-\tint i;\n-\tint left, right;\n-\tfloat prob;\n-\n-\tpairnum = (int *)calloc( length, sizeof( int ) );\n-\tfor( i=0; i<length; i++ ) pairnum[i] = 0;\n-\n-\twhile( 1 )\n-\t{\n-\t\tif( feof( fp ) ) break;\n-\t\tfgets( gett, 9999, fp );\n-\n-//\t\tfprintf( stderr, "gett=%s\\n", gett );\n-\n-\t\tpt = gett;\n-\n-\t\tsscanf( gett, "%d ", &left );\n-\t\tleft--;\n-\n-//\t\tfprintf( stderr, "left=%d\\n", left );\n-\t\tpt = strchr( pt, \' \' ) + 1;\n-//\t\tfprintf( stderr, "pt=%s\\n", pt );\n-\n-\t\twhile( (pt = strchr( pt, \' \' ) ) )\n-\t\t{\n-\t\t\tpt++;\n-//\t\t\tfprintf( stderr, "pt=%s\\n", pt );\n-\t\t\tsscanf( pt, "%d:%f", &right, &prob );\n-\t\t\tright--;\n-\n-//\t\t\tfprintf( stderr, "%d-%d, %f\\n", left, right, prob );\n-\n-\t\t\tpairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) );\n-\t\t\tpairprob[left][pairnum[left]].bestscore = prob;\n-\t\t\tpairprob[left][pairnum[left]].bestpos = 
right;\n-\t\t\tpairnum[left]++;\n-\t\t\tpairprob[left][pairnum[left]].bestscore = -1.0;\n-\t\t\tpairprob[left][pairnum[left]].bestpos = -1;\n-//\t\t\tfprintf( stderr, "%d-%d, %f\\n", left, right, prob );\n-\n-\t\t\tpairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) );\n-\t\t\tpairprob[right][pairnum[right]].bestscore = prob;\n-\t\t\tpairprob[right][pairnum[right]].bestpos = left;\n-\t\t\tpairnum[right]++;\n-\t\t\tpairprob[right][pairnum[right]].bestscore = -1.0;\n-\t\t\tpairprob[right][pairnum[right]].bestpos = -1;\n-//\t\t\tfprintf( stderr, "%d-%d, %f\\n", right, left, prob );\n-\t\t}\n-\t}\n-\tfree( pairnum );\n-}\n-#endif\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\tinputfile = NULL;\n-\tdorp = NOTSPECIFIED;\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\twhereiscontrafold = NULL;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( (c = *++argv[0]) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "inputfile = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'d\':\n-\t\t\t\t\twhereiscontrafold = *++argv;\n-\t\t\t\t\tfprintf( stderr, "whereiscontrafold = %s\\n", whereiscontrafold );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n- default:\n- fprintf( stderr, "illegal option %c\\n", c );\n- argc = 0;\n- break;\n- }\n-\t\t}\n-\t\tnextoption:\n-\t\t\t;\n-\t}\n- if( argc != 0 ) \n- {\n- fprintf( stderr, "options: Check source file !\\n" );\n- exit( 1 );\n- }\n-}\n-\n-\n-int main( int argc, char *argv[] )\n-{\n-\tstatic char com[10000];\n-\tstatic int *nlen;\t\n-\tint left, right;\n-\tint res;\n-\tstatic char **name, **seq, **nogap;\n-\tstatic int **gapmap;\n-\tstatic int *order;\n-\tint i, j;\n-\tFILE *infp;\n-\tRNApair ***pairprob;\n-\tRNApair **alnpairprob;\n-\tRNApair *pairprobpt;\n-\tRNApair *pt;\n-\tint *alnpairnum;\n-\tfloat prob;\n-\tint adpos;\n-\n-\targuments( argc, 
argv );\n-\n-\tif( inputfile )\n-\t{\n-\t\tinfp = fopen( inputfile, "r" );\n-\t\tif( !infp )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot open %s\\n", inputfile );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\telse\n-\t\tinfp = stdin;\n-\n-\tif( !whereiscontrafold )\n-\t\twhereiscontrafold = "";\n-\n-\tgetnumlen( infp );\n-\trewind( infp );\n-\n-\tif( dorp != \'d\' )\n-\t{\n-\t\tfprintf( st'..b'job, nlenmax*2+1 );\n-\tgapmap = AllocateIntMtx( njob, nlenmax*2+1 );\n-\torder = AllocateIntVec( njob );\n-\tname = AllocateCharMtx( njob, B+1 );\n- nlen = AllocateIntVec( njob );\n-\tpairprob = (RNApair ***)calloc( njob, sizeof( RNApair ** ) );\n-\talnpairprob = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) );\n-\talnpairnum = AllocateIntVec( nlenmax );\n-\n-\tfor( i=0; i<nlenmax; i++ ) alnpairnum[i] = 0;\n-\n-\treadData_pointer( infp, name, nlen, seq );\n-\tfclose( infp );\n-\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tpairprob[i] = (RNApair **)calloc( nlenmax, sizeof( RNApair * ) );\n-\t\tfor( j=0; j<nlenmax; j++ )\n-\t\t{\n-\t\t\tpairprob[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );\n-\t\t\tpairprob[i][j][0].bestpos = -1;\n-\t\t\tpairprob[i][j][0].bestscore = -1.0;\n-\t\t}\n-\t\tunknown_n( nogap[i], seq[i] );\n-\t\torder[i] = i;\n-\t}\n-\tfor( j=0; j<nlenmax; j++ )\n-\t{\n-\t\talnpairprob[j] = (RNApair *)calloc( 1, sizeof( RNApair ) );\n-\t\talnpairprob[j][0].bestpos = -1;\n-\t\talnpairprob[j][0].bestscore = -1.0;\n-\t}\n-\n-\n-\tconstants( njob, seq );\n-\n-\tfprintf( stderr, "running contrafold\\n" );\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tfprintf( stderr, "%d / %d\\n", i+1, njob );\n-\t\tcommongappick_record( 1, nogap+i, gapmap[i] );\n-\t\tinfp = fopen( "_contrafoldin", "w" );\n-\t\tfprintf( infp, ">in\\n%s\\n", nogap[i] );\n-\t\tfclose( infp );\n-#if 0 // contrafold v1\n-\t\tsprintf( com, "env PATH=%s contrafold predict _contrafoldin --posteriors 0.01 > _contrafoldout", whereiscontrafold );\n-#else // contrafold v2\n-\t\tsprintf( com, "env PATH=%s contrafold predict 
_contrafoldin --posteriors 0.01 _contrafoldout", whereiscontrafold );\n-#endif\n-\t\tres = system( com );\n-\t\tif( res )\n-\t\t{\n-\t\t\tfprintf( stderr, "error in contrafold\\n" );\n-\t\t\tfprintf( stderr, "=================================================================\\n" );\n-\t\t\tfprintf( stderr, "=================================================================\\n" );\n-\t\t\tfprintf( stderr, "==\\n" );\n-\t\t\tfprintf( stderr, "== This version of MAFFT supports CONTRAfold v2.02.\\n" );\n-\t\t\tfprintf( stderr, "== If you have a lower version of CONTRAfold installed in the\\n" );\n-\t\t\tfprintf( stderr, "== %s directory,\\n", whereiscontrafold );\n-\t\t\tfprintf( stderr, "== please update it!\\n" );\n-\t\t\tfprintf( stderr, "==\\n" );\n-\t\t\tfprintf( stderr, "=================================================================\\n" );\n-\t\t\tfprintf( stderr, "=================================================================\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\n-\n-\t\tinfp = fopen( "_contrafoldout", "r" );\n-\t\treadcontrafold( infp, pairprob[i], nlenmax );\n-\t\tfclose( infp );\n-\t\tfprintf( stdout, ">%d\\n", i );\n-\t\toutcontrafold( stdout, pairprob[i], nlenmax );\n-\t}\n-\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tfor( j=0; j<nlen[i]; j++ ) for( pairprobpt=pairprob[i][j]; pairprobpt->bestpos!=-1; pairprobpt++ )\n-\t\t{\n-\t\t\tleft = gapmap[i][j];\n-\t\t\tright = gapmap[i][pairprobpt->bestpos];\n-\t\t\tprob = pairprobpt->bestscore;\n-\n-\t\t\tfor( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ )\n-\t\t\t\tif( pt->bestpos == right ) break;\n-\n-\t\t\tif( pt->bestpos == -1 )\n-\t\t\t{\n-\t\t\t\talnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) );\n-\t\t\t\tadpos = alnpairnum[left];\n-\t\t\t\talnpairnum[left]++;\n-\t\t\t\talnpairprob[left][adpos].bestscore = 0.0;\n-\t\t\t\talnpairprob[left][adpos].bestpos = right;\n-\t\t\t\talnpairprob[left][adpos+1].bestscore = 
-1.0;\n-\t\t\t\talnpairprob[left][adpos+1].bestpos = -1;\n-\t\t\t\tpt = alnpairprob[left]+adpos;\n-\t\t\t}\n-\t\t\telse\n-\t\t\t\tadpos = pt-alnpairprob[left];\n-\n-\t\t\tpt->bestscore += prob;\n-\t\t\tif( pt->bestpos != right )\n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "okashii!\\n" );\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-//\t\t\tfprintf( stderr, "adding %d-%d, %f\\n", left, right, prob );\n-\t\t}\n-\t}\n-\treturn( 0 );\n-\n-#if 0\n-\tfprintf( stdout, "result=\\n" );\n-\n-\tfor( i=0; i<nlenmax; i++ ) for( pairprobpt=alnpairprob[i]; pairprobpt->bestpos!=-1; pairprobpt++ )\n-\t{\n-\t\tpairprobpt->bestscore /= (float)njob;\n-\t\tleft = i;\n-\t\tright = pairprobpt->bestpos;\n-\t\tprob = pairprobpt->bestscore;\n-\t\tfprintf( stdout, "%d-%d, %f\\n", left, right, prob );\n-\t}\n-\n-\treturn( 0 );\n-#endif\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/countlen.c --- a/mafft/core/countlen.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,61 +0,0 @@ -#include "mltaln.h" - -#define DEBUG 0 - -void arguments( int argc, char *argv[] ) -{ - int c; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( (c = *++argv[0]) ) - { - switch( c ) - { - case 'i': - inputfile = *++argv; - fprintf( stderr, "inputfile = %s\n", inputfile ); - --argc; - goto nextoption; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc != 0 ) - { - fprintf( stderr, "options: Check source file !\n" ); - exit( 1 ); - } -} - - -int main( int argc, char *argv[] ) -{ - FILE *infp; - int nlenmin; - - arguments( argc, argv ); - - if( inputfile ) - { - infp = fopen( inputfile, "r" ); - if( !infp ) - { - fprintf( stderr, "Cannot open %s\n", inputfile ); - exit( 1 ); - } - } - else - infp = stdin; - - dorp = NOTSPECIFIED; - getnumlen_nogap( infp, &nlenmin ); - - fprintf( stdout, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); - return( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/defs.c --- a/mafft/core/defs.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,158 +0,0 @@ -#include <stdio.h> -#include "dp.h" -#include "mltaln.h" - -int TLS commonAlloc1 = 0; -int TLS commonAlloc2 = 0; -int TLS **commonIP = NULL; -int TLS **commonJP = NULL; -int nthread = 1; -int randomseed = 0; -int parallelizationstrategy = BAATARI1; - - -char modelname[500]; -int njob, nlenmax; -int amino_n[0x80]; -char amino_grp[0x80]; -int amino_dis[0x80][0x80]; -double **n_disLN; -double amino_dis_consweight_multi[0x80][0x80]; -int **n_dis; -int **n_disFFT; -double **n_dis_consweight_multi; -char amino[0x80]; -double polarity[0x80]; -double volume[0x80]; -int ribosumdis[37][37]; - -int ppid; -double thrinter; -double fastathreshold; -int pslocal, ppslocal; -int constraint; -int divpairscore; -int fmodel; // 1-> fmodel 0->default -1->raw -int nblosum; // 45, 50, 62, 80 -int kobetsubunkatsu; -int bunkatsu; -int dorp; -int niter; -int contin; -int calledByXced; -int devide; -int scmtd; -int weight; -int utree; -int tbutree; -int refine; -int check; -double cut; -int cooling; -int trywarp = 0; -int penalty, ppenalty, penaltyLN; -int penalty_dist, ppenalty_dist; -int RNApenalty, RNAppenalty; -int RNApenalty_ex, RNAppenalty_ex; -int penalty_ex, ppenalty_ex, penalty_exLN; -int penalty_EX, ppenalty_EX; -int penalty_OP, ppenalty_OP; -int penalty_shift, ppenalty_shift; -double penalty_shift_factor = 100.0; -int RNAthr, RNApthr; -int offset, poffset, offsetLN, offsetFFT; -int scoremtx; -int TMorJTT; -char use_fft; -char force_fft; -int nevermemsave; -int fftscore; -int fftWinSize; -int fftThreshold; -int fftRepeatStop; -int fftNoAnchStop; -int divWinSize; -int divThreshold; -int disp; -int outgap = 1; -char alg; -int cnst; -int mix; -int tbitr; -int tbweight; -int tbrweight; -int disopt; -int pamN; -int checkC; -float geta2; -int treemethod; -int kimuraR; -char *swopt; -int fftkeika; -int score_check; -int makedistmtx; -char *inputfile; -char *addfile; -int addprofile = 1; -int rnakozo; -char rnaprediction; -int scoreout = 0; -int spscoreout = 0; -int 
outnumber = 0; -int legacygapcost = 0; - -char *signalSM; -FILE *prep_g; -FILE *trap_g; -char **seq_g; -char **res_g; - -float consweight_multi = 1.0; -float consweight_rna = 0.0; -char RNAscoremtx = 'n'; - -char TLS *newgapstr = "-"; - -int nalphabets = 26; -int nscoredalphabets = 20; - -double specificityconsideration = 0.0; -int ndistclass = 10; -int maxdistclass = -1; - -int gmsg = 0; - -double sueff_global = SUEFF; - -void initglobalvariables() -{ - commonAlloc1 = 0; - commonAlloc2 = 0; - commonIP = NULL; - commonJP = NULL; - nthread = 1; - randomseed = 0; - parallelizationstrategy = BAATARI1; - - trywarp = 0; - penalty_shift_factor = 100.0; - outgap = 1; - addprofile = 1; - scoreout = 0; - outnumber = 0; - legacygapcost = 0; - consweight_multi = 1.0; - consweight_rna = 0.0; - RNAscoremtx = 'n'; - - newgapstr = "-"; - - nalphabets = 26; - nscoredalphabets = 20; - - specificityconsideration = 0.0; - ndistclass = 10; - maxdistclass = -1; - - gmsg = 0; -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/disttbfast.c --- a/mafft/core/disttbfast.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,2867 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 0\n-#define SKIP 1\n-\n-#define END_OF_VEC -1\n-\n-static int nadd;\n-static int treein;\n-static int topin;\n-static int treeout;\n-static int noalign;\n-static int distout;\n-static float lenfaca, lenfacb, lenfacc, lenfacd;\n-static int tuplesize;\n-static int subalignment;\n-static int subalignmentoffset;\n-static int nguidetree;\n-static int sparsepickup;\n-#if 0\n-#define PLENFACA 0.0123\n-#define PLENFACB 10252\n-#define PLENFACC 10822\n-#define PLENFACD 0.5\n-#define DLENFACA 0.01\n-#define DLENFACB 2445\n-#define DLENFACC 2412\n-#define DLENFACD 0.1\n-#else\n-#define PLENFACA 0.01\n-#define PLENFACB 10000\n-#define PLENFACC 10000\n-#define PLENFACD 0.1\n-#define D6LENFACA 0.01\n-#define D6LENFACB 2500\n-#define D6LENFACC 2500\n-#define D6LENFACD 0.1\n-#define D10LENFACA 0.01\n-#define D10LENFACB 1000000\n-#define D10LENFACC 1000000\n-#define D10LENFACD 0.0\n-#endif\n-\n-typedef struct _jobtable\n-{\n- int i; \n- int j; \n-} Jobtable;\n-\n-typedef struct _msadistmtxthread_arg\n-{\n-\tint njob;\n-\tint thread_no;\n-\tfloat *selfscore;\n-\tfloat **iscore;\n-\tchar **seq;\n-\tint **skiptable;\n-\tJobtable *jobpospt;\n-#ifdef enablemultithread\n-\tpthread_mutex_t *mutex;\n-#endif\n-} msadistmtxthread_arg_t;\n-\n-#ifdef enablemultithread\n-// ue futatsu ha singlethread demo tsukau\n-typedef struct _treebasethread_arg\n-{\n-\tint thread_no;\n-\tint njob;\n-\tint *nrunpt;\n-\tint *nlen;\n-\tint *jobpospt;\n-\tint ***topol;\n-\tTreedep *dep;\n-\tchar **aseq;\n-\tdouble *effarr;\n-\tint *alloclenpt;\n-\tint *fftlog;\n-\tchar *mergeoralign;\n-\tfloat **newdistmtx;\n-\tfloat *selfscore;\n-\tpthread_mutex_t *mutex;\n-\tpthread_cond_t *treecond;\n-} treebasethread_arg_t;\n-\n-typedef struct _distancematrixthread_arg\n-{\n-\tint thread_no;\n-\tint njob;\n-\tint *jobpospt;\n-\tint **pointt;\n-\tfloat **mtx;\n-\tpthread_mutex_t *mutex;\n-} 
distancematrixthread_arg_t;\n-#endif\n-\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\n-\tnthread = 1;\n-\toutnumber = 0;\n-\ttopin = 0;\n-\ttreein = 0;\n-\ttreeout = 0;\n-\tdistout = 0;\n-\tnoalign = 0;\n-\tnevermemsave = 0;\n-\tinputfile = NULL;\n-\tnadd = 0;\n-\taddprofile = 1;\n-\tfftkeika = 0;\n-\tconstraint = 0;\n-\tnblosum = 62;\n-\tfmodel = 0;\n-\tcalledByXced = 0;\n-\tdevide = 0;\n-\tuse_fft = 0;\n-\tforce_fft = 0;\n-\tfftscore = 1;\n-\tfftRepeatStop = 0;\n-\tfftNoAnchStop = 0;\n- weight = 3;\n- utree = 1;\n-\ttbutree = 1;\n- refine = 0;\n- check = 1;\n- cut = 0.0;\n- disp = 0;\n- outgap = 1;\n- alg = \'A\';\n- mix = 0;\n-\ttbitr = 0;\n-\tscmtd = 5;\n-\ttbweight = 0;\n-\ttbrweight = 3;\n-\tcheckC = 0;\n-\ttreemethod = \'X\';\n-\tsueff_global = 0.1;\n-\tcontin = 0;\n-\tscoremtx = 1;\n-\tkobetsubunkatsu = 0;\n-\tdorp = NOTSPECIFIED;\n-\tppenalty_dist = NOTSPECIFIED;\n-\tppenalty = -1530;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tpenalty_shift_factor = 1000.0;\n-\tpoffset = -123;\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\tgeta2 = GETA2;\n-\tfftWinSize = NOTSPECIFIED;\n-\tfftThreshold = NOTSPECIFIED;\n-\tTMorJTT = JTT;\n-\tscoreout = 0;\n-\tspscoreout = 0;\n-\ttuplesize = 6;\n-\tsubalignment = 0;\n-\tsubalignmentoffset = 0;\n-\tlegacygapcost = 0;\n-\tspecificityconsideration = 0.0;\n-\tnguidetree = 1;\n-\tsparsepickup = 0;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( (c = *++argv[0]) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\treporterr( "inputfile = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'I\':\n-\t\t\t\t\tnadd = myatoi( *++argv );\n-\t\t\t\t\treporterr( "nadd = %d\\n", nadd );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'V\':\n-\t\t\t\t\tppenalty_dist = (int)( atof( *++argv ) * 1000 - 0.5 );\n-//\t\t\t\t\tfprintf( stderr, "ppenalty = %d\\n", ppenalty );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto 
nextoption;\n-\t\t\t\tcase \'f\':\n-\t\t\t\t\tppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-//\t\t\t\t\treporterr( "ppenalty = %d\\n", ppenalty );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'Q\':\n-\t\t\t\t\tpenalty_shift_factor = atof( *++argv );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'g\':\n-\t\t\t\t\tppenalty_ex = (int)( ato'..b'-\t\t\t}\n-\t\t\telse\n-#endif\n-\t\t\t{\n-//\t\t\t\treporterr( "Check source!\\n" );\n-//\t\t\t\texit( 1 );\n-\n-#if 1\n-\t\t\t\tmsadistmtxthread_arg_t *targ;\n-\t\t\t\tJobtable jobpos;\n-\n-\t\t\t\tjobpos.i = 0;\n-\t\t\t\tjobpos.j = 0;\n-\t\n-\t\t\t\ttarg = calloc( 1, sizeof( msadistmtxthread_arg_t ) );\n-\t\n-\t\t\t\t{\n-\t\t\t\t\ttarg[0].thread_no = 0;\n-\t\t\t\t\ttarg[0].njob = njob;\n-\t\t\t\t\ttarg[0].selfscore = selfscore;\n-\t\t\t\t\ttarg[0].iscore = mtx;\n-\t\t\t\t\ttarg[0].seq = bseq;\n-\t\t\t\t\ttarg[0].skiptable = skiptable;\n-\t\t\t\t\ttarg[0].jobpospt = &jobpos;\n-\t\n-\t\t\t\t\tmsadistmtxthread( targ );\n-\t\t\t\t}\n-\t\n-\t\t\t\tfree( targ );\n-#endif\n-\t\t\t}\n-\t\t\tif( skiptable) FreeIntMtx( skiptable ); skiptable = NULL;\n-\t\t\treporterr( "\\ndone.\\n\\n" );\n-\t\t}\n-// Distance matrix from MSA end\n-#endif\n-\n-\t\tif( calcpairdists ) \n-\t\t{\n-\t\t\tfree( selfscore );\n-\t\t\tselfscore = NULL;\n-\t\t\tFreeCharMtx( bseq );\n-\t\t\tbseq = NULL;\n-\t\t}\n-\t}\n-#if DEBUG\n-\treporterr( "closing trap_g\\n" );\n-#endif\n-//\tfclose( trap_g );\n-\n-\tif( scoreout )\n-\t{\n-\t\tunweightedspscore = plainscore( njob, bseq );\n-\t\treporterr( "\\nSCORE %s = %.0f, ", "(treebase)", unweightedspscore );\n-\t\treporterr( "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );\n-\t\treporterr( "\\n\\n" );\n-\t}\n-\n-#if DEBUG\n-\treporterr( "writing alignment to stdout\\n" );\n-#endif\n-\n-\n-\tval = 0;\n-\tif( ngui ) \n-\t{\n-\t\tien = strlen( bseq[0] );\n-\t\tif( ien > lgui )\n-\t\t{\n-\t\t\treporterr( "alignmentlength = %d, gui allocated %d", ien, 
lgui );\n-\t\t\tval = GUI_LENGTHOVER;\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tfor( i=0; i<njob; i++ ) \n-\t\t\t{\n-#if 1\n-\t\t\t\tstrcpy( seqgui[i], bseq[i] );\n-#else\n-\t\t\t\tfree( seqgui[i] );\n-\t\t\t\tseqgui[i] = bseq[i];\n-#endif\n-\t\t\t}\n-\t\t}\n-\t}\n-\telse\n-\t{\n-\t\twriteData_pointer( stdout, njob, name, nlen, bseq );\n-\t} \n-\n-\tif( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\\n", sumofpairsscore( njob, bseq ) );\n-\tSHOWVERSION;\n-\n-\tif( subalignment )\n-\t{\n-\t\tFreeIntMtx( subtable );\n-\t\tfree( insubtable );\n-\t\tfor( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );\n-\t\tfree( subalnpt );\n-\t\tfree( preservegaps );\n-\t}\n-\n-#if 1 // seqgui[i] = bseq[i] no toki bseq ha free shinai\n-\tFreeCharMtx( bseq );\n-#endif\n-\tFreeCharMtx( name );\n- free( nlen );\n-\n-\tfree( mergeoralign );\n-\tFreeCharMtx( seq );\n- free( nogaplen );\n-\n-\tfree( mseq1 );\n-\tfree( mseq2 );\n-//\tFreeIntCub( topol ); // \n-//\tFreeFloatMtx( len ); //\n-//\tfree( mergeoralign ); //\n-\tfree( dep );\n-\n-\tif( nadd ) free( addmem );\n-\tfree( eff );\n-\tfreeconstants();\n-\tcloseFiles();\n-\tFreeCommonIP();\n-\treturn( val );\n-\n-chudan:\n-\n-\tif( nlen ) free( nlen ); nlen = NULL;\n-\tif( seq ) FreeCharMtx( seq ); seq = NULL;\n-\tif( mseq1 ) free( mseq1 ); mseq1 = NULL;\n-\tif( mseq2 ) free( mseq2 ); mseq2 = NULL;\n-\tif( topol ) \n-\t{\n-\t\tfor( i=0; i<njob; i++ )\n-\t\t{\n-\t\t\tif( topol[i] && topol[i][0] ) \n-\t\t\t{\n-\t\t\t\tfree( topol[i][0] ); topol[i][0] = NULL;\n-\t\t\t}\n-\t\t\tif( topol[i] && topol[i][1] ) \n-\t\t\t{\n-\t\t\t\tfree( topol[i][1] ); topol[i][1] = NULL;\n-\t\t\t}\n-\t\t\tif( topol[i] ) free( topol[i] ); topol[i] = NULL;\n-\t\t}\n-\t\tfree( topol ); topol = NULL;\n-\t}\n-\tif( len ) FreeFloatMtx( len ); len = NULL;\n-\tif( eff ) free( eff ); eff = NULL;\n-\tif( mergeoralign ) free( mergeoralign ); mergeoralign = NULL;\n-\tif( dep ) free( dep ); dep = NULL;\n-\tif( addmem ) free( addmem ); addmem = NULL;\n-\tif( name 
) FreeCharMtx( name ); name = NULL;\n-\tif( nogaplen ) free( nogaplen ); nogaplen = NULL;\n-\n-\tif( tmpseq ) free( tmpseq ); tmpseq = NULL;\n-\tif( grpseq ) free( grpseq ); grpseq = NULL;\n-\tif( pointt ) FreeIntMtx( pointt ); pointt = NULL;\n-\tif( mtx ) FreeFloatHalfMtx( mtx, njob ); mtx = NULL;\n-\tif( table1 ) free( table1 ); table1 = NULL;\n-\n-\tif( bseq ) FreeCharMtx( bseq ); bseq = NULL;\n-\tif( selfscore ) free( selfscore ); selfscore = NULL;\n-\tif( skiptable ) FreeIntMtx( skiptable ); skiptable = NULL;\n-\n-\tfreeconstants();\n-\tcloseFiles();\n-\tFreeCommonIP();\n-\n-\treturn( GUI_CANCEL );\n-}\n-\n-int main( int argc, char **argv )\n-{\n-\tint res = disttbfast( 0, 0, NULL, NULL, argc, argv, NULL );\n-\tif( res == GUI_CANCEL ) res = 0; // treeout de goto chudan wo riyousuru\n-\treturn res;\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/dndblast.c --- a/mafft/core/dndblast.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,398 +0,0 @@\n-#include "mltaln.h"\n-#include <sys/types.h>\n-#include <unistd.h>\n-#define DEBUG 0\n-#define TEST 0\n-\n-\n-int howmanyx( char *s )\n-{\n-\tint val = 0;\n-\tif( scoremtx == -1 )\n-\t{\n-\t\tdo\n-\t\t{\n-\t\t\tif( !strchr( "atgcuATGCU", *s ) ) val++;\n-\t\t} while( *++s );\n-\t}\n-\telse\n-\t{\n-\t\tdo\n-\t\t{\n-\t\t\tif( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++;\n-\t\t} while( *++s );\n-\t}\n-\treturn( val );\n-}\n-\n-void arguments( int argc, char *argv[] )\n-{\n-\tint c;\n-\n-\tinputfile = NULL;\n-\tdisopt = 0;\n-\tdivpairscore = 0;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( (c = *++argv[0]) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "inputfile = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'I\':\n-\t\t\t\t\tdisopt = 1;\n-\t\t\t\t\tbreak;\n- default:\n- fprintf( stderr, "illegal option %c\\n", c );\n- argc = 0;\n- break;\n- }\n-\t\t}\n-\t\tnextoption:\n-\t\t\t;\n-\n-\t}\n- if( argc != 0 )\n- {\n- fprintf( stderr, "options: -i\\n" );\n- exit( 1 );\n- }\n-}\n-\n-int main( int argc, char *argv[] )\n-{\n-\tint ktuple;\n-\tint i, j;\n-\tFILE *infp;\n-\tFILE *hat2p;\n-\tFILE *hat3p;\n-\tchar **seq = NULL; // by D.Mathog\n-\tchar **seq1;\n-\tstatic char **name;\n-\tstatic char **name1;\n-\tstatic int nlen1[M];\n-\tdouble **mtx;\n-\tdouble **mtx2;\n-\tstatic int nlen[M];\n-\tchar b[B];\n-\tdouble max;\n-\tchar com[1000];\n-\tint opt[M];\n-\tint res;\n-\tchar *home;\n-\tchar queryfile[B];\n-\tchar datafile[B];\n-\tchar fastafile[B];\n-\tchar hat2file[B];\n-\tint pid = (int)getpid();\n-\tLocalHom **localhomtable, *tmpptr;\n-#if 1\n-\thome = getenv( "HOME" );\n-#else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ \n-\thome = NULL;\n-#endif\n-\n-#if DEBUG\n-\tif( home ) fprintf( stderr, "home = %s\\n", home );\n-#endif\n-\tif( !home ) home = "";\n-\tsprintf( queryfile, "%s/tmp/query-%d", home, pid 
);\n-\tsprintf( datafile, "%s/tmp/data-%d", home, pid );\n-\tsprintf( fastafile, "%s/tmp/fasta-%d", home, pid );\n-\tsprintf( hat2file, "hat2-%d", pid );\n-\n-\n-\targuments( argc, argv );\n-\n-\tif( inputfile )\n-\t{\n-\t\tinfp = fopen( inputfile, "r" );\n-\t\tif( !infp )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot open %s\\n", inputfile );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\telse\n-\t\tinfp = stdin;\n-#if 0\n-\tPreRead( infp, &njob, &nlenmax );\n-#else\n-\tdorp = NOTSPECIFIED;\n-\tgetnumlen( infp );\n-#endif\n-\n-\tif( dorp == \'d\' )\n-\t{\n-\t\tscoremtx = -1;\n-\t\tpamN = NOTSPECIFIED;\n-\t}\n-\telse\n-\t{\n-\t\tnblosum = 62;\n-\t\tscoremtx = 1;\n-\t}\n-\tconstants( njob, seq );\n-\n-\trewind( infp );\n-\n-\tname = AllocateCharMtx( njob, B+1 );\n-\tname1 = AllocateCharMtx( njob, B+1 );\n-\tseq = AllocateCharMtx( njob, nlenmax+1 );\n-\tseq1 = AllocateCharMtx( 2, nlenmax+1 );\n-\tmtx = AllocateDoubleMtx( njob, njob );\n-\tmtx2 = AllocateDoubleMtx( njob, njob );\n-\tlocalhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );\n-\tfor( i=0; i<njob; i++)\n-\t{\n-\t\tlocalhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );\n-\t\tfor( j=0; j<njob; j++) \n-\t\t{\n-\t\t\tlocalhomtable[i][j].start1 = -1;\n-\t\t\tlocalhomtable[i][j].end1 = -1;\n-\t\t\tlocalhomtable[i][j].start2 = -1;\n-\t\t\tlocalhomtable[i][j].end2 = -1;\n-\t\t\tlocalhomtable[i][j].opt = -1.0; \n-\t\t\tlocalhomtable[i][j].next = NULL; \n-\n-\t\t}\n- }\n-\n-#if 0\n-\tFRead( infp, name, nlen, seq );\n-#else\n-\treadData_pointer( infp, name, nlen, seq );\n-#endif\n-\tfclose( infp );\n-\t\n-\tif( scoremtx == -1 ) ktuple = 6;\n-\telse ktuple = 1;\n-\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tgappick0( seq1[0], seq[i] ); \n-\t\tstrcpy( seq[i], seq1[0] );\n-\t}\n-\tfor( j=0; j<njob; j++ )\n-\t{\n-\t\tsprintf( name1[j], "+==========+%d ", j );\n-\t\tnlen1[j] = nlen[j];\n-\t}\n-\n-\tfor( i=0; i<njob; i++ ) \n-\t{\n-//\t\tfprintf( stderr, "### i = %d\\n", i );\n-\n-\t\tif( i % 10 == 0 
)\n-\t\t{\n-\t\t\tfprintf( stderr, "formatting .. " );\n-\t\t\that2p = fopen( datafile, "w" );\n-\t\t\tif( !hat2p ) ErrorExit( "Cannot open datafile." );\n-\t\t\tWriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i );\n-\t\t\tfclose( hat2p );\n-\t\t\n-\t\t\tif( scoremtx == -1 )\n-\t\t\t\tsprintf'..b'\tif( i < njob-1 ) for( jj=i; jj<i+5; jj++ ) \n-\t\t\t\tfprintf( stdout, "mtx[%d][%d] = %f\\n", i+1, jj+1, mtx[i][jj] );\n-\t\t}\n-#endif\n-\t\tfprintf( stderr, "query : %4d / %d\\n", i+1, njob );\n-\t}\n-\n-#if 1\n-\tfprintf( stderr, "##### writing hat3\\n" );\n-\that3p = fopen( "hat3", "w" );\n-\tif( !hat3p ) ErrorExit( "Cannot open hat3." );\n-\tfor( i=0; i<njob; i++ ) for( j=0; j<njob; j++ )\n-\t{\n-//\t\tfprintf( stderr, "mtx[%d][%d] = %f, mtx[%d][%d] = %f\\n", i, j, mtx[i][j], j, i, mtx[j][i] );\n-\t\tif( i == j ) continue;\n-\t\tif( mtx[i][j] == mtx[j][i] && i > j ) continue;\n-\t\tif( mtx[j][i] > mtx[i][j] ) continue;\n-\t\tfor( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )\n-\t\t{\n-\t\t\tif( tmpptr->opt == -1.0 ) continue;\n-\t\t\tfprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next );\n-\t\t}\n-\t}\n-\tfclose( hat3p );\n-#endif\n-\n-\tfor( i=0; i<njob; i++ ) \n-\t{\n-//\t\tfprintf( stderr, "### i = %d\\n", i );\n-\t\that2p = fopen( datafile, "w" );\n-\t\tif( !hat2p ) ErrorExit( "Cannot open datafile." );\n-\t\tWriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i );\n-\t\tfclose( hat2p );\n-\n-//\t\tseq1[0] = seq[i];\n-//\t\tnlen1[0] = nlen[i];\n-\n-\t\that2p = fopen( queryfile, "w" );\n-\t\tif( !hat2p ) ErrorExit( "Cannot open queryfile." 
);\n-\t\tWriteForFasta( hat2p, 1, name1+i, nlen+i, seq+i ); \n-\t\tfclose( hat2p );\n-\n-\n-\t\tif( scoremtx == -1 )\n-\t\t\tsprintf( com, "fasta34 -z3 -m10 -n -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile );\n-\t\telse\n-\t\t\tsprintf( com, "fasta34 -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", M, M, 0, queryfile, datafile, ktuple, fastafile );\n-\t\tres = system( com );\n-\t\tif( res ) ErrorExit( "error in fasta" );\n-\n-\n-\t\that2p = fopen( fastafile, "r" );\n-\t\tif( hat2p == NULL ) \n-\t\t\tErrorExit( "file \'fasta.$$\' does not exist\\n" );\n-\t\tres = ReadFasta34noalign( hat2p, mtx[i], i, name1, localhomtable[i] );\n-\t\tfclose( hat2p );\n-\t\tif( res < njob - i )\n-\t\t{\n-\t\t\tfprintf( stderr, "count (fasta34 -z 3) = %d\\n", res );\n-\t\t\texit( 1 );\n-\t\t}\n-\n-\n-\t\tif( i == 0 )\n-\t\t\tfor( j=0; j<njob; j++ ) opt[j] = (int)mtx[0][j];\n-\n-\n-#if 0\n-\t\t{\n-\t\t\tint ii, jj;\n-\t\t\tif( i < njob-1 ) for( jj=i; jj<i+5; jj++ ) \n-\t\t\t\tfprintf( stdout, "mtx[%d][%d] = %f\\n", i+1, jj+1, mtx[i][jj] );\n-\t\t}\n-#endif\n-\t\tfprintf( stderr, "query : %4d\\r", i+1 );\n-\t}\n-\n-\n-\n-\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tmax = mtx[i][i];\n-\t\tif( max == 0.0 )\n-\t\t{\n-\t\t\tfor( j=0; j<njob; j++ )\n-\t\t\t\tmtx2[i][j] = 2.0;\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tfor( j=0; j<njob; j++ )\n-\t\t\t{\n-\t\t\t\tmtx2[i][j] = ( max - mtx[MIN(i,j)][MAX(i,j)] ) / max * 2.0;\n-//\t\t\t\tfprintf( stdout, "max = %f, mtx[%d][%d] = %f -> %f\\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] );\n-\t\t\t}\n-\t\t}\n-\t}\n-\tfor( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )\n-\t{\n-//\t\tfprintf( stdout, "mtx2[%d][%d] = %f, %f\\n", i+1, j+1, mtx2[i][j], mtx2[j][i] );\n-\t\tmtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] );\n-\t}\n-\n-#if 0\n-\t{\n-\t\tint ii, jj;\n-\t\tif( i < njob-1 ) for( jj=i+1; jj<njob; jj++ ) \n-\t\t\tfprintf( stderr, "mtx2[][] = %f\\n", mtx2[i][jj] );\n-\t}\n-#endif\n-\n-\tfor( i=0; i<njob; i++ ) name[i][0] = \'=\';\n-\n-\tif( 
disopt )\n-\t{\n-\t\tstrcpy( b, name[0] );\n-\t\tsprintf( name[0], "=query====lgth=%04d-%04d %.*s", nlen[0], howmanyx( seq[0] ), B-30, b );\n-#if 0\n-\t\tstrins( b, name[0] );\n-#endif\n-\t\tfor( i=1; i<njob; i++ ) \n-\t\t{\t\n-\t\t\tstrcpy( b, name[i] );\n-\t\t\tsprintf( name[i], "=opt=%04d=lgth=%04d-%04d %.*s", opt[i], nlen[i], howmanyx( seq[i] ), B-30, b );\n-#if 0\n-\t\t\tstrins( b, name[i] );\n-#endif\n-\t\t}\n-\t}\n-\n-\that2p = fopen( hat2file, "w" );\n-\tif( !hat2p ) ErrorExit( "Cannot open hat2." );\n-\tWriteHat2_pointer( hat2p, njob, name, mtx2 );\n-\tfclose( hat2p );\n-\n-\n-\tsprintf( com, "/bin/rm %s %s %s", queryfile, datafile, fastafile );\n-\tsystem( com );\n-\n-#if 0\n-\tsprintf( com, ALNDIR "/supgsdl < %s", hat2file );\n-\tres = system( com );\n-\tif( res ) ErrorExit( "error in spgsdl" );\n-#endif\n-\n-\tsprintf( com, "mv %s hat2", hat2file );\n-\tres = system( com );\n-\tif( res ) ErrorExit( "error in mv" );\n-\n-\tSHOWVERSION;\n-\texit( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/dndfast4.c --- a/mafft/core/dndfast4.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,241 +0,0 @@ -#include "mltaln.h" -#include <sys/types.h> -#include <unistd.h> -#define DEBUG 0 -#define TEST 0 - - -int howmanyx( char *s ) -{ - int val = 0; - if( scoremtx == -1 ) - { - do - { - if( !strchr( "atgcuATGCU", *s ) ) val++; - } while( *++s ); - } - else - { - do - { - if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++; - } while( *++s ); - } - return( val ); -} - -void arguments( int argc, char *argv[] ) -{ - int c; - - disopt = 0; - - while( --argc > 0 && (*++argv)[0] == '-' ) - while ( c = *++argv[0] ) - switch( c ) - { - case 'i': - disopt = 1; - break; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - if( argc != 0 ) - { - fprintf( stderr, "options: -i\n" ); - exit( 1 ); - } -} - -int main( int argc, char *argv[] ) -{ - int ktuple; - int i, j; - FILE *hat2p; - char **seq; - char **seq1; - static char name[M][B]; - static char name1[M][B]; - static int nlen1[M]; - double **mtx; - double **mtx2; - static int nlen[M]; - char b[B]; - double max; - char com[B]; - int opt[M]; - int res; - char *home; - char queryfile[B]; - char datafile[B]; - char fastafile[B]; - char hat2file[B]; - int pid = (int)getpid(); -#if 0 - home = getenv( "HOME" ); -#else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ - home = NULL; -#endif - -#if DEBUG - if( home ) fprintf( stderr, "home = %s\n", home ); -#endif - if( !home ) home = ""; - sprintf( queryfile, "%s/tmp/query-%d\0", home, pid ); - sprintf( datafile, "%s/tmp/data-%d\0", home, pid ); - sprintf( fastafile, "%s/tmp/fasta-%d\0", home, pid ); - sprintf( hat2file, "hat2-%d\0", pid ); - - arguments( argc, argv ); -#if 0 - PreRead( stdin, &njob, &nlenmax ); -#else - getnumlen( stdin ); -#endif - rewind( stdin ); - - seq = AllocateCharMtx( njob, nlenmax+1 ); - seq1 = AllocateCharMtx( 2, nlenmax+1 ); - mtx = AllocateDoubleMtx( njob, njob ); - mtx2 = AllocateDoubleMtx( njob, njob ); - -#if 0 - FRead( stdin, name, nlen, seq ); -#else - readData( stdin, name, nlen, seq ); 
-#endif - if( scoremtx == -1 ) ktuple = 6; - else ktuple = 1; - - for( i=0; i<njob; i++ ) - { - gappick0( seq1[0], seq[i] ); - strcpy( seq[i], seq1[0] ); - } - for( j=0; j<njob; j++ ) - { - sprintf( name1[j], "+==========+%d \0", j ); - nlen1[j] = nlen[j]; - } - hat2p = fopen( datafile, "w" ); - if( !hat2p ) ErrorExit( "Cannot open datafile." ); - WriteForFasta( hat2p, njob, name1, nlen1, seq ); - fclose( hat2p ); - - for( i=0; i<njob; i++ ) - { - - hat2p = fopen( datafile, "w" ); - if( !hat2p ) ErrorExit( "Cannot open datafile." ); - WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i ); - fclose( hat2p ); - - - seq1[0] = seq[i]; - nlen1[0] = nlen[i]; - - hat2p = fopen( queryfile, "w" ); - if( !hat2p ) ErrorExit( "Cannot open queryfile." ); - WriteForFasta( hat2p, 1, name1+i, nlen1, seq1 ); - fclose( hat2p ); - - if( scoremtx == -1 ) - sprintf( com, "fasta3 -n -Q -h -b%d -E%d -d%d %s %s %d > %s\0", M, M, 0, queryfile, datafile, ktuple, fastafile ); - else - sprintf( com, "fasta3 -Q -h -b%d -E%d -d%d %s %s %d > %s\0", M, M, 0, queryfile, datafile, ktuple, fastafile ); - res = system( com ); - if( res ) ErrorExit( "error in fasta" ); - - hat2p = fopen( fastafile, "r" ); - if( hat2p == NULL ) - ErrorExit( "file 'fasta.$$' does not exist\n" ); - ReadFasta3( hat2p, mtx[i], njob-i, name1 ); - - if( i == 0 ) - for( j=0; j<njob; j++ ) opt[j] = (int)mtx[0][j]; - - fclose( hat2p ); - -#if 1 - { - int ii, jj; - if( i < njob-1 ) for( jj=i; jj<i+5; jj++ ) - fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] ); - } -#endif - fprintf( stderr, "query : %#4d\n", i+1 ); - } - - for( i=0; i<njob; i++ ) - { - max = mtx[i][i]; - if( max == 0.0 ) - { - for( j=0; j<njob; j++ ) - mtx2[i][j] = 2.0; - } - else - { - for( j=0; j<njob; j++ ) - { - mtx2[i][j] = ( max - mtx[MIN(i,j)][MAX(i,j)] ) / max * 2.0; -// fprintf( stdout, "max = %f, mtx[%d][%d] = %f -> %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); - } - } - } - for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) - { -// 
fprintf( stdout, "mtx2[%d][%d] = %f, %f\n", i+1, j+1, mtx2[i][j], mtx2[j][i] ); - mtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] ); - } -#if 0 - { - int ii, jj; - if( i < njob-1 ) for( jj=i+1; jj<njob; jj++ ) - fprintf( stderr, "mtx2[][] = %f\n", mtx2[i][jj] ); - } -#endif - - for( i=0; i<njob; i++ ) name[i][0] = '='; - - if( disopt ) - { - strcpy( b, name[0] ); - sprintf( name[0], "=query====lgth=%#04d-%04d %.*s\0", nlen[0], howmanyx( seq[0] ), B-30, b ); -#if 0 - strins( b, name[0] ); -#endif - for( i=1; i<njob; i++ ) - { - strcpy( b, name[i] ); - sprintf( name[i], "=opt=%#04d=lgth=%#04d-%04d %.*s\0", opt[i], nlen[i], howmanyx( seq[i] ), B-30, b ); -#if 0 - strins( b, name[i] ); -#endif - } - } - - hat2p = fopen( hat2file, "w" ); - if( !hat2p ) ErrorExit( "Cannot open hat2." ); - WriteHat2( hat2p, njob, name, mtx2 ); - fclose( hat2p ); - - sprintf( com, "/bin/rm %s %s %s", queryfile, datafile, fastafile ); - system( com ); - -#if 0 - sprintf( com, ALNDIR "/supgsdl < %s\0", hat2file ); - res = system( com ); - if( res ) ErrorExit( "error in spgsdl" ); -#endif - - sprintf( com, "mv %s hat2", hat2file ); - res = system( com ); - if( res ) ErrorExit( "error in mv" ); - - SHOWVERSION; - exit( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/dndfast7.c --- a/mafft/core/dndfast7.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,342 +0,0 @@ -#include "mltaln.h" -#include <sys/types.h> -#include <unistd.h> -#define DEBUG 0 -#define TEST 0 - - -int howmanyx( char *s ) -{ - int val = 0; - if( scoremtx == -1 ) - { - do - { - if( !strchr( "atgcuATGCU", *s ) ) val++; - } while( *++s ); - } - else - { - do - { - if( !strchr( "ARNDCQEGHILKMFPSTWYV", *s ) ) val++; - } while( *++s ); - } - return( val ); -} - -void arguments( int argc, char *argv[] ) -{ - int c; - - inputfile = NULL; - disopt = 0; - divpairscore = 0; - swopt = ""; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( (c = *++argv[0]) ) - { - switch( c ) - { - case 'i': - inputfile = *++argv; - fprintf( stderr, "inputfile = %s\n", inputfile ); - --argc; - goto nextoption; - case 'I': - disopt = 1; - break; - case 'A': - swopt = "-A"; - break; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc != 0 ) - { - fprintf( stderr, "options: -i\n" ); - exit( 1 ); - } -} - -int main( int argc, char *argv[] ) -{ - int ktuple; - int i, j; - FILE *hat2p; - FILE *hat3p; - FILE *infp; - char **seq = NULL; // by D.Mathog - char **seq1; - char **name; - char **name1; - static int nlen1[M]; - double **mtx; - double **mtx2; - static int nlen[M]; - static char b[B]; - double max; - char com[1000]; - int opt[M]; - int res; - char *home; - char *fastapath; - char queryfile[B]; - char datafile[B]; - char fastafile[B]; - char hat2file[B]; - int pid = (int)getpid(); - LocalHom **localhomtable, *tmpptr; -#if 0 - home = getenv( "HOME" ); -#else /* $HOME wo tsukau to fasta ni watasu hikisuu ga afureru */ - home = NULL; -#endif - fastapath = getenv( "FASTA_4_MAFFT" ); - if( !fastapath ) - fastapath = "fasta34"; - -#if DEBUG - if( home ) fprintf( stderr, "home = %s\n", home ); -#endif - if( !home ) home = ""; - sprintf( queryfile, "%s/tmp/query-%d", home, pid ); - sprintf( datafile, "%s/tmp/data-%d", home, pid ); - sprintf( fastafile, "%s/tmp/fasta-%d", home, pid ); - sprintf( 
hat2file, "hat2-%d", pid ); - - - arguments( argc, argv ); - - if( inputfile ) - { - infp = fopen( inputfile, "r" ); - if( !infp ) - { - fprintf( stderr, "Cannot open %s\n", inputfile ); - exit( 1 ); - } - } - else - infp = stdin; - - - -#if 0 - PreRead( stdin, &njob, &nlenmax ); -#else - dorp = NOTSPECIFIED; - getnumlen( infp ); -#endif - - if( dorp == 'd' ) - { - scoremtx = -1; - pamN = NOTSPECIFIED; - } - else - { - nblosum = 62; - scoremtx = 1; - } - constants( njob, seq ); - - rewind( infp ); - - name = AllocateCharMtx( njob, B+1 ); - name1 = AllocateCharMtx( njob, B+1 ); - seq = AllocateCharMtx( njob, nlenmax+1 ); - seq1 = AllocateCharMtx( 2, nlenmax+1 ); - mtx = AllocateDoubleMtx( njob, njob ); - mtx2 = AllocateDoubleMtx( njob, njob ); - localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) ); - for( i=0; i<njob; i++) - { - localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) ); - for( j=0; j<njob; j++) - { - localhomtable[i][j].start1 = -1; - localhomtable[i][j].end1 = -1; - localhomtable[i][j].start2 = -1; - localhomtable[i][j].end2 = -1; - localhomtable[i][j].opt = -1.0; - localhomtable[i][j].next = NULL; - } - } - -#if 0 - FRead( infp, name, nlen, seq ); -#else - readData_pointer( infp, name, nlen, seq ); -#endif - fclose( infp ); - - if( scoremtx == -1 ) ktuple = 6; - else ktuple = 1; - - for( i=0; i<njob; i++ ) - { - gappick0( seq1[0], seq[i] ); - strcpy( seq[i], seq1[0] ); - } - for( j=0; j<njob; j++ ) - { - sprintf( name1[j], "+==========+%d ", j ); - nlen1[j] = nlen[j]; - } - hat2p = fopen( datafile, "w" ); - if( !hat2p ) ErrorExit( "Cannot open datafile." ); - WriteForFasta( hat2p, njob, name1, nlen1, seq ); - fclose( hat2p ); - - for( i=0; i<njob; i++ ) - { -// fprintf( stderr, "### i = %d\n", i ); - hat2p = fopen( datafile, "w" ); - if( !hat2p ) ErrorExit( "Cannot open datafile." 
); - WriteForFasta( hat2p, njob-i, name1+i, nlen1+i, seq+i ); - fclose( hat2p ); - - seq1[0] = seq[i]; - nlen1[0] = nlen[i]; - - hat2p = fopen( queryfile, "w" ); - if( !hat2p ) ErrorExit( "Cannot open queryfile." ); - WriteForFasta( hat2p, 1, name1+i, nlen1, seq1 ); - fclose( hat2p ); - - - if( scoremtx == -1 ) - sprintf( com, "%s %s -z3 -m10 -n -Q -b%d -E%d -d%d %s %s %d > %s", fastapath, swopt, M, M, M, queryfile, datafile, ktuple, fastafile ); - else - sprintf( com, "%s %s -z3 -m10 -Q -b%d -E%d -d%d %s %s %d > %s", fastapath, swopt, M, M, M, queryfile, datafile, ktuple, fastafile ); - res = system( com ); - if( res ) ErrorExit( "error in fasta" ); - - - - hat2p = fopen( fastafile, "r" ); - if( hat2p == NULL ) - ErrorExit( "file 'fasta.$$' does not exist\n" ); - if( scoremtx == -1 ) - res = ReadFasta34m10_nuc( hat2p, mtx[i], i, name1, localhomtable[i] ); - else - res = ReadFasta34m10( hat2p, mtx[i], i, name1, localhomtable[i] ); - fclose( hat2p ); - - if( res < njob - i ) - { - fprintf( stderr, "count (fasta34 -z 3) = %d\n", res ); - exit( 1 ); - } - - - if( i == 0 ) - for( j=0; j<njob; j++ ) opt[j] = (int)mtx[0][j]; - - -#if 0 - { - int ii, jj; - if( i < njob-1 ) for( jj=i; jj<i+5; jj++ ) - fprintf( stdout, "mtx[%d][%d] = %f\n", i+1, jj+1, mtx[i][jj] ); - } -#endif - fprintf( stderr, "query : %4d / %5d\r", i+1, njob ); - } - - for( i=0; i<njob; i++ ) - { - max = mtx[i][i]; - if( max == 0.0 ) - { - for( j=0; j<njob; j++ ) - mtx2[i][j] = 2.0; - } - else - { - for( j=0; j<njob; j++ ) - { -// fprintf( stderr, "##### mtx[%d][%d] = %f\n", i, j, mtx[i][j] ); - mtx2[i][j] = ( max - mtx[MIN(i,j)][MAX(i,j)] ) / max * 2.0; -// fprintf( stdout, "max = %f, mtx[%d][%d] = %f -> %f\n", max, i+1, j+1, mtx[i][j], mtx2[i][j] ); - } - } - } - for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) - { -// fprintf( stdout, "mtx2[%d][%d] = %f, %f\n", i+1, j+1, mtx2[i][j], mtx2[j][i] ); - mtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] ); - } - -#if 0 - { - int ii, jj; - if( i < njob-1 ) for( 
jj=i+1; jj<njob; jj++ ) - fprintf( stderr, "mtx2[][] = %f\n", mtx2[i][jj] ); - } -#endif - - for( i=0; i<njob; i++ ) name[i][0] = '='; - - if( disopt ) - { - strcpy( b, name[0] ); - sprintf( name[0], "=query====lgth=%04d-%04d %.*s", nlen[0], howmanyx( seq[0] ), B-30, b ); -#if 0 - strins( b, name[0] ); -#endif - for( i=1; i<njob; i++ ) - { - strcpy( b, name[i] ); - sprintf( name[i], "=opt=%04d=lgth=%04d-%04d %.*s", opt[i], nlen[i], howmanyx( seq[i] ), B-30, b ); -#if 0 - strins( b, name[i] ); -#endif - } - } - - hat2p = fopen( hat2file, "w" ); - if( !hat2p ) ErrorExit( "Cannot open hat2." ); - WriteHat2_pointer( hat2p, njob, name, mtx2 ); - fclose( hat2p ); - -#if 1 - fprintf( stderr, "##### writing hat3\n" ); - hat3p = fopen( "hat3", "w" ); - if( !hat3p ) ErrorExit( "Cannot open hat3." ); - for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) - { - for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next ) - { - if( tmpptr->opt == -1.0 ) continue; - fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next ); - } - } - fclose( hat3p ); -#endif - - sprintf( com, "/bin/rm %s %s %s", queryfile, datafile, fastafile ); - system( com ); - -#if 0 - sprintf( com, ALNDIR "/supgsdl < %s", hat2file ); - res = system( com ); - if( res ) ErrorExit( "error in spgsdl" ); -#endif - - sprintf( com, "mv %s hat2", hat2file ); - res = system( com ); - if( res ) ErrorExit( "error in mv" ); - - SHOWVERSION; - exit( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/dndpre.c --- a/mafft/core/dndpre.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,399 +0,0 @@\n-#include "mltaln.h"\n-\n-#define TEST 0\n-\n-static int treeout = 0;\n-static int maxdist = 1;\n-static int nadd = 0;\n-\n-#ifdef enablemultithread\n-typedef struct _jobtable\n-{\n- int i; \n- int j; \n-} Jobtable;\n-\n-typedef struct _thread_arg\n-{\n-\tint njob;\n-\tint thread_no;\n-\tfloat *selfscore;\n-\tdouble **mtx;\n-\tchar **seq;\n-\tint **skiptable;\n-\tJobtable *jobpospt;\n-\tpthread_mutex_t *mutex;\n-} thread_arg_t;\n-\n-void *athread( void *arg )\n-{\n-\tthread_arg_t *targ = (thread_arg_t *)arg;\n-\tint njob = targ->njob;\n-\tint thread_no = targ->thread_no;\n-\tfloat *selfscore = targ->selfscore;\n-\tdouble **mtx = targ->mtx;\n-\tchar **seq = targ->seq;\n-\tint **skiptable = targ->skiptable;\n-\tJobtable *jobpospt = targ->jobpospt;\n-\n-\tint i, j;\n-\tfloat ssi, ssj, bunbo;\n-\tdouble mtxv;\n-\n-\tif( njob == 1 ) return( NULL );\n-\t\n-\twhile( 1 )\n-\t{\n-\t\tpthread_mutex_lock( targ->mutex );\n-\t\tj = jobpospt->j;\n-\t\ti = jobpospt->i;\n-\t\tj++;\n-//\t\tfprintf( stderr, "\\n i=%d, j=%d before check\\n", i, j );\n-\t\tif( j == njob )\n-\t\t{\n-//\t\t\tfprintf( stderr, "\\n j = %d, i = %d, njob = %d\\n", j, i, njob );\n-\t\t\tfprintf( stderr, "%4d/%4d (thread %4d), dndpre\\r", i+1, njob, thread_no );\n-\t\t\ti++;\n-\t\t\tj = i + 1;\n-\t\t\tif( i == njob-1 )\n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "\\n i=%d, njob-1=%d\\n", i, njob-1 );\n-\t\t\t\tpthread_mutex_unlock( targ->mutex );\n-\t\t\t\treturn( NULL );\n-\t\t\t}\n-\t\t}\n-//\t\tfprintf( stderr, "\\n i=%d, j=%d after check\\n", i, j );\n-\t\tjobpospt->j = j;\n-\t\tjobpospt->i = i;\n-\t\tpthread_mutex_unlock( targ->mutex );\n-\n-\t\tssi = selfscore[i];\n-\t\tssj = selfscore[j];\n-\n-\t\tbunbo = MIN( ssi, ssj );\n-\t\tif( bunbo == 0.0 )\n-\t\t\tmtxv = maxdist;\n-\t\telse\n-//\t\t\tmtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / bunbo );\n-\t\t\tmtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], 
penalty ) / bunbo );\n-#if 1\n-\t\tif( mtxv > 9.0 || mtxv < 0.0 )\n-\t\t{\n-\t\t\tfprintf( stderr, "Distance %d-%d is strange, %f.\\n", i, j, mtxv );\n-\t\t\texit( 1 );\n-\t\t}\n-#else // CHUUI!!! 2012/05/16\n-\t\tif( mtxv > 2.0 )\n-\t\t{\n-\t\t\tmtxv = 2.0;\n-\t\t}\n-\t\tif( mtxv < 0.0 )\n-\t\t{\n-\t\t\tfprintf( stderr, "Distance %d-%d is strange, %f.\\n", i, j, mtxv );\n-\t\t\texit( 1 );\n-\t\t}\n-#endif\n-\t\tmtx[i][j] = mtxv;\n-\t}\n-}\n-\n-#endif\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\n-\tnadd = 0;\n-\tnthread = 1;\n-\talg = \'X\';\n-\tfmodel = 0;\n-\ttreeout = 0;\n-\tscoremtx = 1;\n-\tnblosum = 62;\n-\tdorp = NOTSPECIFIED;\n-\tinputfile = NULL;\n-\tppenalty = NOTSPECIFIED; //?\n-\tppenalty_ex = NOTSPECIFIED; //?\n-\tpoffset = NOTSPECIFIED; //?\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( (c = *++argv[0]) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'t\':\n-\t\t\t\t\ttreeout = \'1\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'D\':\n-\t\t\t\t\tdorp = \'d\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'a\':\n-\t\t\t\t\tfmodel = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'P\':\n-\t\t\t\t\tdorp = \'p\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'K\': // Hontou ha iranai. 
disttbfast.c, tbfast.c to awaserutame.\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'I\':\n-\t\t\t\t\tnadd = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "nadd = %d\\n", nadd );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'f\':\n-\t\t\t\t\tppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'g\':\n-\t\t\t\t\tppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'h\':\n-\t\t\t\t\tpoffset = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'k\':\n-\t\t\t\t\tkimuraR = myatoi( *++argv );\n-//\t\t\t\t\tfprintf( stderr, "kimuraR = %d\\n", kimuraR );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'b\':\n-\t\t\t\t\tnblosum = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 1;\n-//\t\t\t\t\tfprintf( stderr, "blosum %d\\n", nblosum );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'j\':\n-\t\t\t\t\tpamN = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 0;\n-\t\t\t\t\tTMorJTT = JTT;\n-\t\t\t\t\tfprintf( stderr, "jtt %d\\n", pamN );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'m\':\n-\t\t\t\t\tpamN = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 0;\n-\t\t\t\t\tTMorJTT = TM;\n-\t\t\t\t\tfprintf( stder'..b'nt **skiptable = NULL;\n-\n-\n-\targuments( argc, argv );\n-#ifndef enablemultithread\n-\tnthread = 0;\n-#endif\n-\n-\tif( inputfile )\n-\t{\n-\t\tinfp = fopen( inputfile, "r" );\n-\t\tif( !infp )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot open %s\\n", inputfile );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\telse\n-\t\tinfp = stdin;\n-\n-#if 0\n-\tPreRead( stdin, &njob, &nlenmax );\n-#else\n-\tgetnumlen( infp );\n-#endif\n-\trewind( infp );\n-\n-\tnjob -= nadd; // atarashii hairetsu ha mushi\n-\n-\tseq = AllocateCharMtx( njob, nlenmax+1 );\n-\tname = AllocateCharMtx( njob, B+1 );\n-\tmtx = AllocateDoubleMtx( njob, njob );\n-\tselfscore = AllocateFloatVec( 
njob );\n-\tnlen = AllocateIntVec( njob );\n-\n-\n-#if 0\n-\tFRead( stdin, name, nlen, seq );\n-#else\n-\treadData_pointer( infp, name, nlen, seq );\n-#endif\n-\tfclose( infp );\n-\n-\n-\tfor( i=1; i<njob; i++ )\n-\t{\n-\t\tif( nlen[i] != nlen[0] )\n-\t\t{\n-\t\t\treporterr( "Not aligned!\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\n-\tconstants( njob, seq );\n-\n-#if 0\n-\tfor( i=0; i<njob-1; i++ ) \n-\t{\n-\t\tfprintf( stderr, "%4d/%4d\\r", i+1, njob );\n-\t\tfor( j=i+1; j<njob; j++ ) \n-\t\t\tmtx[i][j] = (double)substitution_hosei( seq[i], seq[j] );\n-//\t\t\tfprintf( stderr, "i=%d,j=%d, l=%d &&& %f\\n", i, j, nlen[0], mtx[i][j] );\n-\t}\n-#else // 061003\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tselfscore[i] = (float)naivepairscore11( seq[i], seq[i], penalty );\n-\t}\n-\n-\tskiptable = AllocateIntMtx( njob, 0 );\n-\tmakeskiptable( njob, skiptable, seq ); // allocate suru.\n-\n-#ifdef enablemultithread\n-\tif( nthread > 0 )\n-\t{\n-\t\tthread_arg_t *targ;\n-\t\tJobtable jobpos;\n-\t\tpthread_t *handle;\n-\t\tpthread_mutex_t mutex;\n-\n-\t\tjobpos.i = 0;\n-\t\tjobpos.j = 0;\n-\n-\t\ttarg = calloc( nthread, sizeof( thread_arg_t ) );\n-\t\thandle = calloc( nthread, sizeof( pthread_t ) );\n-\t\tpthread_mutex_init( &mutex, NULL );\n-\n-\t\tfor( i=0; i<nthread; i++ )\n-\t\t{\n-\t\t\ttarg[i].thread_no = i;\n-\t\t\ttarg[i].njob = njob;\n-\t\t\ttarg[i].selfscore = selfscore;\n-\t\t\ttarg[i].mtx = mtx;\n-\t\t\ttarg[i].seq = seq;\n-\t\t\ttarg[i].skiptable = skiptable;\n-\t\t\ttarg[i].jobpospt = &jobpos;\n-\t\t\ttarg[i].mutex = &mutex;\n-\n-\t\t\tpthread_create( handle+i, NULL, athread, (void *)(targ+i) );\n-\t\t}\n-\n-\t\tfor( i=0; i<nthread; i++ )\n-\t\t{\n-\t\t\tpthread_join( handle[i], NULL );\n-\t\t}\n-\t\tpthread_mutex_destroy( &mutex );\n-\t}\n-\telse\n-#endif\n-\t{\n-\t\tilim = njob-1;\n-\t\tfor( i=0; i<ilim; i++ )\n-\t\t{\n-\t\t\tssi = selfscore[i];\n-\t\t\tfprintf( stderr, "%4d/%4d\\r", i+1, njob );\n-\t\t\tfor( j=i+1; j<njob; j++ )\n-\t\t\t{\n-\t\t\t\tssj = 
selfscore[j];\n-\t\t\t\tbunbo = MIN( ssi, ssj );\n-\t\t\t\tif( bunbo == 0.0 )\n-\t\t\t\t\tmtxv = maxdist;\n-\t\t\t\telse\n-\t\t\t\t\tmtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty ) / bunbo );\n-//\t\t\t\t\tmtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / bunbo );\n-//\t\t\t\t\tmtxv = 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / MIN( selfscore[i], selfscore[j] );\n-//\t\t\t\tfprintf( stderr, "i=%d,j=%d, l=%d### %f, score = %f, %f, %f\\n", i, j, nlen[0], mtxv, naivepairscore11( seq[i], seq[j], penalty ), ssi, ssj );\n-\n-#if 1\n-\t\t\t\tif( mtxv > 9.0 || mtxv < 0.0 )\n-\t\t\t\t{\n-\t\t\t\t\tfprintf( stderr, "Distance %d-%d is strange, %f.\\n", i, j, mtxv );\n-\t\t\t\t\texit( 1 );\n-\t\t\t\t}\n-#else // CHUUI!!! 2012/05/16\n-\t\t\t\tif( mtxv > 2.0 )\n-\t\t\t\t{\n-\t\t\t\t\tmtxv = 2.0;\n-\t\t\t\t}\n-\t\t\t\tif( mtxv < 0.0 )\n-\t\t\t\t{\n-\t\t\t\t\tfprintf( stderr, "Distance %d-%d is strange, %f.\\n", i, j, mtxv );\n-\t\t\t\t\texit( 1 );\n-\t\t\t\t}\n-#endif\n-\t\t\t\tmtx[i][j] = mtxv;\n-\t\t\t}\n-\t\t}\n-\t}\n-#endif\n-\t\n-#if TEST\n-\tfor( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) \n-\t\tfprintf( stdout, "i=%d, j=%d, mtx[][] = %f\\n", i, j, mtx[i][j] );\n-#endif\n-\n-\tfp = fopen( "hat2", "w" );\n-\tWriteHat2_pointer( fp, njob, name, mtx );\n-\tfclose( fp );\n-#if 0\n-\tif( treeout )\n-\t{\n-\t\tint ***topol;\n-\t\tdouble **len;\n-\n-\t\ttopol = AllocateIntCub( njob, 2, njob );\n-\t\tlen = AllocateDoubleMtx( njob, njob );\n-\t\tveryfastsupg_double_outtree( njob, mtx, topol, len );\n-\t}\n-#endif\n-\tif( skiptable ) FreeIntMtx( skiptable ); skiptable = NULL;\n-\tSHOWVERSION;\n-\texit( 0 );\n-/*\n-\tres = system( ALNDIR "/spgsdl < hat2" );\n-\tif( res ) exit( 1 );\n-\telse exit( 0 );\n-*/\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/dndpre_score.c --- a/mafft/core/dndpre_score.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,54 +0,0 @@ -#include "mltaln.h" - -#define TEST 0 - -int main() -{ - int i, j; - char **seq; - static char name[M][B]; - static int nlen[M]; - double **mtx; - FILE *fp; - int res; - - scoremtx = NOTSPECIFIED; - -#if 0 - PreRead( stdin, &njob, &nlenmax ); -#else - getnumlen( stdin ); -#endif - rewind( stdin ); - - seq = AllocateCharMtx( njob, nlenmax+1 ); - mtx = AllocateDoubleMtx( njob, njob ); - -#if 0 - FRead( stdin, name, nlen, seq ); -#else - readData( stdin, name, nlen, seq ); -#endif - - for( i=0; i<njob-1; i++ ) - { - fprintf( stderr, "%4d/%4d\r", i+1, njob ); - for( j=i+1; j<njob; j++ ) - mtx[i][j] = (double)substitution_score( seq[i], seq[j] ); - } - -#if TEST - for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) - fprintf( stdout, "i=%d, j=%d, mtx[][] = %f\n", i, j, mtx[i][j] ); -#endif - - fp = fopen( "hat2", "w" ); - WriteHat2( fp, njob, name, mtx ); - fclose( fp ); - exit( 0 ); -/* - res = system( ALNDIR "/spgsdl < hat2" ); - if( res ) exit( 1 ); - else exit( 0 ); -*/ -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/dp.h --- a/mafft/core/dp.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,10 +0,0 @@ -#ifdef enablemultithread -#define TLS __thread -#else -#define TLS -#endif - -extern TLS int commonAlloc1; -extern TLS int commonAlloc2; -extern TLS int **commonIP; -extern TLS int **commonJP; |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/dvtditr.c --- a/mafft/core/dvtditr.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1076 +0,0 @@\n- /* Tree-dependent-iteration */\n- /* Devide to segments */ \n-\n-#include "mltaln.h"\n-\n-extern char **seq_g;\n-extern char **res_g;\n-static int subalignment;\n-static int subalignmentoffset;\n-\n-static int intop;\n-static int intree;\n-static double autosubalignment;\n-\n-\n-static void calcmaxdistclass( void )\n-{\n-\tint c;\n-\tfloat rep;\n-\tfor( c=0; c<ndistclass; c++ )\n-\t{\n-\t\trep = (double) 2 * c / ndistclass; // dist:0-2 for dist2offset \n-//\t\tfprintf( stderr, "c=%d, rep=%f, offset=%f\\n", c, rep, dist2offset( rep ) );\n-\t\tif( dist2offset( rep ) == 0.0 )\n-\t\t\tbreak;\n-\t}\n-\tfprintf( stderr, "ndistclass = %d, maxdistclass = %d\\n", ndistclass, c+1 );\n-\tmaxdistclass = c + 1;\n-//\tmaxdistclass = ndistclass; // CHUUI!!!!\n-\treturn;\n-}\n-\n-void arguments( int argc, char *argv[] )\n-{\n-\tint c;\n-\tchar *argkey;\n-\n-\toutnumber = 0;\n-\tnthread = 1;\n-\trandomseed = 0;\n-\tscoreout = 0;\n-\tspscoreout = 0;\n-\tparallelizationstrategy = BAATARI1;\n-\tintop = 0;\n-\tintree = 0;\n-\tinputfile = NULL;\n-\trnakozo = 0;\n-\trnaprediction = \'m\';\n-\tnevermemsave = 0;\n-\tscore_check = 1;\n-\tfftkeika = 1;\n-\tconstraint = 0;\n-\tfmodel = 0;\n-\tkobetsubunkatsu = 1;\n-\tbunkatsu = 1;\n-\tnblosum = 62;\n-\tniter = 100;\n-\tcalledByXced = 0;\n-\tdevide = 1;\n-\tdivWinSize = 20; /* 70 */\n-\tdivThreshold = 65;\n-\tfftscore = 1;\n-\tfftRepeatStop = 0;\n-\tfftNoAnchStop = 0;\n- scmtd = 5;\n-\tcooling = 1;\n- weight = 4;\n- utree = 1;\n- refine = 1;\n- check = 1;\n- cut = 0.0;\n-\tdisp = 0;\n-\toutgap = 1;\n-\tuse_fft = 0; // CHUUI dochira demo mafft.tmpl deha F\n-\tforce_fft = 0;\n-\talg = \'A\'; /* chuui */\n-\tmix = 0;\n-\tcheckC = 0;\n-\ttbitr = 0;\n-\ttreemethod = \'X\';\n-\tsueff_global = 0.1;\n-\tscoremtx = 1;\n-\tdorp = NOTSPECIFIED;\n-\tppenalty = NOTSPECIFIED;\n-\tpenalty_shift_factor = 1000.0;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tpoffset = NOTSPECIFIED;\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\tgeta2 = 
GETA2;\n-\tfftWinSize = NOTSPECIFIED;\n-\tfftThreshold = NOTSPECIFIED;\n-\tRNAppenalty = NOTSPECIFIED;\n-\tRNAppenalty_ex = NOTSPECIFIED;\n-\tRNApthr = NOTSPECIFIED;\n-\tTMorJTT = JTT;\n-\tconsweight_multi = 1.0;\n-\tconsweight_rna = 0.0;\n-\tsubalignment = 0;\n-\tsubalignmentoffset = 0;\n-\tlegacygapcost = 0;\n-\tspecificityconsideration = 0.0;\n-\tautosubalignment = 0.0;\n-\n-\twhile( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n-\t\twhile ( (c = *++argv[0]) )\n-\t\t{\n-\t\t\tswitch( c )\n-\t\t\t{\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "inputfile = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'I\':\n-\t\t\t\t\tniter = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "niter = %d\\n", niter );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'e\':\n-\t\t\t\t\tRNApthr = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'o\':\n-\t\t\t\t\tRNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'f\':\n-\t\t\t\t\tppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-//\t\t\t\t\tfprintf( stderr, "ppenalty = %d\\n", ppenalty );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'Q\':\n-\t\t\t\t\tpenalty_shift_factor = atof( *++argv );\n-\t\t\t\t\tif( penalty_shift_factor < 100.0 && penalty_shift_factor != 2.0 )\n-\t\t\t\t\t{\n-\t\t\t\t\t\tfprintf( stderr, "%f, penalty_shift is fixed to penalty x 2 in the iterative refinement phase.\\n", penalty_shift_factor );\n-\t\t\t\t\t\tpenalty_shift_factor = 2.0;\n-\t\t\t\t\t}\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'g\':\n-\t\t\t\t\tppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );\n-//\t\t\t\t\tfprintf( stderr, "ppenalty_ex = %d\\n", ppenalty_ex );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'h\':\n-\t\t\t\t\tpoffset = (int)( atof( *++argv ) * 1000 - 0.5 
);\n-\t\t\t\t\tfprintf( stderr, "poffset = %d\\n", poffset );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'k\':\n-\t\t\t\t\tkimuraR = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "kappa = %d\\n", kimuraR );\n-\t\t\t\t\t--argc; \n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'b\':\n-\t\t\t\t\tnblosum = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 1;\n-\t\t\t\t\tfprintf( stderr, "blosum %d / kimura 200\\n", nblosum );\n-\t\t\t\t\t--argc; \n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'j\':\n-\t\t\t\t\tpamN = myatoi( *++arg'..b' sequences are numbered as 1 .. %d\\n", subalignmentoffset );\n-\t\t\t\t\tfprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\\n", subalignmentoffset+1, subalignmentoffset+njob );\n-\t\t\t\t}\n-\t\t\t\tfprintf( stderr, "############################################################################### \\n" );\n-\t\t\t\tfprintf( stderr, "\\n" );\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-//\t\t\tcommongappick( seq[subtable[i]], subalignment[i] ); // irukamo\n-\t\t}\n-#if 0\n-\t\tfor( i=0; i<njob-1; i++ )\n-\t\t{\n-\t\t\tfprintf( stderr, "STEP %d\\n", i+1 );\n-\t\t\tfprintf( stderr, "group1 = " );\n-\t\t\tfor( j=0; topol[i][0][j] != -1; j++ )\n-\t\t\t\tfprintf( stderr, "%d ", topol[i][0][j]+1 );\n-\t\t\tfprintf( stderr, "\\n" );\n-\t\t\tfprintf( stderr, "SKIP -> %d\\n\\n", skipthisbranch[i][0] );\n-\t\t\tfprintf( stderr, "group2 = " );\n-\t\t\tfor( j=0; topol[i][1][j] != -1; j++ )\n-\t\t\t\tfprintf( stderr, "%d ", topol[i][1][j]+1 );\n-\t\t\tfprintf( stderr, "\\n" );\n-\t\t\tfprintf( stderr, "SKIP -> %d\\n\\n", skipthisbranch[i][1] );\n-\t\t}\n-#endif\n-\n-\t\tfor( i=0; i<njob; i++ ) \n-\t\t{\n-\t\t\tif( insubtable[i] ) strcpy( bseq[i], seq[i] );\n-\t\t\telse gappick0( bseq[i], seq[i] );\n-\t\t}\n-\n-\t\tfor( i=0; i<nsubalignments; i++ ) \n-\t\t{\n-\t\t\tfor( j=0; subtable[i][j]!=-1; j++ ) subalnpt[i][j] = bseq[subtable[i][j]];\n-\t\t\tcommongappick( j, subalnpt[i] );\n-\t\t}\n-\n-\t\tFreeIntMtx( subtable 
);\n-\t\tfree( insubtable );\n-\t\tfor( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );\n-\t\tfree( subalnpt );\n-\t\tfree( preservegaps );\n-\t}\n-//--------------- kokomade ----\n-\n-\n-\n-\n-\tfor( i=0; i<njob; i++ ) res_g[i][0] = 0;\n-\n-\tfor( iseg=0; iseg<nseg-1; iseg++ )\n-\t{\n-\t\tint tmplen = anchors[iseg+1]-anchors[iseg];\n-\t\tint pos = strlen( res_g[0] );\n-\t\tfor( j=0; j<njob; j++ )\n-\t\t{\n-\t\t\tstrncpy( seq[j], seq_g[j], tmplen );\n-\t\t\tseq[j][tmplen]= 0;\n-\t\t\tseq_g[j] += tmplen;\t\n-\n-\t\t}\n-\t\tfprintf( stderr, "Segment %3d/%3d %4d-%4d\\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );\n-\t\tfflush( stderr );\n-\t\tfprintf( trap_g, "Segment %3d/%3d %4d-%4d\\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );\n-\t\n-\t\tcut = ocut;\n-\t\treturnvalue = TreeDependentIteration( njob, name, nlen, seq, bseq, topol, len, eff, skipthisbranch, alloclen, localhomtable, singlerna, nkozo, kozoarivec );\n-\n-\t\tfor( i=0; i<njob; i++ )\n-\t\t\tstrcat( res_g[i], bseq[i] );\n-\t}\n-\tFreeCharMtx( seq_g_bk );\n-\tFreeIntCub( topol );\n-\tFreeDoubleMtx( len );\n-\tFreeDoubleMtx( eff );\n-\tFreeIntMtx( skipthisbranch );\n-\tfree( kozoarivec );\n-\tif( constraint ) FreeLocalHomTable( localhomtable, njob );\n-\tif( rnakozo && rnaprediction == \'m\' ) \n-\t{\n-\t\tif( singlerna ) // nen no tame\n-\t\t{\n-\t\t\tfor( i=0; i<njob; i++ ) \n-\t\t\t{\n-\t\t\t\tfor( j=0; singlerna[i][j]!=NULL; j++ )\n-\t\t\t\t{\n-\t\t\t\t\tif( singlerna[i][j] ) free( singlerna[i][j] );\n-\t\t\t\t}\n-\t\t\t\tif( singlerna[i] ) free( singlerna[i] );\n-\t\t\t}\n-\t\t\tfree( singlerna );\n-\t\t\tsinglerna = NULL;\n-\t\t}\n-\t}\n-\n-#if 0\n-\tWrite( stdout, njob, name, nlen, bseq );\n-#endif\n-\n-\tfprintf( stderr, "done\\n" );\n-\tfprintf( trap_g, "done\\n" );\n-\tfclose( trap_g );\n-\n-\n-\tdevide = 0; \n-\twritePre( njob, name, nlen, res_g, 1 );\n-#if 0\n-\twriteData( stdout, njob, name, nlen, res_g, 1 );\n-#endif\n-\n-\n-\tif( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\\n", 
sumofpairsscore( njob, res_g ) );\n-\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n-\n-#if 0\n-signed int main( int argc, char *argv[] )\n-{\n-\tint i, nlen[M];\n-\tchar b[B];\n-\tchar a[] = "=";\n-\tint value;\n-\n-\tgets( b ); njob = atoi( b );\n-\n-/*\n-\tscoremtx = 0;\n-\tif( strstr( b, "ayhoff" ) ) scoremtx = 1;\n-\telse if( strstr( b, "dna" ) || strstr( b, "DNA" ) ) scoremtx = -1;\n-\telse if( strstr( b, "M-Y" ) || strstr( b, "iyata" ) ) scoremtx = 2;\n-\telse scoremtx = 0;\n-*/\n-\tif( strstr( b, "constraint" ) ) cnst = 1;\n-\n-\tnlenmax = 0;\n-\ti = 0;\n-\twhile( i<njob )\n-\t{\n-\t\tgets( b );\n-\t\tif( !strncmp( b, a, 1 ) ) \n-\t\t{\n-\t\t\tgets( b ); nlen[i] = atoi( b );\n-\t\t\tif( nlen[i] > nlenmax ) nlenmax = nlen[i];\n-\t\t\ti++;\n-\t\t}\n-\t}\n-\tif( nlenmax > N || njob > M ) \n-\t{\n-\t\tfprintf( stderr, "ERROR in main\\n" );\n-\t\texit( 1 );\n-\t}\n-\t/*\n-\tnlenmax = Na;\n-\t*/\n-\trewind( stdin );\n-\tvalue = main1( nlen, argc, argv );\n-\texit( 0 );\n-}\n-#endif\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/f2cl.c --- a/mafft/core/f2cl.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,318 +0,0 @@ -#include "mltaln.h" - -#define DEBUG 0 - - -static char *comment; -static char *orderfile; -static int format; -static int namelen; -static int extendedalphabet; - -static void fillspace( char *seq, int lenmax ) -{ - int len = strlen( seq ); - seq += len; - lenmax -= len; - while( lenmax-- ) *seq++ = ' '; - *seq = 0; -} - -void setmark_clustal( int nlen, int nseq, char **seq, char *mark ) -{ - int i, j, k, nalpha; - char firstletter; - char *strong[9]; - char *weaker[11]; - int nstrong, nweaker; - char s; - - if( dorp == 'd' ) - { - strong[0] = "TU"; - nstrong = 1; - weaker[0] = "AG"; - weaker[1] = "CT"; - nweaker = 2; - nalpha = 10; - } - else - { - strong[0] = "STA"; - strong[1] = "NEQK"; - strong[2] = "NHQK"; - strong[3] = "NDEQ"; - strong[4] = "QHRK"; - strong[5] = "MILV"; - strong[6] = "MILF"; - strong[7] = "HY"; - strong[8] = "FYW"; - nstrong = 9; - weaker[0] = "CSA"; - weaker[1] = "ATV"; - weaker[2] = "SAG"; - weaker[3] = "STNK"; - weaker[4] = "STPA"; - weaker[5] = "SGND"; - weaker[6] = "SNDEQK"; - weaker[7] = "NDEQHK"; - weaker[8] = "NEQHRK"; - weaker[9] = "FVLIM"; - weaker[10] = "HFY"; - nweaker = 11; - nalpha = 20; - } - - for( i=0; i<nlen; i++ ) - { - mark[i] = ' '; - for( j=0; j<nseq; j++ ) - { - s = seq[j][i]; - if( '-' == s || ' ' == s ) break; - } - if( j != nseq ) - { - continue; - } - if( extendedalphabet ) - { - firstletter = seq[0][i]; - if( amino_n[(int)firstletter] < 0 ) continue; - - for( j=0; j<nseq; j++ ) - if( seq[j][i] != firstletter ) break; - if( j == nseq ) - { - mark[i] = '*'; - continue; - } - } - else - { - firstletter = toupper( seq[0][i] ); - if( amino_n[(int)firstletter] >= nalpha || amino_n[(int)firstletter] < 0 ) continue; - - for( j=0; j<nseq; j++ ) - if( toupper( seq[j][i] ) != firstletter ) break; - if( j == nseq ) - { - mark[i] = '*'; - continue; - } - for( k=0; k<nstrong; k++ ) - { - for( j=0; j<nseq; j++ ) - { - if( !strchr( strong[k], toupper( seq[j][i] ) ) ) break; - } - if( j == nseq ) break; - } - 
if( k < nstrong ) - { - mark[i] = ':'; - continue; - } - for( k=0; k<nweaker; k++ ) - { - for( j=0; j<nseq; j++ ) - { - if( !strchr( weaker[k], toupper( seq[j][i] ) ) ) break; - } - if( j == nseq ) break; - } - if( k < nweaker ) - { - mark[i] = '.'; - continue; - } - } - } - mark[nlen] = 0; -} - -void setmark( int nlen, int nseq, char **seq, char *mark ) -{ - int i, j; - - for( i=0; i<nlen; i++ ) - { - mark[i] = ' '; - for( j=0; j<nseq; j++ ) - if( '-' == seq[j][i] ) break; - if( j != nseq ) - { - continue; - } - for( j=0; j<nseq; j++ ) - if( seq[0][i] != seq[j][i] ) break; - if( j == nseq ) - { - mark[i] = '*'; - continue; - } - for( j=0; j<nseq; j++ ) - if( amino_grp[(int)seq[0][i]] != amino_grp[(int)seq[j][i]] ) break; - if( j == nseq ) - { - mark[i] = '.'; - continue; - } - } - mark[nlen] = 0; -} - -void arguments( int argc, char *argv[] ) -{ - int c; - namelen = -1; - scoremtx = 1; - nblosum = 62; - dorp = NOTSPECIFIED; - kimuraR = NOTSPECIFIED; - pamN = NOTSPECIFIED; - inputfile = NULL; - comment = NULL; - orderfile = NULL; - format = 'c'; - extendedalphabet = 0; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( (c = *++argv[0]) ) - { - switch( c ) - { - case 'i': - inputfile = *++argv; - fprintf( stderr, "inputfile = %s\n", inputfile ); - --argc; - goto nextoption; - case 'c': - comment = *++argv; - fprintf( stderr, "comment = %s\n", comment ); - --argc; - goto nextoption; - case 'r': - orderfile = *++argv; - fprintf( stderr, "orderfile = %s\n", orderfile ); - --argc; - goto nextoption; - case 'n': - namelen = myatoi( *++argv ); - fprintf( stderr, "namelen = %d\n", namelen ); - --argc; - goto nextoption; - case 'f': - format = 'f'; - break; - case 'y': - format = 'y'; - break; - case 'E': - extendedalphabet = 1; - nblosum = -2; - break; - case 'N': - extendedalphabet = 0; - break; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc != 0 ) - { - fprintf( stderr, "options: Check 
source file !\n" ); - exit( 1 ); - } -} - - -int main( int argc, char *argv[] ) -{ - static int *nlen; - static char **name, **seq, *mark; - static int *order; - int i; - FILE *infp; - FILE *orderfp; - char gett[B]; - int nlenmin; - - arguments( argc, argv ); - - - if( inputfile ) - { - infp = fopen( inputfile, "r" ); - if( !infp ) - { - fprintf( stderr, "Cannot open %s\n", inputfile ); - exit( 1 ); - } - } - else - infp = stdin; - - getnumlen_casepreserve( infp, &nlenmin ); - rewind( infp ); - - seq = AllocateCharMtx( njob, nlenmax*2+1 ); - mark = AllocateCharVec( nlenmax*2+1 ); - order = AllocateIntVec( njob ); - name = AllocateCharMtx( njob, B+1 ); - nlen = AllocateIntVec( njob ); - - - if( orderfile ) - { - orderfp = fopen( orderfile, "r" ); - if( !orderfp ) - { - fprintf( stderr, "Cannot open %s\n", orderfile ); - exit( 1 ); - } - for( i=0; i<njob; i++ ) - { - fgets( gett, B-1, orderfp ); - order[i] = atoi( gett ); - } - fclose( orderfp ); - } - else - { - for( i=0; i<njob; i++ ) order[i] = i; - } - - readData_pointer_casepreserve( infp, name, nlen, seq ); - fclose( infp ); - - if( format == 'c' || format == 'y' ) for( i=0; i<njob; i++ ) fillspace( seq[i], nlenmax ); - constants( njob, seq ); - -// initSignalSM(); - -// initFiles(); - - - -// setmark( nlenmax, njob, seq, mark ); - setmark_clustal( nlenmax, njob, seq, mark ); - - if( format == 'f' ) - writeData_reorder_pointer( stdout, njob, name, nlen, seq, order ); - else if( format == 'c' ) - clustalout_pointer( stdout, njob, nlenmax, seq, name, mark, comment, order, namelen ); - else if( format == 'y' ) - phylipout_pointer( stdout, njob, nlenmax, seq, name, order, namelen ); - else - fprintf( stderr, "Unknown format\n" ); - -// SHOWVERSION; - return( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/fft.c --- a/mafft/core/fft.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,126 +0,0 @@ -#include "mltaln.h" -#include "mtxutl.h" - -/* - from "C gengo niyoru saishin algorithm jiten" ISBN4-87408-414-1 Haruhiko Okumura -*/ -static void make_sintbl(int n, float sintbl[]) -{ - int i, n2, n4, n8; - double c, s, dc, ds, t; - - n2 = n / 2; n4 = n / 4; n8 = n / 8; - t = sin(PI / n); - dc = 2 * t * t; ds = sqrt(dc * (2 - dc)); - t = 2 * dc; c = sintbl[n4] = 1; s = sintbl[0] = 0; - for (i = 1; i < n8; i++) { - c -= dc; dc += t * c; - s += ds; ds -= t * s; - sintbl[i] = s; sintbl[n4 - i] = c; - } - if (n8 != 0) sintbl[n8] = sqrt(0.5); - for (i = 0; i < n4; i++) - sintbl[n2 - i] = sintbl[i]; - for (i = 0; i < n2 + n4; i++) - sintbl[i + n2] = - sintbl[i]; -} -/* - {\tt fft()}. -*/ -static void make_bitrev(int n, int bitrev[]) -{ - int i, j, k, n2; - - n2 = n / 2; i = j = 0; - for ( ; ; ) { - bitrev[i] = j; - if (++i >= n) break; - k = n2; - while (k <= j) { j -= k; k /= 2; } - j += k; - } -} -/* -*/ -int fft(int n, Fukusosuu *x, int freeflag) -{ - static TLS int last_n = 0; /* {\tt n} */ - static TLS int *bitrev = NULL; /* */ - static TLS float *sintbl = NULL; /* */ - int i, j, k, ik, h, d, k2, n4, inverse; - float t, s, c, dR, dI; - - if (freeflag) - { - if (bitrev) free(bitrev); bitrev = NULL; - if (sintbl) free(sintbl); sintbl = NULL; - last_n = 0; - return( 0 ); - } - - /* */ - if (n < 0) { - n = -n; inverse = 1; /* */ - } else inverse = 0; - n4 = n / 4; - if (n != last_n || n == 0) { - last_n = n; -#if 0 - if (sintbl != NULL) { - free(sintbl); - sintbl = NULL; - } - if (bitrev != NULL) { - free(bitrev); - bitrev = NULL; - } - if (n == 0) return 0; /* */ - sintbl = (float *)malloc((n + n4) * sizeof(float)); - bitrev = (int *)malloc(n * sizeof(int)); -#else /* by T. 
Nishiyama */ - sintbl = realloc(sintbl, (n + n4) * sizeof(float)); - bitrev = realloc(bitrev, n * sizeof(int)); -#endif - if (sintbl == NULL || bitrev == NULL) { - fprintf(stderr, "\n"); return 1; - } - make_sintbl(n, sintbl); - make_bitrev(n, bitrev); - } - for (i = 0; i < n; i++) { /* */ - j = bitrev[i]; - if (i < j) { - t = x[i].R; x[i].R = x[j].R; x[j].R = t; - t = x[i].I; x[i].I = x[j].I; x[j].I = t; - } - } - for (k = 1; k < n; k = k2) { /* */ -#if 0 - fprintf( stderr, "%d / %d\n", k, n ); -#endif - h = 0; k2 = k + k; d = n / k2; - for (j = 0; j < k; j++) { -#if 0 - if( j % 1 == 0 ) - fprintf( stderr, "%d / %d\r", j, k ); -#endif - c = sintbl[h + n4]; - if (inverse) s = - sintbl[h]; - else s = sintbl[h]; - for (i = j; i < n; i += k2) { -#if 0 - if( k>=4194000 ) fprintf( stderr, "in loop %d - %d < %d, k2=%d\r", j, i, n, k2 ); -#endif - ik = i + k; - dR = s * x[ik].I + c * x[ik].R; - dI = c * x[ik].I - s * x[ik].R; - x[ik].R = x[i].R - dR; x[i].R += dR; - x[ik].I = x[i].I - dI; x[i].I += dI; - } - h += d; - } - } - if (! inverse) /* n */ - for (i = 0; i < n; i++) { x[i].R /= n; x[i].I /= n; } - return 0; /* */ -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/fft.h --- a/mafft/core/fft.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,15 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <stddef.h> -#include <math.h> -#include "mtxutl.h" - -#define PI 3.14159265358979323846 -#define END_OF_VEC -1 - -#define NKOUHO 20 -#define NKOUHO_LONG 500 - -#define MAX(X,Y) ( ((X)>(Y))?(X):(Y) ) -#define MIN(X,Y) ( ((X)<(Y))?(X):(Y) ) - |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/fftFunctions.c --- a/mafft/core/fftFunctions.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,762 +0,0 @@\n-#include "mltaln.h"\n-\n-#define SEGMENTSIZE 150\n-#define TMPTMPTMP 0\n-\n-#define DEBUG 0\n-\n-void keika( char *str, int current, int all )\n-{\n-\tif( current == 0 )\n-\t\tfprintf( stderr, "%s : ", str );\n-\n-\t\tfprintf( stderr, "\\b\\b\\b\\b\\b\\b\\b\\b" );\n-\t\tfprintf( stderr, "%3d /%3d", current+1, all+1 );\n-\n-\tif( current+1 == all )\n-\t\tfprintf( stderr, "\\b\\b\\b\\b\\b\\b\\b\\bdone. \\n" );\n-}\n-\n-double maxItch( double *soukan, int size )\n-{\n-\tint i;\n-\tdouble value = 0.0;\n-\tdouble cand;\n-\tfor( i=0; i<size; i++ ) \n-\t\tif( ( cand = soukan[i] ) > value ) value = cand;\n-\treturn( value );\n-}\n-\n-void calcNaiseki( Fukusosuu *value, Fukusosuu *x, Fukusosuu *y )\n-{\n-\tvalue->R = x->R * y->R + x->I * y->I;\n-\tvalue->I = -x->R * y->I + x->I * y->R;\n-}\n-\n-Fukusosuu *AllocateFukusosuuVec( int l1 )\n-{\n-\tFukusosuu *value;\n-\tvalue = (Fukusosuu *)calloc( l1, sizeof( Fukusosuu ) );\n-\tif( !value )\n-\t{\n-\t\tfprintf( stderr, "Cannot allocate %d FukusosuuVec\\n", l1 );\n-\t\treturn( NULL );\n-\t}\n-\treturn( value );\n-}\n-\t\n-Fukusosuu **AllocateFukusosuuMtx( int l1, int l2 )\n-{\n-\tFukusosuu **value;\n-\tint j;\n-//\tfprintf( stderr, "allocating %d x %d FukusosuuMtx\\n", l1, l2 );\n-\tvalue = (Fukusosuu **)calloc( l1+1, sizeof( Fukusosuu * ) );\n-\tif( !value ) \n-\t{\n-\t\tfprintf( stderr, "Cannot allocate %d x %d FukusosuuVecMtx\\n", l1, l2 );\n-\t\texit( 1 );\n-\t}\n-\tfor( j=0; j<l1; j++ ) \n-\t{\n-\t\tvalue[j] = AllocateFukusosuuVec( l2 );\n-\t\tif( !value[j] )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot allocate %d x %d FukusosuuVecMtx\\n", l1, l2 );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\tvalue[l1] = NULL;\n-\treturn( value );\n-}\n-\n-Fukusosuu ***AllocateFukusosuuCub( int l1, int l2, int l3 )\n-{\n-\tFukusosuu ***value;\n-\tint i;\n-\tvalue = calloc( l1+1, sizeof( Fukusosuu ** ) );\n-\tif( !value ) ErrorExit( "Cannot allocate Fukusosuu" );\n-\tfor( i=0; i<l1; i++ ) value[i] = AllocateFukusosuuMtx( l2, l3 
);\n-\tvalue[l1] = NULL;\n-\treturn( value );\n-}\n-\n-void FreeFukusosuuVec( Fukusosuu *vec )\n-{\n-\tfree( (void *)vec );\n-}\n-\n-void FreeFukusosuuMtx( Fukusosuu **mtx )\n-{\n-\tint i;\n-\n-\tfor( i=0; mtx[i]; i++ ) \n-\t\tfree( (void *)mtx[i] );\n-\tfree( (void *)mtx );\n-}\n-\n-int getKouho( int *kouho, int nkouho, double *soukan, int nlen2 )\n-{\n-\tint i, j;\n-\tint nlen4 = nlen2 / 2;\n-\tdouble max;\n-\tdouble tmp;\n-\tint ikouho = 0; // by D.Mathog, iinoka?\n-\tfor( j=0; j<nkouho; j++ ) \n-\t{\n-\t\tmax = -9999.9;\n-\t\tfor( i=0; i<nlen2; i++ ) \n-\t\t{\n-\t\t\tif( ( tmp = soukan[i] ) > max )\n-\t\t\t{\n-\t\t\t\tikouho = i;\n-\t\t\t\tmax = tmp;\n-\t\t\t}\n-\t\t}\n-#if 0\n-\t\tif( max < 0.15 )\n-\t\t{\n-\t\t\tbreak;\n-\t\t}\n-#endif\n-#if 0\n-\t\tfprintf( stderr, "Kouho No.%d, pos=%d, score=%f, lag=%d\\n", j, ikouho, soukan[ikouho], ikouho-nlen4 );\n-#endif\n-\t\tsoukan[ikouho] = -9999.9;\n-\t\tkouho[j] = ( ikouho - nlen4 );\n-\t}\n-\treturn( j );\n-}\n-\n-void zurasu2( int lag, int clus1, int clus2, \n- char **seq1, char **seq2, \n-\t\t \t\t\t char **aseq1, char **aseq2 )\n-{\n-\tint i;\n-#if 0\n-\tfprintf( stderr, "### lag = %d\\n", lag );\n-#endif\n-\tif( lag > 0 )\n-\t{\n-\t\tfor( i=0; i<clus1; i++ ) aseq1[i] = seq1[i];\n-\t\tfor( i=0; i<clus2; i++ ) aseq2[i] = seq2[i]+lag;\n-\t}\n-\telse\n-\t{\n-\t\tfor( i=0; i<clus1; i++ ) aseq1[i] = seq1[i]-lag;\n-\t\tfor( i=0; i<clus2; i++ ) aseq2[i] = seq2[i];\n-\t}\n-}\n-\n-void zurasu( int lag, int clus1, int clus2, \n- char **seq1, char **seq2, \n-\t\t\t\t\t char **aseq1, char **aseq2 )\n-{\n-\tint i;\n-#if DEBUG\n-\tfprintf( stderr, "lag = %d\\n", lag );\n-#endif\n-\tif( lag > 0 )\n-\t{\n-\t\tfor( i=0; i<clus1; i++ ) strcpy( aseq1[i], seq1[i] );\n-\t\tfor( i=0; i<clus2; i++ ) strcpy( aseq2[i], seq2[i]+lag );\n-\t}\n-\telse\n-\t{\n-\t\tfor( i=0; i<clus1; i++ ) strcpy( aseq1[i], seq1[i]-lag );\n-\t\tfor( i=0; i<clus2; i++ ) strcpy( aseq2[i], seq2[i] );\n-\t}\n-}\n-\n-\n-int alignableReagion( int clus1, int 
clus2, \n-\t\t\t\t\t char **seq1, char **seq2,\n-\t\t\t\t\t double *eff1, double *eff2,\n-\t\t\t\t\t Segment *seg )\n-{\n-\tint i, j, k;\n-\tint status, starttmp = 0; // by D.Mathog, a gess\n-\tdouble score;\n-\tint value = 0;\n-\tint len, maxlen;\n-\tint length = 0; // by D.Mathog, a gess\n-\tstatic TLS double'..b'result2 = AllocateIntVec( MAXSEG );\n-\t\tocut1 = AllocateIntVec( MAXSEG );\n-\t\tocut2 = AllocateIntVec( MAXSEG );\n-\t}\n- if( crossscoresize < *ncut+2 )\n- {\n- crossscoresize = *ncut+2;\n-\t\tif( fftkeika ) fprintf( stderr, "allocating crossscore and track, size = %d\\n", crossscoresize );\n-\t\tif( track ) FreeIntMtx( track );\n- if( crossscore ) FreeDoubleMtx( crossscore );\n- if( jumppos ) FreeIntVec( jumppos );\n- if( jumpscore ) FreeDoubleVec( jumpscore );\n-\t\ttrack = AllocateIntMtx( crossscoresize, crossscoresize );\n- crossscore = AllocateDoubleMtx( crossscoresize, crossscoresize );\n- jumppos = AllocateIntVec( crossscoresize );\n- jumpscore = AllocateDoubleVec( crossscoresize );\n- }\n-\n-#if 0\n-\tfor( i=0; i<*ncut-2; i++ )\n-\t\tfprintf( stderr, "%d.start = %d, score = %f\\n", i, seg1[i]->start, seg1[i]->score );\n-\n-\tfor( i=0; i<*ncut; i++ )\n-\t\tfprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\\n", i, cut1[i], cut2[i] );\n-\tfor( i=0; i<*ncut; i++ ) \n-\t{\n-\t\tfor( j=0; j<*ncut; j++ )\n-\t\t\tfprintf( stderr, "%#4.0f ", ocrossscore[i][j] );\n-\t\tfprintf( stderr, "\\n" );\n-\t}\n-#endif\n-\n-\tfor( i=0; i<*ncut; i++ ) for( j=0; j<*ncut; j++ ) /* mudadanaa */\n-\t\tcrossscore[i][j] = ocrossscore[i][j];\n-\tfor( i=0; i<*ncut; i++ ) \n-\t{\n-\t\tocut1[i] = cut1[i];\n-\t\tocut2[i] = cut2[i];\n-\t}\n-\tfor( j=0; j<*ncut; j++ )\n-\t{\n-\t\tjumpscore[j] = -999.999;\n-\t\tjumppos[j] = -1;\n-\t}\n-\n-\tfor( i=1; i<*ncut; i++ )\n-\t{\n-\n-\t\tjumpscorei = -999.999;\n-\t\tjumpposi = -1;\n-\n-\t\tfor( j=1; j<*ncut; j++ )\n-\t\t{\n-#if 1\n-\t\t\tfprintf( stderr, "in blockalign3, ### i=%d, j=%d\\n", i, j );\n-#endif\n-\n-\n-#if 0\n-\t\t\tfor( k=0; 
k<j-2; k++ )\n-\t\t\t{\n-/*\n-\t\t\t\tfprintf( stderr, "k=%d, i=%d\\n", k, i );\n-*/\n-\t\t\t\tif( k && k<*ncut-1 && j<*ncut-1 && !permit( seg1[k-1], seg1[j-1] ) ) continue;\n-\t\t\t\tif( crossscore[i-1][k] > maxj )\n-\t\t\t\t{\n-\t\t\t\t\tpointi = k;\n-\t\t\t\t\tmaxi = crossscore[i-1][k];\n-\t\t\t\t}\n-\t\t\t}\n-\n-\t\t\tpointj = 0; maxj = 0.0;\n-\t\t\tfor( k=0; k<i-2; k++ )\n-\t\t\t{\n-\t\t\t\tif( k && k<*ncut-1 && i<*ncut-1 && !permit( seg2[k-1], seg2[i-1] ) ) continue;\n-\t\t\t\tif( crossscore[k][j-1] > maxj )\n-\t\t\t\t{\n-\t\t\t\t\tpointj = k;\n-\t\t\t\t\tmaxj = crossscore[k][j-1];\n-\t\t\t\t}\n-\t\t\t}\t\n-\n-\n-\t\t\tmaxi += penalty;\n-\t\t\tmaxj += penalty;\n-#endif\n-\t\t\tmaximum = crossscore[i-1][j-1];\n-\t\t\ttrack[i][j] = 0;\n-\n-\t\t\tif( maximum < jumpscorei && permit( seg1[jumpposi], seg1[i] ) )\n-\t\t\t{\n-\t\t\t\tmaximum = jumpscorei;\n-\t\t\t\ttrack[i][j] = j - jumpposi;\n-\t\t\t}\n-\n-\t\t\tif( maximum < jumpscore[j] && permit( seg2[jumppos[j]], seg2[j] ) )\n-\t\t\t{\n-\t\t\t\tmaximum = jumpscore[j];\n-\t\t\t\ttrack[i][j] = jumpscore[j] - i;\n-\t\t\t}\n-\n-\t\t\tcrossscore[i][j] += maximum;\n-\n-\t\t\tif( jumpscorei < crossscore[i-1][j] )\n-\t\t\t{\n-\t\t\t\tjumpscorei = crossscore[i-1][j];\n-\t\t\t\tjumpposi = j;\n-\t\t\t}\n-\n-\t\t\tif( jumpscore[j] < crossscore[i][j-1] )\n-\t\t\t{\n-\t\t\t\tjumpscore[j] = crossscore[i][j-1];\n-\t\t\t\tjumppos[j] = i;\n-\t\t\t}\n-\t\t}\n-\t}\n-#if 0\n-\tfor( i=0; i<*ncut; i++ ) \n-\t{\n-\t\tfor( j=0; j<*ncut; j++ )\n-\t\t\tfprintf( stderr, "%3d ", track[i][j] );\n-\t\tfprintf( stderr, "\\n" );\n-\t}\n-#endif\n-\n-\n-\tresult1[MAXSEG-1] = *ncut-1;\n-\tresult2[MAXSEG-1] = *ncut-1;\n-\n-\tfor( i=MAXSEG-1; i>=1; i-- )\n-\t{\n-\t\tcur1 = result1[i];\n-\t\tcur2 = result2[i];\n-\t\tif( cur1 == 0 || cur2 == 0 ) break;\n-\t\tshift = track[cur1][cur2];\n-\t\tif( shift == 0 )\n-\t\t{\n-\t\t\tresult1[i-1] = cur1 - 1;\n-\t\t\tresult2[i-1] = cur2 - 1;\n-\t\t\tcontinue;\n-\t\t}\n-\t\telse if( shift > 0 
)\n-\t\t{\n-\t\t\tresult1[i-1] = cur1 - 1;\n-\t\t\tresult2[i-1] = cur2 - shift;\n-\t\t}\n-\t\telse if( shift < 0 )\n-\t\t{\n-\t\t\tresult1[i-1] = cur1 + shift;\n-\t\t\tresult2[i-1] = cur2 - 1;\n-\t\t}\n-\t}\n-\n-\tcount = 0;\n-\tfor( j=i; j<MAXSEG; j++ )\n-\t{\n-\t\tif( ocrossscore[result1[j]][result2[j]] == 0.0 ) continue;\n-\n-\t\tif( result1[j] == result1[j-1] || result2[j] == result2[j-1] )\n-\t\t\tif( ocrossscore[result1[j]][result2[j]] > ocrossscore[result1[j-1]][result2[j-1]] )\n-\t\t\t\tcount--;\n-\t\t\t\t\n-\t\tcut1[count] = ocut1[result1[j]];\n-\t\tcut2[count] = ocut2[result2[j]];\n-\n-\t\tcount++;\n-\t}\n-\n-\t*ncut = count;\n-#if 0\n-\tfor( i=0; i<*ncut; i++ )\n-\t\tfprintf( stderr, "i=%d, cut1 = %d, cut2 = %d\\n", i, cut1[i], cut2[i] );\n-#endif\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/functions.h --- a/mafft/core/functions.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,363 +0,0 @@\n-extern int intlen( int *num );\n-extern char seqcheck( char **seq );\n-extern void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx );\n-extern void exitall( char arr[] );\n-extern void display( char **seq, int nseq );\n-extern void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );\n-//extern void intergroup_score_dynmtx( double **pairoffset, int mtx[0x80][0x80], char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );\n-extern void intergroup_score_multimtx( int **whichmtx, double ***matrices, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );\n-extern void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );\n-extern void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );\n-extern double score_calc5( char **seq, int s, double **eff, int ex );\n-extern double score_calc4( char **seq, int s, double **eff, int ex );\n-extern void upg2( int nseq, double **eff, int ***topol, double **len );\n-//extern void veryfastsupg_float_realloc_nobk_halfmtx( int njob, float **mtx, int ***topol, float **len );\n-//extern void veryfastsupg_float_realloc_nobk( int njob, float **mtx, int ***topol, float **len );\n-extern void veryfastsupg_int_realloc_nobk( int njob, int **mtx, int ***topol, double **len );\n-extern void veryfastsupg( int nseq, double **oeff, int ***topol, double **len );\n-extern void veryfastsupg_double( int nseq, double **oeff, int ***topol, double **len );\n-extern void veryfastsupg_double_loadtree( int nseq, double **oeff, int ***topol, double **len, char **name );\n-//extern void veryfastsupg_double_loadtop( int nseq, double **oeff, int ***topol, double **len );\n-extern void veryfastsupg_int( int nseq, int **oeff, int 
***topol, double **len );\n-extern void fastsupg( int nseq, double **oeff, int ***topol, double **len );\n-extern void supg( int nseq, double **oeff, int ***topol, double **len );\n-extern void spg( int nseq, double **oeff, int ***topol, double **len );\n-extern double ipower( double x, int n );\n-extern void countnode( int nseq, int ***topol, double **node );\n-extern void countnode_int( int nseq, int ***topol, int **node );\n-extern void counteff_simple( int nseq, int ***topol, double **len, double *node );\n-extern void counteff_simple_float( int nseq, int ***topol, float **len, double *node );\n-extern void counteff_simple_float_nostatic( int nseq, int ***topol, float **len, double *node );\n-extern void counteff( int nseq, int ***topol, double **len, double **node );\n-extern float score_calc1( char *seq1, char *seq2 );\n-extern float score_calcp( char *seq1, char *seq2, int len );\n-extern float substitution_nid( char *seq1, char *seq2 );\n-extern float substitution_score( char *seq1, char *seq2 );\n-extern float substitution_hosei( char *seq1, char *seq2 );\n-extern float substitution( char *seq1, char *seq2 );\n-extern void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff );\n-extern float bscore_calc( char **seq, int s, double **eff );\n-extern void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax );\n-extern void FreeTmpSeqs( char **mseq2, char *mseq1 );\n-extern void gappick_samestring( char *aseq );\n-extern void gappick0( char *aseq, char *seq );\n-extern void gappick( int nseq, int s, char **aseq, char **mseq2, \n-\t\t\t\t\t double **eff, double *effarr );\n-extern void commongappick_record( int nseq, char **seq, int *map );\n-extern void commongappick( int nseq, char **seq );\n-extern double score_calc0( char **seq, int s, double **eff, int ex );\n-extern void strins( char *str1, char *str2 );\n-extern int isaligned( int nseq, char **seq );\n-extern double score_calc_for_score( int nseq, char **seq 
);\n-extern void floatncpy( float *vec1, float *vec2, int len )'..b'int efffree );\n-extern void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name );\n-extern void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ncons, int **constraints );\n-extern void imp_match_init_strict( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1kozo, double*eff2kozo, LocalHom ***localhom, int forscore );\n-extern void miyataout_reorder_pointer( FILE *fp, int locnjob, int nlenmax, char **name, int *nlen, char **aseq, int *order );\n-extern void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name );\n-extern void cpmx_ribosum( char **seq, char **seqr, char *dir, float **cpmx, double *eff, int lgth, int clus );\n-extern void rnaalifoldcall( char **seq, int nseq, RNApair **pairprob );\n-extern void readpairfoldalign( FILE *fp, char *seq1, char *seq2, char *aln1, char *aln2, int q1, int q2, int *of1, int *of2, int sumlen );\n-extern void write1seq( FILE *fp, char *aseq );\n-extern void assignstrweight( int nseq, double *strweight, Node *stopol, int ***topol, int step, int LorR, char *kozoari, double *seqweight );\n-extern void cutData( FILE *, int **, char **, int * );\n-extern void cutAlignment( FILE *, int **, char **, int *, char **, char ** );\n-extern void catData( FILE * );\n-extern void getnumlen_nogap_outallreg_web( FILE *fp, FILE *ofp, int *nlenminpt, int *isalignedpt );\n-extern void getnumlen_nogap_outallreg( FILE *fp, int *nlenminpt );\n-extern double plainscore( int nseq, char **seq );\n-extern void eq2dash( char *s );\n-extern void eq2dashmatometehayaku( char **s, int n );\n-extern void findnewgaps( int n, int rep, char **seq, int *gaplen );\n-extern void findcommongaps( int, char **, int * );\n-extern void adjustgapmap( int, int *, char * );\n-extern void 
insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg, char gapchar );\n-extern void restorecommongaps( int n, char **seq, int *top0, int *top1, int *gaplen, int alloclen, char gapchar );\n-extern int samemember( int *mem, int *cand );\n-extern int samemembern( int *mem, int *cand, int candn );\n-extern int includemember( int *mem, int *cand );\n-extern int overlapmember( int *mem1, int *mem2 );\n-extern void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg );\n-extern void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg );\n-extern void sreverse( char *r, char *s );\n-extern int addonetip( int njobc, int ***topolc, float **lenc, float **iscorec, int ***topol, float **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name, int *alnleninnode, int *nogaplen, int noalign );\n-extern void intcpy( int *s1, int *s2 );\n-extern void intncpy( int *s1, int *s2, int n );\n-extern void fltncpy( float *s1, float *s2, int n );\n-extern void intcat( int *s1, int *s2 );\n-extern void readsubalignmentstable( int n, int **table, int *preservegaps, int *nsubpt, int *maxmempt );\n-extern int myatoi( char * );\n-extern float myatof( char * );\n-extern void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth );\n-extern void gapcountf( float *freq, char **seq, int nseq, double *eff, int lgth );\n-extern void outgapcount( float *freq, int nseq, char *gappat, double *eff );\n-extern void makedynamicmtx( double **out, double **in, float offset );\n-extern float dist2offset( float dist );\n-extern void reporterr( const char *str, ... 
);\n-extern void freeconstants();\n-extern void closeFiles();\n-extern void FreeCommonIP();\n-extern void initglobalvariables();\n-extern void makeskiptable( int n, int **skip, char **seq );\n-extern int generatesubalignmentstable( int nseq, int ***tablept, int *nsubpt, int *maxmempt, int ***topol, double **len, double threshold );\n-extern float sumofpairsscore( int nseq, char **seq );\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/genalign11.c --- a/mafft/core/genalign11.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,608 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define DEBUG 0\n-#define DEBUG2 0\n-#define XXXXXXX 0\n-#define USE_PENALTY_EX 1\n-\n-static TLS int localstop;\n-\n-#if 1\n-static void match_calc_mtx( double **mtx, float *match, char **s1, char **s2, int i1, int lgth2 ) \n-{\n-\tchar *seq2 = s2[0];\n-\tdouble *doubleptr = mtx[(int)s1[0][i1]];\n-\n-\twhile( lgth2-- )\n-\t\t*match++ = doubleptr[(int)*seq2++];\n-}\n-#else\n-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )\n-{\n-\tint j;\n-\n-\tfor( j=0; j<lgth2; j++ )\n-\t\tmatch[j] = amino_dis[(*s1)[i1]][(*s2)[j]];\n-}\n-#endif\n-\n-static float gentracking( float *lasthorizontalw, float *lastverticalw, \n-\t\t\t\t\t\tchar **seq1, char **seq2, \n- char **mseq1, char **mseq2, \n- float **cpmx1, float **cpmx2, \n- int **ijpi, int **ijpj, int *off1pt, int *off2pt, int endi, int endj )\n-{\n-\tint i, j, l, iin, jin, lgth1, lgth2, k, limk;\n-\tint ifi=0, jfi=0; // by D.Mathog\n-//\tchar gap[] = "-";\n-\tchar *gap;\n-\tgap = newgapstr;\n-\tlgth1 = strlen( seq1[0] );\n-\tlgth2 = strlen( seq2[0] );\n-\n-#if 0\n-\tfor( i=0; i<lgth1; i++ ) \n-\t{\n-\t\tfprintf( stderr, "lastverticalw[%d] = %f\\n", i, lastverticalw[i] );\n-\t}\n-#endif\n- \n- for( i=0; i<lgth1+1; i++ ) \n- {\n- ijpi[i][0] = localstop;\n- ijpj[i][0] = localstop;\n- }\n- for( j=0; j<lgth2+1; j++ ) \n- {\n- ijpi[0][j] = localstop;\n- ijpj[0][j] = localstop;\n- }\n-\n-\tmseq1[0] += lgth1+lgth2;\n-\t*mseq1[0] = 0;\n-\tmseq2[0] += lgth1+lgth2;\n-\t*mseq2[0] = 0;\n-\tiin = endi; jin = endj;\n-\tlimk = lgth1+lgth2;\n-\tfor( k=0; k<=limk; k++ ) \n-\t{\n-\n-\t\tifi = ( ijpi[iin][jin] );\n-\t\tjfi = ( ijpj[iin][jin] );\n-\t\tl = iin - ifi;\n-//\t\tif( ijpi[iin][jin] < 0 || ijpj[iin][jin] < 0 )\n-//\t\t{\n-//\t\t\tfprintf( stderr, "skip! 
%d-%d\\n", ijpi[iin][jin], ijpj[iin][jin] );\n-//\t\t\tfprintf( stderr, "1: %c-%c\\n", seq1[0][iin], seq1[0][ifi] );\n-//\t\t\tfprintf( stderr, "2: %c-%c\\n", seq2[0][jin], seq2[0][jfi] );\n-//\t\t}\n-\t\twhile( --l ) \n-\t\t{\n-\t\t\t*--mseq1[0] = seq1[0][ifi+l];\n-\t\t\t*--mseq2[0] = *gap;\n-\t\t\tk++;\n-\t\t}\n-\t\tl= jin - jfi;\n-\t\twhile( --l )\n-\t\t{\n-\t\t\t*--mseq1[0] = *gap;\n-\t\t\t*--mseq2[0] = seq2[0][jfi+l];\n-\t\t\tk++;\n-\t\t}\n-\n-\t\tif( iin <= 0 || jin <= 0 ) break;\n-\t\t*--mseq1[0] = seq1[0][ifi];\n-\t\t*--mseq2[0] = seq2[0][jfi];\n-\n-\t\tif( ijpi[ifi][jfi] == localstop ) break;\n-\t\tif( ijpj[ifi][jfi] == localstop ) break; \n-\t\tk++;\n-\t\tiin = ifi; jin = jfi;\n-\t}\n-\tif( ifi == -1 ) *off1pt = 0; else *off1pt = ifi;\n-\tif( jfi == -1 ) *off2pt = 0; else *off2pt = jfi;\n-\n-//\tfprintf( stderr, "ifn = %d, jfn = %d\\n", ifi, jfi );\n-\n-\n-\treturn( 0.0 );\n-}\n-\n-\n-float genL__align11( double **n_dynamicmtx, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt )\n-/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */\n-{\n-//\tint k;\n-\tregister int i, j;\n-\tint lasti, lastj; \n-\tint lgth1, lgth2;\n-\tint resultlen;\n-\tfloat wm = 0.0; /* int ?????? 
*/\n-\tfloat g;\n-\tfloat *currentw, *previousw;\n-#if 1\n-\tfloat *wtmp;\n-\tint *ijpipt;\n-\tint *ijpjpt;\n-\tfloat *mjpt, *Mjpt, *prept, *curpt;\n-\tint *mpjpt, *Mpjpt;\n-#endif\n-\tstatic TLS float mi, *m;\n-\tstatic TLS float Mi, *largeM;\n-\tstatic TLS int **ijpi;\n-\tstatic TLS int **ijpj;\n-\tstatic TLS int mpi, *mp;\n-\tstatic TLS int Mpi, *Mp;\n-\tstatic TLS float *w1, *w2;\n-\tstatic TLS float *match;\n-\tstatic TLS float *initverticalw; /* kufuu sureba iranai */\n-\tstatic TLS float *lastverticalw; /* kufuu sureba iranai */\n-\tstatic TLS char **mseq1;\n-\tstatic TLS char **mseq2;\n-\tstatic TLS char **mseq;\n-\tstatic TLS float **cpmx1;\n-\tstatic TLS float **cpmx2;\n-\tstatic TLS int **intwork;\n-\tstatic TLS float **floatwork;\n-\tstatic TLS int orlgth1 = 0, orlgth2 = 0;\n-\tstatic TLS double **amino_dynamicmtx = NULL; // ??\n-\tfloat maxwm;\n-\tfloat tbk;\n-\tint tbki, tbkj;\n-\tint endali, endalj;\n-//\tfloat localthr = 0.0;\n-//\tfloat localthr2 = 0.0;\n-\tfloat fpenalty = (float)penalty;\n-\tfloat fpenalty_OP = (float)penalty_OP;\n-\tfloat fpenalty_ex = (float)penalty_ex;\n-//\tfloat fpenalty_EX = (float)penalt'..b';\n-}\n-fprintf( stderr, "\\n" );\n-#endif\n-\t\tcurrentw[0] = initverticalw[i];\n-\n-\t\tmi = previousw[0]; mpi = 0;\n-\t\tMi = previousw[0]; Mpi = 0;\n-\n-#if 0\n-\t\tif( mi < localthr ) mi = localthr2;\n-#endif\n-\n-\t\tijpipt = ijpi[i] + 1;\n-\t\tijpjpt = ijpj[i] + 1;\n-\t\tmjpt = m + 1;\n-\t\tMjpt = largeM + 1;\n-\t\tprept = previousw;\n-\t\tcurpt = currentw + 1;\n-\t\tmpjpt = mp + 1;\n-\t\tMpjpt = Mp + 1;\n-\t\ttbk = -999999.9;\n-\t\ttbki = 0;\n-\t\ttbkj = 0;\n-\t\tlastj = lgth2+1;\n-\t\tfor( j=1; j<lastj; j++ )\n-\t\t{\n-\t\t\twm = *prept;\n-\t\t\t*ijpipt = i-1;\n-\t\t\t*ijpjpt = j-1;\n-\n-\n-//\t\t\tfprintf( stderr, "i,j=%d,%d %c-%c\\n", i, j, seq1[0][i], seq2[0][j] );\n-//\t\t\tfprintf( stderr, "wm=%f\\n", wm );\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f->", wm );\n-#endif\n-\t\t\tg = mi + fpenalty;\n-#if 0\n-\t\t\tfprintf( 
stderr, "%5.0f?", g );\n-#endif\n-\t\t\tif( g > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-//\t\t\t\t*ijpipt = i - 1; \n-\t\t\t\t*ijpjpt = mpi;\n-\t\t\t}\n-\t\t\tg = *prept;\n-\t\t\tif( g > mi )\n-\t\t\t{\n-\t\t\t\tmi = g;\n-\t\t\t\tmpi = j-1;\n-\t\t\t}\n-\n-#if USE_PENALTY_EX\n-\t\t\tmi += fpenalty_ex;\n-#endif\n-\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f->", wm );\n-#endif\n-\t\t\tg = *mjpt + fpenalty;\n-#if 0\n-\t\t\tfprintf( stderr, "m%5.0f?", g );\n-#endif\n-\t\t\tif( g > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-\t\t\t\t*ijpipt = *mpjpt;\n-\t\t\t\t*ijpjpt = j - 1; //IRU!\n-\t\t\t}\n-\t\t\tg = *prept;\n-\t\t\tif( g > *mjpt )\n-\t\t\t{\n-\t\t\t\t*mjpt = g;\n-\t\t\t\t*mpjpt = i-1;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\t*mjpt += fpenalty_ex;\n-#endif\n-\n-\n-\t\t\tg = tbk + fpenalty_OP; \n-//\t\t\tg = tbk; \n-\t\t\tif( g > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-\t\t\t\t*ijpipt = tbki;\n-\t\t\t\t*ijpjpt = tbkj;\n-//\t\t\t\tfprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\\n", i, j, *ijpipt, *ijpjpt );\n-\t\t\t}\n-//\t\t\tg = Mi;\n-\t\t\tif( Mi > tbk )\n-\t\t\t{\n-\t\t\t\ttbk = Mi; //error desu.\n-\t\t\t\ttbki = i-1;\n-\t\t\t\ttbkj = Mpi;\n-\t\t\t}\n-//\t\t\tg = *Mjpt;\n-\t\t\tif( *Mjpt > tbk )\n-\t\t\t{\n-\t\t\t\ttbk = *Mjpt;\n-\t\t\t\ttbki = *Mpjpt;\n-\t\t\t\ttbkj = j-1;\n-\t\t\t}\n-//\t\t\ttbk += fpenalty_EX;// + foffset;\n-\n-//\t\t\tg = *prept;\n-\t\t\tif( *prept > *Mjpt )\n-\t\t\t{\n-\t\t\t\t*Mjpt = *prept;\n-\t\t\t\t*Mpjpt = i-1;\n-\t\t\t}\n-//\t\t\t*Mjpt += fpenalty_EX;// + foffset;\n-\n-//\t\t\tg = *prept;\n-\t\t\tif( *prept > Mi )\n-\t\t\t{\n-\t\t\t\tMi = *prept;\n-\t\t\t\tMpi = j-1;\n-\t\t\t}\n-//\t\t\tMi += fpenalty_EX;// + foffset;\n-\n-\n-//\t\t\tfprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt );\n-//\t\t\tfprintf( stderr, "ijp = %c,%c\\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] );\n-\n-\n-\t\t\tif( maxwm < wm )\n-\t\t\t{\n-\t\t\t\tmaxwm = wm;\n-\t\t\t\tendali = i;\n-\t\t\t\tendalj = 
j;\n-\t\t\t}\n-#if 1\n-\t\t\tif( wm < localthr )\n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "stop i=%d, j=%d, curpt=%f\\n", i, j, *curpt );\n-\t\t\t\t*ijpipt = localstop;\n-//\t\t\t\t*ijpjpt = localstop; \n-\t\t\t\twm = localthr2;\n-\t\t\t}\n-#endif\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f ", *curpt );\n-#endif\n-#if DEBUG2\n-\t\t\tfprintf( stderr, "%5.0f ", wm );\n-//\t\t\tfprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\\n", seq1[0][i], seq2[0][j], *ijppt, localstop );\n-#endif\n-\n-\t\t\t*curpt += wm;\n-\t\t\tijpipt++;\n-\t\t\tijpjpt++;\n-\t\t\tmjpt++;\n-\t\t\tMjpt++;\n-\t\t\tprept++;\n-\t\t\tmpjpt++;\n-\t\t\tMpjpt++;\n-\t\t\tcurpt++;\n-\t\t}\n-#if DEBUG2\n-\t\tfprintf( stderr, "\\n" );\n-#endif\n-\n-\t\tlastverticalw[i] = currentw[lgth2-1];\n-\t}\n-\n-\n-#if DEBUG2\n-\tfprintf( stderr, "maxwm = %f\\n", maxwm );\n-\tfprintf( stderr, "endali = %d\\n", endali );\n-\tfprintf( stderr, "endalj = %d\\n", endalj );\n-#endif\n-\n-\tif( ijpi[endali][endalj] == localstop ) // && ijpj[endali][endalj] == localstop )\n-\t{\n-\t\tstrcpy( seq1[0], "" );\n-\t\tstrcpy( seq2[0], "" );\n-\t\t*off1pt = *off2pt = 0;\n-\t\treturn( 0.0 );\n-\t}\n-\n-\n-\tgentracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, ijpj, off1pt, off2pt, endali, endalj );\n-\n-//\tfprintf( stderr, "### impmatch = %f\\n", *impmatch );\n-\n-\tresultlen = strlen( mseq1[0] );\n-\tif( alloclen < resultlen || resultlen > N )\n-\t{\n-\t\tfprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\\n", alloclen, resultlen, N );\n-\t\tErrorExit( "LENGTH OVER!\\n" );\n-\t}\n-\n-\n-\tstrcpy( seq1[0], mseq1[0] );\n-\tstrcpy( seq2[0], mseq2[0] );\n-\n-#if 0\n-\tfprintf( stderr, "\\n" );\n-\tfprintf( stderr, ">\\n%s\\n", mseq1[0] );\n-\tfprintf( stderr, ">\\n%s\\n", mseq2[0] );\n-#endif\n-\n-\n-\treturn( maxwm );\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/getlag.c --- a/mafft/core/getlag.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,461 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\n-\tcalledByXced = 0;\n-\tdevide = 0;\n-\tuse_fft = 0;\n-\tfftscore = 1;\n-\tfftRepeatStop = 0;\n-\tfftNoAnchStop = 0;\n- weight = 3;\n- utree = 1;\n-\ttbutree = 1;\n- refine = 0;\n- check = 1;\n- cut = 0.0;\n- disp = 0;\n- outgap = 1;\n- alg = \'C\';\n- mix = 0;\n-\ttbitr = 0;\n-\tscmtd = 5;\n-\ttbweight = 0;\n-\ttbrweight = 3;\n-\tcheckC = 0;\n-\ttreemethod = \'x\';\n-\tcontin = 0;\n-\tppenalty = NOTSPECIFIED;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tpoffset = NOTSPECIFIED;\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\tgeta2 = GETA2;\n-\tscoremtx = NOTSPECIFIED;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( (c = *++argv[0]) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'f\':\n-\t\t\t\t\tppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\tfprintf( stderr, "ppenalty = %d\\n", ppenalty );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'g\':\n-\t\t\t\t\tppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\tfprintf( stderr, "ppenalty_ex = %d\\n", ppenalty_ex );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'h\':\n-\t\t\t\t\tpoffset = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\tfprintf( stderr, "poffset = %d\\n", poffset );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'D\':\n-\t\t\t\t\tscoremtx = -1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'P\':\n-\t\t\t\t\tscoremtx = 0;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tcontin = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'e\':\n-\t\t\t\t\tfftscore = 0;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'O\':\n-\t\t\t\t\tfftNoAnchStop = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'R\':\n-\t\t\t\t\tfftRepeatStop = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'Q\':\n-\t\t\t\t\tcalledByXced = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'s\':\n-\t\t\t\t\ttreemethod = \'s\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase 
\'x\':\n-\t\t\t\t\ttreemethod = \'x\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'p\':\n-\t\t\t\t\ttreemethod = \'p\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'a\':\n-\t\t\t\t\talg = \'a\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'A\':\n-\t\t\t\t\talg = \'A\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'S\':\n-\t\t\t\t\talg = \'S\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'C\':\n-\t\t\t\t\talg = \'C\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'F\':\n-\t\t\t\t\tuse_fft = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'v\':\n-\t\t\t\t\ttbrweight = 3;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'d\':\n-\t\t\t\t\tdisp = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'o\':\n-\t\t\t\t\toutgap = 0;\n-\t\t\t\t\tbreak;\n-/* Modified 01/08/27, default: user tree */\n-\t\t\t\tcase \'J\':\n-\t\t\t\t\ttbutree = 0;\n-\t\t\t\t\tbreak;\n-/* modification end. */\n-\t\t\t\tcase \'Z\':\n-\t\t\t\t\tcheckC = 1;\n-\t\t\t\t\tbreak;\n- default:\n- fprintf( stderr, "illegal option %c\\n", c );\n- argc = 0;\n- break;\n- }\n-\t\t}\n-\t\tnextoption:\n-\t\t\t;\n-\t}\n- if( argc == 1 )\n- {\n- cut = atof( (*argv) );\n- argc--;\n- }\n- if( argc != 0 ) \n- {\n- fprintf( stderr, "options: Check source file !\\n" );\n- exit( 1 );\n- }\n-\tif( tbitr == 1 && outgap == 0 )\n-\t{\n-\t\tfprintf( stderr, "conflicting options : o, m or u\\n" );\n-\t\texit( 1 );\n-\t}\n-\tif( alg == \'C\' && outgap == 0 )\n-\t{\n-\t\tfprintf( stderr, "conflicting options : C, o\\n" );\n-\t\texit( 1 );\n-\t}\n-\treadOtherOptions( &ppid, &fftThreshold, &fftWinSize );\n-}\n-\n-\n-void treebase( char **name, int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double **mtx, int ***topol, double **len, double **eff, int alloclen )\n-{\n-\tint i, j, l;\n-\tint clus1, clus2;\n-\tint s1, s2, r1, r2;\n-\tfloat pscore;\n-\tstatic char *indication1, *indication2;\n-\tstatic char **name1, **name2;\n-\tstatic double **partialmtx = NULL;\n-\tstatic int ***partialtopol = NULL;\n-\tstatic double **partiallen = NULL;\n-\tstatic double **partialeff = NULL;\n-\tstatic double *effarr = 
NULL;\n-\tstatic double *effarr1 = NULL;\n-\tstatic double *effarr2 = NULL;\n-#if 0\n-\tchar pair[njob][njob];\n-#else\n-\tstatic char **pair;\n-#endif\n-\tif( partialtopol == NULL ) \n-\t{\n-\t\tpartialmtx = AllocateDoubleMtx( njob, njob );\n-\t\tpartialtopol = AllocateIntCub( njob, 2, njob );\n-\t\tpartialeff = AllocateDoubleMtx( njob, njob );\n-\t\tpartiallen = AllocateDoubleMtx( njob, 2 );\n-\t\teffarr = AllocateDoubleVec( njob );\n-\t\teffarr1 = AllocateDoubleVec( njob );\n-\t\teffarr2 = AllocateDoub'..b'g == \'C\' ) \n-\t\tfprintf( fp, "Apgorithm A+/C\\n" );\n-\telse\n-\t\tfprintf( fp, "Unknown algorithm\\n" );\n-\n-\tif( treemethod == \'x\' )\n-\t\tfprintf( fp, "Tree = UPGMA (3).\\n" );\n-\telse if( treemethod == \'s\' )\n-\t\tfprintf( fp, "Tree = UPGMA (2).\\n" );\n-\telse if( treemethod == \'p\' )\n-\t\tfprintf( fp, "Tree = UPGMA (1).\\n" );\n-\telse\n-\t\tfprintf( fp, "Unknown tree.\\n" );\n-\n- if( use_fft )\n- {\n- fprintf( fp, "FFT on\\n" );\n- if( scoremtx == -1 )\n- fprintf( fp, "Basis : 4 nucleotides\\n" );\n- else\n- {\n- if( fftscore )\n- fprintf( fp, "Basis : Polarity and Volume\\n" );\n- else\n- fprintf( fp, "Basis : 20 amino acids\\n" );\n- }\n- fprintf( fp, "Threshold of anchors = %d%%\\n", fftThreshold );\n- fprintf( fp, "window size of anchors = %dsites\\n", fftWinSize );\n- }\n-\telse\n- fprintf( fp, "FFT off\\n" );\n-\tfflush( fp );\n-}\n-\t \n-\n-int main( int argc, char *argv[] )\n-{\n-\tstatic int nlen[M];\t\n-\tstatic char **name, **seq;\n-\tstatic char **mseq1, **mseq2;\n-\tstatic char **aseq;\n-\tstatic char **bseq;\n-\tstatic double **pscore;\n-\tstatic double **eff;\n-\tstatic double **node0, **node1;\n-\tint i, j;\n-\tstatic int ***topol;\n-\tstatic double **len;\n-\tFILE *prep;\n-\tchar c;\n-\tint alloclen;\n-\n-\targuments( argc, argv );\n-\tgetnumlen( stdin );\n-\trewind( stdin );\n-\n-\tname = AllocateCharMtx( njob, B+1 );\n-\tseq = AllocateCharMtx( njob, nlenmax*5+1 );\n-\taseq = AllocateCharMtx( njob, nlenmax*5+1 );\n-\tbseq 
= AllocateCharMtx( njob, nlenmax*5+1 );\n-\tmseq1 = AllocateCharMtx( njob, 0 );\n-\tmseq2 = AllocateCharMtx( njob, 0 );\n-\talloclen = nlenmax*5;\n-\n-\ttopol = AllocateIntCub( njob, 2, njob );\n-\tlen = AllocateDoubleMtx( njob, 2 );\n-\tpscore = AllocateDoubleMtx( njob, njob );\n-\teff = AllocateDoubleMtx( njob, njob );\n-\tnode0 = AllocateDoubleMtx( njob, njob );\n-\tnode1 = AllocateDoubleMtx( njob, njob );\n-\n-#if 0\n-\tRead( name, nlen, seq );\n-#else\n-\treadData_pointer( stdin, name, nlen, seq );\n-#endif\n-\n-\tconstants( njob, seq );\n-\n-#if 0\n-\tfprintf( stderr, "params = %d, %d, %d\\n", penalty, penalty_ex, offset );\n-#endif\n-\n-\tinitSignalSM();\n-\n-\tinitFiles();\n-\n-\tWriteOptions( trap_g );\n-\n-\tc = seqcheck( seq );\n-\tif( c )\n-\t{\n-\t\tfprintf( stderr, "Illeagal character %c\\n", c );\n-\t\texit( 1 );\n-\t}\n-\n-\twritePre( njob, name, nlen, seq, 0 );\n-\n-\tif( tbutree == 0 )\n-\t{\n-\t\tfor( i=1; i<njob; i++ ) \n-\t\t{\n-\t\t\tif( nlen[i] != nlen[0] ) \n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "Input pre-aligned seqences or make hat2.\\n" );\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-\t\t}\n-\t\tfor( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) \n-\t\t{\n-\t\t/*\n-\t\t\tpscore[i][j] = (double)score_calc1( seq[i], seq[j] );\n-\t\t*/\n-\t\t\tpscore[i][j] = (double)substitution_hosei( seq[i], seq[j] );\n-\t\t}\n-\t}\n-\telse\n-\t{\n-\t\tfprintf( stderr, "Loading \'hat2\' ... " );\n-\t\tprep = fopen( "hat2", "r" );\n-\t\tif( prep == NULL ) ErrorExit( "Make hat2." );\n-\t\treadhat2_pointer( prep, njob, name, pscore );\n-\t\tfclose( prep );\n-\t\tfprintf( stderr, "done.\\n" );\n-\n-#if 0\n-\t\tprep = fopen( "hat2_check", "w" );\n-\t\tWriteHat2( prep, njob, name, pscore );\n-\t\tfclose( prep );\n-#endif\n-\n-\t}\n-\n-\tfprintf( stderr, "Constructing dendrogram ... 
" );\n-\tif( treemethod == \'x\' )\n-\t\tsupg( njob, pscore, topol, len );\n-\telse if( treemethod == \'s\' )\n-\t\tspg( njob, pscore, topol, len );\n-\telse if( treemethod == \'p\' )\n-\t\tupg2( njob, pscore, topol, len );\n-\telse \n-\t\tErrorExit( "Incorrect tree\\n" );\n-\tfprintf( stderr, "done.\\n" );\n-\n-\tcountnode( njob, topol, node0 );\n-\tif( tbrweight )\n-\t{\n-\t\tweight = 3; \n-\t\tutree = 0; counteff( njob, topol, len, eff ); utree = 1;\n-\t}\n-\telse\n-\t{\n-\t\tfor( i=0; i<njob; i++ ) eff[i][i] = 1.0;\n-\t}\n-\n-\n-\tfor( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );\n-\n-\ttreebase( name, nlen, bseq, aseq, mseq1, mseq2, pscore, topol, len, eff, alloclen );\n-\n-\tfprintf( trap_g, "done\\n" );\n-\tfclose( trap_g );\n-\n-\twritePre( njob, name, nlen, aseq, !contin );\n-\twriteData_pointer( stdout, njob, name, nlen, aseq );\n-#if IODEBUG\n-\tfprintf( stderr, "OSHIMAI\\n" );\n-#endif\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/interface.c --- a/mafft/core/interface.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,144 +0,0 @@\n-#include <stdio.h>\n-#include <string.h>\n-#include <stdlib.h>\n-#include "mafft.h"\n-\n-int report( int status, int progress, char *message )\n-{\n-\tfprintf( stderr, "status = %d\\n", status );\n-\tfprintf( stderr, "progress = %d / 100\\n", progress );\n-\tfprintf( stderr, "message = %s\\n", message );\n-\treturn( 0 );\n-}\n-\n-int chudan( int status, int progress, char *message )\n-{\n-\tfprintf( stderr, "status = %d\\n", status );\n-\tfprintf( stderr, "progress = %d / 100\\n", progress );\n-\tfprintf( stderr, "message = %s\\n", message );\n-\treturn( 2 );\n-}\n-int main( void )\n-{\n-\tint i;\n-\tint argc;\n-\tchar **argv;\n-\tchar **seq;\n-\tchar **name;\n-\tchar *message;\n-\tint res;\n-\tint n, l, mlen;\n-\n-//\tprintf( "This is interface.\\n" );\n-\n-\n-\tl = 10000;\t\n-\tn = 130;\n-\tseq = (char **)calloc( n, sizeof( char * ) );\n-\tname = (char **)calloc( n, sizeof( char * ) );\n-\tfor( i=0; i<n; i++ ) seq[i] = calloc( l+1, sizeof( char ) );\n-\tfor( i=0; i<n; i++ ) name[i] = calloc( 100, sizeof( char ) );\n-\n-\tfor( i=0; i<10; i++ )\n-\t{\n-\t\tstrcpy( name[i*13+0], "name0" );\n-\t\tstrcpy( name[i*13+1], "name1" );\n-\t\tstrcpy( name[i*13+2], "name2" );\n-\t\tstrcpy( name[i*13+3], "name3" );\n-\t\tstrcpy( name[i*13+4], "name4" );\n-\t\tstrcpy( name[i*13+5], "name5" );\n-\t\tstrcpy( name[i*13+6], "name6" );\n-\t\tstrcpy( name[i*13+7], "name7" );\n-\t\tstrcpy( name[i*13+8], "name8" );\n-\t\tstrcpy( name[i*13+9], "name9" );\n-\t\tstrcpy( name[i*13+10], "name10" );\n-\t\tstrcpy( name[i*13+11], "name11" );\n-\t\tstrcpy( name[i*13+12], "name12" );\n-\n-\n-\t\tstrcpy( seq[i*13+0], 
"TAATTAAAGGGCCGTGGTATACTGACCATGCGAAGGTAGCATAATCATTAGCCTTTTGATTTGAGGCTGGAATGAATGGTTTGACGAGAGATGGTCTGTCTCTTCGATTAAATTGAAGTTAATCTTTAAGTGAAAAAGCTTAAATGTACTTGGAGGGCGATAAGACCCTATAGATCTTTACATTTAATTCTTTTGTCTTGCGGTAGGTAATTAGACAGAGTAAAACAATGTTCGGTTGGGGCGACGGTAAGAACAGAATAAACACTTACAACATAAACACATCAATAAATGACCATTGATCCTTAGATGAATAAAGACCAAGTTACCTTAGGGATAACAGCGTAATTCTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-------------------" );\n-\t\tstrcpy( seq[i*13+1], "GTGTTGAAGGGCCGCGGTATTTTTGACCGTGCGAAGGTAGCATAATCATTAGTCTTTTAATTGAAGGCTCGAATGAATGGTTGGACGAAAAACACACTGTTTCAGACAAAGAATTTGAATTTAACTTTTAAGTGAAAAGGCTTAAATGAATTAAAAAGACGATAAGACCCTATAAATCTTTACATTATGTTTTGTATTTTTAAAGTTGTTGTGTATTAAAAAGAAATATAAAGTAGATGTTGTGTTGGGGCAACGATAATATAATAAGATTAGCTGTTGGTAAAATTAAACAAATGTGTTTGTTAGGAGTAGTTAGATCCTTTTTAGAGATTTAAAGATTAAGTTACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCTTATCGAAGAAAAAGATTGCGACCTCGATG" );\n-\t\tstrcpy( seq[i*13+2], "ATATTGAAGGGCCGCGGTATTTCTGACCGTGCGAAGGTAGCATAATCATTAGTCTTTTAATTGGAGGCTCGTATGAAAGGTCGAACGAGTGATAGACTGTCTCAGCAAAAAAAAAATTGAACTTAACTTTAAAGTGAAAAGGCTTTAATATTTCAGAAAGACGATAAGACCCTATAAATCTTCACACCACCTTTTATACTAAGCCAATCTGTTTGTATAAGGAGAAGTATAAAAAACGTGTTTTGTTGGGGCAACAAAGATATAATTAAATAACTGTTTTAATTTTAAAACAAAAATTTTTGAAAATAAATTGATCCTCTCTAAAGATTAAAAGATCAAGTTACTTTAGGGATAACAGCGTCATTTTTCTTGAGAGTTCCCATCGAAAGAAAAGTTTGCGACCTCGATG---" );\n-\t\tstrcpy( seq[i*13+3], "AAATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAAATTGTCTTTTAAATAAAGGCTTGAATGAAAGGTTGGACAAAGTATCATCTGTTTCTTAAATATTTATTGAATTTGACTTTTAAGTGAAAAGGCTTAAATAAATCAAAAAGACGATAAGACCCTATAAATCTTTACAATAAATATATTTTATATTTTAGTTTATAAGTGAATGATATATAAAAATATAGGTTTGTTGCGCTGGGGCGGCGTAGATATATAAATAAACTGTCTATAGTTTAAATACAATAATCATTGCTTAATATAAATTGATCCTTAAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG" );\n-\t\tstrcpy( seq[i*13+4], 
"AAATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAAATTGTCTTTTAAATAAAGGCTTGAATGAAAGGTTGGACAAAGTATCATCTGTTTCTTAAATATTTATTGAATTTGACTTTTAAGTGAAAAGGCTTAAATAAATCAAAAAGACGATAAGACCCTATAAATCTTTACAATAAATATATTTTATATTTTAGCTTATAAGTGTATAAGAAATAAAAATATAGGTTTGTTGCGCTGGGGCGGCGTAGATATATAAATAAACTGTCTATAATTTAAATACAATAATCATTGCTTAATACAAATTGATCCTTAAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG" );\n-\t\tstrcpy( seq[i*13+5], "ATATTAAAGAGCCGCAGTATTCTGACTGTGCGAAGGTAGCATAATAAATTGTCTTTTAAATGAAGGCTTGAATGAAAGGTTGGACAAAGTATCATCTGTTTCTTAAATATTTATTGAATTTGACTTTCAAGTGAAAAGGCTTGAATAAATTAAAAAGACGATAAGACCCTATAAATCTTTACAATAAACATGTTCTATATTTTAATTTATAATTGTATAAAATATTAAATTTGTAATTGTTTCGCTGGGGCGGCGTAGATATATAAATAAACTGT'..b'AATAATTATTGATTAATAAAATTGATCCTTAAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG--" );\n-\t\tstrcpy( seq[i*13+9], "TGATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAGATTGTCTTTTAAATGGGGGCTAGAATGAATGGTTGGACAAAGTATCGTCTGTTTCTTAAATAAATATTGAATTTGACTTTTAAGTTAAAAGGCTTAAATAAAATAAAAAGACGATAAGACCCTATAAATCTTTACATAAATTATATTTTAAATTTTAAGTTATAATTTTATAAAATATAAAAGTATAGTTTTGTTGTGCTGGGGCGGCATAGATATATAAATAAACTGTCTATAAATTTGAATCAATGATTGTTGATTAATGTGGTTGATCCTTTAAGAGATTAGAAGATTAAGTTACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-" );\n-\t\tstrcpy( seq[i*13+10], "TGATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAGATTGTCTTTTAAATGGGGGCTAGAATGAATGGTTGGACAAAGTATCATCTGTTTCTTAAATAAATATTGAATTTGACTTTTAAGTTAAAAGGCTTAAATAAAATAAAAAGACGATAAGACCCTATAAATCTTTACATAAATTATATTTTAAATTTTAATTTATAATTTTATAAAATATAAAAGTATAGTTTTGTTGTGCTGGGGCGGCATAGATATATAAATAAACTGTCTATAAATTTAAATCAATAGTTGTTGATTAATATAGTTGATCCTTTAAAAGATTAGAAGATTAAGTTACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-" );\n-\t\tstrcpy( seq[i*13+11], 
"TAATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAGATTGGCTTTTAAATGGAGGCTGGAATGAATGGTTGGACAAAGTATCATCTGTTTCTTAAATAAATATTGAATTTGACTTTTAAGTTAAAAGGCTTAAATAAAATAAAAAGACGATAAGACCCTATAAATCTTTACATAAATTATATTTTAAATTTTAACTTATAATTTTATAAAATATAAAAGTATAATTTTGTTGTGCTGGGGCGGCGTAGATATATAAATGAACTGTCTATGAAATTAAATCAATAATAGTTGATTATTAATATTGATCCTTTAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-" );\n-\t\tstrcpy( seq[i*13+12], "TAATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAGATTGGCTTTTAAATGGAGGCTGGAATGAATGGTTGGACAAAGTATCATCTGTTTCTTAAATAAATATTGAATTTGACTTTTAAGTTAAAAGGCTTAAATAAAATAAAAAGACGATAAGACCCTATAAATCTTTACATAAATTATATTTTAAATTTTAATTTATAATTTTATAAAATATAAAAGTATAATTTTGTTGTGCTGGGGCGGCGTAGATATATAAATGAACTGTCTATGAAATTAAATCAATAATAGTTGATTATTAATATTGATCCTTTAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-" );\n-\n-\t}\n-\n-\targc = 20;\n-\targv = (char **)calloc( argc, sizeof( char * ) );\n-\tfor( i=0; i<argc; i++ ) argv[i] = calloc( 100, sizeof( char ) );\n-\tstrcpy( argv[0], "disttbfast" );\n-\tstrcpy( argv[1], "-W" );\n-\tstrcpy( argv[2], "6" );\n-\tstrcpy( argv[3], "-b" );\n-\tstrcpy( argv[4], "62" );\n-\tstrcpy( argv[5], "-Q" );\n-\tstrcpy( argv[6], "100" );\n-\tstrcpy( argv[7], "-h" );\n-\tstrcpy( argv[8], "0" );\n-\tstrcpy( argv[9], "-F" );\n-\tstrcpy( argv[10], "-X" );\n-\tstrcpy( argv[11], "-s" );\n-\tstrcpy( argv[12], "0.0" );\n-\tstrcpy( argv[13], "-f" );\n-\tstrcpy( argv[14], "-1.53" );\n-\tstrcpy( argv[15], "-C" );\n-\tstrcpy( argv[16], "0" );\n-\tstrcpy( argv[17], "-D" ); // Necessary. 
DNA -> -D; Protein -> -P\n-\tstrcpy( argv[18], "-I" ); // --add\n-\tstrcpy( argv[19], "2" ); // --add\n-\n-#if 0\n-\tmlen = 5000;\n-\tmessage = (char *)calloc( mlen+1, sizeof( char ) );\n-\n-\tfprintf( stderr, "first run\\n" );\n-\tres = disttbfast( n, l, mlen, name, seq, &message, argc, argv, report );\n-\tfprintf( stderr, "second run\\n" );\n-\tres = disttbfast( n, l, mlen, name, seq, &message, argc, argv, report );\n-\tfprintf( stderr, "third run\\n" );\n-\tres = disttbfast( n, l, mlen, name, seq, &message, argc, argv, report );\n-\n-\tfprintf( stderr, "\\n\\n\\nmessage in interface = :%s:\\n", message );\n-\tfree( message );\n-#else\n-\tfprintf( stderr, "first run\\n" );\n-\tres = disttbfast( n, l, name, seq, argc, argv, report );\n-\tfprintf( stderr, "second run\\n" );\n-\tres = disttbfast( n, l, name, seq, argc, argv, chudan );\n-//\tfprintf( stderr, "third run\\n" );\n-//\tres = disttbfast( n, l, name, seq, argc, argv, report );\n-#endif\n-\n-\tif( res == GUI_LENGTHOVER )\n-\t{\n-\t\tfprintf( stderr, "length over!" );\n-\t}\n-\telse\n-\t{\n-\t\tfprintf( stderr, "res = %d\\n", res );\n-\t\tfprintf( stdout, "Output:\\n" );\n-\t\tfor( i=0; i<n; i++ ) \n-\t\t\tfprintf( stdout, "%s\\n", seq[i] );\n-\t}\n-\tfprintf( stderr, "argv = \\n" );\n-\tfor( i=0; i<argc; i++ )\n-\t\tfprintf( stderr, "%s ", argv[i] );\n-\tfprintf( stderr, "\\n" );\n-\t\n-\tfor( i=0; i<n; i++ ) free( seq[i] );\n-\tfree( seq );\n-\tfor( i=0; i<n; i++ ) free( name[i] );\n-\tfree( name );\n-\tfor( i=0; i<argc; i++ ) free( argv[i] );\n-\tfree( argv );\n-\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/io.c --- a/mafft/core/io.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,5370 +0,0 @@\n-#include "mltaln.h"\n-\n-static int upperCase = 0;\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-\n-char creverse( char f )\n-{\n-\tstatic char *table = NULL;\n-\tif( table == NULL )\n-\t{\n-\t\tint i;\n-\t\ttable = AllocateCharVec(0x80);\n-\t\tfor( i=0; i<0x80; i++ ) table[i] = i;\n-\t\ttable[\'A\'] = \'T\';\n-\t\ttable[\'C\'] = \'G\';\n-\t\ttable[\'G\'] = \'C\';\n-\t\ttable[\'T\'] = \'A\';\n-\t\ttable[\'U\'] = \'A\';\n-\t\ttable[\'M\'] = \'K\';\n-\t\ttable[\'R\'] = \'Y\';\n-\t\ttable[\'W\'] = \'W\';\n-\t\ttable[\'S\'] = \'S\';\n-\t\ttable[\'Y\'] = \'R\';\n-\t\ttable[\'K\'] = \'M\';\n-\t\ttable[\'V\'] = \'B\';\n-\t\ttable[\'H\'] = \'D\';\n-\t\ttable[\'D\'] = \'H\';\n-\t\ttable[\'B\'] = \'V\';\n-\t\ttable[\'N\'] = \'N\';\n-\t\ttable[\'a\'] = \'t\';\n-\t\ttable[\'c\'] = \'g\';\n-\t\ttable[\'g\'] = \'c\';\n-\t\ttable[\'t\'] = \'a\';\n-\t\ttable[\'u\'] = \'a\';\n-\t\ttable[\'m\'] = \'k\';\n-\t\ttable[\'r\'] = \'y\';\n-\t\ttable[\'w\'] = \'w\';\n-\t\ttable[\'s\'] = \'s\';\n-\t\ttable[\'y\'] = \'r\';\n-\t\ttable[\'k\'] = \'m\';\n-\t\ttable[\'v\'] = \'b\';\n-\t\ttable[\'h\'] = \'d\';\n-\t\ttable[\'d\'] = \'h\';\n-\t\ttable[\'b\'] = \'v\';\n-\t\ttable[\'n\'] = \'n\';\n-//\t\ttable[\'-\'] = \'-\';\n-//\t\ttable[\'.\'] = \'.\';\n-//\t\ttable[\'*\'] = \'*\';\n-\t}\n-\treturn( table[(int)f] );\n-}\n-\n-void sreverse( char *r, char *s )\n-{\n-\tr += strlen( s );\n-\t*r-- = 0;\n-\twhile( *s )\n-\t\t*r-- = creverse( *s++ );\n-//\t\t*r-- = ( *s++ );\n-}\n-\n-void gappick_samestring( char *seq )\n-{\n-\tchar *aseq = seq;\n-\n-\tfor( ; *seq != 0; seq++ )\n-\t{\n-\t\tif( *seq != \'-\' )\n-\t\t\t*aseq++ = *seq;\n-\t}\n-\t*aseq = 0;\n-}\n-\n-#if 0\n-\n-static int addlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, int skip )\n-{\n-\tint pos1, pos2, start1, start2, end1, end2;\n-\tchar *pt1, *pt2;\n-\tint iscore;\n-\tint isumscore;\n-\tint sumoverlap;\n-\tLocalHom *tmppt;\n-\tint st;\n-\tint nlocalhom = 0;\n-\tpt1 = 
al1; pt2 = al2;\n-\tpos1 = off1; pos2 = off2;\n-\n-\tisumscore = 0;\n-\tsumoverlap = 0;\n-\n-#if 0\n-\tfprintf( stderr, "nlocalhom = %d in addlocalhom\\n", nlocalhom );\n-\tfprintf( stderr, "al1 = %s, al2 = %s\\n", al1, al2 );\n-\tfprintf( stderr, "off1 = %d, off2 = %d\\n", off1, off2 );\n-\tfprintf( stderr, "localhopt = %p, skip = %d\\n", localhompt, skip );\n-\tfprintf( stderr, "pt1 = \\n%s\\n, pt2 = \\n%s\\n", pt1, pt2 );\n-#endif\n-\n-\tif( skip )\n-\t{\n-\t\twhile( --skip > 0 ) localhompt = localhompt->next;\n-\t\tlocalhompt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );\n-\t\tlocalhompt = localhompt->next;\n-//\t\tfprintf( stderr, "tmppt = %p, localhompt = %p\\n", tmppt, localhompt );\n-\t}\n-\ttmppt = localhompt;\n-\n-\tst = 0;\n-\tiscore = 0;\n-\twhile( *pt1 != 0 )\n-\t{\n-//\t\tfprintf( stderr, "In in while loop\\n" );\n-//\t\tfprintf( stderr, "pt = %c, %c, st=%d\\n", *pt1, *pt2, st );\n-\t\tif( st == 1 && ( *pt1 == \'-\' || *pt2 == \'-\' ) )\n-\t\t{\n-\t\t\tend1 = pos1 - 1;\n-\t\t\tend2 = pos2 - 1;\n-\n-\t\t\tif( nlocalhom++ > 0 )\n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "reallocating ...\\n" );\n-\t\t\t\ttmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );\n-//\t\t\t\tfprintf( stderr, "done\\n" );\n-\t\t\t\ttmppt = tmppt->next;\n-\t\t\t\ttmppt->next = NULL;\n-\t\t\t}\n-\t\t\ttmppt->start1 = start1;\n-\t\t\ttmppt->start2 = start2;\n-\t\t\ttmppt->end1 = end1 ;\n-\t\t\ttmppt->end2 = end2 ;\n-\n-#if 1\n-\t\t\tisumscore += iscore;\n-\t\t\tsumoverlap += end2-start2+1;\n-#else\n-\t\t\ttmppt->overlapaa = end2-start2+1;\n-\t\t\ttmppt->opt = iscore * 5.8 / 600;\n-\t\t\ttmppt->overlapaa = overlapaa;\n-\t\t\ttmppt->opt = (double)opt;\n-#endif\n-\n-#if 0\n-\t\t\tfprintf( stderr, "iscore (1)= %d\\n", iscore );\n-\t\t\tfprintf( stderr, "al1: %d - %d\\n", start1, end1 );\n-\t\t\tfprintf( stderr, "al2: %d - %d\\n", start2, end2 );\n-#endif\n-\t\t\tiscore = 0;\n-\t\t\tst = 0;\n-\t\t}\n-\t\telse if( *pt1 != \'-\' && *pt2 != \'-\' )\n-\t\t{\n-\t\t\tif( st == 0 
)\n-\t\t\t{\n-\t\t\t\tstart1 = pos1; start2 = pos2;\n-\t\t\t\tst = 1;\n-\t\t\t}\n-\t\t\tiscore += n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]];\n-//\t\t\tfprintf( stderr, "%c-%c, score(0) = %d\\n", *pt1, *pt2, iscore );\n-\t\t}\n-\t\tif( *pt1++ != \'-\' ) pos1++;\n-\t\tif( *pt2++ != \'-\' ) pos2++;\n-\t}\n-\n-\tif( st )\n-\t{\n-\t\tif( nlocalhom++ > 0 )\n-\t\t{\n-//\t\t\tfprintf( stderr, "reallocating ...\\n" );\n-\t\t\ttmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );\n-//\t\t\tfprintf( stderr, "done\\n" );\n-\t\t\ttmppt = tmppt->next;\n-\t\t\ttmppt->next = NULL;\n-\t\t}\n-\t\tend1 = pos1 - 1;\n-\t'..b' int q1, int q2, int *of1, int *of2, int sumlen )\n-{\n-\tchar gett[1000];\n-\tint *maptoseq1;\n-\tint *maptoseq2;\n-\tchar dumc;\n-\tint dumi;\n-\tchar sinseq[100], sinaln[100];\n-\tint posinseq, posinaln;\n-\tint alnlen;\n-\tint i;\n-\tint pos1, pos2;\n-\tchar *pa1, *pa2;\n-\tchar qstr[1000];\n-\n-\t*of1 = -1;\n-\t*of2 = -1;\n-\n-\tmaptoseq1 = AllocateIntVec( sumlen+1 );\n-\tmaptoseq2 = AllocateIntVec( sumlen+1 );\n-\n-\tposinaln = 0; // foldalign ga alingment wo kaesanaitok no tame.\n-\n-\twhile( !feof( fp ) )\n-\t{\n-\t\tfgets( gett, 999, fp );\n-\t\tif( !strncmp( gett, "; ALIGNING", 10 ) ) break;\n-\t}\n-\tsprintf( qstr, "; ALIGNING %d against %d\\n", q1+1, q2+1 );\n-\tif( strcmp( gett, qstr ) )\n-\t{\n-\t\tfprintf( stderr, "Error in FOLDALIGN\\n" );\n-\t\tfprintf( stderr, "qstr = %s, but gett = %s\\n", qstr, gett );\n-\t\texit( 1 );\n-\t}\n-\n-\twhile( !feof( fp ) )\n-\t{\n-\t\tfgets( gett, 999, fp );\n-\t\tif( !strncmp( gett, "; --------", 10 ) ) break;\n-\t}\n-\n-\n-\twhile( !feof( fp ) )\n-\t{\n-\t\tfgets( gett, 999, fp );\n-\t\tif( !strncmp( gett, "; ********", 10 ) ) break;\n-//\t\tfprintf( stderr, "gett = %s\\n", gett );\n-\t\tsscanf( gett, "%c %c %s %s %d %d", &dumc, &dumc, sinseq, sinaln, &dumi, &dumi );\n-\t\tposinaln = atoi( sinaln );\n-\t\tposinseq = atoi( sinseq );\n-//\t\tfprintf( stderr, "posinseq = %d\\n", posinseq 
);\n-//\t\tfprintf( stderr, "posinaln = %d\\n", posinaln );\n-\t\tmaptoseq1[posinaln-1] = posinseq-1;\n-\t}\n-\talnlen = posinaln;\n-\n-\twhile( !feof( fp ) )\n-\t{\n-\t\tfgets( gett, 999, fp );\n-\t\tif( !strncmp( gett, "; --------", 10 ) ) break;\n-\t}\n-\n-\twhile( !feof( fp ) )\n-\t{\n-\t\tfgets( gett, 999, fp );\n-\t\tif( !strncmp( gett, "; ********", 10 ) ) break;\n-//\t\tfprintf( stderr, "gett = %s\\n", gett );\n-\t\tsscanf( gett, "%c %c %s %s %d %d", &dumc, &dumc, sinseq, sinaln, &dumi, &dumi );\n-\t\tposinaln = atof( sinaln );\n-\t\tposinseq = atof( sinseq );\n-//\t\tfprintf( stderr, "posinseq = %d\\n", posinseq );\n-//\t\tfprintf( stderr, "posinaln = %d\\n", posinaln );\n-\t\tmaptoseq2[posinaln-1] = posinseq-1;\n-\t}\n-\tif( alnlen != posinaln )\n-\t{\n-\t\tfprintf( stderr, "Error in foldalign?\\n" );\n-\t\texit( 1 );\n-\t}\n-\n-\tpa1 = aln1;\n-\tpa2 = aln2;\n-\tfor( i=0; i<alnlen; i++ )\n-\t{\n-\t\tpos1 = maptoseq1[i];\n-\t\tpos2 = maptoseq2[i];\n-\n-\t\tif( pos1 > -1 )\n-\t\t\t*pa1++ = s1[pos1];\n-\t\telse\n-\t\t\t*pa1++ = \'-\';\n-\n-\t\tif( pos2 > -1 )\n-\t\t\t*pa2++ = s2[pos2];\n-\t\telse\n-\t\t\t*pa2++ = \'-\';\n-\t}\n-\t*pa1 = 0;\n-\t*pa2 = 0;\n-\n-\t*of1 = 0;\n-\tfor( i=0; i<alnlen; i++ )\n-\t{\n-\t\t*of1 = maptoseq1[i];\n-\t\tif( *of1 > -1 ) break;\n-\t}\n-\t*of2 = 0;\n-\tfor( i=0; i<alnlen; i++ )\n-\t{\n-\t\t*of2 = maptoseq2[i];\n-\t\tif( *of2 > -1 ) break;\n-\t}\n-\n-//\tfprintf( stderr, "*of1=%d, aln1 = :%s:\\n", *of1, aln1 );\n-//\tfprintf( stderr, "*of2=%d, aln2 = :%s:\\n", *of2, aln2 );\n-\n-\tfree( maptoseq1 );\n-\tfree( maptoseq2 );\n-}\n-\n-int myatoi( char *in )\n-{\n-\tif( in == NULL )\n-\t{\n-\t\tfprintf( stderr, "Error in myatoi()\\n" );\n-\t\texit( 1 );\n-\t}\n-\treturn( atoi( in ) );\n-}\n-\n-float myatof( char *in )\n-{\n-\tif( in == NULL )\n-\t{\n-\t\tfprintf( stderr, "Error in myatof()\\n" );\n-\t\texit( 1 );\n-\t}\n-\treturn( atof( in ) );\n-}\n-\n-void reporterr( const char *str, ... 
)\n-{\n-//\tstatic int loglen = 0;\n-\tva_list args;\n-\n-\tif( gmsg )\n-\t{\n-# if 1 // ato de sakujo\n-\t\tstatic FILE *errtmpfp = NULL;\n-\t\tif( errtmpfp == NULL )\n-\t\t\terrtmpfp = fopen( "maffterr", "w" );\n-\t\telse\n-\t\t\terrtmpfp = fopen( "maffterr", "a" );\n-\t\tva_start( args, str );\n-\t\tvfprintf( errtmpfp, str, args );\n-\t\tva_end( args );\n-\t\tfclose( errtmpfp );\n-#endif\n-\n-#if 0\n-\t\tchar *tmpptr;\n-\t\ttmpptr = (char *)realloc( *gmsg, (loglen+10000) * sizeof( char ) );\n-\t\tif( tmpptr == NULL )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot relloc *gmsg\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\t\t*gmsg = tmpptr;\n-\t\tva_start( args, str );\n-\t\tloglen += vsprintf( *gmsg + loglen, str, args );\n-\t\tva_end( args );\n-\n-\n-\t\tva_start( args, str );\n-\t\tloglen += vsprintf( *gmsg + loglen, str, args );\n-\t\tva_end( args );\n-\t\t*(*gmsg + loglen) = 0;\n-\t\tif( loglen > gmsglen - 100 ) loglen = 0; // tekitou\n-#endif\n-\n-\t}\n-\telse\n-\t{\n-\t\tva_start( args, str );\n-\t\tvfprintf( stderr, str, args );\n-\t\tva_end( args );\n-//\t\tfflush( stderr ); // iru?\n-\t}\n-\treturn;\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/iteration.c --- a/mafft/core/iteration.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,412 +0,0 @@\n- /* iteration ( algorithm C ) */\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-\n-static void Writeoptions( FILE *fp )\n-{\n- if( scoremtx == 1 )\n- fprintf( fp, "Dayhoff( ... )\\n" );\n- else if( scoremtx == -1 )\n- fprintf( fp, "DNA\\n" );\n- else if( scoremtx == 2 )\n- fprintf( fp, "Miyata-Yasunaga\\n" );\n-\telse\n-\t\tfprintf( fp, "JTT %dPAM\\n", pamN );\n-\n-\tif( scoremtx == 0 )\n-\t fprintf( fp, "Gap Penalty = %+d, %+d\\n", penalty, offset );\n-\telse\n-\t fprintf( fp, "Gap Penalty = %+d\\n", penalty );\n-\n- fprintf( fp, "marginal score to search : best - %f\\n", cut );\n- if( scmtd == 3 )\n- fprintf( fp, "score of rnd or sco\\n" );\n- else if( scmtd == 4 )\n- fprintf( fp, "score = sigma( score for a pair of homologous amino acids ) / ( number of amino acids pairs )\\n" );\n- else if( scmtd == 5 )\n- fprintf( fp, "score : SP\\n" );\n- if( mix )\n- fprintf( fp, "?\\n" );\n- else\n- { \n- if( weight == 2 )\n- fprintf( fp, "weighted, geta2 = %f\\n", geta2 );\n- else if( weight == 3 )\n- {\n- if( scmtd == 4 )\n- fprintf( fp, "reversely weighted in function \'align\', unweighted in function \'score_calc\'\\n" );\n- else\n- fprintf( fp, "weighted like ClustalW," );\n- }\n- else\n- fprintf( fp, "unweighted\\n" );\n- }\n- if( weight && utree )\n- {\n- fprintf( fp, "using tree defined by the file hat2 with simplified UPG method\\n" );\n- }\n- if( weight && !utree )\n- fprintf( fp, "using temporary tree by simplified UPG method\\n" );\n- fprintf( fp, "Algorithm %c\\n", alg );\n-}\n-\n-\n-\n-\n-char **align0( float *wm, char **aseq, char *seq, double effarr[M], int icyc, int ex )\n-{\n- char **result;\n-\n- if( alg == \'B\' )\n- {\n-\t\tErrorExit( "Sorry!" 
);\n-\t/*\n- if( outgap == 0 )\n- {\n- result = alignm1_o( wm, aseq, seq, scmx, effarr, icyc, ex );\n- }\n- if( outgap == 1 )\n- {\n- result = alignm1( wm, aseq, seq, scmx, effarr, icyc, ex );\n- }\n-\t*/\n- }\n- else if( alg == \'C\' )\n- {\n- result = Calignm1( wm, aseq, seq, effarr, icyc, ex );\n- }\n- return( result );\n-}\n- \n-\n-double score_m_1_0( char **aseq, int locnjob, int s, double **eff, double effarr[M] )\n-{\n- double x;\n-\n- if( alg == \'B\' )\n- {\n-\t\tErrorExit( "Sorry!" );\n- }\n- if( alg == \'C\' )\n- {\n- x = Cscore_m_1( aseq, locnjob, s, eff );\n- }\n- fprintf( stderr, "in score_m_1_0 %f\\n", x );\n- return( x );\n-}\n-\n-int iteration( int locnjob, char name[M][B], int nlen[M], char **aseq, char **bseq, int ***topol, double **len, double **eff ) \n-{\n- double tscore, mscore;\n- int identity;\n- static char *mseq1, **mseq2 = NULL;\n-\tstatic char **result;\n-\tint i, l;\n-\tstatic double effarr[M];\n-\tint s;\n-\tint sss[2];\n-\tchar ou;\n-\tint alloclen; \n-\tint resultlen;\n-\tint nlenmax0 = nlenmax;\n-\tFILE *prep;\n-\tchar sai[M];\n-\tchar sai1[M];\n-\tchar sai2[M];\n-#if 0\n-\tdouble his[2][M][MAXITERATION/locnjob+1];\n-#else\n-\tdouble ***his;\n-#endif\n-\tint cyc[2];\n-\tchar shindou = 0;\n-\tfloat wm;\n-\tint returnvalue;\n-\n- for( i=0; i<locnjob; i++ ) \n- {\n-\t\tsai[i] = 1;\n- sai1[i] = 1;\n- sai2[i] = 2;\n- }\n- sai[locnjob] = sai1[locnjob] = sai2[locnjob] = 0;\n-\n-\n-\tWriteoptions( trap_g );\n-\n-\this = AllocateDoubleCub( 2, M, MAXITERATION/locnjob+1 );\n-\n-\tif( mseq2 == NULL )\n-\t{\n- \talloclen = nlenmax * 2.0;\n- \t\tAllocateTmpSeqs( &mseq2, &mseq1, alloclen );\n-\t}\n-\n-\tif( !tbitr && !tbweight )\n-\t{\n-\t\twritePre( locnjob, name, nlen, aseq, 0 );\n-\n-#if 0\n-\t\tprep = fopen( "best", "w" );\n-\t\tWrite( prep, locnjob, name, nlen, aseq );\n-\t\tfclose( prep );\n-#endif\n-\t}\n-\t\n-\n-\n-\n-\ttreeconstruction( aseq, locnjob, topol, len, eff );\n-\ttscore = score_calc0( aseq, locnjob, eff, 0 );\n-\n-#if DEBUG\n- 
fprintf( stderr, "eff mtx in iteration\\n" );\n- for( i=0; i<locnjob; i++ )\n- {\n- for( j=0; j<locnjob; j++ ) \n- {\n- '..b'-\t\tif( resultlen > alloclen )\n-\t\t{\n-\t\t\tif( resultlen > nlenmax0*3 || resultlen > N )\n-\t\t\t{\n-\t\t\t\tfprintf(stderr, "Error in main1\\n");\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-\t\t\tFreeTmpSeqs( mseq2, mseq1 );\n-\t\t\talloclen = strlen( result[0] ) * 2.0;\n-\t\t\tfprintf( stderr, "\\n\\ntrying to allocate TmpSeqs\\n\\n" );\n-\t\t\tAllocateTmpSeqs( &mseq2, &mseq1, alloclen );\n-\t\t}\n-\t\tfor( i=0; i<locnjob; i++ ) strcpy( mseq2[i], result[i] ); \n-\n-\t\tif( checkC )\n-\t\t\tfprintf( stderr, "wm in iteration == %f\\n", wm );\n-\n-\t\tstrcpy( mseq1, mseq2[locnjob-1] );\n-/*\n-\t\tWrite( stdout, locnjob, name, nlen, mseq2 );\n-*/\n- for( i=locnjob-2; i>=s; i-- ) strcpy( mseq2[i+1], mseq2[i] );\n- strcpy( mseq2[s], mseq1 );\n-\t\tif( checkC )\n-\t\t{\n-\t\t\ttmpscore1= score_m_1_0( mseq2, locnjob, s, eff, effarr );\n-\t\t\tfprintf( stderr, "pick up %d, before ALIGNM1 score_m_1_0 = %f\\n", s+1, tmpscore );\n-\t\t\tfprintf( stderr, "pick up %d, after ALIGNM1 score_m_1_0 = %f\\n", s+1, tmpscore1 );\n-\t\t\tif( tmpscore1 < tmpscore ) \n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "\\7" );\n-\t\t\t\tfprintf( trap_g, ">>>>>>>n\\n" );\n-\t\t\t}\n-\t\t\tif( fabs( wm - tmpscore1 ) / wm > 0.001 ) \n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "\\7sorry\\n" );\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-\t\t}\n-\n- identity = !strcmp( mseq2[s], aseq[s] );\n- if( s == locnjob - 1 ) ss = 0; else ss=s+1;\n-\n- identity *= !strcmp( mseq2[ss], aseq[ss] );\n-\n- \t if( !identity ) \n-\t\t{\n-\t\t\ttmpscore = score_calc0( mseq2, locnjob, eff, s );\n-\t\t}\n-\t\telse tmpscore = tscore;\n-\n-\t\tif( disp )\n-\t\t{\n- \t\tfprintf( stderr, "% 3d % 3d / the rest \\n", l+1, s+1 );\n- \t\tdisplay( mseq2, locnjob );\n-\t\t}\n- \tfprintf( stderr, "% 3d % 3d / the rest \\n", l+1, s+1 );\n- \tfprintf( stderr, "score = %f mscore = %f ", tmpscore, mscore );\n-\n- \tfprintf( trap_g, "%#4d %#4d 
/ the rest ", l+1, s+1 );\n- \tfprintf( trap_g, "score = %f mscore = %f ", tmpscore, mscore );\n-\n-\t\tif( identity ) \n-\t\t{\n-\t\t\tfprintf( stderr, "( identical )\\n" );\n-\t\t\tfprintf( trap_g, "( identical )\\n" );\n-\t\t\tsai[s] = 2;\n-\t\t}\n-\n- else if( tmpscore > mscore - cut )\n- {\n- fprintf( stderr, "accepted\\n" );\n- fprintf( trap_g, "accepted\\n" );\n- for( i=0; i<locnjob; i++ ) strcpy( aseq[i], mseq2[i] );\n-\t\t\tstrcpy( sai, sai1 ); /* kokoka ? */\n-\t\t\tif( !tbitr && !tbweight )\n-\t\t\t{\n-\t\t\t\twritePre( locnjob, name, nlen, aseq, 0 );\n-\t\t\t}\n-\t\t\tstrcpy( sai, sai1 );\n-\t\t\ttscore = tmpscore;\n-\t\t\t/*\n-\t\t\ttscore = tmpscore = score_calc0( aseq, locnjob, eff, s ); * ? *\n-\t\t\t*/\n- \t\tif( tmpscore > mscore ) \n-\t\t\t{\n- \tfor( i=0; i<locnjob; i++ ) strcpy( bseq[i], mseq2[i] );\n-\t\t\t\ttreeconstruction( bseq, locnjob, topol, len, eff );\n-\t\t\t\ttscore = mscore = score_calc0( bseq, locnjob, eff, s );\n-\t\t\t\tfprintf( trap_g, " -> %f\\n", mscore );\n-\t\t\t\tstrcpy( sai, sai1 ); /* kokoka ? 
*/\n-#if 0\n-\t\t\t\tif( !tbitr && !tbweight )\n-\t\t\t\t{\tprep = fopen( "best", "w" );\n-\t\t\t\t\tWrite( prep, locnjob, name, nlen, bseq );\n-\t\t\t\t\tfclose( prep );\n-\t\t\t\t}\n-#endif\n-\t\t\t}\n- }\n-\n-\t\telse\n-\t\t{\n-\t\t\tif( tmpscore == tscore )\n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "occational coincidence \\n" );\n-\t\t\t\tfprintf( trap_g, "occational coincidence\\n" );\n-\t\t\t}\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "rejected\\n" );\n- \t fprintf( trap_g, "rejected\\n" );\n-\t\t\t}\n-\t\t\tfor( i=0; i<locnjob; i++ ) strcpy( aseq[i], bseq[i] );\n-\t\t\ttscore = mscore;\n-\t\t\tsai[s] = 2;\n-\t\t}\n-\n-/*\n-\t\tprep = fopen( "cur", "w" );\n-\t\tWrite( prep, locnjob, name, nlen, mseq2 );\n-\t\tfclose( prep );\n-*/\n-\n-\t\this[ou][s][cyc[ou]] = tmpscore;\n-\t\tif( !strcmp( sai, sai2 ) )\n-\t\t{\n-\t\t\treturnvalue = 0;\n-\t\t\tfprintf( trap_g, "converged\\n" );\n-\t\t\tbreak;\n-\t\t}\n-\t\tfor( i=cyc[ou]-1; i>0; i-- ) \n-\t\t{\n-\t\t\tif( tmpscore == his[ou][s][i] ) \n-\t\t\t{\n-\t\t\t\tshindou = 1;\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t}\n-\t\tfprintf( stderr, "\\n" );\n-\t\tif( shindou == 1 )\n-\t\t{\n-\t\t\treturnvalue = -1;\n-\t\t\tfprintf( trap_g, "oscillating\\n" );\n-\t\t\tbreak;\n-\t\t}\n-\t}\n-\tif( l == MAXITERATION ) returnvalue = -2;\n-\tFreeDoubleCub( his );\n-\treturn( returnvalue );\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mafft-distance.c --- a/mafft/core/mafft-distance.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,425 +0,0 @@\n-#include "mltaln.h"\n-#include "mtxutl.h"\n-\n-#define DEBUG 0\n-#define TEST 0\n-\n-#define END_OF_VEC -1\n-\n-static int maxl;\n-static int tsize;\n-static char outputformat;\n-static float lenfaca, lenfacb, lenfacc, lenfacd;\n-static int nadd;\n-#define PLENFACA 0.01\n-#define PLENFACB 10000\n-#define PLENFACC 10000\n-#define PLENFACD 0.1\n-#define DLENFACA 0.01\n-#define DLENFACB 2500\n-#define DLENFACC 2500\n-#define DLENFACD 0.1\n-\n-void arguments( int argc, char *argv[] )\n-{\n-\tint c;\n-\n-\tinputfile = NULL;\n-\toutputformat = \'s\';\n-\tscoremtx = 1;\n-\tnblosum = 62;\n-\tdorp = NOTSPECIFIED;\n-\tnadd = 0;\n-\talg = \'X\';\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( (c = *++argv[0]) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "inputfile = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'I\':\n-\t\t\t\t\tnadd = myatoi(*++argv);\n-\t\t\t\t\tif( nadd == 0 )\n-\t\t\t\t\t{\n-\t\t\t\t\t\tfprintf( stderr, "nadd = %d?\\n", nadd );\n-\t\t\t\t\t\texit( 1 );\n-\t\t\t\t\t}\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'p\':\n-\t\t\t\t\toutputformat = \'p\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'D\':\n-\t\t\t\t\tdorp = \'d\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'P\':\n-\t\t\t\t\tdorp = \'p\';\n-\t\t\t\t\tbreak;\n- default:\n- fprintf( stderr, "illegal option %c\\n", c );\n- argc = 0;\n- break;\n- }\n-\t\t}\n-\t\tnextoption:\n-\t\t\t;\n-\t}\n-\tif( inputfile == NULL )\n-\t{\n-\t\targc--;\n-\t\tinputfile = *argv;\n-\t\tfprintf( stderr, "inputfile = %s\\n", inputfile );\n-\t}\n- if( argc != 0 )\n- {\n- fprintf( stderr, "Usage: mafft-distance [-PD] [-i inputfile] inputfile > outputfile\\n" );\n- exit( 1 );\n- }\n-}\n-\n-void seq_grp_nuc( int *grp, char *seq )\n-{\n-\tint tmp;\n-\tint *grpbk = grp;\n-\twhile( *seq )\n-\t{\n-\t\ttmp = amino_grp[(int)*seq++];\n-\t\tif( tmp < 4 )\n-\t\t\t*grp++ = 
tmp;\n-\t\telse\n-\t\t\tfprintf( stderr, "WARNING : Unknown character %c\\r", *(seq-1) );\n-\t}\n-\t*grp = END_OF_VEC;\n-\tif( grp - grpbk < 6 )\n-\t{\n-\t\tfprintf( stderr, "\\n\\nWARNING: Too short.\\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\\n\\n\\n" );\n-//\t\texit( 1 );\n-\t\t*grpbk = -1;\n-\t}\n-}\n-\n-void seq_grp( int *grp, char *seq )\n-{\n-\tint tmp;\n-\tint *grpbk = grp;\n-\twhile( *seq )\n-\t{\n-\t\ttmp = amino_grp[(int)*seq++];\n-\t\tif( tmp < 6 )\n-\t\t\t*grp++ = tmp;\n-\t\telse\n-\t\t\tfprintf( stderr, "WARNING : Unknown character %c\\r", *(seq-1) );\n-\t}\n-\t*grp = END_OF_VEC;\n-\tif( grp - grpbk < 6 )\n-\t{\n-\t\tfprintf( stderr, "\\n\\nWARNING: Too short.\\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\\n\\n\\n" );\n-//\t\texit( 1 );\n-\t\t*grpbk = -1;\n-\t}\n-}\n-\n-void makecompositiontable_p( short *table, int *pointt )\n-{\n-\tint point;\n-\n-\twhile( ( point = *pointt++ ) != END_OF_VEC )\n-\t\ttable[point]++;\n-}\n-\n-int commonsextet_p( short *table, int *pointt )\n-{\n-\tint value = 0;\n-\tshort tmp;\n-\tint point;\n-\tstatic short *memo = NULL;\n-\tstatic int *ct = NULL;\n-\tstatic int *cp;\n-\n-\tif( *pointt == -1 )\n-\t\treturn( 0 );\n-\n-\tif( !memo )\n-\t{\n-\t\tmemo = (short *)calloc( tsize, sizeof( short ) );\n-\t\tif( !memo ) ErrorExit( "Cannot allocate memo\\n" );\n-\t\tct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) );\n-\t\tif( !ct ) ErrorExit( "Cannot allocate memo\\n" );\n-\t}\n-\n-\tcp = ct;\n-\twhile( ( point = *pointt++ ) != END_OF_VEC )\n-\t{\n-\t\ttmp = memo[point]++;\n-\t\tif( tmp < table[point] )\n-\t\t\tvalue++;\n-\t\tif( tmp == 0 ) *cp++ = point;\n-//\t\tfprintf( stderr, "cp - ct = %d (tsize = %d)\\n", cp - ct, tsize );\n-\t}\n-\t*cp = END_OF_VEC;\n-\t\n-\tcp = ct;\n-\twhile( *cp != END_OF_VEC )\n-\t\tmemo[*cp++] = 0;\n-\n-\treturn( value );\n-}\n-\n-void makepointtable_nuc( int *pointt, int *n )\n-{\n-\tint point;\n-\tregister int *p;\n-\n-\tif( *n == -1 
)\n-\t{\n-\t\t*pointt = -1;\n-\t\treturn;\n-\t}\n-\n-\tp = n;\n-\tpoint = *n++ * 1024;\n-\tpoint += *n++ * 256;\n-\tpoint += *n++ * 64;\n-\tpoint += *n++ * 16;\n-\tpoint += *n++ * 4;\n-\tpoint += *n++;\n-\t*pointt++ = point;\n-\n-\twhile( *n != END_OF_VEC )\n-\t{\n-\t\tpoint -= *p++ * 1024;\n-\t\tpoint *= 4;\n-\t\tpoint += *n++;\n-\t\t*pointt++ = point;\n-\t}\n-\t*pointt = END_OF_V'..b'-\t\tfprintf( stderr, "At least 2 sequences should be input!\\n"\n-\t\t\t\t\t\t "Only %d sequence found.\\n", njob );\n-\t\texit( 1 );\n-\t}\n-\n-\ttmpseq = AllocateCharVec( nlenmax+1 );\n-\tseq = AllocateCharMtx( njob, nlenmax+1 );\n-\tgrpseq = AllocateIntVec( nlenmax+1 );\n-\tpointt = AllocateIntMtx( njob, nlenmax+1 );\n-\tmtxself = AllocateDoubleVec( njob );\n-\tpamN = NOTSPECIFIED;\n-\tname = AllocateCharMtx( njob, B );\n-\tnlen = AllocateIntVec( njob );\n-\n-#if 0\n-\tFRead( infp, name, nlen, seq );\n-#else\n-\treadData_pointer( infp, name, nlen, seq );\n-#endif\n-\n-\tfclose( infp );\n-\n-\tconstants( njob, seq );\n-\n-\n-\tif( nadd ) outputformat = \'s\';\n-\tnorg = njob - nadd;\n-\n-\tif( dorp == \'d\' ) tsize = (int)pow( 4, 6 );\n-\telse tsize = (int)pow( 6, 6 );\n-\n-\tif( dorp == \'d\' )\n-\t{\n-\t\tlenfaca = DLENFACA;\n-\t\tlenfacb = DLENFACB;\n-\t\tlenfacc = DLENFACC;\n-\t\tlenfacd = DLENFACD;\n-\t}\n-\telse \n-\t{\n-\t\tlenfaca = PLENFACA;\n-\t\tlenfacb = PLENFACB;\n-\t\tlenfacc = PLENFACC;\n-\t\tlenfacd = PLENFACD;\n-\t}\n-\n-\tmaxl = 0;\n-\tfor( i=0; i<njob; i++ ) \n-\t{\n-\t\tgappick0( tmpseq, seq[i] );\n-\t\tnlen[i] = strlen( tmpseq );\n-//\t\tif( nlen[i] < 6 )\n-//\t\t{\n-//\t\t\tfprintf( stderr, "Seq %d, too short, %d characters\\n", i+1, nlen[i] );\n-//\t\t\texit( 1 );\n-//\t\t}\n-\t\tif( nlen[i] > maxl ) maxl = nlen[i];\n-\t\tif( dorp == \'d\' ) /* nuc */\n-\t\t{\n-\t\t\tseq_grp_nuc( grpseq, tmpseq );\n-\t\t\tmakepointtable_nuc( pointt[i], grpseq );\n-\t\t}\n-\t\telse /* amino */\n-\t\t{\n-\t\t\tseq_grp( grpseq, tmpseq );\n-\t\t\tmakepointtable( pointt[i], 
grpseq );\n-\t\t}\n-\t}\n-\tfprintf( stderr, "\\nCalculating i-i scores ... " );\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\ttable1 = (short *)calloc( tsize, sizeof( short ) );\n-\t\tif( !table1 ) ErrorExit( "Cannot allocate table1\\n" );\n-\t\tmakecompositiontable_p( table1, pointt[i] );\n-\n-\t\tscore = commonsextet_p( table1, pointt[i] );\n-\t\tmtxself[i] = score;\n-\t\tfree( table1 );\n-\t}\n-\n-\tfprintf( stderr, "done.\\n" );\n-\tfprintf( stderr, "\\nCalculating i-j scores ... \\n" );\n-\tif( outputformat == \'p\' ) fprintf( stdout, "%-5d", njob );\n-\tfor( i=0; i<norg; i++ )\n-\t{\n-\t\tif( outputformat == \'p\' ) fprintf( stdout, "\\n%-9d ", i+1 );\n-\t\ttable1 = (short *)calloc( tsize, sizeof( short ) );\n-\t\tif( !table1 ) ErrorExit( "Cannot allocate table1\\n" );\n-\t\tif( i % 10 == 0 )\n-\t\t{\n-\t\t\tfprintf( stderr, "%4d / %4d\\r", i+1, njob );\n-\t\t}\n-\t\tmakecompositiontable_p( table1, pointt[i] );\n-\n-\n-\t\tif( nadd == 0 )\n-\t\t{\n-\t\t\tif( outputformat == \'p\' ) initj = 0;\n-\t\t\telse initj = i+1;\n-\t\t}\n-\t\telse \n-\t\t{\n-\t\t\tinitj = norg;\n-\t\t}\n-\t\tfor( j=initj; j<njob; j++ ) \n-\t\t{\n-\t\t\tif( nlen[i] > nlen[j] )\n-\t\t\t{\n-\t\t\t\tlonger=(float)nlen[i];\n-\t\t\t\tshorter=(float)nlen[j];\n-\t\t\t}\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\tlonger=(float)nlen[j];\n-\t\t\t\tshorter=(float)nlen[i];\n-\t\t\t}\n-//\t\t\tlenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD );\n-\t\t\tlenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );\n-//\t\t\tlenfac = 1.0;\n-//\t\t\tfprintf( stderr, "lenfac = %f (%.0f,%.0f)\\n", lenfac, longer, shorter );\n-\t\t\tscore = commonsextet_p( table1, pointt[j] );\n-\t\t\tbunbo = MIN( mtxself[i], mtxself[j] );\n-\t\t\tif( outputformat == \'p\' )\n-\t\t\t{\n-\t\t\t\tif( bunbo == 0.0 )\n-\t\t\t\t\tfprintf( stdout, " %8.6f", 1.0 );\n-\t\t\t\telse\n-\t\t\t\t\tfprintf( stdout, " %8.6f", ( 1.0 - score / bunbo ) * lenfac );\n-\t\t\t\tif( j % 7 == 6 ) 
fprintf( stdout, "\\n" );\n-\t\t\t}\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\tif( bunbo == 0.0 )\n-\t\t\t\t\tfprintf( stdout, "%d-%d d=%4.2f l=%d,%d\\n", i+1, j+1, 1.0, nlen[i], nlen[j] );\n-\t\t\t\telse\n-\t\t\t\t\tfprintf( stdout, "%d-%d d=%4.2f l=%d,%d\\n", i+1, j+1, ( 1.0 - score / bunbo ) * lenfac, nlen[i], nlen[j] );\n-\t\t\t}\n-//\t\t\tfprintf( stderr, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );\n-// score = (double)commonsextet_p( table1, pointt[j] );\n-//\t\t\tfprintf( stdout, "%d-%d d=%4.2f l=%d,%d\\n", i+1, j+1, ( 1.0 - score / MIN( mtxself[i], mtxself[j] ) ) * 3, nlen[i], nlen[j] );\n-\n-\n-\t\t} \n-\t\tfree( table1 );\n-\t}\n-\t\t\n-\tfprintf( stderr, "\\n" );\n-\tif( outputformat == \'p\' ) fprintf( stdout, "\\n" );\n-\tSHOWVERSION;\n-\texit( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mafft-homologs.1 --- a/mafft/core/mafft-homologs.1 Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,131 +0,0 @@ -.\" Title: MAFFT-HOMOLOGS -.\" Author: Kazutaka Katoh <katoh_at_bioreg.kyushu-u.ac.jp.> -.\" Generator: DocBook XSL Stylesheets v1.72.0 <http://docbook.sf.net/> -.\" Date: 2007-08-14 -.\" Manual: Mafft Manual -.\" Source: mafft-homologs 2.1 -.\" -.TH "MAFFT\-HOMOLOGS" "1" "2007\-06\-09" "mafft\-homologs 2.1" "Mafft Manual" -.\" disable hyphenation -.nh -.\" disable justification (adjust text to left margin only) -.ad l -.SH "NAME" -.RS 0 -mafft\-homologs \- aligns sequences together with homologues automatically collected from SwissProt via NCBI BLAST -.RE -.SH "SYNOPSIS" -.RS 0 -\fBmafft\-homologs\fR [\fBoptions\fR] \fIinput\fR [>\ \fIoutput\fR] -.RE -.SH "DESCRIPTION" -.RS 0 -The accuracy of an alignment of a few distantly related sequences is considerably improved when being aligned together with their close homologs. The reason for the improvement is probably the same as that for PSI\-BLAST. That is, the positions of highly conserved residues, those with many gaps and other additional information is brought by close homologs. According to Katoh et al. (2005), the improvement by adding close homologs is 10% or so, which is comparable to the improvement by incorporating structural information of a pair of sequences. Mafft\-homologs in a mafft server works like this: -.sp -.RS 4 -\h'-04' 1.\h'+02'Collect a number (50 by default) of close homologs (E=1e\-10 by default) of the input sequences. -.RE -.sp -.RS 4 -\h'-04' 2.\h'+02'Align the input sequences and homologs all together using the L\-INS\-i strategy. -.RE -.sp -.RS 4 -\h'-04' 3.\h'+02'Remove the homologs. -.RE -.RE -.SH "OPTIONS" -.RS 0 -.PP -\fB\-a\fR \fI\fIn\fR\fR -.RS 4 -The number of collected sequences (default: 50). -.RE -.PP -\fB\-e\fR \fI\fIn\fR\fR -.RS 4 -Threshold value (default: 1e\-10). -.RE -.PP -\fB\-o\fR \fI\fIxxx\fR\fR -.RS 4 -Options for mafft (default: " \-\-op 1.53 \-\-ep 0.123 \-\-maxiterate 1000 --localpair --reorder"). 
-.RE -.PP -\fB\-l\fR -.RS 4 -Locally carries out BLAST searches instead of NCBI BLAST (requires locally installed BLAST and a database). -.RE -.PP -\fB\-f\fR -.RS 4 -Outputs collected homologues also (default: off). -.RE -.PP -\fB\-w\fR -.RS 4 -entire sequences are subjected to BLAST search (default: well\-aligned region only) -.RE -.RE -.SH "REQUIREMENTS" -.RS 0 -.PP -MAFFT version > 5.58. -.PP -Either of -.RS 4 -.PP -lynx (when remote BLAST server is used) -.PP -BLAST and a protein sequence database (when local BLAST is used) -.RE -.RE -.SH "REFERENCES" -.RS 0 -.PP -Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment. -.RE -.SH "SEE ALSO" -.RS 0 -.PP -\fBmafft\fR(1) -.RE -.SH "AUTHORS" -.RS 0 -.PP -\fBKazutaka Katoh\fR <\&katoh_at_bioreg.kyushu\-u.ac.jp.\&> -.sp -1n -.IP "" 4 -Wrote Mafft. -.PP -\fBCharles Plessy\fR <\&charles\-debian\-nospam@plessy.org\&> -.sp -1n -.IP "" 4 -Wrote this manpage in DocBook XML for the Debian distribution, using Mafft's homepage as a template. -.RE -.SH "COPYRIGHT" -.RS 0 -Copyright \(co 2002\-2007 Kazutaka Katoh (mafft) -.br -Copyright \(co 2007 Charles Plessy (this manpage) -.br -.PP -Mafft and its manpage are offered under the following conditions: -.PP -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -.sp -.RS 4 -\h'-04' 1.\h'+02'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. -.RE -.sp -.RS 4 -\h'-04' 2.\h'+02'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
-.RE -.sp -.RS 4 -\h'-04' 3.\h'+02'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. -.RE -.PP -THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -.br -.RE |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mafft-homologs.tmpl --- a/mafft/core/mafft-homologs.tmpl Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,378 +0,0 @@\n-#!/usr/bin/env ruby\n-\n-localdb = "sp" \n-# database name from which homologues are collected \n-# by locally installed blast. Leave this if you do \n-# not use the \'-l\' option.\n-\n-mafftpath = "_BINDIR/mafft" \n-# path of mafft. "/usr/local/bin/mafft"\n-# if mafft is in your command path, "mafft" is ok.\n-\n-blastpath = "blastall" \n-# path of blastall. \n-# if blastall is in your command path, "blastall" is ok.\n-\n-# mafft-homologs.rb v. 2.1 aligns sequences together with homologues \n-# automatically collected from SwissProt via NCBI BLAST.\n-#\n-# mafft > 5.58 is required\n-#\n-# Usage:\n-# mafft-homologs.rb [options] input > output\n-# Options:\n-# -a # the number of collected sequences (default: 50)\n-# -e # threshold value (default: 1e-10)\n-# -o "xxx" options for mafft \n-# (default: " --op 1.53 --ep 0.123 --maxiterate 1000")\n-# -l locally carries out blast searches instead of NCBI blast\n-# (requires locally installed blast and a database)\n-# -f outputs collected homologues also (default: off)\n-# -w entire sequences are subjected to BLAST search \n-# (default: well-aligned region only)\n-\n-require \'getopts\'\n-require \'tempfile\'\n-\n-# mktemp\n-GC.disable\n-temp_vf = Tempfile.new("_vf").path\n-temp_if = Tempfile.new("_if").path\n-temp_pf = Tempfile.new("_pf").path\n-temp_af = Tempfile.new("_af").path\n-temp_qf = Tempfile.new("_qf").path\n-temp_bf = Tempfile.new("_bf").path\n-temp_rid = Tempfile.new("_rid").path\n-temp_res = Tempfile.new("_res").path\n-\n-\n-system( mafftpath + " --help > #{temp_vf} 2>&1" )\n-pfp = File.open( "#{temp_vf}", \'r\' )\n-while pfp.gets\n-\tbreak if $_ =~ /MAFFT v/\n-end\n-pfp.close\n-if( $_ ) then\n-\tmafftversion = sub( /^\\D*/, "" ).split(" ").slice(0).strip.to_s\n-else\n-\tmafftversion = "0"\n-end\n-if( mafftversion < "5.58" ) then\n-\tputs ""\n-\tputs "======================================================"\n-\tputs "Install new mafft (v. 
>= 5.58)"\n-\tputs "======================================================"\n-\tputs ""\n-\texit\n-end\n-\n-srand ( 0 )\n-\n-def readfasta( fp, name, seq )\n-\tnseq = 0\n-\ttmpseq = ""\n-\twhile fp.gets\n-\t\tif $_ =~ /^>/ then\n-\t\t\tname.push( $_.sub(/>/,"").strip )\n-\t\t\tseq.push( tmpseq ) if nseq > 0\n-\t\t\tnseq += 1\n-\t\t\ttmpseq = ""\n-\t\telse\n-\t\t\ttmpseq += $_.strip\n-\t\tend\n-\tend\n-\tseq.push( tmpseq )\n-\treturn nseq\n-end\n-\n-nadd = 50\n-eval = 1e-10\n-local = 0\n-fullout = 0\n-entiresearch = 0\n-corewin = 50\n-corethr = 0.3\n-mafftopt = " --op 1.53 --ep 0.123 --localpair --maxiterate 1000 --reorder "\n-if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then\n-\tputs "Usage: #{$0} [-h -l -e# -a# -o\\"[options for mafft]\\"] input_file"\n-\texit\n-end\n-\n-if $OPT_c then\n-\tcorewin = $OPT_c.to_i\n-end\n-if $OPT_d then\n-\tcorethr = $OPT_d.to_f\n-end\n-if $OPT_w\n-\tentiresearch = 1\n-end\n-if $OPT_f\n-\tfullout = 1\n-end\n-if $OPT_s\n-\tfullout = 0\n-end\n-if $OPT_l\n-\tlocal = 1\n-end\n-if $OPT_e then\n-\teval = $OPT_e.to_f\n-end\n-if $OPT_a then\n-\tnadd = $OPT_a.to_i\n-end\n-if $OPT_o then\n-\tmafftopt += " " + $OPT_o + " "\n-end\n-\n-system "cat " + ARGV.to_s + " > #{temp_if}"\n-ar = mafftopt.split(" ")\n-nar = ar.length\n-for i in 0..(nar-1)\n-\tif ar[i] == "--seed" then\n-\t\tsystem "cat #{ar[i+1]} >> #{temp_if}"\n-\tend\n-end\n-\n-nseq = 0\n-ifp = File.open( "#{temp_if}", \'r\' )\n-\twhile ifp.gets\n-\t\tnseq += 1 if $_ =~ /^>/\n-\tend\n-ifp.close\n-\n-if nseq >= 100 then\n-\tSTDERR.puts "The number of input sequences must be <100."\n-\texit\n-elsif nseq == 1 then\n-\tsystem( "cp #{temp_if}" + " #{temp_pf}" )\n-else\n-\tSTDERR.puts "Performing preliminary alignment .. 
"\n-\tif entiresearch == 1 then\n-#\t\tsystem( mafftpath + " --maxiterate 1000 --localpair #{temp_if} > #{temp_pf}" )\n-\t\tsystem( mafftpath + " --maxiterate 0 --retree 2 #{temp_if} > #{temp_pf}" )\n-\telse\n-\t\tsystem( mafftpath + " --maxiterate 1000 --localpair --core --coreext --corethr #{corethr.to_s} --corewin #{corewin.to_s} #{temp_if} > #{temp_pf}" )\n-\tend\n-end\n-\n-pfp = File.open( "#{temp_pf}", \'r\' )\n-'..b' File.open( "#{temp_af}", \'w\' )\n-\n-STDERR.puts "Searching .. \\n"\n-ids = []\n-add = []\n-sco = []\n-for i in 0..(nin-1)\n-\tinseq[i].gsub!(/-/,"")\n-\tafp.puts ">" + orname[i]\n-\tafp.puts orseq[i]\n-\n-#\tafp.puts ">" + inname[i]\n-#\tafp.puts inseq[i]\n-\n-\tSTDERR.puts "Query (#{i+1}/#{nin})\\n" + inname[i]\n-\tif act[i] == 0 then\n-\t\tSTDERR.puts "Skip.\\n\\n"\n-\t\tnext \n-\tend\n-\n-\tif local == 0 then\n-\t\tcommand = "lynx -source \'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=" + inseq[i] + "&DATABASE=swissprot&HITLIST_SIZE=" + nadd.to_s + "&FILTER=L&EXPECT=\'" + eval.to_s + "\'&FORMAT_TYPE=TEXT&PROGRAM=blastp&SERVICE=plain&NCBI_GI=on&PAGE=Proteins&CMD=Put\' > #{temp_rid}"\n-\t\tsystem command\n-\t\n-\t\tridp = File.open( "#{temp_rid}", \'r\' )\n-\t\twhile ridp.gets\n-\t\t\tbreak if $_ =~ / RID = (.*)/\n-\t\tend\n-\t\tridp.close\n-\t\trid = $1.strip\n-\t\tSTDERR.puts "Submitted to NCBI. 
rid = " + rid\n-\t\n-\t\tSTDERR.printf "Waiting "\n-\t\twhile 1 \n-\t\t\tSTDERR.printf "."\n-\t\t\tsleep 10\n-\t\t\tcommand = "lynx -source \'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RID=" + rid + "&DESCRIPTIONS=500&ALIGNMENTS=" + nadd.to_s + "&ALIGNMENT_TYPE=Pairwise&OVERVIEW=no&CMD=Get&FORMAT_TYPE=XML\' > #{temp_res}"\n-\t\t\tsystem command\n-\t\t\tresp = File.open( "#{temp_res}", \'r\' )\n-#\t\t\tresp.gets\n-#\t\t\tif $_ =~ /WAITING/ then\n-#\t\t\t\tresp.close\n-#\t\t\t\tnext\n-#\t\t\tend\n-\t\t\twhile( resp.gets )\n-\t\t\t\tbreak if $_ =~ /QBlastInfoBegin/\n-\t\t\tend\n-\t\t\tresp.gets\n-\t\t\tif $_ =~ /WAITING/ then\n-\t\t\t\tresp.close\n-\t\t\t\tnext\n-\t\t\telse\n-\t\t\t\tresp.close\n-\t\t\t\tbreak\n-\t\t\tend\n-\t\tend\n-\telse\n-#\t\tputs "Not supported"\n-#\t\texit\n-\t\tqfp = File.open( "#{temp_qf}", \'w\' )\n-\t\t\tqfp.puts "> "\n-\t\t\tqfp.puts inseq[i]\n-\t\tqfp.close\n-\t\tcommand = blastpath + " -p blastp -e #{eval} -b 1000 -m 7 -i #{temp_qf} -d #{localdb} > #{temp_res}"\n-\t\tsystem command\n-\t\tresp = File.open( "#{temp_res}", \'r\' )\n-\tend\n-\tSTDERR.puts " Done.\\n\\n"\n-\n-\tresp = File.open( "#{temp_res}", \'r\' )\n-\twhile 1\n-\t\twhile resp.gets\n-\t\t\tbreak if $_ =~ /<Hit_id>(.*)<\\/Hit_id>/ || $_ =~ /(<Iteration_stat>)/\n-\t\tend\n-\t\tid = $1\n-\t\tbreak if $_ =~ /<Iteration_stat>/\n-#\t\tp id\n-\t\twhile resp.gets\n-\t\t\tbreak if $_ =~ /<Hsp_bit-score>(.*)<\\/Hsp_bit-score>/\n-\t\tend\n-\t\tscore = $1.to_f\n-#\t\tp score\n-\n-\t\tknown = ids.index( id )\n-\t\tif known != nil then\n-\t\t\tif sco[known] >= score then\n-\t\t\t\tnext\n-\t\t\telse\n-\t\t\t\tids.delete_at( known )\n-\t\t\t\tadd.delete_at( known )\n-\t\t\t\tsco.delete_at( known )\n-\t\t\tend\n-\t\tend\n-\t\twhile resp.gets\n-\t\t\tbreak if $_ =~ /<Hsp_hseq>(.*)<\\/Hsp_hseq>/\n-\t\tend\n-#\t\tbreak if $1 == nil\n-\t\ttarget = $1.sub( /-/, "" ).sub( /U/, "X" )\n-#\t\tp target\n-#\t\tSTDERR.puts "adding 1 seq"\n-\t\tids.push( id )\n-\t\tsco.push( score )\n-\t\tadd.push( 
target )\n-\tend\n-\tresp.close\n-end\n-\n-n = ids.length\n-\n-outnum = 0\n-while n > 0 && outnum < nadd\n-\tm = rand( n )\n-\tafp.puts ">_addedbymaffte_" + ids[m]\n-\tafp.puts add[m]\n-\tids.delete_at( m )\n-\tadd.delete_at( m )\n-\tn -= 1\n-\toutnum += 1\n-end\n-afp.close\n-\n-STDERR.puts "Performing alignment .. "\n-system( mafftpath + mafftopt + " #{temp_af} > #{temp_bf}" )\n-STDERR.puts "done."\n-\n-bfp = File.open( "#{temp_bf}", \'r\' )\n-outseq = []\n-outnam = []\n-readfasta( bfp, outnam, outseq )\n-bfp.close\n-\n-outseq2 = []\n-outnam2 = []\n-\n-len = outseq.length\n-for i in 0..(len-1)\n-#\tp outnam[i]\n-\tif fullout == 0 && outnam[i] =~ /_addedbymaffte_/ then\n-\t\tnext\n-\tend\n-\toutseq2.push( outseq[i] )\n-\toutnam2.push( outnam[i].sub( /_addedbymaffte_/, "_ho_" ) )\n-end\n-\n-nout = outseq2.length\n-len = outseq[0].length\n-p = len\n-while p>0\n-\tp -= 1\n- allgap = 1\n- for j in 0..(nout-1)\n-\t\tif outseq2[j][p,1] != "-" then\n-\t\t\tallgap = 0\n-\t\t\tbreak\n-\t\tend\n- end\n- if allgap == 1 then\n- for j in 0..(nout-1)\n- outseq2[j][p,1] = ""\n- end\n- end\n-end\n-for i in 0..(nout-1)\n-\tputs ">" + outnam2[i]\n-\tputs outseq2[i].gsub( /.{1,60}/, "\\\\0\\n" )\n-end\n-\n-\n-system( "rm -rf #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid}" )\n-if File.exist?( "#{temp_af}.tree" ) then\n-\tsystem( "sed \'s/_addedbymaffte_/_ho_/\' #{temp_af}.tree > #{ARGV.to_s}.tree" )\n-\tsystem( "rm #{temp_af}.tree" )\n-end\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mafft-profile.c --- a/mafft/core/mafft-profile.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,487 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0 \n-\n-#if DEBUG\n-#include <time.h>\n-#include <sys/time.h>\n-#include <sys/resource.h>\n-double getrusage_sec()\n-{\n- struct rusage t;\n- struct timeval tv;\n- getrusage(RUSAGE_SELF, &t);\n- tv = t.ru_utime;\n- return tv.tv_sec + (double)tv.tv_usec*1e-6;\n-}\n-#endif\n-\n-\n-int intcmp( int *str1, int *str2 )\n-{\n-\twhile( *str1 != -1 && *str2 != -1 )\n-\t\tif( *str1++ != *str2++ ) return( 1 );\n-\tif( *str1 != *str2 ) return( 1 );\n-\treturn( 0 );\n-}\n-\n-char **arguments( int argc, char *argv[] )\n-{\n- int c = 0;\n-\t\n-\tfmodel = 0;\n-\tnblosum = 62;\n-\tcalledByXced = 0;\n-\tdevide = 0;\n-\tfftscore = 1;\n-\tuse_fft = 1;\n-\talg = \'A\';\n- weight = 0;\n- utree = 1;\n-\ttbutree = 0;\n- refine = 0;\n- check = 1;\n- cut = 0.0;\n- disp = 0;\n- outgap = 0;\n- mix = 0;\n-\ttbitr = 0;\n-\tscmtd = 5;\n-\ttbweight = 0;\n-\ttbrweight = 3;\n-\tcheckC = 0;\n- scoremtx = 1;\n-\tdorp = NOTSPECIFIED;\n-\tppenalty = NOTSPECIFIED;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tpoffset = 0; // chokusetsu yobareru kara\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\tfftWinSize = NOTSPECIFIED;\n-\tfftThreshold = NOTSPECIFIED;\n-\tTMorJTT = JTT;\n-\ttreemethod = \'x\';\n-\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( ( c = *++argv[0] ) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'P\':\n-\t\t\t\t\tdorp = \'p\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'D\':\n-\t\t\t\t\tdorp = \'d\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'F\':\n-\t\t\t\t\tuse_fft = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'N\':\n-\t\t\t\t\tuse_fft = 0;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'e\':\n-\t\t\t\t\tfftscore = 0;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'Q\':\n-\t\t\t\t\talg = \'Q\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'A\':\n-\t\t\t\t\talg = \'A\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'M\':\n-\t\t\t\t\talg = \'M\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'d\':\n-\t\t\t\t\tdisp = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'O\':\n-\t\t\t\t\toutgap = 
0;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'a\':\n-\t\t\t\t\tfmodel = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'u\':\n-\t\t\t\t\ttbrweight = 0;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'z\':\n-\t\t\t\t\tfftThreshold = myatoi( *++argv );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'w\':\n-\t\t\t\t\tfftWinSize = myatoi( *++argv );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'Z\':\n-\t\t\t\t\tcheckC = 1;\n-\t\t\t\t\tbreak;\n- case \'f\':\n- ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n- fprintf( stderr, "ppenalty = %d\\n", ppenalty );\n- --argc;\n- goto nextoption;\n- case \'g\':\n- ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );\n- fprintf( stderr, "ppenalty_ex = %d\\n", ppenalty_ex );\n- --argc;\n- goto nextoption;\n- case \'h\':\n- poffset = (int)( atof( *++argv ) * 1000 - 0.5 );\n- fprintf( stderr, "poffset = %d\\n", poffset );\n- --argc;\n- goto nextoption;\n- case \'k\':\n- kimuraR = myatoi( *++argv );\n- fprintf( stderr, "kappa = %d\\n", kimuraR );\n- --argc;\n- goto nextoption;\n-\t\t\t\tcase \'b\':\n-\t\t\t\t\tnblosum = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 1;\n-\t\t\t\t\tfprintf( stderr, "blosum %d\\n", nblosum );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'j\':\n-\t\t\t\t\tpamN = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 0;\n-\t\t\t\t\tTMorJTT = JTT;\n-\t\t\t\t\tfprintf( stderr, "jtt %d\\n", pamN );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'m\':\n-\t\t\t\t\tpamN = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 0;\n-\t\t\t\t\tTMorJTT = TM;\n-\t\t\t\t\tfprintf( stderr, "tm %d\\n", pamN );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tdefault:\n-\t\t\t\t\tfprintf( stderr, "illegal option %c\\n", c );\n-\t\t\t\t\targc = 0;\n-\t\t\t\t\tbreak;\n- }\n-\t\t}\n-\t\tnextoption:\n-\t\t\t;\n-\t}\n- if( argc != 2 ) \n- {\n- fprintf( stderr, "options: Check source file ! 
%c ?\\n", c );\n- exit( 1 );\n- }\n-\tfprintf( stderr, "tbitr = %d, tbrweight = %d, tbweight = %d\\n", tbitr, tbrweight, tbweight );\n-//\treadOtherOptions( &ppid, &fftThreshold, &fftWinSize );\n-\treturn( argv ); \n-\n-}\n-\n-void GroupAlign( int nseq1, int nseq2, '..b' alloclen;\n-\n-\targv2 = arguments( argc, argv );\n-\n-\tfprintf( stderr, "####### in galn\\n" );\n-\n-\tinitFiles();\n-\n-\tfprintf( stderr, "file1 = %s\\n", argv2[0] );\n-\tfprintf( stderr, "file2 = %s\\n", argv2[1] );\n-\n-\tgp1 = fopen( argv2[0], "r" ); if( !gp1 ) ErrorExit( "cannot open file1" );\n-\tgp2 = fopen( argv2[1], "r" ); if( !gp2 ) ErrorExit( "cannot open file2" );\n-\n-#if 0\n-\tPreRead( gp1, &nseq1, &nlenmax1 );\n-\tPreRead( gp2, &nseq2, &nlenmax2 );\n-#else\n- getnumlen( gp1 );\n-\tnseq1 = njob; nlenmax1 = nlenmax;\n- getnumlen( gp2 );\n-\tnseq2 = njob; nlenmax2 = nlenmax;\n-#endif\n-\n-\tnjob = nseq1 + nseq2;\n-\tnlenmax = MAX( nlenmax1, nlenmax2 );\n-\n-\trewind( gp1 );\n-\trewind( gp2 );\n-\n-\n-\tname = AllocateCharMtx( njob, B );\n-\tnlen = AllocateIntVec( njob );\n-\tseq1 = AllocateCharMtx( nseq1, nlenmax*3 );\n-\tseq2 = AllocateCharMtx( nseq2, nlenmax*3 );\n-\tseq = AllocateCharMtx( njob, 1 );\n-\taseq = AllocateCharMtx( njob, nlenmax*3 );\n-\tbseq = AllocateCharMtx( njob, nlenmax*3 );\n-\tmseq1 = AllocateCharMtx( njob, 1 );\n-\tmseq2 = AllocateCharMtx( njob, 1 );\n-\talloclen = nlenmax * 3;\n-\n-\ttopol = AllocateIntCub( njob, 2, njob );\n-\tlen = AllocateDoubleMtx( njob, 2 );\n-\tpscore = AllocateDoubleMtx( njob, njob );\n-\teff = AllocateDoubleVec( njob );\n-\n-#if 0\n- njob=nseq2; FRead( gp2, name+nseq1, nlen+nseq1, seq2 );\n-\tnjob=nseq1; FRead( gp1, name, nlen, seq1 );\n-#else\n- njob=nseq2; readDataforgaln( gp2, name+nseq1, nlen+nseq1, seq2 );\n-\tnjob=nseq1; readDataforgaln( gp1, name, nlen, seq1 );\n-#endif\n-\tnjob = nseq1 + nseq2;\n-\n-\n-#if 0 // CHUUI\n-\tcommongappick( nseq1, seq1 );\n-\tcommongappick( nseq2, seq2 );\n-#endif\n-\n-\tfor( i=0; i<nseq1; i++ ) 
seq[i] = seq1[i];\n-\tfor( i=nseq1; i<njob; i++ ) seq[i] = seq2[i-nseq1];\n-/*\n-\tWrite( stdout, njob, name, nlen, seq );\n-*/\n-\n- constants( njob, seq );\n-\n- WriteOptions( trap_g );\n-\n- c = seqcheck( seq );\n- if( c )\n- {\n- fprintf( stderr, "Illeagal character %c\\n", c );\n- exit( 1 );\n- }\n- for( i=1; i<nseq1; i++ ) \n- {\n- if( nlen[i] != nlen[0] ) \n- ErrorExit( "group1 is not aligned." );\n- }\n- for( i=nseq1+1; i<njob; i++ ) \n- {\n- if( nlen[i] != nlen[nseq1] ) \n- ErrorExit( "group2 is not aligned." );\n- }\n- if( tbutree == 0 )\n-\t{\n-\t\tfor( i=0; i<nseq1; i++ ) \n-\t\t{\n-\t\t\tfor( j=i+1; j<nseq1; j++ )\n-\t\t\t{\n-\t\t\t\tpscore[i][j] = (double)substitution_hosei( seq[i], seq[j] );\n-//\t\t\t\tfprintf( stderr, "%d-%d, %5.1f \\n", i, j, pscore[i][j] );\n-\t\t\t}\n-\t\t\tfor( j=nseq1; j<njob; j++ )\n-\t\t\t{\n-\t\t\t\tpscore[i][j] = 3.0;\n-//\t\t\t\tfprintf( stderr, "%d-%d, %5.1f \\n", i, j, pscore[i][j] );\n-\t\t\t}\n-\t\t}\n-\t\tfor( i=nseq1; i<njob-1; i++ ) \n-\t\t{\n-\t\t\tfor( j=i+1; j<njob; j++ )\n-\t\t\t{\n-\t\t\t\tpscore[i][j] = (double)substitution_hosei( seq[i], seq[j] );\n-//\t\t\t\tfprintf( stderr, "%d-%d, %5.1f \\n", i, j, pscore[i][j] );\n-\t\t\t}\n-\t\t}\n-//\t\tfprintf( stderr, "\\n" );\n-\n-\n- }\n- \telse\n-\t{\n-\t\tfprintf( stderr, "Not supported\\n" );\n-\t\texit( 1 );\n-#if 0\n-\t\tprep = fopen( "hat2", "r" );\n-\t\tif( prep == NULL ) ErrorExit( "Make hat2." );\n-\t\treadhat2( prep, njob, name, pscore );\n-\t\tfclose( prep );\n-#endif\n-\t}\n-\tfprintf( stderr, "Constructing dendrogram ... 
" );\n-\tif( treemethod == \'x\' )\n-\t\tveryfastsupg( njob, pscore, topol, len );\n-\telse\n-\t\tErrorExit( "Incorrect tree\\n" );\n-\tfprintf( stderr, "done.\\n" );\n-\n-\tif( tbrweight )\n-\t{\n-\t\tweight = 3;\n-\t\tcounteff_simple( njob, topol, len, eff );\n-//\t\tfor( i=0; i<njob; i++ ) fprintf( stderr, "eff[%d] = %f\\n", i, eff[i] );\n-\t}\n-\telse\n-\t{\n-\t\tfor( i=0; i<njob; i++ ) eff[i] = 1.0;\n-\t}\n-\n-\tlen1 = strlen( seq[0] );\n-\tlen2 = strlen( seq[nseq1] );\n-\tif( len1 > 30000 || len2 > 30000 )\n-\t{ \n-\t\tfprintf( stderr, "\\nlen1=%d, len2=%d, Switching to the memsave mode.\\n", len1, len2 );\n-\t\talg = \'M\';\n-\t} \n- \n-\n-\n-\n-\tGroupAlign( nseq1, nseq2, name, nlen, seq, aseq, mseq1, mseq2, topol, len, eff, alloclen );\n-\n-#if 0\n-\twritePre( njob, name, nlen, aseq, 1 );\n-#else\n-\twriteDataforgaln( stdout, njob, name, nlen, aseq );\n-#endif\n-\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mafft.1 --- a/mafft/core/mafft.1 Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,479 +0,0 @@\n-.\\" Title: MAFFT\n-.\\" Author: Kazutaka Katoh <kazutaka.katoh@aist.go.jp>\n-.\\" Generator: DocBook XSL Stylesheets v1.72.0 <http://docbook.sf.net/>\n-.\\" Date: 2007-08-14\n-.\\" Manual: Mafft Manual\n-.\\" Source: mafft 6.240\n-.\\"\n-.TH "MAFFT" "1" "2007\\-06\\-09" "mafft 6.240" "Mafft Manual"\n-.\\" disable hyphenation\n-.nh\n-.\\" disable justification (adjust text to left margin only)\n-.ad l\n-.SH "THIS MANUAL IS FOR V6.2XX (2007)"\n-Recent versions (v7.1xx; 2013 Jan.) have more features than those described below.\n-See also the tips page at \n-http://mafft.cbrc.jp/alignment/software/tips0.html\n-.SH "NAME"\n-.RS 0\n-.sp\n-mafft \\- Multiple alignment program for amino acid or nucleotide sequences\n-.RE\n-.SH "SYNOPSIS"\n-.RS 0\n-.HP 6\n-\\fBmafft\\fR [\\fBoptions\\fR] \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 6\n-\\fBlinsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 6\n-\\fBginsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 6\n-\\fBeinsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 7\n-\\fBfftnsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 6\n-\\fBfftns\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 5\n-\\fBnwns\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 6\n-\\fBnwnsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 14\n-\\fBmafft\\-profile\\fR \\fIgroup1\\fR \\fIgroup2\\fR [>\\ \\fIoutput\\fR]\n-.HP\n-.sp\n-\\fIinput\\fR, \\fIgroup1\\fR and \\fIgroup2\\fR must be in FASTA format.\n-.RE\n-.SH "DESCRIPTION"\n-.RS 0\n-\\fBMAFFT\\fR is a multiple sequence alignment program for unix\\-like operating systems. 
It offers a range of multiple alignment methods.\n-.SS "Accuracy\\-oriented methods:"\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'L\\-INS\\-i (probably most accurate; recommended for <200 sequences; iterative refinement method incorporating local pairwise alignment information):\n-.HP 6\n-\\fBmafft\\fR \\fB\\-\\-localpair\\fR \\fB\\-\\-maxiterate\\fR\\ \\fI1000\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 6\n-\\fBlinsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'G\\-INS\\-i (suitable for sequences of similar lengths; recommended for <200 sequences; iterative refinement method incorporating global pairwise alignment information):\n-.HP 6\n-\\fBmafft\\fR \\fB\\-\\-globalpair\\fR \\fB\\-\\-maxiterate\\fR\\ \\fI1000\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 6\n-\\fBginsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'E\\-INS\\-i (suitable for sequences containing large unalignable regions; recommended for <200 sequences):\n-.HP 6\n-\\fBmafft\\fR \\fB\\-\\-ep\\fR\\ \\fI0\\fR \\fB\\-\\-genafpair\\fR \\fB\\-\\-maxiterate\\fR\\ \\fI1000\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 6\n-\\fBeinsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.br\n-\n-For E\\-INS\\-i, the\n-\\fB\\-\\-ep\\fR\n-\\fI0\\fR\n-option is recommended to allow large gaps.\n-.RE\n-.SS "Speed\\-oriented methods:"\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'FFT\\-NS\\-i (iterative refinement method; two cycles only):\n-.HP 6\n-\\fBmafft\\fR \\fB\\-\\-retree\\fR\\ \\fI2\\fR \\fB\\-\\-maxiterate\\fR\\ \\fI2\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 7\n-\\fBfftnsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'FFT\\-NS\\-i (iterative refinement method; max. 
1000 iterations):\n-.HP 6\n-\\fBmafft\\fR \\fB\\-\\-retree\\fR\\ \\fI2\\fR \\fB\\-\\-maxiterate\\fR\\ \\fI1000\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'FFT\\-NS\\-2 (fast; progressive method):\n-.HP 6\n-\\fBmafft\\fR \\fB\\-\\-retree\\fR\\ \\fI2\\fR \\fB\\-\\-maxiterate\\fR\\ \\fI0\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 6\n-\\fBfftns\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'FFT\\-NS\\-1 (very fast; recommended for >2000 sequences; progressive method with a rough guide tree):\n-.HP 6\n-\\fBmafft\\fR \\fB\\-\\-retree\\fR\\ \\fI1\\fR \\fB\\-\\-maxiterate\\fR\\ \\fI0\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'NW\\-NS\\-i (iterative refinement method without FFT approximation; two cycles only):\n-.HP 6\n-\\fBmafft\\fR \\fB\\-\\-retree\\fR\\ \\fI2\\fR \\fB\\-\\-maxiterate\\fR\\ \\fI2\\fR \\fB\\-\\-nofft\\fR\\ \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.HP 7\n-\\fBnwnsi\\fR \\fIinput\\fR [>\\ \\fIoutput\\fR]\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'NW\\-NS\\-2 (fast; progressive method without the FFT approximation):\n-.HP 6\n-\\fB'..b'\\fB\\-\\-amino\\fR\n-.RS 4\n-Assume the sequences are amino acid. Default: auto\n-.RE\n-.PP\n-\\fB\\-\\-seed\\fR \\fIalignment1\\fR [\\fB--seed\\fR \\fIalignment2\\fR \\fB--seed\\fR \\fIalignment3\\fR ...]\n-.RS 4\n-Seed alignments given in \\fIalignment_n\\fR (fasta format) are aligned with \n-sequences in \\fIinput\\fR. The alignment within every seed is preserved.\n-.RE\n-.RE\n-.SH "FILES"\n-.RS 0\n-.PP\n-Mafft stores the input sequences and other files in a temporary directory, which by default is located in\n-\\fI/tmp\\fR.\n-.RE\n-.SH "ENVIONMENT"\n-.RS 0\n-.PP\n-\\fBMAFFT_BINARIES\\fR\n-.RS 4\n-Indicates the location of the binary files used by mafft. 
By default, they are searched in\n-\\fI/usr/local/lib/mafft\\fR, but on Debian systems, they are searched in\n-\\fI/usr/lib/mafft\\fR.\n-.RE\n-.PP\n-\\fBFASTA_4_MAFFT\\fR\n-.RS 4\n-This variable can be set to indicate to mafft the location to the fasta34 program if it is not in the PATH.\n-.RE\n-.RE\n-.SH "SEE ALSO"\n-.RS 0\n-.PP\n-\n-\\fBmafft\\-homologs\\fR(1)\n-.RE\n-.SH "REFERENCES"\n-.RS 0\n-.SS "In English"\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'Katoh and Toh (Bioinformatics 23:372\\-374, 2007) PartTree: an algorithm to build an approximate tree from a large number of unaligned sequences (describes the PartTree algorithm).\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'Katoh, Kuma, Toh and Miyata (Nucleic Acids Res. 33:511\\-518, 2005) MAFFT version 5: improvement in accuracy of multiple sequence alignment (describes [ancestral versions of] the G\\-INS\\-i, L\\-INS\\-i and E\\-INS\\-i strategies)\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'Katoh, Misawa, Kuma and Miyata (Nucleic Acids Res. 
30:3059\\-3066, 2002) MAFFT: a novel method for rapid multiple sequence alignment based on fast Fourier transform (describes the FFT\\-NS\\-1, FFT\\-NS\\-2 and FFT\\-NS\\-i strategies)\n-.RE\n-.SS "In Japanese"\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'Katoh and Misawa (Seibutsubutsuri 46:312\\-317, 2006) Multiple Sequence Alignments: the Next Generation\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\'\\(bu\\h\'+03\'Katoh and Kuma (Kagaku to Seibutsu 44:102\\-108, 2006) Jissen\\-teki Multiple Alignment\n-.RE\n-.RE\n-.SH "AUTHORS"\n-.RS 0\n-.PP\n-\\fBKazutaka Katoh\\fR <\\&kazutaka.katoh_at_aist.go.jp\\&>\n-.sp -1n\n-.IP "" 4\n-Wrote Mafft.\n-.PP\n-\\fBCharles Plessy\\fR <\\&charles\\-debian\\-nospam_at_plessy.org\\&>\n-.sp -1n\n-.IP "" 4\n-Wrote this manpage in DocBook XML for the Debian distribution, using Mafft\'s homepage as a template.\n-.RE\n-.SH "COPYRIGHT"\n-.RS 0\n-Copyright \\(co 2002\\-2007 Kazutaka Katoh (mafft)\n-.br\n-Copyright \\(co 2007 Charles Plessy (this manpage)\n-.br\n-.PP\n-Mafft and its manpage are offered under the following conditions:\n-.PP\n-Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:\n-.sp\n-.RS 4\n-\\h\'-04\' 1.\\h\'+02\'Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\' 2.\\h\'+02\'Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.\n-.RE\n-.sp\n-.RS 4\n-\\h\'-04\' 3.\\h\'+02\'The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission.\n-.RE\n-.PP\n-THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n-.br\n-.RE\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mafft.bat --- a/mafft/core/mafft.bat Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,17 +0,0 @@ -@echo off - -setlocal - -if not "x%PROCESSOR_ARCHITECTURE%" == "xAMD64" goto _NotX64 -set COMSPEC=%WINDIR%\SysWOW64\cmd.exe -%COMSPEC% /c %0 %* -goto EOF -:_NotX64 - -set ROOTDIR="%~d0%~p0\ms" -set PATH=/bin/:%PATH% -set MAFFT_BINARIES=/lib/mafft - -%ROOTDIR%\bin\sh %ROOTDIR%\bin\mafft %* - -:EOF |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mafft.h --- a/mafft/core/mafft.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -extern int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, char **argv, int (*callback)(int, int, char*)); -#define GUI_ERROR 1 -#define GUI_LENGTHOVER 2 -#define GUI_CANCEL 3 |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mafft.tmpl --- a/mafft/core/mafft.tmpl Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,2432 +0,0 @@\n-#! /bin/sh\n-\n-er=0;\n-myself=`dirname "$0"`/`basename "$0"`; export myself\n-version="v7.221 (2014/04/16)"; export version\n-LANG=C; export LANG\n-os=`uname`\n-progname=`basename "$0"`\n-if [ `echo $os | grep -i cygwin` ]; then\n-\tos="cygwin"\n-elif [ `echo $os | grep -i mingw` ]; then\n-\tos="mingw"\n-elif [ `echo $os | grep -i darwin` ]; then\n-\tos="darwin"\n-elif [ `echo $os | grep -i sunos` ]; then\n-\tos="sunos"\n-elif [ `echo $os | grep -i linux` ]; then\n-\tos="linux"\n-else\n-\tos="unix"\n-fi\n-export os\n-\n-if [ "$MAFFT_BINARIES" ]; then\n-\tprefix="$MAFFT_BINARIES"\n-else\n-\tprefix=_LIBDIR\n-fi\n-export prefix\n-\n-if [ $# -gt 0 ]; then\n-\tif [ "$1" = "--man" ]; then \n-\t\tman "$prefix/mafft.1"\n-\t\texit 0;\n-\tfi\n-fi\n-\n-if [ -x "$prefix/version" ]; then\n-\t\tversionbin=`"$prefix/version" | awk \'{print $1}\'` # for cygwin\n-\telse\n-\t\tversionbin="0.000"\n-fi\n-\n-if ! expr "$version" : v"$versionbin" > /dev/null ; then\n-\techo "" 1>&2\n-\techo "v$versionbin != $version" 1>&2\n-\techo "" 1>&2\n-\techo "There is a problem in the configuration of your shell." 1>&2\n-\techo "Check the MAFFT_BINARIES environmental variable by" 1>&2\n-\techo "$ echo \\$MAFFT_BINARIES" 1>&2\n-\techo "" 1>&2\n-\techo "This variable must be *unset*, unless you have installed MAFFT" 1>&2\n-\techo "with a special configuration. To unset this variable, type" 1>&2\n-\techo "$ unset MAFFT_BINARIES" 1>&2\n-\techo "or" 1>&2\n-\techo "% unsetenv MAFFT_BINARIES" 1>&2\n-\techo "Then retry" 1>&2\n-\techo "$ mafft input > output" 1>&2\n-\techo "" 1>&2\n-\techo "To keep this change permanently, edit setting files" 1>&2\n-\techo "(.bash_profile, .profile, .cshrc, etc) in your home directory" 1>&2\n-\techo "to delete the MAFFT_BINARIES line." 1>&2\n-\techo "On MacOSX, also edit or remove the .MacOSX/environment.plist file" 1>&2\n-\techo "and then re-login (MacOSX 10.6) or reboot (MacOSX 10.7)." 
1>&2\n-\techo "" 1>&2\n-\techo "Please send a problem report to kazutaka.katoh@aist.go.jp," 1>&2\n-\techo "if this problem remains." 1>&2\n-\techo "" 1>&2\n-\texit 1\n-\ter=1\n-fi\n-\n-defaultiterate=0\n-defaultcycle=2\n-defaultgop="1.53"\n-#defaultaof="0.123"\n-defaultaof="0.000"\n-defaultlaof="0.100"\n-defaultlgop="-2.00"\n-defaultfft=1\n-defaultrough=0\n-defaultdistance="ktuples"\n-#defaultdistance="local"\n-defaultweighti="2.7"\n-defaultweightr="0.0"\n-defaultweightm="1.0"\n-defaultdafs=0\n-defaultmccaskill=0\n-defaultcontrafold=0\n-defaultalgopt=" "\n-defaultalgoptit=" "\n-defaultsbstmodel=" -b 62 "\n-defaultfmodel=" "\n-defaultkappa=" "\n-if [ $progname = "xinsi" -o $progname = "mafft-xinsi" ]; then\n-\tdefaultfft=1\n-\tdefaultcycle=1\n-\tdefaultiterate=1000\n-\tdefaultdistance="scarna"\n-\tdefaultweighti="3.2"\n-\tdefaultweightr="8.0"\n-\tdefaultweightm="2.0"\n-\tdefaultmccaskill=1\n-\tdefaultcontrafold=0\n-\tdefaultdafs=0\n-\tdefaultalgopt=" -A "\n-\tdefaultalgoptit=" -AB " ## chui\n-\tdefaultaof="0.0"\n-\tdefaultsbstmodel=" -b 62 "\n-\tdefaultkappa=" "\n-\tdefaultfmodel=" " # 2013/06/18\n-elif [ $progname = "qinsi" -o $progname = "mafft-qinsi" ]; then\n-\tdefaultfft=1\n-\tdefaultcycle=1\n-\tdefaultiterate=1000\n-\tdefaultdistance="global"\n-\tdefaultweighti="3.2"\n-\tdefaultweightr="8.0"\n-\tdefaultweightm="2.0"\n-\tdefaultmccaskill=1\n-\tdefaultcontrafold=0\n-\tdefaultdafs=0\n-\tdefaultalgopt=" -A "\n-\tdefaultalgoptit=" -AB " ## chui\n-\tdefaultaof="0.0"\n-\tdefaultsbstmodel=" -b 62 "\n-\tdefaultkappa=" "\n-\tdefaultfmodel=" " # 2013/06/18\n-elif [ $progname = "linsi" -o $progname = "mafft-linsi" ]; then\n-\tdefaultfft=0\n-\tdefaultcycle=1\n-\tdefaultiterate=1000\n-\tdefaultdistance="local"\n-elif [ $progname = "ginsi" -o $progname = "mafft-ginsi" ]; then\n-\tdefaultfft=1\n-\tdefaultcycle=1\n-\tdefaultiterate=1000\n-\tdefaultdistance="global"\n-elif [ $progname = "einsi" -o $progname = "mafft-einsi" ]; 
then\n-\tdefaultfft=0\n-\tdefaultcycle=1\n-\tdefaultiterate=1000\n-\tdefaultdistance="localgenaf"\n-elif [ $progname = "fftns" -o $progname = "mafft-fftns" ]; then\n-\tdefaultfft=1\n-\tdefaultcycle=2\n-\tdefaultdistance="ktuples"\n-elif [ $progname = "fftnsi" -o $progname = "mafft-fftnsi" ]; then\n-\tdefaultfft=1\n-\tdefaultcycle=2\n-\tdefaultiterate=2\n'..b'/tty" );\n-\t\t\tif( res == 0 || NF == 0 )\n-\t\t\t\tcontinue;\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\toutfile = sprintf( "%s", $0 );\n-\t\t\t\tprintf( "OK. outfile = %s\\n\\n", outfile ) > "/dev/tty";\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t}\n-\t\n-\t\twhile( 1 )\n-\t\t{\n-\t\t\toutargs = "";\n-\t\t\tprintf( "\\n" ) > "/dev/tty";\n-\t\t\tprintf( "Output format?\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 1. Clustal format / Sorted\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 2. Clustal format / Input order\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 3. Fasta format / Sorted\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 4. Fasta format / Input order\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 5. Phylip format / Sorted\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 6. Phylip format / Input order\\n" ) > "/dev/tty";\n-\t\t\tprintf( "@ " ) > "/dev/tty";\n-\t\t\tres = getline < "/dev/tty";\n-\t\t\tclose( "/dev/tty" );\n-#\t\t\tprintf( "res=%d, NF=%d\\n", res, NF );\n-\n-\t\t\tresnum = 0 + $1;\n-#\t\t\tprintf( "resnum=%d\\n", resnum );\n-\n-\t\t\tif( resnum < 1 || 6 < resnum )\n-\t\t\t\tcontinue;\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\tif( resnum == 1 )\n-\t\t\t\t\toutargs = "--clustalout --reorder";\n-\t\t\t\telse if( resnum == 2 )\n-\t\t\t\t\toutargs = "--clustalout --inputorder";\n-\t\t\t\telse if( resnum == 3 )\n-\t\t\t\t\toutargs = "--reorder";\n-\t\t\t\telse if( resnum == 4 )\n-\t\t\t\t\toutargs = "--inputorder";\n-\t\t\t\telse if( resnum == 5 )\n-\t\t\t\t\toutargs = "--phylipout --reorder";\n-\t\t\t\telse if( resnum == 6 )\n-\t\t\t\t\toutargs = "--phylipout --inputorder";\n-\t\t\t\telse\n-\t\t\t\t\tcontinue;\n-\t\t\t\tprintf( "OK. 
arguments = %s\\n\\n", outargs ) > "/dev/tty";\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t}\n-\t\n-\t\twhile( 1 )\n-\t\t{\n-\t\t\targuments = "";\n-\t\t\tprintf( "\\n" ) > "/dev/tty";\n-\t\t\tprintf( "Strategy?\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 1. --auto\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 2. FFT-NS-1 (fast)\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 3. FFT-NS-2 (default)\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 4. G-INS-i (accurate)\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 5. L-INS-i (accurate)\\n" ) > "/dev/tty";\n-\t\t\tprintf( " 6. E-INS-i (accurate)\\n" ) > "/dev/tty";\n-\t\t\tprintf( "@ " ) > "/dev/tty";\n-\t\t\tres = getline < "/dev/tty";\n-\t\t\tclose( "/dev/tty" );\n-#\t\t\tprintf( "res=%d, NF=%d\\n", res, NF );\n-\n-\t\t\tresnum = 0 + $1;\n-#\t\t\tprintf( "resnum=%d\\n", resnum );\n-\n-\t\t\tif( resnum < 1 || 6 < resnum )\n-\t\t\t\tcontinue;\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\tif( resnum == 1 )\n-\t\t\t\t\targuments = "--auto";\n-\t\t\t\telse if( resnum == 2 )\n-\t\t\t\t\targuments = "--retree 1";\n-\t\t\t\telse if( resnum == 3 )\n-\t\t\t\t\targuments = "--retree 2";\n-\t\t\t\telse if( resnum == 4 )\n-\t\t\t\t\targuments = "--globalpair --maxiterate 16";\n-\t\t\t\telse if( resnum == 5 )\n-\t\t\t\t\targuments = "--localpair --maxiterate 16";\n-\t\t\t\telse if( resnum == 6 )\n-\t\t\t\t\targuments = "--genafpair --maxiterate 16";\n-\t\t\t\telse\n-\t\t\t\t\targuments = sprintf( "%s", $0 );\n-\t\t\t\tprintf( "OK. arguments = %s %s\\n\\n", arguments, outargs ) > "/dev/tty";\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t}\n-\n-\n-\t\twhile( 1 )\n-\t\t{\n-\t\t\tprintf( "\\n" ) > "/dev/tty";\n-\t\t\tprintf( "Additional arguments? (--ep #, --op #, --kappa #, etc)\\n" ) > "/dev/tty";\n-\t\t\tprintf( "@ " ) > "/dev/tty";\n-\t\t\tres = getline < "/dev/tty";\n-\t\t\tclose( "/dev/tty" );\n-\t\t\tif( res == 0 || NF == 0 )\n-\t\t\t{\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\taddargs = sprintf( "%s", $0 );\n-\t\t\t\tprintf( "OK. 
arguments = %s %s %s\\n\\n", addargs, arguments, outargs ) > "/dev/tty";\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t}\n-\n-\t\targuments = sprintf( "%s %s %s", addargs, arguments, outargs );\n-\n-\t\tprint ""\n-\t\tcommand = sprintf( "\\"%s\\" %s \\"%s\\" > \\"%s\\"", myself, arguments, infile, outfile );\n-\t\tgsub( /\\\\/, "/", command );\n-\n-\n-\t\tprintf( "command=\\n%s\\n", command ) > "/dev/tty";\n-\t\n-\t\n-\t\twhile( 1 )\n-\t\t{\n-\t\t\tgo = 0;\n-\t\t\tprintf( "OK?\\n" ) > "/dev/tty";\n-\t\t\tprintf( "@ [Y] " ) > "/dev/tty";\n-\t\t\tres = getline < "/dev/tty";\n-\t\t\tclose( "/dev/tty" );\n-\t\t\tif( res == 0 )\n-\t\t\t\tcontinue;\n-\t\t\telse if( NF == 0 || $0 ~ /^[Yy]/ )\n-\t\t\t{\n-\t\t\t\tgo=1;\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t\telse\n-\t\t\t\tbreak;\n-\t\t}\n-\t\tif( go ) break;\n-\t\tprintf( "\\n" ) > "/dev/tty";\n-\t\tprintf( "\\n" ) > "/dev/tty";\n-\t}\n-\tsystem( command );\n-\tcommand = sprintf( "less \\"%s\\"", outfile );\n-\tsystem( command );\n-\tprintf( "Press Enter to exit." ) > "/dev/tty";\n-\tres = getline < "/dev/tty";\n-}\n-\'\n-)\n-exit 0;\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mafftash_premafft.tmpl --- a/mafft/core/mafftash_premafft.tmpl Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,464 +0,0 @@\n-#!/usr/bin/perl\n-\n-#####################################################################\n-# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)\n-#\n-# Ver. Date Changelog\n-#####################################################################\n-# 1.0 07.26.13 Initial release\n-# 2.0 09.03.13 Added extensive warnings and error messages\n-# 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs\n-# 3.1 11.08.13 Added LWP failsafe. Made hat3 not a required output\n-# 3.2 12.08.14 Removed 5-char restriction for own structure files\n-#\n-#####################################################################\n-\n-use strict;\n-use Getopt::Long;\n-use File::Path qw(make_path remove_tree);\n-use LWP::Simple;\n-use LWP::UserAgent;\n-\n-# to prevent error \'Header line too long (limit is 8192)\' [v3.1]\n-use LWP::Protocol::http;\n-push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);\n-\n-\n-\n-my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft";\n-\n-my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE );\n-\n-GetOptions\n-(\n- \'d=s\' => \\$WORKDIR,\n- \'p=s\' => \\$PDBLIST,\n- \'o=s\' => \\$OWNLIST,\n- \'h=s\' => \\$HAT3FILE,\n- \'i=s\' => \\$INSTRFILE,\n-);\n-\n-print STDERR "[MAFFTash-premafft]\\n";\n-\n-# set temp directory\n-my $TMP = "/tmp/mapremafft$$";\n-make_path($TMP) unless -d $TMP;\n-\n-\n-\n-######\n-# validation\n-&help("Required parameter : atleast one of either \'-p\' or \'-o\'") unless ( defined $PDBLIST || defined $OWNLIST);\n-&help("Required parameter : \'-d\'") if defined $OWNLIST && ! 
defined $WORKDIR;\n-\n-$HAT3FILE = "hat3" unless defined $HAT3FILE;\n-$INSTRFILE = "instr" unless defined $INSTRFILE;\n-chop $WORKDIR if defined $WORKDIR && $WORKDIR =~ m/\\/$/g;\n-\n-\n-######\n-# prepare inputs\n-print STDERR "Preparing inputs for service request...\\n";\n-\n-my @files = ();\n-push(@files, "strweight" => "0.5");\n-push(@files, "premafft" => "1");\n-\n-\n-# pdb entries\n-if ( defined $PDBLIST )\n-{\n- print STDERR "PDB List defined!\\n";\n- &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST;\n- my $listfile = "$TMP/pdblist.inp";\n-\n-\n- open(INPF,"<$PDBLIST") or &bail("Error: Cannot open file $PDBLIST for reading!");\n- open(OUTF,">$listfile") or &bail("Error: Cannot open temporary file $listfile for writing!");\n-\n- while(<INPF>)\n- {\n- chomp;\n- if ( /^(\\w{5})$/ )\n- {\n- print OUTF ">PDBID\\n$1\\n";\n- }\n- }\n-\n- close OUTF;\n- close INPF;\n-\n- push(@files, "inputfile" => ["$listfile"]);\n-}\n-\n-\n-\n-# upload own structures\n-my %ownids = ();\n-\n-if ( defined $OWNLIST )\n-{\n- print STDERR "OWN List defined!\\n";\n- &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST;\n-\n-\n- open(OWNINPF,"<$OWNLIST") or &bail("Error: Cannot open file $OWNLIST for reading!");\n-\n- while(<OWNINPF>)\n- {\n- chomp;\n-\n- if ( /^(\\S+)$/ )\n- {\n- my $fileref = "$WORKDIR/$1.pdb";\n-\n- unless (-e $fileref)\n- {\n- close OWNINPF;\n- &bail("Error: File $fileref does not exists!");\n- }\n-\n- push(@files, "inputownfile[]" => ["$fileref"]);\n- $ownids{$1} = 1;\n- }\n- }\n-\n- close OWNINPF;\n-}\n-\n-\n-\n-######\n-# start rest service\n-print STDERR "Sending service request...\\n";\n-\n-my $browser = LWP::UserAgent->new;\n-$browser->timeout(0);\n-\n-\n-# post: running a mafftash job\n-my $postResponse = $browser->post( $BASEURL, \\@files, \'Content_Type\' => \'form-data\' );\n-&bail(sprintf("[%d] %s\\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success);\n-\n-\n-# get 
response from post request\n-my ($status, $mafftashid) = &parseResponse($postResponse->content);\n-\n-\n-\n-my $MAXTRIES = 3;\n-my $STIMER = 4;\n-my $longtimer = 0;\n-\n-print STDERR "Request sent! Waiting for response...[$mafftashid]\\n";\n-\n-\n-# wait for results until it becomes available\n-while(1)\n-{\n- $longtimer = $longtimer <= ($STIMER*3) ? $lo'..b'ess\n-print STDERR "Assembling final results...\\n";\n-\n-&backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/");\n-&backticks("mv -f $TMP/instr $INSTRFILE") if -e "$TMP/instr";\n-&backticks("mv -f $TMP/hat3 $HAT3FILE") if -e "$TMP/hat3";\n-\n-# sometimes no hat3 file is generated [v3.1]\n-#&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE;\n-&bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE;\n-\n-\n-# warn if some ownids were ommitted\n-if ( scalar keys(%ownids) > 0 )\n-{\n- my %instrids = ();\n-\n- open(INSTRF,"<$INSTRFILE") or &bail("Error: Cannot open file $INSTRFILE for reading!");\n-\n- while(<INSTRF>)\n- {\n- chomp;\n-\n- if ( /^>\\d+_(\\S+)$/ )\n- {\n- $instrids{$1} = 1;\n- }\n- }\n-\n- close INSTRF;\n-\n- foreach my $id ( keys %ownids )\n- {\n- warn "Warning: Own structure $id was excluded from instr/hat3.\\n" unless $instrids{$id};\n- }\n-\n-}\n-\n-\n-\n-&cleanup();\n-\n-\n-\n-####################\n-####################\n-\n-\n-\n-sub parseResponse\n-{\n- my $response = shift;\n-\n- #"status":"wait","mafftashid":"Ma8211432R"\n-\n- my $status = "";\n- my $mafftashid = "";\n-\n- if ( $response =~ /^([^\\s:]+):([^\\s:]+)$/ )\n- {\n- $mafftashid = $1;\n- $status = $2;\n- }\n-\n- return ($status, $mafftashid);\n-\n-}\n-\n-\n-sub extractchecksum\n-{\n- my $infile = shift;\n- my %dataset = ();\n-\n- open CSUM, "tar -zxf $infile -O|" or return \\%dataset;\n-\n- while(<CSUM>)\n- {\n- chomp;\n- if ( /^(\\S+)\\s+(\\S+)$/ )\n- {\n- $dataset{$2} = $1;\n- }\n-\n- }\n-\n- close CSUM;\n-\n- return \\%dataset;\n-\n-}\n-\n-\n-sub parseError\n-{\n- my $response = 
shift;\n-\n- #"error":"Invalid number of inputs found."\n- my $errorstr = ( $response =~ /\\"error\\"\\s*:\\s*\\"([^\\"]+)\\"/ ) ? $1 : "";\n- return $errorstr;\n-}\n-\n-\n-sub getchecksum\n-{\n- my $infile = shift;\n-\n- # md5 binary check\n- my $MD5BIN = "";\n-\n- if ( -x "/usr/bin/md5sum" )\n- {\n- $MD5BIN = "/usr/bin/md5sum";\n- }\n- elsif ( -x "/sbin/md5" )\n- {\n- $MD5BIN = "/sbin/md5 -q";\n- }\n-\n- return "" if $MD5BIN eq "";\n-\n-\n- my $checksum = "";\n- open MD5EXE, "$MD5BIN $infile|" or return "";\n-\n- while(<MD5EXE>)\n- {\n- if (/^(\\S+)\\s+(\\S+)$/)\n- {\n- $checksum = $1;\n- last;\n- }\n- elsif (/^(\\S+)$/)\n- {\n- $checksum = $1;\n- last;\n- }\n- }\n-\n- close MD5EXE;\n-\n- return $checksum;\n-\n-}\n-\n-\n-sub backticks\n-{\n- my $command = shift;\n-\n- `$command`;\n- return ($? == -1) ? 0 : 1;\n-}\n-\n-\n-sub bail\n-{\n- my $str = shift;\n- print STDERR "$str\\n" if defined $str;\n-\n- &cleanup();\n- exit(1);\n-}\n-\n-\n-sub cleanup\n-{\n- return if ($TMP eq "" || !-d $TMP);\n-\n- opendir(MAINDIR, $TMP);\n- my @files = readdir(MAINDIR);\n- closedir(MAINDIR);\n-\n- foreach my $file (@files)\n- {\n- unlink "$TMP/$file" if -e "$TMP/$file";\n- }\n-\n- remove_tree($TMP);\n-\n-}\n-\n-\n-sub help\n-{\n- my $str = shift;\n-\n- print <<\'HELPME\';\n-\n-USAGE\n- ./mafftash_premafft.pl -p [FILE]\n- ./mafftash_premafft.pl -o [FILE] -d [DIRECTORY]\n- ./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY]\n-\n-\n-PARAMETERS\n- -p [FILE]\n- FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format\n-\n- -o [FILE] -d [DIRECTORY]\n- FILE contains a list of IDs from your own structure/pdb files (one entry per line)\n- for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY\n-\n- -h [HATFILE]\n- save the output hat3 file in HATFILE; if not set, the output is written to a file named \'hat3\' in your current 
directory\n-\n- -i [INSTRFILE]\n- save the output instr file in INSTRFILE; if not set, the output is written to a file named \'instr\' in your current directory\n-\n-HELPME\n-\n- &bail($str);\n-}\n-\n-\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/makedirectionlist.c --- a/mafft/core/makedirectionlist.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,853 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 0\n-\n-#define END_OF_VEC -1\n-\n-int nadd;\n-float thresholdtorev;\n-int dodp;\n-int addfragment;\n-\n-typedef struct _thread_arg\n-{\n-\tint iend; \n-\tchar **seq;\n-\tchar *tmpseq;\n-\tint *res;\n-\tint **spointt;\n-\tshort *table1;\n-\tint iq;\n-#ifdef enablemultithread\n-\tint *jshare;\n-\tint thread_no;\n-\tpthread_mutex_t *mutex_counter;\n-#endif\n-} thread_arg_t;\n-\n-\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\n-\tnthread = 1;\n-\tinputfile = NULL;\n-\tnadd = 0;\n-\tdodp = 0;\n-\talg = \'a\';\n-\talg = \'m\';\n-\tdorp = NOTSPECIFIED;\n-\tfmodel = 0;\n-//\tppenalty = (int)( -2.0 * 1000 - 0.5 );\n-//\tppenalty_ex = (int)( -0.1 * 1000 - 0.5 );\n-//\tpoffset = (int)( 0.1 * 1000 - 0.5 ); \n-\tppenalty = NOTSPECIFIED;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tpoffset = NOTSPECIFIED;\n-\tkimuraR = 2;\n-\tpamN = 200;\n-\tthresholdtorev = 0.1;\n-\taddfragment = 0;\n-\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( (c = *++argv[0]) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "inputfile = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'I\':\n-\t\t\t\t\tnadd = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "nadd = %d\\n", nadd );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'C\':\n-\t\t\t\t\tnthread = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "nthread = %d\\n", nthread );\n-\t\t\t\t\t--argc; \n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'f\':\n-\t\t\t\t\tppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-//\t\t\t\t\tfprintf( stderr, "ppenalty = %d\\n", ppenalty );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'g\':\n-\t\t\t\t\tppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\tfprintf( stderr, "ppenalty_ex = %d\\n", ppenalty_ex );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto 
nextoption;\n-\t\t\t\tcase \'h\':\n-\t\t\t\t\tpoffset = (int)( atof( *++argv ) * 1000 - 0.5 );\n-//\t\t\t\t\tfprintf( stderr, "poffset = %d\\n", poffset );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'k\':\n-\t\t\t\t\tkimuraR = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "kappa = %d\\n", kimuraR );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'j\':\n-\t\t\t\t\tpamN = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 0;\n-\t\t\t\t\tTMorJTT = JTT;\n-\t\t\t\t\tfprintf( stderr, "jtt/kimura %d\\n", pamN );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'t\':\n-\t\t\t\t\tthresholdtorev = atof( *++argv );\n-\t\t\t\t\tfprintf( stderr, "thresholdtorev = %f\\n", thresholdtorev );\n-\t\t\t\t\t--argc; \n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'d\':\n-\t\t\t\t\tdodp = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'F\':\n-\t\t\t\t\taddfragment = 1;\n-\t\t\t\t\tbreak;\n-#if 1\n-\t\t\t\tcase \'a\':\n-\t\t\t\t\tfmodel = 1;\n-\t\t\t\t\tbreak;\n-#endif\n-\t\t\t\tcase \'S\':\n-\t\t\t\t\talg = \'S\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'M\':\n-\t\t\t\t\talg = \'M\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'m\':\n-\t\t\t\t\talg = \'m\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'G\':\n-\t\t\t\t\talg = \'G\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'D\':\n-\t\t\t\t\tdorp = \'d\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'P\':\n-\t\t\t\t\tdorp = \'p\';\n-\t\t\t\t\tbreak;\n- default:\n- fprintf( stderr, "illegal option %c\\n", c );\n- argc = 0;\n- break;\n- }\n-\t\t}\n-\t\tnextoption:\n-\t\t\t;\n-\t}\n- if( argc == 1 )\n- {\n- cut = atof( (*argv) );\n- argc--;\n- }\n- if( argc != 0 ) \n- {\n- fprintf( stderr, "options: Check source file !\\n" );\n- exit( 1 );\n- }\n-\tif( tbitr == 1 && outgap == 0 )\n-\t{\n-\t\tfprintf( stderr, "conflicting options : o, m or u\\n" );\n-\t\texit( 1 );\n-\t}\n-}\n-\n-\n-\n-\n-static int maxl;\n-static int tsize;\n-\n-void seq_grp_nuc( int *grp, char *seq )\n-{\n-\tint tmp;\n-\tint *grpbk = grp;\n-\twhile( *seq )\n-\t{\n-\t\ttmp = 
amino_grp[(int)*seq++];\n-\t\tif( tmp < 4 )\n-\t\t\t*grp++ = tmp;\n-\t\telse\n-//\t\t\tfprintf( stderr, "WARNING : Unknown character %c\\r", *(seq-1) );\n-\t\t\t;\n-\t}\n-\t*grp = END_OF_VEC;\n-\tif( grp - grpbk < 6 )\n-\t{\n-//\t\tfprintf( stderr, "\\n\\nWARNING: Too short.\\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\\n\\n\\n" );\n-//\t\texit( 1 );\n-\t\t*grpbk = -1;\n-\t}\n-}\n-\n-void seq_grp( int *grp, char *seq )\n-{\n-\tint tmp;\n-\tint *grpbk = grp;\n-\twhile( *seq )\n-\t{\n-\t\ttmp = amino_grp[(int)*seq++];\n'..b'j, NULL, directionthread, (void *)(targ+j) );\n-\t\t\t\t}\n-\t\t\t\tfor( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );\n-\t\t\t\tpthread_mutex_destroy( &mutex_counter );\n-\t\t\t\tfree( handle );\n-\t\t\t\tfree( targ );\n-\t\t\t\tfree( jsharept );\n-\t\t\t}\n-\t\t\telse\n-#endif\n-\t\t\t{\n-\t\t\t\tthread_arg_t *targ;\n-\t\t\t\ttarg = calloc( 1, sizeof( thread_arg_t ) );\n-\t\t\t\ttarg[0].iend = iend;\n-\t\t\t\ttarg[0].seq = seq;\n-\t\t\t\ttarg[0].tmpseq = tmpseq; \n-\t\t\t\ttarg[0].res = res; \n-\t\t\t\ttarg[0].spointt = spointt; \n-\t\t\t\ttarg[0].table1 = table1; \n-\t\t\t\ttarg[0].iq = i; \n-\t\t\t\tdirectionthread( targ );\n-\t\t\t\tfree( targ );\n-\t\t\t}\n-\n-\n-\t\t\tmres = mres2 = 0;\n-\t\t\tfor( j=0; j<iend; j++ )\n-\t\t\t{\n-\t\t\t\tires = res[j];\n-//\t\t\t\tfprintf( stdout, "ires (%d,%d) = %d\\n", i, j, ires );\n-//\t\t\t\tfflush( stdout );\n-\t\t\t\tif( ires>mres2 ) \n-\t\t\t\t{\n-\t\t\t\t\tif( ires>mres ) \n-\t\t\t\t\t{\n-\t\t\t\t\t\tmres2 = mres;\n-\t\t\t\t\t\tmres = ires;\n-\t\t\t\t\t}\n-\t\t\t\t\telse\n-\t\t\t\t\t\tmres2 = ires;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tres_forward = (float)( mres + mres2 ) / 2;\n-\n-#ifdef enablemultithread\n-\t\t\tif( nthread )\n-\t\t\t{\n-\t\t\t\tpthread_t *handle;\n-\t\t\t\tpthread_mutex_t mutex_counter;\n-\t\t\t\tthread_arg_t *targ;\n-\t\t\t\tint *jsharept;\n-\t\t\n-\t\t\t\ttarg = calloc( nthread, sizeof( thread_arg_t ) );\n-\t\t\t\thandle = calloc( nthread, sizeof( 
pthread_t ) );\n-\t\t\t\tpthread_mutex_init( &mutex_counter, NULL );\n-\t\t\t\tjsharept = calloc( 1, sizeof(int) );\n-\t\t\t\t*jsharept = 0;\n-\t\t\n-\t\t\t\tfor( j=0; j<nthread; j++ )\n-\t\t\t\t{\n-\t\t\t\t\ttarg[j].iend = iend;\n-\t\t\t\t\ttarg[j].seq = seq;\n-\t\t\t\t\ttarg[j].tmpseq = revseq; \n-\t\t\t\t\ttarg[j].res = res; \n-\t\t\t\t\ttarg[j].spointt = spointt; \n-\t\t\t\t\ttarg[j].table1 = table1_rev; \n-\t\t\t\t\ttarg[j].jshare = jsharept;\n-\t\t\t\t\ttarg[j].iq = i;\n-\t\t\t\t\ttarg[j].mutex_counter = &mutex_counter;\n-\t\t\t\t\ttarg[j].thread_no = j;\n-\t\t\t\t\tpthread_create( handle+j, NULL, directionthread, (void *)(targ+j) );\n-\t\t\t\t}\n-\t\t\t\tfor( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );\n-\t\t\t\tpthread_mutex_destroy( &mutex_counter );\n-\t\t\t\tfree( handle );\n-\t\t\t\tfree( targ );\n-\t\t\t\tfree( jsharept );\n-\t\t\t}\n-\t\t\telse\n-#endif\n-\t\t\t{\n-\t\t\t\tthread_arg_t *targ;\n-\t\t\t\ttarg = calloc( 1, sizeof( thread_arg_t ) );\n-\t\t\t\ttarg[0].iend = iend;\n-\t\t\t\ttarg[0].seq = seq;\n-\t\t\t\ttarg[0].tmpseq = revseq; \n-\t\t\t\ttarg[0].res = res; \n-\t\t\t\ttarg[0].spointt = spointt;\n-\t\t\t\ttarg[0].table1 = table1_rev; \n-\t\t\t\ttarg[0].iq = i; \n-\t\t\t\tdirectionthread( targ );\n-\t\t\t\tfree( targ );\n-\t\t\t}\n-\n-\t\t\tmres = mres2 = 0;\n-\t\t\tfor( j=0; j<iend; j++ )\n-\t\t\t{\n-\t\t\t\tires = res[j];\n-\t\t\t\tif( ires>mres2 )\n-\t\t\t\t{\n-\t\t\t\t\tif( ires>mres ) \n-\t\t\t\t\t{\n-\t\t\t\t\t\tmres2 = mres;\n-\t\t\t\t\t\tmres = ires;\n-\t\t\t\t\t}\n-\t\t\t\t\telse\n-\t\t\t\t\t\tmres2 = ires;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tres_reverse = (float)( mres + mres2 ) / 2;\n-\n-//\t\t\tfprintf( stdout, "\\n" );\n-//\t\t\tfprintf( stdout, "score_for(%d,%d) = %f\\n", 0, i, res_forward );\n-//\t\t\tfprintf( stdout, "score_rev(%d,%d) = %f\\n", 0, i, res_reverse );\n-//\t\t\tfflush( stdout );\n-\t\t\tres_max = MAX(res_reverse,res_forward);\n-\t\t\tif( (res_reverse-res_forward)/res_max > thresholdtorev ) // 
tekitou\n-\n-\t\t\t{\n-\t\t\t\tstrcpy( seq[i], revseq );\n-\n-\t\t\t\tstrcpy( tmpseq, name[i] );\n-\t\t\t\tstrcpy( name[i], "_R_" );\n-\t\t\t\tstrncpy( name[i]+3, tmpseq+1, 10 );\n-\t\t\t\tname[i][13] = 0;\n-\t\t\t\tif( !dodp ) spointt[i] = pointt_rev[i];\n-\t\t\t}\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\tstrcpy( tmpseq, name[i] );\n-\t\t\t\tstrcpy( name[i], "_F_" );\n-\t\t\t\tstrncpy( name[i]+3, tmpseq+1, 10 );\n-\t\t\t\tname[i][13] = 0;\n-\t\t\t\tif( !dodp ) spointt[i] = pointt[i];\n-\t\t\t}\n-\n-\t\t\tif( !dodp )\n-\t\t\t{\n-\t\t\t\tfree( table1 );\n-\t\t\t\tfree( table1_rev );\n-\t\t\t}\n-\t\t}\n-\n-\t\tfree( grpseq );\n-\t\tfree( tmpseq );\n-\t\tfree( revseq );\n-\t\tfree( res );\n-\t\tif( dodp )\n-\t\t{\n-\t\t\tFreeCharMtx( mseq1f );\n-\t\t\tFreeCharMtx( mseq1r );\n-\t\t\tFreeCharMtx( mseq2 );\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tFreeIntMtx( pointt );\n-\t\t\tFreeIntMtx( pointt_rev );\n-\t\t\tfree( spointt );\n-\t\t}\n-\t}\n-\telse\n-\t{\n-\t\tfprintf( stderr, "Unknown alg %c\\n", alg );\n-\t\texit( 1 );\n-\t}\n-//\twriteData_pointer( stdout, njob, name, nlen, seq );\n-\tfor( i=0; i<njob; i++ ) \n-\t{\n-//\t\tfprintf( stdout, ">%s\\n", name[i] );\n-//\t\tfprintf( stdout, "%s\\n", seq[i] );\n-\t\tfprintf( stdout, "%s\\n", name[i] );\n-\t}\n-\n-\tfprintf( stderr, "\\n" );\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/makemergetable.rb --- a/mafft/core/makemergetable.rb Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,32 +0,0 @@ -#!/bin/env ruby -require 'getopts' - -seedoffset = 0 - -if getopts( "s:" ) == nil || ARGV.length == 0 || $OPT_h then - puts "Usage: #{$0} [-s number_of_seeds] input_files" - exit -end - -if $OPT_s - seedoffset = $OPT_s.to_i -end - - -files = ARGV - - -num = seedoffset + 1 -for file in files - output = "" - STDERR.puts file - fp = File.open( file, "r" ) - while line = fp.gets - if line =~ /^>/ then - output += " " + num.to_s - num += 1 - end - end - fp.close - puts output + " # " + file -end |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mccaskillwrap.c --- a/mafft/core/mccaskillwrap.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,451 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-\n-static char *whereismccaskillmea;\n-\n-#ifdef enablemultithread\n-typedef struct _thread_arg\n-{\n-\tint thread_no;\n-\tint njob;\n-\tint *jobpospt;\n-\tint **gapmap;\n-\tchar **nogap;\n-\tint nlenmax;\n-\tRNApair ***pairprob;\n-\tpthread_mutex_t *mutex;\n-} thread_arg_t;\n-#endif\n-\n-void outmccaskill( FILE *fp, RNApair **pairprob, int length )\n-{\n-\tint i;\n-\tRNApair *pt;\n-\tfor( i=0; i<length; i++ ) for( pt=pairprob[i]; pt->bestpos!=-1; pt++ )\n-\t{\n-\t\tif( pt->bestpos > i ) \n-\t\t\tfprintf( fp, "%d %d %50.40f\\n", i, pt->bestpos, pt->bestscore );\n-\t}\n-}\n-\n-#if 1\n-static void readrawmccaskill( FILE *fp, RNApair **pairprob, int length )\n-{\n-\tchar gett[1000];\n-\tint *pairnum;\n-\tint i;\n-\tint left, right;\n-\tfloat prob;\n-\n-\tpairnum = (int *)calloc( length, sizeof( int ) );\n-\tfor( i=0; i<length; i++ ) pairnum[i] = 0;\n-\n-\twhile( 1 )\n-\t{\n-\t\tfgets( gett, 999, fp );\n-\t\tif( feof( fp ) ) break;\n-\t\tif( gett[0] == \'>\' ) continue;\n-\t\tsscanf( gett, "%d %d %f", &left, &right, &prob );\n-\t\tif( prob < 0.01 ) continue; // mxscarna to mafft ryoho ni eikyou\n-//fprintf( stderr, "gett = %s\\n", gett );\n-\n-\t\tif( left != right && prob > 0.0 )\n-\t\t{\n-\t\t\tpairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) );\n-\t\t\tpairprob[left][pairnum[left]].bestscore = prob;\n-\t\t\tpairprob[left][pairnum[left]].bestpos = right;\n-\t\t\tpairnum[left]++;\n-\t\t\tpairprob[left][pairnum[left]].bestscore = -1.0;\n-\t\t\tpairprob[left][pairnum[left]].bestpos = -1;\n-//\t\t\tfprintf( stderr, "%d-%d, %f\\n", left, right, prob );\n-\n-\t\t\tpairprob[right] = (RNApair *)realloc( pairprob[right], (pairnum[right]+2) * sizeof( RNApair ) );\n-\t\t\tpairprob[right][pairnum[right]].bestscore = prob;\n-\t\t\tpairprob[right][pairnum[right]].bestpos = left;\n-\t\t\tpairnum[right]++;\n-\t\t\tpairprob[right][pairnum[right]].bestscore = 
-1.0;\n-\t\t\tpairprob[right][pairnum[right]].bestpos = -1;\n-//\t\t\tfprintf( stderr, "%d-%d, %f\\n", right, left, prob );\n-\t\t}\n-\t}\n-\tfree( pairnum );\n-}\n-#endif\n-\n-#ifdef enablemultithread\n-static void *athread( void *arg )\n-{\n-\tthread_arg_t *targ = (thread_arg_t *)arg;\n-\tint thread_no = targ->thread_no;\n-\tint njob = targ->njob;\n-\tint *jobpospt = targ->jobpospt;\n-\tint **gapmap = targ->gapmap;\n-\tchar **nogap = targ->nogap;\n-\tint nlenmax = targ->nlenmax;\n-\tRNApair ***pairprob = targ->pairprob;\n-\n-\tint i, res;\n-\tFILE *infp;\n-\tchar *com;\n-\tchar *dirname;\n-\n-\tdirname = calloc( 100, sizeof( char ) );\n-\tcom = calloc( 1000, sizeof( char ) );\n-\t\n-\n-\twhile( 1 )\n-\t{\n-\t\tpthread_mutex_lock( targ->mutex );\n-\t\ti = *jobpospt;\n-\t\tif( i == njob )\n-\t\t{\n-\t\t\tpthread_mutex_unlock( targ->mutex );\n-//\t\t\treturn( NULL );\n-\t\t\tbreak;\n-\t\t}\n-\t\t*jobpospt = i+1;\n-\t\tpthread_mutex_unlock( targ->mutex );\n-\n-\t\tcommongappick_record( 1, nogap+i, gapmap[i] );\n-\t\tif( strlen( nogap[i] ) == 0 ) continue;\n-\n-\t\tsprintf( dirname, "_%d", i );\n-\t\tsprintf( com, "rm -rf %s", dirname );\n-\t\tsystem( com );\n-\t\tsprintf( com, "mkdir %s", dirname );\n-\t\tsystem( com );\n-\n-\t\tfprintf( stderr, "%d / %d (by thread %4d)\\n", i+1, njob, thread_no );\n-\t\tsprintf( com, "%s/_mccaskillinorg", dirname );\n-\t\tinfp = fopen( com, "w" );\n-//\t\tfprintf( infp, ">in\\n%s\\n", nogap[i] );\n-\t\tfprintf( infp, ">in\\n" );\n-\t\twrite1seq( infp, nogap[i] );\n-\t\tfclose( infp );\n-\n-\t\tsprintf( com, "tr -d \'\\\\r\' < %s/_mccaskillinorg > %s/_mccaskillin", dirname, dirname );\n-\t\tsystem( com ); // for cygwin, wakaran\n-\t\tif( alg == \'G\' )\n-\t\t\tsprintf( com, "cd %s; %s/dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", dirname, whereismccaskillmea );\n-\t\telse\n-\t\t\tsprintf( com, "cd %s; %s/mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", dirname, whereismccaskillmea );\n-\t\tres = system( 
com );\n-\n-\t\tif( res )\n-\t\t{\n-\t\t\tfprintf( stderr, "ERROR IN mccaskill_mea\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\n-\t\tsprintf( com, "%s/_mccaskillout", dirname );\n-\t\tinfp = fopen( com, "r" );\n-\t\treadrawmccaskill( infp, pairprob[i], nlenmax );\n-\t\tfclose( infp );\n-\n-\t\tsprintf( com, "rm -rf %s > /dev/null 2>&1", dirname );\n-\t\tif( system( com ) )\n-\t\t{\n-\t\t\tfpri'..b'][0].bestpos = -1;\n-\t\talnpairprob[j][0].bestscore = -1.0;\n-\t}\n-\n-\n-\tconstants( njob, seq );\n-\n-\tif( alg == \'G\' )\n-\t\tfprintf( stderr, "Running DAFS (Sato et al. 2012; http://www.ncrna.org/).\\n" );\n-\telse\n-\t\tfprintf( stderr, "Running mxscarna with the mccaskill_mea mode.\\n" );\n-#ifdef enablemultithread\n-\tif( nthread > 0 )\n-\t{\n-\t\tint jobpos;\n-\t\tpthread_t *handle;\n-\t\tpthread_mutex_t mutex;\n-\t\tthread_arg_t *targ;\n-\t\tjobpos = 0;\n-\n-\t\ttarg = calloc( nthread, sizeof( thread_arg_t ) );\n-\t\thandle = calloc( nthread, sizeof( pthread_t ) );\n-\t\tpthread_mutex_init( &mutex, NULL );\n-\n-\t\tfor( i=0; i<nthread; i++ )\n-\t\t{\n-\t\t\ttarg[i].thread_no = i;\n-\t\t\ttarg[i].njob = njob;\n-\t\t\ttarg[i].jobpospt = &jobpos;\n-\t\t\ttarg[i].gapmap = gapmap;\n-\t\t\ttarg[i].nogap = nogap;\n-\t\t\ttarg[i].nlenmax = nlenmax;\n-\t\t\ttarg[i].pairprob = pairprob;\n-\t\t\ttarg[i].mutex = &mutex;\n-\n-//\t\t\tathread( targ );\n-\t\t\tpthread_create( handle+i, NULL, athread, (void *)(targ+i) );\n-\t\t\t\n-\t\t}\n-\n-\t\tfor( i=0; i<nthread; i++ )\n-\t\t{\n-\t\t\tpthread_join( handle[i], NULL );\n-\t\t}\n-\t\tpthread_mutex_destroy( &mutex );\n-\n-\t\tfree( handle );\n-\t\tfree( targ );\n-\n-\n-\t\tfor( i=0; i<njob; i++ )\n-\t\t{\n-\t\t\tfprintf( stdout, ">%d\\n", i );\n-\t\t\toutmccaskill( stdout, pairprob[i], nlenmax );\n-\t\t}\n-\t}\n-\telse\n-#endif\n-\t{\n-\t\tfor( i=0; i<njob; i++ )\n-\t\t{\n-\t\t\tfprintf( stderr, "%d / %d\\n", i+1, njob );\n-\t\t\tcommongappick_record( 1, nogap+i, gapmap[i] );\n-\t\t\tif( strlen( nogap[i] ) == 0 ) 
\n-\t\t\t{\n-\t\t\t\tfprintf( stdout, ">%d\\n", i );\n-\t\t\t\tcontinue;\n-\t\t\t}\n-\n-\t\t\tinfp = fopen( "_mccaskillinorg", "w" );\n-//\t\t\tfprintf( infp, ">in\\n%s\\n", nogap[i] );\n-\t\t\tfprintf( infp, ">in\\n" );\n-\t\t\twrite1seq( infp, nogap[i] );\n-\t\t\tfclose( infp );\n-\t\n-\t\t\tsystem( "tr -d \'\\\\r\' < _mccaskillinorg > _mccaskillin" ); // for cygwin, wakaran\n-\t\t\tif( alg == \'G\' )\n-\t\t\t\tsprintf( com, "env PATH=%s dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", whereismccaskillmea );\n-\t\t\telse\n-\t\t\t\tsprintf( com, "env PATH=%s mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", whereismccaskillmea );\n-\t\t\tres = system( com );\n-\t\n-\t\t\tif( res )\n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "ERROR IN mccaskill_mea\\n" );\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-\t\n-\t\t\tinfp = fopen( "_mccaskillout", "r" );\n-\t\t\treadrawmccaskill( infp, pairprob[i], nlenmax );\n-\t\t\tfclose( infp );\n-\t\t\tfprintf( stdout, ">%d\\n", i );\n-\t\t\toutmccaskill( stdout, pairprob[i], nlenmax );\n-\t\t}\n-\t}\n-\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tfor( j=0; j<nlen[i]; j++ ) for( pairprobpt=pairprob[i][j]; pairprobpt->bestpos!=-1; pairprobpt++ )\n-\t\t{\n-\t\t\tleft = gapmap[i][j];\n-\t\t\tright = gapmap[i][pairprobpt->bestpos];\n-\t\t\tprob = pairprobpt->bestscore;\n-\n-\t\t\tfor( pt=alnpairprob[left]; pt->bestpos!=-1; pt++ )\n-\t\t\t\tif( pt->bestpos == right ) break;\n-\n-\t\t\tif( pt->bestpos == -1 )\n-\t\t\t{\n-\t\t\t\talnpairprob[left] = (RNApair *)realloc( alnpairprob[left], (alnpairnum[left]+2) * sizeof( RNApair ) );\n-\t\t\t\tadpos = alnpairnum[left];\n-\t\t\t\talnpairnum[left]++;\n-\t\t\t\talnpairprob[left][adpos].bestscore = 0.0;\n-\t\t\t\talnpairprob[left][adpos].bestpos = right;\n-\t\t\t\talnpairprob[left][adpos+1].bestscore = -1.0;\n-\t\t\t\talnpairprob[left][adpos+1].bestpos = -1;\n-\t\t\t\tpt = alnpairprob[left]+adpos;\n-\t\t\t}\n-\t\t\telse\n-\t\t\t\tadpos = pt-alnpairprob[left];\n-\n-\t\t\tpt->bestscore += 
prob;\n-\t\t\tif( pt->bestpos != right )\n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "okashii!\\n" );\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-//\t\t\tfprintf( stderr, "adding %d-%d, %f\\n", left, right, prob );\n-\t\t}\n-\t}\n-\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tfor( j=0; j<nlenmax; j++ ) free( pairprob[i][j] );\n-\t\tfree( pairprob[i] );\n-\t}\n-\tfree( pairprob );\n-\tfor( j=0; j<nlenmax; j++ ) free( alnpairprob[j] );\n-\tfree( alnpairprob );\n-\tfree( alnpairnum );\n-\tfprintf( stderr, "%d thread(s)\\n", nthread );\n-\treturn( 0 );\n-\n-#if 0\n-\tfprintf( stdout, "result=\\n" );\n-\n-\tfor( i=0; i<nlenmax; i++ ) for( pairprobpt=alnpairprob[i]; pairprobpt->bestpos!=-1; pairprobpt++ )\n-\t{\n-\t\tpairprobpt->bestscore /= (float)njob;\n-\t\tleft = i;\n-\t\tright = pairprobpt->bestpos;\n-\t\tprob = pairprobpt->bestscore;\n-\t\tfprintf( stdout, "%d-%d, %f\\n", left, right, prob );\n-\t}\n-\n-\treturn( 0 );\n-#endif\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mingw64mingw32 --- a/mafft/core/mingw64mingw32 Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,38 +0,0 @@ -#!/usr/bin/env bash -export PATH=/home/mingw32/mingw32/bin:$PATH -export C_INCLUDE_PATH=/home/mingw32/mingw32/include -export LIBRARY_PATH=/home/mingw32/mingw32/lib -make clean -make ENABLE_MULTITHREAD="" -rm -rf binaries32 -mkdir binaries32 -mv ../binaries/* binaries32/ - -export PATH=/home/mingw64/mingw64/bin:$PATH -export C_INCLUDE_PATH=/home/mingw64/mingw64/include -export LIBRARY_PATH=/home/mingw64/mingw64/lib -make clean -make ENABLE_MULTITHREAD="" -rm -rf binaries64 -mkdir binaries64 -mv ../binaries/* binaries64/ - - - -export PATH=/home/mingw32/mingw32/bin:$PATH -export C_INCLUDE_PATH=/home/mingw32/mingw32/include -export LIBRARY_PATH=/home/mingw32/mingw32/lib -make clean -make LIBS="-static -lm -lpthread" -rm -rf binaries32-multithread -mkdir binaries32-multithread -mv ../binaries/* binaries32-multithread/ - -export PATH=/home/mingw64/mingw64/bin:$PATH -export C_INCLUDE_PATH=/home/mingw64/mingw64/include -export LIBRARY_PATH=/home/mingw64/mingw64/lib -make clean -make LIBS="-static -lm -lpthread" -rm -rf binaries64-multithread -mkdir binaries64-multithread -mv ../binaries/* binaries64-multithread/ |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mingw64mingw32dll --- a/mafft/core/mingw64mingw32dll Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,18 +0,0 @@ -#!/usr/bin/env bash -export PATH=/home/mingw32/mingw32/bin:$PATH -export C_INCLUDE_PATH=/home/mingw32/mingw32/include -export LIBRARY_PATH=/home/mingw32/mingw32/lib -make clean -make ENABLE_MULTITHREAD="" dlls -rm -rf dll32 -mkdir dll32 -mv *.dll dll32/ - -export PATH=/home/mingw64/mingw64/bin:$PATH -export C_INCLUDE_PATH=/home/mingw64/mingw64/include -export LIBRARY_PATH=/home/mingw64/mingw64/lib -make clean -make ENABLE_MULTITHREAD="" dlls -rm -rf dll64 -mkdir dll64 -mv *.dll dll64/ |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/miyata.h --- a/mafft/core/miyata.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,47 +0,0 @@ -double polarity_[] = -{ - 8.1, /* A */ - 10.5, /* R */ - 11.6, /* N */ - 13.0, /* D */ - 5.5, /* C */ - 10.5, /* Q */ - 12.3, /* E */ - 9.0, /* G */ - 10.4, /* H */ - 5.2, /* I */ - 4.9, /* L */ - 11.3, /* K */ - 5.7, /* M */ - 5.2, /* F */ - 8.0, /* P */ - 9.2, /* S */ - 8.6, /* T */ - 5.4, /* W */ - 6.2, /* Y */ - 5.9, /* V */ -}; - -double volume_[] = -{ - 31.0, /* A */ - 124.0, /* R */ - 56.0, /* N */ - 54.0, /* D */ - 55.0, /* C */ - 85.0, /* Q */ - 83.0, /* E */ - 3.0, /* G */ - 96.0, /* H */ - 111.0, /* I */ - 111.0, /* L */ - 119.0, /* K */ - 105.0, /* M */ - 132.0, /* F */ - 32.5, /* P */ - 32.0, /* S */ - 61.0, /* T */ - 170.0, /* W */ - 136.0, /* Y */ - 84.0, /* V */ -}; |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/miyata5.h --- a/mafft/core/miyata5.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,166 +0,0 @@ -int locpenaltym = -1440; -int exgpm = +0; /* != 0 nisuruto kowareru. exgp ha constants.c de kurikomu */ -char locaminom[] = "ARNDCQEGHILKMFPSTWYVBZX.-J"; -char locgrpm[] = -{ - 0, 3, 2, 2, 5, 2, 2, 0, 3, 1, 1, 3, 1, 4, 0, 0, 0, 4, 4, 1, 2, 2, - 6, 6, 6, 1, -}; -int locn_dism[26][26] = - { - { - 600, -235, 91, -78, 202, 51, -103, 340, -21, -169, - -189, -246, -92, -323, 582, 454, 342, -400, -309, 71, - 7, -26, -15, -400, 0,-1400, - }, - - { - -235, 600, 17, -69, -275, 277, 185, -400, 365, -112, - -149, 485, -55, -106, -229, -183, 20, -178, 22, -95, - -26, 231, -15, -400, 0,-1400, - }, - - { - 91, 17, 600, 414, -209, 317, 357, 39, 231, -363, - -398, 74, -280, -400, 85, 225, 200, -400, -378, -189, - 507, 337, -15, -400, 0,-1400, - }, - - { - -78, -69, 414, 600, -395, 179, 342, -78, 108, -400, - -400, 14, -400, -400, -86, 65, 14, -400, -400, -372, - 507, 261, -15, -400, 0,-1400, - }, - - { - 202, -275, -209, -395, 600, -109, -332, -35, -132, 134, - 128, -335, 182, -40, 220, 74, 185, -355, -81, 354, - -302, -220, -15, -400, 0,-1400, - }, - - { - 51, 277, 317, 179, -109, 600, 360, -109, 508, -135, - -172, 297, -58, -203, 51, 128, 280, -378, -109, -9, - 248, 480, -15, -400, 0,-1400, - }, - - { - -103, 185, 357, 342, -332, 360, 600, -195, 325, -369, - -400, 274, -295, -400, -109, 11, 77, -400, -321, -249, - 350, 480, -15, -400, 0,-1400, - }, - - { - 340, -400, 39, -78, -35, -109, -195, 600, -195, -400, - -400, -400, -355, -400, 322, 357, 114, -400, -400, -189, - -19, -152, -15, -400, 0,-1400, - }, - - { - -21, 365, 231, 108, -132, 508, 325, -195, 600, -100, - -141, 374, -26, -152, -15, 45, 222, -303, -49, -3, - 169, 417, -15, -400, 0,-1400, - }, - - { - -169, -112, -363, -400, 134, -135, -369, -400, -100, 600, - 560, -212, 517, 425, -149, -243, -12, 108, 354, 357, - -400, -252, -15, -400, 0,-1400, - }, - - { - -189, -149, -398, -400, 128, -172, -400, -400, -141, 560, - 600, -252, 482, 420, -172, -269, -43, 105, 331, 340, - -400, -290, -15, -400, 0,-1400, 
- }, - - { - -246, 485, 74, 14, -335, 297, 274, -400, 374, -212, - -252, 600, -152, -215, -240, -175, -1, -289, -92, -172, - 44, 285, -15, -400, 0,-1400, - }, - - { - -92, -55, -280, -400, 182, -58, -295, -355, -26, 517, - 482, -152, 600, 365, -75, -163, 68, 59, 334, 422, - -368, -176, -15, -400, 0,-1400, - }, - - { - -323, -106, -400, -400, -40, -203, -400, -400, -152, 425, - 420, -215, 365, 600, -306, -386, -143, 282, 462, 191, - -400, -315, -15, -400, 0,-1400, - }, - - { - 582, -229, 85, -86, 220, 51, -109, 322, -15, -149, - -172, -240, -75, -306, 600, 440, 351, -400, -292, 88, - 0, -29, -15, -400, 0,-1400, - }, - - { - 454, -183, 225, 65, 74, 128, 11, 357, 45, -243, - -269, -175, -163, -386, 440, 600, 345, -400, -352, -15, - 145, 70, -15, -400, 0,-1400, - }, - - { - 342, 20, 200, 14, 185, 280, 77, 114, 222, -12, - -43, -1, 68, -143, 351, 345, 600, -400, -100, 194, - 107, 178, -15, -400, 0,-1400, - }, - - { - -400, -178, -400, -400, -355, -378, -400, -400, -303, 108, - 105, -289, 59, 282, -400, -400, -400, 600, 297, -118, - -400, -400, -15, -400, 0,-1400, - }, - - { - -309, 22, -378, -400, -81, -109, -321, -400, -49, 354, - 331, -92, 334, 462, -292, -352, -100, 297, 600, 165, - -400, -215, -15, -400, 0,-1400, - }, - - { - 71, -95, -189, -372, 354, -9, -249, -189, -3, 357, - 340, -172, 422, 191, 88, -15, 194, -118, 165, 600, - -280, -129, -15, -400, 0,-1400, - }, - - { - 7, -26, 507, 507, -302, 248, 350, -19, 169, -400, - -400, 44, -368, -400, 0, 145, 107, -400, -400, -280, - 507, 299, -400, -400, 0,-1400, - }, - - { - -26, 231, 337, 261, -220, 480, 480, -152, 417, -252, - -290, 285, -176, -315, -29, 70, 178, -400, -215, -129, - 299, 480, -400, -400, 0,-1400, - }, - - { - -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, - -15, -15, -15, -15, -15, -15, -15, -15, -15, -15, - -400, -400, -400, -400, 0,-1400, - }, - - { - -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, - -400, -400, -400, -400, -400, -400, -400, -400, -400, -400, - -400, -400, -400, 
-400, 0,-1400, - }, - - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - }, - - { --1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400, --1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400,-1400, --1400,-1400,-1400,-1400, 0, 1600, - }, - }; |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mltaln.h --- a/mafft/core/mltaln.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,328 +0,0 @@ -#define USE_XCED 0 - -#if USE_XCED -#include "config.h" -#include "share.h" -#else -#endif - -#include "mafft.h" -#include <stdio.h> -#include <stdlib.h> -#include <stddef.h> -#include <sys/types.h> -#include <string.h> -#include <unistd.h> -#include <math.h> -#include <ctype.h> -#include "mtxutl.h" -#include <float.h> -#include <stdarg.h> -#ifdef enablemultithread -#include <pthread.h> -#endif - -#define VERSION "7.221" -#define SHOWVERSION reporterr( "%s (%s) Version " VERSION " alg=%c, model=%s, amax=%3.1f\n%d thread(s)\n", progName( argv[0] ), (dorp=='d')?"nuc":((nblosum==-2)?"text":"aa"), alg, modelname, specificityconsideration, nthread ) - -#define FFT_THRESHOLD 80 -#define FFT_WINSIZE_P 20 -#define FFT_WINSIZE_D 100 -#define DISPSEQF 60 -#define DISPSITEI 0 -#define MAXITERATION 500 -#define M 500000 /* njob no saidaiti */ -#define N 5000000 /* nlen no saidaiti */ -#define MAXSEG 100000 -#define B 256 -#define C 60 /* 1 gyou no mojisuu */ -#define D 6 -#define rnd() ( ( 1.0 / ( RAND_MAX + 1.0 ) ) * rand() ) -#define MAX(X,Y) ( ((X)>(Y))?(X):(Y) ) -#define MIN(X,Y) ( ((X)<(Y))?(X):(Y) ) -#define G(X) ( ((X)>(0))?(X):(0) ) -#define BEFF 1.0 /* 0.6 ni suruto zureru */ -#define WIN 3 -#define SGAPP -1000 -#define GETA2 0.001 -#define GETA3 0.001 -#define NOTSPECIFIED 100009 -#define SUEFF 0.1 /* upg/(spg+upg) -> sueff.sed */ -#define DIVLOCAL 0 -#define INTMTXSCALE 1000000.0 -#define JTT 201 -#define TM 202 - -extern char modelname[500]; -extern int njob, nlenmax; -extern int amino_n[0x80]; -extern char amino_grp[0x80]; -extern int amino_dis[0x80][0x80]; -extern double **n_disLN; -extern double amino_dis_consweight_multi[0x80][0x80]; -extern int **n_dis; -extern int **n_disFFT; -extern double **n_dis_consweight_multi; -extern char amino[0x80]; -extern double polarity[0x80]; -extern double volume[0x80]; -extern int ribosumdis[37][37]; - -extern int ppid; -extern double thrinter; -extern double fastathreshold; -extern int pslocal, ppslocal; 
-extern int constraint; -extern int divpairscore; -extern int fmodel; // 1-> fmodel 0->default -1->raw -extern int nblosum; // 45, 50, 62, 80 -extern int kobetsubunkatsu; -extern int bunkatsu; -extern int dorp; -extern int niter; -extern int contin; -extern int calledByXced; -extern int devide; -extern int scmtd; -extern int weight; -extern int utree; -extern int tbutree; -extern int refine; -extern int check; -extern double cut; -extern int cooling; -extern int trywarp; -extern int penalty, ppenalty, penaltyLN; -extern int penalty_dist, ppenalty_dist; -extern int RNApenalty, RNAppenalty; -extern int RNApenalty_ex, RNAppenalty_ex; -extern int penalty_ex, ppenalty_ex, penalty_exLN; -extern int penalty_EX, ppenalty_EX; -extern int penalty_OP, ppenalty_OP; -extern int penalty_shift; -extern double penalty_shift_factor; -extern int offset, poffset, offsetLN, offsetFFT; -extern int RNAthr, RNApthr; -extern int scoremtx; -extern int TMorJTT; -extern char use_fft; -extern char force_fft; -extern int nevermemsave; -extern int fftscore; -extern int fftWinSize; -extern int fftThreshold; -extern int fftRepeatStop; -extern int fftNoAnchStop; -extern int divWinSize; -extern int divThreshold; -extern int disp; -extern int outgap; -extern char alg; -extern int cnst; -extern int mix; -extern int tbitr; -extern int tbweight; -extern int tbrweight; -extern int disopt; -extern int pamN; -extern int checkC; -extern float geta2; -extern int treemethod; -extern int kimuraR; -extern char *swopt; -extern int fftkeika; -extern int score_check; -extern char *inputfile; -extern char *addfile; -extern int addprofile; -extern float consweight_multi; -extern float consweight_rna; -extern char RNAscoremtx; - -extern char *signalSM; -extern FILE *prep_g; -extern FILE *trap_g; -extern char **seq_g; -extern char **res_g; -extern int rnakozo; -extern char rnaprediction; - -/* sengen no ichi ha koko dake de ha nai */ -extern void constants(); -extern char **Calignm1(); -extern char **Dalignm1(); 
-extern char **align0(); -extern double Cscore_m_1( char **, int, int, double ** ); -extern double score_m_1( char **, int, int, double ** ); -extern double score_calc0( char **, int, double **, int ); -extern char seqcheck( char ** ); -extern float substitution( char *, char * ); -extern float substitution_score( char *, char * ); -extern float substitution_nid( char *, char * ); -extern float substitution_hosei( char *, char * ); -extern double ipower( double, int ); -extern float translate_and_Calign(); -extern float A__align(); -extern float A__align11(); -extern float A__align_gapmap(); -extern float partA__align(); -extern float L__align11( double **scoringmtx, float scoreoffset, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt ); -extern float G__align11(); -extern float Falign(); -extern float Falign_localhom(); -extern float Conalign(); -extern float Aalign(); -extern float imp_match_out_sc( int, int ); -extern float part_imp_match_out_sc( int, int ); -extern void ErrorExit(); -extern void cpmx_calc(); -extern void intergroup_score( char **, char **, double *, double *, int, int, int, double * ); -extern int conjuctionfortbfast(); -extern int fastconjuction(); -extern char seqcheck( char ** ); - -typedef struct _LocalHom -{ - int nokori; - struct _LocalHom *next; - struct _LocalHom *last; - int start1; - int end1; - int start2; - int end2; - double opt; - int overlapaa; - int extended; - double importance; - float fimportance; - double wimportance; - char korh; -} LocalHom; - -typedef struct _NodeInCub -{ - int step; - int LorR; -} NodeInCub; - -typedef struct _Node -{ - struct _Node *children[3]; - int tmpChildren[3]; - double length[3]; - double *weightptr[3]; - int top[3]; - int *members[3]; -} Node; - -typedef struct _Segment -{ - int start; - int end; - int center; - double score; - int skipForeward; - int skipBackward; - struct _Segment *pair; - int number; -} Segment; - -typedef struct _Segments -{ - Segment group1; - Segment group2; 
- int number1; - int number2; -} Segments; - -typedef struct _Bchain -{ - struct _Bchain *next; - struct _Bchain *prev; - int pos; -} Bchain; - -typedef struct _Achain -{ - int next; - int prev; -// int curr; -} Achain; - - -typedef struct _Fukusosuu -{ - double R; - double I; -} Fukusosuu; - -typedef struct _Gappattern -{ - int len; - float freq; -} Gappat; - -typedef struct _RNApair -{ - int uppos; - float upscore; - int downpos; - float downscore; - int bestpos; - float bestscore; -} RNApair; - -typedef struct _Treedep -{ - int child0; - int child1; - int done; - float distfromtip; -} Treedep; - -typedef struct _Addtree -{ - int nearest; - float dist1; - char *neighbors; - float dist2; -} Addtree; - -#include "fft.h" -#include "dp.h" -#include "functions.h" - -#ifdef enablemultithread -#define TLS __thread -#else -#define TLS -#endif - -extern TLS int commonAlloc1; -extern TLS int commonAlloc2; -extern TLS int **commonIP; -extern TLS int **commonJP; -extern int nthread; -extern int randomseed; -extern int parallelizationstrategy; -#define BESTFIRST 0 -#define BAATARI0 1 -#define BAATARI1 2 -#define BAATARI2 3 - -extern int scoreout; -extern int spscoreout; -extern int outnumber; - -extern int legacygapcost; - -extern TLS char *newgapstr; - -extern int nalphabets; -extern int nscoredalphabets; -extern double specificityconsideration; -extern int ndistclass, maxdistclass; - -extern int gmsg; - -extern double sueff_global; |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mltaln9.c --- a/mafft/core/mltaln9.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,9624 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-\n-#if 0\n-int seqlen( char *seq )\n-{\n-\tint val = 0;\n-\twhile( *seq )\n-\t\tif( *seq++ != \'-\' ) val++;\n-\treturn( val );\n-}\n-#else\n-int seqlen( char *seq )\n-{\n-\tint val = 0;\n-\tif( *newgapstr == \'-\' )\n-\t{\n-\t\twhile( *seq )\n-\t\t\tif( *seq++ != \'-\' ) val++;\n-\t}\n-\telse\n-\t{\n-\t\twhile( *seq )\n-\t\t{\n-\t\t\tif( *seq != \'-\' && *seq != *newgapstr ) val++;\n-\t\t\tseq++;\n-\t\t}\n-\t}\n-\treturn( val );\n-}\n-#endif\n-\n-int intlen( int *num )\n-{\n-\tint value = 0;\n-\twhile( *num++ != -1 ) value++;\n-\treturn( value );\n-}\n-\n-char seqcheck( char **seq )\n-{\n-\tint i, len;\n-\tchar **seqbk = seq;\n-\twhile( *seq )\t\n-\t{\n-\t\tlen = strlen( *seq );\n-\t\tfor( i=0; i<len; i++ ) \n-\t\t{\n-\t\t\tif( amino_n[(int)(*seq)[i]] == -1 ) \n-\t\t\t{\n-\n-\t\t\t\treporterr( "========================================================================= \\n" );\n-\t\t\t\treporterr( "========================================================================= \\n" );\n-\t\t\t\treporterr( "=== \\n" );\n-\t\t\t\treporterr( "=== Alphabet \'%c\' is unknown.\\n", (*seq)[i] );\n-\t\t\t\treporterr( "=== Please check site %d in sequence %d.\\n", i+1, (int)(seq-seqbk+1) );\n-\t\t\t\treporterr( "=== \\n" );\n-\t\t\t\treporterr( "=== To make an alignment having unusual characters (U, @, #, etc), try\\n" );\n-\t\t\t\treporterr( "=== %% mafft --anysymbol input > output\\n" );\n-\t\t\t\treporterr( "=== \\n" );\n-\t\t\t\treporterr( "========================================================================= \\n" );\n-\t\t\t\treporterr( "========================================================================= \\n" );\n-\t\t\t\treturn( (int)(*seq)[i] );\n-\t\t\t}\n-\t\t}\n-\t\tseq++;\n-\t}\n-\treturn( 0 );\n-}\n-\t\n-void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx )\n-{\n-\tint i, j, lgth;\n-\t \n-\tlgth = strlen( aseq[0] );\n-\tfor( j=0; j<lgth; j++ )\n-\t{\n-\t\tfor( i=0; i<nalphabets; 
i++ )\n-\t\t{\n-\t\t\tscmx[i][j] = 0;\n-\t\t}\n-\t}\n-\tfor( i=0; i<icyc+1; i++ )\n-\t{\n-\t\tint id;\n-\t\tid = amino_n[(int)aseq[i][0]];\n-\t\tscmx[id][0] += (float)effarr[i];\n-\t}\n-\tfor( j=1; j<lgth-1; j++ )\n-\t{\n-\t\tfor( i=0; i<icyc+1; i++ )\n-\t\t{\n-\t\t\tint id;\n-\t\t\tid = amino_n[(int)aseq[i][j]];\n-\t\t\tscmx[id][j] += (float)effarr[i];\n-\t\t}\n-\t}\n-\tfor( i=0; i<icyc+1; i++ )\n-\t{\n-\t\tint id;\n-\t\tid = amino_n[(int)aseq[i][lgth-1]];\n-\t\tscmx[id][lgth-1] += (float)effarr[i];\n-\t}\n-}\n-\n-void exitall( char arr[] )\n-{\n-\treporterr( "%s\\n", arr );\n-\texit( 1 );\n-}\n-\n-void display( char **seq, int nseq )\n-{\n-\tint i, imax;\n-\tchar b[121];\n-\n-\tif( !disp ) return;\n-\t\tif( nseq > DISPSEQF ) imax = DISPSEQF;\n-\t\telse imax = nseq;\n-\t\treporterr( " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\\n" );\n-\t\tfor( i=0; i<+imax; i++ )\n-\t\t{\n-\t\t\tstrncpy( b, seq[i]+DISPSITEI, 120 );\n-\t\t\tb[120] = 0;\n-\t\t\treporterr( "%3d %s\\n", i+1, b );\n-\t\t}\n-}\n-#if 0\n-double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )\n-{\n-\tint i, j, k;\n-\tdouble score;\n-\tdouble tmpscore;\n-\tchar *mseq1, *mseq2;\n-\tdouble efficient;\n-\tchar xxx[100];\n-\n-//\ttotaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];\n-//\ttotaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];\n-\n-\tscore = 0.0;\n-\tfor( i=0; i<clus1; i++ ) for( j=0; j<clus2; j++ ) \n-\t{\n-\t\tefficient = eff1[i] * eff2[j];\n-\t\tmseq1 = seq1[i];\n-\t\tmseq2 = seq2[j];\n-\t\ttmpscore = 0.0;\n-\t\tfor( k=0; k<len; k++ ) \n-\t\t{\n-\t\t\tif( mseq1[k] == \'-\' && mseq2[k] == \'-\' ) continue;\n-\t\t\ttmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];\n-\n-\t\t\tif( mseq1[k] == \'-\' ) \n-\t\t\t{\n-\t\t\t\ttmpscore += penalty;\n-\t\t\t\ttmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];\n-\t\t\t\twhile( mseq1[++k] == \'-\' 
)\n-\t\t\t\t\ttmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];\n-\t\t\t\tk--;\n-\t\t\t\tif( k >len-2 ) break;\n-\t\t\t\tcontinue;\n-\t\t\t}\n-\t\t\tif( mseq2[k] == \'-\' )\n-\t\t\t{\n-\t\t\t\ttmpscore += penalty;\n-\t\t\t\ttmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];\n-\t\t\t\twhile( mseq2[++k] == \'-\' )\n-\t\t\t\t\ttmpscore += amino_dis[(int)mseq1[k]][(int)mseq2'..b'amino_n[\'A\']][amino_n[\'A\']] );\n-\n-\n-\treturn;\n-\n-// Taikaku youso no heikin ga 600 ni naruyouni re-scale.\n-// Hitaikaku youso ga ookiku narisugi.\n-\n-\tav = 0.0;\n-\tfor( i=0; i<nalphabets; i++ ) \n-\t{\n-\t\tif( ii == \'-\' ) continue; // text no toki arieru\n-\t\tav += out[i][i];\n-\t}\n-\tav /= (double)nalphabets;\n-\n-\tfor( i=0; i<nalphabets; i++ ) \n-\t{\n-\t\tif( amino[i] == \'-\' ) continue; // text no toki arieru\n-\t\tfor( j=0; j<nalphabets; j++ )\n-\t\t{\n-\t\t\tif( amino[j] == \'-\' ) continue; // text no toki arieru\n-\t\t\tout[i][j] = out[i][j] * 600 / av;\n-\t\t\treporterr( "%c-%c: %f\\n", amino[i], amino[j], out[i][j] );\n-\t\t}\n-\t}\n-}\n-void FreeCommonIP()\n-{\n-\tif( commonIP ) FreeIntMtx( commonIP ); \n-\tcommonIP = NULL;\n-\tcommonAlloc1 = 0;\n-\tcommonAlloc2 = 0;\n-}\n-\n-void makeskiptable( int n, int **skip, char **seq )\n-{\n-\tchar *nogapseq;\n-\tint nogaplen, alnlen;\n-\tint i, j, posinseq, gaplen;\n-\n-\tnogapseq = calloc( strlen( seq[0] )+1, sizeof( char ) );\n-\tfor( i=0; i<n; i++ )\n-\t{\n-\t\tgappick0( nogapseq, seq[i] );\n-\t\tnogaplen = strlen( nogapseq );\n-\t\talnlen = strlen( seq[i] );\n-\t\tskip[i] = calloc( nogaplen+1, sizeof( int ) );\n-\n-//\t\treporterr( "%s\\n", nogapseq );\n-\n-\t\tposinseq = 0;\n-\t\tgaplen = 0;\n-\t\tfor( j=0; j<alnlen; j++ )\n-\t\t{\n-\t\t\tif( seq[i][j] == \'-\' )\n-\t\t\t{\n-\t\t\t\tskip[i][posinseq]++;\n-\t\t\t}\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\tposinseq++;\n-\t\t\t}\n-\t\t}\n-//\t\tfor( j=0; j<nogaplen+1; j++ )\n-//\t\t\treporterr( "%d ", skip[i][j] );\n-//\t\treporterr( "\\n" );\n-//\t\texit( 1 
);\n-\t}\n-\tfree( nogapseq );\n-}\n-\n-int generatesubalignmentstable( int nseq, int ***tablept, int *nsubpt, int *maxmempt, int ***topol, double **len, double threshold )\n-{\n-\tint i, j, rep0, rep1, nmem, mem;\n-\tdouble distfromtip0, distfromtip1;\n-\tdouble *distfromtip;\n-\treporterr( "\\n\\n\\n" );\n-\n-\t*maxmempt = 0;\n-\t*nsubpt = 0;\n-\n-\tdistfromtip = calloc( nseq, sizeof( double ) );\n-\tfor( i=0; i<nseq-1; i++ )\n-\t{\n-#if 0\n-\t\treporterr( "STEP %d\\n", i );\n-\t\tfor( j=0; topol[i][0][j]!=-1; j++ )\n-\t\t\treporterr( "%3d ", topol[i][0][j] );\n-\t\treporterr( "\\n" );\n-\t\treporterr( "len=%f\\n", len[i][0] );\n-#endif\n-\n-\t\trep0 = topol[i][0][0];\n-\t\tdistfromtip0 = distfromtip[rep0];\n-\t\tdistfromtip[rep0] += len[i][0];\n-//\t\treporterr( "distfromtip[%d] = %f->%f\\n", rep0, distfromtip0, distfromtip[rep0] );\n-\n-\n-#if 0\n-\t\tfor( j=0; topol[i][1][j]!=-1; j++ )\n-\t\t\treporterr( "%3d ", topol[i][1][j] );\n-\t\treporterr( "\\n" );\n-\t\treporterr( "len=%f\\n", len[i][1] );\n-#endif\n-\n-\t\trep1 = topol[i][1][0];\n-\t\tdistfromtip1 = distfromtip[rep1];\n-\t\tdistfromtip[rep1] += len[i][1];\n-//\t\treporterr( "distfromtip[%d] = %f->%f\\n", rep1, distfromtip1, distfromtip[rep1] );\n-\n-\t\tif( topol[i][0][1] != -1 && distfromtip0 <= threshold && threshold < distfromtip[rep0] )\n-\t\t{\n-//\t\t\treporterr( "HIT 0!\\n" );\n-\t\t\t*tablept = realloc( *tablept, sizeof( char * ) * (*nsubpt+2) );\n-\t\t\tfor( j=0, nmem=0; (mem=topol[i][0][j])!=-1; j++ ) \n-\t\t\t\tnmem++;\n-//\t\t\treporterr( "allocating %d\\n", nmem+1 );\n-\t\t\t(*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) );\n-\t\t\t(*tablept)[*nsubpt+1] = NULL;\n-\t\t\tintcpy( (*tablept)[*nsubpt], topol[i][0] );\n-\t\t\tif( *maxmempt < nmem ) *maxmempt = nmem;\n-\t\t\t*nsubpt += 1;\n-\t\t}\n-\n-\t\tif( topol[i][1][1] != -1 && distfromtip1 <= threshold && threshold < distfromtip[rep1] )\n-\t\t{\n-//\t\t\treporterr( "HIT 1!\\n" );\n-\t\t\t*tablept = realloc( *tablept, sizeof( char * ) 
* (*nsubpt+2) );\n-\t\t\tfor( j=0, nmem=0; (mem=topol[i][1][j])!=-1; j++ )\n-\t\t\t\tnmem++;\n-//\t\t\treporterr( "allocating %d\\n", nmem+1 );\n-\t\t\t(*tablept)[*nsubpt] = calloc( nmem+1, sizeof( int ) );\n-\t\t\t(*tablept)[*nsubpt+1] = NULL;\n-\t\t\tintcpy( (*tablept)[*nsubpt], topol[i][1] );\n-\t\t\tif( *maxmempt < nmem ) *maxmempt = nmem;\n-\t\t\t*nsubpt += 1;\n-\t\t}\n-\n-\t}\n-\n-\tif( distfromtip[0] <= threshold ) \n-\t{\n-\t\tfree( distfromtip );\n-\t\treturn( 1 );\n-\t}\n-\n-\tfree( distfromtip );\n-\treturn( 0 );\n-}\n-\n-\n-\n-float sumofpairsscore( int nseq, char **seq )\n-{\n-\tfloat v = 0;\n-\tint i, j;\n-\tfor( i=1; i<nseq; i++ )\n-\t{\n-\t\tfor( j=0; j<i; j++ )\n-\t\t{\n-\t\t\tv += naivepairscore11( seq[i], seq[j], penalty ) / 600;\n-\t\t}\n-\t}\n-//\tv /= ( (nseq-1) * nseq ) / 2;\n-\treturn( v );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mtxutl.c --- a/mafft/core/mtxutl.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,565 +0,0 @@\n-#include <stdio.h>\n-#include <stddef.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include "mtxutl.h"\n-\n-void MtxuntDouble( double **mtx, int n )\n-{\n- int i, j;\n- for( i=0; i<n; i++ ) for( j=0; j<n; j++ ) mtx[i][j] = 0.0;\n- for( i=0; i<n; i++ ) mtx[i][i] = 1.0;\n-}\n-\n-void MtxmltDouble( double **mtx1, double **mtx2, int n )\n-{\n- int i, j, k;\n- double s, *tmp;\n-\n-\ttmp = (double *)calloc( n, sizeof( double ) );\n- for( i=0; i<n; i++ ) \n- {\n- for( k=0; k<n; k++ ) tmp[k] = mtx1[i][k];\n- for( j=0; j<n; j++ ) \n- {\n- s = 0.0;\n- for( k=0; k<n; k++ ) s += tmp[k] * mtx2[k][j];\n- mtx1[i][j] = s;\n- }\n- }\n-\tfree( tmp );\n-}\n-\n-char *AllocateCharVec( int l1 )\n-{\n-\tchar *cvec;\n-\t\n-\tcvec = (char *)calloc( l1, sizeof( char ) );\n-\tif( cvec == NULL )\n-\t{\n-\t\tfprintf( stderr, "Cannot allocate %d character vector.\\n", l1 );\n-\t\texit( 1 );\n-\t}\n-\treturn( cvec );\n-}\n-\t\n-#if 0\n-void ReallocateCharMtx( char **mtx, int l1, int l2 )\n-{\n-\tint i;\n-\tchar *bk = (char *)malloc( l2+1 ); // hontou ha iranai\n-\tif( bk == NULL )\n-\t{\n-\t\tfprintf( stderr, "Cannot allocate bk in ReallocateCharMtx\\n" );\n-\t\texit( 1 );\n-\t}\n-\tfor( i=0; i<l1; i++ ) \n-\t{\n-#if 1\n-\t\tstrcpy( bk, mtx[i] );\n-\t\tmtx[i] = (char *)realloc( mtx[i], (l2+1) * sizeof( char ) );\n-\t\tif( mtx[i] == NULL )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot reallocate %d x %d character matrix.\\n", l1, l2 );\n-\t\t}\n-\t\tif( strcmp( bk, mtx[i] ) ) // hontou ha iranai\n-\t\t{\n-\t\t\tfprintf( stderr, "changed!! 
\\n%s\\n \\nto\\n%s\\n in realloc..\\n", bk, mtx[i] );\n-\t\t\tstrcpy( mtx[i], bk );\n-\t\t}\n-#else\n-\t\tstrcpy( bk, mtx[i] );\n-\t\tfree( mtx[i] );\n-\t\tmtx[i] = (char *)calloc( (l2+1), sizeof( char ) );\n-\t\tstrcpy( mtx[i], bk );\n-#endif\n-\t}\n-\tfree( bk ); // hontou ha iranai\n-} \n-#else\n-void ReallocateCharMtx( char **mtx, int l1, int l2 )\n-{\n-\tint i;\n-\tfor( i=0; i<l1; i++ ) \n-\t{\n-\t\tmtx[i] = (char *)realloc( mtx[i], (l2+1) * sizeof( char ) );\n-\t\tif( mtx[i] == NULL )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot reallocate %d x %d character matrix.\\n", l1, l2 );\n-\t\t}\n-\t}\n-} \n-#endif\n-\n-char **AllocateCharMtx( int l1, int l2 )\n-{\n-\tint i;\n-\tchar **cmtx;\n-\t\n-\tcmtx = (char **)calloc( l1+1, sizeof( char * ) );\n-\tif( cmtx == NULL )\n-\t{\n-\t\tfprintf( stderr, "Cannot allocate %d x %d character matrix.\\n", l1, l2 );\n-\t\texit( 1 );\n-\t} \n-\tif( l2 )\n-\t{\n-\t\tfor( i=0; i<l1; i++ ) \n-\t\t{\n-\t\t\tcmtx[i] = AllocateCharVec( l2 );\n-\t\t}\n-\t}\n-\tcmtx[l1] = NULL;\n-\treturn( cmtx );\n-} \n-\n-void FreeCharMtx( char **mtx )\n-{\n-/*\n-\tchar **x;\n-\tx = mtx;\n-\twhile( *x != NULL ) free( *x++ );\n-\tfree( mtx );\n-*/\n-\tint i;\n-\tfor( i=0; mtx[i]; i++ ) \n-\t{\n-\t\tfree( mtx[i] );\n-\t}\n-\tfree( mtx );\n-}\n-\n-float *AllocateFloatVec( int l1 )\n-{\n-\tfloat *vec;\n-\n-\tvec = (float *)calloc( (unsigned int)l1, sizeof( float ) );\n-\tif( vec == NULL )\n-\t{\n-\t\tfprintf( stderr, "Allocation error ( %d fload vec )\\n", l1 );\n-\t\texit( 1 );\n-\t}\n-\treturn( vec );\n-}\n-\n-void FreeFloatVec( float *vec )\n-{\n-\tfree( (char *)vec );\n-}\n-\n-float **AllocateFloatHalfMtx( int ll1 )\n-{\n-\tfloat **mtx;\n-\tint i;\n-\n-\tmtx = (float **)calloc( (unsigned int)ll1+1, sizeof( float * ) );\n-\tif( mtx == NULL )\n-\t{\n-\t\tfprintf( stderr, "Allocation error ( %d fload halfmtx )\\n", ll1 );\n-\t\texit( 1 );\n-\t}\n-\tfor( i=0; i<ll1; i++ )\n-\t{\n-\t\tmtx[i] = (float *)calloc( ll1-i, sizeof( float ) );\n-\t\tif( !mtx[i] 
)\n-\t\t{\n-\t\t\tfprintf( stderr, "Allocation error( %d floathalfmtx )\\n", ll1 );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\tmtx[ll1] = NULL;\n-\treturn( mtx );\n-}\n-\n-float **AllocateFloatMtx( int ll1, int ll2 )\n-{\n-\tfloat **mtx;\n-\tint i;\n-\n-\tmtx = (float **)calloc( (unsigned int)ll1+1, sizeof( float * ) );\n-\tif( mtx == NULL )\n-\t{\n-\t\tfprintf( stderr, "Allocation error ( %d x %d fload mtx )\\n", ll1, ll2 );\n-\t\texit( 1 );\n-\t}\n-\tif( ll2 )\n-\t{\n-\t\tfor( i=0; i<ll1; i++ )\n-\t\t{\n-\t\t\tmtx[i] = (float *)calloc( ll2, sizeof( float ) );\n-\t\t\tif( !mtx[i] )\n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "Allocation error( %d x %d floatmtx )\\n", ll1, ll2 );\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-\t\t}\n-\t}\n-\tmtx[ll1] = NULL;\n-\treturn( mtx );\n-}\n-\n-voi'..b' x %d x %d char cube\\n", ll1, ll2, ll3 );\n-\t\texit( 1 );\n-\t}\n-\tif( ll2 )\n-\t{\n-\t\tfor( i=0; i<ll1; i++ ) \n-\t\t{\n-\t\t\tcub[i] = AllocateCharMtx( ll2, ll3 );\n-\t\t}\n-\t}\n-\tcub[ll1] = NULL;\n-\treturn( cub );\n-}\n-\n-void FreeCharCub( char ***cub )\n-{\n-\tint i;\n-\n-\tfor( i=0; cub[i]; i++ ) \n-\t{\n-\t\tFreeCharMtx( cub[i] );\n-\t}\n-\tfree( cub );\n-}\n-\n-void freeintmtx( int **mtx, int ll1, int ll2 )\n-{\n- int i;\n-\n- for( i=0; i<ll1; i++ ) \n- free( (char *)mtx[i] );\n- free( (char *)mtx );\n-}\n- \n-void FreeIntMtx( int **mtx )\n-{\n-\tint i;\n-\n-\tfor( i=0; mtx[i]; i++ ) \n-\t{\n-\t\tif( mtx[i] ) free( (char *)mtx[i] ); mtx[i] = NULL;\n-\t}\n-\tfree( (char *)mtx );\n-}\n-\n-char ****AllocateCharHcu( int ll1, int ll2, int ll3, int ll4 )\n-{\n-\tint i;\n-\tchar ****hcu;\n-\n-\thcu = (char ****)calloc( ll1+1, sizeof( char *** ) );\n-\tif( hcu == NULL ) exit( 1 );\n-\tfor( i=0; i<ll1; i++ ) \n-\t\thcu[i] = AllocateCharCub( ll2, ll3, ll4 );\n-\thcu[ll1] = NULL;\n-\treturn( hcu );\n-}\n-\n-void FreeCharHcu( char ****hcu )\n-{\n-\tint i;\n-\tfor( i=0; hcu[i]; i++ )\n-\t{\n-\t\tFreeCharCub( hcu[i] );\n-\t}\n-\tfree ( (char *)hcu );\n-}\n-\n-double *AllocateDoubleVec( int ll1 
)\n-{\n-\tdouble *vec;\n-\n-\tvec = (double *)calloc( ll1, sizeof( double ) ); // filled with 0.0\n-\treturn( vec );\n-}\n-\n-void FreeDoubleVec( double *vec )\n-{\n-\tfree( vec );\n-}\n-\n-int ***AllocateIntCub( int ll1, int ll2, int ll3 )\n-{\n-\tint i;\n-\tint ***cub;\n-\n-\tcub = (int ***)calloc( ll1+1, sizeof( int ** ) );\n-\tif( cub == NULL )\n-\t{\n-\t\tfprintf( stderr, "cannot allocate IntCub\\n" );\n-\t\texit( 1 );\n-\t}\n-\tfor( i=0; i<ll1; i++ ) \n-\t\tcub[i] = AllocateIntMtx( ll2, ll3 );\n-\tcub[ll1] = NULL;\n-\n-\treturn cub;\n-}\n-\n-void FreeIntCub( int ***cub )\n-{\n-\tint i;\n-\tfor( i=0; cub[i]; i++ ) \n-\t{\n-\t\tif( cub[i] ) FreeIntMtx( cub[i] ); cub[i] = NULL;\n-\t}\n-\tfree( cub );\n-}\n-\n-double **AllocateDoubleMtx( int ll1, int ll2 )\n-{\n-\tint i;\n-\tdouble **mtx;\n-\tmtx = (double **)calloc( ll1+1, sizeof( double * ) );\n-\tif( !mtx )\n-\t{\n-\t\tfprintf( stderr, "cannot allocate DoubleMtx\\n" );\n-\t\texit( 1 );\n-\t}\n-\tif( ll2 )\n-\t{\n-\t\tfor( i=0; i<ll1; i++ ) \n-\t\t\tmtx[i] = AllocateDoubleVec( ll2 );\n-\t}\n-\tmtx[ll1] = NULL;\n-\n-\treturn mtx;\n-}\n-\n-void FreeDoubleMtx( double **mtx )\n-{\n-\tint i;\n-\tfor( i=0; mtx[i]; i++ )\n-\t\tFreeDoubleVec( mtx[i] );\n-\tfree( mtx );\n-}\n-\n-float ***AllocateFloatCub( int ll1, int ll2, int ll3 )\n-{\n-\tint i;\n-\tfloat ***cub;\n-\n-\tcub = (float ***)calloc( ll1+1, sizeof( float ** ) );\n-\tif( !cub ) \n-\t{\n-\t\tfprintf( stderr, "cannot allocate float cube.\\n" );\n-\t\texit( 1 );\n-\t}\n-\tfor( i=0; i<ll1; i++ ) \n-\t{\n-\t\tcub[i] = AllocateFloatMtx( ll2, ll3 );\n-\t}\n-\tcub[ll1] = NULL;\n-\treturn( cub );\n-}\n-\n-void FreeFloatCub( float ***cub )\n-{\n-\tint i;\n-\n-\tfor( i=0; cub[i]; i++ ) \n-\t{\n-\t\tFreeFloatMtx( cub[i] );\n-\t}\n-\tfree( cub );\n-}\n-\n-double ***AllocateDoubleCub( int ll1, int ll2, int ll3 )\n-{\n-\tint i;\n-\tdouble ***cub;\n-\n-\tcub = (double ***)calloc( ll1+1, sizeof( double ** ) );\n-\tif( !cub ) \n-\t{\n-\t\tfprintf( stderr, "cannot allocate 
double cube.\\n" );\n-\t\texit( 1 );\n-\t}\n-\tfor( i=0; i<ll1; i++ ) \n-\t{\n-\t\tcub[i] = AllocateDoubleMtx( ll2, ll3 );\n-\t}\n-\tcub[ll1] = NULL;\n-\treturn( cub );\n-}\n-\n-void FreeDoubleCub( double ***cub )\n-{\n-\tint i;\n-\n-\tfor( i=0; cub[i]; i++ ) \n-\t{\n-\t\tFreeDoubleMtx( cub[i] );\n-\t}\n-\tfree( cub );\n-}\n-\n-\n-short *AllocateShortVec( int ll1 )\n-{\n-\tshort *vec;\n-\n-\tvec = (short *)calloc( ll1, sizeof( short ) );\n-\tif( vec == NULL )\n-\t{\t\n-\t\tfprintf( stderr, "Allocation error( %d short vec )\\n", ll1 );\n-\t\texit( 1 );\n-\t}\n-\treturn( vec );\n-}\t\n-\n-void FreeShortVec( short *vec )\n-{\n-\tfree( (char *)vec );\n-}\n-\n-short **AllocateShortMtx( int ll1, int ll2 )\n-{\n-\tint i;\n-\tshort **mtx;\n-\n-\n-\tmtx = (short **)calloc( ll1+1, sizeof( short * ) );\n-\tif( !mtx )\n-\t{\n-\t\tfprintf( stderr, "Allocation error( %d x %d short mtx ) \\n", ll1, ll2 );\n-\t\texit( 1 );\n-\t}\n-\tfor( i=0; i<ll1; i++ ) \n-\t{\n-\t\tmtx[i] = AllocateShortVec( ll2 );\n-\t}\n-\tmtx[ll1] = NULL;\n-\treturn( mtx );\n-}\n-\n-void FreeShortMtx( short **mtx )\n-{\n-\tint i;\n-\n-\tfor( i=0; mtx[i]; i++ ) \n-\t\tfree( (char *)mtx[i] );\n-\tfree( (char *)mtx );\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/mtxutl.h --- a/mafft/core/mtxutl.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,50 +0,0 @@ -void MtxuntDouble( double **, int ); -void MtxmltDouble( double **, double **, int ); - -char *AllocateCharVec( int ); -void FreeCharVec( char * ); - -char **AllocateCharMtx( int, int); -void ReallocateCharMtx( char **, int, int); -void FreeCharMtx( char ** ); - -float *AllocateFloatVec( int ); -void FreeFloatVec( float * ); - -float **AllocateFloatHalfMtx( int ); -float **AllocateFloatMtx( int, int ); -void FreeFloatHalfMtx( float **, int ); -void FreeFloatMtx( float ** ); - -float **AlocateFloatTri( int ); -void FreeFloatTri( float ** ); - -int *AllocateIntVec( int ); -void FreeIntVec( int * ); - -int **AllocateIntMtx( int, int ); -void FreeIntMtx( int ** ); - -char ***AllocateCharCub( int, int, int ); -void FreeCharCub( char *** ); - -int ***AllocateIntCub( int, int, int ); -void FreeIntCub( int *** ); - -double *AllocateDoubleVec( int ); -void FreeDoubleVec( double * ); - -double **AllocateDoubleMtx( int, int ); -void FreeDoubleMtx( double ** ); - -double ***AllocateDoubleCub( int, int, int ); -void FreeDoubleCub( double *** ); - -float ***AllocateFloatCub( int, int, int ); -void FreeFloatCub( float *** ); - -short *AllocateShortVec( int ); -void FreeShortVec( short * ); - -short **AllocateShortMtx( int, int ); -void FreeShortMtx( short ** ); |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/multi2hat3s.c --- a/mafft/core/multi2hat3s.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,390 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 1\n-#define TSUYOSAFACTOR 100\n-\n-\n-static int nhomologs;\n-static int seedoffset;\n-\n-void strip( char *s )\n-{\n-\tchar *pt = s;\n-\twhile( *++pt )\n-\t\tif( *pt == \'\\n\' ) *pt = 0;\n-}\n-\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\n-\tseedoffset = 0;\n-\tnhomologs = 1;\n-\tinputfile = NULL;\n-\tfftkeika = 0;\n-\tpslocal = -1000.0;\n-\tconstraint = 0;\n-\tnblosum = 62;\n-\tfmodel = 0;\n-\tcalledByXced = 0;\n-\tdevide = 0;\n-\tuse_fft = 0;\n-\tfftscore = 1;\n-\tfftRepeatStop = 0;\n-\tfftNoAnchStop = 0;\n- weight = 3;\n- utree = 1;\n-\ttbutree = 1;\n- refine = 0;\n- check = 1;\n- cut = 0.0;\n- disp = 0;\n- outgap = 1;\n- alg = \'A\';\n- mix = 0;\n-\ttbitr = 0;\n-\tscmtd = 5;\n-\ttbweight = 0;\n-\ttbrweight = 3;\n-\tcheckC = 0;\n-\ttreemethod = \'x\';\n-\tcontin = 0;\n-\tscoremtx = 1;\n-\tkobetsubunkatsu = 0;\n-\tdivpairscore = 0;\n-\tdorp = NOTSPECIFIED;\n-\tppenalty = NOTSPECIFIED;\n-\tppenalty_OP = NOTSPECIFIED;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tppenalty_EX = NOTSPECIFIED;\n-\tpoffset = NOTSPECIFIED;\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\tgeta2 = GETA2;\n-\tfftWinSize = NOTSPECIFIED;\n-\tfftThreshold = NOTSPECIFIED;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( ( c = *++argv[0] ) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "seed = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'t\':\n-\t\t\t\t\tnhomologs = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "nhomologs = %d\\n", nhomologs );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'o\':\n-\t\t\t\t\tseedoffset = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "seedoffset = %d\\n", seedoffset );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'D\':\n-\t\t\t\t\tdorp = 
\'d\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'P\':\n-\t\t\t\t\tdorp = \'p\';\n-\t\t\t\t\tbreak;\n- default:\n- fprintf( stderr, "illegal option %c\\n", c );\n- argc = 0;\n- break;\n- }\n-\t\t}\n-\t\tnextoption:\n-\t\t\t;\n-\t}\n- if( argc == 1 )\n- {\n- cut = atof( (*argv) );\n- argc--;\n- }\n- if( argc != 0 ) \n- {\n- fprintf( stderr, "options: Check source file !\\n" );\n- exit( 1 );\n- }\n-\tif( tbitr == 1 && outgap == 0 )\n-\t{\n-\t\tfprintf( stderr, "conflicting options : o, m or u\\n" );\n-\t\texit( 1 );\n-\t}\n-\tif( alg == \'C\' && outgap == 0 )\n-\t{\n-\t\tfprintf( stderr, "conflicting options : C, o\\n" );\n-\t\texit( 1 );\n-\t}\n-}\n-\n-int countamino( char *s, int end )\n-{\n-\tint val = 0;\n-\twhile( end-- )\n-\t\tif( *s++ != \'-\' ) val++;\n-\treturn( val );\n-}\n-\n-static void pairalign( char **name, int nlen[M], char **seq, double *effarr, int alloclen )\n-{\n-\tint i, j;\n-\tFILE *hat3p;\n-\tfloat pscore = 0.0; // by D.Mathog\n-\tstatic double *effarr1 = NULL;\n-\tstatic double *effarr2 = NULL;\n-\tchar *aseq;\n-\tstatic char **pseq;\n-\tLocalHom **localhomtable, *tmpptr;\n-\tdouble tsuyosa;\n-\n-\tif( nhomologs < 1 ) nhomologs = 1; // tsuyosa=0.0 wo sakeru\n-\ttsuyosa = (double)nhomologs * nhomologs * TSUYOSAFACTOR;\n-\tfprintf( stderr, "tsuyosa = %f\\n", tsuyosa );\n-\tlocalhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );\n-\tfor( i=0; i<njob; i++)\n-\t{\n-\t\tlocalhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );\n-\t\tfor( j=0; j<njob; j++)\n-\t\t{\n-\t\t\tlocalhomtable[i][j].start1 = -1;\n-\t\t\tlocalhomtable[i][j].end1 = -1;\n-\t\t\tlocalhomtable[i][j].start2 = -1; \n-\t\t\tlocalhomtable[i][j].end2 = -1; \n-\t\t\tlocalhomtable[i][j].opt = -1.0;\n-\t\t\tlocalhomtable[i][j].next = NULL;\n-\t\t}\n-\t}\n-\n-\tif( effarr1 == NULL ) \n-\t{\n-\t\teffarr1 = AllocateDoubleVec( njob );\n-\t\teffarr2 = AllocateDoubleVec( njob );\n-\t\tpseq = AllocateCharMtx( 2, 0 );\n-\t\taseq = AllocateCharVec( nlenmax*9+1 );\n-#if 
0\n-#else\n-#endif\n-\t}\n-\n-#if 0\n-\tfprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\\n", fftWinSize, fftThreshold );\n-#endif\n-\n-#if 0\n-\tfor( i=0; i<njob; i++ )\n-\t\tfprintf( stderr, "TBFAST effarr[%d] = %f\\n", i, effarr[i] );\n-#endif\n-\n-\n-//\twritePre( njob, name, nlen, aseq, 0 );\n-\n-\that3p '..b'. FreeLocalHomTable\\n" );\n-#endif\n-}\n-\n-static void WriteOptions( FILE *fp )\n-{\n-\n-\tif( dorp == \'d\' ) fprintf( fp, "DNA\\n" );\n-\telse\n-\t{\n-\t\tif ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\\n", pamN );\n-\t\telse if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\\n", nblosum );\n-\t\telse if( scoremtx == 2 ) fprintf( fp, "M-Y\\n" );\n-\t}\n- fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );\n- if( use_fft ) fprintf( fp, "FFT on\\n" );\n-\n-\tfprintf( fp, "tree-base method\\n" );\n-\tif( tbrweight == 0 ) fprintf( fp, "unweighted\\n" );\n-\telse if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\\n" );\n-\tif( tbitr || tbweight ) \n-\t{\n-\t\tfprintf( fp, "iterate at each step\\n" );\n-\t\tif( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\\n" ); \n-\t\tif( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\\n" ); \n-\t\tif( tbweight ) fprintf( fp, " weighted\\n" ); \n-\t\tfprintf( fp, "\\n" );\n-\t}\n-\n- \t fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );\n-\n-\tif( alg == \'a\' )\n-\t\tfprintf( fp, "Algorithm A\\n" );\n-\telse if( alg == \'A\' ) \n-\t\tfprintf( fp, "Algorithm A+\\n" );\n-\telse if( alg == \'S\' ) \n-\t\tfprintf( fp, "Apgorithm S\\n" );\n-\telse if( alg == \'C\' ) \n-\t\tfprintf( fp, "Apgorithm A+/C\\n" );\n-\telse\n-\t\tfprintf( fp, "Unknown algorithm\\n" );\n-\n-\tif( treemethod == \'x\' )\n-\t\tfprintf( fp, "Tree = UPGMA (3).\\n" );\n-\telse if( treemethod == \'s\' )\n-\t\tfprintf( fp, "Tree = UPGMA (2).\\n" );\n-\telse if( treemethod 
== \'p\' )\n-\t\tfprintf( fp, "Tree = UPGMA (1).\\n" );\n-\telse\n-\t\tfprintf( fp, "Unknown tree.\\n" );\n-\n- if( use_fft )\n- {\n- fprintf( fp, "FFT on\\n" );\n- if( dorp == \'d\' )\n- fprintf( fp, "Basis : 4 nucleotides\\n" );\n- else\n- {\n- if( fftscore )\n- fprintf( fp, "Basis : Polarity and Volume\\n" );\n- else\n- fprintf( fp, "Basis : 20 amino acids\\n" );\n- }\n- fprintf( fp, "Threshold of anchors = %d%%\\n", fftThreshold );\n- fprintf( fp, "window size of anchors = %dsites\\n", fftWinSize );\n- }\n-\telse\n- fprintf( fp, "FFT off\\n" );\n-\tfflush( fp );\n-}\n-\t \n-\n-int main( int argc, char *argv[] )\n-{\n-\tstatic int nlen[M];\t\n-\tstatic char **name, **seq;\n-\tstatic char **bseq;\n-\tstatic double *eff;\n-\tint i;\n-\tchar c;\n-\tint alloclen;\n-\tFILE *infp;\n-\n-\targuments( argc, argv );\n-\n-\tif( inputfile )\n-\t{\n-\t\tinfp = fopen( inputfile, "r" );\n-\t\tif( !infp )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot open %s\\n", inputfile );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\telse\n-\t\tinfp = stdin;\n-\n-\tgetnumlen( infp );\n-\trewind( infp );\n-\n-\tif( njob < 2 )\n-\t{\n-\t\tfprintf( stderr, "At least 2 sequences should be input!\\n"\n-\t\t\t\t\t\t "Only %d sequence found.\\n", njob ); \n-\t\texit( 1 );\n-\t}\n-\n-\tname = AllocateCharMtx( njob, B+1 );\n-\tseq = AllocateCharMtx( njob, nlenmax*9+1 );\n-\tbseq = AllocateCharMtx( njob, nlenmax*9+1 );\n-\talloclen = nlenmax*9;\n-\n-\teff = AllocateDoubleVec( njob );\n-\n-#if 0\n-\tRead( name, nlen, seq );\n-#else\n-\treadData_pointer( infp, name, nlen, seq );\n-#endif\n-\tfclose( infp );\n-\n-\tconstants( njob, seq );\n-\n-#if 0\n-\tfprintf( stderr, "params = %d, %d, %d\\n", penalty, penalty_ex, offset );\n-#endif\n-\n-\tinitSignalSM();\n-\n-\tinitFiles();\n-\n-\tWriteOptions( trap_g );\n-\n-\tc = seqcheck( seq );\n-\tif( c )\n-\t{\n-\t\tfprintf( stderr, "Illeagal character %c\\n", c );\n-\t\texit( 1 );\n-\t}\n-\n-//\twritePre( njob, name, nlen, seq, 0 );\n-\n-\tfor( i=0; i<njob; i++ ) eff[i] = 
1.0;\n-\n-\n-\tfor( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );\n-\n-\n-//\tfor( i=0; i<njob; i++ ) fprintf( stdout, ">_seed_%s\\n%s\\n", name[i]+1, bseq[i] ); // CHUUI!!\n-\tfor( i=0; i<njob; i++ ) fprintf( stdout, ">_seed_%s\\n%s\\n", name[i]+1, seq[i] );\n-\n-\tpairalign( name, nlen, seq, eff, alloclen );\n-\n-\tfprintf( trap_g, "done.\\n" );\n-#if DEBUG\n-\tfprintf( stderr, "closing trap_g\\n" );\n-#endif\n-\tfclose( trap_g );\n-\n-#if IODEBUG\n-\tfprintf( stderr, "OSHIMAI\\n" );\n-#endif\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/newick2mafft.rb --- a/mafft/core/newick2mafft.rb Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,132 +0,0 @@ -#! /usr/bin/env ruby - -#version 2, 2009/01/24 - - -if ARGV.length == 1 - scale = 1.0 -elsif ARGV.length == 2 - scale = ARGV.shift.to_f -else - STDERR.puts "USAGE: newick2mafft.rb scale input_tree > output" - exit -end - -if scale <= 0.0 then - STDERR.puts "Inappropriate scale, #{scale.to_s}" - exit -end - -STDERR.puts "scale = " + scale.to_s - -infp = File.open( ARGV.shift, "r" ) - -tree = "" -while line = infp.gets - tree += line.strip - break if tree =~ /;$/ -end -infp.close - - -#tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "").gsub(/:\-[0-9\.]+/, ":0.0" ) -tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "") - - -STDERR.puts "Initial tree = " + tree - -def resolve( tree ) - - -while 1 -# p tree - tree.sub!( /\,([0-9]+):(\-?[0-9\.]+)\,([0-9]+):(\-?[0-9\.]+)/, ",XXX" ) - hit1 = $1 - hit2 = $2 - hit3 = $3 - hit4 = $4 - -# p hit1 -# p hit2 -# p hit3 -# p hit4 - -# puts "introduce XXX" -# p tree - - break unless tree.index(/XXX/) - - poshit = tree.index(/XXX/) -# puts "poshit=" + poshit.to_s - - i = poshit - height = 0 - while i >= 0 - break if height == 0 && tree[i..i] == '(' - if tree[i..i] == ')' then - height += 1 - elsif tree[i..i] == '(' then - height -= 1 - end - i -= 1 - end - - poskakko = i -# puts "poskakko = " + poskakko.to_s - zenhan = tree[0..poskakko] - zenhan = "" if poskakko == -1 -# puts "zenhan = " + zenhan - - treelen = tree.length - tree = zenhan + "(" + tree[poskakko+1..treelen] -# puts "add (" -# p tree - tree.sub!( /XXX/, "#{hit1}:#{hit2}):0,#{hit3}:#{hit4}" ) - -# p tree -end - - -return tree - -end - -memi = [-1,-1] -leni = [-1,-1] - -while tree.index( /\(/ ) - - tree = resolve( tree ) - - tree.sub!( /\(([0-9]+):(\-?[0-9\.]+),([0-9]+):(\-?[0-9\.]+)\)/, "XXX" ) - memi[0] = $1.to_i - leni[0] = $2.to_f * scale - memi[1] = $3.to_i - leni[1] = $4.to_f * scale - - if leni[0] > 10 || leni[1] > 10 then - STDERR.puts "" - 
STDERR.puts "Please check the scale of branch length!" - STDERR.puts "The unit of branch lengths must be 'substitution/site'" - STDERR.puts "If the unit is 'substition' in your tree, please" - STDERR.puts "use the scale argument," - STDERR.puts "% newick2mafft scale in > out" - STDERR.puts "where scale = 1/(alignment length)" - STDERR.puts "" - exit 1 - end - -# STDERR.puts "subtree = " + $& - - if memi[1] < memi[0] then - memi.reverse! - leni.reverse! - end - - tree.sub!( /XXX/, memi[0].to_s ) - -# STDERR.puts "Tree = " + tree - - printf( "%5d %5d %10.5f %10.5f\n", memi[0], memi[1], leni[0], leni[1] ) - -end |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/nj.c --- a/mafft/core/nj.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,195 +0,0 @@ -#include "mltaln.h" -#define DEBUG 0 - - -void topolcpy( int s1[], int s2[], int *mpt1, int *mpt2 ) -{ - int i; - - *mpt1 = *mpt2; - for( i=0; i<*mpt2; i++ ) - { - s1[i] = s2[i]; - } -} - -void topolcat( int s1[], int s2[], int *mpt1, int *mpt2 ) -{ - int i; - - for( i=*mpt1; i<*mpt1+*mpt2; i++ ) - { - s1[i] = s2[i-*mpt1]; - } - *mpt1 += *mpt2; -} - -void topolsort( int m, int s[] ) -{ - int i, j, im; - int sm; - - for( j=0; j<m-1; j++ ) - { - sm = s[j]; im = j; - for( i=j+1; i<m; i++ ) - { - if( s[i] < sm ) - { - sm = s[i]; - im = i; - } - } - s[im] = s[j]; s[j] = sm; - } -} - -void topolswap( int s1[], int s2[], int *mpt1, int *mpt2 ) -{ - int i; - int im; - int b; - b = *mpt1; *mpt1 = *mpt2; *mpt2 = b; - im = MAX(*mpt1,*mpt2); - for( i=0; i<im; i++ ) - { - b = s1[i]; s1[i] = s2[i]; s2[i] = b; - /* - printf( "s1[%d]=%d\ns2[%d]=%d\n", i, s1[i], i, s2[i] ); - */ - } -} - -void reduc( double **mtx, int nseq, int im, int jm ) -{ - int i; - for( i=0; i<nseq; i++ ) - { - if( i==im || i==jm - || mtx[MIN(i,im)][MAX(i,im)] == 9999.9 - || mtx[MIN(i,jm)][MAX(i,jm)] == 9999.9 - ) continue; - mtx[MIN(i,im)][MAX(i,im)] - = 0.5 * ( mtx[MIN(i,im)][MAX(i,im)] + mtx[MIN(i,jm)][MAX(i,jm)] - - mtx[MIN(im,jm)][MAX(im,jm)] ); - mtx[MIN(i,jm)][MAX(i,jm)] = 9999.9; - } - mtx[MIN(im,jm)][MAX(im,jm)] = 9999.9; -} - - -void nj( int nseq, double **omtx, int ***topol, double **dis ) -{ - int i, j, l, n, m; - int count; - double r[M]; - double t; - double s, sm; - double totallen = 0.0; - int im=0, jm=0; - double len1, len2; -#if 1 - static char **par = NULL; - static double **mtx = NULL; - static int **mem = NULL; - if( par == NULL ) - { - par = AllocateCharMtx( njob, njob ); - mtx = AllocateDoubleMtx( njob, njob ); - mem = AllocateIntMtx( njob, 2 ); - } -#else - char par[njob][njob]; - double mtx[njob][njob]; - int mem[njob][2]; -#endif - for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) mtx[i][j] = omtx[i][j]; - for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) par[i][j] = 
0; - for( i=0; i<nseq; i++ ) par[i][i] = 1; -// for( i=0; i<nseq; i++ ) for( j=0; j<2; j++ ) for( l=0; l<nseq+1; l++ ) topol[i][j][l] = -1; - for( i=0; i<nseq; i++ ) for( j=0; j<2; j++ ) for( l=0; l<nseq; l++ ) topol[i][j][l] = -1; - for( n=nseq, m=0; n>2; n--, m=nseq-n ) - { - t = 0.0; - for( i=0; i<nseq-1; i++ ) for( j=0; j<nseq; j++ ) if( mtx[i][j] < 9999.9 ) - t += mtx[i][j]; - for( i=0; i<nseq; i++ ) - { - r[i] = 0.0; - for( l=0; l<nseq; l++ ) - if( ( l != i ) && ( mtx[MIN(i,l)][MAX(i,l)] < 9999.9 ) ) - r[i] += mtx[MIN(i,l)][MAX(i,l)]; - } - sm = 9999.9; - for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) if( mtx[i][j] < 9999.9) - { - s = ( ( 2.0 * t - r[i] - r[j] + (n-2.0)*mtx[i][j] ) ) / ( 2.0*(n-2.0) ); - if ( s < sm ) - { - sm = s; - im = i; jm = j; - } - } - len1 = ( (n-2)*mtx[im][jm] + r[im] - r[jm] ) / (2*(n-2)); - len2 = ( (n-2)*mtx[im][jm] - r[im] + r[jm] ) / (2*(n-2)); - -#if DEBUG - fprintf( stderr, "STEP-%3d %3d: L = %5.5f\n", m+1, im+1, len1 ); - fprintf( stderr, " %3d: L = %5.5f\n", jm+1, len2 ); -#endif - - totallen += len1; - totallen += len2; - - dis[m][0] = len1; - dis[m][1] = len2; - - for( l=0, count=0; l<nseq; l++ ) - if( par[im][l] > 0 ) - { - topol[m][0][count] = l; - count++; - } - mem[m][0] = count; - for( l=0, count=0; l<nseq; l++ ) - if( par[jm][l] > 0 ) - { - topol[m][1][count] = l; - count++; - } - mem[m][1] = count; - for( l=0; l<nseq; l++ ) - par[im][l] += ( par[jm][l] > 0 ); - if( n > 3 ) reduc( mtx, nseq, im, jm ); - } - for( i=0; i<nseq; i++ ) - if( i!=im && i!=jm && mtx[MIN(i,im)][MAX(i,im)]<9999.9 ) - break; - len2 = ( mtx[MIN(i,im)][MAX(i,im)] - r[im] + r[i] ) / 2; - -/* - printf(" %3d: L = %5.5f\n", i+1, len2 ); -*/ - totallen += len2; - - dis[m][0] = len2; - dis[m][1] = 0.0; - for( l=0, count=0; l<nseq; l++ ) - if( par[i][l] > 0 ) - { - topol[m][0][count] = l; - count++; - } - mem[m][0] = count; - /* - printf( " total length == %f\n", totallen ); - */ - - topolcpy( topol[nseq-2][1], topol[nseq-3][0], mem[nseq-2]+1, 
mem[nseq-3] ); - topolcat( topol[nseq-2][1], topol[nseq-3][1], mem[nseq-2]+1, mem[nseq-3]+1 ); - topolsort( mem[nseq-2][1], topol[nseq-2][1] ); - - if( topol[nseq-2][0][0] > topol[nseq-2][1][0] ) - topolswap( topol[nseq-2][0], topol[nseq-2][1], mem[nseq-2], mem[nseq-2]+1 ); - -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/pair2hat3s.c --- a/mafft/core/pair2hat3s.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,468 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 1\n-#define TSUYOSAFACTOR 100\n-\n-\n-static char *pairfile;\n-static int nhomologs;\n-\n-void strip( char *s )\n-{\n-\tchar *pt = s;\n-\twhile( *++pt )\n-\t\tif( *pt == \'\\n\' ) *pt = 0;\n-}\n-\n-int searchused( char *q, char **keys, int n )\n-{\n-\tint i;\n-\tfor( i=0; i<n; i++ )\n-\t{\n-//\t\tfprintf( stderr, "%s ? %s\\n", q, names[i] );\n-\t\tif( !strcmp( q, keys[i] ) ) return( i );\n-\t}\n-\treturn( -1 );\n-}\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\n-\tnhomologs = 2;\n-\tinputfile = NULL;\n-\tpairfile = NULL;\n-\tfftkeika = 0;\n-\tpslocal = -1000.0;\n-\tconstraint = 0;\n-\tnblosum = 62;\n-\tfmodel = 0;\n-\tcalledByXced = 0;\n-\tdevide = 0;\n-\tuse_fft = 0;\n-\tfftscore = 1;\n-\tfftRepeatStop = 0;\n-\tfftNoAnchStop = 0;\n- weight = 3;\n- utree = 1;\n-\ttbutree = 1;\n- refine = 0;\n- check = 1;\n- cut = 0.0;\n- disp = 0;\n- outgap = 1;\n- alg = \'A\';\n- mix = 0;\n-\ttbitr = 0;\n-\tscmtd = 5;\n-\ttbweight = 0;\n-\ttbrweight = 3;\n-\tcheckC = 0;\n-\ttreemethod = \'x\';\n-\tcontin = 0;\n-\tscoremtx = 1;\n-\tkobetsubunkatsu = 0;\n-\tdivpairscore = 0;\n-\tdorp = NOTSPECIFIED;\n-\tppenalty = NOTSPECIFIED;\n-\tppenalty_OP = NOTSPECIFIED;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tppenalty_EX = NOTSPECIFIED;\n-\tpoffset = NOTSPECIFIED;\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\tgeta2 = GETA2;\n-\tfftWinSize = NOTSPECIFIED;\n-\tfftThreshold = NOTSPECIFIED;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( ( c = *++argv[0] ) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "inputfile = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'p\':\n-\t\t\t\t\tpairfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "pairfile = %s\\n", pairfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'t\':\n-\t\t\t\t\tnhomologs 
= myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "nhomologs = %d\\n", nhomologs );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'D\':\n-\t\t\t\t\tdorp = \'d\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'P\':\n-\t\t\t\t\tdorp = \'p\';\n-\t\t\t\t\tbreak;\n- default:\n- fprintf( stderr, "illegal option %c\\n", c );\n- argc = 0;\n- break;\n- }\n-\t\t}\n-\t\tnextoption:\n-\t\t\t;\n-\t}\n- if( argc == 1 )\n- {\n- cut = atof( (*argv) );\n- argc--;\n- }\n- if( argc != 0 ) \n- {\n- fprintf( stderr, "options: Check source file !\\n" );\n- exit( 1 );\n- }\n-\tif( tbitr == 1 && outgap == 0 )\n-\t{\n-\t\tfprintf( stderr, "conflicting options : o, m or u\\n" );\n-\t\texit( 1 );\n-\t}\n-\tif( alg == \'C\' && outgap == 0 )\n-\t{\n-\t\tfprintf( stderr, "conflicting options : C, o\\n" );\n-\t\texit( 1 );\n-\t}\n-}\n-\n-int countamino( char *s, int end )\n-{\n-\tint val = 0;\n-\twhile( end-- )\n-\t\tif( *s++ != \'-\' ) val++;\n-\treturn( val );\n-}\n-\n-static void pairalign( char **name, int nlen[M], char **seq, double *effarr, int alloclen )\n-{\n-\tFILE *tmpfp;\n-\tstatic char dumm1[B], dumm0[B];\n-\tint i, j;\n-\tchar *res;\n-\tFILE *hat3p;\n-\tstatic double *effarr1 = NULL;\n-\tstatic double *effarr2 = NULL;\n-\tstatic char **pseq;\n-\tLocalHom **localhomtable, *tmpptr;\n-\tfloat pscore = 0.0; // by D.Mathog, aguess\n-\tchar *aseq = NULL; // by D.Mathog\n-\tchar **usedseqs = NULL; // by D.Mathog\n-\tchar **usednames = NULL; // by D.Mathog\n-\tint nused;\n-\tdouble tsuyosa;\n-\n-\ttsuyosa = (double)nhomologs * (nhomologs-1) / njob * TSUYOSAFACTOR;\n-\tfprintf( stderr, "tsuyosa = %f\\n", tsuyosa );\n-\tlocalhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );\n-\tfor( i=0; i<njob; i++)\n-\t{\n-\t\tlocalhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );\n-\t\tfor( j=0; j<njob; j++)\n-\t\t{\n-\t\t\tlocalhomtable[i][j].start1 = -1;\n-\t\t\tlocalhomtable[i][j].end1 = -1;\n-\t\t\tlocalhomtable[i][j].start2 = -1; \n-\t\t\tlocalhomtable[i][j].end2 = -1; 
\n-\t\t\tlocalhomtable[i][j].opt = -1.0;\n-\t\t\tlocalhomtable[i][j].next = NULL;\n-\t\t}\n-\t}\n-\n-\tif( effarr1 == NULL ) \n-\t{\n-\t\teffarr1 = AllocateDoubleVec( njob );\n-\t\teffarr2 = AllocateDoubleVec( njob );\n-\t\tpseq = AllocateCharMtx( 2, nlenmax*9+1 );\n-\t'..b'omTable( localhomtable, njob );\n-#if DEBUG\n-\tfprintf( stderr, "done. FreeLocalHomTable\\n" );\n-#endif\n-}\n-\n-static void WriteOptions( FILE *fp )\n-{\n-\n-\tif( dorp == \'d\' ) fprintf( fp, "DNA\\n" );\n-\telse\n-\t{\n-\t\tif ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\\n", pamN );\n-\t\telse if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\\n", nblosum );\n-\t\telse if( scoremtx == 2 ) fprintf( fp, "M-Y\\n" );\n-\t}\n- fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );\n- if( use_fft ) fprintf( fp, "FFT on\\n" );\n-\n-\tfprintf( fp, "tree-base method\\n" );\n-\tif( tbrweight == 0 ) fprintf( fp, "unweighted\\n" );\n-\telse if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\\n" );\n-\tif( tbitr || tbweight ) \n-\t{\n-\t\tfprintf( fp, "iterate at each step\\n" );\n-\t\tif( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\\n" ); \n-\t\tif( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\\n" ); \n-\t\tif( tbweight ) fprintf( fp, " weighted\\n" ); \n-\t\tfprintf( fp, "\\n" );\n-\t}\n-\n- \t fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );\n-\n-\tif( alg == \'a\' )\n-\t\tfprintf( fp, "Algorithm A\\n" );\n-\telse if( alg == \'A\' ) \n-\t\tfprintf( fp, "Algorithm A+\\n" );\n-\telse if( alg == \'S\' ) \n-\t\tfprintf( fp, "Apgorithm S\\n" );\n-\telse if( alg == \'C\' ) \n-\t\tfprintf( fp, "Apgorithm A+/C\\n" );\n-\telse\n-\t\tfprintf( fp, "Unknown algorithm\\n" );\n-\n-\tif( treemethod == \'x\' )\n-\t\tfprintf( fp, "Tree = UPGMA (3).\\n" );\n-\telse if( treemethod == \'s\' )\n-\t\tfprintf( fp, "Tree = UPGMA (2).\\n" 
);\n-\telse if( treemethod == \'p\' )\n-\t\tfprintf( fp, "Tree = UPGMA (1).\\n" );\n-\telse\n-\t\tfprintf( fp, "Unknown tree.\\n" );\n-\n- if( use_fft )\n- {\n- fprintf( fp, "FFT on\\n" );\n- if( dorp == \'d\' )\n- fprintf( fp, "Basis : 4 nucleotides\\n" );\n- else\n- {\n- if( fftscore )\n- fprintf( fp, "Basis : Polarity and Volume\\n" );\n- else\n- fprintf( fp, "Basis : 20 amino acids\\n" );\n- }\n- fprintf( fp, "Threshold of anchors = %d%%\\n", fftThreshold );\n- fprintf( fp, "window size of anchors = %dsites\\n", fftWinSize );\n- }\n-\telse\n- fprintf( fp, "FFT off\\n" );\n-\tfflush( fp );\n-}\n-\t \n-\n-int main( int argc, char *argv[] )\n-{\n-\tstatic int nlen[M];\t\n-\tstatic char **name, **seq;\n-\tstatic char **bseq;\n-\tstatic double *eff;\n-\tint i;\n-\tchar c;\n-\tint alloclen;\n-\tFILE *infp;\n-\n-\targuments( argc, argv );\n-\n-\tif( inputfile )\n-\t{\n-\t\tinfp = fopen( inputfile, "r" );\n-\t\tif( !infp )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot open %s\\n", inputfile );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\telse\n-\t\tinfp = stdin;\n-\n-\tif( !pairfile )\n-\t{\n-\t\tfprintf( stderr, "Usage: %s -p pairfile -i inputfile \\n", argv[0] );\n-\t\texit( 1 );\n-\t}\n-\n-\tgetnumlen( infp );\n-\trewind( infp );\n-\n-\tif( njob < 2 )\n-\t{\n-\t\tfprintf( stderr, "At least 2 sequences should be input!\\n"\n-\t\t\t\t\t\t "Only %d sequence found.\\n", njob ); \n-\t\texit( 1 );\n-\t}\n-\n-\tname = AllocateCharMtx( njob, B+1 );\n-\tseq = AllocateCharMtx( njob, nlenmax*9+1 );\n-\tbseq = AllocateCharMtx( njob, nlenmax*9+1 );\n-\talloclen = nlenmax*9;\n-\n-\teff = AllocateDoubleVec( njob );\n-\n-#if 0\n-\tRead( name, nlen, seq );\n-#else\n-\treadData_pointer( infp, name, nlen, seq );\n-#endif\n-\tfclose( infp );\n-\n-\tconstants( njob, seq );\n-\n-#if 0\n-\tfprintf( stderr, "params = %d, %d, %d\\n", penalty, penalty_ex, offset );\n-#endif\n-\n-\tinitSignalSM();\n-\n-\tinitFiles();\n-\n-\tWriteOptions( trap_g );\n-\n-\tc = seqcheck( seq );\n-\tif( c 
)\n-\t{\n-\t\tfprintf( stderr, "Illeagal character %c\\n", c );\n-\t\texit( 1 );\n-\t}\n-\n-//\twritePre( njob, name, nlen, seq, 0 );\n-\n-\tfor( i=0; i<njob; i++ ) eff[i] = 1.0;\n-\n-\n-\tfor( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );\n-\n-\n-\tpairalign( name, nlen, bseq, eff, alloclen );\n-\n-\tfprintf( trap_g, "done.\\n" );\n-#if DEBUG\n-\tfprintf( stderr, "closing trap_g\\n" );\n-#endif\n-\tfclose( trap_g );\n-\n-#if IODEBUG\n-\tfprintf( stderr, "OSHIMAI\\n" );\n-#endif\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/pairash.c --- a/mafft/core/pairash.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1442 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 0\n-\n-static int usecache;\n-static char *whereispairalign;\n-static char *odir;\n-static char *pdir;\n-static double scale;\n-static int *alreadyoutput;\n-static int equivthreshold;\n-static int equivwinsize;\n-static int equivshortestlen;\n-\n-static void cutpath( char *s )\n-{\n-\tchar *pos;\n-\tpos = s + strlen( s );\n-\n-\twhile( --pos >= s )\n-\t{\n-\t\tif( *pos == \'/\' ) break;\n-\t}\n-\n-\tstrcpy( s, pos+1 );\n-}\n-\n-static char getchainid( char *s )\n-{\n-\ts += strlen( s ) - 2;\n-\tif( isspace( s[0] ) && isalnum( s[1] ) )\n-\t\treturn( s[1] );\n-\telse\n-\t\treturn( \'A\' );\n-}\n-\n-static void extractfirstword( char *s )\n-{\n-\twhile( *s )\n-\t{\n-\t\tif( isspace( *s ) ) break;\n-\t\ts++;\n-\t}\n-\t*s = 0;\n-}\n-\n-static char *strip( char *s )\n-{\n-\tchar *v;\n-\n-\twhile( *s )\n-\t{\n-\t\tif( !isspace( *s ) ) break;\n-\t\ts++;\n-\t}\n-\tv = s;\n-\n-\ts += strlen( v ) - 1;\n-\twhile( s>=v )\n-\t{\n-\t\tif( !isspace( *s ) ) \n-\t\t{\n-\t\t\t*(s+1) = 0;\n-\t\t\tbreak;\n-\t\t}\n-\t\ts--;\n-\t}\n-\n-\treturn( v );\n-}\n-\n-#if 0\n-static void makeequivdouble( double *d, char *c )\n-{\n-\twhile( *c )\n-\t{\n-\t\t*d++ = (double)( *c++ - \'0\' );\n-\t}\n-}\n-\n-static void maskequiv( double *d, int n )\n-{\n-\tint halfwin;\n-\tint ok;\n-\tint i, j;\n-\n-\thalfwin = (int)( equivwinsize / 2 );\n-\n-\tfor( i=0; i<n; i++ )\n-\t{\n-\t\tok = 1;\n-\t\tfor( j = i-halfwin; j<i+halfwin; j++ )\n-\t\t{\n-\t\t\tif( j<0 || n=<j ) continue;\n-\t\t\tif( d[j] <= 0.0 )\n-\t\t\t{\n-\t\t\t\tok = 0;\n-\t\t\t\tbreak;\n-\t\t\t}\n-\t\t}\n-\t\tif( ok == 0 ) d[i] = 0.0;\n-\t}\n-}\n-#else\n-static void maskequiv( double *d, int n )\n-{\n-\tint i, len;\n-\tint count;\n-\tlen = 0;\n-\tdouble *dbk, *dori, *dbkori;\n-\n-\tdbk = calloc( n, sizeof( double ) );\n-\n-\tdbkori = dbk;\n-\tdori = d;\n-\tcount = n;\n-\twhile( count-- )\n-\t{\n-\t\t*dbk++ = *d++;\n-\t}\n-\n-\tdbk = 
dbkori;\n-\td = dori;\n-\tlen = 0;\n-\n-\n-\tfor( i=0; i<n; i++ )\n-\t{\n-\t\tif( d[i] > 0.0 )\n-\t\t{\n-\t\t\tlen += 1;\n-\t\t\td[i] = 0.0;\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\td[i] = 0.0;\n-\t\t\tif( len >= equivshortestlen ) \n-\t\t\t{\n-\t\t\t\tlen++;\n-\t\t\t\twhile( len-- ) d[i-len] = dbk[i-len];\n-\t\t\t}\n-\t\t\tlen = 0;\n-\t\t}\n-\t}\n-\n-\tif( len >= equivshortestlen )\n-\t{\n-\t\tlen++;\n-\t\twhile( len-- ) d[n-len] = dbk[n-len];\n-\t}\n-\n-\tfree( dbk );\n-}\n-#endif\n-\n-static void makeequivdouble_tmalign( double *d, char *c, int n )\n-{\n-\tdouble tmpd;\n-\tdouble *dbk;\n-\tint tmpi;\n-\tchar s;\n-\tdbk = d;\n-\twhile( *c )\n-\t{\n-\t\tif( ( s=*c++ ) == \':\' )\n-\t\t\ttmpi = 9;\n-\t\telse if( s == \'.\' )\n-\t\t\ttmpi = 4;\n-\t\telse\n-\t\t\ttmpi = 0;\n-//\t\ttmpd = (double)( tmpi + 1 - equivthreshold ) / ( 10 - equivthreshold ) * 9.0;\n-//\t\tif( tmpd < 0.0 ) tmpd = 0.0;\n-\t\ttmpd = (double)( tmpi );\n-//\t\t*d++ = (int)tmpd;\n-\t\t*d++ = tmpd;\n-\t}\n-\n-\td = dbk;\n-//\tmaskequiv( d, n );\n-}\n-\n-static void makeequivdouble_threshold( double *d, char *c, int n )\n-{\n-\tdouble tmpd;\n-\tdouble *dbk;\n-\tint tmpi;\n-\tdbk = d;\n-\twhile( *c )\n-\t{\n-\t\ttmpi = (int)( *c++ - \'0\' );\n-\t\ttmpd = (double)( tmpi + 1 - equivthreshold ) / ( 10 - equivthreshold ) * 9.0;\n-\t\tif( tmpd < 0.0 ) tmpd = 0.0;\n-//\t\t*d++ = (int)tmpd;\n-\t\t*d++ = tmpd;\n-\t}\n-\n-\td = dbk;\n-\tmaskequiv( d, n );\n-}\n-\n-static void readtmalign( FILE *fp, char *seq1, char *seq2, double *equiv )\n-{\n-\tstatic char *line = NULL;\n-\tstatic char *equivchar = NULL;\n-\tint n;\n-\n-\t\n-\tif( equivchar == NULL )\n-\t{\n-\t\tequivchar = calloc( nlenmax * 2 + 1, sizeof( char ) );\n-\t\tline = calloc( nlenmax * 2 + 1, sizeof( char ) );\n-\t}\n-\tseq1[0] = 0;\n-\tseq2[0] = 0;\n-\tequivchar[0] = 0;\n-\n-\n-//\tsystem( "vi _tmalignout" );\n-\twhile( 1 )\n-\t{\n-\t\tif( feof( fp ) ) \n-\t\t{\n-\t\t\tfprintf( stderr, "Error in TMalign\\n" );\n-\t\t\texit( 1 );\n-\t\t}\n-\t\tfgets( 
line, 999, fp );\n-//\t\tfprintf( stdout, "line = :%s:\\n", line );\n-\t\tif( !strncmp( line+5, "denotes the residue pairs", 20 ) ) break;\n-\t}\n-\tfgets( line, nlenmax*2, fp );\n-\tstrcat( seq1, strip( line ) );\n-\n-\tfgets( line, nlenmax*2, fp );\n-\tstrcat( equivchar, strip( line ) );\n-\n-\tfgets( line, nlenmax*2, fp );\n-\tstrcat( seq2, strip( line ) );\n-\n-#if 0\n-\tprintf( "seq1=%s\\n", seq1 );\n-\tprintf( "seq2=%s\\n", seq2 );\n-\tprintf( "equi=%s\\n", equivchar );\n-exit( 1 );\n-#endif\n-\tn = strlen( seq1 );\n-\tmakeequivd'..b'ht == 3 ) fprintf( fp, "clustalw-like weighting\\n" );\n-\tif( tbitr || tbweight ) \n-\t{\n-\t\tfprintf( fp, "iterate at each step\\n" );\n-\t\tif( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\\n" ); \n-\t\tif( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\\n" ); \n-\t\tif( tbweight ) fprintf( fp, " weighted\\n" ); \n-\t\tfprintf( fp, "\\n" );\n-\t}\n-\n- \t fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );\n-\n-\tif( alg == \'a\' )\n-\t\tfprintf( fp, "Algorithm A\\n" );\n-\telse if( alg == \'A\' ) \n-\t\tfprintf( fp, "Algorithm A+\\n" );\n-\telse if( alg == \'S\' ) \n-\t\tfprintf( fp, "Apgorithm S\\n" );\n-\telse if( alg == \'C\' ) \n-\t\tfprintf( fp, "Apgorithm A+/C\\n" );\n-\telse\n-\t\tfprintf( fp, "Unknown algorithm\\n" );\n-\n- if( use_fft )\n- {\n- fprintf( fp, "FFT on\\n" );\n- if( dorp == \'d\' )\n- fprintf( fp, "Basis : 4 nucleotides\\n" );\n- else\n- {\n- if( fftscore )\n- fprintf( fp, "Basis : Polarity and Volume\\n" );\n- else\n- fprintf( fp, "Basis : 20 amino acids\\n" );\n- }\n- fprintf( fp, "Threshold of anchors = %d%%\\n", fftThreshold );\n- fprintf( fp, "window size of anchors = %dsites\\n", fftWinSize );\n- }\n-\telse\n- fprintf( fp, "FFT off\\n" );\n-\tfflush( fp );\n-}\n-\t \n-\n-int main( int argc, char *argv[] )\n-{\n-\tstatic int nlen[M];\t\n-\tstatic char **name, **seq;\n-\tstatic char **mseq1, 
**mseq2;\n-\tstatic char **aseq;\n-\tstatic char **bseq;\n-\tstatic double *eff;\n-\tstatic double *equiv;\n-\tchar **strfiles;\n-\tchar **chainids;\n-\tint i;\n-\tFILE *infp;\n-\tchar c;\n-\tint alloclen;\n-\n-\targuments( argc, argv );\n-\n-\tif( equivthreshold < 1 || 9 < equivthreshold )\n-\t{\n-\t\tfprintf( stderr, "-t n, n must be 1..9\\n" );\n-\t\texit( 1 );\n-\t}\n-\n-\tif( ( equivwinsize + 1 ) % 2 != 0 )\n-\t{\n-\t\tfprintf( stderr, "equivwinsize = %d\\n", equivwinsize );\n-\t\tfprintf( stderr, "It must be an odd number.\\n" );\n-\t\texit( 1 );\n-\t}\n-\n-\tif( inputfile )\n-\t{\n-\t\tinfp = fopen( inputfile, "r" );\n-\t\tif( !infp )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot open %s\\n", inputfile );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\telse\n-\t\tinfp = stdin;\n-\n-\tnlenmax = 10000; // tekitou\n-\n-\tif( alg == \'R\' )\n-\t\tprepareash( infp, inputfile, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen );\n-\telse if( alg == \'T\' )\n-\t\tpreparetmalign( infp, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen );\n-\n-\tfclose( infp );\n-\n-\tname = AllocateCharMtx( njob, B+1 );\n-\taseq = AllocateCharMtx( njob, nlenmax*2+1 );\n-\tbseq = AllocateCharMtx( njob, nlenmax*2+1 );\n-\teff = AllocateDoubleVec( njob );\n-\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tfprintf( stderr, "str%d = %s-%s\\n", i, strfiles[i], chainids[i] );\n-\t}\n-\n-\tif( njob < 1 )\n-\t{\n-\t\tfprintf( stderr, "No structure found.\\n" ); \n-\t\texit( 1 );\n-\t}\n-\tif( njob < 2 )\n-\t{\n-\t\tfprintf( stderr, "Only %d structure found.\\n", njob ); \n-\t\texit( 0 );\n-\t}\n-\tif( njob > M )\n-\t{\n-\t\tfprintf( stderr, "The number of structures must be < %d\\n", M );\n-\t\tfprintf( stderr, "Please try sequence-based methods for such large data.\\n" );\n-\t\texit( 1 );\n-\t}\n-\n-\n-\n-#if 0\n-\treadData( infp, name, nlen, seq );\n-#endif\n-\n-\tconstants( njob, seq );\n-\n-#if 0\n-\tfprintf( stderr, "params = %d, %d, %d\\n", penalty, penalty_ex, offset 
);\n-#endif\n-\n-\tinitSignalSM();\n-\n-\tinitFiles();\n-\n-\tWriteOptions( trap_g );\n-\n-\tc = seqcheck( seq );\n-\tif( c )\n-\t{\n-\t\tfprintf( stderr, "Illegal character %c\\n", c );\n-\t\texit( 1 );\n-\t}\n-\n-//\twritePre( njob, name, nlen, seq, 0 );\n-\n-\tfor( i=0; i<njob; i++ ) eff[i] = 1.0;\n-\n-\n-\tfor( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );\n-\n-\tpairalign( name, nlen, bseq, aseq, mseq1, mseq2, equiv, eff, strfiles, chainids, alloclen );\n-\n-\tfprintf( trap_g, "done.\\n" );\n-#if DEBUG\n-\tfprintf( stderr, "closing trap_g\\n" );\n-#endif\n-\tfclose( trap_g );\n-\n-//\twritePre( njob, name, nlen, aseq, !contin );\n-#if 0\n-\twriteData( stdout, njob, name, nlen, aseq );\n-#endif\n-#if IODEBUG\n-\tfprintf( stderr, "OSHIMAI\\n" );\n-#endif\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/pairlocalalign.c --- a/mafft/core/pairlocalalign.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,3041 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 0\n-\n-\n-#define NODIST -9999\n-\n-static char *whereispairalign;\n-static char *laraparams;\n-static char foldalignopt[1000];\n-static int stdout_align;\n-static int stdout_dist;\n-static int store_localhom;\n-static int store_dist;\n-static int nadd;\n-static int laste;\n-static int lastm;\n-static int lastsubopt;\n-static int lastonce;\n-\n-typedef struct _lastres\n-{\n-\tint score;\n-\tint start1;\n-\tint start2;\n-\tchar *aln1;\n-\tchar *aln2;\n-} Lastres;\n-\n-typedef struct _reg\n-{\n-\tint start;\n-\tint end;\n-} Reg;\n-\n-typedef struct _aln\n-{\n-\tint nreg;\n-\tReg *reg1;\n-\tReg *reg2;\n-} Aln;\n-\n-typedef struct _lastresx\n-{\n-\tint score;\n-\tint naln;\n-\tAln *aln;\n-} Lastresx;\n-\n-#ifdef enablemultithread\n-typedef struct _jobtable\n-{\n-\tint i;\n-\tint j;\n-} Jobtable;\n-\n-typedef struct _thread_arg\n-{\n-\tint thread_no;\n-\tint njob;\n-\tJobtable *jobpospt;\n-\tchar **name;\n-\tchar **seq;\n-\tchar **dseq;\n-\tint *thereisxineachseq;\n-\tLocalHom **localhomtable;\n-\tdouble **distancemtx;\n-\tdouble *selfscore;\n-\tchar ***bpp;\n-\tLastresx **lastresx;\n-\tint alloclen;\n-\tpthread_mutex_t *mutex_counter;\n-\tpthread_mutex_t *mutex_stdout;\n-} thread_arg_t;\n-#endif\n-\n-typedef struct _lastcallthread_arg\n-{\n-\tint nq, nd;\n-\tchar **dseq;\n-\tchar **qseq;\n-\tLastresx **lastresx;\n-#ifdef enablemultithread\n-\tint thread_no;\n-\tint *kshare;\n-\tpthread_mutex_t *mutex;\n-#endif\n-} lastcallthread_arg_t;\n-\n-static void t2u( char *seq )\n-{\n-\twhile( *seq )\n-\t{\n-\t\tif ( *seq == \'A\' ) *seq = \'a\';\n-\t\telse if( *seq == \'a\' ) *seq = \'a\';\n-\t\telse if( *seq == \'T\' ) *seq = \'u\';\n-\t\telse if( *seq == \'t\' ) *seq = \'u\';\n-\t\telse if( *seq == \'U\' ) *seq = \'u\';\n-\t\telse if( *seq == \'u\' ) *seq = \'u\';\n-\t\telse if( *seq == \'G\' ) *seq = \'g\';\n-\t\telse if( *seq == \'g\' ) *seq = \'g\';\n-\t\telse if( *seq == 
\'C\' ) *seq = \'c\';\n-\t\telse if( *seq == \'c\' ) *seq = \'c\';\n-\t\telse *seq = \'n\';\n-\t\tseq++;\n-\t}\n-}\n-\n-static int removex( char *d, char *m )\n-{\n-\tint val = 0;\n-\twhile( *m != 0 )\n-\t{\n-\t\tif( *m == \'X\' || *m == \'x\' ) \n-\t\t{\n-\t\t\tm++;\n-\t\t\tval++;\n-\t\t}\n-\t\telse \n-\t\t{\n-\t\t\t*d++ = *m++;\n-\t\t}\n-\t}\n-\t*d = 0;\n-\treturn( val );\n-}\n-\n-static void putlocalhom_last( char *s1, char *s2, LocalHom *localhompt, Lastresx *lastresx )\n-{\n-\tchar *pt1, *pt2;\n-\tint naln, nreg;\n-\tint iscore;\n-\tint isumscore;\n-\tint sumoverlap;\n-\tLocalHom *tmppt = localhompt;\n-\tLocalHom *tmppt2;\n-\tLocalHom *localhompt0;\n-\tReg *rpt1, *rpt2;\n-\tAln *apt;\n-\tint nlocalhom = 0;\n-\tint len;\n-\n-//\tfprintf( stderr, "s1=%s\\n", s1 );\n-//\tfprintf( stderr, "s2=%s\\n", s2 );\n-\n-\n-\tnaln = lastresx->naln;\n-\tapt = lastresx->aln;\n-\n-\tif( naln == 0 ) return;\n-\twhile( naln-- )\n-\t{\n-\t\trpt1 = apt->reg1;\n-\t\trpt2 = apt->reg2;\n-\t\tnreg = apt->nreg;\n-\t\tisumscore = 0;\n-\t\tsumoverlap = 0;\n-\t\twhile( nreg-- )\n-\t\t{\n-\t\t\tif( nlocalhom++ > 0 )\n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "reallocating ...\\n" );\n-\t\t\t\ttmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );\n-//\t\t\t\tfprintf( stderr, "done\\n" );\n-\t\t\t\ttmppt = tmppt->next;\n-\t\t\t\ttmppt->next = NULL;\n-\t\t\t}\n-\t\t\ttmppt->start1 = rpt1->start;\n-\t\t\ttmppt->start2 = rpt2->start;\n-\t\t\ttmppt->end1 = rpt1->end;\n-\t\t\ttmppt->end2 = rpt2->end;\n-\t\t\tif( rpt1 == apt->reg1 ) localhompt0 = tmppt; // ?\n-\t\n-//\t\t\tfprintf( stderr, "in putlocalhom, reg1: %d-%d (nreg=%d)\\n", rpt1->start, rpt1->end, lastresx->nreg );\n-//\t\t\tfprintf( stderr, "in putlocalhom, reg2: %d-%d (nreg=%d)\\n", rpt2->start, rpt2->end, lastresx->nreg );\n-\t\n-\t\t\tlen = tmppt->end1 - tmppt->start1 + 1;\n-\t\n-//\t\t\tfprintf( stderr, "tmppt->start1=%d\\n", tmppt->start1 );\n-//\t\t\tfprintf( stderr, "tmppt->start2=%d\\n", tmppt->start2 );\n-\n-//\t\t\tfprintf( 
stderr, "s1+tmppt->start1=%*.*s\\n", len, len, s1+tmppt->start1 );\n-//\t\t\tfprintf( stderr, "s2+tmppt->start2=%*.*s\\n", len, len, s2+tmppt->start2 );\n-\t\n-\t\t\tpt1 = s1 + tmppt->start1;\n-\t\t\tpt2 = s2 + tmppt->start2;\n-\t\t\tiscore = 0;\n-\t\t\twhile( len-- )\n-\t\t\t{\n-\t\t\t\tiscore += n_dis[(int)amino_n[(int)*pt1++]][(int)amino_n[(int)*pt2++]]; // - offset \x1b$B$O$$$i$J$$$+$b\x1b(B\n-//\t\t\t\tfprintf( stderr, "len=%d, %c-%c, iscore(0) = %d\\n", len,'..b' open %s\\n", inputfile );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\telse\n-\t\tinfp = stdin;\n-\n-\tgetnumlen( infp );\n-\trewind( infp );\n-\n-\tif( njob < 2 )\n-\t{\n-\t\tfprintf( stderr, "At least 2 sequences should be input!\\n"\n-\t\t\t\t\t\t "Only %d sequence found.\\n", njob ); \n-\t\texit( 1 );\n-\t}\n-\tif( njob > M )\n-\t{\n-\t\tfprintf( stderr, "The number of sequences must be < %d\\n", M );\n-\t\tfprintf( stderr, "Please try the splittbfast program for such large data.\\n" );\n-\t\texit( 1 );\n-\t}\n-\n-\tif( ( alg == \'r\' || alg == \'R\' ) && dorp == \'p\' )\n-\t{\n-\t\tfprintf( stderr, "Not yet supported\\n" );\n-\t\texit( 1 );\n-\t}\n-\n-\talloclen = nlenmax*2;\n-\tseq = AllocateCharMtx( njob, alloclen+10 );\n-\taseq = AllocateCharMtx( 2, alloclen+10 );\n-\tbseq = AllocateCharMtx( njob, alloclen+10 );\n-\tdseq = AllocateCharMtx( njob, alloclen+10 );\n-\tmseq1 = AllocateCharMtx( njob, 0 );\n-\tmseq2 = AllocateCharMtx( njob, 0 );\n-\tname = AllocateCharMtx( njob, B );\n-\tnlen = AllocateIntVec( njob );\n-\tthereisxineachseq = AllocateIntVec( njob );\n-\n-\tif( alg == \'R\' )\n-\t{\n-\t\tlastresx = calloc( njob+1, sizeof( Lastresx * ) );\n-\t\tfor( i=0; i<njob; i++ ) \n-\t\t{\n-\t\t\tlastresx[i] = calloc( njob+1, sizeof( Lastresx ) ); // muda\n-\t\t\tfor( j=0; j<njob; j++ ) \n-\t\t\t{\n-\t\t\t\tlastresx[i][j].score = 0;\n-\t\t\t\tlastresx[i][j].naln = 0;\n-\t\t\t\tlastresx[i][j].aln = NULL;\n-\t\t\t}\n-\t\t\tlastresx[i][njob].naln = -1;\n-\t\t}\n-\t\tlastresx[njob] = NULL;\n-\t}\n-\telse if( 
alg == \'r\' )\n-\t{\n-//\t\tfprintf( stderr, "Allocating lastresx (%d), njob=%d, nadd=%d\\n", njob-nadd+1, njob, nadd );\n-\t\tlastresx = calloc( njob-nadd+1, sizeof( Lastresx * ) );\n-\t\tfor( i=0; i<njob-nadd; i++ )\n-\t\t{\n-//\t\t\tfprintf( stderr, "Allocating lastresx[%d]\\n", i );\n-\t\t\tlastresx[i] = calloc( nadd+1, sizeof( Lastresx ) );\n-\t\t\tfor( j=0; j<nadd; j++ ) \n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "Initializing lastresx[%d][%d]\\n", i, j );\n-\t\t\t\tlastresx[i][j].score = 0;\n-\t\t\t\tlastresx[i][j].naln = 0;\n-\t\t\t\tlastresx[i][j].aln = NULL;\n-\t\t\t}\n-\t\t\tlastresx[i][nadd].naln = -1;\n-\t\t}\n-\t\tlastresx[njob-nadd] = NULL;\n-\t}\n-\telse\n-\t\tlastresx = NULL;\n-\n-#if 0\n-\tRead( name, nlen, seq );\n-#else\n-\treadData_pointer( infp, name, nlen, seq );\n-#endif\n-\tfclose( infp );\n-\n-\tconstants( njob, seq );\n-\n-#if 0\n-\tfprintf( stderr, "params = %d, %d, %d\\n", penalty, penalty_ex, offset );\n-#endif\n-\n-\tinitSignalSM();\n-\n-\tinitFiles();\n-\n-//\tWriteOptions( trap_g );\n-\n-\tc = seqcheck( seq );\n-\tif( c )\n-\t{\n-\t\tfprintf( stderr, "Illegal character %c\\n", c );\n-\t\texit( 1 );\n-\t}\n-\n-//\twritePre( njob, name, nlen, seq, 0 );\n-\n-\n-\tfor( i=0; i<njob; i++ ) \n-\t{\n-\t\tgappick0( bseq[i], seq[i] );\n-\t\tthereisxineachseq[i] = removex( dseq[i], bseq[i] );\n-\t}\n-\n-\tpairalign( name, nlen, bseq, aseq, dseq, thereisxineachseq, mseq1, mseq2, alloclen, lastresx );\n-\n-\tfprintf( trap_g, "done.\\n" );\n-#if DEBUG\n-\tfprintf( stderr, "closing trap_g\\n" );\n-#endif\n-\tfclose( trap_g );\n-\n-//\twritePre( njob, name, nlen, aseq, !contin );\n-#if 0\n-\twriteData( stdout, njob, name, nlen, aseq );\n-#endif\n-#if IODEBUG\n-\tfprintf( stderr, "OSHIMAI\\n" );\n-#endif\n-\tSHOWVERSION;\n-\n-\tif( stdout_dist && nthread > 1 )\n-\t{\n-\t\tfprintf( stderr, "\\nThe order of distances is not identical to that in the input file, because of the parallel calculation. 
Reorder them by yourself, using sort -n -k 2 | sort -n -k 1 -s\\n" );\n-\t}\n-\tif( stdout_align && nthread > 1 )\n-\t{\n-\t\tfprintf( stderr, "\\nThe order of pairwise alignments is not identical to that in the input file, because of the parallel calculation. Reorder them by yourself.\\n" );\n-\t}\n-\n-#if 1\n-\tif( lastresx ) \n-\t{\n-\t\tfor( i=0; lastresx[i]; i++ ) \n-\t\t{\n-\t\t\tfor( j=0; lastresx[i][j].naln!=-1; j++ ) \n-\t\t\t{\n-\t\t\t\tfor( k=0; k<lastresx[i][j].naln; k++ )\n-\t\t\t\t{\n-\t\t\t\t\tfree( lastresx[i][j].aln[k].reg1 );\n-\t\t\t\t\tfree( lastresx[i][j].aln[k].reg2 );\n-\t\t\t\t}\n-\t\t\t\tfree( lastresx[i][j].aln );\n-\t\t\t}\n-\t\t\tfree( lastresx[i] );\n-\t\t}\n-\t\tfree( lastresx );\n-\t}\n-#endif\n-\tFreeCharMtx( seq );\n-\tFreeCharMtx( aseq );\n-\tFreeCharMtx( bseq );\n-\tFreeCharMtx( dseq );\n-\tFreeCharMtx( name );\n-\tfree( mseq1 );\n-\tfree( mseq2 );\n-\tfree( nlen );\n-\tfree( thereisxineachseq );\n-\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/partSalignmm.c --- a/mafft/core/partSalignmm.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,2302 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define MACHIGAI 0\n-#define OUTGAP0TRY 1\n-#define DEBUG 0\n-#define XXXXXXX 0\n-#define USE_PENALTY_EX 0\n-#define FASTMATCHCALC 1\n-\n-#if 0\n-static void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len )\n-{\n-\tint i, j, gc, gb; \n-\tfloat feff;\n-\t\n-\tfor( i=0; i<len; i++ ) ogcp[i] = 0.0;\n-\tfor( j=0; j<clus; j++ ) \n-\t{\n-\t\tfeff = (float)eff[j];\n-\t\tgc = 0;\n-\t\tfor( i=0; i<len; i++ ) \n-\t\t{\n-\t\t\tgb = gc;\n-\t\t\tgc = ( seq[j][i] == \'-\' );\n-\t\t\t{\n-\t\t\t\tif( !gb * gc ) ogcp[i] += feff;\n-\t\t\t}\n-\t\t}\n-\t}\n-}\n-\n-static void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len )\n-{\n-\tint i, j, gc, gb; \n-\tfloat feff;\n-\t\n-\tfor( i=0; i<len; i++ ) fgcp[i] = 0.0;\n-\tfor( j=0; j<clus; j++ ) \n-\t{\n-\t\tfeff = (float)eff[j];\n-\t\tgc = ( seq[j][0] == \'-\' );\n-\t\tfor( i=1; i<len+1; i++ ) \n-\t\t{\n-\t\t\tgb = gc;\n-\t\t\tgc = ( seq[j][i] == \'-\' );\n-\t\t\t{\n-\t\t\t\tif( gb * !gc ) fgcp[i-1] += feff;\n-\t\t\t}\n-\t\t}\n-\t}\n-}\n-#endif\n-\n-\n-\t\t\t\n-\t\t\n-\n-static TLS int impalloclen = 0;\n-static TLS float **impmtx = NULL;\n-float part_imp_match_out_sc( int i1, int j1 )\n-{\n-//\tfprintf( stderr, "impalloclen = %d\\n", impalloclen );\n-//\tfprintf( stderr, "i1,j1=%d,%d -> impmtx=%f\\n", i1, j1, impmtx[i1][j1] );\n-\treturn( impmtx[i1][j1] );\n-#if 0\n-\tif( i1 == l1 || j1 == l2 ) return( 0.0 );\n-\treturn( impmtx[i1+start1][j1+start2] );\n-#endif\n-}\n-static void part_imp_match_out_vead_gapmap( float *imp, int i1, int lgth2, int start2, int *gapmap2 )\n-{\n-#if FASTMACHCALC\n-\tfloat *pt = imp;\n-\tint *gapmappt = gapmap2;\n-\twhile( lgth2-- )\n-\t\t*pt++ += impmtx[i1][start2+*gapmappt++];\n-#else\n-\tint j;\n-\tfor( j=0; j<lgth2; j++ )\n-\t{\n-\t\timp[j] += impmtx[i1][start2+gapmap2[j]];\n-\t}\n-#endif\n-}\n-\n-static void part_imp_match_out_vead_tate_gapmap( float *imp, int j1, int lgth1, int start1, 
int *gapmap1 )\n-{\n-#if FASTMACHCALC\n-\tfloat *pt = imp;\n-\tint *gapmappt = gapmap1;\n-\twhile( lgth1-- )\n-\t\t*pt++ = impmtx[start1+*gapmappt++][j1];\n-#else\n-\tint i;\n-\tfor( i=0; i<lgth1; i++ )\n-\t{\n-\t\timp[i] += impmtx[start1+gapmap1[i]][j1];\n-\t}\n-#endif\n-}\n-\n-void part_imp_match_init_strict( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, int forscore )\n-{\n-\tint i, j, k1, k2, tmpint, start1, start2, end1, end2;\n-\tdouble effij, effijx, effij_kozo; \n-\tchar *pt, *pt1, *pt2;\n-\tLocalHom *tmpptr;\n-\n-\tif( seq1 == NULL )\n-\t{\n-\t\tif( impmtx ) FreeFloatMtx( impmtx );\n-\t\timpmtx = NULL;\n-\t\treturn;\n-\t}\n-\n-\tif( impalloclen <= lgth1 + 2 || impalloclen <= lgth2 + 2 )\n-\t{\n-\t\tif( impmtx ) FreeFloatMtx( impmtx );\n-\t\timpalloclen = MAX( lgth1, lgth2 ) + 2;\n-\t\timpmtx = AllocateFloatMtx( impalloclen+100, impalloclen+100 );\n-\t}\n-\n-\n-#if 0\n-\tfprintf( stderr, "eff1 in _init_strict = \\n" );\n-\tfor( i=0; i<clus1; i++ )\n-\t\tfprintf( stderr, "eff1[] = %f\\n", eff1[i] );\n-\tfor( i=0; i<clus2; i++ )\n-\t\tfprintf( stderr, "eff2[] = %f\\n", eff2[i] );\n-#endif\n-\n-\tfor( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )\n-\t\timpmtx[i][j] = 0.0;\n-\teffijx = 1.0 * fastathreshold;\n-\tfor( i=0; i<clus1; i++ )\n-\t{\n-\t\tfor( j=0; j<clus2; j++ )\n-\t\t{\n-\t\t\teffij = eff1[i] * eff2[j] * effijx;\n-\t\t\teffij_kozo = eff1_kozo[i] * eff2_kozo[j] * effijx;\n-\t\t\ttmpptr = localhom[i][j];\n-\t\t\twhile( tmpptr )\n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "start1 = %d\\n", tmpptr->start1 );\n-//\t\t\t\tfprintf( stderr, "end1 = %d\\n", tmpptr->end1 );\n-//\t\t\t\tfprintf( stderr, "i = %d, seq1 = \\n%s\\n", i, seq1[i] );\n-//\t\t\t\tfprintf( stderr, "j = %d, seq2 = \\n%s\\n", j, seq2[j] );\n-\t\t\t\tpt = seq1[i];\n-\t\t\t\ttmpint = -1;\n-\t\t\t\twhile( *pt != 0 )\n-\t\t\t\t{\n-\t\t\t\t\tif( *pt++ != \'-\' ) 
tmpint++;\n-\t\t\t\t\tif( tmpint == tmpptr->start1 ) break;\n-\t\t\t\t}\n-\t\t\t\tstart1 = (int)( pt - seq1[i] ) - 1;\n-\t\n-\t\t\t\tif( tmpptr->start1 == tmpptr->end1 ) end1 = start1;\n-\t\t\t\telse\n-\t\t\t\t{\n-#if MACHIGAI\n-\t\t\t\t\twhile( *pt != 0 )\n-\t\t\t\t\t{\n-\t\t\t\t\t\tif( tmpint == tmpptr->end1 ) break;\n-\t\t\t\t\t\tif( *pt++ != \'-\' ) tmpint++;\n-\t\t\t\t\t}\n-\t\t\t\t\tend1 = (int)( pt - seq1[i] ) - 1;\n-#else\n-\t\t\t\t\twhile('..b'\t\tfprintf( stderr, "%5.0f->", wm );\n-#endif\n-//\t\t\tg = mi + *fgcp2pt * gapfreq1[i];\n-\t\t\tif( (g = mi + *fgcp2pt * gf1va) > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-\t\t\t\t*ijppt = -( j - mpi );\n-\t\t\t}\n-//\t\t\tg = *prept + *ogcp2pt * gapfreq1[i-1];\n-\t\t\tif( (g = *prept + *ogcp2pt * gf1vapre) >= mi )\n-\t\t\t{\n-\t\t\t\tmi = g;\n-\t\t\t\tmpi = j-1;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\tmi += fpenalty_ex;\n-#endif\n-\n-//\t\t\tg = *mjpt + fgcp1va * gapfreq2[j];\n-\t\t\tif( (g = *mjpt + fgcp1va * *gf2pt) > wm )\n-\t\t\t{\n-\t\t\t\twm = g;\n-\t\t\t\t*ijppt = +( i - *mpjpt );\n-\t\t\t}\n-//\t\t\tg = *prept + ogcp1va * gapfreq2[j-1];\n-\t\t\tif( (g = *prept + ogcp1va * *gf2ptpre) >= *mjpt )\n-\t\t\t{\n-\t\t\t\t*mjpt = g;\n-\t\t\t\t*mpjpt = i-1;\n-\t\t\t}\n-#if USE_PENALTY_EX\n-\t\t\tm[j] += fpenalty_ex;\n-#endif\n-\t\t\tif( trywarp )\n-\t\t\t{\n-#if USE_PENALTY_EX\n-\t\t\t\tif( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai\n-#else\n-\t\t\t\tif( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai\n-#endif\n-\t\t\t\t{\n-\t\t\t\t\tif( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )\n-\t\t\t\t\t{\n-\t\t\t\t\t\t*ijppt = warpbase + warpn - 1;\n-\t\t\t\t\t}\n-\t\t\t\t\telse\n-\t\t\t\t\t{\n-\t\t\t\t\t\t*ijppt = warpbase + warpn;\n-\t\t\t\t\t\twarpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );\n-\t\t\t\t\t\twarpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) 
);\n-\t\t\t\t\t\twarpis[warpn] = prevwarpi[j-1];\n-\t\t\t\t\t\twarpjs[warpn] = prevwarpj[j-1];\n-\t\t\t\t\t\twarpn++;\n-\t\t\t\t\t}\n-\t\t\t\t\twm = g;\n-\t\t\t\t}\n-\n-\t\t\t\tcurm = *curpt + wm;\n-\t\t\t\tif( *wmrecords1pt > *wmrecordspt )\n-\t\t\t\t{\n-\t\t\t\t\t*wmrecordspt = *wmrecords1pt;\n-\t\t\t\t\t*warpipt = *(warpipt-1);\n-\t\t\t\t\t*warpjpt = *(warpjpt-1);\n-\t\t\t\t}\n-\t\t\t\tif( curm > *wmrecordspt )\n-\t\t\t\t{\n-\t\t\t\t\t*wmrecordspt = curm;\n-\t\t\t\t\t*warpipt = i;\n-\t\t\t\t\t*warpjpt = j;\n-\t\t\t\t}\n-\t\t\t\twmrecordspt++;\n-\t\t\t\twmrecords1pt++;\n-\t\t\t\twarpipt++;\n-\t\t\t\twarpjpt++;\n-\t\t\t}\n-\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f ", wm );\n-#endif\n-\t\t\t*curpt += wm;\n-\t\t\tijppt++;\n-\t\t\tmjpt++;\n-\t\t\tprept++;\n-\t\t\tmpjpt++;\n-\t\t\tcurpt++;\n-\t\t\tfgcp2pt++;\n-\t\t\togcp2pt++;\n-\t\t\tgf2ptpre++;\n-\t\t\tgf2pt++;\n-\n-\t\t}\n-\t\tlastverticalw[i] = currentw[lgth2-1];\n-\n-\t\tif( trywarp )\n-\t\t{\n-\t\t\tfltncpy( prevwmrecords, wmrecords, lastj );\n-\t\t\tintncpy( prevwarpi, warpi, lastj );\n-\t\t\tintncpy( prevwarpj, warpj, lastj );\n-\t\t}\n-\t}\n-\tif( trywarp )\n-\t{\n-//\t\tfprintf( stderr, "wm = %f\\n", wm );\n-//\t\tfprintf( stderr, "warpn = %d\\n", warpn );\n-\t\tfree( wmrecords );\n-\t\tfree( prevwmrecords );\n-\t\tfree( warpi );\n-\t\tfree( warpj );\n-\t\tfree( prevwarpi );\n-\t\tfree( prevwarpj );\n-\t}\n-\n-#if OUTGAP0TRY\n-\tif( !outgap )\n-\t{\n-\t\tfor( j=1; j<lgth2+1; j++ )\n-\t\t\tcurrentw[j] -= offset * ( lgth2 - j ) / 2.0;\n-\t\tfor( i=1; i<lgth1+1; i++ )\n-\t\t\tlastverticalw[i] -= offset * ( lgth1 - i / 2.0);\n-\t}\n-#endif\n-\t\t\n-\t/*\n-\tfprintf( stderr, "\\n" );\n-\tfor( i=0; i<icyc; i++ ) fprintf( stderr,"%s\\n", seq1[i] );\n-\tfprintf( stderr, "#####\\n" );\n-\tfor( j=0; j<jcyc; j++ ) fprintf( stderr,"%s\\n", seq2[j] );\n-\tfprintf( stderr, "====>" );\n-\tfor( i=0; i<icyc; i++ ) strcpy( mseq1[i], seq1[i] );\n-\tfor( j=0; j<jcyc; j++ ) strcpy( mseq2[j], seq2[j] );\n-\t*/\n-\tif( localhom 
)\n-\t{\n-\t\tAtracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, start1, end1, start2, end2, gapmap1, gapmap2, warpis, warpjs, warpbase );\n-\t}\n-\telse\n-\t\tAtracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, warpis, warpjs, warpbase );\n-\n-\tif( warpis ) free( warpis );\n-\tif( warpjs ) free( warpjs );\n-\n-//\tfprintf( stderr, "### impmatch = %f\\n", *impmatch );\n-\n-\tresultlen = strlen( mseq1[0] );\n-\tif( alloclen < resultlen || resultlen > N )\n-\t{\n-\t\tfprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\\n", alloclen, resultlen, N );\n-\t\tErrorExit( "LENGTH OVER!\\n" );\n-\t}\n-\n-\n-\tfor( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );\n-\tfor( j=0; j<jcyc; j++ ) strcpy( seq2[j], mseq2[j] );\n-\t/*\n-\tfprintf( stderr, "\\n" );\n-\tfor( i=0; i<icyc; i++ ) fprintf( stderr, "%s\\n", mseq1[i] );\n-\tfprintf( stderr, "#####\\n" );\n-\tfor( j=0; j<jcyc; j++ ) fprintf( stderr, "%s\\n", mseq2[j] );\n-\t*/\n-\n-\n-\treturn( wm );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/regionalrealignment.rb --- a/mafft/core/regionalrealignment.rb Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,387 +0,0 @@\n-#! /usr/bin/env ruby\n-\n-$MAFFTCOMMAND = \'"/usr/local/bin/mafft"\'\n-# Edit the above line to specify the location of mafft.\n-# $MAFFTCOMMAND = \'"C:\\folder name\\mafft.bat"\' # windows\n-# $MAFFTCOMMAND = \'"/usr/local/bin/mafft"\' # mac or cygwin\n-# $MAFFTCOMMAND = \'"/usr/bin/mafft"\' # linux (rpm)\n-# $MAFFTCOMMAND = \'"/somewhere/mafft.bat"\' # all-in-one version for linux or mac\n-\n-#####################################################################\n-#\n-# regionalrealignment.rb version 0.2 (2013/Sep/21)\n-# ruby regionalrealignment.rb setting input > output\n-# See http://mafft.cbrc.jp/alignment/software/regionalrealignment.html\n-# \n-# v0.2, 2013/Sep/21, Fixed a windows-specific bug.\n-#\n-#####################################################################\n-\n-\n-def readfasta( fp, name, seq )\n- nseq = 0\n- tmpseq = ""\n- while fp.gets\n- if $_ =~ /^>/ then\n- name.push( $_.sub(/>/,"").strip )\n- seq.push( tmpseq ) if nseq > 0\n- nseq += 1\n- tmpseq = ""\n- else\n- tmpseq += $_.strip\n- end\n- end\n- seq.push( tmpseq )\n- return nseq\n-end\n-\n-def resolve( tree )\n-\twhile 1\n-#\t\tp tree\n-\t\ttree.sub!( /\\,([0-9]+):(\\-?[0-9\\.]+)\\,([0-9]+):(\\-?[0-9\\.]+)/, ",XXX" )\n-\t\thit1 = $1\n-\t\thit2 = $2\n-\t\thit3 = $3\n-\t\thit4 = $4\n-\t\n-#\t\tp hit1\n-#\t\tp hit2\n-#\t\tp hit3\n-#\t\tp hit4\n-\t\n-#\t\tputs "introduce XXX"\n-#\t\tp tree\n-\t\n-\t\tbreak unless tree.index(/XXX/)\n-\t\n-\t\tposhit = tree.index(/XXX/)\n-#\t\tputs "poshit=" + poshit.to_s\n-\t\n-\t\ti = poshit\n-\t\theight = 0\n-\t\twhile i >= 0\n-\t\t\tbreak if height == 0 && tree[i..i] == \'(\'\n-\t\t\tif tree[i..i] == \')\' then\n-\t\t\t\theight += 1\n-\t\t\telsif tree[i..i] == \'(\' then\n-\t\t\t\theight -= 1\n-\t\t\tend\n-\t\t\ti -= 1\n-\t\tend\n-\t\n-\t\tposkakko = i\n-#\t\tputs "poskakko = " + poskakko.to_s\n-\t\tzenhan = tree[0..poskakko]\n-\t\tzenhan = "" if poskakko == -1\n-#\t\tputs "zenhan = " + zenhan\n-\t\n-\t\ttreelen = 
tree.length\n-\t\ttree = zenhan + "(" + tree[poskakko+1..treelen]\n-#\t\tputs "add ("\n-#\t\tp tree\n-\t\ttree.sub!( /XXX/, "#{hit1}:#{hit2}):0,#{hit3}:#{hit4}" )\n-\t\n-#\t\tp tree\n-end\n-\n-\n-return tree\n-\n-end\n-\n-if ARGV.length != 2 then\n-\tSTDERR.puts ""\n-\tSTDERR.puts "Usage: ruby #{$0} setingfile inputfile > output"\n-\tSTDERR.puts ""\n-\texit 1\n-end\n-\n-infilename = ARGV[1]\n-tname = []\n-tseq = []\n-infp = File.open( infilename, "r" )\n-tin = readfasta( infp, tname, tseq )\n-infp.close\n-\n-if tin == 0 then\n-\t\tSTDERR.puts ""\n-\t\tSTDERR.puts "Error in the \'#{infilename}\' file. Is this FASTA format?\\n"\n-\t\tSTDERR.puts ""\n-\t\texit 1\n-end\n-\n-alnlen = tseq[0].length\n-if alnlen == 0 then\n-\t\tSTDERR.puts ""\n-\t\tSTDERR.puts "Error in the \'#{infilename}\' file. Is this FASTA format?\\n"\n-\t\tSTDERR.puts ""\n-\t\texit 1\n-end\n-\n-\n-for i in 0..(tin-1)\n-\tif alnlen != tseq[i].length then\n-\t\tSTDERR.puts ""\n-\t\tSTDERR.puts "Please insert gaps such that all the input sequences have the same length.\\n"\n-\t\tSTDERR.puts ""\n-\t\texit 1\n-\tend\n-end\n-\n-checkmap = []\n-for i in 0..(alnlen-1)\n-\tcheckmap.push(0)\n-end\n-\n-outputseq = []\n-for i in 0..(tin-1)\n-\toutputseq.push("")\n-end\n-\n-\n-settingfile = ARGV[0].to_s\n-reg = []\n-startpos = []\n-endpos = []\n-realign = []\n-options = []\n-treeoption = ""\n-revwarn = 0\n-sfp = File.open( settingfile, "r" )\n-while line = sfp.gets\n-\tline.sub!(/#.*/,"")\n-\tnext if line.length < 2\n-\tif line.strip =~ /^treeoption / then\n-\t\ttreeoption = line.strip.sub(/.*treeoption/,"")\n-\t\tbreak\n-\tend\n-end\n-sfp.close\n-sfp = File.open( settingfile, "r" )\n-while line = sfp.gets\n-\tline.sub!(/#.*/,"")\n-\tnext if line.length < 2\n-\tnext if line.strip =~ /^treeoption/\n-\tstartposv = line.split(\' \')[0].to_i - 1\n-\tendposv = line.split(\' \')[1].to_i - 1\n-\tif startposv < 0 || endposv < 0 then\n-\t\tSTDERR.puts "\\nError in the \'#{settingfile}\' file. 
Please check this line:\\n"\n-\t\tSTDERR.puts line\n-\t\tSTDERR.puts "Sites must be numbered as 1, 2, ...\\n"\n-\t\tSTDERR.puts "\\n"\n-\t\texit 1\n-\tend\n-\tif startposv >= alnlen || endposv >='..b'DERR.puts "\\n"\n-\tSTDERR.puts "ERROR in building a guide tree"\n-\tSTDERR.puts "\\n"\n-\texit 1\n-end\n-\n-treefp = File.open( "#{infilename}.tree", "r" )\n-\n-tree = ""\n-while line = treefp.gets\n-\ttree += line.strip\n-\tbreak if tree =~ /;$/\n-end\n-treefp.close\n-\n-tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\\[.*?\\]/,"").gsub(/ /, "")\n-scale = 1.0\n-mtreefp = File.open("_tree", "w")\n-\n-\n-#STDERR.puts "Tree = " + tree\n-\n-memi = [-1,-1]\n-leni = [-1,-1]\n-\n-while tree.index( /\\(/ ) \n-\n-\ttree = resolve( tree )\n-\n-\ttree.sub!( /\\(([0-9]+):(\\-?[0-9\\.]+),([0-9]+):(\\-?[0-9\\.]+)\\)/, "XXX" )\n-\tmemi[0] = $1.to_i\n-\tleni[0] = $2.to_f * scale\n-\tmemi[1] = $3.to_i\n-\tleni[1] = $4.to_f * scale\n-\n-\tif leni[0] > 10 || leni[1] > 10 then\n-\t\tSTDERR.puts ""\n-\t\tSTDERR.puts "Please check the scale of branch length!"\n-\t\tSTDERR.puts "The unit of branch lengths must be \'substitution/site\'"\n-\t\tSTDERR.puts "If the unit is \'substition\' in your tree, please"\n-\t\tSTDERR.puts "use the scale argument,"\n-\t\tSTDERR.puts "% newick2mafft scale in > out"\n-\t\tSTDERR.puts "where scale = 1/(alignment length)"\n-\t\tSTDERR.puts ""\n-\t\texit 1\n-\tend\n-\n-#\tSTDERR.puts "subtree = " + $&\n-\n-\tif memi[1] < memi[0] then\n-\t\tmemi.reverse!\n-\t\tleni.reverse!\n-\tend\n-\n-\ttree.sub!( /XXX/, memi[0].to_s )\n-\n-#\tSTDERR.puts "Tree = " + tree\n-\n-\tmtreefp.printf( "%5d %5d %10.5f %10.5f\\n", memi[0], memi[1], leni[0], leni[1] )\n-\n-end\n-\n-\n-mtreefp.close\n-\n-numreg = startpos.length\n-\n-for i in 0..(numreg-1)\n-\n-\tpartfp = File.open( "_part", "w" )\n-\tfor j in 0..(tin-1)\n-\t\tpartfp.puts ">" + tname[j]\n-\t\tif startpos[i] > endpos[i] then\n-\t\t\tpartfp.puts 
tseq[j][endpos[i]..startpos[i]].reverse\n-\t\telse\n-\t\t\tpartfp.puts tseq[j][startpos[i]..endpos[i]]\n-\t\tend\n-\tend\n-\tpartfp.close\n-\n-\tif( realign[i] == 1 ) then\n-\t\tSTDERR.puts "Aligning region #{startpos[i]+1} - #{endpos[i]+1}"\n-\t\tres = system "#{$MAFFTCOMMAND} #{options[i]} --inputorder --treein _tree _part > _partout"\n-\t\tif res == false then\n-\t\t\tSTDERR.puts "\\n"\n-\t\t\tSTDERR.puts "ERROR in aligning region #{startpos[i]+1} - #{endpos[i]+1}"\n-\t\t\tSTDERR.puts "Please check the option:"\n-\t\t\tSTDERR.puts "#{options[i]}"\n-\t\t\tSTDERR.puts "\\n"\n-\t\t\texit 1\n-\t\tend\n-\n-\telse\n-\t\tSTDERR.puts "Copying region #{startpos[i]+1} - #{endpos[i]+1}"\n-#\t\tsystem "cp _part _partout"\n-\t\tFile.rename( "_part", "_partout" )\n-\tend\n-\n-\tpname = []\n-\tpseq = []\n-\tpartfp = File.open( "_partout", "r" )\n-\tpin = readfasta( partfp, pname, pseq )\n-\tpartfp.close\n-\tfor j in 0..(tin-1)\n-\t\toutputseq[j] += pseq[j]\n-\tend\n-end\n-\n-for j in 0..(tin-1)\n-\tputs ">" + tname[j]\n-\tputs outputseq[j]\n-end\n-\n-STDERR.puts "Done."\n-\n-numdupsites = checkmap.select{|x| x>1}.length\n-if numdupsites > 0 then\n-\tSTDERR.puts ""\n-\tSTDERR.puts "#########################################################"\n-\tSTDERR.puts "# Warning: #{numdupsites} sites were duplicatedly selected."\n-\tSTDERR.puts "#########################################################"\n-\tSTDERR.puts ""\n-end\n-\n-numunselectedsites = checkmap.select{|x| x==0}.length\n-if numunselectedsites > 0 then\n-\tSTDERR.puts ""\n-\tSTDERR.puts "#########################################################"\n-\tSTDERR.puts "# Warning: #{numunselectedsites} sites were not selected."\n-\tSTDERR.puts "#########################################################"\n-\tSTDERR.puts ""\n-end\n-\n-if revwarn == 1 then\n-\tSTDERR.puts ""\n-\tSTDERR.puts "#########################################################"\n-\tSTDERR.puts "# Warning: The order of sites were reversed."\n-\tSTDERR.puts 
"#########################################################"\n-\tSTDERR.puts ""\n-end\n-\n-\n-STDERR.puts ""\n-STDERR.puts " Tree: computed with #{treeoption} --treeout "\n-for i in 0..(numreg-1)\n-\trange = sprintf( "%6d - %6d", startpos[i]+1, endpos[i]+1 )\n-\tif realign[i] == 1 then\n-\t\tSTDERR.puts "#{range}: realigned with #{options[i]} --treein (tree)"\n-\telse\n-\t\tSTDERR.puts "#{range}: preserved"\n-\tend\n-end\n-STDERR.puts ""\n-\n-File.delete( "_dum" )\n-File.delete( "_tree" )\n-File.delete( "_part" )\n-File.delete( "_partout" )\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/regtable2seq.c --- a/mafft/core/regtable2seq.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,212 +0,0 @@ -#include "mltaln.h" - -#define DEBUG 0 - -char *regfile; -char *eregfile; - -void arguments( int argc, char *argv[] ) -{ - int c; - - outnumber = 0; - inputfile = NULL; - regfile = NULL; - eregfile = NULL; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( (c = *++argv[0]) ) - { - switch( c ) - { - case 'e': - eregfile = *++argv; - fprintf( stderr, "eregfile = %s\n", eregfile ); - --argc; - goto nextoption; - case 'r': - regfile = *++argv; - fprintf( stderr, "regfile = %s\n", regfile ); - --argc; - goto nextoption; - case 'i': - inputfile = *++argv; - fprintf( stderr, "inputfile = %s\n", inputfile ); - --argc; - goto nextoption; - case 'n' : - outnumber = 1; - break; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc != 0 ) - { - fprintf( stderr, "options: Check source file !\n" ); - exit( 1 ); - } -} - -void readereg( FILE *regfp, int **regtable, char **revtable, int *outtable, int *noutpt, int *loutpt ) -{ - char gett[1000]; - int j; - int mem; - char cmem; - char reg[5][100]; - char out[100]; - int startpos, endpos; - - *noutpt = 0; - *loutpt = 0; - fgets( gett, 999, regfp ); - sscanf( gett, "%c %s %s %s %s %s", &cmem, reg[0], reg[1], reg[2], reg[3], reg[4] ); - if( cmem != 'e' ) - { - fprintf( stderr, "Format error\n" ); - exit( 1 ); - } - for( j=0; j<5; j++ ) - { - sscanf( reg[j], "%d-%d-%c", regtable[0]+(j*2), regtable[0]+(j*2)+1, revtable[0]+j ); - fprintf( stderr, "%d %d-%d\n", 0, regtable[0][j*2], regtable[0][j*2+1] ); - startpos = regtable[0][j*2]; - endpos = regtable[0][j*2+1]; - if( startpos > endpos ) - { - endpos = regtable[0][j*2]; - startpos = regtable[0][j*2+1]; - } - if( startpos != -1 && endpos != -1 ) - *loutpt += endpos - startpos + 1; - } - - while( 1 ) - { - fgets( gett, 999, regfp ); - if( feof( regfp ) ) break; - sscanf( gett, "%d o=%s", &mem, out ); - if( mem >= njob ) - { - fprintf( stderr, "Out of range\n" ); - exit( 1 ); - } - outtable[mem] 
= atoi( out ); - if( outtable[mem] ) *noutpt += 1; - } -} - -void readreg( FILE *regfp, int **regtable, char **revtable, int *outtable ) -{ - char gett[1000]; - int j; - int mem; - char reg[5][100]; - char out[100]; - - while( 1 ) - { - fgets( gett, 999, regfp ); - if( feof( regfp ) ) break; - sscanf( gett, "%d %s %s %s %s %s o=%s", &mem, reg[0], reg[1], reg[2], reg[3], reg[4], out ); - if( mem >= njob ) - { - fprintf( stderr, "Out of range\n" ); - exit( 1 ); - } - for( j=0; j<5; j++ ) - { - sscanf( reg[j], "%d-%d-%c", regtable[mem]+(j*2), regtable[mem]+(j*2)+1, revtable[mem]+j ); - fprintf( stderr, "%d %d-%d\n", mem, regtable[mem][j*2], regtable[mem][j*2+1] ); - } - outtable[mem] = atoi( out ); - } -} - -int main( int argc, char *argv[] ) -{ - FILE *infp; - FILE *regfp; - int nlenmin; - int **regtable; - char **revtable; - int *outtable; - int i, nout, lout; - char **outseq; - char **name; - - arguments( argc, argv ); - - if( inputfile ) - { - infp = fopen( inputfile, "r" ); - if( !infp ) - { - fprintf( stderr, "Cannot open %s\n", inputfile ); - exit( 1 ); - } - } - else - infp = stdin; - - dorp = NOTSPECIFIED; - getnumlen_nogap( infp, &nlenmin ); - - if( regfile ) - { - regfp = fopen( regfile, "r" ); - if( !regfp ) - { - fprintf( stderr, "Cannot open %s\n", regfile ); - exit( 1 ); - } - regtable = AllocateIntMtx( njob, 5*2 ); - revtable = AllocateCharMtx( njob, 5 ); - outtable = AllocateIntVec( njob ); - readreg( regfp, regtable, revtable, outtable ); - cutData( infp, regtable, revtable, outtable ); - } - else if( eregfile ) - { - regfp = fopen( eregfile, "r" ); - if( !regfp ) - { - fprintf( stderr, "Cannot open %s\n", eregfile ); - exit( 1 ); - } - regtable = AllocateIntMtx( 1, 5*2 ); - revtable = AllocateCharMtx( 1, 5 ); - outtable = AllocateIntVec( njob ); - readereg( regfp, regtable, revtable, outtable, &nout, &lout ); - fprintf( stderr, "nout = %d, lout = %d\n", nout, lout ); - - outseq = AllocateCharMtx( nout, lout+1 ); - name = AllocateCharMtx( nout, B ); 
- - cutAlignment( infp, regtable, revtable, outtable, name, outseq ); - fprintf( stderr, "gappick! nout = %d\n", nout ); - commongappick( nout, outseq ); - for( i=0; i<nout; i++ ) - { - fprintf( stdout, "%s\n", name[i] ); - fprintf( stdout, "%s\n", outseq[i] ); - } - } - else - { - catData( infp ); - } - - fprintf( stderr, "Strategy:\n" ); - fprintf( stderr, " Not-Aligned\n" ); - -// fprintf( stdout, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); - return( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/replaceu.c --- a/mafft/core/replaceu.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,152 +0,0 @@ -#include "mltaln.h" - -#define DEBUG 0 - -static int seedoffset; - -static void replace_unusual( int n, char **seq, char *usual, char unknown, int (*uporlow)( int ) ) -{ - int i; - char *pt; - for( i=0; i<n; i++ ) - { - pt = seq[i]; - while( *pt ) - { - if( !strchr( usual, *pt ) ) *pt = unknown; - else *pt = uporlow( *pt ); - pt++; - } - } -} - - -void arguments( int argc, char *argv[] ) -{ - int c; - - seedoffset = 0; - inputfile = NULL; - dorp = NOTSPECIFIED; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( (c = *++argv[0]) ) - { - switch( c ) - { - case 'o': - seedoffset = myatoi( *++argv ); - fprintf( stderr, "seedoffset = %d\n", seedoffset ); - --argc; - goto nextoption; - case 'i': - inputfile = *++argv; - fprintf( stderr, "inputfile = %s\n", inputfile ); - --argc; - goto nextoption; - case 'D': - dorp = 'd'; - break; - case 'P': - dorp = 'p'; - break; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc != 0 ) - { - fprintf( stderr, "options: Check source file !\n" ); - exit( 1 ); - } -} - - - -int main( int argc, char *argv[] ) -{ - FILE *infp; - int nlenmin; - char **name; - char **seq; - int *nlen; - int i; - char *usual; - - arguments( argc, argv ); - - if( inputfile ) - { - infp = fopen( inputfile, "r" ); - if( !infp ) - { - fprintf( stderr, "Cannot open %s\n", inputfile ); - exit( 1 ); - } - } - else - infp = stdin; - - -// dorp = NOTSPECIFIED; - getnumlen_casepreserve( infp, &nlenmin ); - - fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); - - seq = AllocateCharMtx( njob, nlenmax+1 ); - name = AllocateCharMtx( njob, B+1 ); - nlen = AllocateIntVec( njob ); - - readData_pointer_casepreserve( infp, name, nlen, seq ); - -// for( i=0; i<njob; i++ ) gappick_samestring( seq[i] ); - -#if 0 - FILE *origfp; - origfp = fopen( "_original", "w" ); - if( !origfp ) - { - fprintf( stderr, "Cannot open _original\n" ); - exit( 1 ); - } - for( i=0; 
i<njob; i++ ) - { - nlen[i] = strlen( seq[i] ); - fprintf( origfp, ">%s\n", name[i]+1 ); - if( seq[i][nlen[i]-1] == '\n' ) seq[i][nlen[i]-1] = 0; - fprintf( origfp, "%s\n", seq[i] ); - } - fclose( origfp ); -#endif - - if( dorp == 'p' ) - { - usual = "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv-."; - replace_unusual( njob, seq, usual, 'X', toupper ); - } - else - { - usual = "ATGCUatgcuBDHKMNRSVWYXbdhkmnrsvwyx-"; - replace_unusual( njob, seq, usual, 'n', tolower ); - } - - - - for( i=0; i<njob; i++ ) - { - fprintf( stdout, ">_os_%d_oe_%s\n", i+seedoffset, name[i]+1 ); - fprintf( stdout, "%s\n", seq[i] ); - } - - free( nlen ); - FreeCharMtx( seq ); - FreeCharMtx( name ); - - return( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/restoreu.c --- a/mafft/core/restoreu.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,260 +0,0 @@ -#include "mltaln.h" - -#define DEBUG 0 - -char *alignmentfile; - -static void fillorichar( int nseq, int *oripos, char **a, char **s ) -{ - int i; - char *pta, *pts; - for( i=0; i<nseq; i++ ) - { - pta = a[i]; - pts = s[oripos[i]]; - while( *pta ) - { - if( *pta != '-' ) *pta = *pts++; - if( *pta++ == 0 ) - { - fprintf( stderr, "ERROR!!\n" ); - fprintf( stderr, "alignment is inconsistent with the original sequences\n" ); - exit( 1 ); - } - } - if( *pts != 0 ) - { - fprintf( stderr, "ERROR!!\n" ); - fprintf( stderr, "alignment is inconsistent with the original sequences\n" ); - exit( 1 ); - } - } -} - -void arguments( int argc, char *argv[] ) -{ - int c; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( (c = *++argv[0]) ) - { - switch( c ) - { - case 'i': - inputfile = *++argv; - --argc; - goto nextoption; - case 'a': - alignmentfile = *++argv; - --argc; - goto nextoption; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc != 0 ) - { - fprintf( stderr, "options: Check source file !\n" ); - exit( 1 ); - } -} - - - -int main( int argc, char *argv[] ) -{ - FILE *infp; - FILE *alfp; - char **name; - char **aname; - char **oname; - char **seq; - char **aseq; - int *nlen; - int *oripos; - char *npt, *npt0, *npt2, *pt, *pt2; - int i, o, prelen; - int nlenmin; - int njobs, njoba; - - arguments( argc, argv ); - - if( inputfile ) - { - infp = fopen( inputfile, "r" ); - if( !infp ) - { - fprintf( stderr, "Cannot open %s\n", inputfile ); - exit( 1 ); - } - } - else - infp = stdin; - - if( alignmentfile ) - { - alfp = fopen( alignmentfile, "r" ); - if( !alfp ) - { - fprintf( stderr, "Cannot open %s\n", alignmentfile ); - exit( 1 ); - } - } - else - { - fprintf( stderr, "No alignment is given.\n" ); - exit( 1 ); - } - - dorp = NOTSPECIFIED; - getnumlen_casepreserve( infp, &nlenmin ); - njobs = njob; -// fprintf( stderr, "in infp, %d x %d - %d %c\n", njob, nlenmin, nlenmax, dorp ); - 
- seq = AllocateCharMtx( njob, nlenmax+1 ); - name = AllocateCharMtx( njob, B+1 ); - nlen = AllocateIntVec( njob ); - oripos = AllocateIntVec( njob ); - readData_pointer_casepreserve( infp, name, nlen, seq ); - - dorp = NOTSPECIFIED; - getnumlen( alfp ); - njoba = njob; -// fprintf( stderr, "in alfp, %d x %d %c\n", njob, nlenmax, dorp ); - aseq = AllocateCharMtx( njob, nlenmax+1 ); - aname = AllocateCharMtx( njob, B+1 ); - oname = AllocateCharMtx( njob, B+1 ); - readData_pointer( alfp, aname, nlen, aseq ); - - for( i=0; i<njob; i++ ) gappick_samestring( seq[i] ); - - if( njoba != njobs ) - { - fprintf( stderr, "ERROR!!\n" ); - fprintf( stderr, "In input file,\n" ); - fprintf( stderr, "njob = %d\n", njobs ); - fprintf( stderr, "but in alignment file,\n" ); - fprintf( stderr, "njob = %d\n", njoba ); - exit( 1 ); - } - - for( i=0; i<njob; i++ ) - { -#if 0 - if( strstr( aname[i], "_seed_" ) ) - { - npt2 = aname[i] + 7; - strcpy( oname[i], "=_seed_" ); - } - else - { - npt2 = aname[i] + 1; - strcpy( oname[i], "=" ); - } - - fprintf( stderr, "npt2 = %s\n", npt2 ); - - o = oripos[i] = atoi( npt2 ); - npt = strstr( npt2, "_oe_" ); - if( npt == NULL ) - { - fprintf( stderr, "Format error!\n" ); - exit( 1 ); - } - npt += 4; - strcat( oname[i], npt+1 ); -#endif - npt0 = strstr( aname[i], "_os_" ); - if( npt0 == NULL ) - { - fprintf( stderr, "Format error!\n" ); - exit( 1 ); - } - npt2 = npt0 + 4; - o = oripos[i] = atoi( npt2 ); - - npt = strstr( aname[i], "_oe_" ); - if( npt == NULL ) - { - fprintf( stderr, "Format error!\n" ); - exit( 1 ); - } - npt += 4; - - pt2 = npt; - pt = npt2 - 4; - while( *pt ) *pt++ = *pt2++; // okashii - - prelen = npt0-aname[i]; - strncpy( oname[i], aname[i], prelen ); oname[i][prelen] = 0; - strcat( oname[i], npt0 ); - -#if 0 - pt = strstr( aname[i], "_numo_e" ); - if( pt ) pt += 8; - else pt = aname[i] + 1; - - if( strstr( pt, "_seed_" ) ) pt += 6; - - fprintf( stderr, "pt = :%s:\n", pt ); - -#endif -// fprintf( stderr, "npt0 = :%s:\n", npt0 ); 
-// -// reporterr( "oname[i] = %s\n", oname[i] ); -// reporterr( "aname[i] = %s\n", aname[i] ); -// reporterr( " name[i] = %s\n", name[i] ); - -// fprintf( stderr, "aname[i] = :%s:\n", aname[i] ); -// fprintf( stderr, "pt = :%s:\n", pt ); -// fprintf( stderr, "oname[i] = :%s:\n", oname[i] ); -// fprintf( stderr, "name[o] = :%s:\n", name[o] ); - - if( strncmp( npt0, name[o]+1, 10 ) ) - { - fprintf( stderr, "ERROR!!\n" ); - fprintf( stderr, "In input file,\n" ); - fprintf( stderr, "name[%d] = %s\n", o, name[o] ); - fprintf( stderr, "but in alignment file,\n" ); - fprintf( stderr, "oname[%d] = %s\n", i, oname[i] ); - fprintf( stderr, "npt0 = %s\n", npt0 ); - fprintf( stderr, "prelen = %d\n", prelen ); - fprintf( stderr, "name[%d] = %s\n", i, aname[i] ); - exit( 1 ); - } -#if 0 - else - { - fprintf( stderr, "OK!!\n" ); - fprintf( stderr, "In input file,\n" ); - fprintf( stderr, "name[%d] = %s\n", o, name[o] ); - fprintf( stderr, "and in alignment file,\n" ); - fprintf( stderr, "name[%d] = %s\n", i, aname[i] ); - fprintf( stderr, "\n" ); - } -#endif - } -// fprintf( stderr, "seq[0] = %s\n", seq[0] ); -// fprintf( stderr, "aseq[0] = %s\n", aseq[0] ); - - fillorichar( njob, oripos, aseq, seq ); - - - writeData_pointer( stdout, njob, oname, nlen, aseq ); - - FreeCharMtx( seq ); - FreeCharMtx( aseq ); - FreeCharMtx( name ); - FreeCharMtx( aname ); - FreeCharMtx( oname ); - free( nlen ); - free( oripos ); - - return( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/rna.c --- a/mafft/core/rna.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,527 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define MEMSAVE 1\n-\n-#define DEBUG 1\n-#define USE_PENALTY_EX 1\n-#define STOREWM 1\n-\n-\n-\n-#if 0\n-static float singleribosumscore( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2 )\n-{\n-\tfloat val;\n-\tint i, j;\n-\tint code1, code2;\n-\n-\tval = 0.0;\n-\tfor( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )\n-\t{\n-\t\tcode1 = amino_n[(int)s1[i][p1]];\n-\t\tif( code1 > 3 ) code1 = 36;\n-\t\tcode2 = amino_n[(int)s2[j][p2]];\n-\t\tif( code2 > 3 ) code2 = 36;\n-\n-//\t\tfprintf( stderr, "\'l\'%c-%c: %f\\n", s1[i][p1], s2[j][p2], (float)ribosumdis[code1][code2] );\n-\n-\t\tval += (float)ribosumdis[code1][code2] * eff1[i] * eff2[j];\n-\t}\n-\treturn( val );\n-}\n-static float pairedribosumscore53( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )\n-{\n-\tfloat val;\n-\tint i, j;\n-\tint code1o, code1u, code2o, code2u, code1, code2;\n-\n-\tval = 0.0;\n-\tfor( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )\n-\t{\n-\t\tcode1o = amino_n[(int)s1[i][p1]];\n-\t\tcode1u = amino_n[(int)s1[i][c1]];\n-\t\tif( code1o > 3 ) code1 = code1o = 36;\n-\t\telse if( code1u > 3 ) code1 = 36;\n-\t\telse code1 = 4 + code1o * 4 + code1u;\n-\n-\t\tcode2o = amino_n[(int)s2[j][p2]];\n-\t\tcode2u = amino_n[(int)s2[j][c2]];\n-\t\tif( code2o > 3 ) code2 = code1o = 36;\n-\t\telse if( code2u > 3 ) code2 = 36;\n-\t\telse code2 = 4 + code2o * 4 + code2u;\n-\n-\n-//\t\tfprintf( stderr, "%c%c-%c%c: %f\\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (float)ribosumdis[code1][code2] );\n-\n-\t\tif( code1 == 36 || code2 == 36 )\n-\t\t\tval += (float)n_dis[code1o][code2o] * eff1[i] * eff2[j];\n-\t\telse\n-\t\t\tval += (float)ribosumdis[code1][code2] * eff1[i] * eff2[j];\n-\t}\n-\treturn( val );\n-}\n-\n-static float pairedribosumscore35( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )\n-{\n-\tfloat val;\n-\tint i, 
j;\n-\tint code1o, code1u, code2o, code2u, code1, code2;\n-\n-\tval = 0.0;\n-\tfor( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )\n-\t{\n-\t\tcode1o = amino_n[(int)s1[i][p1]];\n-\t\tcode1u = amino_n[(int)s1[i][c1]];\n-\t\tif( code1o > 3 ) code1 = code1o = 36;\n-\t\telse if( code1u > 3 ) code1 = 36;\n-\t\telse code1 = 4 + code1u * 4 + code1o;\n-\n-\t\tcode2o = amino_n[(int)s2[j][p2]];\n-\t\tcode2u = amino_n[(int)s2[j][c2]];\n-\t\tif( code2o > 3 ) code2 = code1o = 36;\n-\t\telse if( code2u > 3 ) code2 = 36;\n-\t\telse code2 = 4 + code2u * 4 + code2o;\n-\n-\n-//\t\tfprintf( stderr, "%c%c-%c%c: %f\\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (float)ribosumdis[code1][code2] );\n-\n-\t\tif( code1 == 36 || code2 == 36 )\n-\t\t\tval += (float)n_dis[code1o][code2o] * eff1[i] * eff2[j];\n-\t\telse\n-\t\t\tval += (float)ribosumdis[code1][code2] * eff1[i] * eff2[j];\n-\t}\n-\treturn( val );\n-}\n-#endif\n-\n-\n-static void mccaskillextract( char **seq, char **nogap, int nseq, RNApair **pairprob, RNApair ***single, int **sgapmap, double *eff )\n-{\n-\tint lgth;\n-\tint nogaplgth;\n-\tint i, j;\n-\tint left, right, adpos;\n-\tfloat prob;\n-\tstatic TLS int *pairnum;\n-\tRNApair *pt, *pt2;\n-\n-\tlgth = strlen( seq[0] );\n-\tpairnum = calloc( lgth, sizeof( int ) );\n-\tfor( i=0; i<lgth; i++ ) pairnum[i] = 0;\n-\n-\tfor( i=0; i<nseq; i++ )\n-\t{\n-\t\tnogaplgth = strlen( nogap[i] );\n-\t\tfor( j=0; j<nogaplgth; j++ ) for( pt=single[i][j]; pt->bestpos!=-1; pt++ )\n-\t\t{\n-\t\t\tleft = sgapmap[i][j];\n-\t\t\tright = sgapmap[i][pt->bestpos];\n-\t\t\tprob = pt->bestscore;\n-\n-\n-\t\t\tfor( pt2=pairprob[left]; pt2->bestpos!=-1; pt2++ )\n-\t\t\t\tif( pt2->bestpos == right ) break;\n-\n-//\t\t\tfprintf( stderr, "i,j=%d,%d, left=%d, right=%d, pt=%d, pt2->bestpos = %d\\n", i, j, left, right, pt-single[i][j], pt2->bestpos );\n-\t\t\tif( pt2->bestpos == -1 )\n-\t\t\t{\n-\t\t\t\tpairprob[left] = (RNApair *)realloc( pairprob[left], (pairnum[left]+2) * sizeof( RNApair ) );\n-\t\t\t\tadpos = 
pairnum[left];\n-\t\t\t\tpairnum[left]++;\n-\t\t\t\tpairprob[left][adpos].bestscore = 0.0;\n-\t\t\t\tpairprob[left][adpos].bestpos = right;\n-\t\t\t\tpairprob[left][adpos+1].bestscore = -1.0;\n-\t\t\t\tpairprob[left][adpos+1].bestpos = -1;\n-\t\t\t\tpt2 = pairprob[left]+adpos;\n-\t\t\t}\n-\t\t\telse\n-\t\t\t\tadpos = pt2-pairprob[left];\n-\n-\t\t'..b' i<lgth1; i++ )\n-\t{\n-\t\tpairprob1[i] = (RNApair *)calloc( 1, sizeof( RNApair ) );\n-\t\tpairprob1[i][0].bestpos = -1;\n-\t\tpairprob1[i][0].bestscore = -1;\n-\t}\n-\tfor( i=0; i<lgth2; i++ )\n-\t{\n-\t\tpairprob2[i] = (RNApair *)calloc( 1, sizeof( RNApair ) );\n-\t\tpairprob2[i][0].bestpos = -1;\n-\t\tpairprob2[i][0].bestscore = -1;\n-\t}\n-\n-\tutot( nseq1, lgth1, oseq1 );\n-\tutot( nseq2, lgth2, oseq2 );\n-\n-//\tfprintf( stderr, "folding group1\\n" );\n-//\trnalocal( oseq1, useq1, eff1, eff1, nseq1, nseq1, lgth1+10, pair1 );\n-\n-/* base-pairing probability of group 1 */\n-\tif( rnaprediction == \'r\' )\n-\t\trnaalifoldcall( oseq1, nseq1, pairprob1 );\n-\telse\n-\t\tmccaskillextract( oseq1, useq1, nseq1, pairprob1, grouprna1, sgapmap1, eff1 );\n-\n-\n-//\tfprintf( stderr, "folding group2\\n" );\n-//\trnalocal( oseq2, useq2, eff2, eff2, nseq2, nseq2, lgth2+10, pair2 );\n-\n-/* base-pairing probability of group 2 */\n-\tif( rnaprediction == \'r\' )\n-\t\trnaalifoldcall( oseq2, nseq2, pairprob2 );\n-\telse\n-\t\tmccaskillextract( oseq2, useq2, nseq2, pairprob2, grouprna2, sgapmap2, eff2 );\n-\n-\n-\n-#if 0\n-\tmakerseq( oseq1, oseq1r, odir1, pairprob1, nseq1, lgth1 );\n-\tmakerseq( oseq2, oseq2r, odir2, pairprob2, nseq2, lgth2 );\n-\n-\tfprintf( stderr, "%s\\n", odir2 );\n-\n-\tfor( i=0; i<nseq1; i++ )\n-\t{\n-\t\tfprintf( stdout, ">ori\\n%s\\n", oseq1[0] );\n-\t\tfprintf( stdout, ">rev\\n%s\\n", oseq1r[0] );\n-\t}\n-#endif\n-\n-/* similarity score */\n-\tLalignmm_hmout( oseq1, oseq2, eff1, eff2, nseq1, nseq2, 10000, NULL, NULL, NULL, NULL, map );\n-\n-\tif( 1 )\n-\t{\n-\t\tif( RNAscoremtx == \'n\' 
)\n-\t\t{\n-\t\t\tfor( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )\n-\t\t\t{\n-//\t\t\t\timpmtx2[i][j] = osoiaveragescore( nseq1, nseq2, oseq1, oseq2, eff1, eff2, i, j ) * consweight_multi;\n-\t\t\t\timpmtx2[i][j] = 0.0;\n-\t\t\t}\n-\t\t}\n-\t\telse if( RNAscoremtx == \'r\' )\n-\t\t{\n-\t\t\tfprintf( stderr, "Unexpected error. Please contact kazutaka.katoh@aist.go.jp\\n" );\n-\t\t}\n-\n-\n-/* four-way consistency */\n-\n-\t\tfor( i=0; i<lgth1; i++ ) for( pairpt1=pairprob1[i]; pairpt1->bestpos!=-1; pairpt1++ )\n-\t\t{\n-\n-//\t\t\tif( pairprob1[i] == NULL ) continue;\n-\n-\t\t\tfor( j=0; j<lgth2; j++ ) for( pairpt2=pairprob2[j]; pairpt2->bestpos!=-1; pairpt2++ )\n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "i=%d, j=%d, pn1=%d, pn2=%d\\n", i, j, pairpt1-pairprob1[i], pairpt2-pairprob2[j] ); \n-//\t\t\t\tif( pairprob2[j] == NULL ) continue;\n-\n-\t\t\t\tuido = pairpt1->bestpos;\n-\t\t\t\tujdo = pairpt2->bestpos;\n-\t\t\t\tprob = pairpt1->bestscore * pairpt2->bestscore;\n-//\t\t\t\tprob = 1.0;\n-//\t\t\t\tfprintf( stderr, "i=%d->uido=%d, j=%d->ujdo=%d\\n", i, uido, j, ujdo );\n-\n-//\t\t\t\tfprintf( stderr, "impmtx2[%d][%d] = %f\\n", i, j, impmtx2[i][j] );\n-\n-//\t\t\t\tif( i < uido && j > ujdo ) continue;\n-//\t\t\t\tif( i > uido && j < ujdo ) continue;\n-\n-\n-//\t\t\t\tposdistj = abs( ujdo-j );\n-\n-//\t\t\t\tif( uido > -1 && ujdo > -1 ) \n-\t\t\t\tif( uido > -1 && ujdo > -1 && ( ( i > uido && j > ujdo ) || ( i < uido && j < ujdo ) ) )\n-\t\t\t\t{\n-\t\t\t\t\t{\n-\t\t\t\t\t\timpmtx2[i][j] += MAX( 0, map[uido][ujdo] ) * consweight_rna * 600 * prob; // osoi\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\n-\t\t\t}\n-\t\t}\n-\t\tfor( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )\n-\t\t{\n-\t\t\timpmtx[i][j] += impmtx2[i][j];\n-//\t\t\tfprintf( stderr, "fastathreshold=%f, consweight_multi=%f, consweight_rna=%f\\n", fastathreshold, consweight_multi, consweight_rna );\n-//\t\t\timpmtx[i][j] *= 0.5;\n-\t\t}\n-\n-//\t\timpmtx[0][0] += 10000.0;\n-//\t\timpmtx[lgth1-1][lgth2-1] += 
10000.0;\n-\n-\n-\n-#if 0\n-\t\tfprintf( stdout, "#impmtx2 = \\n" );\n-\t\tfor( i=0; i<lgth1; i++ )\n-\t\t{\n-\t\t\tfor( j=0; j<lgth2; j++ )\n-\t\t\t{\n-\t\t\t\tfprintf( stdout, "%d %d %f\\n", i, j, impmtx2[i][j] );\n-\t\t\t}\n-\t\t\tfprintf( stdout, "\\n" );\n-\t\t}\n-\t\texit( 1 );\n-#endif\n-\t}\n-\n-\tFreeCharMtx( useq1 );\n-\tFreeCharMtx( useq2 );\n-\tFreeCharMtx( oseq1 );\n-\tFreeCharMtx( oseq2 );\n-\tFreeCharMtx( oseq1r );\n-\tFreeCharMtx( oseq2r );\n-\tfree( odir1 );\n-\tfree( odir2 );\n-\tFreeFloatMtx( impmtx2 );\n-\tFreeFloatMtx( map );\n-\tFreeIntMtx( sgapmap1 );\n-\tFreeIntMtx( sgapmap2 );\n-\tFreeFloatMtx( tbppmtx );\n-\n-\tfor( i=0; i<lgth1; i++ ) free( pairprob1[i] );\n-\tfor( i=0; i<lgth2; i++ ) free( pairprob2[i] );\n-\tfree( pairprob1 );\n-\tfree( pairprob2 );\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/rnatest.c --- a/mafft/core/rnatest.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,460 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 0\n-\n-\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\n-\tinputfile = NULL;\n-\tfftkeika = 0;\n-\tpslocal = -1000.0;\n-\tconstraint = 0;\n-\tnblosum = 62;\n-\tfmodel = 0;\n-\tcalledByXced = 0;\n-\tdevide = 0;\n-\tuse_fft = 0;\n-\tfftscore = 1;\n-\tfftRepeatStop = 0;\n-\tfftNoAnchStop = 0;\n- weight = 3;\n- utree = 1;\n-\ttbutree = 1;\n- refine = 0;\n- check = 1;\n- cut = 0.0;\n- disp = 0;\n- outgap = 1;\n- alg = \'A\';\n- mix = 0;\n-\ttbitr = 0;\n-\tscmtd = 5;\n-\ttbweight = 0;\n-\ttbrweight = 3;\n-\tcheckC = 0;\n-\ttreemethod = \'x\';\n-\tcontin = 0;\n-\tscoremtx = 1;\n-\tkobetsubunkatsu = 0;\n-\tdivpairscore = 0;\n-\tdorp = NOTSPECIFIED;\n-\tppenalty = NOTSPECIFIED;\n-\tppenalty_OP = NOTSPECIFIED;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tppenalty_EX = NOTSPECIFIED;\n-\tpoffset = NOTSPECIFIED;\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\tgeta2 = GETA2;\n-\tfftWinSize = NOTSPECIFIED;\n-\tfftThreshold = NOTSPECIFIED;\n-\tRNAppenalty = NOTSPECIFIED;\n-\tRNApthr = NOTSPECIFIED;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( ( c = *++argv[0] ) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "inputfile = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'o\':\n-\t\t\t\t\tRNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'f\':\n-\t\t\t\t\tppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'g\':\n-\t\t\t\t\tppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'O\':\n-\t\t\t\t\tppenalty_OP = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'E\':\n-\t\t\t\t\tppenalty_EX = (int)( 
atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'h\':\n-\t\t\t\t\tpoffset = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'k\':\n-\t\t\t\t\tkimuraR = myatoi( *++argv );\n-//\t\t\t\t\tfprintf( stderr, "kimuraR = %d\\n", kimuraR );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'b\':\n-\t\t\t\t\tnblosum = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 1;\n-\t\t\t\t\tfprintf( stderr, "blosum %d\\n", nblosum );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'j\':\n-\t\t\t\t\tpamN = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 0;\n-\t\t\t\t\tTMorJTT = JTT;\n-\t\t\t\t\tfprintf( stderr, "jtt %d\\n", pamN );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'m\':\n-\t\t\t\t\tpamN = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 0;\n-\t\t\t\t\tTMorJTT = TM;\n-\t\t\t\t\tfprintf( stderr, "TM %d\\n", pamN );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'l\':\n-\t\t\t\t\tppslocal = (int)( atof( *++argv ) * 1000 + 0.5 );\n-\t\t\t\t\tpslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5);\n-//\t\t\t\t\tfprintf( stderr, "ppslocal = %d\\n", ppslocal );\n-//\t\t\t\t\tfprintf( stderr, "pslocal = %d\\n", pslocal );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-#if 1\n-\t\t\t\tcase \'a\':\n-\t\t\t\t\tfmodel = 1;\n-\t\t\t\t\tbreak;\n-#endif\n-\t\t\t\tcase \'r\':\n-\t\t\t\t\tfmodel = -1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'D\':\n-\t\t\t\t\tdorp = \'d\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'P\':\n-\t\t\t\t\tdorp = \'p\';\n-\t\t\t\t\tbreak;\n-#if 0\n-\t\t\t\tcase \'e\':\n-\t\t\t\t\tfftscore = 0;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'O\':\n-\t\t\t\t\tfftNoAnchStop = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'R\':\n-\t\t\t\t\tfftRepeatStop = 1;\n-\t\t\t\t\tbreak;\n-#endif\n-\t\t\t\tcase \'Q\':\n-\t\t\t\t\tcalledByXced = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'s\':\n-\t\t\t\t\ttreemethod = \'s\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase 
\'x\':\n-\t\t\t\t\tdisp = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'p\':\n-\t\t\t\t\ttreemethod = \'p\';\n-\t\t\t\t\tbreak;\n-#if 0\n-\t\t\t\tcase \'a\':\n-\t\t\t\t\talg = \'a\';\n-\t\t\t\t\tbreak;\n-#endif\n-\t\t\t\tcase \'S\':\n-\t\t\t\t\talg = \'S\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'L\':\n-\t\t\t\t\talg = \'L\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'M\':\n-\t\t\t\t\talg = \'M\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'R\':\n-\t\t\t\t\talg = \'R\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'N\':\n-\t\t\t\t\talg = \'N\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'A\':\n-\t\t\t\t\talg = \'A\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'V\':\n-\t\t\t\t\talg = \'V\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'C\':\n-\t\t\t\t\talg = \'C\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'F\':\n-\t\t\t\t\tuse_fft = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'v\':\n-\t\t\t\t\ttbrweight = 3;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'d\':\n-\t\t\t\t\tdivpairsc'..b'treemethod == \'s\' )\n-\t\tfprintf( fp, "Tree = UPGMA (2).\\n" );\n-\telse if( treemethod == \'p\' )\n-\t\tfprintf( fp, "Tree = UPGMA (1).\\n" );\n-\telse\n-\t\tfprintf( fp, "Unknown tree.\\n" );\n-\n- if( use_fft )\n- {\n- fprintf( fp, "FFT on\\n" );\n- if( dorp == \'d\' )\n- fprintf( fp, "Basis : 4 nucleotides\\n" );\n- else\n- {\n- if( fftscore )\n- fprintf( fp, "Basis : Polarity and Volume\\n" );\n- else\n- fprintf( fp, "Basis : 20 amino acids\\n" );\n- }\n- fprintf( fp, "Threshold of anchors = %d%%\\n", fftThreshold );\n- fprintf( fp, "window size of anchors = %dsites\\n", fftWinSize );\n- }\n-\telse\n- fprintf( fp, "FFT off\\n" );\n-\tfflush( fp );\n-}\n-\t \n-\n-int main( int argc, char *argv[] )\n-{\n-\tstatic int *nlen;\t\n-\tstatic char **name, **seq, **useq;\n-\tstatic char **mseq1, **mseq2;\n-\tstatic char **aseq;\n-\tstatic char **bseq;\n-\tstatic double *eff;\n-\tint i;\n-\tFILE *infp;\n-\tchar c;\n-\tint alloclen;\n-\tRNApair **pair1;\n-\tRNApair **pair2;\n-\tfloat **map;\n-\n-\targuments( argc, argv );\n-\n-\tif( inputfile )\n-\t{\n-\t\tinfp = fopen( 
inputfile, "r" );\n-\t\tif( !infp )\n-\t\t{\n-\t\t\tfprintf( stderr, "Cannot open %s\\n", inputfile );\n-\t\t\texit( 1 );\n-\t\t}\n-\t}\n-\telse\n-\t\tinfp = stdin;\n-\n-\tgetnumlen( infp );\n-\trewind( infp );\n-\n-\tif( njob > M )\n-\t{\n-\t\tfprintf( stderr, "The number of sequences must be < %d\\n", M );\n-\t\tfprintf( stderr, "Please try the splittbfast program for such large data.\\n" );\n-\t\texit( 1 );\n-\t}\n-\n- name = AllocateCharMtx( njob, B+1 );\n- nlen = AllocateIntVec( njob ); \n-\n-\tseq = AllocateCharMtx( njob, nlenmax*5+1 );\n-\tuseq = AllocateCharMtx( njob, nlenmax*5+1 );\n-\taseq = AllocateCharMtx( njob, nlenmax*5+1 );\n-\tbseq = AllocateCharMtx( njob, nlenmax*5+1 );\n-\tmseq1 = AllocateCharMtx( njob, 0 );\n-\tmseq2 = AllocateCharMtx( njob, 0 );\n-\talloclen = nlenmax*5;\n-\n-\tpair1 = calloc( nlenmax*5+1, sizeof( RNApair *) );\n-\tpair2 = calloc( nlenmax*5+1, sizeof( RNApair *) );\n-\tmap = AllocateFloatMtx( nlenmax+1, nlenmax );\n-\n-\teff = AllocateDoubleVec( njob );\n-\n-\treadData_pointer( infp, name, nlen, seq );\n-\tfclose( infp );\n-\n-\tfor( i=0; i<njob; i++ ) strcpy( useq[i], seq[i] );\n-\n-\tconstants( njob, seq );\n-\n-#if 0\n-\tfprintf( stderr, "params = %d, %d, %d\\n", penalty, penalty_ex, offset );\n-#endif\n-\n-\tinitSignalSM();\n-\n-\tinitFiles();\n-\n-\tWriteOptions( trap_g );\n-\n-\tc = seqcheck( seq );\n-\tif( c )\n-\t{\n-\t\tfprintf( stderr, "Illeagal character %c\\n", c );\n-\t\texit( 1 );\n-\t}\n-\n-//\twritePre( njob, name, nlen, seq, 0 );\n-\n-\tfor( i=0; i<njob; i++ ) eff[i] = 1.0;\n-\n-//\tfor( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );\n-\n-\n-\tfprintf( stderr, "folding group1\\n" );\n-//\trnalocal( seq, useq, eff, eff, njob, njob, alloclen, pair1 );\n-\trnaalifoldcall( seq, njob, pair1 );\n-\texit( 1 );\n-\n-#if 0\n-\tfprintf( stderr, "folding group1\\n" );\n-\trnalocal( seq+1, useq+1, eff+1, eff+1, 1, 1, alloclen, pair2 );\n-\tfprintf( stderr, "aligning 1 and 2, phase 1\\n" );\n-\tLalignmm_hmout( seq, seq+1, 
eff, eff+1, 1, 1, alloclen, NULL, NULL, NULL, NULL, map );\n-\n-\n-#if 0\n-\tlgth1 = strlen( seq[0] );\n-\tfor( i=0; i<lgth1; i++ )\n-\t{\n-\t\tfprintf( stderr, "\\n" );\n-\t\tif( pair1[i].pos == -1 ) continue;\n-\t\tfprintf( stderr, "i=%d (%c):%d", i, seq[0][i], pair1[i].pos );\n-\t\tif( map12[pair1[i].pos].pos == -1 ) continue;\n-\t\tfprintf( stderr, "%c -> %c ", seq[0][pair1[i].pos], seq[1][map12[pair1[i].pos].pos] );\n-\t\tif( pair2[map12[pair1[i].pos].pos].pos == -1 ) continue;\n-\t\tfprintf( stderr, "%d:%d (%c)", map12[pair1[i].pos].pos, pair2[map12[pair1[i].pos].pos].pos, seq[1][pair2[map12[pair1[i].pos].pos].pos] );\n-\t}\n-#endif\n-\n-\n-\texit( 1 );\n-\n-\n-\tpairalign( name, nlen, bseq, aseq, mseq1, mseq2, eff, alloclen ); \n-\tfprintf( trap_g, "done.\\n" );\n-#if DEBUG\n-\tfprintf( stderr, "closing trap_g\\n" );\n-#endif\n-\tfclose( trap_g );\n-\n-//\twritePre( njob, name, nlen, aseq, !contin );\n-#if 0\n-\twriteData( stdout, njob, name, nlen, aseq );\n-#endif\n-#if IODEBUG\n-\tfprintf( stderr, "OSHIMAI\\n" );\n-#endif\n-\tSHOWVERSION;\n-\treturn( 0 );\n-#endif\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/score.c --- a/mafft/core/score.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,98 +0,0 @@ -#include "mltaln.h" - -#define DEBUG 0 - -void arguments( int argc, char *argv[] ) -{ - int c; - - ppenalty = NOTSPECIFIED; - ppenalty_ex = NOTSPECIFIED; - poffset = NOTSPECIFIED; - kimuraR = NOTSPECIFIED; - pamN = NOTSPECIFIED; - scoremtx = NOTSPECIFIED; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( ( c = *++argv[0] ) ) - { - switch( c ) - { - case 'f': - ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 ); - fprintf( stderr, "ppenalty = %d\n", ppenalty ); - --argc; - goto nextoption; - case 'g': - ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 ); - fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex ); - --argc; - goto nextoption; - case 'h': - poffset = (int)( atof( *++argv ) * 1000 - 0.5 ); - fprintf( stderr, "poffset = %d\n", poffset ); - --argc; - goto nextoption; - case 'k': - kimuraR = myatoi( *++argv ); - fprintf( stderr, "kimuraR = %d\n", kimuraR ); - --argc; - goto nextoption; - case 'D': - scoremtx = -1; - break; - case 'P': - scoremtx = 0; - break; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc == 1 ) - { - cut = atof( (*argv) ); - argc--; - } -} - - -int main( int ac, char **av ) -{ - int *nlen; - static char **name, **seq; - double score; - extern double score_calc_for_score( int, char ** ); - - arguments( ac, av ); - - getnumlen( stdin ); - rewind( stdin ); - - nlen = AllocateIntVec( njob ); - name = AllocateCharMtx( njob, B+1 ); - seq = AllocateCharMtx( njob, nlenmax+2 ); - - readData_pointer( stdin, name, nlen, seq ); - - if( !isaligned( njob, seq ) ) ErrorExit( "Not aligned." 
); - - constants( njob, seq ); - - score = score_calc_for_score( njob, seq ); - if( scoremtx == 0 ) score += offset; - - fprintf( stdout, "score = %f\n", score ); - if ( scoremtx == 0 ) fprintf( stdout, "JTT %dPAM\n", pamN ); - else if( scoremtx == 1 ) fprintf( stdout, "Dayhoff( machigai ga aru )\n" ); - else if( scoremtx == 2 ) fprintf( stdout, "M-Y\n" ); - else if( scoremtx == -1 ) fprintf( stdout, "DNA 1:%d\n", kimuraR ); - - fprintf( stdout, "gap penalty = %+6.2f, %+6.2f, %+6.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 ); - exit( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/seekquencer_premafft.tmpl --- a/mafft/core/seekquencer_premafft.tmpl Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,600 +0,0 @@\n-#!/usr/bin/perl\n-\n-####################################################################################\n-# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)\n-#\n-# Ver. Date Changelog\n-####################################################################################\n-# 1.0 11.01.13 Initial release\n-#\n-# **Skipped version 2 to standardise version numbers to seekquencer.pl script**\n-#\n-# 3.0 04.24.14 Added split option -mod <mafftash-split> for output\n-# Uses seekquencer_v3 backend\n-#\n-# 4.0 05.12.14 Added new options: -run <thread|normal> -trd <count> -noin\n-# Sets -seqa fast in seekquencer.pl\n-# Uses seekquencer_v4 backend\n-#\n-# 4.1 05.19.14 Added a check on running REST requests before proceeding\n-# to avoid server load problems\n-#\n-# 4.2 05.27.14 Seq limit processing done in seekquencer.pl script\n-# to avoid server load problems\n-#\n-# 4.3 07.22.14 Added new option: -seqd <uniref100|uniref90|uniref70|uniprot>\n-# Blast limit changed from factor of 10 to -blim option\n-# Timing on sleep changed; added srand() for making seed\n-# Moved the job limit processing to server side\n-#\n-# 4.4 08.05.14 Modified to work in multiple OS\n-#\n-#\n-####################################################################################\n-\n-use strict;\n-use Getopt::Long;\n-use File::Path qw(make_path remove_tree);\n-use Cwd;\n-use LWP::Simple;\n-use LWP::UserAgent;\n-\n-# to prevent error: Header line too long (limit is 8192)\n-use LWP::Protocol::http;\n-push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);\n-\n-\n-\n-my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft";\n-my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG );\n-my $OUTTYPE = "mafftash";\n-\n-my $SEQDATABASE = "uniref100";\n-my $SEQLIMIT = 100;\n-my $SEQBLASTLIMIT = 100;\n-\n-my $RUNMODE = "normal"; # thread|normal\n-my $THREADCOUNT = 3;\n-\n-\n-GetOptions\n-(\n- \'inp=s\' => 
\\$INPUTFILE,\n- \'idf=s\' => \\$IDLISTFILE,\n- \'seqf=s\' => \\$SEQFASTAFILE,\n- \'out=s\' => \\$OUTPUTFILE,\n- \'str\' => \\$STRFLAG,\n- \'seq\' => \\$SEQFLAG,\n- \'seqd=s\' => \\$SEQDATABASE,\n- \'lim=i\' => \\$SEQLIMIT,\n- \'blim=i\' => \\$SEQBLASTLIMIT,\n- \'pre\' => \\$EVALFLAG,\n- \'noin\' => \\$NOINFLAG,\n- \'mod=s\' => \\$OUTTYPE,\n- \'run=s\' => \\$RUNMODE,\n- \'trd=i\' => \\$THREADCOUNT,\n-\n-\n-);\n-\n-my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0;\n-print STDERR "[Seekquencer-premafft 4.4 on $^O]\\n";\n-\n-\n-# set temp directory\n-my $CWD = getcwd;\n-my $TMP = "$CWD/seekpremafft$$";\n-make_path($TMP) unless -d $TMP;\n-\n-\n-\n-######\n-# validation\n-help("Required parameter: define input as \'-inp\' or \'-idf\' or \'-seqf\'") if ( !defined $INPUTFILE && !defined $IDLISTFILE && !defined $SEQFASTAFILE );\n-help("\'-inp\' is already defined") if ( defined $INPUTFILE && (defined $IDLISTFILE || defined $SEQFASTAFILE) );\n-help("Input file $INPUTFILE does not exist (or filesize is 0)") if ( defined $INPUTFILE && (! -e $INPUTFILE || !-s $INPUTFILE) );\n-help("Input file $IDLISTFILE does not exist (or filesize is 0)") if ( defined $IDLISTFILE && (! -e $IDLISTFILE || !-s $IDLISTFILE) );\n-help("Input file $SEQFASTAFILE does not exist (or filesize is 0)") if ( defined $SEQFASTAFILE && (! 
-e $SEQFASTAFILE || !-s $SEQFASTAFILE) );\n-help("Required parameter: output file \'-out\'") unless ( defined $OUTPUTFILE );\n-help("Set either \'-str\' or \'-seq\' or dont set any at all") if ( defined $STRFLAG && defined $SEQFLAG );\n-\n-help("Invalid value for \'-seqd <uniref100|uniref90|uniref70|uniprot>\'") if ( $SEQDATABASE ne "uniref100" && $SEQDATABASE ne "uniref90" && $SEQDATABASE ne "uniref70" && $SEQDATABASE ne "uniprot");\n-help("Invalid value for \'-mod <fasta|mafftash|mafftash-split>\'") if ( $OUTTYPE ne "fasta" && $OUTTYPE ne "mafftash" && $OUTTYPE ne "mafftash-split" );\n-help("Invalid value for \'-run <thread|normal>\'") if ( $RUN'..b'ksum\n-{\n- my $infile = shift;\n-\n- # md5 binary check\n- my $MD5BIN = "";\n-\n- if ( -x "/usr/bin/md5sum" )\n- {\n- $MD5BIN = "/usr/bin/md5sum";\n- }\n- elsif ( -x "/sbin/md5" )\n- {\n- $MD5BIN = "/sbin/md5 -q";\n- }\n-\n- return "" if $MD5BIN eq "";\n-\n-\n- my $checksum = "";\n- open MD5EXE, "$MD5BIN $infile|" or return "";\n-\n- while(<MD5EXE>)\n- {\n- if (/^(\\S+)\\s+(\\S+)$/)\n- {\n- $checksum = $1;\n- last;\n- }\n- elsif (/^(\\S+)$/)\n- {\n- $checksum = $1;\n- last;\n- }\n- }\n-\n- close MD5EXE;\n-\n- return $checksum;\n-\n-}\n-\n-\n-sub backticks\n-{\n- my $command = shift;\n-\n- `$command`;\n- return ($? == -1) ? 
0 : 1;\n-}\n-\n-\n-sub bail\n-{\n- my $str = shift;\n- my $status = shift;\n-\n- #0 for success and 1 for error\n- $status = 1 unless defined;\n-\n- print STDERR "$str\\n" if defined $str;\n-\n- cleanup();\n-\n- exit($status);\n-}\n-\n-\n-sub cleanup\n-{\n- return if ($TMP eq "" || !-d $TMP);\n-\n- opendir(MAINDIR, $TMP);\n- my @files = readdir(MAINDIR);\n- closedir(MAINDIR);\n-\n- foreach my $file (@files)\n- {\n- unlink "$TMP/$file" if -e "$TMP/$file";\n- }\n-\n- remove_tree($TMP);\n-\n-}\n-\n-\n-sub appendToFile\n-{\n- my $inpfile = shift;\n- my $outfile = shift;\n-\n- open INPF, "<$inpfile" or bail("Server Error: Error in reading file.");\n- open OUTF, ">>$outfile" or bail("Server Error: Error in writing to file.");\n-\n- while(<INPF>)\n- {\n- print OUTF $_;\n- }\n-\n- close OUTF;\n- close INPF;\n-}\n-\n-\n-\n-sub help\n-{\n- my $str = shift;\n-\n- print <<\'HELPME\';\n-\n-USAGE\n- ./seekquencer_premafft.pl -inp <INFILE> -out <OUTFILE> [-str|-seq]\n- ./seekquencer_premafft.pl -idf <LISTFILE> -seqf <SEQFASTA> -out <OUTFILE> [-str|-seq]\n-\n-\n-PARAMETERS\n- -inp <INFILE>\n- INFILE is a FASTA-formatted file\n- PDB entries are written as:\n- >PDBID\n- [5-character pdbid+chain]\n-\n- While sequence entries are written as:\n- >[id]\n- [sequence]\n-\n- -idf <LISTFILE>\n- IDLISTFILE is a file containing a list of pdbids\n- pdbids should be a 5-character pdbid + chain\n-\n- -seqf <SEQFASTA>\n- SEQFASTA is a fasta file\n- entries are written as:\n- >[id]\n- [sequence]\n-\n- -out <OUTFILE>\n- Results are writen to a file named OUTFILE\n-\n- -str\n- Only structures will be collected by Seekquencer\n- If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer\n-\n- -seq\n- Only sequences will be collected by Seekquencer\n- If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer\n-\n-\n-OPTIONAL PARAMETERS:\n- -seqd <uniref100|uniref90|uniref70|uniprot>\n- Search Database for sequence homologs. 
Default value: uniref100\n-\n- -lim <count>\n- this sets the maximum number of sequence homologs collected. Default value: 100\n-\n- -blim <count>\n- this sets the -b and -v value when running blastall. Default value: 100\n-\n- -pre\n- When -str is set, this will compare all structures against all using pdp-ash\n- This would ensure that all structures collected are matching\n- All structures that do not match will be removed\n-\n- -noin\n- When set, inputs will not be included in the output\n-\n- -mod <mafftash|mafftash-split|fasta>\n- Defines the output format\n- mafftash (default) will print a mafftash-formatted fasta file\n- mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq)\n- fasta will print a regular fasta file\n-\n- -run <thread|normal>\n- thread will run simultaneous jobs during blast queries (faster but takes more nodes)\n- normal will run sequential blast queries (slower but takes less nodes)\n- Default value: normal\n-\n- -trd <count>\n- if -run <thread> is defined, this sets the number of parallel jobs to run. Default value: 3\n-\n-\n-HELPME\n-\n- bail($str);\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/seq2regtable.c --- a/mafft/core/seq2regtable.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,88 +0,0 @@ -#include "mltaln.h" - -#define DEBUG 0 - -char *weboutfile = NULL; - - -void arguments( int argc, char *argv[] ) -{ - int c; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( (c = *++argv[0]) ) - { - switch( c ) - { - case 'i': - inputfile = *++argv; - fprintf( stderr, "inputfile = %s\n", inputfile ); - --argc; - goto nextoption; - case 'w': - weboutfile = *++argv; - fprintf( stderr, "weboutfile = %s\n", weboutfile ); - --argc; - goto nextoption; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc != 0 ) - { - fprintf( stderr, "options: Check source file !\n" ); - exit( 1 ); - } -} - - -int main( int argc, char *argv[] ) -{ - FILE *infp; - FILE *weboutfp; - int nlenmin; - int isaligned = 0; - - arguments( argc, argv ); - - if( inputfile ) - { - infp = fopen( inputfile, "r" ); - if( !infp ) - { - fprintf( stderr, "Cannot open %s\n", inputfile ); - exit( 1 ); - } - } - else - infp = stdin; - - if( weboutfile ) - { - weboutfp = fopen( weboutfile, "w" ); - if( !weboutfp ) - { - fprintf( stderr, "Cannot open %s\n", weboutfile ); - exit( 1 ); - } - } - - dorp = NOTSPECIFIED; - if( weboutfile ) - { - getnumlen_nogap_outallreg_web( infp, weboutfp, &nlenmin, &isaligned ); - if( isaligned ) fprintf( stdout, "Aligned\n" ); - else fprintf( stdout, "Not aligned\n" ); - } - else - getnumlen_nogap_outallreg( infp, &nlenmin ); - - return( 0 ); - -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/setcore.c --- a/mafft/core/setcore.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,503 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 1\n-\n-double corethr;\n-int coreext;\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\n-\tfftkeika = 1;\n-\tconstraint = 0;\n-\tnblosum = 62;\n-\tfmodel = 0;\n-\tcalledByXced = 0;\n-\tdevide = 0;\n-\tuse_fft = 0;\n-\tfftscore = 1;\n-\tfftRepeatStop = 0;\n-\tfftNoAnchStop = 0;\n- weight = 3;\n- utree = 1;\n-\ttbutree = 1;\n- refine = 0;\n- check = 1;\n- cut = 0.0;\n- disp = 0;\n- outgap = 1;\n- alg = \'A\';\n- mix = 0;\n-\ttbitr = 0;\n-\tscmtd = 5;\n-\ttbweight = 0;\n-\ttbrweight = 3;\n-\tcheckC = 0;\n-\ttreemethod = \'x\';\n-\tcontin = 0;\n-\tscoremtx = 0;\n-\tkobetsubunkatsu = 0;\n-\tdorp = NOTSPECIFIED;\n-\tppenalty = NOTSPECIFIED;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tpoffset = NOTSPECIFIED;\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\tgeta2 = GETA2;\n-\tfftWinSize = NOTSPECIFIED;\n-\tfftThreshold = NOTSPECIFIED;\n-\tcorethr = .5;\n-\tcoreext = 0;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( ( c = *++argv[0] ) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'f\':\n-\t\t\t\t\tppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\tfprintf( stderr, "ppenalty = %d\\n", ppenalty );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'g\':\n-\t\t\t\t\tppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\tfprintf( stderr, "ppenalty_ex = %d\\n", ppenalty_ex );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'h\':\n-\t\t\t\t\tpoffset = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\tfprintf( stderr, "poffset = %d\\n", poffset );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'k\':\n-\t\t\t\t\tkimuraR = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "kimuraR = %d\\n", kimuraR );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'b\':\n-\t\t\t\t\tnblosum = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 1;\n-\t\t\t\t\tfprintf( stderr, "blosum %d\\n", 
nblosum );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'j\':\n-\t\t\t\t\tpamN = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 0;\n-\t\t\t\t\tfprintf( stderr, "jtt %d\\n", pamN );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'l\':\n-\t\t\t\t\tfastathreshold = atof( *++argv );\n-\t\t\t\t\tconstraint = 2;\n-\t\t\t\t\tfprintf( stderr, "weighti = %f\\n", fastathreshold );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tcorethr = atof( *++argv );\n-\t\t\t\t\tfprintf( stderr, "corethr = %f\\n", corethr );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'m\':\n-\t\t\t\t\tfmodel = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'c\':\n-\t\t\t\t\tcoreext = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'r\':\n-\t\t\t\t\tfmodel = -1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'D\':\n-\t\t\t\t\tdorp = \'d\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'P\':\n-\t\t\t\t\tdorp = \'p\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'e\':\n-\t\t\t\t\tfftscore = 0;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'O\':\n-\t\t\t\t\tfftNoAnchStop = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'R\':\n-\t\t\t\t\tfftRepeatStop = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'Q\':\n-\t\t\t\t\tcalledByXced = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'s\':\n-\t\t\t\t\ttreemethod = \'s\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'x\':\n-\t\t\t\t\ttreemethod = \'x\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'p\':\n-\t\t\t\t\ttreemethod = \'p\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'a\':\n-\t\t\t\t\talg = \'a\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'A\':\n-\t\t\t\t\talg = \'A\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'S\':\n-\t\t\t\t\talg = \'S\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'C\':\n-\t\t\t\t\talg = \'C\';\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'F\':\n-\t\t\t\t\tuse_fft = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'v\':\n-\t\t\t\t\ttbrweight = 3;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'d\':\n-\t\t\t\t\tdisp = 1;\n-\t\t\t\t\tbreak;\n-\t\t\t\tcase \'o\':\n-\t\t\t\t\toutgap = 0;\n-\t\t\t\t\tbreak;\n-/* Modified 
01/08/27, default: user tree */\n-\t\t\t\tcase \'J\':\n-\t\t\t\t\ttbutree = 0;\n-\t\t\t\t\tbreak;\n-/* modification end. */\n-\t\t\t\tcase \'z\':\n-\t\t\t\t\tfftThreshold = myatoi( *++argv );\n-\t\t\t\t\t--argc; \n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'w\':\n-\t\t\t\t\tfftWinSize = myatoi( *++argv );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'Z\':\n-\t\t\t\t\tcheckC = 1;\n-\t\t\t\t\tbreak;\n- default:\n- fprintf( stderr, "illegal option %c\\n", c );\n- argc = 0;\n- break;\n- }\n-\t\t}\n-\t\tnextoption:\n-\t\t\t;\n-\t}\n- if( argc == 1 )\n- {\n- cut = atof( (*argv) );\n- argc--;\n- }\n- if( argc != 0 ) \n- {\n- fprintf( stderr, "options: Check source file !\\n" );\n- ex'..b'd be input!\\n"\n-\t\t\t\t\t\t "Only %d sequence found.\\n", njob ); \n-\t\texit( 1 );\n-\t}\n-\n-\tseq = AllocateCharMtx( njob, nlenmax*9+1 );\n-\tname = AllocateCharMtx( njob, B+1 );\n-\toseq = AllocateCharMtx( njob, nlenmax*9+1 );\n-\talloclen = nlenmax*9;\n-\n-\ttopol = AllocateIntCub( njob, 2, njob );\n-\tlen = AllocateDoubleMtx( njob, 2 );\n-\tpscore = AllocateDoubleMtx( njob, njob );\n-\teff = AllocateDoubleVec( njob );\n-\tnode0 = AllocateDoubleMtx( njob, njob );\n-\tnode1 = AllocateDoubleMtx( njob, njob );\n-\tgapc = AllocateDoubleVec( alloclen );\n-\tavgap = AllocateDoubleVec( alloclen );\n-\n-#if 0\n-\tRead( name, nlen, seq );\n-#else\n-\treadData_pointer( stdin, name, nlen, seq );\n-#endif\n-\n-\tconstants( njob, seq );\n-\n-#if 0\n-\tfprintf( stderr, "params = %d, %d, %d\\n", penalty, penalty_ex, offset );\n-#endif\n-\n-\tinitSignalSM();\n-\n-\tinitFiles();\n-\n-\tWriteOptions( trap_g );\n-\n-\tc = seqcheck( seq );\n-\tif( c )\n-\t{\n-\t\tfprintf( stderr, "Illeagal character %c\\n", c );\n-\t\texit( 1 );\n-\t}\n-\n-\twritePre( njob, name, nlen, seq, 0 );\n-\n-\tif( tbutree == 0 )\n-\t{\n-\t\tfor( i=1; i<njob; i++ ) \n-\t\t{\n-\t\t\tif( nlen[i] != nlen[0] ) \n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "Input pre-aligned seqences or make hat2.\\n" 
);\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-\t\t}\n-\t\tfor( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) \n-\t\t{\n-\t\t/*\n-\t\t\tpscore[i][j] = (double)score_calc1( seq[i], seq[j] );\n-\t\t*/\n-\t\t\tpscore[i][j] = (double)substitution_hosei( seq[i], seq[j] );\n-\t\t}\n-\t}\n-\telse\n-\t{\n-\t\tfprintf( stderr, "Loading \'hat2\' ... " );\n-\t\tprep = fopen( "hat2", "r" );\n-\t\tif( prep == NULL ) ErrorExit( "Make hat2." );\n-\t\treadhat2_pointer( prep, njob, name, pscore );\n-\t\tfclose( prep );\n-\t\tfprintf( stderr, "done.\\n" );\n-\n-#if 0\n-\t\tprep = fopen( "hat2_check", "w" );\n-\t\tWriteHat2( prep, njob, name, pscore );\n-\t\tfclose( prep );\n-#endif\n-\n-\t}\n-\n-\tfprintf( stderr, "Constructing dendrogram ... " );\n-\tif( treemethod == \'x\' )\n-\t\tsupg( njob, pscore, topol, len );\n-\telse if( treemethod == \'s\' )\n-\t\tspg( njob, pscore, topol, len );\n-\telse if( treemethod == \'p\' )\n-\t\tupg2( njob, pscore, topol, len );\n-\telse \n-\t\tErrorExit( "Incorrect tree\\n" );\n-\tfprintf( stderr, "done.\\n" );\n-\n-\tcountnode( njob, topol, node0 );\n-\tif( tbrweight )\n-\t{\n-\t\tweight = 3; \n-#if 0\n-\t\tutree = 0; counteff( njob, topol, len, eff ); utree = 1;\n-#else\n-\t\tcounteff_simple( njob, topol, len, eff );\n-#endif\n-\t}\n-\telse\n-\t{\n-\t\tfor( i=0; i<njob; i++ ) eff[i] = 1.0;\n-\t}\n-\n-\n-\tfor( i=0; i<nlenmax; i++ )\n-\t{\n-\t\tgapc[i] = 0.0;\n-\t\tfor( j=0; j<njob; j++ )\n-\t\t{\n-\t\t\tif( seq[j][i] == \'-\' ) gapc[i] += eff[j];\n-\t\t}\n-\t}\n-\n-\tgapmin = 1.0;\n-\twinsize = fftWinSize;\n-\tgoffset = winsize/2;\n-\ttmpavgap = 0.0;\n-\tcorestart = coreend = -1;\n-\tfor( i=0; i<winsize; i++ )\n-\t{\n-\t\ttmpavgap += gapc[i];\n-\t}\n-\tfor( i=winsize; i<nlenmax; i++ )\n-\t{\n-\t\tm = i - goffset;\n-\t\tavgap[m] = tmpavgap / winsize;\n-//\t\tfprintf( stdout, "%d %f %f\\n", m, avgap[m], gapc[i] );\n-\t\tif( avgap[m] < corethr )\n-\t\t{\n-\t\t\tif( corestart == -1 )\n-\t\t\t\tcorestart = i - winsize;\n-//\t\t\tfprintf( stdout, "ok, gapmin = 
%f, corestart = %d, coreend = %d\\n", gapmin, corestart, coreend );\n-\t\t\tif( avgap[m] < gapmin )\n-\t\t\t{ \n-\t\t\t\tgapmin = avgap[m];\n-\t\t\t}\n-\t\t\tcoreend = i;\n-\t\t}\n-\t\ttmpavgap -= gapc[i-winsize];\n-\t\ttmpavgap += gapc[i];\n-\t}\n-\tif( corestart == -1 || coreend == -1 )\n-\t{\n-\t\tcorestart = 0;\n-\t\tcoreend = nlenmax-1;\n-\t}\n-\n-\tfor( i=0; i<njob; i++ )\n-\t{\n-\t\tpt = oseq[i];\n-\t\tm = winsize;\n-\t\twhile( m-- ) *pt++ = \'-\';\n-\t\tfor( j=corestart; j<=coreend; j++ )\n-\t\t\t*pt++ = seq[i][j];\n-\t\tm = winsize;\n-\t\twhile( m-- ) *pt++ = \'-\';\n-\t\t*pt = 0;\n-\n-\t\tot = oseq[i]+winsize-1;\n-\t\tpt = seq[i]+corestart-1;\n-\t\tif( coreext ) m = winsize;\n-\t\telse m = 0;\n-\t\twhile( m && --pt > seq[i] )\n-\t\t\tif( *pt != \'-\' )\n-\t\t\t{\n-\t\t\t\t*ot-- = *pt;\n-\t\t\t\tm--;\n-\t\t\t}\n-\n-\t\tot = oseq[i]+winsize+coreend-corestart+1;\n-\t\tpt = seq[i]+coreend;\n-\t\tif( coreext ) m = winsize;\n-\t\telse m = 0;\n-\t\twhile( m && *(++pt) )\n-\t\t{\n-\t\t\tif( *pt != \'-\' ) \n-\t\t\t{\n-\t\t\t\t*ot++ = *pt;\n-\t\t\t\tm--;\n-\t\t\t}\n-\t\t}\n-\t\tfprintf( stdout, ">%s\\n", name[i] );\n-\t\tfprintf( stdout, "%s\\n", oseq[i] );\n-\t}\n-\n-\texit( 1 );\n-\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/setdirection.c --- a/mafft/core/setdirection.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,155 +0,0 @@ -#include "mltaln.h" - -#define DEBUG 0 - -char *directionfile; -static int show_R_ = 1; - -void arguments( int argc, char *argv[] ) -{ - int c; - - inputfile = NULL; - directionfile = NULL; - show_R_ = 1; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( (c = *++argv[0]) ) - { - switch( c ) - { - case 'd': - directionfile = *++argv; - fprintf( stderr, "directionfile = %s\n", directionfile ); - --argc; - goto nextoption; - case 'i': - inputfile = *++argv; - fprintf( stderr, "inputfile = %s\n", inputfile ); - --argc; - goto nextoption; - case 'r': - show_R_ = 0; - break; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc != 0 ) - { - fprintf( stderr, "options: Check source file !\n" ); - exit( 1 ); - } -} - - - -int main( int argc, char *argv[] ) -{ - FILE *infp; - FILE *difp; - int nlenmin; - char **name; - char **seq; - char *tmpseq; - char line[100]; - int *nlen; - int i; - - arguments( argc, argv ); - - if( inputfile ) - { - infp = fopen( inputfile, "r" ); - if( !infp ) - { - fprintf( stderr, "Cannot open %s\n", inputfile ); - exit( 1 ); - } - } - else - infp = stdin; - - if( directionfile ) - { - difp = fopen( directionfile, "r" ); - if( !difp ) - { - fprintf( stderr, "Cannot open %s\n", directionfile ); - exit( 1 ); - } - } - else - { - fprintf( stderr, "Give directionfile!\n" ); - } - - - dorp = NOTSPECIFIED; - getnumlen_casepreserve( infp, &nlenmin ); - - fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp ); - - seq = AllocateCharMtx( njob, nlenmax+1 ); - tmpseq = AllocateCharVec( MAX( B, nlenmax )+1 ); - name = AllocateCharMtx( njob, B+1 ); - nlen = AllocateIntVec( njob ); - - readData_pointer_casepreserve( infp, name, nlen, seq ); - - for( i=0; i<njob; i++ ) - { - fgets( line, 99, difp ); - if( line[0] != '_' ) - { - fprintf( stderr, "Format error!\n" ); - exit( 1 ); - } - if( line[1] == 'R' ) - { - sreverse( tmpseq, seq[i] ); - strcpy( seq[i], 
tmpseq ); - - strncpy( tmpseq, name[i]+1, B-3 ); - tmpseq[B-3] = 0; - if( show_R_ ) - { - strcpy( name[i]+1, "_R_" ); - strcpy( name[i]+4, tmpseq ); - } - else - { - strcpy( name[i]+1, tmpseq ); - } - } - else if( line[1] == 'F' ) - { - ; - } - else - { - fprintf( stderr, "Format error!\n" ); - exit( 1 ); - } - } - - - for( i=0; i<njob; i++ ) - { - fprintf( stdout, ">%s\n", name[i]+1 ); - fprintf( stdout, "%s\n", seq[i] ); - } - - free( nlen ); - FreeCharMtx( seq ); - FreeCharMtx( name ); - free( tmpseq ); - - return( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/sextet5.c --- a/mafft/core/sextet5.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,319 +0,0 @@ -#include "mltaln.h" -#include "mtxutl.h" - -#define DEBUG 0 -#define TEST 0 - -#define END_OF_VEC -1 - -static int maxl; -static int tsize; - -void arguments( int argc, char *argv[] ) -{ - int c; - - inputfile = NULL; - disopt = 0; - scoremtx = 1; - nblosum = 62; - dorp = NOTSPECIFIED; - - while( --argc > 0 && (*++argv)[0] == '-' ) - { - while ( ( c = *++argv[0] ) ) - { - switch( c ) - { - case 'i': - inputfile = *++argv; - fprintf( stderr, "inputfile = %s\n", inputfile ); - --argc; - goto nextoption; - case 'D': - dorp = 'd'; - break; - case 'P': - dorp = 'p'; - break; - case 'I': - disopt = 1; - break; - default: - fprintf( stderr, "illegal option %c\n", c ); - argc = 0; - break; - } - } - nextoption: - ; - } - if( argc != 0 ) - { - fprintf( stderr, "options: -i\n" ); - exit( 1 ); - } -} - -void seq_grp_nuc( int *grp, char *seq ) -{ - int tmp; - while( *seq ) - { - tmp = amino_grp[(int)*seq++]; - if( tmp < 4 ) - *grp++ = tmp; - else - fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) ); - } - *grp = END_OF_VEC; -} - -void seq_grp( int *grp, char *seq ) -{ - int tmp; - while( *seq ) - { - tmp = amino_grp[(int)*seq++]; - if( tmp < 6 ) - *grp++ = tmp; - else - fprintf( stderr, "WARNING : Unknown character %c\n", *(seq-1) ); - } - *grp = END_OF_VEC; -} - -void makecompositiontable_p( short *table, int *pointt ) -{ - int point; - - while( ( point = *pointt++ ) != END_OF_VEC ) - table[point]++; -} - -int commonsextet_p( short *table, int *pointt ) -{ - int value = 0; - short tmp; - int point; - static short *memo = NULL; - static int *ct = NULL; - static int *cp; - - if( !memo ) - { - memo = (short *)calloc( tsize, sizeof( short ) ); - if( !memo ) ErrorExit( "Cannot allocate memo\n" ); - ct = (int *)calloc( MIN( maxl, tsize)+1, sizeof( int ) ); - if( !ct ) ErrorExit( "Cannot allocate memo\n" ); - } - - cp = ct; - while( ( point = *pointt++ ) != END_OF_VEC ) - { - tmp = memo[point]++; - if( tmp < table[point] ) - value++; - if( tmp == 0 ) 
*cp++ = point; -// fprintf( stderr, "cp - ct = %d (tsize = %d)\n", cp - ct, tsize ); - } - *cp = END_OF_VEC; - - cp = ct; - while( *cp != END_OF_VEC ) - memo[*cp++] = 0; - - return( value ); -} - -void makepointtable_nuc( int *pointt, int *n ) -{ - int point; - register int *p; - - p = n; - point = *n++ * 1024; - point += *n++ * 256; - point += *n++ * 64; - point += *n++ * 16; - point += *n++ * 4; - point += *n++; - *pointt++ = point; - - while( *n != END_OF_VEC ) - { - point -= *p++ * 1024; - point *= 4; - point += *n++; - *pointt++ = point; - } - *pointt = END_OF_VEC; -} - -void makepointtable( int *pointt, int *n ) -{ - int point; - register int *p; - - p = n; - point = *n++ * 7776; - point += *n++ * 1296; - point += *n++ * 216; - point += *n++ * 36; - point += *n++ * 6; - point += *n++; - *pointt++ = point; - - while( *n != END_OF_VEC ) - { - point -= *p++ * 7776; - point *= 6; - point += *n++; - *pointt++ = point; - } - *pointt = END_OF_VEC; -} - -int main( int argc, char **argv ) -{ - int i, j; - FILE *fp, *infp; - char **seq; - int *grpseq; - char *tmpseq; - int **pointt; - static char **name; - static int nlen[M]; - double **mtx; - double **mtx2; - double score, score0; - static short *table1; - char b[B]; - - arguments( argc, argv ); - - if( inputfile ) - { - infp = fopen( inputfile, "r" ); - if( !infp ) - { - fprintf( stderr, "Cannot open %s\n", inputfile ); - exit( 1 ); - } - } - else - infp = stdin; - -#if 0 - PreRead( stdin, &njob, &nlenmax ); -#else - getnumlen( infp ); -#endif - rewind( infp ); - if( njob < 2 ) - { - fprintf( stderr, "At least 2 sequences should be input!\n" - "Only %d sequence found.\n", njob ); - exit( 1 ); - } - - name = AllocateCharMtx( njob, B+1 ); - tmpseq = AllocateCharVec( nlenmax+1 ); - seq = AllocateCharMtx( njob, nlenmax+1 ); - grpseq = AllocateIntVec( nlenmax+1 ); - pointt = AllocateIntMtx( njob, nlenmax+1 ); - mtx = AllocateDoubleMtx( njob, njob ); - mtx2 = AllocateDoubleMtx( njob, njob ); - pamN = NOTSPECIFIED; - -#if 0 
- FRead( infp, name, nlen, seq ); -#else - readData_pointer( infp, name, nlen, seq ); -#endif - - fclose( infp ); - - constants( njob, seq ); - - if( dorp == 'd' ) tsize = (int)pow( 4, 6 ); - else tsize = (int)pow( 6, 6 ); - - maxl = 0; - for( i=0; i<njob; i++ ) - { - gappick0( tmpseq, seq[i] ); - nlen[i] = strlen( tmpseq ); - if( nlen[i] < 6 ) - { - fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nlen[i] ); - exit( 1 ); - } - if( nlen[i] > maxl ) maxl = nlen[i]; - if( dorp == 'd' ) /* nuc */ - { - seq_grp_nuc( grpseq, tmpseq ); - makepointtable_nuc( pointt[i], grpseq ); - } - else /* amino */ - { - seq_grp( grpseq, tmpseq ); - makepointtable( pointt[i], grpseq ); - } - } - for( i=0; i<njob; i++ ) - { - table1 = (short *)calloc( tsize, sizeof( short ) ); - if( !table1 ) ErrorExit( "Cannot allocate table1\n" ); - if( i % 10 == 0 ) - { - fprintf( stderr, "%4d / %4d\r", i+1, njob ); - } - makecompositiontable_p( table1, pointt[i] ); - - for( j=i; j<njob; j++ ) - { - score = (double)commonsextet_p( table1, pointt[j] ); - mtx[i][j] = score; - } - free( table1 ); - } - for( i=0; i<njob; i++ ) - { - score0 = mtx[i][i]; - for( j=0; j<njob; j++ ) - mtx2[i][j] = ( score0 - mtx[MIN(i,j)][MAX(i,j)] ) / score0 * 3.0; - } - for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) - { -#if TEST - double jscore; - jscore = mtx[i][j] / ( MIN( strlen( seq[i] ), strlen( seq[j] ) ) - 2 ); - fprintf( stdout, "jscore = %f\n", jscore ); - - fprintf( stdout, "mtx2[%d][%d] = %f, mtx2[%d][%d] = %f\n", i, j, mtx2[i][j], j, i, mtx2[j][i] ); -#endif - mtx2[i][j] = MIN( mtx2[i][j], mtx2[j][i] ); -#if TEST - fprintf( stdout, "sonokekka mtx2[%d][%d] %f\n", i, j, mtx2[i][j] ); -#endif - } - - if( disopt ) - { - for( i=0; i<njob; i++ ) - { - sprintf( b, "=lgth = %04d", nlen[i] ); - strins( b, name[i] ); - } - } - - fp = fopen( "hat2", "w" ); - if( !fp ) ErrorExit( "Cannot open hat2." 
); - WriteHat2_pointer( fp, njob, name, mtx2 ); - fclose( fp ); - - fprintf( stderr, "\n" ); - SHOWVERSION; - exit( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/share.h --- a/mafft/core/share.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,16 +0,0 @@ -#if 0 -#include <sys/types.h> -#include <sys/ipc.h> -#include <sys/shm.h> - -#endif -#define IMA_YONDERU 'x' /* iranai */ -#define IMA_KAITERU 0 /* iranai */ -#define KAKIOWATTA 'w' -#define YOMIOWATTA 'r' -#define OSHIMAI 'd' -#define ISRUNNING 0 -#define SEMAPHORE 1 -#define STATUS 2 - -#define IPC_ALLOC 0100000 |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/splittbfast.c --- a/mafft/core/splittbfast.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,3220 +0,0 @@\n-#include "mltaln.h"\n-\n-\n-#define TREE 1\n-#define PICKSIZE 50 // must be >= 3\n-#define WEIGHT 0\n-#define TOKYORIPARA 0.70 // 0.70\n-#define TOKYORIPARA_A 0.70 // changed\n-#define LENFAC 1\n-#define HUKINTOTREE 1\n-#define DIANA 0\n-#define MAX6DIST 10.0\n-\n-// kouzoutai ni sasareru pointer ha static\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 0\n-\n-#define END_OF_VEC -1\n-\n-static char *fastapath;\n-static int doalign;\n-static int fromaln;\n-static int uselongest;\n-static int treeout;\n-static int classsize;\n-static int picksize;\n-static int maxl;\n-static int tsize;\n-static int reorder;\n-static int pid;\n-static int maxdepth = 0;\n-static double tokyoripara;\n-\n-static double lenfaca, lenfacb, lenfacc, lenfacd;\n-#define PLENFACA 0.01\n-#define PLENFACB 10000\n-#define PLENFACC 10000\n-#define PLENFACD 0.1\n-#define DLENFACA 0.01\n-#define DLENFACB 2500\n-#define DLENFACC 2500\n-#define DLENFACD 0.1\n-\n-static char datafile[1000];\n-static char queryfile[1000];\n-static char resultfile[1000];\n-\n-typedef struct _scores\n-{\n-\tdouble score;\n-\tint selfscore;\n-\tint orilen;\n-\tint *pointt;\n-\tint numinseq;\n-\tchar *name;\n-//\tchar *seq; // reallo\n-//\tchar **seqpt;\n-\tint shimon;\n-} Scores;\n-\n-int intcompare( const int *a, const int *b )\n-{\n-\treturn( *a - *b );\n-}\n-\n-int lcompare( const Scores *a, const Scores *b )\n-{\n-\tif( a->orilen < b->orilen ) return 1;\n-\telse if( a->orilen > b->orilen ) return -1;\n-\telse return 0;\n-}\n-\n-int dcompare( const Scores *a, const Scores *b )\n-{\n-\tif( a->score > b->score ) return 1;\n-\telse if( a->score < b->score ) return -1;\n-\telse\n-\t{\n-\t\tif( a->selfscore < b->selfscore ) return 1;\n-\t\telse if( a->selfscore > b->selfscore ) return -1;\n-\t\telse \n-\t\t{\n-\t\t\tif( a->orilen < b->orilen ) return 1;\n-\t\t\telse if( a->orilen > b->orilen ) return -1;\n-\t\t\telse return 0;\n-\t\t}\n-\t}\n-}\n-\n-\n-static void getfastascoremtx( int 
**tmpaminodis )\n-{\n-\tFILE *qfp;\n-\tFILE *dfp;\n-\tFILE *rfp;\n-\tint i, j;\n-\tchar aa;\n-\tint slen;\n-\tint res;\n-\tchar com[10000];\n-\tstatic char *tmpseq;\n-\tstatic char *tmpname;\n-\tdouble *resvec;\n-\n-\tif( scoremtx == -1 )\n-\t{\n-\t\ttmpaminodis[\'a\'][\'a\'] = 5;\n-\t\ttmpaminodis[\'g\'][\'g\'] = 5;\n-\t\ttmpaminodis[\'c\'][\'c\'] = 5;\n-\t\ttmpaminodis[\'t\'][\'t\'] = 5;\n-\t\ttmpaminodis[\'n\'][\'n\'] = -1;\n-\n-\t\treturn;\n-\t}\n-\n-\n-\ttmpseq = calloc( 2000, sizeof( char ) );\n-\ttmpname = calloc( B, sizeof( char ) );\n-\tresvec = calloc( 1, sizeof( double ) );\n-\n-//\tfprintf( stderr, "xformatting .. " );\n-\tdfp = fopen( datafile, "w" );\n-\tif( !dfp ) ErrorExit( "Cannot open datafile." );\n-\tsprintf( tmpname, ">+===========+%d ", 0 );\n-\tstrcpy( tmpseq, "AAAAAAXXXXXX" );\n-\tstrcat( tmpseq, "CCCCCCXXXXXX" );\n-\tstrcat( tmpseq, "DDDDDDXXXXXX" );\n-\tstrcat( tmpseq, "EEEEEEXXXXXX" );\n-\tstrcat( tmpseq, "FFFFFFXXXXXX" );\n-\tstrcat( tmpseq, "GGGGGGXXXXXX" );\n-\tstrcat( tmpseq, "HHHHHHXXXXXX" );\n-\tstrcat( tmpseq, "IIIIIIXXXXXX" );\n-\tstrcat( tmpseq, "KKKKKKXXXXXX" );\n-\tstrcat( tmpseq, "LLLLLLXXXXXX" );\n-\tstrcat( tmpseq, "MMMMMMXXXXXX" );\n-\tstrcat( tmpseq, "NNNNNNXXXXXX" );\n-\tstrcat( tmpseq, "PPPPPPXXXXXX" );\n-\tstrcat( tmpseq, "QQQQQQXXXXXX" );\n-\tstrcat( tmpseq, "RRRRRRXXXXXX" );\n-\tstrcat( tmpseq, "SSSSSSXXXXXX" );\n-\tstrcat( tmpseq, "TTTTTTXXXXXX" );\n-\tstrcat( tmpseq, "VVVVVVXXXXXX" );\n-\tstrcat( tmpseq, "WWWWWWXXXXXX" );\n-\tstrcat( tmpseq, "YYYYYYXXXXXX" );\n-\tslen = strlen( tmpseq );\n-\twriteData_pointer( dfp, 1, &tmpname, &slen, &tmpseq );\n-\tfclose( dfp );\n-\tfprintf( stderr, "done.\\n" );\n-\n-\tfor( i=0; i<20; i++ )\n-\t{\n-\t\taa = amino[i];\n-//\t\tfprintf( stderr, "checking %c\\n", aa );\n-\t\t*tmpseq = 0;\n-\t\tsprintf( tmpname, ">+===========+%d ", 0 );\n-\t\tfor( j=0; j<6; j++ )\n-\t\t\tsprintf( tmpseq+strlen( tmpseq ), "%c", aa );\n-\t\tqfp = fopen( queryfile, "w" );\n-\t\tif( !qfp ) ErrorExit( 
"Cannot open queryfile." );\n-\t\twriteData_pointer( qfp, 1, &tmpname, &slen, &tmpseq );\n-\t\tfclose( qfp );\n-\n-\t\tif( scoremtx == -1 ) \n-\t\t\tsprintf( com, "%s -z3 -m10 -n -Q -H -b%d -E%d -d%d %s %s %d > %s", fastapath, M, M, 0, queryfile, datafile, 6, resultfile );\n-\t'..b'\n-\t\tif( treeout )\n-\t\t{\n-\t\t\tfprintf( stderr, "\\n" );\n-\t\t\tfprintf( stderr, "A guide tree is in the \'%s\' file.\\n", treefile );\n-\t\t}\n-//\t\telse\n-//\t\t{\n-//\t\t\tfprintf( stderr, "To output guide tree,\\n" );\n-//\t\t\tfprintf( stderr, "%% %s -t -i %s\\n", progName( argv[0] ), "inputfile" );\n-//\t\t}\n-#endif\n-\t\tif( !doalign )\n-\t\t{\n-\t\t\tfprintf( stderr, "\\n" );\n-\t\t\tfprintf( stderr, "mafft --dpparttree might give a better result, although slow.\\n" );\n-\t\t\tfprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\\n" );\n-\t\t}\n-\t\tfprintf( stderr, "\\n" );\n-\t\tfprintf( stderr, "----------------------------------------------------------------------------\\n" );\n-\t}\n-\telse if( groupnum > 1 )\n-\t{\n-\t\tfprintf( stderr, "\\n\\n" );\n-\t\tfprintf( stderr, "----------------------------------------------------------------------------\\n" );\n-\t\tfprintf( stderr, "\\n" );\n-\t\tfprintf( stderr, "groupsize = %d, picksize=%d\\n", classsize, picksize );\n-\t\tfprintf( stderr, "The input sequences have been classified into %d groups + some paraphyletic groups\\n", groupnum );\n-\t\tfprintf( stderr, "Note that the alignment is not completed.\\n" );\n-\t\tif( reorder )\n-\t\t\tfprintf( stderr, "The order of sequences has been changed according to estimated similarity.\\n" );\n-#if TREE\n-\t\tif( treeout )\n-\t\t{\n-\t\t\tfprintf( stderr, "\\n" );\n-\t\t\tfprintf( stderr, "A guide tree is in the \'%s\' file.\\n", treefile );\n-\t\t}\n-//\t\telse\n-//\t\t{\n-//\t\t\tfprintf( stderr, "To output guide tree,\\n" );\n-//\t\t\tfprintf( stderr, "%% %s -t -i %s\\n", progName( argv[0] ), "inputfile" );\n-//\t\t}\n-#endif\n-\t\tif( !doalign 
)\n-\t\t{\n-\t\t\tfprintf( stderr, "\\n" );\n-\t\t\tfprintf( stderr, "mafft --dpparttree might give a better result, although slow.\\n" );\n-\t\t\tfprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\\n" );\n-\t\t}\n-\t\tfprintf( stderr, "\\n" );\n-\t\tfprintf( stderr, "----------------------------------------------------------------------------\\n" );\n-\t}\t\t\t\n-\telse\n-\t{\n-\t\tfprintf( stderr, "\\n\\n" );\n-\t\tfprintf( stderr, "----------------------------------------------------------------------------\\n" );\n-\t\tfprintf( stderr, "\\n" );\n-\t\tfprintf( stderr, "nseq = %d\\n", njob );\n-\t\tfprintf( stderr, "groupsize = %d, partsize=%d\\n", classsize, picksize );\n-//\t\tfprintf( stderr, "A single alignment containing all the input sequences has been computed.\\n" );\n-//\t\tfprintf( stderr, "If the sequences are highly diverged and you feel there are too many gaps,\\n" );\n-//\t\tfprintf( stderr, "please try \\n" );\n-//\t\tfprintf( stderr, "%% mafft --parttree --groupsize 100 inputfile\\n" );\n-//\t\tfprintf( stderr, "which classifies the sequences into several groups with <~ 100 sequences\\n" );\n-//\t\tfprintf( stderr, "and performs only intra-group alignments.\\n" );\n-\t\tif( reorder )\n-\t\t\tfprintf( stderr, "The order of sequences has been changed according to estimated similarity.\\n" );\n-#if TREE\n-\t\tif( treeout )\n-\t\t{\n-\t\t\tfprintf( stderr, "\\n" );\n-\t\t\tfprintf( stderr, "A guide tree is in the \'%s\' file.\\n", treefile );\n-\t\t}\n-//\t\telse\n-//\t\t{\n-//\t\t\tfprintf( stderr, "To output guide tree,\\n" );\n-//\t\t\tfprintf( stderr, "%% %s -t -i %s\\n", progName( argv[0] ), "inputfile" );\n-//\t\t}\n-#endif\n-\t\tif( !doalign || fromaln )\n-\t\t{\n-\t\t\tfprintf( stderr, "\\n" );\n-\t\t\tfprintf( stderr, "mafft --dpparttree might give a better result, although slow.\\n" );\n-\t\t\tfprintf( stderr, "mafft --fastaparttree is also available if you have fasta34.\\n" );\n-\t\t}\n-\t\tfprintf( stderr, "\\n" 
);\n-\t\tfprintf( stderr, "----------------------------------------------------------------------------\\n" );\n-\t}\n-#if TREE\n-\tif( treeout ) free( treefile );\n-#endif\n-\n-#if 0\n-\tfprintf( stdout, "weight =\\n" );\n-\tfor( i=0; i<njob; i++ )\n-\t\tfprintf( stdout, "%d: %f\\n", i+1, weight[i] );\n-#endif\n-\n-\tif( doalign == \'f\' )\n-\t{\n-\t\tstrcpy( com, "rm -f" );\n-\t\tstrcat( com, " " );\n-\t\tstrcat( com, datafile );\n-\t\tstrcat( com, "* " );\n-\t\tstrcat( com, queryfile );\n-\t\tstrcat( com, " " );\n-\t\tstrcat( com, resultfile );\n-\t\tfprintf( stderr, "%s\\n", com );\n-\t\tsystem( com );\n-\t}\n-\n-\tSHOWVERSION;\n-\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/suboptalign11.c --- a/mafft/core/suboptalign11.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,678 +0,0 @@\n-#include "mltaln.h"\n-#include "dp.h"\n-\n-#define DEBUG 0\n-#define DEBUG2 0\n-#define XXXXXXX 0\n-#define USE_PENALTY_EX 1\n-\n-typedef struct _shuryoten\n-{\n-\tint i;\n-\tint j;\n-\tfloat wm;\n-\tstruct _shuryoten *next;\n-\tstruct _shuryoten *prev;\n-} Shuryoten;\n-\n-\n-static int localstop;\n-\n-static int compshuryo( Shuryoten *s1_arg, Shuryoten *s2_arg )\n-{\n-\tShuryoten *s1 = (Shuryoten *)s1_arg;\n-\tShuryoten *s2 = (Shuryoten *)s2_arg;\n-\tif ( s1->wm > s2->wm ) return( -1 );\n-\telse if ( s1->wm < s2->wm ) return( 1 );\n-\telse return( 0 );\n-}\n-\n-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )\n-{\n-\tint j;\n-\n-\tfor( j=0; j<lgth2; j++ )\n-\t\tmatch[j] = amino_dis[(int)(*s1)[i1]][(int)(*s2)[j]];\n-}\n-\n-static float gentracking( int **used,\n-\t\t\t\t\t\tchar **seq1, char **seq2, \n- char **mseq1, char **mseq2, \n- float **cpmx1, float **cpmx2, \n- int **ijpi, int **ijpj, int *off1pt, int *off2pt, int endi, int endj )\n-{\n-\tint l, iin, jin, lgth1, lgth2, k, limk;\n-\tint ifi=0, jfi=0;\n-//\tchar gap[] = "-";\n-\tchar *gap;\n-\tgap = newgapstr;\n-\tstatic char *res1 = NULL, *res2 = NULL;\n-\tchar *mspt1, *mspt2;\n-\tif( res1 == NULL )\n-\t{\n-\t\tres1 = (char *)calloc( N, sizeof( char ) );\n-\t\tres2 = (char *)calloc( N, sizeof( char ) );\n-\t}\n-\n-\tlgth1 = strlen( seq1[0] );\n-\tlgth2 = strlen( seq2[0] );\n-\n-\tmspt1 = res1 + lgth1+lgth2;\n-\t*mspt1 = 0;\n-\tmspt2 = res2 + lgth1+lgth2;\n-\t*mspt2 = 0;\n-\tiin = endi; jin = endj;\n-\n-\tlimk = lgth1+lgth2;\n-\tif( used[iin][jin] ) return( -1.0 );\n-\tfor( k=0; k<=limk; k++ ) \n-\t{\n-\t\tifi = ( ijpi[iin][jin] );\n-\t\tjfi = ( ijpj[iin][jin] );\n-\n-\t\tif( used[ifi][jfi] ) return( -1.0 );\n-\n-\t\tl = iin - ifi;\n-\t\twhile( --l ) \n-\t\t{\n-\t\t\t*--mspt1 = seq1[0][ifi+l];\n-\t\t\t*--mspt2 = *gap;\n-\t\t\tk++;\n-\t\t}\n-\t\tl= jin - jfi;\n-\t\twhile( --l )\n-\t\t{\n-\t\t\t*--mspt1 = *gap;\n-\t\t\t*--mspt2 = 
seq2[0][jfi+l];\n-\t\t\tk++;\n-\t\t}\n-\n-\t\tif( iin <= 0 || jin <= 0 ) break;\n-\t\t*--mspt1 = seq1[0][ifi];\n-\t\t*--mspt2 = seq2[0][jfi];\n-\t\tif( ijpi[ifi][jfi] == localstop ) break;\n-\t\tif( ijpj[ifi][jfi] == localstop ) break;\n-\t\tk++;\n-\t\tiin = ifi; jin = jfi;\n-\t}\n-\tif( ifi == -1 ) *off1pt = 0; else *off1pt = ifi;\n-\tif( jfi == -1 ) *off2pt = 0; else *off2pt = jfi;\n-\n-//\tfprintf( stderr, "ifn = %d, jfn = %d\\n", ifi, jfi );\n-\n-\tiin = endi; jin = endj;\n-\tlimk = lgth1+lgth2;\n-\tfor( k=0; k<=limk; k++ ) \n-\t{\n-\t\tifi = ( ijpi[iin][jin] );\n-\t\tjfi = ( ijpj[iin][jin] );\n-\n-\t\tused[ifi][jfi] = 1;\n-\t\tif( iin <= 0 || jin <= 0 ) break;\n-\t\tif( ijpi[ifi][jfi] == localstop ) break;\n-\t\tif( ijpj[ifi][jfi] == localstop ) break;\n-\n-\t\tk++;\n-\t\tiin = ifi; jin = jfi;\n-\t}\n-\n-\n-\tstrcpy( mseq1[0], mspt1 );\n-\tstrcpy( mseq2[0], mspt2 );\n-\n-\tfprintf( stderr, "mseq1=%s\\nmseq2=%s\\n", mspt1, mspt2 );\n-\n-\treturn( 0.0 );\n-}\n-\n-\n-float suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt )\n-/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */\n-{\n-\tint k;\n-\tstatic int **used;\n-\tregister int i, j;\n-\tint lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */\n-\tint lgth1, lgth2;\n-\tint resultlen;\n-\tfloat wm = 0.0; // by D.Mathog, \n-\tfloat g;\n-\tfloat *currentw, *previousw;\n-#if 1\n-\tfloat *wtmp;\n-\tint *ijpipt;\n-\tint *ijpjpt;\n-\tfloat *mjpt, *Mjpt, *prept, *curpt;\n-\tint *mpjpt, *Mpjpt;\n-#endif\n-\tstatic float mi, *m;\n-\tstatic float Mi, *largeM;\n-\tstatic int **ijpi;\n-\tstatic int **ijpj;\n-\tstatic int mpi, *mp;\n-\tstatic int Mpi, *Mp;\n-\tstatic float *w1, *w2;\n-//\tstatic float *match;\n-\tstatic float *initverticalw; /* kufuu sureba iranai */\n-\tstatic float *lastverticalw; /* kufuu sureba iranai */\n-\tstatic char **mseq1;\n-\tstatic char **mseq2;\n-\tstatic float **cpmx1;\n-\tstatic float **cpmx2;\n-\tstatic int 
**intwork;\n-\tstatic float **floatwork;\n-\tstatic int orlgth1 = 0, orlgth2 = 0;\n-\tfloat maxwm;\n-\tfloat tbk;\n-\tint tbki, tbkj;\n-\tint endali, endalj;\n-//\tfloat localthr = 0.0;\n-//\tfloat localthr2 = 0.0;\n-\tfloat fpenalty = (float)penalt'..b'-//\t\t\t\tfprintf( stderr, "hit! i%d, j%d, ijpi = %d, ijpj = %d\\n", i, j, *ijpipt, *ijpjpt );\n-\t\t\t}\n-\t\t\tg = Mi;\n-\t\t\tif( g > tbk )\n-\t\t\t{\n-\t\t\t\ttbk = g;\n-\t\t\t\ttbki = i-1;\n-\t\t\t\ttbkj = Mpi;\n-\t\t\t}\n-\t\t\tg = *Mjpt;\n-\t\t\tif( g > tbk )\n-\t\t\t{\n-\t\t\t\ttbk = g;\n-\t\t\t\ttbki = *Mpjpt;\n-\t\t\t\ttbkj = j-1;\n-\t\t\t}\n-//\t\t\ttbk += fpenalty_EX;// + foffset;\n-\n-\t\t\tg = *prept;\n-\t\t\tif( g > *Mjpt )\n-\t\t\t{\n-\t\t\t\t*Mjpt = g;\n-\t\t\t\t*Mpjpt = i-1;\n-\t\t\t}\n-//\t\t\t*Mjpt += fpenalty_EX;// + foffset;\n-\n-\t\t\tg = *prept;\n-\t\t\tif( g > Mi )\n-\t\t\t{\n-\t\t\t\tMi = g;\n-\t\t\t\tMpi = j-1;\n-\t\t\t}\n-//\t\t\tMi += fpenalty_EX;// + foffset;\n-\n-\n-//\t\t\tfprintf( stderr, "wm=%f, tbk=%f(%c-%c), mi=%f, *mjpt=%f\\n", wm, tbk, seq1[0][tbki], seq2[0][tbkj], mi, *mjpt );\n-//\t\t\tfprintf( stderr, "ijp = %c,%c\\n", seq1[0][abs(*ijpipt)], seq2[0][abs(*ijpjpt)] );\n-\n-\n-\t\t\tif( maxwm < wm )\n-\t\t\t{\n-\t\t\t\tmaxwm = wm;\n-\t\t\t\tendali = i;\n-\t\t\t\tendalj = j;\n-\t\t\t}\n-\n-#if 1\n-\t\t\tif( numshuryo < 100 )\n-\t\t\t{\n-\t\t\t\tshuryo[numshuryo].i = i;\n-\t\t\t\tshuryo[numshuryo].j = j;\n-\t\t\t\tshuryo[numshuryo].wm = wm;\n-\n-\t\t\t\tif( minshuryowm > wm )\n-\t\t\t\t{\n-\t\t\t\t\t minshuryowm = wm;\n-\t\t\t\t\t minshuryopos = numshuryo;\n-\t\t\t\t}\n-\t\t\t\tnumshuryo++;\n-\t\t\t}\n-\t\t\telse\n-\t\t\t{\n-\t\t\t\tif( wm > minshuryowm )\n-\t\t\t\t{\n-\t\t\t\t\tshuryo[minshuryopos].i = i;\n-\t\t\t\t\tshuryo[minshuryopos].j = j;\n-\t\t\t\t\tshuryo[minshuryopos].wm = wm;\n-\t\t\t\t\tminshuryowm = wm;\n-\t\t\t\t\tfor( k=0; k<100; k++ ) // muda\n-\t\t\t\t\t{\n-\t\t\t\t\t\tif( shuryo[k].wm < minshuryowm )\n-\t\t\t\t\t\t{\n-\t\t\t\t\t\t\tminshuryowm = 
shuryo[k].wm;\n-\t\t\t\t\t\t\tminshuryopos = k;\n-\t\t\t\t\t\t\tbreak;\n-\t\t\t\t\t\t}\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t}\n-#endif\n-#if 1\n-\t\t\tif( wm < localthr )\n-\t\t\t{\n-//\t\t\t\tfprintf( stderr, "stop i=%d, j=%d, curpt=%f\\n", i, j, *curpt );\n-\t\t\t\t*ijpipt = localstop;\n-//\t\t\t\t*ijpjpt = localstop;\n-\t\t\t\twm = localthr2;\n-\t\t\t}\n-#endif\n-#if 0\n-\t\t\tfprintf( stderr, "%5.0f ", *curpt );\n-#endif\n-#if DEBUG2\n-\t\t\tfprintf( stderr, "%5.0f ", wm );\n-//\t\t\tfprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\\n", seq1[0][i], seq2[0][j], *ijppt, localstop );\n-#endif\n-\n-\t\t\t*curpt += wm;\n-\t\t\tijpipt++;\n-\t\t\tijpjpt++;\n-\t\t\tmjpt++;\n-\t\t\tMjpt++;\n-\t\t\tprept++;\n-\t\t\tmpjpt++;\n-\t\t\tMpjpt++;\n-\t\t\tcurpt++;\n-\t\t}\n-#if DEBUG2\n-\t\tfprintf( stderr, "\\n" );\n-#endif\n-\n-\t\tlastverticalw[i] = currentw[lgth2-1];\n-\t}\n-\n-\tfor( k=0; k<100; k++ )\n-\t{\n-\t\tfprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );\n-\t}\n-\n-\n-#if 1\n-\tfprintf( stderr, "maxwm = %f\\n", maxwm );\n-\tfprintf( stderr, "endali = %d\\n", endali );\n-\tfprintf( stderr, "endalj = %d\\n", endalj );\n-#endif\n-\n-\tqsort( shuryo, 100, sizeof( Shuryoten ), (int (*)())compshuryo );\n-\tfor( k=0; k<100; k++ )\n-\t{\n-\t\tfprintf( stderr, "shuryo[%d].i,j,wm = %d,%d,%f\\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );\n-\t}\n-\n-\t\t\n-\tlasti = lgth1+1;\n- for( i=0; i<lasti; i++ ) \n- {\n- ijpi[i][0] = localstop;\n- ijpj[i][0] = localstop;\n- }\n-\tlastj = lgth2+1;\n- for( j=0; j<lastj; j++ ) \n- {\n- ijpi[0][j] = localstop;\n- ijpj[0][j] = localstop;\n- }\n-\n-\tfor( i=0; i<lasti; i++ ) for( j=0; j<lastj; j++ ) used[i][j] = 0;\n-\n-\tfor( k=0; k<numshuryo; k++ )\n-\t{\n-\t\tif( shuryo[k].wm < shuryo[0].wm * 0.3 ) break;\n-\t\tfprintf( stderr, "k=%d, shuryo[k].i,j,wm=%d,%d,%f go\\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );\n-\t\tresf = gentracking( used, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, 
ijpj, off1pt, off2pt, shuryo[k].i, shuryo[k].j );\n-\t\tif( resf == -1.0 ) continue;\n-\t\tputlocalhom3( mseq1[0], mseq2[0], lhmpt, *off1pt, *off2pt, (int)shuryo[k].wm, strlen( mseq1[0] ) );\n-#if 0\n-\t\tfprintf( stderr, "\\n" );\n-\t\tfprintf( stderr, ">\\n%s\\n", mseq1[0] );\n-\t\tfprintf( stderr, ">\\n%s\\n", mseq2[0] );\n-#endif\n-\t}\n-\tfor( i=0; i<20; i++ )\n-\t{\n-\t\tfor( j=0; j<20; j++ )\n-\t\t{\n-\t\t\tfprintf( stderr, "%2d ", used[i][j] );\n-\t\t}\n-\t\tfprintf( stderr, "\\n" );\n-\t}\n-\n-\n-//\tfprintf( stderr, "### impmatch = %f\\n", *impmatch );\n-\n-\tresultlen = strlen( mseq1[0] );\n-\tif( alloclen < resultlen || resultlen > N )\n-\t{\n-\t\tfprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\\n", alloclen, resultlen, N );\n-\t\tErrorExit( "LENGTH OVER!\\n" );\n-\t}\n-\n-\n-\n-\n-\n-\treturn( wm );\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/tbfast.c --- a/mafft/core/tbfast.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,2175 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-#define IODEBUG 0\n-#define SCOREOUT 0\n-\n-static int nadd;\n-static int treein;\n-static int topin;\n-static int treeout;\n-static int distout;\n-static int noalign;\n-static int multidist;\n-static int subalignment;\n-static int subalignmentoffset;\n-\n-#ifdef enablemultithread\n-typedef struct _jobtable\n-{\n- int i; \n- int j; \n-} Jobtable;\n-\n-typedef struct _distancematrixthread_arg\n-{\n-\tint njob;\n-\tint thread_no;\n-\tfloat *selfscore;\n-\tfloat **iscore;\n-\tchar **seq;\n-\tint **skiptable;\n-\tJobtable *jobpospt;\n-\tpthread_mutex_t *mutex;\n-} distancematrixthread_arg_t;\n-\n-typedef struct _treebasethread_arg\n-{\n-\tint thread_no;\n-\tint *nrunpt;\n-\tint njob;\n-\tint *nlen;\n-\tint *jobpospt;\n-\tint ***topol;\n-\tTreedep *dep;\n-\tchar **aseq;\n-\tdouble *effarr;\n-\tint *alloclenpt;\n-\tLocalHom **localhomtable;\n-\tRNApair ***singlerna;\n-\tdouble *effarr_kozo;\n-\tint *fftlog;\n-\tchar *mergeoralign;\n-\tpthread_mutex_t *mutex;\n-\tpthread_cond_t *treecond;\n-} treebasethread_arg_t;\n-#endif\n-\n-void arguments( int argc, char *argv[] )\n-{\n- int c;\n-\n-\tnthread = 1;\n-\toutnumber = 0;\n-\tscoreout = 0;\n-\tspscoreout = 0;\n-\ttreein = 0;\n-\ttopin = 0;\n-\trnaprediction = \'m\';\n-\trnakozo = 0;\n-\tnevermemsave = 0;\n-\tinputfile = NULL;\n-\taddfile = NULL;\n-\taddprofile = 1;\n-\tfftkeika = 0;\n-\tconstraint = 0;\n-\tnblosum = 62;\n-\tfmodel = 0;\n-\tcalledByXced = 0;\n-\tdevide = 0;\n-\tuse_fft = 0; // chuui\n-\tforce_fft = 0;\n-\tfftscore = 1;\n-\tfftRepeatStop = 0;\n-\tfftNoAnchStop = 0;\n- weight = 3;\n- utree = 1;\n-\ttbutree = 1;\n- refine = 0;\n- check = 1;\n- cut = 0.0;\n- disp = 0;\n- outgap = 1;\n- alg = \'A\';\n- mix = 0;\n-\ttbitr = 0;\n-\tscmtd = 5;\n-\ttbweight = 0;\n-\ttbrweight = 3;\n-\tcheckC = 0;\n-\ttreemethod = \'X\';\n-\tsueff_global = 0.1;\n-\tcontin = 0;\n-\tscoremtx = 1;\n-\tkobetsubunkatsu = 0;\n-\tdorp = NOTSPECIFIED;\n-\tppenalty_dist = 
NOTSPECIFIED;\n-\tppenalty = NOTSPECIFIED;\n-\tpenalty_shift_factor = 1000.0;\n-\tppenalty_ex = NOTSPECIFIED;\n-\tpoffset = NOTSPECIFIED;\n-\tkimuraR = NOTSPECIFIED;\n-\tpamN = NOTSPECIFIED;\n-\tgeta2 = GETA2;\n-\tfftWinSize = NOTSPECIFIED;\n-\tfftThreshold = NOTSPECIFIED;\n-\tRNAppenalty = NOTSPECIFIED;\n-\tRNAppenalty_ex = NOTSPECIFIED;\n-\tRNApthr = NOTSPECIFIED;\n-\tTMorJTT = JTT;\n-\tconsweight_multi = 1.0;\n-\tconsweight_rna = 0.0;\n-\tmultidist = 0;\n-\tsubalignment = 0;\n-\tsubalignmentoffset = 0;\n-\tlegacygapcost = 0;\n-\tspecificityconsideration = 0.0;\n-\n- while( --argc > 0 && (*++argv)[0] == \'-\' )\n-\t{\n- while ( ( c = *++argv[0] ) )\n-\t\t{\n- switch( c )\n- {\n-\t\t\t\tcase \'i\':\n-\t\t\t\t\tinputfile = *++argv;\n-\t\t\t\t\tfprintf( stderr, "inputfile = %s\\n", inputfile );\n-\t\t\t\t\t--argc;\n- goto nextoption;\n-\t\t\t\tcase \'I\':\n-\t\t\t\t\tnadd = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "nadd = %d\\n", nadd );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'e\':\n-\t\t\t\t\tRNApthr = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'o\':\n-\t\t\t\t\tRNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'V\':\n-\t\t\t\t\tppenalty_dist = (int)( atof( *++argv ) * 1000 - 0.5 );\n-//\t\t\t\t\tfprintf( stderr, "ppenalty = %d\\n", ppenalty );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'f\':\n-\t\t\t\t\tppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );\n-//\t\t\t\t\tfprintf( stderr, "ppenalty = %d\\n", ppenalty );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'Q\':\n-\t\t\t\t\tpenalty_shift_factor = atof( *++argv );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'g\':\n-\t\t\t\t\tppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );\n-\t\t\t\t\tfprintf( stderr, "ppenalty_ex = %d\\n", ppenalty_ex );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto 
nextoption;\n-\t\t\t\tcase \'h\':\n-\t\t\t\t\tpoffset = (int)( atof( *++argv ) * 1000 - 0.5 );\n-//\t\t\t\t\tfprintf( stderr, "poffset = %d\\n", poffset );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'k\':\n-\t\t\t\t\tkimuraR = myatoi( *++argv );\n-\t\t\t\t\tfprintf( stderr, "kappa = %d\\n", kimuraR );\n-\t\t\t\t\t--argc;\n-\t\t\t\t\tgoto nextoption;\n-\t\t\t\tcase \'b\':\n-\t\t\t\t\tnblosum = myatoi( *++argv );\n-\t\t\t\t\tscoremtx = 1;\n-\t\t\t\t\tfp'..b'ob; i++ )\n-\t\t{\n-\t\t\tnogaplen = strlen( bseq[i] );\n-\t\t\tsinglerna[i] = (RNApair **)calloc( nogaplen+1, sizeof( RNApair * ) );\n-\t\t\tfor( j=0; j<nogaplen; j++ )\n-\t\t\t{\n-\t\t\t\tsinglerna[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );\n-\t\t\t\tsinglerna[i][j][0].bestpos = -1;\n-\t\t\t\tsinglerna[i][j][0].bestscore = -1.0;\n-\t\t\t}\n-\t\t\tsinglerna[i][nogaplen] = NULL;\n-//\t\t\tfprintf( stderr, "### reading bpp %d ...\\n", i );\n-\t\t\treadmccaskill( prep, singlerna[i], nogaplen );\n-\t\t}\n-\t\tfclose( prep );\n-\t\tfprintf( stderr, "\\ndone.\\n" );\n-\t}\n-\telse\n-\t\tsinglerna = NULL;\n-\n-\n-\tfprintf( stderr, "Progressive alignment ... 
\\n" );\n-\n-#ifdef enablemultithread\n-\tif( nthread > 0 && nadd == 0 )\n-\t{\n-\t\ttreebasethread_arg_t *targ;\t\n-\t\tint jobpos;\n-\t\tpthread_t *handle;\n-\t\tpthread_mutex_t mutex;\n-\t\tpthread_cond_t treecond;\n-\t\tint *fftlog;\n-\t\tint nrun;\n-\t\tint nthread_yoyu;\n-\n-\t\tnthread_yoyu = nthread * 1;\n-\t\tnrun = 0;\n-\t\tjobpos = 0;\n-\t\ttarg = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) );\n-\t\tfftlog = AllocateIntVec( njob );\n-\t\thandle = calloc( nthread_yoyu, sizeof( pthread_t ) );\n-\t\tpthread_mutex_init( &mutex, NULL );\n-\t\tpthread_cond_init( &treecond, NULL );\n-\n-\t\tfor( i=0; i<njob; i++ ) dep[i].done = 0;\n-\t\tfor( i=0; i<njob; i++ ) fftlog[i] = 1;\n-\n-\t\tif( constraint )\n-\t\t\tcalcimportance( njob, eff, bseq, localhomtable );\n-//\t\t\tdontcalcimportance( njob, eff, bseq, localhomtable ); // CHUUUUIIII!!!\n-\n-\t\tfor( i=0; i<nthread_yoyu; i++ )\n-\t\t{\n-\t\t\ttarg[i].thread_no = i;\n-\t\t\ttarg[i].nrunpt = &nrun;\n-\t\t\ttarg[i].njob = njob;\n-\t\t\ttarg[i].nlen = nlen;\n-\t\t\ttarg[i].jobpospt = &jobpos;\n-\t\t\ttarg[i].topol = topol;\n-\t\t\ttarg[i].dep = dep;\n-\t\t\ttarg[i].aseq = bseq;\n-\t\t\ttarg[i].effarr = eff;\n-\t\t\ttarg[i].alloclenpt = &alloclen;\n-\t\t\ttarg[i].localhomtable = localhomtable;\n-\t\t\ttarg[i].singlerna = singlerna;\n-\t\t\ttarg[i].effarr_kozo = eff_kozo_mapped;\n-\t\t\ttarg[i].fftlog = fftlog;\n-\t\t\ttarg[i].mergeoralign = mergeoralign;\n-\t\t\ttarg[i].mutex = &mutex;\n-\t\t\ttarg[i].treecond = &treecond;\n-\n-\t\t\tpthread_create( handle+i, NULL, treebasethread, (void *)(targ+i) );\n-\t\t}\n-\n-\t\tfor( i=0; i<nthread_yoyu; i++ )\n-\t\t{\n-\t\t\tpthread_join( handle[i], NULL );\n-\t\t}\n-\t\tpthread_mutex_destroy( &mutex );\n-\t\tpthread_cond_destroy( &treecond );\n-\t\tfree( handle );\n-\t\tfree( targ );\n-\t\tfree( fftlog );\n-\t}\n-\telse\n-#endif\n-\n-\t\ttreebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, dep, eff, &alloclen, localhomtable, singlerna, eff_kozo_mapped 
);\n-\tfprintf( stderr, "\\ndone.\\n" );\n-\tif( scoreout )\n-\t{\n-\t\tunweightedspscore = plainscore( njob, bseq );\n-\t\tfprintf( stderr, "\\nSCORE %s = %.0f, ", "(treebase)", unweightedspscore );\n-\t\tfprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );\n-\t\tfprintf( stderr, "\\n\\n" );\n-\t}\n-\n-#if 0\n-\tif( constraint )\n-\t{\n-\t\tLocalHom *tmppt1, *tmppt2;\n-\t\tfor( i=0; i<njob; i++ )\n-\t\t{\n-\t\t\tfor( j=0; j<njob; j++ )\n-\t\t\t{\n-\t\t\t\ttmppt1 = localhomtable[i]+j;\n-\t\t\t\twhile( tmppt2 = tmppt1->next )\n-\t\t\t\t{\n-\t\t\t\t\tfree( (void *)tmppt1 );\n-\t\t\t\t\ttmppt1 = tmppt2;\n-\t\t\t\t}\n-\t\t\t\tfree( (void *)tmppt1 );\n-\t\t\t}\n-\t\t\tfree( (void *)(localhomtable[i]+j) );\n-\t\t}\n-\t\tfree( (void *)localhomtable );\n-\t}\n-#endif\n-\n-\tfprintf( trap_g, "done.\\n" );\n-\tfclose( trap_g );\n-\tfree( mergeoralign );\n-\tif( rnakozo && rnaprediction == \'m\' ) \n-\t{\n-\t\tif( singlerna ) // nen no tame\n-\t\t{\n-\t\t\tfor( i=0; i<njob; i++ ) \n-\t\t\t{\n-\t\t\t\tfor( j=0; singlerna[i][j]!=NULL; j++ )\n-\t\t\t\t{\n-\t\t\t\t\tif( singlerna[i][j] ) free( singlerna[i][j] );\n-\t\t\t\t}\n-\t\t\t\tif( singlerna[i] ) free( singlerna[i] );\n-\t\t\t}\n-\t\t\tfree( singlerna );\n-\t\t\tsinglerna = NULL;\n-\t\t}\n-\t}\n-\n-\twriteData_pointer( prep_g, njob, name, nlen, bseq );\n-#if 0\n-\twriteData( stdout, njob, name, nlen, bseq );\n-\twritePre( njob, name, nlen, bseq, !contin );\n-\twriteData_pointer( prep_g, njob, name, nlen, aseq );\n-#endif\n-#if IODEBUG\n-\tfprintf( stderr, "OSHIMAI\\n" );\n-#endif\n-\n-\tif( constraint ) FreeLocalHomTable( localhomtable, njob );\n-\n-\tif( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\\n", sumofpairsscore( njob, bseq ) );\n-\n-\tSHOWVERSION;\n-\treturn( 0 );\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/tddis.c --- a/mafft/core/tddis.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,905 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-\n-#if 0\n-void mdfymtx( char pair[njob][njob], int s1, double **partialmtx, double **mtx )\n-#else\n-void mdfymtx( char **pair, int s1, double **partialmtx, double **mtx )\n-#endif\n-{\n-\tint i, j;\n-\tint icount, jcount;\n-#if DEBUG\n-\tFILE *fp;\n-\tstatic char name[M][B];\n-\n-\tfor( i=0; i<M; i++ ) name[i][0] = 0;\n-\tfprintf( stdout, "s1 = %d\\n", s1 );\n-\tfor( i=0; i<njob; i++ ) \n-\t{\n-\t\tfor( j=0; j<njob; j++ ) \n-\t\t{\n-\t\t\tprintf( "%#2d", pair[i][j] );\n-\t\t}\n-\t\tprintf( "\\n" );\n-\t}\n-#endif\n-\n-\tfor( i=0, icount=0; i<njob-1; i++ )\n-\t{\n-\t\tif( !pair[s1][i] ) continue;\n-\t\tfor( j=i+1, jcount=icount+1; j<njob; j++ ) \n-\t\t{\n-\t\t\tif( !pair[s1][j] ) continue;\n-\t\t\tpartialmtx[icount][jcount] = mtx[i][j];\n-\t\t\tjcount++;\n-\t\t}\n-\t\ticount++;\n-\t}\n-#if DEBUG\n-\tfp = fopen( "hat2.org", "w" );\n-\tWriteHat2( fp, njob, name, mtx );\n-\tfclose( fp );\n-\tfp = fopen( "hat2.mdf", "w" );\n-\tWriteHat2( fp, icount, name, partialmtx );\n-\tfclose( fp );\n-#endif\n-\t\t\n-}\n-\n-\t\t\n-float score_calc( char **seq, int s ) /* method 3 */\n-{\n- int i, j, k, c;\n- int len = strlen( seq[0] );\n- float score;\n- int tmpscore;\n- char *mseq1, *mseq2;\n-\n- score = 0.0;\n- for( i=0; i<s-1; i++ )\n- {\n- for( j=i+1; j<s; j++ )\n- {\n- mseq1 = seq[i];\n- mseq2 = seq[j];\n- tmpscore = 0;\n- c = 0;\n- for( k=0; k<len; k++ )\n- {\n- if( mseq1[k] == \'-\' && mseq2[k] == \'-\' ) continue;\n- c++;\n- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];\n- if( mseq1[k] == \'-\' )\n- {\n- tmpscore += penalty;\n- while( mseq1[++k] == \'-\' )\n- ;\n- k--;\n- if( k > len-2 ) break;\n- continue;\n- }\n- if( mseq2[k] == \'-\' )\n- {\n- tmpscore += penalty;\n- while( mseq2[++k] == \'-\' )\n- ;\n- k--;\n- if( k > len-2 ) break;\n- continue;\n- }\n- }\n- /*\n- if( mseq1[0] == \'-\' || mseq2[0] == \'-\' )\n- {\n- for( k=0; k<len; k++ )\n- {\n- if( mseq1[k] == \'-\' && mseq2[k] == \'-\' ) 
continue;\n- if( !( mseq1[k] != \'-\' && mseq2[k] != \'-\' ) ) \n- {\n- c--;\n- tmpscore -= penalty;\n- break;\n- }\n- else break;\n- }\n- }\n- if( mseq1[len-1] == \'-\' || mseq2[len-1] == \'-\' )\n- {\n- for( k=0; k<len; k++ )\n- {\n- if( mseq1[k] == \'-\' && mseq2[k] == \'-\' ) continue;\n- if( !( mseq1[k] != \'-\' && mseq2[k] != \'-\' ) ) \n- {\n- c--;\n- tmpscore -= penalty;\n- break;\n- }\n- else break;\n- }\n- }\n- */\n- score += (double)tmpscore / (double)c;\n- }\n- }\n- score = (float)score / ( ( (double)s * ((double)s-1.0) ) / 2.0 );\n-\tfprintf( stderr, "score in score_calc = %f\\n", score );\n- return( score );\n-}\n-\n-void cpmx_calc( char **seq, float **cpmx, double *eff, int lgth, int clus )\n-{\n-\tint i, j, k;\n-\tdouble totaleff = 0.0;\n-\n-\tfor( i=0; i<clus; i++ ) totaleff += eff[i]; \n-\tfor( i=0; i<nalphabets; i++ ) for( j=0; j<lgth; j++ ) cpmx[i][j] = 0.0;\n-\tfor( j=0; j<lgth; j++ ) for( k=0; k<clus; k++ )\n-\t\t\tcpmx[(int)amino_n[(int)seq[k][j]]][j] += (float)eff[k] / totaleff;\n-}\n-\n-\n-void cpmx_calc_new_bk( char **seq, float **cpmx, double *eff, int lgth, int clus ) // summ eff must be 1.0 \n-{\n- int i, j, k;\n- float feff;\n-\n- for( i=0; i<nalphabets; i++ ) for( j=0; j<lgth'..b'+ pairwisenode[s][innergroup[0]]\n-\t\t\t\t - pairwisenode[innergroup[0]][outergroup1[0]] + 1;\n-\t\tresult[s] /= 2;\n-\t}\n-\n-#if 0\n-\tfor( i=0; i<nseq; i++ ) \n-\t\tfprintf( stderr, "result[%d] = %d\\n", i+1, result[i] );\n-#endif\n-}\n-\t\t\n-\n-\n-\n-\n-\t\n-\n-\t\t\n-\t\t\n-\t\t\t\t\t\n-\t\t\t\t\t\n-\n-\t\t\t\t\n-void OneClusterAndTheOther_fast( int locnjob, int *memlist1, int *memlist2, int *s1, int *s2, char *pair, int ***topol, int step, int branch, double **smalldistmtx, double **distmtx )\n-{\n-\tint i, k, j;\n-\tint r1;\n-//\tchar *pair;\n-\n-//\tpair = calloc( locnjob, sizeof( char ) );\n-\n-\tfor( i=0; i<locnjob; i++ ) pair[i] = 0;\n- for( i=0, k=0; (r1=topol[step][branch][i])>-1; i++ ) \n-\t{\n- pair[r1] = 1;\n-\t\tmemlist1[k++] = 
r1;\n-\t}\n-\tmemlist1[k] = -1;\n-\n- for( i=0, k=0; i<locnjob; i++ ) \n- {\n- if( !pair[i] ) \n- {\n-\t\t\tmemlist2[k++] = i;\n- }\n- }\n-\tmemlist2[k] = -1;\n-\n-\t*s1 = memlist1[0];\n-\t*s2 = memlist2[0];\n-\n-\tif( smalldistmtx )\n-\t{\n-\t\tint im, jm;\n-\t\tfor( i=0; (im=memlist1[i])!=-1; i++ ) for( j=0; (jm=memlist2[j])!=-1; j++ )\n-\t\t{\n-\t\t\tsmalldistmtx[i][j] = distmtx[MIN(im,jm)][MAX(im,jm)];\n-//\t\t\tfprintf( stderr, "#### %d-%d, %f\\n", im, jm, smalldistmtx[i][j] );\n-\t\t}\n-\t}\n-//\tfree( pair );\n-}\n-\t\t\n-\n-void makeEffMtx( int nseq, double **mtx, double *vec )\n-{\n-\tint i, j;\n-\tfor( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) \n-\t\tmtx[i][j] = vec[i] * vec[j];\n-}\n-\t\n-void node_eff( int nseq, double *eff, int *node )\n-{\n-\tint i;\n-\textern double ipower( double, int );\n-\tfor( i=0; i<nseq; i++ ) \n-\t\teff[i] = ipower( 0.5, node[i] ) + geta2;\n-\t/*\n-\t\teff[i] = ipower( 0.5, node[i] ) + 0;\n-\t*/\n-#if DEBUG\n-\tfor( i=0; i<nseq; i++ ) \n-#endif\n-}\n-\n-\n-int shrinklocalhom( char **pair, int s1, int s2, LocalHom **localhom, LocalHom ***localhomshrink )\n-{\n-\tint m1, k1, m2, k2;\n-\n-\tfor( m1=s1, k1=0; m1<njob; m1++ )\n-\t{\n-\t\tif( pair[s1][m1] != 0 )\n-\t\t{\n-\t\t\tfor( m2=s2, k2=0; m2<njob; m2++ )\n-\t\t\t{\n-\t\t\t\tif( pair[s2][m2] != 0 )\n-\t\t\t\t{\n-\t\t\t\t\tif( localhom[m1][m2].opt == -1 )\n-\t\t\t\t\t\tlocalhomshrink[k1][k2] = NULL;\n-\t\t\t\t\telse\n-\t\t\t\t\t\tlocalhomshrink[k1][k2] = localhom[m1]+m2;\n-\t\t\t\t\tk2++;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tk1++;\n-\t\t}\n-\t}\n-\treturn( 0 );\n-}\n-\n-int msshrinklocalhom_fast( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink )\n-{\n-\tint m1, k1, m2, k2;\n-\n-\tfor( k1=0; (m1=memlist1[k1])!=-1; k1++ )\n-\t{\n-\t\tfor( k2=0; (m2=memlist2[k2])!=-1; k2++ )\n-\t\t{\n-\t\t\tif( localhom[m1][m2].opt == -1 )\n-\t\t\t\tlocalhomshrink[k1][k2] = NULL;\n-\t\t\telse\n-\t\t\t\tlocalhomshrink[k1][k2] = localhom[m1]+m2;\n-\t\t}\n-\t}\n-\treturn( 0 
);\n-}\n-int fastshrinklocalhom_one( int *mem1, int *mem2, int norg, LocalHom **localhom, LocalHom ***localhomshrink )\n-{\n-\tint k1, k2;\n-\tint *intpt1, *intpt2;\n-\n-\t\n-\tfor( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ )\n-\t{\n-\t\tfor( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ )\n-\t\t{\n-\t\t\tif( *intpt2 != norg ) \n-\t\t\t{\n-\t\t\t\tfprintf( stderr, "ERROR! *intpt2 = %d\\n", *intpt2 );\n-\t\t\t\texit( 1 );\n-\t\t\t}\n-\t\t\tif( localhom[*intpt1][0].opt == -1 )\n-\t\t\t\tlocalhomshrink[k1][k2] = NULL;\n-\t\t\telse\n-\t\t\t\tlocalhomshrink[k1][k2] = localhom[*intpt1];\n-\t\t}\n-\t}\n-\treturn( 0 );\n-}\n-\n-int fastshrinklocalhom( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink )\n-{\n-\tint k1, k2;\n-\tint *intpt1, *intpt2;\n-\n-\t\n-\tfor( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ )\n-\t{\n-\t\tfor( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ )\n-\t\t{\n-\t\t\tif( localhom[*intpt1][*intpt2].opt == -1 )\n-\t\t\t\tlocalhomshrink[k1][k2] = NULL;\n-\t\t\telse\n-\t\t\t\tlocalhomshrink[k1][k2] = localhom[*intpt1]+*intpt2;\n-\t\t}\n-\t}\n-\treturn( 0 );\n-}\n-\n-int msfastshrinklocalhom( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink )\n-{\n-\tint k1, k2;\n-\tint *intpt1, *intpt2;\n-\tint m1, m2;\n-\t\n-\tfor( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ )\n-\t{\n-\t\tfor( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ )\n-\t\t{\n-\t\t\tm1 = MIN(*intpt1,*intpt2); m2 = MAX(*intpt1,*intpt2);\n-\t\t\tif( localhom[m1][m2].opt == -1 )\n-\t\t\t\tlocalhomshrink[k1][k2] = NULL;\n-\t\t\telse\n-\t\t\t\tlocalhomshrink[k1][k2] = localhom[m1]+m2;\n-\t\t}\n-\t}\n-\treturn( 0 );\n-}\n-\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/tditeration.c --- a/mafft/core/tditeration.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,2248 +0,0 @@\n-\n-/* \n-\ttree-dependent iteration \n- algorithm A+ when group-to-group, C when group-to-singleSeqence \n-\t OR\n- algorithm A+\n-*/\n-\n-#include "mltaln.h"\n-\n-\n-#define DEBUG 0\n-#define RECORD 0\n-#define MCD 0\n-\n-extern char **seq_g;\n-extern char **res_g;\n-\n-static int nwa;\n-\n-\n-#ifdef enablemultithread\n-typedef struct _threadarg\n-{\n-\tint thread_no;\n-\tint *jobposintpt;\n-\tint *ndonept;\n-\tint *ntrypt;\n-\tint *collectingpt;\n-\tint njob;\n-\tint nbranch;\n-\tint maxiter;\n-\tint nkozo;\n-\tint *subgenerationpt;\n-\tfloat *basegainpt;\n-\tfloat *gainlist;\n-\tfloat *tscorelist;\n-\tint *generationofinput;\n-\tchar *kozoarivec;\t\n-\tchar **mastercopy;\n-\tchar ***candidates;\n-\tint *generationofmastercopypt;\n-\tint *branchtable;\n-\tRNApair ***singlerna;\n-\tLocalHom **localhomtable;\n-\tint alloclen;\n-\tNode *stopol;\n-\tint ***topol;\n-//\tdouble **len;\n-\tfloat **tscorehistory_detail;\n-\tint *finishpt;\n-\tint **skipthisbranch;\n-\tdouble **distmtx;\n-\tpthread_mutex_t *mutex;\n-\tpthread_cond_t *collection_end;\n-\tpthread_cond_t *collection_start;\n-} threadarg_t;\n-#endif\n-\n-#if 1\n-static void shuffle( int *arr, int n )\n-{\n-\tint i;\n-\tint x;\n-\tint b;\n-\n-\tfor( i=1; i<n; i++ )\n-\t{\n-\t\tx = rand() % (i+1);\n-\t\tif( x != i )\n-\t\t{\n-\t\t\tb = arr[i];\n-\t\t\tarr[i] = arr[x];\n-\t\t\tarr[x] = b;\n-\t\t}\n-\t}\n-}\n-#endif\n-\n-\n-static void makescoringmatrices( double ***matrices, double **originalmtx )\n-{\n-\tint c;\n-\tfloat rep;\n-\tfor( c=0; c<maxdistclass; c++ )\n-\t{\n-\t\trep = (double) 2 * c / ndistclass; // rep:0..2\n-//\t\tfprintf( stderr, "rep = %f\\n", rep );\n-\t\tmakedynamicmtx( matrices[c], originalmtx, rep * 0.5 ); // upgma ni awaseru node, 0..1\n-//\t\tfprintf( stderr, "c=%d, score for %c-%c = %f\\n", c, \'W\', \'W\', matrices[c][amino_n[\'W\']][amino_n[\'W\']] );\n-\t}\n-}\n-\n-static void classifypairs( int n1, double **eff1s, double *eff1, int n2, double **eff2s, double 
*eff2, double **smalldistmtx, int **matnum, int maxdistclass )\n-{\n-\tint i, j, c;\n-\tfor( c=0; c<maxdistclass; c++ ) \n-\t{\n-\t\tfor( i=0; i<n1; i++ ) eff1s[c][i] = 0.0;\n-\t\tfor( j=0; j<n2; j++ ) eff2s[c][j] = 0.0;\n-\t}\n-\t\n-//\tfprintf( stderr, "\\n" );\n-\tfor( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )\n-\t{\n-\t\tc = (int)( smalldistmtx[i][j] / 2.0 * ndistclass ); // dist:0..2\n-//\t\tif( c >= ndistclass ) c = ndistclass-1;\n-\t\tif( c >= maxdistclass ) c = maxdistclass-1;\n-//\t\tfprintf( stderr, "pair %d-%d (%f), dist=%f -> c=%d\\n", i, j, eff1[i] * eff2[j], smalldistmtx[i][j], c );\n-\t\teff1s[c][i] = 1.0;\n-\t\teff2s[c][j] = 1.0;\n-\t\tmatnum[i][j] = c;\n-\t}\n-\tfor( c=0; c<maxdistclass; c++ ) for( i=0; i<n1; i++ ) eff1s[c][i] *= eff1[i];\n-\tfor( c=0; c<maxdistclass; c++ ) for( i=0; i<n2; i++ ) eff2s[c][i] *= eff2[i];\n-#if 0\n-\tdouble totaleff;\n-\ttotaleff = 0.0; for( c=0; c<maxdistclass; c++ ) for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ ) totaleff += eff1s[c][i] * eff2s[c][j];\n-\tfprintf( stderr, "totaleff1s-2s = %f\\n", totaleff );\n-\ttotaleff = 0.0; for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ ) totaleff += eff1[i] * eff2[j];\n-\tfprintf( stderr, "totaleff1-2 = %f\\n", totaleff );\n-\n-\ttotaleff = 0.0; for( c=0; c<maxdistclass; c++ ) for( i=0; i<n1; i++ ) totaleff += eff1s[c][i]; \n-\tfprintf( stderr, "totaleff1s = %f\\n", totaleff );\n-\ttotaleff = 0.0; for( c=0; c<maxdistclass; c++ ) for( i=0; i<n2; i++ ) totaleff += eff2s[c][i]; \n-\tfprintf( stderr, "totaleff2s = %f\\n", totaleff );\n-\ttotaleff = 0.0; for( i=0; i<n1; i++ ) totaleff += eff1[i]; \n-\tfprintf( stderr, "totaleff1 = %f\\n", totaleff );\n-\ttotaleff = 0.0; for( i=0; i<n2; i++ ) totaleff += eff2[i]; \n-\tfprintf( stderr, "totaleff2 = %f\\n", totaleff );\n-\t{\n-//\t\tfor( i=0; i<n1; i++ ) fprintf( stderr, "eff1s[%d][%d] = %f\\n", c, i, eff1s[c][i] );\n-//\t\tfor( i=0; i<n2; i++ ) fprintf( stderr, "eff2s[%d][%d] = %f\\n", c, i, eff2s[c][i] );\n-//\t\tfprintf( stderr, "\\n" 
);\n-\t}\n-#endif\n-}\n-\n-static void Writeoption2( FILE *fp, int cycle, double cut )\n-{\n-\tfprintf( fp, "%dth cycle\\n", cycle );\n- fprintf( fp, "marginal score to search : current score * (100-%d) / 100\\n", (int)cut );\n-}\n-\n-static void Writeoptions( FILE *fp )\n-{\n-\t'..b'2[i] );\n-\t\t#endif\n-\t\t\n-\t\t\t\t\t\t}\n-\t\t\t\t\t\telse\n-\t\t\t\t\t\t{\n-\t\t\t\t\t\t\ttscore = mscore + 1.0;\n-\t//\t\t\t\t\t\ttscore = 0.0;\n-\t//\t\t\t\t\t\tfprintf( stderr, "in line 705, tscore=%f\\n", tscore );\n-\t//\t\t\t\t\t\tfor( i=0; i<length; i++ )\n-\t//\t\t\t\t\t\t\ttscore = tscore + (double)mseq1[0][i];\n-\t//\t\t\t\t\t\tmscore = tscore - 1.0;\n-\t\t\t\t\t\t}\n-\t\n-\t\t\t\t\t\tif( isnan( mscore ) )\n-\t\t\t\t\t\t{\n-\t\t\t\t\t\t\tfprintf( stderr, "\\n\\nmscore became NaN\\n" );\n-\t\t\t\t\t\t\texit( 1 );\n-\t\t\t\t\t\t}\n-\t\t\t\t\t\tif( isnan( tscore ) )\n-\t\t\t\t\t\t{\n-\t\t\t\t\t\t\tfprintf( stderr, "\\n\\ntscore became NaN\\n" );\n-\t\t\t\t\t\t\texit( 1 );\n-\t\t\t\t\t\t}\n-\t\n-\t\n-\t\n-\t//\t\t\t\t\tfprintf( stderr, "@@@@@ mscore,tscore = %f,%f\\n", mscore, tscore );\n-\t\n-\t\t\t\t\t\tif( tscore > mscore - cut/100.0*mscore ) \n-\t\t\t\t\t\t{\n-\t\t\t\t\t\t\twritePre( locnjob, name, nlen, aseq, 0 );\n-\t\t\t\t\t\t\tfor( i=0; i<locnjob; i++ ) strcpy( bseq[i], aseq[i] );\n-\t\t\t\t\t\t\tif( score_check == 2 )\n-\t\t\t\t\t\t\t{\n-\t\t\t\t\t\t\t\teffarr1[0] = 1.0;\n-\t\t\t\t\t\t\t\teffarr2[0] = 1.0;\n-\t\t\t\t\t\t\t\tfor( i=0; i<locnjob-1; i++ )\n-\t\t\t\t\t\t\t\t\tfor( j=i+1; j<locnjob; j++ )\n-\t\t\t\t\t\t\t\t\t\tintergroup_score( bseq+i, bseq+j, effarr1, effarr2, 1, 1, length, imanoten[i]+j );\n-\t\t\t\t\t\t\t}\n-\t\t\n-\t#if 0\n-\t\t\t\t\t\t\tfprintf( stderr, "tscore = %f mscore = %f accepted.\\n", tscore, mscore );\n-\t#endif\n-\t\t\t\t\t\t\tfprintf( stderr, " accepted." 
);\n-\t\t\t\t\t\t\tconverged = 0;\n-\t\t\n-\t\t\t\t\t\t}\n-\t\t\t\t\t\telse \n-\t\t\t\t\t\t{\n-\t#if 0\n-\t\t\t\t\t\t\tfprintf( stderr, "tscore = %f mscore = %f rejected.\\n", tscore, mscore );\n-\t#endif\n-\t\t\t\t\t\t\tfprintf( stderr, " rejected." );\n-\t\t\t\t\t\t\ttscore = mscore;\n-\t\t\t\t\t\t\tconverged++;\n-\t\t\t\t\t\t}\n-\t\t\t\t\t}\n-\t\t\t\t\tfprintf( stderr, "\\r" );\n-\t\n-\t\n-\t\t\t\t\thistory[iterate][l][k] = (float)tscore;\n-\t\n-\t//\t\t\t\tfprintf( stderr, "tscore = %f\\n", tscore );\n-\t\t\n-\t\t\t\t\tif( converged >= locnjob * 2 )\n-\t\t\t\t\t{\n-\t\t\t\t\t\tfprintf( trap_g, "Converged.\\n\\n" );\n-\t\t\t\t\t\tfprintf( stderr, "\\nConverged.\\n\\n" );\n-\t\t\t\t\t\tif( scoreout )\n-\t\t\t\t\t\t{\n-\t\t\t\t\t\t\tunweightedspscore = plainscore( njob, bseq );\n-\t\t\t\t\t\t\tfprintf( stderr, "\\nSCORE %d = %.0f, ", iterate * ( (njob-1)*2-1 ), unweightedspscore );\n-\t\t\t\t\t\t\tfprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );\n-\t\t\t\t\t\t\tif( weight || constraint ) fprintf( stderr, " (differs from the objective score)" );\n-\t\t\t\t\t\t\tfprintf( stderr, "\\n\\n" );\n-\t\t\t\t\t\t}\n-\t\t\t\t\t\tif( grouprna1 ) free( grouprna1 );\n-\t\t\t\t\t\tif( grouprna2 ) free( grouprna2 );\n-\t\t\t\t\t\treturn( 0 );\n-\t\t\t\t\t}\n-\t\t\t\t\tif( iterate >= 1 )\n-\t\t\t\t\t{\n-\t\t/* oscillation check */\n-\t\t\t\t\t\toscillating = 0;\n-\t\t\t\t\t\tfor( ii=iterate-2; ii>=0; ii-=2 ) \n-\t\t\t\t\t\t{\n-\t\t\t\t\t\t\tif( (float)tscore == history[ii][l][k] )\n-\t\t\t\t\t\t\t{\n-\t\t\t\t\t\t\t\toscillating = 1;\n-\t\t\t\t\t\t\t\tbreak;\n-\t\t\t\t\t\t\t}\n-\t\t\t\t\t\t}\n-\t\t\t\t\t\tif( ( oscillating && !cooling ) || ( oscillating && cut < 0.001 && cooling ) )\n-\t\t\t\t\t\t{\n-\t\t\t\t\t\t\tfprintf( trap_g, "Oscillating.\\n" );\n-\t\t\t\t\t\t\tfprintf( stderr, "\\nOscillating.\\n\\n" );\n-\t\t\t\t\t\t\tif( scoreout )\n-\t\t\t\t\t\t\t{\n-\t\t\t\t\t\t\t\tunweightedspscore = plainscore( njob, bseq 
);\n-\t\t\t\t\t\t\t\tfprintf( stderr, "\\nSCORE %d = %.0f, ", iterate * ( (njob-1)*2-1 ), unweightedspscore );\n-\t\t\t\t\t\t\t\tfprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );\n-\t\t\t\t\t\t\t\tif( weight || constraint ) fprintf( stderr, " (differs from the objective score)" );\n-\t\t\t\t\t\t\t\tfprintf( stderr, "\\n\\n" );\n-\t\t\t\t\t\t\t}\n-\t#if 1 /* hujuubun */\n-\t\t\t\t\t\t\tif( grouprna1 ) free( grouprna1 );\n-\t\t\t\t\t\t\tif( grouprna2 ) free( grouprna2 );\n-\t\t\t\t\t\t\treturn( -1 );\n-\t#endif\n-\t\t\t\t\t\t}\n-\t\t\t\t\t} /* if( iterate ) */\n-\t\t\t\t} /* for( k ) */\n-\t\t\t} /* for( l ) */\n-\t\t\tif( scoreout )\n-\t\t\t{\n-\t\t\t\tunweightedspscore = plainscore( njob, bseq );\n-\t\t\t\tfprintf( stderr, "\\nSCORE %d = %.0f, ", iterate * ( (njob-1)*2-1 ), unweightedspscore );\n-\t\t\t\tfprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );\n-\t\t\t\tif( weight || constraint ) fprintf( stderr, " (differs from the objective score)" );\n-\t\t\t\tfprintf( stderr, "\\n\\n" );\n-\t\t\t}\n-\t\t} /* for( iterate ) */\n-\t}\n-\tif( grouprna1 ) free( grouprna1 );\n-\tif( grouprna2 ) free( grouprna2 );\n-\treturn( 2 );\n-} \t /* int Tree... */\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/treeOperation.c --- a/mafft/core/treeOperation.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,658 +0,0 @@\n-#include "mltaln.h"\n-\n-#define DEBUG 0\n-\n-#define EF_THREEWAY 1.0\n-#define MAXBW 1.0\n-#define MINBW 0.01\n-\n-#define MINLEN 0.001\n-\n-#if DEBUG\n-Node *stopol_g;\n-#endif\n-\n-\n-void checkMinusLength( int nseq, double **len )\n-{\n-\tint i, j;\n-\tfor( i=0; i<nseq-1; i++ ) for( j=0; j<2; j++ ) \n-\t\tif( len[i][j] < MINLEN ) len[i][j] = MINLEN;\n-}\n-\n-void negativeMember2( int *mem, int *query, int locnseq )\n-{\n-\tint *ptr;\n-\tchar *tmp;\n-\tint i;\n-\tint n;\n-\n-\ttmp = AllocateCharVec( locnseq );\n-\n-\tfor( i=0; i<locnseq; i++ ) tmp[i] = 0;\n-\twhile( (n=*query++) != -1 ) tmp[n] = 1;\n-\n-\tptr = mem;\n-\tfor( i=0; i<locnseq; i++ ) \n-\t{\n-\t\tif( !tmp[i] ) \n-\t\t{\n-\t\t\t*ptr++ = i;\n-\t\t}\n-\t}\n-\t*ptr = -1;\n-\tfree( tmp );\n-}\n-\n-int *negativeMember( int *query, int locnseq )\n-{\n-\tint *bk, *value = NULL;\n-\tchar *tmp;\n-\tint i;\n-\tint n;\n-\n-\ttmp = AllocateCharVec( locnseq );\n-\tbk = value = AllocateIntVec( locnseq );\n-\tif( !value ) ErrorExit( "Cannot allocate value" );\n-\n-\tfor( i=0; i<locnseq; i++ ) tmp[i] = 0;\n-\twhile( (n=*query++) != -1 ) tmp[n] = 1;\n-\n-\tfor( i=0; i<locnseq; i++ ) \n-\t{\n-\t\tif( !tmp[i] )\n-\t\t{\n-\t\t\tfprintf( stderr, "%3d ", i );\n-\t\t\t*value++ = i;\n-\t\t}\n-\t}\n-\tfprintf( stderr, "\\n" );\n-\t*value = -1;\n-\tfree( tmp );\n-\treturn( bk );\n-}\n-\n-int IntExistsInVec( int query, int *vector )\n-{\n-\twhile( *vector != -1 )\n-\t\tif( query == *vector++ ) return( 1 );\n-\treturn( 0 );\n-}\n-\n-NodeInCub searchParent( int top, int ***topol, int Start, int End )\n-{\n-\tint i, j;\n-\tNodeInCub value;\n-\tfor( i=Start; i<End; i++ ) \n-\t{\n-\t\tfor( j=0; j<2; j++ ) \n-\t\t{\n-\t\t\tif( IntExistsInVec( top, topol[i][j] ) )\n-\t\t\t{\n-\t\t\t\tvalue.step = i; \n-\t\t\t\tvalue.LorR = j; \n-\t\t\t\treturn( value );\n-\t\t\t}\n-\t\t}\n-\t}\n-\tfprintf( stderr, "ERROR!!!\\n" );\n-\tErrorExit( "Error in searchParent" );\n-\tvalue.step=0; // by D.Mathog, katoh\n-\tvalue.LorR=0; 
// by D.Mathog, katoh\n-\treturn( value );\n-}\n-\n-void stopolInit( int n, Node *stopol )\n-{\n-\tint i, j;\n-\tfor( i=0; i<n; i++ )\n-\t{\n-\t\tfor( j=0; j<3; j++ ) \n-\t\t{\t\n-\t\t\tstopol[i].length[j] = 0.0;\n-\t\t\tstopol[i].children[j] = NULL;\n-\t\t\tstopol[i].tmpChildren[j] = -1;\n-\t\t\tstopol[i].top[j] = -1;\n-\t\t\tstopol[i].members[j] = NULL;\n-\t\t\tstopol[i].weightptr[j] = NULL;\n-\t\t}\n-\t}\n-#if 0\n-\twhile( --numintvec >= 0 )\n-\t{\n-\t\tfree( tmpintvec[numintvec] );\n-\t}\n-\tfree( tmpintvec );\n-\tnumintvec = 0;\n-#endif\n-}\n-\n-void treeCnv( Node *stopol, int locnseq, int ***topol, double **len, double **bw )\n-{\n-\tint i;\n-\tNodeInCub parent;\n-\tint *count;\n-\tint ccount;\n-\tint rep;\n-\tint tmpint;\n-\tstatic int **tmpintvec = NULL;\n-\tstatic int numintvec = 0;\n-\n-\tcount = AllocateIntVec( 2 * locnseq ); /* oome */\n-\tif( !count ) ErrorExit( "Cannot allocate count.\\n" );\n-\n-\tcheckMinusLength( locnseq, len ); /* uwagaki */\n-\n-\tstopolInit( locnseq * 2, stopol );\n-\tfor( i=0; i<locnseq * 2; i++ ) count[i] = 0;\n-\n-\tfor( i=locnseq; i<locnseq*2; i++ ) \n-\t{\n-\t\trep = i - locnseq;\n-\t\tparent = searchParent( rep, topol, 0, locnseq-1 ); \n-#if DEBUG\n-\t\tfprintf( stderr, "Parent of node No.%d ( Seq No.%d ) = %d - %d\\n", i, i-locnseq, parent.step, parent.LorR );\n-#endif\n-\n-\t\tccount = count[parent.step];\n-\t\tstopol[parent.step].length[ccount] = len[parent.step][parent.LorR];\n-\t\tstopol[parent.step].weightptr[ccount] = &(bw[parent.step][parent.LorR]);\n-\t\tstopol[parent.step].children[ccount] = &stopol[i];\n-\t\tstopol[parent.step].tmpChildren[ccount] = i;\n-\t\tstopol[parent.step].members[ccount] = topol[parent.step][parent.LorR];\n-\t\tcount[parent.step]++;\n-\n-\t\tccount = count[i];\n-\t\tstopol[i].length[ccount] = len[parent.step][parent.LorR];\n-\t\tstopol[i].weightptr[ccount] = &(bw[parent.step][parent.LorR]);\n-\t\tstopol[i].children[ccount] = &stopol[parent.step];\n-\t\tstopol[i].tmpChildren[ccount] = 
parent.step;\n-\t\tstopol[i].members[ccount] = topol[parent.step][parent.LorR];\n-\t\tcount[i]++;\n-\t}\n-\tfor( i=0; i<locnseq-2; i++ ) \n-\t{\n-\t\trep = MIN( topol[i][0][0], topol[i][1][0] );\n-\t\tparent = searchParent( rep, topol, i+1, locnseq-1 ); \n-\t\tccount = count[parent.step];\n-\t\tstopol[parent.step].length[ccount] = len[parent.step][parent.LorR];\n-\t\tstopol[pare'..b'anch( int nseq, double *result, Node *stopol, int ***topol, int step, int LorR )\n-{\n-\tNode *topNode, *btmNode;\n-\tint i;\n-\n-\tif( step == nseq - 2 )\n-\t{\n-\t\ttopNode = stopol[nseq-2].children[0];\n-\t\tbtmNode = stopol + nseq-3;\n-#if DEBUG\n-\t\tfprintf( stderr, "Now step == nseq-3, topNode = %d, btmNode = %d\\n", topNode - stopol_g, btmNode-stopol_g );\n-#endif\n-\t}\n-\t\t\n-\telse\n-\t{\n-\t\tfor( i=0; i<3; i++ ) \n-\t\t{\n-\t\t\tif( stopol[step].members[i][0] == topol[step][LorR][0] )\n-\t\t\tbreak;\n-\t\t}\n-\t\tif( i== 3 ) ErrorExit( "Incorrect call of weightFromABranch." );\n-\t\tbtmNode = stopol[step].children[i];\n-\t\ttopNode = stopol+step;\n-\t}\n-\n-\tfor( i=0; i<nseq; i++ ) result[i] = 1.0;\n-\tweightFromABranch_rec( result, btmNode, topNode ); \n-\tweightFromABranch_rec( result, topNode, btmNode ); \n-#if 0\n-\tfor( i=0; i<nseq; i++ )\n-\t\tfprintf( stdout, "w[%d] = %30.20f\\n", i, result[i] );\n-#endif\n-//\tfprintf( stderr, "new weight!\\n" );\n-//\tfor( i=0; i<nseq; i++ )\n-//\t\tresult[i] *= result[i];\n-\n-\n-}\n-void assignstrweight_rec( double *strweight, Node *ob, Node *op, char *kozoari, double *seqweight )\n-{\n-\tint i, n, count, lastkozo;\n-\tint dir_ch[3], dir_pa;\n-\tdouble sumweight;\n-\n-#if DEBUG\n-\tfprintf( stderr, "In weightFromABranch_rec, ob = %d\\n", ob - stopol_g );\n-#endif\n-\tif( isLeaf( *ob ) )\n-\t{\n-//\t\tfprintf( stderr, "Leaf!\\n" );\n-\t\treturn;\n-\t}\n-\tfor( i=0, count=0; i<3; i++ ) \n-\t{\n-\t\tif( ob->children[i] != op ) dir_ch[count++] = i;\n-\t\telse dir_pa = i;\n-\t}\n-\tif( count != 2 ) \n-\t{\n-#if DEBUG\n-\t\tfprintf( 
stderr, "Node No.%d has no child like No.%d \\n", ob-stopol_g, op-stopol_g );\n-#endif\n-\t\tErrorExit( "Incorrect call of weightFromABranch_rec" );\n-\t}\n-\n-\n-//\tfprintf( stderr, "\\n" );\n-\tsumweight = 0.0;\n-\tcount = 0;\n-\tlastkozo = -1;\n-\tfor( i=0; (n=ob->members[dir_ch[0]][i])!=-1; i++ ) \n-\t{\n-//\t\tfprintf( stderr, "member1! n=%d\\n", n );\n-\t\tsumweight += seqweight[n];\n-\t\tif( kozoari[n] ) \n-\t\t{\n-\t\t\tcount++;\n-\t\t\tlastkozo = n;\n-\t\t}\n-\t}\n-\tfor( i=0; (n=ob->members[dir_ch[1]][i])!=-1; i++ ) \n-\t{\n-//\t\tfprintf( stderr, "member2! n=%d\\n", n );\n-\t\tsumweight += seqweight[n];\n-\t\tif( kozoari[n] ) \n-\t\t{\n-\t\t\tcount++;\n-\t\t\tlastkozo = n;\n-\t\t}\n-\t}\n-\n-//\tfprintf( stderr, "count = %d\\n", count );\n-\n-\tif( count == 1 )\n-\t\tstrweight[lastkozo] = sumweight;\n-\telse if( count > 1 )\n-\t{\n-\t\tassignstrweight_rec( strweight, ob->children[dir_ch[0]], ob, kozoari, seqweight );\n-\t\tassignstrweight_rec( strweight, ob->children[dir_ch[1]], ob, kozoari, seqweight );\n-\t}\n-}\n-\n-void assignstrweight( int nseq, double *strweight, Node *stopol, int ***topol, int step, int LorR, char *kozoari, double *seqweight )\n-{\n-\tNode *topNode, *btmNode;\n-\tint i;\n-\n-\tif( step == nseq - 2 )\n-\t{\n-\t\ttopNode = stopol[nseq-2].children[0];\n-\t\tbtmNode = stopol + nseq-3;\n-#if DEBUG\n-\t\tfprintf( stderr, "Now step == nseq-3, topNode = %d, btmNode = %d\\n", topNode - stopol_g, btmNode-stopol_g );\n-#endif\n-\t}\n-\t\t\n-\telse\n-\t{\n-\t\tfor( i=0; i<3; i++ ) \n-\t\t{\n-\t\t\tif( stopol[step].members[i][0] == topol[step][LorR][0] )\n-\t\t\tbreak;\n-\t\t}\n-\t\tif( i== 3 ) ErrorExit( "Incorrect call of weightFromABranch." 
);\n-\t\tbtmNode = stopol[step].children[i];\n-\t\ttopNode = stopol+step;\n-\t}\n-\n-\tfor( i=0; i<nseq; i++ ) strweight[i] = 0.0;\n-\tfor( i=0; i<nseq; i++ ) if( kozoari[i] ) strweight[i] = seqweight[i];\n-//\tfprintf( stderr, "calling _rec (1)\\n" );\n-\tassignstrweight_rec( strweight, btmNode, topNode, kozoari, seqweight ); \n-//\tfprintf( stderr, "calling _rec (2)\\n" );\n-\tassignstrweight_rec( strweight, topNode, btmNode, kozoari, seqweight ); \n-\n-#if 1 // nazeka kokowo tobasuto seido ga sagaru ?????\n-\tfprintf( stderr, "STEP %d\\n", step );\n-\tfor( i=0; topol[step][0][i]>-1; i++ )\n-\t\tfprintf( stderr, "%3d ", topol[step][0][i] );\n-\tfprintf( stderr, "\\n" );\n-\tfor( i=0; topol[step][1][i]>-1; i++ )\n-\t\tfprintf( stderr, "%3d ", topol[step][1][i] );\n-\tfprintf( stderr, "\\n" );\n-\tfor( i=0; i<nseq; i++ )\n-\t\tfprintf( stderr, "seqweight[%d] = %f\\n", i, seqweight[i] );\n-\tfor( i=0; i<nseq; i++ )\n-\t\tfprintf( stderr, "strweight[%d] = %f\\n", i, strweight[i] );\n-\tfprintf( stderr, "\\n" );\n-#endif\n-}\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/univscript.tmpl --- a/mafft/core/univscript.tmpl Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,36 +0,0 @@ -progs="_PROGS" -for prog in $progs; do - printf $prog" " -done - -make clean -make CC="$HOME/soft/gcc/usr/local/bin/gcc" CFLAGS="-O3 -m32 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5 -static-libgcc" LIBS="-lm -lpthread -lgcc_eh" ENABLE_MULTITHREAD="-Denablemultithread" -for prog in $progs; do - mv $prog $prog.intel32 -done - -make clean -make CC="$HOME/soft/gcc/usr/local/bin/gcc" CFLAGS="-O3 -m64 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5 -static-libgcc" LIBS="-lm -lpthread -lgcc_eh" ENABLE_MULTITHREAD="-Denablemultithread" -for prog in $progs; do - mv $prog $prog.intel64 -done - -make clean -make CC="gcc-4.0" CFLAGS="-arch ppc64 -m64 -O3 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5" ENABLE_MULTITHREAD="" -for prog in $progs; do - mv $prog $prog.ppc64 -done - -make clean -make CC="gcc-4.0" CFLAGS="-arch ppc -m32 -O3 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5" ENABLE_MULTITHREAD="" -for prog in $progs; do - mv $prog $prog.ppc32 -done - - -set $progs -for prog in $progs; do -# lipo -create $prog.icc $prog.ppc32 $prog.ppc64 -output $prog - lipo -create $prog.intel64 $prog.intel32 $prog.ppc32 $prog.ppc64 -output $prog - cp $prog ../binaries -done |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/core/version.c --- a/mafft/core/version.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -#include "mltaln.h" - -int main() -{ - fprintf( stdout, VERSION"\n" ); - return( 0 ); -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/license --- a/mafft/license Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,48 +0,0 @@ -============================================================ - -If you have the './extensions' directory, please also see -license.extensions file. - -============================================================ - -The codes in the './core' directory is distributed with -the BSD license. - -MAFFT: multiple sequence alignment program -Copyright (c) 2009 Kazutaka Katoh - -Redistribution and use in source and binary forms, -with or without modification, are permitted provided -that the following conditions are met: - -Redistributions of source code must retain the -above copyright notice, this list of conditions -and the following disclaimer. Redistributions in -binary form must reproduce the above copyright -notice, this list of conditions and the following -disclaimer in the documentation and/or other -materials provided with the distribution. - -The name of the author may not be used to endorse -or promote products derived from this software without -specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, -BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT -OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, -OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY -OF SUCH DAMAGE. ---------------------------------------------------------------------------------- - - - |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/readme --- a/mafft/readme Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,139 +0,0 @@ ------------------------------------------------------------------------ - MAFFT: a multiple sequence alignment program - version 7.221beta, 2015/04/16 - - http://mafft.cbrc.jp/alignment/software/ - kazutaka.katoh@aist.go.jp ------------------------------------------------------------------------ - -1. COMPILE - % cd core - % make clean - % make - % cd .. - - To enable multithreading (linux only), - % cd core - Uncomment line 8 of Makefile, - ENABLE_MULTITHREAD = -Denablemultithread - % make clean - % make - % cd .. - - If you have the './extensions' directory, which is for RNA alignments, - % cd extensions - % make clean - % make - % cd .. - - -2. INSTALL (select 2a or 2b []) -2a. Install as root - # cd core - # make install - # cd .. - - If you have the './extensions' directory, - # cd extensions - # make install - # cd .. - - By this procedure (2a), programs are installed into - /usr/local/bin/. Some binaries, which are not directly - used by a user, are installed into /usr/local/libexec/mafft/. - - If the MAFFT_BINARIES environment variable is set to /somewhare/else/, - the binaries in the /somewhere/else/ directory are used, instead - of those in /usr/local/libexec/mafft/. - -2b. Install without being root - % cd core/ - Edit the first line of Makefile - From: - PREFIX = ${PKGDIR}/usr/local - To: - PREFIX = /home/your_home/somewhere - - Edit the third line of Makefile - From: - BINDIR = $(PREFIX)/bin - To: - BINDIR = /home/your_home/bin - (or elsewhere in your command-search path) - % make clean - % make - % make install - - If you have the './extensions' directory, - % cd ../extensions/ - Edit the first line of Makefile - From: - PREFIX = ${PKGDIR}/usr/local - To: - PREFIX = /home/your_home/somewhere - % make clean - % make - % make install - - The MAFFT_BINARIES environment variable *must not be* set. 
- - If the MAFFT_BINARIES environment variable is set to /somewhare/else/, - it overrides the setting of PREFIX (/home/your_home/somewhere/ in the - above example) in Makefile. - -3. CHECK - % cd test - % rehash # if necessary - % mafft sample > test.fftns2 # FFT-NS-2 - % mafft --maxiterate 100 sample > test.fftnsi # FFT-NS-i - % mafft --globalpair sample > test.gins1 # G-INS-1 - % mafft --globalpair --maxiterate 100 sample > test.ginsi # G-INS-i - % mafft --localpair sample > test.lins1 # L-INS-1 - % mafft --localpair --maxiterate 100 sample > test.linsi # L-INS-i - % diff test.fftns2 sample.fftns2 - % diff test.fftnsi sample.fftnsi - % diff test.gins1 sample.gins1 - % diff test.ginsi sample.ginsi - % diff test.lins1 sample.lins1 - - If you have the './extensions' directory, - % mafft-qinsi samplerna > test.qinsi # Q-INS-i - % mafft-xinsi samplerna > test.xinsi # X-INS-i - % diff test.qinsi samplerna.qinsi - % diff test.xinsi samplerna.xinsi - - If you use the multithread version, the results of iterative refinement - methods (*-*-i) are not always identical. Try this test with the single- - thread mode (--thread 0). - - -4. INPUT FORMAT - fasta format. - - The type of input sequences (nucleotide or amino acid) is - automatically recognized based on the frequency of A, T, G, C, U and N. - - -5. USAGE - % /usr/local/bin/mafft input > output - -See also http://mafft.cbrc.jp/alignment/software/ - - -6. UNINSTALL - # rm -r /usr/local/libexec/mafft - # rm /usr/local/bin/mafft - # rm /usr/local/bin/fftns - # rm /usr/local/bin/fftnsi - # rm /usr/local/bin/nwns - # rm /usr/local/bin/nwnsi - # rm /usr/local/bin/linsi - # rm /usr/local/bin/ginsi - # rm /usr/local/bin/mafft-* - # rm /usr/local/share/man/man1/mafft* - - -7. LICENSE - See the './license' file. - - If you have the extensions, see also the './license.extensions' file, |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/sample --- a/mafft/test/sample Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,285 +0,0 @@\n-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]\n-MNGTEGDNFYVPFSNKTGLARSPYEYPQYYLAEPWKYSALAAYMFFLILVGFPVNFLTLF\n-VTVQHKKLRTPLNYILLNLAMANLFMVLFGFTVTMYTSMNGYFVFGPTMCSIEGFFATLG\n-GEVALWSLVVLAIERYIVICKPMGNFRFGNTHAIMGVAFTWIMALACAAPPLVGWSRYIP\n-EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES\n-ASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFTHQGSDFGATFMTLPAFFAKSSAL\n-YNPVIYILMNKQFRNCMITTLCCGKNPLGDDESGASTSKTEVSSVSTSPVSPA\n-> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]\n-MNGTEGPNFYVPFSNITGVVRSPFEQPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLY\n-VTVQHKKLRTPLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLG\n-GEIGLWSLVVLAIERYVVVCKPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIP\n-EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES\n-ATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFTHQGSNFGPIFMTLPAFFAKTASI\n-YNPIIYIMMNKQFRNCMLTSLCCGKNPLGDDEASATASKTETSQVAPA\n-> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9\n-MNGTEGINFYVPMSNKTGVVRSPFEYPQYYLAEPWKYRLVCCYIFFLISTGLPINLLTLL\n-VTFKHKKLRQPLNYILVNLAVADLFMACFGFTVTFYTAWNGYFVFGPVGCAVEGFFATLG\n-GQVALWSLVVLAIERYIVVCKPMGNFRFSATHAMMGIAFTWVMAFSCAAPPLFGWSRYMP\n-EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES\n-ATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFTNKGADFTATLMAVPAFFSKSSSL\n-YNPIIYVLMNKQFRNCMITTICCGKNPFGDEDVSSTVSQSKTEVSSVSSSQVSPA\n-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish\n-MNGTEGKNFYVPMSNRTGLVRSPFEYPQYYLAEPWQFKILALYLFFLMSMGLPINGLTLV\n-VTAQHKKLRQPLNFILVNLAVAGTIMVCFGFTVTFYTAINGYFVLGPTGCAVEGFMATLG\n-GEVALWSLVVLAIERYIVVCKPMGSFKFSSSHAFAGIAFTWVMALACAAPPLFGWSRYIP\n-EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS\n-ASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFFNKGADFSAKFMAIPAFFSKSSAL\n-YNPVIYVLLNKQFRNCMLTTIFCGKNPLGDDESSTVSTSKTEVSSVSPA\n-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - 
goldfish\n-MNGTEGNNFYVPLSNRTGLVRSPFEYPQYYLAEPWQFKLLAVYMFFLICLGLPINGLTLI\n-CTAQHKKLRQPLNFILVNLAVAGAIMVCFGFTVTFYTAINGYFALGPTGCAVEGFMATLG\n-GEVALWSLVVLAIERYIVVCKPMGSFKFSSTHASAGIAFTWVMAMACAAPPLVGWSRYIP\n-EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS\n-ASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFFNKGAAFSAQFMAIPAFFSKTSAL\n-YNPVIYVLLNKQFRSCMLTTLFCGKNPLGDEESSTVSTSKTEVSSVSPA\n-> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-\n-MKQVPEFHEDFYIPIPLDINNLSAYSPFLVPQDHLGNQGIFMAMSVFMFFIFIGGASINI\n-LTILCTIQFKKLRSHLNYILVNLSIANLFVAIFGSPLSFYSFFNRYFIFGATACKIEGFL\n-ATLGGMVGLWSLAVVAFERWLVICKPLGNFTFKTPHAIAGCILPWISALAASLPPLFGWS\n-RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKA\n-QADSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVSHRGEEFDLRMATIPSCLSK\n-ASTVYNPVIYVLMNKQFRSCMMKMVCGKNIEEDEASTSSQVTQVSSVAPEK\n-> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]\n-MRKMSEEEFYLFKNISSVGPWDGPQYHIAPVWAFYLQAAFMGTVFLIGFPLNAMVLVATL\n-RYKKLRQPLNYILVNVSFGGFLLCIFSVFPVFVASCNGYFVFGRHVCALEGFLGTVAGLV\n-TGWSLAFLAFERYIVICKPFGNFRFSSKHALTVVLATWTIGIGVSIPPFFGWSRFIPEGL\n-QCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESATT\n-QKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVNNRNHGLDLRLVTIPSFFSKSACIYNP\n-IIYCFMNKQFQACIMKMVCGKAMTDESDTCSSQKTEVSTVSSTQVGPN\n-> 8=opsin, greensensitive human (fragment) S07060\n-DLAETVIASTISIVNQVSGYFVLGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKP\n-FGNVRFDAKLAIVGIAFSWIWAAVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQS\n-YMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFC\n-> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]\n-MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM\n-IFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISVVNQVYGYFV\n-LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNVRFDAKLAIVGIAFSWIWA\n-AVWTAPPIFGWSRYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYL\n-QVWLAIRAVAKQQKESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAANPGYPFH\n-PLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLFGKKVDDGSELSSASKTEVSSVSS\n-VSPA\n-> 10== Z68193 1 human Red Opsin 
<>[]\n-MAQQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNSTRGPFEGPNYHIAPRWVYHLTSVWM\n-IFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIASTISIVNQVSGYFV\n-LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRFDAKLAIVGIAFSWIWS\n-AVWTAPPIFGWSRYWPH"..b"ISIDR\n-YLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGWAQNVNDDKVCLISQDFGYT\n-IYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRVEPDSVIALNGIVKLQKEVE\n-ECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSC\n-IPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNINRKLSAAGMHEALK\n-LAERPERPEFVLQNADYCRKKGHDS\n-> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]\n-MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL\n-VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM\n-DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW\n-AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV\n-QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL\n-PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQC\n-QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT\n-> 31=p A47425 serotonin receptor 5HT-7 - rat\n-MPHLLSGFLEVTASPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCL\n-VVISVSFVKKLRQPSNYLIVSLALADLSVAVAVMPFVSVTDLIGGKWIFGHFFCNVFIAM\n-DVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASITLPPLFGW\n-AQNVNDDKVCLISQDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRV\n-QPESVISLNGVVKLQKEVEECANLSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWL\n-PFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQC\n-QYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT\n-> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]\n-MDVLSPGQGNNTTSPPAPFETGGNTTGISDVTVSYQVITSLLLGTLIFCAVLGNACVVAA\n-IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC\n-TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED\n-RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADT\n-RHGASPAPQPKKSVNGESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGN\n-SKEHLPLPSEAGPTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP\n-FFIVALVLPFCESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC\n-RQ\n-> 33=p 
A35181 serotonin receptor class 1A - rat\n-MDVFSFGQGNNTTASQEPFGTGGNVTSISDVTFSYQVITSLLLGTLIFCAVLGNACVVAA\n-IALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVLNKWTLGQVTCDLFIALDVLCC\n-TSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLISIPPMLGWRTPED\n-RSDPDACTISKDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGT\n-SLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGN\n-SKEHLPLPSESGSNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLP\n-FFIVALVLPFCESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC\n-RR\n-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]\n-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS\n-HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV\n-MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT\n-ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP\n-DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE\n-ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA\n-NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK\n-LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL\n-NPIIYTIFSPEFRSAFQKILFGKYRRGHR\n-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail\n-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS\n-HSSRSRDNASANDTSATNMTDDRYWSLTVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAV\n-MLERSLHNVANYLILSLAVADLMVAVLVMPLSVVSEISKVWFLHSEVCDMWISVDVLCCT\n-ASILHLVAIAMDRYWAVTSIDYIRRRSARRILLMIMVVWIVALFISIPPLFGWRDPNNDP\n-DKTGTCIISQDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE\n-ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENA\n-NGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYSRTREK\n-LELKRERKAARTLAIITGAFLICWLPFFIIALIGPFVDPEGIPPFARSFVLWLGYFNSLL\n-NPIIYTIFSPEFRSAFQKILFGKYRRGHR\n-> 36== X95604 1 Bombyx mori serotonin receptor 
<antennae>[InsectBiochem.Mol.Bi\n-MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVPNTTWWQASAPFDTPAALVR\n-AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLVMPLGAV\n-YEVVQRWTLGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTNIDYIHASTAKRVGMM\n-IACVWTVSFFVCIAQLLGWKDPDWNQRVSEDLRCVVSQDVGYQIFATASSFYVPVLIILI\n-LYWRIYQTARKRIRRRRGATARGGVGPPPVPAGGALVAGGGSGGIAAAVVAVIGRPLPTI\n-SETTTTGFTNVSSNNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKR\n-ERKAAKTLAIITGAFVACWLPFFVLAILVPTCDCEVSPVLTSLSLWLGYFNSTLNPVIYT\n-VFSPEFRHAFQRLLCGRRVRRRRAPQ\n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/sample.dpparttree --- a/mafft/test/sample.dpparttree Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,504 +0,0 @@\n-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]\n-MNG---------------------------------------------------------\n--TE--GDNFYVPF-----------------SNKTGLARSPYEYPQ---------Y-YLAE\n-PWK---------YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANL\n-FMVLFGF-TVTMYTS-MN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVI\n-CKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDY\n-YTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAAQQ---------------------------------ESASTQKAEKEVTRMVVLMVIG\n-FLVCWVPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCM\n-ITTLCC-------GKNPLGD-DE---SGASTSKT------------------------EV\n-SSVS--------------------------------------------------------\n-------TSPVSP-A---\n-> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]\n-MNG---------------------------------------------------------\n--TE--GPNFYVPF-----------------SNITGVVRSPFEQPQ---------Y-YLAE\n-PWQ---------FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADL\n-FMVFGGF-TTTLYTS-LH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVV\n-CKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDY\n-YTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAQQQ---------------------------------ESATTQKAEKEVTRMVIIMVIF\n-FLICWLPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCM\n-LTSLCC-------GKNPLGD-DE---ASATASKT------------------------E-\n-------------------------------------------------------------\n-------TSQVAP-A---\n-> 3== M92038 1 chicken green sensitive cone opsin 
<retina>[PNAS89,5932-5936'9\n-MNG---------------------------------------------------------\n--TE--GINFYVPM-----------------SNKTGVVRSPFEYPQ---------Y-YLAE\n-PWK---------YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADL\n-FMACFGF-TVTFYTA-WN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVV\n-CKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDY\n-YTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAQQQ---------------------------------ESATTQKAEKEVTRMVILMVLG\n-FMLAWTPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCM\n-ITTICC-------GKNPFGD-EDV-SSTVSQSKT------------------------EV\n-SSVS--------------------------------------------------------\n-------SSQVSP-A---\n-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish\n-MNG---------------------------------------------------------\n--TE--GKNFYVPM-----------------SNRTGLVRSPFEYPQ---------Y-YLAE\n-PWQ---------FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGT\n-IMVCFGF-TVTFYTA-IN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVV\n-CKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDY\n-YTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAQQQ---------------------------------DSASTQKAEREVTKMVILMVFG\n-FLIAWTPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCM\n-LTTIFC-------GKNPLGD-DE--SSTVSTSKT------------------------EV\n-SS----------------------------------------------------------\n----------VSP-A---\n-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - 
goldfish\n-MNG---------------------------------------------------------\n--TE--GNNFYVPL-----------------SNRTGLVRSPFEYPQ---------Y-YLAE\n-PWQ---------FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGA\n-IMVCFGF-TVTFYTA-IN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVV\n-CKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDY\n-YTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAQQQ---------------------------------DSASTQKAEREVTKMVILMVLG\n-FLVAWT"..b"------------------\n-------------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL\n-MVSVLVL-PMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAI\n-TDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS-\n--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK---------------\n-------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNWR\n-LGVESKAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP-----\n-ASFERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGT\n-FILCWLPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF\n-KKIIKC--KFCR------------------------------------------------\n-------------------------------------------------------------\n-----------------Q\n-> 33=p A35181 serotonin receptor class 1A - 
rat\n-M-DVFSFGQ--------GNNTTASQEPFGTGGNVTSI-----------------------\n---SDVTFSY---------------------------------------------------\n-------------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL\n-MVSVLVL-PMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAI\n-TDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS-\n--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK---------------\n-------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDWR\n-RCAENRAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP-----\n-ACLERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGT\n-FILCWLPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF\n-KKIIKC--KFCR------------------------------------------------\n-------------------------------------------------------------\n-----------------R\n-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]\n-M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L\n-VTSDFNDSYGLTGQFINGSHSSRSR-----DNASAN-DTSATNMTDDRYWSLTVY-----\n---------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL\n-MVAVLVM-PLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAV\n-TS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS-\n--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEET\n-TLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK------------\n-----NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-I\n-AMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGA\n-FLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF\n-QKILFG--KYRRG-----------------------------------------------\n-------------------------------------------------------------\n----------------HR\n-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond 
snail\n-M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L\n-VTSDFNDSYGLTGQFINGSHSSRSR-----DNASAN-DTSATNMTDDRYWSLTVY-----\n---------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL\n-MVAVLVM-PLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAV\n-TS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS-\n--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEET\n-TLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK------------\n-----NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-I\n-AMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGA\n-FLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF\n-QKILFG--KYRRG-----------------------------------------------\n-------------------------------------------------------------\n----------------HR\n-> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi\n-M-EGAE-GQEELD----WEALYLRLPLQNCSWNSTGWEPNW------------------N\n-VTVVPNTTW---------------------WQASAPFDTPAALVR---------------\n-------------AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADL\n-LVACLVM-PLGAVYE-VV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAV\n-TN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RCVVS-\n--------QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR---------------\n-------------------------------RRGATARGGVGPPPVP--------------\n--------------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNVSSNN\n-TSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKTLAIITGA\n-FVACWLPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAF\n-QRLLCG--RRVRRRR---------------------------------------------\n-------------------------------------------------------------\n---------------APQ\n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/sample.fftns2 --- a/mafft/test/sample.fftns2 Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,504 +0,0 @@\n-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]\n--------------------MNGTE------------------------GDNF--------\n-YVP----F-SNKTGLARSPY----------------EYPQY-------YLAEPWK-----\n-----YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-F\n-TVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FR\n-FGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNN\n-ESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE------------------------\n----------------------------------------------------AAAAQQ---\n-------------------------------------------------------------\n---------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-\n-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC---------GKN\n-PLGD-DE--SGASTSKTEVSSVS-TSPV--------------------------------\n---------------------------------------------SPA-------------\n-------\n-> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]\n--------------------MNGTE------------------------GPNF--------\n-YVP----F-SNITGVVRSPF----------------EQPQY-------YLAEPWQ-----\n-----FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-F\n-TTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FR\n-FGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNN\n-ESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE------------------------\n----------------------------------------------------AAAQQQ---\n-------------------------------------------------------------\n---------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-\n-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC---------GKN\n-PLGD-DE--ASATASKTE------TSQV--------------------------------\n---------------------------------------------APA-------------\n-------\n-> 3== M92038 1 chicken green sensitive cone opsin 
<retina>[PNAS89,5932-5936'9\n--------------------MNGTE------------------------GINF--------\n-YVP----M-SNKTGVVRSPF----------------EYPQY-------YLAEPWK-----\n-----YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-F\n-TVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FR\n-FSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHN\n-ESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE------------------------\n----------------------------------------------------AAAQQQ---\n-------------------------------------------------------------\n---------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-\n-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC---------GKN\n-PFGD-EDVSSTVSQSKTEVSSVS-SSQV--------------------------------\n---------------------------------------------SPA-------------\n-------\n-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish\n--------------------MNGTE------------------------GKNF--------\n-YVP----M-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ-----\n-----FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-F\n-TVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK\n-FSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNN\n-ESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA------------------------\n----------------------------------------------------AAAQQQ---\n-------------------------------------------------------------\n---------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-\n-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC---------GKN\n-PLGD-DE-SSTVSTSKTEVSS------V--------------------------------\n---------------------------------------------SPA-------------\n-------\n-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - 
goldfish\n--------------------MNGTE------------------------GNNF--------\n-YVP----L-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ-----\n-----FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-F\n-TVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK\n-FSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNN\n-ESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA------------------------\n----------------------------------------------------AAAQQQ---\n-------------------------------------------------------------\n---------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-\n-NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC--"..b"TGG--------\n-----------NTTGISDVTV----------------------------------------\n---SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L\n-PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK\n-RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH---------\n--GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------\n----------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR-\n---------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG-\n--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-\n-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC---------\n------RQ-----------------------------------------------------\n-------------------------------------------------------------\n-------\n-> 33=p A35181 serotonin receptor class 1A - 
rat\n-----------MD-------VFSFG------------QGNNTTASQEPFGTGG--------\n-----------NVTSISDVTF----------------------------------------\n---SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L\n-PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK\n-RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH---------\n--GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------\n----------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG-\n---------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG-\n--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-\n-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC---------\n------RR-----------------------------------------------------\n-------------------------------------------------------------\n-------\n-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]\n-MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG--------\n-----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY\n-SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M\n-PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR\n-RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK---------\n--GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE\n-YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--\n---------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-\n-------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-\n-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR---------\n------RGHR---------------------------------------------------\n-------------------------------------------------------------\n-------\n-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond 
snail\n-MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG--------\n-----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY\n-SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M\n-PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR\n-RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK---------\n--GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE\n-YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--\n---------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-\n-------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-\n-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR---------\n------RGHR---------------------------------------------------\n-------------------------------------------------------------\n-------\n-> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi\n--MEGAEGQEELD-------WEAL-------YLRLP--LQNCSWNSTGWEPNW--------\n-----------NVTVVPNTTW---------WQASAPFDTPAALVRAAAK------------\n---------AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-M\n-PLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHA\n-STAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV---------\n--GYQIFATASSFYVPVLIILILYWRIYQTARKRIR-------------------------\n---------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGI\n-AAAVVAVIGRPLPTISETTTTGFTNVSS----NNTS---PEKQSCANGLEADPPTTGYGA\n-VAAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-\n-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV---------\n------RRRRA--------------------------------------------------\n----------------------------------------------PQ-------------\n-------\n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/sample.fftnsi --- a/mafft/test/sample.fftnsi Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,504 +0,0 @@\n-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]\n---------------------MNGTE--------------------------GDNF-----\n----YVP----F-SNKTGLARSPY----------------EYPQY-------YLAEPWK--\n--------YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLF\n-G-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN\n--FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPN\n-FNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ---------------\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------ESASTQKAEKEVTRMVVLMVIGFLVCWVPYASV\n-AFYIFTHQGS---DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC-----\n---GKNPLGD-DE--SGASTSKTEVSSVS--TSPV--------------------------\n-----------------------------------------------SPA-----------\n----\n-> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]\n---------------------MNGTE--------------------------GPNF-----\n----YVP----F-SNITGVVRSPF----------------EQPQY-------YLAEPWQ--\n--------FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFG\n-G-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN\n--FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPE\n-VNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ---------------\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------ESATTQKAEKEVTRMVIIMVIFFLICWLPYASV\n-AMYIFTHQGS---NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC-----\n---GKNPLGD-DE--ASATASKTE-------TSQV--------------------------\n-----------------------------------------------APA-----------\n----\n-> 3== M92038 1 chicken green sensitive cone opsin 
<retina>[PNAS89,5932-5936'9\n---------------------MNGTE--------------------------GINF-----\n----YVP----M-SNKTGVVRSPF----------------EYPQY-------YLAEPWK--\n--------YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACF\n-G-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN\n--FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPD\n-YHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----REAAAQQQ---------------\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------ESATTQKAEKEVTRMVILMVLGFMLAWTPYAVV\n-AFWIFTNKGA---DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC-----\n---GKNPFGD-EDVSSTVSQSKTEVSSVS--SSQV--------------------------\n-----------------------------------------------SPA-----------\n----\n-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish\n---------------------MNGTE--------------------------GKNF-----\n----YVP----M-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ--\n--------FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCF\n-G-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS\n--FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPD\n-YNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ---------------\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------DSASTQKAEREVTKMVILMVFGFLIAWTPYATV\n-AAWIFFNKGA---DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC-----\n---GKNPLGD-DE-SSTVSTSKTEVSS-------V--------------------------\n-----------------------------------------------SPA-----------\n----\n-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - 
goldfish\n---------------------MNGTE--------------------------GNNF-----\n----YVP----L-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ--\n--------FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCF\n-G-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS\n--FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPE\n-YNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ---------------\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------DSASTQKAEREVTKMVILMVLGFLVAWTPYATV\n-AAWIFFNKGA---AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC-----\n-"..b"-----PFETGG---------------\n--------------NTTGISDVTV-------------------------------------\n------SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL\n-V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY\n-VNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPE-DRSDPDA--CTISKDH------\n-----GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK--------------\n--------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNW\n-RLGVESKAGGALCANGAVRQG-----------------DDGAALEVIEVHRVGNSKEHLP\n-LPSEAGPTPCA--PASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIV\n-ALVLPFCESS-C-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRQ\n-------------------------------------------------------------\n-------------------------------------------------------------\n----\n-> 33=p A35181 serotonin receptor class 1A - 
rat\n------MDVFSFGQ-------GNNTTASQE----------PFGTGG---------------\n--------------NVTSISDVTF-------------------------------------\n------SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL\n-V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY\n-VNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPE-DRSDPDA--CTISKDH------\n-----GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK--------------\n--------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDW\n-RRCAENRAVGTPCTNGAVRQG-----------------DDEATLEVIEVHRVGNSKEHLP\n-LPSESGSNSYA--PACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIV\n-ALVLPFCESS-C-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRR\n-------------------------------------------------------------\n-------------------------------------------------------------\n----\n-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]\n------MANFTFGDLALDVARMGGLASTPS----------GLRSTGLTTPGLSPTG-----\n--------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSL\n-TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL\n-V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY\n-IRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK------\n-----GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEE\n-TTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN--\n--------ANGVNSNSSSS-ER-LKQIQIETAEAFANGCAEEASIAMLERQ-CNNGKKISS\n-NDTPYS------------RT-------REKLELKRERKAARTLAIITGAFLICWLPFFII\n-ALIGPFVDPE---GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--\n-------------------------------------------------------------\n--------------------------------------------------RGHR-------\n----\n-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond 
snail\n------MANFTFGDLALDVARMGGLASTPS----------GLRSTGLTTPGLSPTG-----\n--------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSL\n-TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL\n-V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY\n-IRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK------\n-----GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEE\n-TTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN--\n--------ANGVNSNSSSS-ER-LKQIQIETAEAFANGCAEEASIAMLERQ-CNNGKKISS\n-NDTPYS------------RT-------REKLELKRERKAARTLAIITGAFLICWLPFFII\n-ALIGPFVDPE---GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--\n-------------------------------------------------------------\n--------------------------------------------------RGHR-------\n----\n-> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi\n---------------------MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNW-----\n--------------NVTVVPNTTW---------WQASAPFDTPAAL---------------\n------VRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACL\n-V-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDY\n-IHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV------\n-----GYQIFATASSFYVPVLIILILYWRIY-------QTARKRIRR--------------\n--------------------------------RRGATARGGVGPPPVPAGG-ALVAGGG--\n--------SGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSP---EKQSCANGLEADP\n-PTTGYGAVAAAYYPSLVRRKP------KEAADSKRERKAAKTLAIITGAFVACWLPFFVL\n-AILVPTCDCE----VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRVRR\n-R-----------------------------------------------------------\n----------------------------------------------RAPQ-----------\n----\n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/sample.gins1 --- a/mafft/test/sample.gins1 Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,504 +0,0 @@\n-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]\n---------------------MN-------------------------GTE-------GDN\n-FYVP-----------------------------------------FSNKTG---------\n---LARSPYEYPQY-YLAEPW----------------------KYSALAAYMFFLILVGFP\n-VNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTM\n-CSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-\n-AAPPLVG-WS-----RYIPEGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFF\n-CYGRLLCTVKEAAAAQQESA----------------------------------------\n-------------------------------------------------------------\n------------------------------------------STQKAEKEVTRMVVLMVIG\n-FLVCWVPYASVAFYIFT-HQGS-D-FGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCM\n-ITTL-----CCGKNPLGDDE-SG-ASTSKTEVSSVST-----------------------\n-------------------------------------------------------------\n-------------SPV-------SP-A\n-> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]\n---------------------MN-------------------------GTE-------GPN\n-FYVP-----------------------------------------FSNITG---------\n---VVRSPFEQPQY-YLAEPW----------------------QFSMLAAYMFLLIVLGFP\n-INFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTG\n-CNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-\n-AAPPLVG-WS-----RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFF\n-CYGQLVFTVKEAAAQQQESA----------------------------------------\n-------------------------------------------------------------\n------------------------------------------TTQKAEKEVTRMVIIMVIF\n-FLICWLPYASVAMYIFT-HQGS-N-FGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCM\n-LTSL-----CCGKNPLGDDE-AS-ATASKTE-----T-----------------------\n-------------------------------------------------------------\n-------------SQV-------AP-A\n-> 3== M92038 1 chicken green sensitive cone opsin 
<retina>[PNAS89,5932-5936'9\n---------------------MN-------------------------GTE-------GIN\n-FYVP-----------------------------------------MSNKTG---------\n---VVRSPFEYPQY-YLAEPW----------------------KYRLVCCYIFFLISTGLP\n-INLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVG\n-CAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-\n-AAPPLFG-WS-----RYMPEGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFF\n-SYGRLICKVREAAAQQQESA----------------------------------------\n-------------------------------------------------------------\n------------------------------------------TTQKAEKEVTRMVILMVLG\n-FMLAWTPYAVVAFWIFT-NKGA-D-FTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCM\n-ITTI-----CCGKNPFGDEDVSSTVSQSKTEVSSVSS-----------------------\n-------------------------------------------------------------\n-------------SQV-------SP-A\n-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish\n---------------------MN-------------------------GTE-------GKN\n-FYVP-----------------------------------------MSNRTG---------\n---LVRSPFEYPQY-YLAEPW----------------------QFKILALYLFFLMSMGLP\n-INGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTG\n-CAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-\n-AAPPLFG-WS-----RYIPEGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFF\n-TYGRLVCTVKAAAAQQQDSA----------------------------------------\n-------------------------------------------------------------\n------------------------------------------STQKAEREVTKMVILMVFG\n-FLIAWTPYATVAAWIFF-NKGA-D-FSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCM\n-LTTI-----FCGKNPLGDDE-SSTVSTSKTEVSS--------------------------\n-------------------------------------------------------------\n---------------V-------SP-A\n-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - 
goldfish\n---------------------MN-------------------------GTE-------GNN\n-FYVP-----------------------------------------LSNRTG---------\n---LVRSPFEYPQY-YLAEPW----------------------QFKLLAVYMFFLICLGLP\n-INGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTG\n-CAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-\n-AAPPLVG-WS-----RYIPEGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFF\n-TYGRLVCTVKAAAAQQQDSA----------------------------------------\n-------------------------------------------------------------\n---------------------------------"..b"-------------VTVSYQVITSLLLGTLIFCAVL\n-GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVT\n-CDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-\n-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPLLLMLV\n-LYGRIFRAARFRIRKTVKKVEKTGADTRHGASPAPQPKK-----SVNG--ESGSRNWRLG\n-VESKAGGALCANGAVRQGDDGAAL--EVIEVHRVGNSKEHLPLPSEAGPTPCAPAS----\n---------------FERKNERNA-------------EAKRKMALARERKTVKTLGIIMGT\n-FILCWLPFFIVALVLPF-CESSCH-MPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF\n-KKII-----KCKF-----------------------------------------------\n-------------------------------------------------------------\n-----------CR-------------Q\n-> 33=p A35181 serotonin receptor class 1A - 
rat\n---------------------MDVFSFG---------------------------------\n----------------------------------------------QGNNT----------\n--TASQEPF------GTGGNVTSISD-------------VTFSYQVITSLLLGTLIFCAVL\n-GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVT\n-CDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-\n-SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPLLLMLV\n-LYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSSAPPPKK-----SLNG--QPGSGDWRRC\n-AENRAVGTPCTNGAVRQGDDEATL--EVIEVHRVGNSKEHLPLPSESGSNSYAPAC----\n---------------LERKNERNA-------------EAKRKMALARERKTVKTLGIIMGT\n-FILCWLPFFIVALVLPF-CESSCH-MPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF\n-KKII-----KCKF-----------------------------------------------\n-------------------------------------------------------------\n-----------CR-------------R\n-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]\n---------------------MANFTFGDLALDVARMGGLASTPSGLRS-----TGLTTPG\n-LSPTGLVTSDFNDSYGLTGQFINGSHSSRSRD-----------NASANDT----------\n--SATNM---------TDDRYWSLTV-------------YSHEHLVLTSVILGLFVLCCII\n-GNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEV\n-CDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-\n-SIPPLFG-WRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMI\n-IYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKK\n-RRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEA\n-SIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAIITGA\n-FLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF\n-QKIL-----FGKY-----------------------------------------------\n-------------------------------------------------------------\n-----------RR----------GH-R\n-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond 
snail\n---------------------MANFTFGDLALDVARMGGLASTPSGLRS-----TGLTTPG\n-LSPTGLVTSDFNDSYGLTGQFINGSHSSRSRD-----------NASANDT----------\n--SATNM---------TDDRYWSLTV-------------YSHEHLVLTSVILGLFVLCCII\n-GNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEV\n-CDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-\n-SIPPLFG-WRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMI\n-IYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKK\n-RRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEA\n-SIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAIITGA\n-FLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF\n-QKIL-----FGKY-----------------------------------------------\n-------------------------------------------------------------\n-----------RR----------GH-R\n-> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi\n---------------------M---------------------------------------\n-----------------------EGAEGQEELDWEALYLRLPLQNCSWNSTGWEPN-----\n-WNVTVV---------PNTTWWQASAPFDT--------PAALVRAAAKAVVLGLLILATVV\n-GNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPEL\n-CDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-\n-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFATASSFYVPVLIILI\n-LYWRIYQTARKRIRR------RRGATARGGVGPPPVPAG---------------------\n-------------GALVAGGGSGGIAAAVVAV-----IGRPLPTISETTTTGFTNVSSNNT\n-SPE--KQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKRERKAAKTLAIITGA\n-FVACWLPFFVLAILVPT-CD--CE-VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAF\n-QRLL-----CGRR-----------------------------------------------\n-------------------------------------------------------------\n-----------VRRRR-------AP-Q\n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/sample.ginsi --- a/mafft/test/sample.ginsi Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,504 +0,0 @@\n-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]\n-MN-------------------------GTE-------GDNFYVP----------------\n--------------------------FSNKTG------LARSPYEYPQY-YLAEPW-----\n------------------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLN\n-LAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIE\n-RYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQCS\n-CGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA-----\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------STQKAEKEVTRMVVLMVIGFLVCWVPY\n-ASVAFYIFT---HQGS-DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTL---\n---CCGKNPLGDDE-SG-ASTSKTEVSSVST------------------------------\n-------------------------------------------------------------\n-----SPVSPA\n-> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]\n-MN-------------------------GTE-------GPNFYVP----------------\n--------------------------FSNITG------VVRSPFEQPQY-YLAEPW-----\n------------------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLN\n-LAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIE\n-RYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQCS\n-CGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA-----\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------TTQKAEKEVTRMVIIMVIFFLICWLPY\n-ASVAMYIFT---HQGS-NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSL---\n---CCGKNPLGDDE-AS-ATASKTET-----------------------------------\n-------------------------------------------------------------\n-----SQVAPA\n-> 3== M92038 1 chicken green sensitive cone opsin 
<retina>[PNAS89,5932-5936'9\n-MN-------------------------GTE-------GINFYVP----------------\n--------------------------MSNKTG------VVRSPFEYPQY-YLAEPW-----\n------------------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVN\n-LAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIE\n-RYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQCS\n-CGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA-----\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------TTQKAEKEVTRMVILMVLGFMLAWTPY\n-AVVAFWIFT---NKGA-DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTI---\n---CCGKNPFGDEDVSSTVSQSKTEVSSVSS------------------------------\n-------------------------------------------------------------\n-----SQVSPA\n-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish\n-MN-------------------------GTE-------GKNFYVP----------------\n--------------------------MSNRTG------LVRSPFEYPQY-YLAEPW-----\n------------------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVN\n-LAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIE\n-RYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQCS\n-CGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA-----\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------STQKAEREVTKMVILMVFGFLIAWTPY\n-ATVAAWIFF---NKGA-DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTI---\n---FCGKNPLGDDE-SSTVSTSKTEVSS---------------------------------\n-------------------------------------------------------------\n-------VSPA\n-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - 
goldfish\n-MN-------------------------GTE-------GNNFYVP----------------\n--------------------------LSNRTG------LVRSPFEYPQY-YLAEPW-----\n------------------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVN\n-LAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIE\n-RYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQCS\n-CGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA-----\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------STQKAEREVTKMVILMVLGFLVAWTPY\n-ATVAAWIFF---NKGA-AFSAQFMAIPAFFSKTS"..b"------------------QGNNTT------SPPAPF------ETGGNTTGISD\n--------------VTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGS\n-LAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALD\n-RYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACT\n-ISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGA\n-DTRHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALCA--------------\n--NGAVRQGD---------------------------------DGAALEVIEVHRVGNSKE\n-HLPLPSEAG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPF\n-FIVALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII---\n---KCKF------------------------------------------------------\n-------------------------------------------------------------\n---CR-----Q\n-> 33=p A35181 serotonin receptor class 1A - 
rat\n-MDVFSFG-----------------------------------------------------\n--------------------------QGNNTT------ASQEPF------GTGGNVTSISD\n--------------VTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGS\n-LAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALD\n-RYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACT\n-ISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGA\n-GTSLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPCT--------------\n--NGAVRQGD---------------------------------DEATLEVIEVHRVGNSKE\n-HLPLPSESG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPF\n-FIVALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII---\n---KCKF------------------------------------------------------\n-------------------------------------------------------------\n---CR-----R\n-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]\n-MANFTFGDLALDVARMGGLASTPSGLRSTGL-----TTPGLSPTGLVTSDFNDSYGLTGQ\n-FINGSHSSRSRD-----------NASANDTS------ATNMT---------DDRYWSLTV\n--------------YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILS\n-LAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMD\n-RYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCI\n-ISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARL\n-KTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLP\n-ENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK\n--------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPF\n-FIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL---\n---FGKY------------------------------------------------------\n-------------------------------------------------------------\n------RRGHR\n-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond 
snail\n-MANFTFGDLALDVARMGGLASTPSGLRSTGL-----TTPGLSPTGLVTSDFNDSYGLTGQ\n-FINGSHSSRSRD-----------NASANDTS------ATNMT---------DDRYWSLTV\n--------------YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILS\n-LAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMD\n-RYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCI\n-ISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARL\n-KTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLP\n-ENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK\n--------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPF\n-FIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL---\n---FGKY------------------------------------------------------\n-------------------------------------------------------------\n------RRGHR\n-> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi\n-ME----------------------------------------------------------\n----GAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVP---------NTTWWQASA\n-PFDTP--------AALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILS\n-LAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALD\n-RYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCV\n-VSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARGGV\n-GP-------PP-----------------------------------------------VP\n-AGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSCANGLE\n-ADPPTTGYGAVAAAYYPSLVRR------KPKEAADSKRERKAAKTLAIITGAFVACWLPF\n-FVLAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLL---\n---CGRR------------------------------------------------------\n-------------------------------------------------------------\n---VRRRRAPQ\n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/sample.lins1 --- a/mafft/test/sample.lins1 Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,468 +0,0 @@\n-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]\n--------------------------MNGTE--G-------------------DNFYVPFS\n-NKTG--------------------------------LARSPYEYPQY-------------\n----YLAEPW---------KYSA-----LAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPL\n-NYILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSL\n-VVLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLV-GWS-----RYIP\n-EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES\n-A-----------------------------------------------------------\n-------------------------------------------------------------\n-----------------------STQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H\n-QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC------GKNPLGD\n-DE-SG-ASTSK-TEVSSVS--TSPVSPA--------------------------------\n------------------------------------------------------------\n-> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]\n--------------------------MNGTE--G-------------------PNFYVPFS\n-NITG--------------------------------VVRSPFEQPQY-------------\n----YLAEPW---------QFSM-----LAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPL\n-NYILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSL\n-VVLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLV-GWS-----RYIP\n-EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES\n-A-----------------------------------------------------------\n-------------------------------------------------------------\n-----------------------TTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H\n-QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC------GKNPLGD\n-DE-AS-ATASK-TETSQVA--PA-------------------------------------\n------------------------------------------------------------\n-> 3== M92038 1 chicken green sensitive cone opsin 
<retina>[PNAS89,5932-5936'9\n--------------------------MNGTE--G-------------------INFYVPMS\n-NKTG--------------------------------VVRSPFEYPQY-------------\n----YLAEPW---------KYRL-----VCCYIFFLISTGLPINLLTLLVTFKHKKLRQPL\n-NYILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSL\n-VVLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLF-GWS-----RYMP\n-EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES\n-A-----------------------------------------------------------\n-------------------------------------------------------------\n-----------------------TTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N\n-KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC------GKNPFGD\n-EDVSSTVSQSK-TEVSSVS--SSQVSPA--------------------------------\n------------------------------------------------------------\n-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish\n--------------------------MNGTE--G-------------------KNFYVPMS\n-NRTG--------------------------------LVRSPFEYPQY-------------\n----YLAEPW---------QFKI-----LALYLFFLMSMGLPINGLTLVVTAQHKKLRQPL\n-NFILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSL\n-VVLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLF-GWS-----RYIP\n-EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS\n-A-----------------------------------------------------------\n-------------------------------------------------------------\n-----------------------STQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N\n-KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC------GKNPLGD\n-DE-SSTVSTSK-TEVSSVS--PA-------------------------------------\n------------------------------------------------------------\n-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - 
goldfish\n--------------------------MNGTE--G-------------------NNFYVPLS\n-NRTG--------------------------------LVRSPFEYPQY-------------\n----YLAEPW---------QFKL-----LAVYMFFLICLGLPINGLTLICTAQHKKLRQPL\n-NFILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSL\n-VVLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLV-GWS-----RYIP\n-EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS\n-A-----------------------------------------------------------\n-------------------------------------------------------------\n-----------------------STQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N\n-KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC------GKNPLGD\n-EE-SSTVSTSK-TEVSSVS--PA-"..b"---------------------MDVLS-PG---------------------------\n-------------------------------------QGNNT--------TSPPAPFETGG\n-NTTGIS-------DVTVSYQVI-----TSLLLGTLIFCAVLGNACVVAAIALERSLQNVA\n-NYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHL\n-CAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTPEDRSDPD\n----ACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKK\n-VEKTGADTRHGASPAPQPKK-----SVNG--ESGSRNWRLGVESKAGGALCANGAVRQGD\n-DGAAL--EVIEVHRVGNSKEHLPLPSEAGPTPCAPAS------------------FERKN\n-ERNA-------------EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C\n-ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KIIKCK--FCRQ-----\n-------------------------------------------------------------\n------------------------------------------------------------\n-> 33=p A35181 serotonin receptor class 1A - 
rat\n--------------------------MDVFS-FG---------------------------\n-------------------------------------QGNNT--------TASQEPFGTGG\n-NVTSIS-------DVTFSYQVI-----TSLLLGTLIFCAVLGNACVVAAIALERSLQNVA\n-NYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHL\n-CAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTPEDRSDPD\n----ACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRK\n-VEKKGAGTSLGTSSAPPPKK-----SLNG--QPGSGDWRRCAENRAVGTPCTNGAVRQGD\n-DEATL--EVIEVHRVGNSKEHLPLPSESGSNSYAPAC------------------LERKN\n-ERNA-------------EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C\n-ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KIIKCK--FCRR-----\n-------------------------------------------------------------\n------------------------------------------------------------\n-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]\n----------------MANFTFGDLALDVAR-MGGLASTPS---GLRS-----TGLTTPGL\n-SPTGLVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---TDD\n-RYWSLT-------VYSHEHLVL-----TSVILGLFVLCCIIGNCFVIAAVMLERSLHNVA\n-NYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHL\n-VAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDPNN--DPD\n-KTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQ\n-MTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKN\n-RAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSND\n-TPYS-------------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-V\n-DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KILFGK--YRRGH-R--\n-------------------------------------------------------------\n------------------------------------------------------------\n-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond 
snail\n----------------MANFTFGDLALDVAR-MGGLASTPS---GLRS-----TGLTTPGL\n-SPTGLVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---TDD\n-RYWSLT-------VYSHEHLVL-----TSVILGLFVLCCIIGNCFVIAAVMLERSLHNVA\n-NYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHL\n-VAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDPNN--DPD\n-KTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQ\n-MTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKN\n-RAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSND\n-TPYS-------------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-V\n-DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KILFGK--YRRGH-R--\n-------------------------------------------------------------\n------------------------------------------------------------\n-> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi\n-----------------MEGAEGQEELDWEA-LY---------------------LRLP--\n------------------------------LQ---NCSWNSTGWEPNW--NVTVV---PNT\n-TWWQAS-------APFDTPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAA\n-NNLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHL\n-VAIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLL-GWKDPDWNQRVS\n-EDLRCVVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRR----\n---RRGATARGGVGPPPVPAG---------------------------------GALVAGG\n-GSGGIAAAVVAV-----IGRPLPTISETTTTGFTNVSSNNTSPE--KQSCANGLEADPPT\n-TGYGAVAAAYYPSLVRRKPKEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT-C\n-DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQ-RLLCGR--RVRRR-RA-\n----------------PQ-------------------------------------------\n------------------------------------------------------------\n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/sample.linsi --- a/mafft/test/sample.linsi Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,504 +0,0 @@\n-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]\n---------------------MNGTE-G---------------------DNFYVPFSNKTG\n---------------------------------LARSPYEYPQY-----------------\n--------YLAEPW---------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLN\n-YILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLV\n-VLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLV-GWS-----RYIPE\n-GMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------------STQKAEKEVTRMVVLMVIGFL\n-VCWVPYASVAFYIFT---HQGS-DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMI\n-TTLCC-----GKNPLGDDE--SGASTSK-TEVSSVS-TSPVSPA----------------\n-------------------------------------------------------------\n-------------------------\n-> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]\n---------------------MNGTE-G---------------------PNFYVPFSNITG\n---------------------------------VVRSPFEQPQY-----------------\n--------YLAEPW---------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLN\n-YILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLV\n-VLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLV-GWS-----RYIPE\n-GMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------------TTQKAEKEVTRMVIIMVIFFL\n-ICWLPYASVAMYIFT---HQGS-NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCML\n-TSLCC-----GKNPLGDDE--ASATASK-TETSQVA-PA---------------------\n-------------------------------------------------------------\n-------------------------\n-> 3== M92038 1 chicken green sensitive cone opsin 
<retina>[PNAS89,5932-5936'9\n---------------------MNGTE-G---------------------INFYVPMSNKTG\n---------------------------------VVRSPFEYPQY-----------------\n--------YLAEPW---------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLN\n-YILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLV\n-VLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLF-GWS-----RYMPE\n-GMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------------TTQKAEKEVTRMVILMVLGFM\n-LAWTPYAVVAFWIFT---NKGA-DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMI\n-TTICC-----GKNPFGDEDVSSTVSQSK-TEVSSVS-SSQVSPA----------------\n-------------------------------------------------------------\n-------------------------\n-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish\n---------------------MNGTE-G---------------------KNFYVPMSNRTG\n---------------------------------LVRSPFEYPQY-----------------\n--------YLAEPW---------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLN\n-FILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLV\n-VLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLF-GWS-----RYIPE\n-GMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------------STQKAEREVTKMVILMVFGFL\n-IAWTPYATVAAWIFF---NKGA-DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCML\n-TTIFC-----GKNPLGDDE-SSTVSTSK-TEVSSVS-PA---------------------\n-------------------------------------------------------------\n-------------------------\n-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - 
goldfish\n---------------------MNGTE-G---------------------NNFYVPLSNRTG\n---------------------------------LVRSPFEYPQY-----------------\n--------YLAEPW---------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLN\n-FILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLV\n-VLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLV-GWS-----RYIPE\n-GIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA\n-------------------------------------------------------------\n-------------------------------------------------------------\n----------------------------------------S"..b"--DVTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVAN\n-YLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLC\n-AIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP---EDRSD\n-PDACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKV\n-EKTGADTRHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALCA---------\n-------NGAVRQGD---------------------------------DGAALEVIEVHRV\n-GNSKEHLPLPSEAG--PTPCAPASFERKNERNAEA-KRKMALARERKTVKTLGIIMGTFI\n-LCWLPFFIVALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK\n-KIIKCK--FCRQ------------------------------------------------\n-------------------------------------------------------------\n-------------------------\n-> 33=p A35181 serotonin receptor class 1A - 
rat\n---------------------MDVFSFG-------------------------------QG\n---------------------------N---NTTASQE---PFG-----------------\n--TGGNVTSIS-------DVTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVAN\n-YLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLC\n-AIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP---EDRSD\n-PDACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKV\n-EKKGAGTSLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPCT---------\n-------NGAVRQGD---------------------------------DEATLEVIEVHRV\n-GNSKEHLPLPSESG--SNSYAPACLERKNERNAEA-KRKMALARERKTVKTLGIIMGTFI\n-LCWLPFFIVALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK\n-KIIKCK--FCRR------------------------------------------------\n-------------------------------------------------------------\n-------------------------\n-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]\n---------------------MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG\n-LVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM----------\n--TDDRYWSLT-------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVAN\n-YLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLV\n-AIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NNDPDK\n-TGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQM\n-TKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNR\n-AKKLPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQC\n-NNGKK-------------------ISSNDTPYSRT-REKLELKRERKAARTLAIITGAFL\n-ICWLPFFIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ\n-KILFGK--YRRGH-----------------------------------------------\n-------------------------------------------------------------\n--------------------R----\n-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond 
snail\n---------------------MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG\n-LVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM----------\n--TDDRYWSLT-------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVAN\n-YLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLV\n-AIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NNDPDK\n-TGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQM\n-TKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNR\n-AKKLPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQC\n-NNGKK-------------------ISSNDTPYSRT-REKLELKRERKAARTLAIITGAFL\n-ICWLPFFIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ\n-KILFGK--YRRGH-----------------------------------------------\n-------------------------------------------------------------\n--------------------R----\n-> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi\n---------------------MEGAE-GQEELD-------------WEALYLRLP------\n--------------------------LQ---NCSWNSTGWEPNW--NVTVV----------\n--PNTTWWQASAPFDT--PAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAAN\n-NLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLV\n-AIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLL-GWKDPDWNQRVSE\n-DLRCVVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGAT\n-ARGGVG--------PPPV------------------------------------------\n-----PAGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSC\n-ANGLEADPPTTGYGAVAAAYYPSLVRR------KP-KEAADSKRERKAAKTLAIITGAFV\n-ACWLPFFVLAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQ\n-RLLCGR--RVRRR-----------------------------------------------\n-------------------------------------------------------------\n--------------------RAPQ-\n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/sample.parttree --- a/mafft/test/sample.parttree Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,504 +0,0 @@\n-> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]\n-MNG---------------------------------------------------------\n--TE--GDNFYVPF-----------------SNKTGLARSPYEYPQ---------Y-YLAE\n-PWK---------YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANL\n-FMVLFG-FTVTMYTS-MN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVI\n-CKPMGN-FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDY\n-YTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAAQQ---------------------------------ESASTQKAEKEVTRMVVLMVIG\n-FLVCWVPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCM\n-ITTLC----C---GKNPLGD-DE--SGASTSKTEV-------------------------\n---------------------------------------------------------SSVS\n--------TSPVSP-A----------\n-> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]\n-MNG---------------------------------------------------------\n--TE--GPNFYVPF-----------------SNITGVVRSPFEQPQ---------Y-YLAE\n-PWQ---------FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADL\n-FMVFGG-FTTTLYTS-LH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVV\n-CKPMSN-FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDY\n-YTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAQQQ---------------------------------ESATTQKAEKEVTRMVIIMVIF\n-FLICWLPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCM\n-LTSLC----C---GKNPLGD-DE--ASATASKTE--------------------------\n-------------------------------------------------------------\n--------TSQVAP-A----------\n-> 3== M92038 1 chicken green sensitive cone opsin 
<retina>[PNAS89,5932-5936'9\n-MNG---------------------------------------------------------\n--TE--GINFYVPM-----------------SNKTGVVRSPFEYPQ---------Y-YLAE\n-PWK---------YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADL\n-FMACFG-FTVTFYTA-WN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVV\n-CKPMGN-FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDY\n-YTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAQQQ---------------------------------ESATTQKAEKEVTRMVILMVLG\n-FMLAWTPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCM\n-ITTIC----C---GKNPFGD-EDVSSTVSQSKTEV-------------------------\n---------------------------------------------------------SSVS\n--------SSQVSP-A----------\n-> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish\n-MNG---------------------------------------------------------\n--TE--GKNFYVPM-----------------SNRTGLVRSPFEYPQ---------Y-YLAE\n-PWQ---------FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGT\n-IMVCFG-FTVTFYTA-IN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVV\n-CKPMGS-FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDY\n-YTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAQQQ---------------------------------DSASTQKAEREVTKMVILMVFG\n-FLIAWTPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCM\n-LTTIF----C---GKNPLGD-DE-SSTVSTSKTEV-------------------------\n---------------------------------------------------------SS--\n-----------VSP-A----------\n-> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - 
goldfish\n-MNG---------------------------------------------------------\n--TE--GNNFYVPL-----------------SNRTGLVRSPFEYPQ---------Y-YLAE\n-PWQ---------FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGA\n-IMVCFG-FTVTFYTA-IN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVV\n-CKPMGS-FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDY\n-YTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA---------------\n-------------------------------------------------------------\n------------------------------------------------------------A\n-AAQQQ-------------------------------"..b"GTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL\n-MVSVLV-LPMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAI\n-TDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS-\n--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK---------------\n-------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNWR\n-LGVESKAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP-----\n-ASFERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGT\n-FILCWLPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF\n-KKIIK--------CKFCR------------------------------------------\n-------------------------------------------------------------\n-------------------------Q\n-> 33=p A35181 serotonin receptor class 1A - 
rat\n-M-DVFSFGQ--------GNNTTASQEPFGTGGNVTSI-----------------------\n---SDVTFSY---------------------------------------------------\n-------------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDL\n-MVSVLV-LPMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAI\n-TDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS-\n--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK---------------\n-------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDWR\n-RCAENRAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP-----\n-ACLERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGT\n-FILCWLPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF\n-KKIIK--------CKFCR------------------------------------------\n-------------------------------------------------------------\n-------------------------R\n-> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]\n-M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L\n-VTSDFNDSYGLTG-----QFINGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVY-----\n---------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL\n-MVAVLV-MPLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAV\n-TS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS-\n--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEET\n-TLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK------------\n-----NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-I\n-AMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGA\n-FLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF\n-QKILF--------GKYRRG-----------------------------------------\n-------------------------------------------------------------\n------------------------HR\n-> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond 
snail\n-M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L\n-VTSDFNDSYGLTG-----QFINGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVY-----\n---------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADL\n-MVAVLV-MPLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAV\n-TS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS-\n--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEET\n-TLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK------------\n-----NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-I\n-AMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGA\n-FLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF\n-QKILF--------GKYRRG-----------------------------------------\n-------------------------------------------------------------\n------------------------HR\n-> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi\n-M-EGAE-GQEELD----WEALYLRLPLQNCSWNSTGWEPNW------------------N\n-VTVVPNTTW---------------------WQASAPFDTPAALVR---------------\n-------------AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADL\n-LVACLV-MPLGAVYE-VV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAV\n-TN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RCVVS-\n--------QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR---------------\n-------------------------------RRGATARGGVGPPPVP--------------\n--------------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNVSSNN\n-TSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKTLAIITGA\n-FVACWLPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAF\n-QRLLC--------GRRVRRR----R-----------------------------------\n-------------------------------------------------------------\n-----------------------APQ\n" |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/samplerna --- a/mafft/test/samplerna Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,36 +0,0 @@ ->AJ006331.1_1230 -ccauggcguuaguau -gagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc -ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcuugcgagu - ->Z84287.1_1250 -uucacgcagaaagcgucuagccauggcgu -uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc -ucgaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcucgagagu - ->AF064490.1_2296 -ugagu -gucgaacagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccgggaugaccggguccuuucuuggauaaacccgc -ucaaugcccggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag -accgugcaacaugagcacgaauccuaaaccucaaagaaaaaccaaaagaaacaccaaccg ->Z84230.1_1250 -uucacgcagaaagcgucuagccauggcgu -uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggauaagcccgc -ucaaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcucgagagu - ->AB049100.1_1360 -auagaucacuccccugugaggaacuacugucuucacgcagaaagcgucuagccauggcgu -uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc -ucaaugccuggagauuugggcgugcccccgcgagaccgcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag -accgugcaccaugagcacgaauccuaaaccucaaagaaaaaccaaacguaacaccaaccg - |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/samplerna.qinsi --- a/mafft/test/samplerna.qinsi Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,35 +0,0 @@ ->AJ006331.1_1230 ----------------------------------------------------ccauggcgu -uaguaugagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc -ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcuugcgagu------------------- ------------------------------------------------------------- ->Z84287.1_1250 --------------------------------uucacgcagaaagcgucuagccauggcgu -uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc -ucgaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcucgagagu------------------- ------------------------------------------------------------- ->AF064490.1_2296 ------------------------------------------------------------- ------ugagugucgaacagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccgggaugaccggguccuuucuuggauaaacccgc -ucaaugcccggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag -accgugcaacaugagcacgaauccuaaaccucaaagaaaaaccaaaagaaacaccaaccg ->Z84230.1_1250 --------------------------------uucacgcagaaagcgucuagccauggcgu -uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggauaagcccgc -ucaaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcucgagagu------------------- ------------------------------------------------------------- ->AB049100.1_1360 -auagaucacuccccugugaggaacuacugucuucacgcagaaagcgucuagccauggcgu -uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc -ucaaugccuggagauuugggcgugcccccgcgagaccgcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag -accgugcaccaugagcacgaauccuaaaccucaaagaaaaaccaaacguaacaccaaccg |
b |
diff -r e4d75f9efb90 -r 675a8370675b mafft/test/samplerna.xinsi --- a/mafft/test/samplerna.xinsi Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,35 +0,0 @@ ->AJ006331.1_1230 -c--------------------------------ca-------------------uggcgu -uaguaugagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc -ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcuugcga--------------------- -----------------------------------------------------------gu ->Z84287.1_1250 -u-------------------------------ucacgcagaaagcgucuagccauggcgu -uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc -ucgaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcucgaga--------------------- -----------------------------------------------------------gu ->AF064490.1_2296 -u----------------------------------------------------------- -------gagugucgaacagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccgggaugaccggguccuuucuuggauaaacccgc -ucaaugcccggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag -accgugcaacaugagcacgaauccuaaaccucaaagaaaaaccaaaagaaacaccaaccg ->Z84230.1_1250 -u-------------------------------ucacgcagaaagcgucuagccauggcgu -uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggauaagcccgc -ucaaugccuggagauuugggcgugcccccgcgagacugcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcucgaga--------------------- -----------------------------------------------------------gu ->AB049100.1_1360 -auagaucacuccccugugaggaacuacugucuucacgcagaaagcgucuagccauggcgu -uaguaugagugucgugcagccuccaggacccccccucccgggagagccauaguggucugc -ggaaccggugaguacaccggaauugccaggacgaccggguccuuucuuggaucaacccgc -ucaaugccuggagauuugggcgugcccccgcgagaccgcuagccgaguaguguugggucg -cgaaaggccuugugguacugccugauagggugcuugcgagugccccgggaggucucguag -accgugcaccaugagcacgaauccuaaaccucaaagaaaaaccaaacguaacaccaaccg |
b |
diff -r e4d75f9efb90 -r 675a8370675b make-barcodes.awk --- a/make-barcodes.awk Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,59 +0,0 @@ -# The awk code that transforms the one-line fastq record pair into the output that can be sorted -# by barcode. -# Input columns (the 4 FASTQ lines for both reads in a read pair): -# 1: read1 name -# 2: read2 name -# 3: read1 sequence -# 4: read2 sequence -# 5: read1 + line -# 6: read2 + line -# 7: read1 quality -# 8: read2 quality -# Output columns: -# 1: the barcode, put into a canonical form -# 2: the order of the barcode halves ("ab" or "ba") -# 3: read1 name -# 4: sequence of read 1, minus the 12bp barcode and 5bp invariant sequence -# 5: read1 quality scores, minus the same first 17bp -# 6: read2 name -# 7: sequence of read 2, minus the first 17bp -# 8: read2 quality scores, minus the first 17bp -# The canonical form of the barcode is composed of two concatenated tags, one from each read. -# By default, each tag is the first 12bp of the read. The tag from the first read is the "alpha" and -# the tag from the second is the "beta". The barcode is formed by concatenating them in an order -# determined by a string comparison of the two. The lesser tag is first (if they are equal, the -# beta is first, but then you have bigger problems). - -BEGIN { - FS = "\t" - OFS = "\t" - # The number of bases from the start of each read that form the two halves of the barcode. - # (this should be half the size of the full, canonical barcode). - if (TAG_LEN == "") { - TAG_LEN = 12 - } - # The number of bases in the read that are between the barcode and the start of the actual sample - # sequence (the restriction site in the Loeb 2014 protocol). 
- if (INVARIANT == "") { - INVARIANT = 5 - } -} - -$3 && $4 { - alpha = substr($3, 1, TAG_LEN) - beta = substr($4, 1, TAG_LEN) - if (alpha < beta) { - barcode = alpha beta - order = "ab" - } else { - barcode = beta alpha - order = "ba" - } - name1 = substr($1, 2) - name2 = substr($2, 2) - seq1 = substr($3, TAG_LEN + INVARIANT + 1) - seq2 = substr($4, TAG_LEN + INVARIANT + 1) - qual1 = substr($7, TAG_LEN + INVARIANT + 1) - qual2 = substr($8, TAG_LEN + INVARIANT + 1) - print barcode, order, name1, seq1, qual1, name2, seq2, qual2 -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b make_families.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/make_families.xml Thu Feb 02 19:14:13 2017 -0500 |
b |
@@ -0,0 +1,84 @@ +<?xml version="1.0"?> +<tool id="make_families" name="Du Novo: Make families" version="0.5"> + <description>of duplex sequencing reads</description> + <requirements> + <requirement type="package" version="0.5">duplex</requirement> + <requirement type="set_environment">DUPLEX_DIR</requirement> + </requirements> + <!-- TODO: Add dependency on coreutils to get paste? --> + <command>paste '$fastq1' '$fastq2' + | paste - - - - + | awk -f "\$DUPLEX_DIR/make-barcodes.awk" -v TAG_LEN=$taglen -v INVARIANT=$invariant + | sort + > '$output' + </command> + <inputs> + <param name="fastq1" type="data" format="fastq" label="Sequencing reads, mate 1"/> + <param name="fastq2" type="data" format="fastq" label="Sequencing reads, mate 2"/> + <param name="taglen" type="integer" value="12" min="0" label="Tag length" help="length of each random barcode on the ends of the fragments"/> + <param name="invariant" type="integer" value="5" min="0" label="Invariant sequence length" help="length of the sequence between the tag and actual sample sequence (the restriction site, normally)"/> + </inputs> + <outputs> + <data name="output" format="tabular"/> + </outputs> + <tests> + <test> + <param name="fastq1" value="smoke_1.fq"/> + <param name="fastq2" value="smoke_2.fq"/> + <param name="taglen" value="5"/> + <param name="invariant" value="1"/> + <output name="output" file="smoke.families.tsv"/> + </test> + <test> + <param name="fastq1" value="smoke_1.fq"/> + <param name="fastq2" value="smoke_2.fq"/> + <param name="taglen" value="5"/> + <param name="invariant" value="0"/> + <output name="output" file="smoke.families.i0.tsv"/> + </test> + </tests> + <help> + +**What it does** + +This tool is for processing raw duplex sequencing data, removing the barcodes and grouping by them into families of reads from the same fragment. + +----- + +**Output** + +The output will be a tabular file where each line corresponds to a pair of input reads. 
+ +The columns are:: + + 1: barcode (both tags joined and ordered) + 2: tag order in barcode ("ab" or "ba") + 3: read1 name + 4: read1 sequence (minus the tag and invariant sequences) + 5: read1 quality scores (minus the same tag and invariant) + 6: read2 name + 7: read2 sequence (minus the tag and invariant sequences) + 8: read2 quality scores (minus the same tag and invariant) + +----- + +**Barcode creation** + +For each pair, the tool will remove the tag at the beginning of each read and create a barcode by concatenating the two tags. The order of the tags is determined by a string comparison so that it will make an identical barcode from pairs of either order. The original tag order will be noted in the second column. + +Since pairs from opposite strands will have the same tags, but in the reverse order, this produces the same barcode for reads from the same fragment, regardless of strand. Then a simple sort will group all reads from the same strand together, separated into strands by the different "order" values. + +Examples:: + + +---------------+-----------------+ + | input tags | output | + +-------+-------+-------+---------+ + | read1 | read2 | order | barcode | + +-------+-------+-------+---------+ + | ATG | CCT | ab | ATGCCT | + +-------+-------+-------+---------+ + | CCT | ATG | ba | ATGCCT | + +-------+-------+-------+---------+ + + </help> +</tool> |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/00README.txt --- a/misc/00README.txt Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -bug1/diff.family.msa.tsv -bug1/diff.sscs.after.fa -bug1/diff.sscs.before.fa --------------------- -Files on the differences between the outputs of two versions of duplex.py. The diff.sscs.*.fa files are the SSCS's (from --sscs-file) which are present in both outputs, but have different sequences. diff.family.msa.tsv contains the MSA's which produced the SSCS's. -before = a0d599c -after = 665ebe2 |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/ACCGACACAGACTAGGGATCAAAG.msa.qual.tsv --- a/misc/ACCGACACAGACTAGGGATCAAAG.msa.qual.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,58 +0,0 @@\n->ab.1\n-AGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATACTAGTATAAGAGATCAGGTTCGTCCTTTAGTGTTGTGTATGGTTATCATTTGTTTTGAGGTTAGTTTGATTAGTCATTGTTGGGTGGTGATTAGTCGGTTGTTGATGAGATATTTGGAGGTGGGGATCAATAGAGGGGGAAATAGAATGATCAGTACTGCGGCGGGTAGGCCTAGGATTGTGGGGGCAATGAATGAAGCGAACAGATTTTCGTTCATTTTGGTTNTNNGGGTTTGTTN\n-........................................................................................................................................................... ...... ...................... ....................... . . . ..... . ... .. .. ........ .. ....... . ... .. . .. .. \n-..................................................................................... .. ......................... .... ................ .......................... ......... ......... ................... .. .. . .. ......... .......... . . . ..... .. .. . \n-......... ............................................................................................................................................... .. ............................. .. .... ........ ......... ...... ........ . ...... .. .. ..C ... ...... CA. .. \n-................................................................................................................................................................................. . ....... ........................... . .. . .......... ........... . ... . . . . . . . \n-........................................... ................... ...... ........................................................................................... . ... ....... .... ..... .. ............... .................... ..... .. . .. ...... .. . . . .. ..... \n-................................................................................................................................................................... ......................... .. ............. ............ .. .. ... ...... .. ... . . \n-............ .... ..................... ................................................ 
..... .......... .............. . ................................... .................. ...... . .. .. ............. .. . .. .. ...... . . .. ...C. AC.. \n-.................................................................................................................................... ........... ............................... ...... . . ...... .... ..................... . ....................... . . . . ... \n-... ..... ....... ......................... ................................ .. .......................... ....... . ........... .......... ........... . ..... ... ........... ........................... ............ ..... .. . . . .. . . ... .. \n->ab.2\n-GGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTACCCCCTCTAGAGCCCACTGTAAAGCTAACTTANCATTAACCTTNTANNNCANAGATTAANCNAACCAACACCTNNNNNCANCNNANTNCCCNAACNACATANNACCNN---------\n-. ............................... .................................................................. ...... ........... ....... .. .... ........ ........ . ....T.... .. . .. ..... ... . ..... ... . . . . . . . ..... C. . .........\n-.................... ... ............. ...... . . .................................... .... . . . . ... .... . . .. . ...........T..... . . .....T...---------.. ........ .. . .. ... . . . A . . . . \n-....................................................................... ....... ........... ............................ ......... ............................ . .... .. . ... . .. ... . ... ... ... . .. .'..b'........ ...... ............ . .. .......... ........... . . .... . .... . ....... .. .C.......... ......... .. ......... . . ........ ... . .... . . . . .... .... ... .. .. .. .. . . . \n- .. ..... ........................... ....... .......... ...T.............................. .... ...... ........ .... ..... . ......... .. ...... ........ .. ... ........ ...... . .. . . . .. 
.... .... \n-... ... . ........ ........ .. . .................... ..... ........... ........... ... . . ....... ..... .. ....... ....... .. . . . .......... . . . . .. . . . ... .. . . ... .. . .. \n-................................................... .............. .................................... .... ..................... . ........... .................. ....... .. . . . .. . ...................... . . . . ..... . . . .. \n-................. .................. .......... ...................... .................. ...... ... ......... ....... . .............. . ...... ........ . ............ .. .... .G ... ... .. ...... . . . ... .. . . . \n- . . ... .. ........ .. ...... ....... . . .. ........ ..... ........ . ... ........ .. ........ .. ...... ..... .. ...... ............ ..... . C.. . ..... . . . . .. . . . ... .... ... . . . .T. . . . \n-. ....... .................................................................... .................................... ............. ..... . .. . ......... . ... .... . . .C ... .. ............ .. . . ... . . .. . . . \n-............................................................................................... .. ........ .... ...... ............... . ...... ...... . .. . .... . . . . .. .... .. .. ...T. . . . . . ... .. . . . . \n-. . ..... ...... ... .......... ............. .. ... . ... .... . .... . .. ... .. . ....... . . ...G . ...... . ....... ... . . .C.... ... . .. . .. ... .. .... .A . \n-... ... . ... ....... ... .............. .. .......... ....... .... . .. .. .. .... . ..... ... . ..... . . ... ... . .. .. .. . .. . . . . . . . G . . .. . T . . . ... \n-.................... ...... ....................... ... ............................ ........... ... ..... . ............... ... ... . .... ... . .. .. ... ... ...... ... ... . .. .... . . . . . . .. ... . \n-............ ........ ................ .... .T.............. ........ . .. ... .... . .. .G.... ....... . .... ...... .... ....... ...G..G. .. . . 
.T . .. . . . . .... . ... ... ...... .. . .... . . . \n-......... ................................. .................................. ............ .. . .. .. ... ............. . ... .. . . .. . ......... . .... ...... . C ... G .. ..... ....... C.. .. . \n-................... ......... ............................................ ............ ...................... ................. ...... ...... ........... ................. .. ...... ..... ... ....... ...... .. . .. . .. . . .. . ..... \n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/ACCGACACAGACTAGGGATCAAAG.msa.tsv --- a/misc/ACCGACACAGACTAGGGATCAAAG.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,58 +0,0 @@\n->ab.1:\n-AGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATACTAGTATAAGAGATCAGGTTCGTCCTTTAGTGTTGTGTATGGTTATCATTTGTTTTGAGGTTAGTTTGATTAGTCATTGTTGGGTGGTGATTAGTCGGTTGTTGATGAGATATTTGGAGGTGGGGATCAATAGAGGGGGAAATAGAATGATCAGTACTGCGGCGGGTAGGCCTAGGATTGTGGGGGCAATGAATGAAGCGAACAGATTTTCGTTCATTTTGGTTCTCAGGGTTTGTTT\n-......................................................................................................................................................................................................................................................T...............................G...G.\n-.........................................................................................................................................................................................T.........................................................C.....T......................AT.T........\n-.............................................................................................................................................................................................................................................................C......T.G......G..............\n-.................................................................................................................................................................................C............................................................................A.......G.....................\n-...........................................C..................................................................................................................................................................................................C.............................................\n-.........................................................................................................................................................................................................................................C..............
A.......................A...T.......\n-.........................................................................................................................C..................................................................G.....................................................T..........G......AC......C...AC..........\n-.......................................................................................................................................................................................T.......C...........................................................................T.....GT.........\n-.................C......................................................................................................................................................T.............................................................T..........C..A.T.......G..............GT.G..T.......G\n->ab.2:\n-GGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTACCCCCTCTAGAGCCCACTCTAAAGCTAACTTAGCATTAACCTTTTCCGTTAACGTTTAAGCAAACCAACACCTCTCTACAGTGATATGCCCCAACCACATCCTACCGC\n-...........................................................................................................................................................T......T....T.................T..........C..C..........C........C.....C................C..........C.......C.CA........AC.........\n-...............................................G.........................................A.....A.............................A...........T.................T.................G..G.G..CACT.TAAAGCTAACTTCT.ATTAACCTTT.A.GA.AAAG.TTAAG.GAAC...CGCCT.T.TACAT.TAAATGCCAC.C..AATTACTA.CGGATGGC.AA.\n-............................................................................................................................................................................C..............T...................T.............AA......T
A......G.........'..b'..................................................................A.......................C......................C..........................................................G.T......T...................A..........T.....T.A............A.CTC..A..C.....G.G..AG.....CT.G..GAG..AC.C.A..\n-............................................................T..............................................................C.......................................................................C.....................T....C.C........C...G.C....C.......GCG.A.G.GC..........AT...A.....C\n-.........T.................T....T.....................................................T...T............................................C....CT...............T.........C...G.......G.C...TG.....C..C.........T...T....T.T......G.............T....C.....G..G.GG.T..CGC.G.G.....GCT....G....G\n-.........................................................................................................................................................................................................................A.C..C.........G....T......C.G.A.....G......C...G...GG...T...A....G\n-.................................................................................................................................................................................C...........G.............................C..C.G.......G.G.........A.TG......G........G.CT.G...C...T.......\n-...C.....................................................T....................A........................T........................T.....................C..TT........T...A......G..T...A........................A....T.G........G.A....A.T.TC..AT.T.GTC.T..TAC..G.AT.CT....AT.G...AC........GC\n-...........................................................................................................................................................C.....................C.......C....................................CG..C....AG.C..G......C...TCTG........
.C.....C.CT.C...TGC...CC\n-..........................................................................................................................................................................T................C.........A........T......G..AT.....G...A..T..G.G.....G..C.........G.T...G.........GG.GT..G.....G\n-....................C............C......................C.............................A.A.......GC...G....C.....G.................C..C................TG...C....C..........CA....T.C.T....C....C.GAT.....C........A.....TA....TG..AT..T.AC.G.A.....T..G....C....G...GTG.AC.TG.G..TT...C.A...\n-.........C................................................................................G.....C...A......................................T..........G..T.TC.GG..T....T...C.GTT......TA..G.G.TC.....AT.....A..TG..AG...GTG..T....G......TG..G......A..GT.....G.T.G.G....G....A.T.T.TG......\n-................................................................................................C............................G.........T....................G......T....................................C...........CT..A..C......G......G...AG..T..C....C..G.......GC..G.....G.CGT..G......\n-..............................................T..............T...........A..G...........A.....A.G....A..................A........................G..G..G..........TC..A.....G...................................A..........T..G...........G..A...CG..T.CA..GGG......A.G.G...AG..TG.A.GC...GG\n-.........................................................................................................................................................C...C.............CG...C..C.....G.TGC...........C...............A.C..C.G...C.....C..ATG....AC.C...............GA..A.GT.CT....G.....\n-.........................................................................................................................................................................................................C...............A....G.G...T..TAC........C.A......
G....G..A............CC..........\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/ACCGACACAGACTAGGGATCAAAG.tsv --- a/misc/ACCGACACAGACTAGGGATCAAAG.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,25 +0,0 @@\n-ACCGACACAGACTAGGGATCAAAG\tab\t@M02286:57:000000000-AGCM5:1:1105:17191:19189 1:N:0:1\tAGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATACTAGTATAAGAGATCAGGTTCGTCCTTTAGTGTTGTGTATGGTTATCATTTGTTTTGAGGTTAGTTTGATTAGTCATTGTTGGGTGGTGATTAGTCGGTTGTTGATGAGATATTTGGAGGTGGGGATCAATAGAGGGGGAAATAGAATGATCAGTACTGCGGCGGGTAGGCCTAGGATTGTGGGGGCAATGAATGAAGCGTACAGATTTTCGTTCATTTTGGTTCTCAGGGTGTGTGT\tGFGGGGGGDEEGGCFFGFGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGAFGGFFGGGGGGFFGDEGGGDGGGGDFFCGGGGGDGGFFAFGGFFGGGFGGGCFGFFGGEGGGFGFDGGGEGGCFFDDFG:CFGGEFGGFGGDFGGGGGGGGCD,ADF9FG,,@7EE777CEFCDFC>BBCEDG7*1?DF7>FCGC?CFGFCGFGG5=CC35/7*9++>FG6:**:*>5<)7536C*9@CB7B?D)3)06@)9C<FGB?)9-<F?)<B(-44-6(7>(9:1((\t@M02286:57:000000000-AGCM5:1:1105:17191:19189 2:N:0:1\tGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATATCACCCCTTCTATCCCCTCTACCCCCTCTATAGCCCACTCTCAACCTAACTTAGCCTTAACCTTCTCCGTCAACGTTTAAGCAAACCCACACCTCTCTCCAGTGATCTCACCCAACCAACTCCTACCGC\t:,C@@8CFEEFGGGGGGGCFFAEFGGGGGE@FG,@FEED@C7=FF<FEGCFCCGGGFE9EFFG9F<EGGGGGA8FA<7=EF==FG<599@FGF9E<EEF:+?FCFDF,??<<D9F9DE<+D9<AF@F,E9,D>FF,3,DEEG:58C3DCBCDCCE,;,EF:BC8:C:,@D*=,AD*<CGGG4CF7,<+=6CCC4)4*2+*20(;>6+*2+)/*)2;3:*.2++./5,.*:)2(72-6)(.5((,8@?6704()972(-)))))))..3:(((-423))))).((\n-ACCGACACAGACTAGGGATCAAAG\tab\t@M02286:57:000000000-AGCM5:1:1105:8085:17960 
1:N:0:1\tAGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATACTAGTATAAGAGATCAGGTTCGTCCTTTAGTGTTGTGTATGGTTATCATTTGTTTTGAGGTTAGTTTGATTAGTCATTGTTGGGTGGTGATTAGTCGGTTGTTGATGAGATATTTGGAGGTGGGGATCAATAGAGGGGGTAATAGAATGATCAGTACTGCGGCGGGTAGGCCTAGGATTGTGGGGGCAATGAATGAACCGAACTGATTTTCGTTCATTTTGGTTCTATGTGTTTGTTT\tGCCFECCECFGEGDGGGFGFGGGGGGFG@EEF<EE@E9@AEC@AGGGGGGGCFF9CFGFGGGGFGGGGGGGD9<C@CCFGC<FGF,CF,<EDFFFGGGF<AD??EGFF<D@<CA,CB?F,C9F?FFFEFGGGDGF<,FFFGFDFC7FFFGFGFGF9C;=AFFG,37B=:>EEEG,7EB;ECC6>**41>DC7>CEC9;<9D@DFF7:45C*/*::32A+9?2<7C8DFDCD/C>D9CCFFC=**.)7()).9)9*)7C:?;)44/95).442))..96(,2,8(\t@M02286:57:000000000-AGCM5:1:1105:8085:17960 2:N:0:1\tGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATAGTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTAATGCCAATCGTCCTAGAATTAATTCCCCTAAAAATATTTGAAATAGGTCCCGTATTTACCCTATATCACCCCCTCTACCCCCTGTAGCGCCCACTGTAAAGCTAACTTCTCATTAACCTTTTAAGACAAAGCTTAAGAGAACTAACGCCTCTATACATTTAAATGCCACACATAATTACTACCGGATGGCCAAC\t:DFFEGCFCFC<<<DFEEFD,,,6CF,B@FGGGDB::BE7+F@=CEF,9,<,+C?:<CFF9?<EACEGGGGC<@EFGGF<EF8<=E?AF,BEFE,,5,,?4,:+A,??A,<FFF,,A,,A,7:,A,@?A@AFFFDC8>D9==<+3,@,@,@9@F9CE;EG?*BEE?;EF8*1**5=,,1;*)440<=+=8:+*11/=+4+0+)3;+1*;3+1+6++)92/*)*/+1)3*;)*)143)(,.),)./64-))))-9424)).(().)))).).)(((,((((((,(\n-ACCGACACAGACTAGGGATCAAAG\tab\t@M02286:57:000000000-AGCM5:1:1108:14879:20886 1:N:0:1\tAGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATACTAGTATAAGAGATCAGGTTCGTCCTTTAGTGTTGTGTATGGTTATCATTTGTTTTGAGGTTAGTTTGATTAGTCATTGTTGGGTGGTGATTAGTCGGTTGTTGATGAGATATTTGGAGGTGGGGATCAATAGAGGGGGAAATAGAATGATCAGTACTGCGGCGGGTAGGCCTAGGATTGTGGGGGCAATGAATGAAGCGAACAGATCTTCGTTTAGTTTGGTGCTCAGGGTTTGTTT\tFCGGGGFG;+FFCGD@FFGFGFGGGGGGGGGGGGGGGGGGF<FGFGGGGGGGGGDDFCFGGGGFFGGGGGGGGG<FGFGGGGGG9EFGGGGGGCFFGED@F8DCFCEEEGGGFGEFFF?EFF9FGEGGGGGGDGGGFGGGGGGGGDGGGG@;F,A>,>DD9=9=DGECFGC@:CC8CAFCFFECG5*,2=F,9C;<,2=EFGD7DG*=DGEGEEGG46CFF?<*09<CDGGG>3)7*9:DFF64F9*)7>)797*9>?1))2)6<<B??)436>?0((.,<F((\t@M02286:57:000000000-AGCM5:1:1108:14879:20886 
2:N:0:1\tGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCCCTACCCCCTCTAGATCCCACTCTAAAGCTAACTTTGCATTAACCTTTTAAGTTAACTATTAAGCGAACCAACACCTCTTTACATCGTCATGCCCCAACTACATAATCACAT\tCFFFGCFGGEFGGGGAFGEDGCFGG9FFGEGGGGGGDGGECEFEGGGGGCFGGGGGGGGD9EEF?FFFEC?,C?CCE@E,E=FGCDFCDEF,<FFGGGGGGGFEGF9FDGG9BDFGGGFF,,CAFGFGF>B3>FGAD=FCG8FCFFCCFGGGCCDEF9F>*>*358DB,>C***:*=CC*?)AD+1++16C7*;.+1+49:D47D7++/47::):4<9+*09*)1+6***60<+6**253)34()2,-)0444/..))((,-.)))5((-(,.)..))))),))\n-ACCGACACAGACTAGGGATCAAAG\tab\t@M02286:57:000000000-AGCM5:1:1116:18034:3824 1:N:0:1\tAGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATACTAGTATAAGAGATCAGGTTCGTCC'..b'CF9FEEGD,C<@+@@:+A,@E@F,,@7>+3+3@,,,98,,6>,C=93@C834>6@6:@,,2>;D,BE8**/2,D+=C,86:;,*+0,2;//(8*7*/*8(*2.*;)(**)*11:*()17<*)****/))()02-(1.:66,440())*.)(,))(*(-())9((((,\n-ACCGACACAGACTAGGGATCAAAG\tba\t@M02286:57:000000000-AGCM5:1:2109:13003:15058 1:N:0:1\tGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTTAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTAACCCCTCTACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGAATTAACATTTTAAGTTAAAGCTTAAGAGCACAAACACATCTATACAGTGAAATACCCAAACTAAATACTACAGT\t,9C<FBEDFFFGGFDEFFAFFFGGFGEGDGGGFEEGG7:+@+7FGG9FGGGCCF,BFFEFGEF,EFCGFGGF,BBFFGF8,BAFGFFDFGCD9?F<FFG7FB:FGFG9<A?,FEFD<B@F@><B>>FA,,F,@DEGC,F:FEGGG@FGG<8,@<3@9,7,@*@<EE9,7D,166,,@,5**:E,<,@9@:::BFGC?994AC+;<?1+=,>+=?FE+<99;C6+5>9F+;+>>77+<**2C:*<FFG:09F*8>F*95*00)6*.36*(0)*)4<9A=*)))..\t@M02286:57:000000000-AGCM5:1:2109:13003:15058 
2:N:0:1\tAGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATATTAGTATAAGAGATCTGGTTCGTCCTTAAGGGTTGTGTATGGATATCAATGGTTTAGAGGTTAGTTTGATTAGTAATTGTTGGGTGGTGATTAGTCGGTGGTGGAGGAGATATTTGTCGGAGGGGAGCAATAGAGGGGGAAATAGAATGATCAGTACTGCGGAGGGTAGGCCTTGGGTTGTGGGGGCAGTGAATGCGGGTACAAGGGGTTCGTTAAGTGTGGAGCTTGGAGGCTGTGG\t:<DCAF89C7@6+8BFCGD<6,<BFFC9FCGF@FEGDF,C<F9,,6CEF9FGAFAFCCEE<,CECE6@@B,8,,C?,:F8+88C<,54,5C,,,,<5A<E<,,,CB==99?,,C,,A<F?,,C<5C9B,6@?7+A9FFF9@4>>@:@8FC+,3@E,8,8,,6=,6++5:+8+*5*,,53,7,58C:*;1:C7++=:7+>9CDC=++**(3:5*+/93+*00*)/**1/7C8<(7*,<4*):*()()(0)0.(()2((0,)*())***)**(.(04)))((0(,(\n-ACCGACACAGACTAGGGATCAAAG\tba\t@M02286:57:000000000-AGCM5:1:2118:27229:11823 1:N:0:1\tGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTACCCCCTCTCGTGCCCCCTGTAAAGCTAACTTAGCATTCACCTTTTAAGTCAAAGATTAAGATAACACAAACCTCTTTACAGTGCAATTCCCCACATACATACTACCGT\t9FFFGGGGGGGGFCFGGGGCDDFEFGGGGCFGGFGGGGC7FEEGGGFG9FGDGGDFGGGGGGGFGEFGGGGDAECEEGGFGGGGGG9@FF9FEGFGFGG>DFC:F,E,CFDEFF,FGDF7>DGDFFGFG?FG9FG<=,3C7@+>AB=F,@>><<@,<;,>::FBE,>,,>F*5>>9<DBF:*=C,1*6*42:*/*3C++5?9+2,@C++5++3++>CA:9:<6C1+<C+3++<*2*<299F++*).*8476:7*0)0-***672*./9,1(024429*4:<F14\t@M02286:57:000000000-AGCM5:1:2118:27229:11823 2:N:0:1\tAGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATACTAGTATAAGAGATCAGGTTCGTCCTTTAGTGTTGTGTATGGTTATCATTTGTTTTGAGGTTAGTTTGATTAGTCATTGTTGGGTGGTGATTAGTCGGTTGTTGATGCGATCTTTGGAGGTGGGGCGCAACAGCGGGGGGATGCGAATGATCAGTCCTGCGGCGGGTAGGCATCGGCTGGTGCGGGCACTGATGGAAGACACCAGATTTTCGTTCATGATGATGTTCTGGGTGTGTTT\t6:C6@FFFE+@CFDC6CFFGGGGGGCAFFGDCFGGGECFGGGF,6:CF9FEE@FFCGGFGDD@CGFEE@CCCA6FFEG,9BFGGGCGG?9?,5<,<,CE,9A,8B<4<<A9C<F<FAF@E,A,F<9,:A+4:+@4,97,,;,>@F>7@EG9,,,3383,@=CD,@=EDCE+3**3**6164;5B**2);)2/)96*++3+*2+9<CG53CGFC:77*+2785)*)0)2*2C81,1.))+)0)*((*().),.)**))-,((0)/)-:)*1.)/)..*2)(-.((\n-ACCGACACAGACTAGGGATCAAAG\tba\t@M02286:57:000000000-AGCM5:1:2119:25043:11501 
1:N:0:1\tGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCCACCCCCTCTCGACCCCACTGTAAAGCTAACTTAACCTTAACCTCTTAAATTAAAGATTAAGCGAACCAAAACCCCTTTACAGTGAAATGCCCCAACTAAATACTACCGT\tEGGGGGGGCGGGGGGGGGFGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGFGGGGG9DEGGGFG9FGGGGFFGGGGGGFDFFDEEFFGGGFGGGDFGGGGGGFGGFFGCGAFF9AEFG,DFDFGGGCFG9DFGCD8FGGGG7FFG9F,DF7FE<F,,@*FF*33,,?<DC8C*5DFCGC8FC,*6*:CF1:B+++2<+?A2;@1;++?+ACC7FC7+0+12++3<9C0<<?F?4+0;08)***.:)*1.707D494*:*25==:5*(0*99,)*2<):*8(,\t@M02286:57:000000000-AGCM5:1:2119:25043:11501 2:N:0:1\tAGGAGTCCGAGGAGGTTAGTTGTGGCAATAAAAATGATTAAGGATACTAGTATAAGAGATCAGGTTCGTCCTTTAGTGTTGTGTATGGTTATCATTTGTTTTGAGGTTAGTTTGATTAGTCATTGTTGGGTGGTGATTAGTCGGTTGTTGATGAGATATTTGGAGGTGGGGATCAATAGAGGGGGAAATAGAATGATCAGTCCTGCGGCGGGTAGGCATAGGGTGGTGTGGTACATGCATGACGAGAACAGGTTTTGGTACATTTTGGTTCTCCGGGTTTGTTT\t:9CDFGFGG@:F@@GFFG@,E<FGGGE@C,;E96FFGCCFFF9CFFGD9E<AFCFFF?DDCFFGGDGGEFGFCA,6C?FEFFGGFFG,CEGGGGCFAEGGGGGEE<FGGF,EE9<?D?<FC?FGEEG8,CFFGC<,;EF?AE+@=CEEGGF9=;,;BFGGGGGFGEGDCCC6+7B,@DG<FC*8C7:92CGG+197=EFGG,*=F?BDD*:5)/73**2*<C*;(27F*1C0))0*9)/9=*)-/,(06))).0010(,+))).*=B>7:0-*)(-)1-*()..\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/CTGCGACACAATATTGGGCTCCCC.ab.2.family.msa.tsv --- a/misc/bug1/CTGCGACACAATATTGGGCTCCCC.ab.2.family.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ -CTGCGACACAATATTGGGCTCCCC ab 2 @M02286:46:000000000-AEG11:1:1116:22967:7077 2:N:0:1 AAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAA-CGACGAGCGTGACCCCACGAGG FGGGGGGGGFGGGGGGGGGGGGGG<FDGGGGGGGGGGGGGGAFGEGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGG,EFGGGGFGGGGGGGFDGGGGGGDGGFEGGGGGGGC;;FGGGGFGGGCC8BCDFGFCDFFFC9EGGGGGGGG7DGGGEC*:CCF7FFGGFGDCCFGFGGFCG+8?7)95D>G>BFGAFFFAFEA:>BFF;*:>:@?F0(<2:1-399>:?)<<BA9A77?42132?? <?69>>>((-(((*.41(,((( -CTGCGACACAATATTGGGCTCCCC ab 2 @M02286:46:000000000-AEG11:1:1118:14605:8689 2:N:0:1 AAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGACGACCGAAGCAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGACCCGGAGCTGCATGAAGCCATACCCAC-CGACGCGCGTGACACCCCGCTT FAAFDE8FEDECB6@@CC@7FGEE<FDCEFFFGGGEGGGGGCEFGGGGC8FCCEFCEFGD9FFFGG,:@F8<A6EFGGF<FCCC<<BEAF=CEF9E9<,:,5?9?ADD9F@FFEGGG9?,BE,,CE,9CFG;FEAD9,8CEG6+3@37AD=;DF9A9+38+8D8=8A++3479@EG*,=41253,*4(6-0///;=6+*2*86**(5/;;;++;C1.A8)4>D:?B*).0(344(*(,-((42(.(.)5A)9?0<4<7?+5( (,(,,(((->18:0,((02-92 -CTGCGACACAATATTGGGCTCCCC ab 2 @M02286:46:000000000-AEG11:1:1118:21309:6959 2:N:0:1 AAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCACTTTTTTGCACAACCTGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGCATGAAGCCATCCCAAA-CGACGACCGTGACACCACCATT GGGGGGGGGGFGDCGGGGGGGGDGGGGGGGGFGGGFFFGGGCFGF@CCGFFGGGGGGGGFGGGGFGFCFDFFDEGE?@FFF7FC?FFGGGGGGGAF?FDFFGGGFGFEFGCEFGGGGGGGFEFFGGGGGFDCFGGGGFD@EGGGFEEEFE,EDDEFF5DD@FCFEE>CDCGGD>ED5CDFFGAFFGF@CEEFG4C:A:8?*//C5577?F;FACCFFF4D@EB33=675A1(7284<?9>FB9?B02)6<29???A(23+43 :<767(-(4C<((0)-()(()) 
-CTGCGACACAATATTGGGCTCCCC ab 2 @M02286:46:000000000-AEG11:1:2101:17733:13519 2:N:0:1 AAGGGCACCCGGGTGGCGGCAACCATAATTCTAAGATTGCTTGGGTGGGGTATTACTTAGCACAGGAAAAGAATCTAAGGAAGGGCAGACAGGAAAGGAATTAATGCATTCCTGCATAACCAAGGAGGGAAAAACCGGCGGCCAACTTCCTTCGACAAAGGTAGGGGGGACCAAAGGGGCAAACCGCTTTTTTCCACAAATGGGGGCATAATGTAACGCCCCTTG-TTGTTGGGGACCGGGCCCGAAAGGACCCAAACCAACACGACGACCCTATCACAAAACGG B9,,,,:,,,+8+++8C+++++,,,,,,<,,,,,,:9,,,,C,B,B,++++,,,5,,5,,,<E,,,C4E,,+,,,<5,,:,,4B+8+++,,,+,B,,,,:,75,,,,7,,5,,,83,5C3<<+,+,8+++,@,,++++3@*1*1,*,2,,1,,***4:C,,1,,3***28E**;/2***)*1*9*)/9:*7*1*0***)1**1095))))0*00/**))()-80) **)0.439>>4*-(,)((-(()((.((,4((-((4(),((-((-(()()/).))(((4- -CTGCGACACAATATTGGGCTCCCC ab 2 @M02286:46:000000000-AEG11:1:2103:23125:15471 2:N:0:1 AAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGCAGACATAACAAA-CGCCGAGCGTTACACCCCGATG FGGGGGGFGGGGGGGGGGDGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGCGFGGFGGGGGGGGGGGGGGGGGGGGCEGGGGGGGGGGGG9FGGGGGGGGGGFGGGEBEFGFBFFGGGGGFGGFGGGGGGGGGFDEEEGFGDDFGGGGGG,@EEFEFFGGG6CDEGFEC8?*,79CFCFGGGGDGGFGGGGFGGGF4*8*6=7>FD+788FC7:37GEA@<8F?5:?46C),<(9B90??>?4*)1..406B).5)2 4<((49>07()--4/4(2((-( -CTGCGACACAATATTGGGCTCCCC ab 2 @M02286:46:000000000-AEG11:1:2104:14576:24265 2:N:0:1 AAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACATTAAGAGAATTCTGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAAAGATTGGAGGAACGAAGTGTATAACCACTTTTTTGCACAACATGCGGGATCGTGTAACTCGCTTTGTTCGTTGCTCACCGGAAGCGATAGCGACCATGCCACC-CGTACCGCGGTCAACACCGTTT <<F,;C6CFGGCFG7C:BCD7C:9,,CCFG,,,<,,,<E,<FC6<:DFCAEF,,,66CC<9CFGGGDGCE6=ECFA<F<A7:@FGC4ED,<A,9,:,C,,4,,,,,5@A,?F,55EFFFGGG@9,4=,9E;@DGGF9,E+++++3A@,6,=;DD,6@,@=,6,,7,,+6+,0+4@8+,,,+2257,3@,2,219@+4+*/406**)*02?*/;C)=>8+).**/)1)1):;4(++./26()(((((0)(.,)(0())(.64( ()--()()(.((,0).(((.(( -CTGCGACACAATATTGGGCTCCCC ab 
2 @M02286:46:000000000-AEG11:1:2104:25265:19405 2:N:0:1 AAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAACGAGCTAACCGCTTTTTTGCACAACCTGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCTGAGCTGAATGAAGCCCTACCAAC-CGACGAGCGTGACACCACGATG GFFGGGGGGFGGDEFGGGGGGGGGGGGGGGGFGGAD<FAFGDG9FGGCEGFFFGGCGGGCGGGGGGC@FEFGGFFGGGG?7FGGFGGGGGGGDGDFFFDFFFGDGGGFE<FEFDC@<FFEFFFGCCFAF9FCAFFGG?EFGGGGGGCFFGA@>EF;E?DFGGF?EE@+8@DD6E>*@C574=B:DEG>=*ADGBFGC=D4*;*;76C378;A6CACCDD59CC()+*.8*.)45*3>7((0,,54)/*)426))(.4:())( 4)--6073(8?((633(36((( -CTGCGACACAATATTGGGCTCCCC ab 2 @M02286:46:000000000-AEG11:1:2119:22759:6520 2:N:0:1 AAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACCACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGACTGAAGCACTACCCAA-CGACTACCGTCACACCACGATT GGGFFCFGGGGEGGGGGGGGGGDFGGGGGGGGGGFGAEGGGFGGFFGGGGFGGGGGGGFGGGGGGGCFGDDFGGGGGGGGGGGEDFGGFGGGGGFGGFFGGGGGGFGFFGFCFFGFCD@?FDGGFFG4EFFFGGDGGFGGGEGGFFGFDA9EFGG=9,@F+8+@>E6@E68:E5*;7C>CCE@FFGD9?96:57DFGFCGBC8?3(:CD3;8:@:+8+;3CDE<+27:FF5,:5A,73*((170(4).*/4+,)(.:?B:<, 8<(-((((-((((*,7(4(((( |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/CTGCGACACAATATTGGGCTCCCC.ab.2.sscs.after.fa --- a/misc/bug1/CTGCGACACAATATTGGGCTCCCC.ab.2.sscs.after.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->CTGCGACACAATATTGGGCTCCCC.ab.2 8 -AAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATNGGGGATCATGTAACTCGCCTTGATCGTTGGNAACCNGAGCTGAATGANGCCATACCCAACGNNGAGNNTGNCNNNNNNNNN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/CTGCGACACAATATTGGGCTCCCC.ab.2.sscs.before.fa --- a/misc/bug1/CTGCGACACAATATTGGGCTCCCC.ab.2.sscs.before.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->CTGCGACACAATATTGGGCTCCCC.ab.2 8 -AAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATNGGGGATCATGTAACTCGCCTTGATCGTTGGNAACCNGAGCTGAATGANGCCATACCCAANCGNNGAGNNTGNCNNNNNNNNN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/GAGAACTGAAACAGCAACTATCCG.ba.2.family.msa.tsv --- a/misc/bug1/GAGAACTGAAACAGCAACTATCCG.ba.2.family.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,3 +0,0 @@ -GAGAACTGAAACAGCAACTATCCG ba 2 @M02286:46:000000000-AEG11:1:2107:12224:17165 2:N:0:1 AATAGCAAAGCAAGCAAGAGTTCTATTACTAAACACAGCATGACTCAAAAAACTTAGCAATTCTGAAGGAAAGTCCTTGGGGTCTTCTACCTTTCTCTTCTTTTTTGGAGGAGTAGAATGTTGAGAGTCAGCAGTAGCCTCATCATCACTAGCTGGCATTTCTTCTGAGCAAAACAGGTTTTCCTCATTAAAGGCATTACACCACTGATCCCTTTCATCAGTTCCATAGTGTGGAATCTAAAATATACAAACACTTAGCTTCAGTTGTTAACAGATTTATACAC- GCFD,FCFEFAE,,<,,,;6CEF,,<@E,CEG<FGF?CF8@FAEFCEG<E,CFF6C@EE9E<,CF9FGD8@,@CF9E,<69@7F,AE<CFGGDG9F@EFAEEFG,=FFGGFC=<E,CFEG,,,5AF84E9E@,AD;?FEGFFGGAF,=E=,,,,8,88DEGFGG;DBD@FFCD,@=D?2@,6?,6@?E7,+2,26=8A+@8;:D??2,5+B:*+9=*+5?A@+;?;9**+*313.*3*7@;**2*+*4*7*)/(*00***1*8:***/-*1***201)+19<1+ -GAGAACTGAAACAGCAACTATCCG ba 2 @M02286:46:000000000-AEG11:1:2113:9530:10512 2:N:0:1 AATAGCAAAGCAAGCAAGAGTTCTATTACTAAACACAGCATGACTCAAAAAACTTAGCAATTCTGAAGGAAAGTCCTTGGGGTCTTCTACCTTTCTCTTCTTTTTTGGAGGAGTAGAATGTTGAGAGTCAGCAGTAGACTCATCATCACTAGATGGCATTTCTTCTGAGCAAAACAGGTTTTCCTCATTAAAGGCATT-CACCACTGCTCCCATTCATCAGTTCCATAGGTTGGACTTTAAAATACATAAACAATTAGAATCAGTAGTTTAACACATTATACACT GGGGGGGGGGGGGGGG8FFGGGGCFGGGGGGFGGGGGGGGGFFGGGGEFGCECGFFFGEFG<FGGCEGGFGGGGDFCCFFGCEFGCCGGGGGGGGFGGFEGGGGFGGG8EFEGGGGGFGGGF9FF8:FGGCCA8F;9,>BD8FFDDFFDGGFDFAF8FGGA;FF;EDACF@FD88D?ED?DFDDEBEDFDC7D+?C+2 =@F7A::?A?+;?CCFC9EFF5;BEC@A<@*1>>)92***4:6*2*+4*2+*1A268*)5*058*174>4/**>*3().9<)79)) -GAGAACTGAAACAGCAACTATCCG ba 2 @M02286:46:000000000-AEG11:1:2114:17623:15531 2:N:0:1 AATAGCAAAGCAAGCAAGAGTTCTATTACTAAACACAGCATGACTCAAAAAACTTAGCAATTCTGAAGGAAAGTCCTTGGGGTCTTCTACCTTTCTCTTCTTTTTTGGAGGAGTAGAATGTTGAGAGTCAGCAGTAGCCTCATCATCACTAGATGGCATTTCTTCTGAGCAAAACAGGATCTCCTCATTAAAGGCATTCCACCACTGCTCCCATTCATCAGTTCCATAGGTTGGAATATAAAATACACAACCAATTAGAATCAGTAGTTTACCACCTTCTACAC- GGC<FCFFGFCFGDCDEGGGGFGFFGGCAGFGGGGFFFG@<AEFDGGGEFFGGDGGG9FFFF@FAFDF?F?EFEEFEFGDFFF:<FGGC<5ECFFGFGGGGGD<F@@F<=C@CFFF9FFFEDFCGGCFFGAFC8EFGFG9E8FECGA,>FF?EDDDGGGF=@FGGGG,EGC,DD?FFF+2,@=A,,=FFG?DGGDDFGCGGGF7DFF?A?*:6:8BE695:@?F5B@>@5;B8@*1**=;*;+;AA>96<(5(/8=:3*9**8.774@C*1*)./)6=(;?).( |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/GAGAACTGAAACAGCAACTATCCG.ba.2.sscs.after.fa --- a/misc/bug1/GAGAACTGAAACAGCAACTATCCG.ba.2.sscs.after.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->GAGAACTGAAACAGCAACTATCCG.ba.2 3 -AATAGCAAAGCAAGCAAGAGTTCTATTACTAAACACAGCATGACTCAAAAAACTTAGCAATTCTGAAGGAAAGTCCTTGGGGTCTTCTACCTTTCTCTTCTTTTTTGGAGGAGTAGAATGTTGAGAGTCAGCAGTAGCCTCATCATCACTAGATGGCATTTCTTCTGAGCAAAACAGGTTTTCCTCATTAAAGGCATTCCACCACTGCTCCCATTCATCAGTTCCATAGNTTGGNNNNTAAANTACANNANNANNTANNNNNNGNNNTTNNNCNNNNTNTACNNN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/GAGAACTGAAACAGCAACTATCCG.ba.2.sscs.before.fa --- a/misc/bug1/GAGAACTGAAACAGCAACTATCCG.ba.2.sscs.before.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->GAGAACTGAAACAGCAACTATCCG.ba.2 3 -AATAGCAAAGCAAGCAAGAGTTCTATTACTAAACACAGCATGACTCAAAAAACTTAGCAATTCTGAAGGAAAGTCCTTGGGGTCTTCTACCTTTCTCTTCTTTTTTGGAGGAGTAGAATGTTGAGAGTCAGCAGTAGCCTCATCATCACTAGATGGCATTTCTTCTGAGCAAAACAGGTTTTCCTCATTAAAGGCATTNCACCACTGCTCCCATTCATCAGTTCCATAGNTTGGNNNNTAAANTACANNANNANNTANNNNNNGNNNTTNNNCNNNNTNTACNNN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/GCCTGAAATGACGGTTGTTACATT.ab.1.family.msa.tsv --- a/misc/bug1/GCCTGAAATGACGGTTGTTACATT.ab.1.family.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -GCCTGAAATGACGGTTGTTACATT ab 1 @M02286:46:000000000-AEG11:1:2107:14361:14714 1:N:0:1 TGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGAGTCTTCCATGTTCTTCTCCCCACCATCTTCATTTTTATCAGCATTTTCCTGGCTGTCTTCATCATCATCATCACTGTTTCTTAGCCAATCTAAAACTCCAATTCCCATAGCCACATTAAACTTCATTTTTTGATACACTGACAAACTAAACTCTTTGTCCAATCTCTCTTTCCACTCCACAATTCTGCTCTGAATACTTTGAGCAAACTCAGCCACAGG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFFFFGGFGGGFGGGGFGGGGD=EFGGGGFGGGGGGGGGGDECGFGGGGDGGCFGFGAFC8EGFGGGE+DAFGCFGGGGG95<@FGACFGFCAFGF@AFGFFG+=AFFFF7=2*?:F=AEF63CBF4;3 -GCCTGAAATGACGGTTGTTACATT ab 1 @M02286:46:000000000-AEG11:1:2113:19415:18691 1:N:0:1 TGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGAGTCTTCCATGTTCTTCTCCCCACCATCTTCATTTTTATCAGCATTTTCCTGGCTGTCTTCATCATCATCATCACTGTTTCTTAGCCAATCTAAAACTCCAATTCCCATAGCCACATTAAACTTCATTTTTTGATACACTGACAACCTAAACTCTTTGTCCAATCTCTCTTTCCACTCCACAATTCTGCTCTGAATCCTTTGAGCAACTTCAGCCACAGG GFGFGDF8FGGGGFCGGGGGG?FGFGGGGGGGGGGGGDEFGGGGFCFGGGGGEDGGGGGGG8ECEGFFFGCFGGGGGGCFFFFFGGGG7,FFFEFGGGGGGFFGGGCEGGGGEFFGCGGDGGGGG9EFFGDFGGGCGGGGGGGFGGEFFGGGFGFGF9AFCBFGCCBAEFCFGGGGG9=FGCF;@,87;,=,EEFGECFCCFEG,=D,@,+3:7EE:CFFGCC::E7A>:7CDGGG:<++2,*;?9*/+191:++=9=*=+,188=*)).)()00=/+?><(2) |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/GCCTGAAATGACGGTTGTTACATT.ab.1.sscs.after.fa --- a/misc/bug1/GCCTGAAATGACGGTTGTTACATT.ab.1.sscs.after.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->GCCTGAAATGACGGTTGTTACATT.ab.1 10 -TGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGAGTCTTCCATGTTCTTCTCCCCACCATCTTCATTTTTATCAGCATTTTCCTGGCTGTCTTCATCATCATCATCACTGTTTCTTAGCCAATCTAAAACTCCAATTCCCATAGCCACATTAAACTTCATTTTTTGATACACTGACAAACTAAACTCTTTGTCCAATCTCTCTTTCCACTCCACAATTCTGCTCTGAATACTTTGAGCAANCTCAGCCACAGNN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/GCCTGAAATGACGGTTGTTACATT.ab.1.sscs.before.fa --- a/misc/bug1/GCCTGAAATGACGGTTGTTACATT.ab.1.sscs.before.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->GCCTGAAATGACGGTTGTTACATT.ab.1 10 -TGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGAGTCTTCCATGTTCTTCTCCCCACCATCTTCATTTTTATCAGCATTTTCCTGGCTGTCTTCATCATCATCATCACTGTTTCTTAGCCAATCTAAAACTCCAATTCCCATAGCCACATTAAACTTCATTTTTTGATACACTGACAAACTAAACTCTTTGTCCAATCTCTCTTTCCACTCCACAATTCTGCTCTGAATACTTTGAGCAANCTCAGCCACAGN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/TAATACGATGACATTTCGCACCGA.ab.2.family.msa.tsv --- a/misc/bug1/TAATACGATGACATTTCGCACCGA.ab.2.family.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,5 +0,0 @@ -TAATACGATGACATTTCGCACCGA ab 2 @M02286:46:000000000-AEG11:1:1107:16019:3802 2:N:0:1 AAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTGGCGACAGGGGGCTGTGCTGCAGGGCGAGTCAGTTGGGTAACGCCCGGGTTTTCCCAGTCACGACGTGGTAAAACG-ACGGCCAGTGACTTTGACTACGAGTCACTATAGGACGAATTGG -FFGGGG:@EFFGGGGCDCFGGC77E<<CEFGFGGD?C7EFGGGGGGEGFGDGFFECDFC?CGDGEDGGGGG9EFCGGGECFCE<>D+8@FEC7CFGF@@CG:FGF9@AFFGGGG*<CC,7@<AF<:C:CCF7CECEB8C58EEC*;:?CCCGEG7FFFEGECFGGG5/:***5/*:*)2C7*0+0/+*2)**)0*2:*0*9)8C@F507)7>)81537*9<44*)*-0-5,()-6).443 9;(-((-(),*))-).)))..(,(-4).44).6)*)3((((.( -TAATACGATGACATTTCGCACCGA ab 2 @M02286:46:000000000-AEG11:1:1112:7443:21645 2:N:0:1 AAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTGGCGAAAGGGGGGTGTGCTGCAAGGCGGTTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCACGCCGTTGTAAAACG-ACGGCCAGTGGAGTGTCAGTCGACTCACTCTAGGGCGTATTTG GGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGCFGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGEGGGGGGGGGGGGGEGGGGGGGGGGGGGDGGF@EGCCFFFFFGGFGGGCCGFFGGFFG7FFGGF7F7CFF*1?8EGGGGFCFCECC8?<C@8CEEE57+CCEE*:C=EDE*/855?FFF*<CF5C)1).*::7@4>766?37)0<>D3>D?<7*-0<?)4@0>B((.4*<462( ,-311(50).).(..-)..).4>>?2,(5-))))((((,((). 
-TAATACGATGACATTTCGCACCGA ab 2 @M02286:46:000000000-AEG11:1:2108:2493:15900 2:N:0:1 AAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGAGCGTAACCACCACACCAGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCATCTTAGCTATTACGCCAGCTGGCGCAAGGAGGAAGGGCTAGCAGGCGATTCAGTTGGGTAACGACAGGGATTTCACAGTCAAGGCGTTGTACAACG-ACGCCTAGGGACTTGTAATACGAGTCACTATAGGGCGAATTGG G@FFA,:FE@FFFDEEFF?EEDECCFDDFEGFFFEEGGGCC,,6+CCF+B,8,F+:,BB:B+B,?:+>=>:7?@,<C:7C7+@>FFGG::7:+@+5@EA,3<CEGAFFGG,6F7:*158F,@CCE,2=*=BF*43<7*4=B8CE**0+3+3<++++++22/8?2+<CE/2:5=5**2:*:*:C***)+0*))**)0*:***99:4<7/)))0)>*.(90/**0/)**1)(,-53(0)0)), 23(()(-8(74((/.<6*6))4)((.:-6*)-)*,(0.,(-+0 -TAATACGATGACATTTCGCACCGA ab 2 @M02286:46:000000000-AEG11:1:2111:24850:13036 2:N:0:1 AAGGAGCGGGCGCTAGGGCGCTGGCAACTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAACGCGCACCTACCTTGCCCGTCCATTAGCCATTCAGCCTGCGCAACTGTCGGGACGGGCGCTCGGTTCGTGCCCCTTCTCTATCACGTCACCTGCTGCACGGAGCATCTCCCTCACGCCCACTACGTTGGCTAGCGCCATGTTTTTACCTTCCGCGCCTGTGTAACAAGCACGCACTGGAGTTTGAAGTCCCCTCCATCGTAGCTCAATTGT- GG<<,,C@++6+@:@+;,E::F+FC,:,:6CF<E+6CF:,,,6:FCE47>+BB,48=DF?:F7CC+@:++33,,+8>C+6:,,D,,,,:,3<@<FG9B,,@,,,33,,,75>D*>7>F<,@,1::4*1**11*:<****2**//;*22;E+0+++++2**;;*<*0)+*+00***2*1++0*0***)1*1)1C)*9)/**1*)+**)*,1)**)2047*9)*.*(*())()(6)6*74-*),)(()()(.(.(4)-))4*.4)*.3,,()/((.77)))))-)) -TAATACGATGACATTTCGCACCGA ab 2 @M02286:46:000000000-AEG11:1:2118:11759:4034 2:N:0:1 AAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTGGCGAAAGGGGGGTGTGCTGCAAGGCGCTTAAGTTGGGTAACGCCAGGGTTTTCCCAGTCACGCCGTTGTAACACG-ACGGCCCGGGAATTGCAATACGACTCACTATCGGGCGAGTTGG GGGGGGGGGGGGGGEGGGGFFGEFGGGGGGGFAF@CFGEGGGGGGGGGEGDFGGCGGGGGGGGDF+BFEECFGFGGGGGGGGDEGG3>B:7>FFEECFGCCFGE;F;F:C>FFFG*CF:CCFGFEGFGG7@C5:;ECG@EE>58ED6++<C5838C7ECEC5*:6A@EGGE=CFG*:)298*:>@FFFF:<>*)1)/09@498(999/..)->().4.7<?0/442@3*0()-1946)<:: ;9>B99(4)(*-2),)).))4(-,.4)9-)*)0((4(((-()( |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/TAATACGATGACATTTCGCACCGA.ab.2.sscs.after.fa --- a/misc/bug1/TAATACGATGACATTTCGCACCGA.ab.2.sscs.after.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->TAATACGATGACATTTCGCACCGA.ab.2 5 -AAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTGGCGAAAGGGGGANGTGCTGCAAGGCGANNNNGTTGNGTAACNNNNGGGNTTTCCCANTCNCGNCGNNNNNANACGACGGNNNNNNNNNNNTNNNNNNACTNANNNNNNNNNNNNNNNN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/TAATACGATGACATTTCGCACCGA.ab.2.sscs.before.fa --- a/misc/bug1/TAATACGATGACATTTCGCACCGA.ab.2.sscs.before.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->TAATACGATGACATTTCGCACCGA.ab.2 5 -AAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTACAGGGCGCGTCCATTCGCCATTCAGGCTGCGCAACTGTTGGGAAGGGCGATCGGTGCGGGCCTCTTCGCTATTACGCCAGCTGGCGAAAGGGGGANGTGCTGCAAGGCGANNNNGTTGNGTAACNNNNGGGNTTTCCCANTCNCGNCGNNNNNANACGNACGGNNNNNNNNNNNTNNNNNNACTNANNNNNNNNNNNNNNNN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/TTTTAAGCGAAATTTACCCGTTAA.ab.2.family.msa.tsv --- a/misc/bug1/TTTTAAGCGAAATTTACCCGTTAA.ab.2.family.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,9 +0,0 @@ -TTTTAAGCGAAATTTACCCGTTAA ab 2 @M02286:46:000000000-AEG11:1:1111:10934:19340 2:N:0:1 TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCACCCTGATGC- GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGFGGGGCFFGGFFGGGGDGGGGGGFFDCFGGGGEGGGGGGGGGGGGGGGFGGGGFFF;BFGGGGGDGGGGGGGGGGC9;AE7CEFG7ACFGGC@@9DFFFGAGFC>9B+59CGFA?DFGF8AE:++=5+9A5CC6:)@4)(;)7:@E<AEF@8@9<9@;36(2193:AA26<?BF0;((.):))((-4:96- -TTTTAAGCGAAATTTACCCGTTAA ab 2 @M02286:46:000000000-AEG11:1:1112:24472:13834 2:N:0:1 TTAAGATCCAGTTCGCTGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTGCACCAGCGTCTCTGGCTGAGCAAAAACAGGAAGGCAAAATGCCGCAAACAAGGGAATAAGGGCGACACGGACAGGTTGAATACTCATACTCTTCCTTTTTCACTATTATTGAAGCATCTATCAGGGTTATTGTCTCATTAGAGGATACATATTTCAGTGTATTTAGAATAATCAACAAATAGTCGTTCCGCACACTTTTCCCCGTACAGTGCCATCTGATGC- GC,CEFG<,,,6,,,+6,BFG<6CFCA<F@FC@<,BCD:CEE6,,6,,;CFF8<C,6,:69,<FFG<8@@8,,:C,A,,,:BB,,46BFG,,BF,?=F<AF<<,=+448+,,:A<=A=:BGA5,8FC::BFEG+C++CFDE,E=B9339,3,3@3>,EEF93A,+@<<D;,EC:@99@A,59;A+5*55;*4++2++2+:+2+,4*94A911+=*++;*289>=:*@=)+**1=*1*(**/4;;)).)/(5)((.((()))1)2(2(,-(.)).)-)()-.)6) -TTTTAAGCGAAATTTACCCGTTAA ab 2 @M02286:46:000000000-AEG11:1:1113:12129:21325 2:N:0:1 TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCCACATGATGC- GGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFDFGGGGGGGGGGGEGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGFGGGGFGGGGGGGGGGGGGFDGGGGGGGGGGGGG9EGGG9;DCFGGGGGG7C;EEGGGFFCFGGGFFGFGCFGGGGGFGGGFCA7>A4EFGFFAF6FFGFEFFFFFF?FFFFE3=;F3AABFFFFFFFBF6@8<DFFF>;<9>B>)42339927:A)67(5>AAA) 
-TTTTAAGCGAAATTTACCCGTTAA ab 2 @M02286:46:000000000-AEG11:1:1115:11255:23962 2:N:0:1 TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCTCGACAAGTGCCACCTGATGC- GGGGGGGGGFGGGGGGGGGGGGGGEEGGGGGGGGGGGGGGGFGGGFGGGGGGGGGFFGGGGGGGGGGGGGFEGGGGGGGGGFGGGGGGGGGFGGGGGGGGCFFCEGGGGGGGGGGGFFFGFA:FEGDFDEGGFGEGGDGGGGFFGGGGGGGCGFGGGGCFFGFGFFGEGGGAFCFGFGGGGGGGGGGGGF59FCFCGGFEE928@?79DFFFAFFCFA=CF6=8>8EF:7:=<FE@3=@22>>E8;)3:7ED0>>E;><=>A4).(((,((.4)69<((((69) -TTTTAAGCGAAATTTACCCGTTAA ab 2 @M02286:46:000000000-AEG11:1:2104:22219:19734 2:N:0:1 TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCCAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTGAGTAAAATAAACAAATAGGGGTTCCGCACCCATTTCCCCGAAAAGTGCCAACTGATGC- GGGGGGGGGGGGGGGGGFGGGGGDFEGFFGFGECFGGFGGGGGCGGGGGGGGFFFGGGFGFGGGGGGGGGEGGFDAEFEECFF,B<EFGGFG?EG>,ABFGGGGFC:=F:FFG+=FAFFF9<<FC@:FEEGGG+CCAFF9EFGAADFFGGGGGEF;FFGGGGGGEFC9BGFGEC,@EGFGGGGGGDF>FG58EAGFGFCF;>4:*7*3*8+1++=@5C785=;985+0++4+*;7);C./))1.;6DFF;130),(,,.@>2A*47170>(?))4*)3(.8:A) -TTTTAAGCGAAATTTACCCGTTAA ab 2 @M02286:46:000000000-AEG11:1:2108:15124:17674 2:N:0:1 TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGACGCATTTCTCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATACACAAATAGGTGTTCCGCGCACATTTCCCCGAAAAGTGCAACCTGATTG- GGGGGGAGGFGFGFGFGFGGGGAFFFGFGFGGFGG>CFEE<FGGGGGGDCEFGGGCFGFGGAE<@EGFG7@:FGCFGGGG,BEFGGGGGGGGGG8@AEGFEFG9FFEGG@FC@FCB<<@B,C9<FEEGE@C@EC>DFG9CDFGGGADF9ADD,@DFFA=FDF6DFE;EFFF;;@,3+;CCG,+@9;F,;D?CGFB=EFGG+=4<295ED;BFBDC+*4=@81+=*+++219;;A;+/9<5=EF<@+)/59+624B>>>F>2A<*(46;(365-6*))0).3)0+ -TTTTAAGCGAAATTTACCCGTTAA ab 2 
@M02286:46:000000000-AEG11:1:2113:3515:13547 2:N:0:1 TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATTAGCGGATACATATTTGAATGTATGTAGAAAAAGAAACAAATAGGGTTTCCGCGCACATTTCCCCGACAAGTGCACCCTGATTT- GGGFEGGGGGGCFGBFFGGGGGGCCFF8<FGGGGFGGGGGGGGGGGGGGGGGGFA9EEEFGGGGGGGGGGBCEGGGFF@FGGGGDGGFDFGGGGGGGGGG8AEFFEGGGGFG@FE:FF?,AFEDCGGGGDE@BEGCFEGCDGDCFDGFGGGFFC;,BF=FD?DFGGGFGG7;ED77ACFCFCA+@ACFGGGCCCF9CF;75=+A>4C4?FFGEGGFFFFA8=@F*==C8*//()2(.3=<<<7345(/8>E@0>@FB69<2<224(-(-6?94)).:((,4<2) -TTTTAAGCGAAATTTACCCGTTAA ab 2 @M02286:46:000000000-AEG11:1:2116:16001:8442 2:N:0:1 TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGCATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCCTTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTTACTGTATTTAGAAAAATACACAAATAGGGTTTCCGCGCACATTTCCCCGTACAGTGCCACCTGATGC- GGFCE@FGGFGEFGGFGGGGGGGGGGGGG<<FGGGGG?FGGGGGGGFCFG<FGGFGGFGGGGGGGGGGGGGD@DEFFF<?,CFFGG<ECFGG8=FCCFFGFFFGGGGCCCC7+4:=+9==59E@FGEGGGGCEEFDE=;EFB==ECG,D,EFGGFFGGGFGGGG:=FGFDEGA+7,:,=+E7CF<,@8D75>=3@CDCFF,=99BECD>F=CFFC+*0,=BFFFGF@F)=9(35=)*;*6(718)6)//;C58)171(.6.)-*.((-(,4=)-).5(4)(66) -TTTTAAGCGAAATTTACCCGTTAA ab 2 @M02286:46:000000000-AEG11:1:2118:20541:18054 2:N:0:1 TTGAGATCCAGTTCGATGTAACCCACTCGTGCA-CCAACTGATCTTCAGCATCTTTTACTTTCACCAACGTTTCTGGGTTAGCAAAAACCGGAAGGCAAAATGCCGCCAAAAAGGGAATAAGGGCGACACGGCAATGTTGAATACTCCTACCCTTCCTTTTTCAATATTATTTAAGCATTTATCAGGGTTATTGTCTCATTAGCGGATACATATTTTAATTTATTTTGAAAAATAAACAAATAGGGGTTCCCCGCACATTTCCCCGAACAGTGCCACCTGATGCG GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG GGGG7@,6CC<FEGFFGGFCGGGC<FGGA6,EF,869BE,,,CEA,,,:,CEG?@,7=+>+CD=FFEEFGG:B+=FGFG:FG?FCF,,AFEBF==E:C++@,,3C9@C;;:,=>=,2,,,6>DEEG<@,,32=@ECGG;AE:CCFGC7@F+,*@AD5:AAFGFFAF,:+5):?>?AAD4@7=+*2=/87:/*+40+;>A14:.67E;<3))/679())*,2,314.4:6<01)--7,.*6952(.51-)6, |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/TTTTAAGCGAAATTTACCCGTTAA.ab.2.sscs.after.fa --- a/misc/bug1/TTTTAAGCGAAATTTACCCGTTAA.ab.2.sscs.after.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->TTTTAAGCGAAATTTACCCGTTAA.ab.2 9 -TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGANCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCNNCNNGATGNN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/TTTTAAGCGAAATTTACCCGTTAA.ab.2.sscs.before.fa --- a/misc/bug1/TTTTAAGCGAAATTTACCCGTTAA.ab.2.sscs.before.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->TTTTAAGCGAAATTTACCCGTTAA.ab.2 9 -TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGANCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCNNCNNGATGN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/cmp.sh --- a/misc/bug1/cmp.sh Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,43 +0,0 @@ -#!/usr/bin/env bash -if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then - echo "Error: Must use bash version 4+." >&2 - exit 1 -fi -set -ue - -TmpSscsBefore="tmp.sscs.before.fa" -TmpSscsAfter="tmp.sscs.after.fa" -TmpMsa="tmp.family.msa.tsv" - -Usage="Usage: \$ $(basename $0) diff.family.msa.tsv diff.sscs.before.fa diff.sscs.after.fa > cmp.txt" - -function main { - if [[ $# -lt 3 ]] || [[ $1 == '-h' ]]; then - fail "$Usage" - else - msa_input="$1" - sscs_before="$2" - sscs_after="$3" - fi - - lines=$(cat $sscs_before | wc -l) - choice=$(python -c "import random; print 2*random.randint(1, $lines/2)") - - echo $lines $choice >&2 - - head -n $choice $sscs_before | tail -n 2 > $TmpSscsBefore - head -n $choice $sscs_after | tail -n 2 > $TmpSscsAfter - cat $msa_input | ../msa_sscs_matcher.py $TmpSscsBefore > $TmpMsa - - tail -n +2 $TmpSscsBefore - tail -n +2 $TmpSscsAfter - echo - cut -f 5,6 $TmpMsa -} - -function fail { - echo "$@" >&2 - exit 1 -} - -main "$@" |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/cmp.txt --- a/misc/bug1/cmp.txt Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGNGCNNNNACATATTTGAATGTA -AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGNGCNNNNACATATTTGAATGTAN - -AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTT- GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGFGGGGGGDGDDFGGGG?GGGGGGFGGGGGGGGGGGGGGCCFGGGEEGGG?FGGGGGGGECEGGCC7CDCCFCGGGFGGD9CFGGGFGGGGGGGGGG7FFFGGGGGCFFFFFFGFFFG0,C?GFGDFAF<?-962.)(4>?AFBF>BDF<A>7* -AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATTAGGGTTATTGTCTCATGAGCGGTTACATATTTGAATGTT- GGGGGGGGGGGGGGGGGFFGGFGGGGGGGGCED@FGGGGGGGGGGGGG@FF@FGGGGGGGGGGGGGGGDFEGGFGGDEFGGGGGGFCEFFGGGGGGGGGGGGFCFCCE?FGGCGGAFGGGGFFGGGGGGGGFCFFFGGGGEFGGGGFG@FGGGGGGDCFFFGGGFGECC*3:3>EC7D:EFED?8CCEGDGGGGGDGF2:CFG9E7FC3:=CGFCCFC9AFGGGGCFGGFGFF9=>?E6C7@7/*9DF>FGG<?FF96*-68)25)4/4>)-6=<BFF?<BAF) 
-AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATC-TTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGCAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTACTGAGCGGCTCCATATTTGAATGTAT GGGGGFFGDECFGGGGGGGFGGGGGGGGGGGGG>FGGGGGGGGFFGGGGCFFFGGGGGGGGGGGGFGGGGGGGFGGGGGGGGGECFCGGGCFG7FGGGGGGGGGGGFFFGGGGG FGGGAEFFFFFGGCBBFGGFGGGDDCGF8EG8FFFGGGFD<+<3DF,FFGGGGGGGDGEECCFFGGGFFC7:@CECC8*8CCFGGGGGGGGCFFGGGFGFGCFGFFFGFGFEGGGGGFGGGCGGGGFFF+*9<<FFGFF=2)10*1)9D()/(0).,*.849**6@AG<4 -AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGATCGGCTACATATTTGAATGTT- GGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGG7@F8CEEFGGGGGEFFDGGGG5CGGGEFGGGG9@979EFCFECFGGFGFGFGGGFGGC7CFFGGFGFFFFGGGFFGGFFFFFGFF9<*:B9>>2*:7))1)4)=?AB?*65*.6) |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/diff.family.msa.tsv --- a/misc/bug1/diff.family.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,26086 +0,0 @@\n-AAAACAGCAAAACATGCTGTAGAT\tba\t2\t@M02286:46:000000000-AEG11:1:2101:18863:4476 2:N:0:1\tAAAATGCTTTATTTGTGAAATTTGTGATGCTATTGCTTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACAACAACAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTAAAGCAAGTAAAACCTCTACAAATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTGCATCCC-CTGAGTCTTCCATGTTCTTCTCTCCACTTTCTTT\tGGGGGGGGGGGGCGGGGFDGGGFCFFGGGGGGGFCCFFFGGGGGFGFGGFGGGGGGGGGFGGGAFGFGGGGGGFGFGGGGGGGFGGFGFEGGFGGFGCGAFGGF<FGDFGGFGGDC>:CGGG>FGGGCCEGGGGG8EGDGGFGF;EDFGFCGGGGG8DGFGFE8F@DFDCGGGCEFCFGFGGDDFFFDG??DD7EAGF5+=+?D+*0CF*;01<@>BAFE4:>=FFFFC6;AC9*1C;@FFFE0*5+/08 2:2:349)/7<F??75)76))2))0/:)))))1)\n-AAAACAGCAAAACATGCTGTAGAT\tba\t2\t@M02286:46:000000000-AEG11:1:2103:10458:6445 2:N:0:1\tAAAATGCTTTATTTGTGAAATTTGTGATGCTATTGCTTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACAACAACAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTAAAGCAAGTACAACCTCTACAAATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAATTGAGTATTGGGACTGTGAATCAATGCCTGATTCATGCA-CTGCGTCTTCCATGTTCTTCTCCACAACATCCTT\tGGFGEGCCFEGG9FAGCF,6CCFFFGGGGAGGGGGGDFGGGGGGGGFGGFGGFGGGGGGGGFEFCE,CFFEGAFFFGCFGGGGEF9DDFFFGGG9AEAGEFE9EEGGF,A?AEEGG>BFFG,@CFF=F4AE7=C8A=,EF@GGG9;DCFFDDFGGG,=9EGE83;@,@EG?AE,=FFEF+8=D9E@D?@?D8,+?,3=FFFGFD+:C*@AC+?>FG*5596*5*-1;?/=76>69+>6.40(007?+=@* *83*(0.1=9>)9419)6:5?.61/6)(,6)(/)\n-AAAACAGCAAAACATGCTGTAGAT\tba\t2\t@M02286:46:000000000-AEG11:1:2107:21148:11093 2:N:0:1\tAAAATGCTTTATTTGTGAAATTTGTGATGCTATTGCTTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACAACAACAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTAAAGCAAGTAAAACCTCTACAAATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCC-CTGAGTCTTCCATGTTCTTCTCCCCTACATCTTT\tGGGGGGGGGGGGGGGGGGGGGCFEFGGGGGGGGGGGGEGGGGGFFGGGGFGFGGGGGGGGGFGGGACCFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGAFGGGGGGGGFG,F7FGGGGGGGGGGGGCGGGGGFFGFFGGGGGEGGGCEF9FGGFGGGFFG8ADDGGGCGCGGGGFGGGFFEFGFGFGFGGGGCGFGGGFC;@DEE>7;;BCC6@CGF4A>5?DF@FF?EFFEFFFFFE*8@EC?>=: 
;>9(*9;E337;)+.<3C));;/31*,/29<4=+\n-AAAACAGCAAAACATGCTGTAGAT\tba\t2\t@M02286:46:000000000-AEG11:1:2110:11463:11919 2:N:0:1\tAAAATGCTTTATTTGTGAAATTTGTGATGCTATTGCTTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACAACAACAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTAAAGCAAGTAAAACATCTACAAATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGAC-CTGAGTATTCACAGTTCTTCGTCCCACCATCTTC\tFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGFFGGGGGGGGGGGGGGGGGGGGGGGGFEGGGGGGGGGGGGGGGGGGG8FCGGGGGGGGFEFDGGFDGGGGGFGGFGFGDGGGGEGDG>@FCGCFD7BCFFFCFC@F=C@CFG,EE9DDFEGGGGFFGFGGG?FDGGGGGGGCDFGGGFGGGD685DFCDF?9F?;,59)43589D9B7+=@9AF3*77B*);@974*+:8*(0=*4*99?CE10 0=:((**.:)+/)//)).))((,/(*,8;)=F))\n-AAAACAGCAAAACATGCTGTAGAT\tba\t2\t@M02286:46:000000000-AEG11:1:2111:10042:23877 2:N:0:1\tAAAATGCTTTATTTGTGAAATTTGTGATGCTATTGCTTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACAACAAAAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTTAAGCAAGTAAAACCTCTAAAAATGTGTTATGGCTGATTATGATCATGAATAGACTGTGCGGACGGAGGGGCCTGAAATGAGCATTGGGACTGTGATTAAAGGCCGTTTTCAGGCCAGTGAGCATTCAGTGTCTTGTCTAGTCCCTCATT-\tFGFG@FAFGGGG9<FGGGGGG<FFFFGEFCFFG9FG,<FF9EF,CFFGFC<FGFGFCFFF@FCFCCCAEFCEAEC9@EGGC,B4<ECFFFG9<AF<,@EC@,:,ABEFC9<<AEEGCGG,AFC77=FF:F9C@++94,E>=@CE@FGGE,@E,,8>,>,8:,,CD;63+63@:D,DEF:EED9=,,:=F99+:7+3*+*@;***5**15*)*+0BA:DCB@8262*/*1)+).1)+*(/).(2/);)*(/,())+)().*.)-*).).))*)))**.).),()- \n-AAAACATAGTGGCCGCGAGTTCTT\tab\t1\t@M02286:46:000000000-AEG11:1:1102:22486:1531 1:N:0:1\tTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCC-TTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTACCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACT\tGGFGFGEFGCGGCFFGGGGGFEFGGGFGFFFGGGGGFGGGGGGGGGGGGGGGGGGGGGFFFGGGGGGGGGFGFGGGGDEEEE@FGGFFGFFGG?FFGGGGGGGGDEFGGGCF9FGF<FFFGDFGGGFC<EEFGFCFFFFFGFGGG8<ADF@FEFDGGGG:FGGGFGEGGGGGGGFFF?D;:,7=>;D,DFAFFGFG 
CFEFFGECCE:CGBEDE>@AC9:FFAC9?*BCFGE4C;5DC?<58CAFB>FFF:7BB:5,=F38).<=4<5:?BFF?2<:>?21231(\n-AAAACATAGTGGCCGCGAGTTCTT\tab\t1\t@M02286:4'..b';FFF?=EFAFFF)>E<77035AC59)30))*:E;AC9AB22596:EFB4)5(833@3??\n-TTGCTCAGCTCAAGTGTTATATCG\tba\t2\t@M02286:46:000000000-AEG11:1:2107:26502:21197 2:N:0:1\tAGGATGAAGCATGAAAATAGAAAATTATACAGGAAAGATCCACTTGTGTGGGTTGATTGCTACTGCTTCGATTGCTTTAGACTGTGGTTTGGACTTGATCTTTGTGAAGGAACCTTACTTCTGTGGTTTGACATATTTGGACAAACTACCTACAGAGATTTAAAGCTCTAAGGTAAATATAAAATTTTTAAGTGTATAATGTGTTAAACTACTGATTCTACTTGTTTGTGTATTTTAGCTTCCATCCTATGGAACTGATGAATGGCCGCAGTGTTGGAATGCTT-\t-68<E@<@@F<@FCC6,<,CCF9,CF<F,CFFF86,C<FECFGGGFGFFCECFCE8@F9EGGGFDC6C@F,CAFGGGGGA86E<@FFGGF,B,,:5A,C?B?FEF,,,:E,,,4=?F49FDE9F,=F+E7,;=F=,CA,=,E8,,9D,A?F9F8>,>,@@;E,4@DCDF8=CFE8E,@:EA,+7?=B,?=9,@=@++++=+?==:9,7:D5FFD+?CCF9+421B:00AF8*9?CA63*=E955*+195>8)8**..*21**:1)))//9)8>*B/5)+776+4 \n-TTGCTCAGCTCAAGTGTTATATCG\tba\t2\t@M02286:46:000000000-AEG11:1:2108:20398:11389 2:N:0:1\tAGGATGAAGCATGAAAATAGAAAATTATACAGGAAAGATCCACTTGTGTGGGTTGATTGCTACTGCTTCGATTGCTTTAGAATGTGGTTTGGACTTGATCTTTGTGAAGGAACCTTACTTCTGTGGTGTGACATAATTGGACAAACTACCTACAGAGATTTAAAGCTCTAAGGTAAATATAAAATTTTTAAGTGTATAATGTGTTAAACTACTGATTCTAATTGTTTGTGTATTTTAGATTCCAACCTATGGAACTGATGAATGGGAGCAGTGGTGGAATGCCT-\tEGGGGGGGGGGGGGGGGGGGGFEGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFDFFFFFGG9@FGGFGGC<FGGGG<FGGGGG7EFGGGBAAFFFGGGGGGFEFFGGDG?>EGGGGGGGFFGGGCDFCFCFGGG?CCFCFEFFGGGFFGFECDFGG=D8DGGGE,=CFGGGFDAFFFC,9F<=DFFD@A<DFGGGGF+=:9CDDBDFB6EFECGFFAGF=;CFFF5*?+;CCF@FFE**3*1)0>F@C@F4*4+>*6ACEB)<)-0*73=(62/>@C,5 \n-TTGCTCAGCTCAAGTGTTATATCG\tba\t2\t@M02286:46:000000000-AEG11:1:2115:5903:8910 
2:N:0:1\tAGGATGAAGCATGAAAATAGAAAATTATACAGGAAAGATCCACTTGTGTGGGTTGATTGCTACTGCTTCGATTGCTTTAGAATGTGGTTTGGACTTGATCTTTGTGAAGGAACCTTACTTCTGTGGTGTGACATAATTGGACAAACTACCTACAGAGATTTAAAGCTCTAAGGTAAATATAAAATTTTTAAGTGTATAATGTGTTACACTACTGATTCTAATTGTTTGTGTATTTTAGATTCCAACCTATGGAACTGATGAATGGGAGCAGTGGTGGAATGCCT-\tGGGGGGGGGGFGGGGGGGGGGCDFFGGGCFGFGFEGGGGGGGGCEFF<FECDFCFGGGGGGGGFGGGGGGCF,B9EEFGGGGGGFFFFFGEGGGCFFG9FFFFGGGFGFFGFGG@?E?FGGGGGFGGGGGGFFGDFFGGGGEG@EGAFFGFFCEFGGFGC9,D@EFFGGF=FFFCFDEGGCGG,,=CD8==9FG9=CFFFGF;EGD,;=?FF+3*0@CCCFEFFFFFF@DFFEF7CAD*>EE+;;5?F@A@587)*6<EF>6EEF22>*((*(7@@225356./ \n-TTGCTCAGCTCAAGTGTTATATCG\tba\t2\t@M02286:46:000000000-AEG11:1:2118:23131:17696 2:N:0:1\tAGGATGAAGCATGAAAATAGAAAATTATACAGGAAAGATCCACTTGTGTGGGTTGATTGCTACTGCTTCGATTGCTTTAGAATGTGGTTTGGACTTGATCTTTGTGAAGGAACCTTACTTCTGTGGTGTGACATAATTGGACAAACTACCTACAGAGATTTAAAGCTCTAAGGTAAATATAAAATTTTTAAGTGTATAATGTGTTAAACTACTGATTCTAATTGTTTGTGTATTTTAGATTCCAACCTATGGAACTGATGAATGGGAGCAGTGGTGGAATGCCT-\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFFGGGGGGGGAEGGGGGGDGGGGGGGGGGGGGGGGGGGCFFGGAFFGEDFGGGGGGGFGGGGGGGGGGFG8;DGGGEAFGDFGGGDGFDGFGGGG9DAFGFFGGEGFGFFFBGBFC;EFF=8F?DC=DFFFGF7FFEEFFCC?=C<EFEFC,>EEEE5)>))3*@*9=EE>?;<*6AB6>DD(3BB)*((587@F)9 \n-TTGCTCAGCTCAAGTGTTATATCG\tba\t2\t@M02286:46:000000000-AEG11:1:2119:15589:18585 2:N:0:1\tAGGATGAAGCATGAAAATAGAAAATTATACAGGAAAGATCCACTTGTGTGGGTTGATTGCTACTGCTTCGATTGCTTTAGAATGTGGTTTGGACTTGATCTTTGTGAAGGAACCTTACTTCTGTGGTGTGACATAATTGGACAAACTACCTACAGAGATTTAAAGCTCTAAGGTAAATATAAAATTTTTAAGTGTATAATGTGTTAAACTACTGATTCTAATTGTTTGTGTATTTTAGATTCCAACCTATGGAACTGATGAATGGGAGCAGTGGTGGAATGCCT-\tGGGGGGDGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGG<FFFGFGGGGGGGGGGGGGGGGGGCACFGFFGGGFGGGGGFGGFCDGGGGCDCEFFFGGGFGGFF=DEGGGGGGGGFFGGEFFFGGFCFE7=FGF+=DFBFFFFFFFC@5CFEFEGFGFF@EFEEFB7A;AAA;=>@9A@C595BF9AEE4AEF@@4:>:>:BE:>5>=@*)8 \n-TTGCTCAGCTCAAGTGTTATATCG\tba\t2\t@M02286:46:000000000-AEG11:1:2119:2379:17240 
2:N:0:1\tAGGATGAAGCATGAAAATAGAAAATTATACAGGAAAGATCCACTTGTGTGGGTTGATTGCTACTGCTTCGATTGCTTTAGAATGTGGTTTGGACTTGATCTTTGTGAAGGAACCTTACTTCTGTGGTGTGACATAATTGGACAAACTACCTACAGAGATTTAAAGCTCTAAGGTAAATATAAAATTTTTAAGTGTATAATGTGTTAAACTACTGATTCTAATTGTTTGTGTATTTTAGATTCCAACCTATGGAACTGATGAGTGGGAGCAGTGGTGGAATGCCT-\tGGGGGDGGGGGGGGFGGGGGFFGGGDFGGGGG?FDGGGGFF@<@@FFFFFGG@CFGGGDGGFFFGGCCFGF9FGG9FG<FGFDGFFG4F,CCDFF9EFDF9FGGGGCFGGGF?AFE?,CEEFFFG8=D==@E>9EC>BEFGFCFCDED@D@FGG8AD,;,,@==D9,6;=>FFGGFFDGGDGGGCDCDFD?88=FFFFGF7=+?F7D=+?D?+;FDFAFEGFFE<;7==FF@*C,5BC7E@**3;55)3<:@>>@3=BE=C)/.)/49>2=@=294@F:*)101 \n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/diff.sscs.after.fa --- a/misc/bug1/diff.sscs.after.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,9426 +0,0 @@\n->AAAAAAACGGAACCACGTCACATT.ba.2 3\n-CCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCANANTCCCTTAACGTGANTTTTCGTTCCNNTGAGCGTCAGACCCCGTANNNNNGNNCANAGGNTCTTCTTGAGNTCCTTTTT\n->AAAAAAAGCATGCTGCGGAATGAC.ab.1 7\n-GAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCNTTATCCNCTGATTCTGTGGNN\n->AAAAAAATCTATCCTACCGTCATA.ba.2 5\n-GAAACTCCTTGCATTTTTTTAAATATGCCTTTCTCATCAGAGGAATATTCCCCCAGGCACTCCTTTCAAGACCTAGAAGGTCCATTAGCTGCAAAGATTCCTCTCTGTTTAAAACTTTATCCATCTTTGCAAAGCTTTTTGCAAAAGCCTAGGCCTCCAAAAAAGCCTCCTCACTACTTCTGGAATAGCTCAGAGGCCGNGGCGGCCTCGGCCTCTGCATNAATAAAAAAANTTAGTCANCCATGGGNNNNNNNNNGGGCNNNNNNNGGCNGNNTTNGGGNNGG\n->AAAAACGACGCTCGTTCTCGAGAT.ba.2 5\n-ATCATAACATACTGTTTTTTCTTACTCCACACAGGCATAGAGTGTCTGCTATTAATAACTATGCTCAAAAATTGTGTACCTTTAGCTTTTTAATTTGTAAAGGGGTTAATAAGGAATATTTGATGTATAGTGCCTTGACTAGAGATCCATTTTCTGTTATTGAGGAAAGTTTGCCAGGTGGGTTAAAGGAGCATGATTTTAATCCAGAAGAAGCAGAGGAAACTAAACAAGTGTCCTGGAAGCTTGTAACAGAGTATGCAATGGAAACAAANTGTGATGATGTNN\n->AAAAAGAAAACATAGCGCTGTGAA.ab.1 5\n-CAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCN\n->AAAAAGATATACAGGATTAATAAG.ab.2 7\n-AGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGNTTAATCAGTGAGNNN\n->AAAAAGCATGTGTAGCTAATAAAA.ab.1 
7\n-AAGAGTCCACTATTAAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGAAAGGAAGGGAAGAAAGCGAAAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCANCACANCCGCCGCGCNN\n->AAAAATGATAAAACAGGCATGAGA.ab.1 7\n-AATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGAGTCTTCCATGTTCTTCTCCCCACCATCTTCATTTTTATCAGCATTTTCCTGGCTGTCTTCATCATCATCATCACTGTTTCTTAGCCAATCTAAAACTCCAATTCCCATAGCCACATTAAACTTCATTTTTTGATACACTGACAAACTAAACTCTTTGTCCAATCTCTCTTTCCACTCCACAATTCNNN\n->AAAACACGCTCCCGGACGTTGTAC.ab.2 5\n-TGCTTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACAACAACAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTAAAGCAAGTAAAACCTCTACAAATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGAGTCTTCCATGTTCTTCTCCCCACCATCTTCATTTTTATCAGCANTTTCCTGGNNGTCTTCATNN\n->AAAACAGAGAATCGGAAGGAATAT.ab.1 5\n-GTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAANAGGAGAGCGCANN\n->AAAACAGCAAAACATGCTGTAGAT.ba.2 5\n-AAAATGCTTTATTTGTGAAATTTGTGATGCTATTGCTTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACAACAACAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTAAAGCAAGTAAAACCTCTACAAATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGANNCTTNCATGNTNTTNTCNNNNCNANCTTN\n->AAAACATACCAGTAGGACTTGAGA.ba.1 9\n-AGGAGGCACATTTTCCCCACCTGTGTAGGTTCCAAAATATCTAGTGTTTTCATTTTTACTTGGATCAGGAACCCAGCACTCCACTGGATAAGCATTATCCTTATCCAAAACAGCCTTGTGGTCAGTGTTCATCTGCTGACTGTCAACTGTAGCATTTTTTGGGGTTACAGTTTGAGCAGGATATTTGGTCCTGTAGTTTGCTAACACACCCTGCAGCTCCAAAGGTTCCCCACCAACAGCAAAAAAATGAAAATTTGACCCTTGAATGGGTTTTCCAGCACCATN\n->AAAACATAGTGGCCGCGAGTTCTT.ab.1 
8\n-TGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGA'..b'TTGCATAATGCTTTTCATGGTACTTATAGTGGCTGGGCTGTTCTTTTTTAATACATTTTAAACACATTTCAAAACTGTACTGAAATTCCAAGTACATCCCAAGCAATAACAACACATCATCACATTTTGTTTCCATTGCATACTCN\n->TTTCCTGGCCCAAAGATCTTTTCA.ab.2 5\n-TGCAAAGCATGCATCTCAATTAGTCAGCAACCAGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCATAGTCCCGCCCCTAACTCCGCCCATCCCGCCCCTAACTCCGCCCAGTTCCGCCCATTCTCCGCCCCATGGCTGACTAATTTTTTTTATTTATGCAGAGGCCGAGGCCGCCTCGGCCTCTGAGCTATTCCAGAAGTAGTGAGGAGGCTTTTTTNNNNGNCTNNNCNTTNNCNAANNNNNTN\n->TTTCGACTCCCGTGAATGTGTCGA.ab.2 12\n-TGGAACAAGAGTCCACTATTAAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGNNAGGAAGGGAAGAAAGCGAANGGAGCGGGCGCNNNNGCGCTGNNANGTGTNGNNNTCACGCNNNNCNNNACNACCANNNNCNCN\n->TTTCGCGTAAACTCCCCTTGTGAA.ba.1 5\n-AAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGAAAGGAAGGGAAGAAAGCGAAAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTANN\n->TTTCTCACGCACCATTCTTTAAAG.ab.1 10\n-TTCTTCTGGATTAAAATCATGCTCCTTTAACCCACCTGGCAAACTTTCCTCAATAACAGAAAATGGATCTCTAGTCAAGGCACTATACATCAAATATTCCTTATTAACCCCTTTACAAATTAAAAAGCTAAAGGTACACAATTTTTGAGCATAGTTATTAATAGCAGACACTCTATGCCTGTGTGGAGTAAGAAAAAACAGTATGTTATGATTATAACTGTTATGCCTACTTATAAAGGTTACAGAATATTTTTCCATAATTTTCTTGTATAGCAGTGCAGCTT\n->TTTGACATCAACAGAGTACGTTTC.ba.2 3\n-AGTTTGGCAAGGTTTTTAGAGGAAACTACTTGGACAGTAATTAATGCTCCTGTTAATTGGTATAACTCTTTACAAGATTACTACTCTACTTTGTCTCCCATTAGGCCTACAATGGTNAGACAAGTAGCCAACAGGGAAGGGTTGCAAATATCNTTTGNGCACACCTATGATAANATTGATGAANCANACAGTATTNNGCAAGTANCTGNGNGGTNGGNAGNNNAAANNNNANGTCNTAANNTNNNNNNNNNNNANTTNNNTNNAAANNNNNNNNNNNNTGGTNNN\n->TTTGAGCAGATTGGTCACTTTTCG.ba.1 
4\n-GAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTNAGGGATTTTGGTCAT\n->TTTGCCAGATCCGCTTACCTCCTT.ab.1 4\n-TATCCCCTGATTCTGTGGATAACCGTATTACCGCCTTTGAGTGAGCTGATACCGCTCGCCGCAGCCGAACGACCGAGCGCAGCGAGTCAGTGAGCGAGGAAGCGGAAGAGCGCCCAATACGCAAACCGCCTCTCCCCGCGCGTTGGCCGATTCATTAATGCAGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGCAGTGAGCGCAACGCAATTAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTANGTTGTGNN\n->TTTGCTAGGAAACGCTACCGTATT.ba.2 4\n-GTAGAATGTTGAGAGTCAGCAGTAGCCTCATCATCACTAGATGGCATTTCTTCTGAGCAAAACAGGTTTTCCTCATTAAAGGCATTCCACCACTGCTCCCATTCATCAGTTCCATAGGTTGGAATCTAAAATACACAAACAATTAGAATCAGTAGTTTAACACATTATACACTTAAAAATTTTATATTTACCTTAGAGCTTTAAATCTCTGTAGGTAGTTTGTCCAATTATGTCACACCACAGAAGTAAGGTTCCTTCACAAAGATCAAGTCCAANCNNCATTNN\n->TTTGGAGACAGATGCCTACGCCGT.ab.1 6\n-TACAGGACCAAATATCCTGCTCAAACTGTAACCCCAAAAAATGCTACAGTTGACAGTCAGCAGATGAACACTGACCACAAGGCTGTTTTGGATAAGGATAATGCTTATCCAGTGGAGTGCTGGGTTCCTGATCCAAGTAAAAATGAAAACACTAGATATTTTGGAACCTACACAGGTGGGGAAAATGTGCCTCCTGTTTTGCACATTACTAACACAGCAACCACAGTGCTTCTTGATGAGCAGGGTGTTGGGCCCTTGTGCAAAGCTGACAGCTTGTATGTTTCT\n->TTTGGCGAACGGGATTGCTTCACC.ba.2 5\n-AGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCNTNNNNCNNNNGNCNTANNCCNNNCTGNNANNNNNNCNNNNNNNNCTNTNNNNNNNCGN\n->TTTGGGACAAATTTATTAGGGCTT.ab.1 4\n-AGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCNN\n->TTTTAAGCGAAATTTACCCGTTAA.ab.2 
9\n-TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGANCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCNNCNNGATGNN\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/diff.sscs.before.fa --- a/misc/bug1/diff.sscs.before.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,9426 +0,0 @@\n->AAAAAAACGGAACCACGTCACATT.ba.2 3\n-CCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCNNANTCCCTTAACGTGANTTTTCGTTCCNNTGAGCGTCAGACCCCGTANNNNNGNNCANAGGNTCTTCTTGAGNTCCTTTTN\n->AAAAAAAGCATGCTGCGGAATGAC.ab.1 7\n-GAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAACAGGAGAGCGCACGAGGGAGCTTCCAGGGGGAAACGCCTGGTATCTTTATAGTCCTGTCGGGTTTCGCCACCTCTGACTTGAGCGTCGATTTTTGTGATGCTCGTCAGGGGGGCGGAGCCTATGGAAAAACGCCAGCAACGCGGCCTTTTTACGGTTCCTGGCCTTTTGCTGGCCTTTTGCTCACATGTTCTTTCCTGCNTTATCCNCTGATTCTGTGGN\n->AAAAAAATCTATCCTACCGTCATA.ba.2 5\n-GAAACTCCTTGCATTTTTTTAAATATGCCTTTCTCATCAGAGGAATATTCCCCCAGGCACTCCTTTCAAGACCTAGAAGGTCCATTAGCTGCAAAGATTCCTCTCTGTTTAAAACTTTATCCATCTTTGCAAAGCTTTTTGCAAAAGCCTAGGCCTCCAAAAAAGCCTCCTCACTACTTCTGGAATAGCTCAGAGGCCGNGGCGGCCTCGGCCTCTGCATNAATAAAAAAANTTAGTCANCCATGGGNNNNNNNNNGGGCNNNNNNNGGCNGNNTTNGGGNNGGN\n->AAAAACGACGCTCGTTCTCGAGAT.ba.2 5\n-ATCATAACATACTGTTTTTTCTTACTCCACACAGGCATAGAGTGTCTGCTATTAATAACTATGCTCAAAAATTGTGTACCTTTAGCTTTTTAATTTGTAAAGGGGTTAATAAGGAATATTTGATGTATAGTGCCTTGACTAGAGATCCATTTTCTGTTATTGAGGAAAGTTTGCCAGGTGGGTTAAAGGAGCATGATTTTAATCCAGAAGAAGCAGAGGAAACTAAACAAGTGTCCTGGAAGCTTGTAACAGAGTATGCAATGGAAACAAANTGTGATGATGTN\n->AAAAAGAAAACATAGCGCTGTGAA.ab.1 5\n-CAATAACCCTGATAAATGCTTCAATAATATTGAAAAAGGAAGAGTATGAGTATTCAACATTTCCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATC\n->AAAAAGATATACAGGATTAATAAG.ab.2 7\n-AGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGNTTAATCAGTGAGNN\n->AAAAAGCATGTGTAGCTAATAAAA.ab.1 
7\n-AAGAGTCCACTATTAAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGAAAGGAAGGGAAGAAAGCGAAAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCANCACANCCGCCGCGCN\n->AAAAATGATAAAACAGGCATGAGA.ab.1 7\n-AATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGAGTCTTCCATGTTCTTCTCCCCACCATCTTCATTTTTATCAGCATTTTCCTGGCTGTCTTCATCATCATCATCACTGTTTCTTAGCCAATCTAAAACTCCAATTCCCATAGCCACATTAAACTTCATTTTTTGATACACTGACAAACTAAACTCTTTGTCCAATCTCTCTTTCCACTCCACAATTC\n->AAAACACGCTCCCGGACGTTGTAC.ab.2 5\n-TGCTTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACAACAACAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTAAAGCAAGTAAAACCTCTACAAATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCCTGAGTCTTCCATGTTCTTCTCCCCACCATCTTCATTTTTATCAGCANTTTCCTGGNNGTCTTCATN\n->AAAACAGAGAATCGGAAGGAATAT.ab.1 5\n-GTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGAGCGAACGACCTACACCGAACTGAGATACCTACAGCGTGAGCTATGAGAAAGCGCCACGCTTCCCGAAGGGAGAAAGGCGGACAGGTATCCGGTAAGCGGCAGGGTCGGAANAGGAGAGCGCAN\n->AAAACAGCAAAACATGCTGTAGAT.ba.2 5\n-AAAATGCTTTATTTGTGAAATTTGTGATGCTATTGCTTTATTTGTAACCATTATAAGCTGCAATAAACAAGTTAACAACAACAATTGCATTCATTTTATGTTTCAGGTTCAGGGGGAGGTGTGGGAGGTTTTTTAAAGCAAGTAAAACCTCTACAAATGTGGTATGGCTGATTATGATCATGAACAGACTGTGAGGACTGAGGGGCCTGAAATGAGCCTTGGGACTGTGAATCAATGCCTGTTTCATGCCNCTGANNCTTNCATGNTNTTNTCNNNNCNANCTTN\n->AAAACATACCAGTAGGACTTGAGA.ba.1 9\n-AGGAGGCACATTTTCCCCACCTGTGTAGGTTCCAAAATATCTAGTGTTTTCATTTTTACTTGGATCAGGAACCCAGCACTCCACTGGATAAGCATTATCCTTATCCAAAACAGCCTTGTGGTCAGTGTTCATCTGCTGACTGTCAACTGTAGCATTTTTTGGGGTTACAGTTTGAGCAGGATATTTGGTCCTGTAGTTTGCTAACACACCCTGCAGCTCCAAAGGTTCCCCACCAACAGCAAAAAAATGAAAATTTGACCCTTGAATGGGTTTTCCAGCACCAT\n->AAAACATAGTGGCCGCGAGTTCTT.ab.1 
8\n-TGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTT'..b'CAGCATTTGCATAATGCTTTTCATGGTACTTATAGTGGCTGGGCTGTTCTTTTTTAATACATTTTAAACACATTTCAAAACTGTACTGAAATTCCAAGTACATCCCAAGCAATAACAACACATCATCACATTTTGTTTCCATTGCATACTC\n->TTTCCTGGCCCAAAGATCTTTTCA.ab.2 5\n-TGCAAAGCATGCATCTCAATTAGTCAGCAACCAGGTGTGGAAAGTCCCCAGGCTCCCCAGCAGGCAGAAGTATGCAAAGCATGCATCTCAATTAGTCAGCAACCATAGTCCCGCCCCTAACTCCGCCCATCCCGCCCCTAACTCCGCCCAGTTCCGCCCATTCTCCGCCCCATGGCTGACTAATTTTTTTTATTTATGCAGAGGCCGAGGCCGCCTCGGCCTCTGAGCTATTCCAGAAGTAGTGAGGAGGCTTTTTTNNNNGNCTNNNCNTTNNCNAANNNNNTT\n->TTTCGACTCCCGTGAATGTGTCGA.ab.2 12\n-TGGAACAAGAGTCCACTATTAAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGNNAGGAAGGGAAGAAAGCGAANGGAGCGGGCGCNNNNGCGCTGNNANGTGTNGNNNTCACGCNNNNCNNNACNACCANNNNCNCC\n->TTTCGCGTAAACTCCCCTTGTGAA.ba.1 5\n-AAAGAACGTGGACTCCAACGTCAAAGGGCGAAAAACCGTCTATCAGGGCGATGGCCCACTACGTGAACCATCACCCTAATCAAGTTTTTTGGGGTCGAGGTGCCGTAAAGCACTAAATCGGAACCCTAAAGGGAGCCCCCGATTTAGAGCTTGACGGGGAAAGCCGGCGAACGTGGCGAGAAAGGAAGGGAAGAAAGCGAAAGGAGCGGGCGCTAGGGCGCTGGCAAGTGTAGCGGTCACGCTGCGCGTAACCACCACACCCGCCGCGCTTAATGCGCCGCTAN\n->TTTCTCACGCACCATTCTTTAAAG.ab.1 10\n-TTCTTCTGGATTAAAATCATGCTCCTTTAACCCACCTGGCAAACTTTCCTCAATAACAGAAAATGGATCTCTAGTCAAGGCACTATACATCAAATATTCCTTATTAACCCCTTTACAAATTAAAAAGCTAAAGGTACACAATTTTTGAGCATAGTTATTAATAGCAGACACTCTATGCCTGTGTGGAGTAAGAAAAAACAGTATGTTATGATTATAACTGTTATGCCTACTTATAAAGGTTACAGAATATTTTTCCATAATTTTCTTGTATAGCAGTGCAGCTTN\n->TTTGACATCAACAGAGTACGTTTC.ba.2 3\n-AGTTTGGCAAGGTTTTTAGAGGAAACTACTTGGACAGTAATTAATGCTCCTGTTAATTGGTATAACTCTTTACAAGATTACTACTCTACTTTGTCTCCCATTAGGCCTACAATGGTNAGACAAGTAGCCAACAGGGAAGGGTTGCAAATATCNTTTGNGCACACCTATGATAANATTGATGAANCANACAGTATTNNGCAAGTANCTGNGNGGTNGGNAGNNNAAANNNNANGTCNTAANNTNNNNNNNNNNNANTTNNNTNNAAANNNNNNNNNNNNTGGTNN\n->TTTGAGCAGATTGGTCACTTTTCG.ba.1 
4\n-GAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTNAGGGATTTTGGTCAN\n->TTTGCCAGATCCGCTTACCTCCTT.ab.1 4\n-TATCCCCTGATTCTGTGGATAACCGTATTACCGCCTTTGAGTGAGCTGATACCGCTCGCCGCAGCCGAACGACCGAGCGCAGCGAGTCAGTGAGCGAGGAAGCGGAAGAGCGCCCAATACGCAAACCGCCTCTCCCCGCGCGTTGGCCGATTCATTAATGCAGCTGGCACGACAGGTTTCCCGACTGGAAAGCGGGCAGTGAGCGCAACGCAATTAATGTGAGTTAGCTCACTCATTAGGCACCCCAGGCTTTACACTTTATGCTTCCGGCTCGTANGTTGTGN\n->TTTGCTAGGAAACGCTACCGTATT.ba.2 4\n-GTAGAATGTTGAGAGTCAGCAGTAGCCTCATCATCACTAGATGGCATTTCTTCTGAGCAAAACAGGTTTTCCTCATTAAAGGCATTCCACCACTGCTCCCATTCATCAGTTCCATAGGTTGGAATCTAAAATACACAAACAATTAGAATCAGTAGTTTAACACATTATACACTTAAAAATTTTATATTTACCTTAGAGCTTTAAATCTCTGTAGGTAGTTTGTCCAATTATGTCACACCACAGAAGTAAGGTTCCTTCACAAAGATCAAGTCCAANCNNCATTN\n->TTTGGAGACAGATGCCTACGCCGT.ab.1 6\n-TACAGGACCAAATATCCTGCTCAAACTGTAACCCCAAAAAATGCTACAGTTGACAGTCAGCAGATGAACACTGACCACAAGGCTGTTTTGGATAAGGATAATGCTTATCCAGTGGAGTGCTGGGTTCCTGATCCAAGTAAAAATGAAAACACTAGATATTTTGGAACCTACACAGGTGGGGAAAATGTGCCTCCTGTTTTGCACATTACTAACACAGCAACCACAGTGCTTCTTGATGAGCAGGGTGTTGGGCCCTTGTGCAAAGCTGACAGCTTGTATGTTTC\n->TTTGGCGAACGGGATTGCTTCACC.ba.2 5\n-AGCAGAGCGCAGATACCAAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGCTAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGATAAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCNTNNNNCNNNNGNCNTANNCCNNNCTGNNANNNNNNCNNNNNNNNCTNTNNNNNNNCGNN\n->TTTGGGACAAATTTATTAGGGCTT.ab.1 4\n-AGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGATCTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCGTAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCN\n->TTTTAAGCGAAATTTACCCGTTAA.ab.2 
9\n-TTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGANCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCNNCNNGATGN\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/tmp.family.msa.tsv --- a/misc/bug1/tmp.family.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -CTCGAGCTATACCACCTTAGACGT ba 1 @M02286:46:000000000-AEG11:1:1108:8496:6724 1:N:0:1 AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTT- GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGFGGGGGGDGDDFGGGG?GGGGGGFGGGGGGGGGGGGGGCCFGGGEEGGG?FGGGGGGGECEGGCC7CDCCFCGGGFGGD9CFGGGFGGGGGGGGGG7FFFGGGGGCFFFFFFGFFFG0,C?GFGDFAF<?-962.)(4>?AFBF>BDF<A>7* -CTCGAGCTATACCACCTTAGACGT ba 1 @M02286:46:000000000-AEG11:1:1118:6636:16568 1:N:0:1 AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATTAGGGTTATTGTCTCATGAGCGGTTACATATTTGAATGTT- GGGGGGGGGGGGGGGGGFFGGFGGGGGGGGCED@FGGGGGGGGGGGGG@FF@FGGGGGGGGGGGGGGGDFEGGFGGDEFGGGGGGFCEFFGGGGGGGGGGGGFCFCCE?FGGCGGAFGGGGFFGGGGGGGGFCFFFGGGGEFGGGGFG@FGGGGGGDCFFFGGGFGECC*3:3>EC7D:EFED?8CCEGDGGGGGDGF2:CFG9E7FC3:=CGFCCFC9AFGGGGCFGGFGFF9=>?E6C7@7/*9DF>FGG<?FF96*-68)25)4/4>)-6=<BFF?<BAF) -CTCGAGCTATACCACCTTAGACGT ba 1 @M02286:46:000000000-AEG11:1:2108:5062:14791 1:N:0:1 AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATC-TTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGCAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTACTGAGCGGCTCCATATTTGAATGTAT GGGGGFFGDECFGGGGGGGFGGGGGGGGGGGGG>FGGGGGGGGFFGGGGCFFFGGGGGGGGGGGGFGGGGGGGFGGGGGGGGGECFCGGGCFG7FGGGGGGGGGGGFFFGGGGG FGGGAEFFFFFGGCBBFGGFGGGDDCGF8EG8FFFGGGFD<+<3DF,FFGGGGGGGDGEECCFFGGGFFC7:@CECC8*8CCFGGGGGGGGCFFGGGFGFGCFGFFFGFGFEGGGGGFGGGCGGGGFFF+*9<<FFGFF=2)10*1)9D()/(0).,*.849**6@AG<4 
-CTCGAGCTATACCACCTTAGACGT ba 1 @M02286:46:000000000-AEG11:1:2116:21186:17735 1:N:0:1 AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGATCGGCTACATATTTGAATGTT- GGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGG7@F8CEEFGGGGGEFFDGGGG5CGGGEFGGGG9@979EFCFECFGGFGFGFGGGFGGC7CFFGGFGFFFFGGGFFGGFFFFFGFF9<*:B9>>2*:7))1)4)=?AB?*65*.6) |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/tmp.sscs.after.fa --- a/misc/bug1/tmp.sscs.after.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->CTCGAGCTATACCACCTTAGACGT.ba.1 4 -AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGNGCNNNNACATATTTGAATGTAN |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/bug1/tmp.sscs.before.fa --- a/misc/bug1/tmp.sscs.before.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->CTCGAGCTATACCACCTTAGACGT.ba.1 4 -AAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGNGCNNNNACATATTTGAATGTA |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/family.align.fa --- a/misc/family.align.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,20 +0,0 @@ ->1 -cagcaccccctctaccccctctaccccctctagag ->2 ----------cagcaccccctctaccccctctagag ->3 --agcaccccctctaccccctctaccccccctagag ->4 --agcaccccctctaccccctctaccccctctagcg ->5 --aacacacctttcac--------ccctctccagag ->6 --agcaccccctctaccccctctaccccctctagaa ->7 --agcaccccctctaccccctctaccccctctaaag ->8 --agcaccccctctaccccctctaccccctctcgag ->9 --agccccccctctaccccctctaccccctctagag ->10 --agcaccccctctaccccctctaccccctctacag |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/family.cons.fa --- a/misc/family.cons.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->EMBOSS_001 -nAGCACCCCCTCTACCCCCTCTACCCCCTCTAGAG |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/family.fa --- a/misc/family.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,20 +0,0 @@ ->1 -CAGCACCCCCTCTACCCCCTCTACCCCCTCTAGAG ->2 -CAGCACCCCCTCTACCCCCTCTAGAG ->3 -AGCACCCCCTCTACCCCCTCTACCCCCCCTAGAG ->4 -AGCACCCCCTCTACCCCCTCTACCCCCTCTAGCG ->5 -AACACACCTTTCACCCCTCTCCAGAG ->6 -AGCACCCCCTCTACCCCCTCTACCCCCTCTAGAA ->7 -AGCACCCCCTCTACCCCCTCTACCCCCTCTAAAG ->8 -AGCACCCCCTCTACCCCCTCTACCCCCTCTCGAG ->9 -AGCCCCCCCTCTACCCCCTCTACCCCCTCTAGAG ->10 -AGCACCCCCTCTACCCCCTCTACCCCCTCTACAG |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/family.msa.tsv --- a/misc/family.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,11 +0,0 @@ -ACCGGACAACGA CONSENSUS nAGCACCCCCTCTACCCCCTCTACCCCCTCTAGAG -ACCGGACAACGA 1 cagcaccccctctaccccctctaccccctctagag -ACCGGACAACGA 2 ---------cagcaccccctctaccccctctagag -ACCGGACAACGA 3 -agcaccccctctaccccctctaccccccctagag -ACCGGACAACGA 4 -agcaccccctctaccccctctaccccctctagcg -ACCGGACAACGA 5 -aacacacctttcac--------ccctctccagag -ACCGGACAACGA 6 -agcaccccctctaccccctctaccccctctagaa -ACCGGACAACGA 7 -agcaccccctctaccccctctaccccctctaaag -ACCGGACAACGA 8 -agcaccccctctaccccctctaccccctctcgag -ACCGGACAACGA 9 -agccccccctctaccccctctaccccctctagag -ACCGGACAACGA 10 -agcaccccctctaccccctctaccccctctacag |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/family2.align.fa --- a/misc/family2.align.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ ->7 -ttagcctagccacacccccacgggaaacagcagtgattaacc ->8 ----------ccacacccc-----gaaacagcagtgatt---- ->9 -------tagccacacccccacgggaaac-------------- ->10 -ttagcctagccacacccccacgg------------------- |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/family2.cons.fa --- a/misc/family2.cons.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->EMBOSS_001 -ttagccTAGCCACACCCCCACGGGAAACagcagtgattnnnn |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/family2.fa --- a/misc/family2.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ ->1 -TTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACC ->2 -CCACACCCCGAAACAGCAGTGATT ->3 -TAGCCACACCCCCACGGGAAAC ->4 -TTAGCCTAGCCACACCCCCACGG |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/family3.align.fa --- a/misc/family3.align.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,10 +0,0 @@ ->1 -TTAGCCT-GCCACACCCC-ACGG-AA-CAGCAGTGACTGATA ->2 -TTAGCCTAGCCACACCCCCACGGGAAACAGCAGT----GATT ->3 -TTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATT---- ->4 -TTA---TAGCCACACCCCCACGGGAAACA-CAGTGATT---- ->5 -TTA---TAGCCACACCCCCACGGGAAACA-CAGTGACTGATA \ No newline at end of file |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/family3.cons.fa --- a/misc/family3.cons.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->EMBOSS_001 -TTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGAnTGATn |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/msa_sscs_matcher.py --- a/misc/msa_sscs_matcher.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,37 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -import sys -import argparse - -OPT_DEFAULTS = {} -USAGE = "gunzip -c families.msa.tsv.gz | %(prog)s sscs.set.fa" -DESCRIPTION = """Find the input MSA's which produced a given set of SSCS's. Pipe the full set of -MSA's to stdin and it will filter them to the matching MSA's on stdout.""" - - -def main(argv): - - parser = argparse.ArgumentParser(usage=USAGE, description=DESCRIPTION) - parser.set_defaults(**OPT_DEFAULTS) - - parser.add_argument('sscs', metavar='sscs.set.fa', - help='A set of SSCS\'s, as output from the duplex.py script with the --sscs-file option.') - - args = parser.parse_args(argv[1:]) - - sscs = set() - with open(args.sscs) as sscs_file: - for line in sscs_file: - if line.startswith('>'): - name = line.lstrip('>').split()[0] - sscs.add(name) - - for line in sys.stdin: - barcode, order, mate, rname, seq, qual = line.rstrip('\r\n').split('\t') - name = '.'.join((barcode, order, mate)) - if name in sscs: - sys.stdout.write(line) - - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/read.fa --- a/misc/read.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,3 +0,0 @@ ->AAAAACAAGGATCTAAAACCAGAT.1 -TGACTGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTACCCTTTTAAGTTAAAGATTAAGAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGT - |
b |
diff -r e4d75f9efb90 -r 675a8370675b misc/sscs_diff.py --- a/misc/sscs_diff.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,74 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -import sys -import argparse -import subprocess - -OPT_DEFAULTS = {} -USAGE = "%(prog)s [options]" -DESCRIPTION = """Find differences between the SSCS produced by one version of the pipeline and -another, when working on the same input MSA's.""" -EPILOG = """Warning: This injects raw command-line arguments into shell commands and executes them. -""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG) - parser.set_defaults(**OPT_DEFAULTS) - - parser.add_argument('sscs_before', metavar='sscs.all.before.fa', - help='SSCSs from earlier version (can be gzipped).') - parser.add_argument('sscs_after', metavar='sscs.all.after.fa', - help='SSCSs from later version (can be gzipped).') - parser.add_argument('-b', '--before', metavar='sscs.all.before.diffs.fa', required=True, - help='Output SSCSs from earlier version that differ from the SSCS in the later version here.') - parser.add_argument('-a', '--after', metavar='sscs.all.after.diffs.fa', required=True, - help='Output SSCSs from later version that differ from the SSCS in the earlier version here.') - - args = parser.parse_args(argv[1:]) - - sscs_before = {} - if args.sscs_before.endswith('.gz'): - command = 'gunzip -c {} | paste - - | sort'.format(args.sscs_before) - else: - command = 'cat {} | paste - - | sort'.format(args.sscs_before) - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) - for line in process.stdout: - fields = line.rstrip('\r\n').split('\t') - name = fields[0].lstrip('>').split()[0] - seq = fields[1] - sscs_before[name] = seq - - before_fh = open(args.before, 'w') - after_fh = open(args.after, 'w') - diffs = {} - if args.sscs_after.endswith('.gz'): - command = 'gunzip -c {} | paste - - | sort'.format(args.sscs_after) - else: - command = 'cat {} | paste - - | sort'.format(args.sscs_after) - process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE) - for 
line in process.stdout: - fields = line.rstrip('\r\n').split('\t') - header = fields[0].lstrip('>') - name, fam_size = header.split() - seq_after = fields[1] - if name in sscs_before: - seq_before = sscs_before[name] - if seq_before != seq_after: - diffs[name] = (seq_before, seq_after) - before_fh.write('>{} {}\n'.format(name, fam_size)) - before_fh.write(seq_before+'\n') - after_fh.write('>{} {}\n'.format(name, fam_size)) - after_fh.write(seq_after+'\n') - before_fh.close() - after_fh.close() - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b pipeline.sh --- a/pipeline.sh Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,32 +0,0 @@ -#!/usr/bin/env bash -if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then - echo "Error: Must use bash version 4+." >&2 - exit 1 -fi -set -ue - -# At the moment this isn't really a production version of the pipeline. -# It mainly just documents how the commands are used. -function main { - fastq1="$1" - fastq2="$2" - sscs="$3" - # This transforms the input fastq's into a format that can be sorted by family with the "sort" - # command. Mainly, it puts all the data for both read pairs on one line, and adds a column with - # the barcode. - # Warning: It assumes the fastq's have 4 lines per read! - paste "$fastq1" "$fastq2" \ - | paste - - - - \ - | awk -f make-barcodes.awk \ - | sort \ - | align_families.py \ - | dunovo.py \ - > "$sscs" -} - -function fail { - echo "$@" >&2 - exit 1 -} - -main "$@" |
b |
diff -r e4d75f9efb90 -r 675a8370675b planemo-template/cat.xml --- a/planemo-template/cat.xml Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,23 +0,0 @@ -<tool id="cat" name="Concatenate datasets (for test workflows)" version="1.0"> - <description>tail-to-head</description> - <command> - cat $input1 #for $q in $queries# ${q.input2} #end for# > $out_file1 - </command> - <inputs> - <param name="input1" type="data" label="Concatenate Dataset"/> - <repeat name="queries" title="Dataset"> - <param name="input2" type="data" label="Select" /> - </repeat> - </inputs> - <outputs> - <data name="out_file1" format="input" metadata_source="input1"/> - </outputs> - <tests> - <test> - <param name="input1" value="1.bed"/> - <output name="out_file1" file="1.bed"/> - </test> - </tests> - <help> - </help> -</tool> |
b |
diff -r e4d75f9efb90 -r 675a8370675b planemo-template/random_lines_two_pass.py --- a/planemo-template/random_lines_two_pass.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,71 +0,0 @@ -#!/usr/bin/env python -#Dan Blankenberg -#Selects N random lines from a file and outputs to another file, maintaining original line order -#allows specifying a seed -#does two passes to determine line offsets/count, and then to output contents - -import optparse, random - -def get_random_by_subtraction( line_offsets, num_lines ): - while len( line_offsets ) > num_lines: - del line_offsets[ random.randint( 0, len( line_offsets ) - 1 ) ] - return line_offsets - -def get_random_by_sample( line_offsets, num_lines ): - line_offsets = random.sample( line_offsets, num_lines ) - line_offsets.sort() - return line_offsets - -def get_random( line_offsets, num_lines ): - if num_lines > ( len( line_offsets ) / 2 ): - return get_random_by_subtraction( line_offsets, num_lines ) - else: - return get_random_by_sample( line_offsets, num_lines ) - -def __main__(): - #Parse Command Line - parser = optparse.OptionParser() - parser.add_option( '-s', '--seed', dest='seed', action='store', type="string", default=None, help='Set the random seed.' ) - (options, args) = parser.parse_args() - - assert len( args ) == 3, "Invalid command line specified." - - input = open( args[0], 'rb' ) - output = open( args[1], 'wb' ) - num_lines = int( args[2] ) - assert num_lines > 0, "You must select at least one line." - - if options.seed is not None: - random.seed( options.seed ) - - #get line offsets - line_offsets = [] - teller = input.tell - readliner = input.readline - appender = line_offsets.append - while True: - offset = teller() - if readliner(): - appender( offset ) - else: - break - - total_lines = len( line_offsets ) - assert num_lines <= total_lines, "Error: asked to select more lines (%i) than there were in the file (%i)." 
% ( num_lines, total_lines ) - - #get random line offsets - line_offsets = get_random( line_offsets, num_lines ) - - #write out random lines - seeker = input.seek - writer = output.write - for line_offset in line_offsets: - seeker( line_offset ) - writer( readliner() ) - input.close() - output.close() - print "Kept %i of %i total lines." % ( num_lines, total_lines ) - if options.seed is not None: - print 'Used random seed of "%s".' % options.seed - -if __name__=="__main__": __main__() |
b |
diff -r e4d75f9efb90 -r 675a8370675b planemo-template/randomlines.xml --- a/planemo-template/randomlines.xml Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,66 +0,0 @@ -<tool id="random_lines1" name="Select random lines" version="2.0.1"> - <description>from a file</description> - <command interpreter="python">random_lines_two_pass.py "${input}" "${out_file1}" "${num_lines}" - #if str( $seed_source.seed_source_selector ) == "set_seed": - --seed "${seed_source.seed}" - #end if - </command> - <inputs> - <param name="num_lines" size="5" type="integer" value="1" label="Randomly select" help="lines"/> - <param format="txt" name="input" type="data" label="from"/> - <conditional name="seed_source"> - <param name="seed_source_selector" type="select" label="Set a random seed"> - <option value="no_seed" selected="True">Don't set seed</option> - <option value="set_seed">Set seed</option> - </param> - <when value="no_seed"> - <!-- Do nothing here --> - </when> - <when value="set_seed"> - <param name="seed" type="text" label="Random seed" /> - </when> - </conditional> - </inputs> - <outputs> - <data format="input" name="out_file1" metadata_source="input"/> - </outputs> - <tests> - <test> - <param name="num_lines" value="65"/> - <param name="input" value="1.bed"/> - <param name="seed_source_selector" value="no_seed"/> - <output name="out_file1" file="1.bed"/> - </test> - <test> - <param name="num_lines" value="1"/> - <param name="input" value="1.bed"/> - <param name="seed_source_selector" value="set_seed"/> - <param name="seed" value="asdf"/> - <output name="out_file1" file="1_bed_random_lines_1_seed_asdf_out.bed"/> - </test> - </tests> - <help> - -**What it does** - -This tool selects N random lines from a file, with no repeats, and preserving ordering. 
- ------ - -**Example** - -Input File:: - - chr7 56632 56652 D17003_CTCF_R6 310 + - chr7 56736 56756 D17003_CTCF_R7 354 + - chr7 56761 56781 D17003_CTCF_R4 220 + - chr7 56772 56792 D17003_CTCF_R7 372 + - chr7 56775 56795 D17003_CTCF_R4 207 + - -Selecting 2 random lines might return this:: - - chr7 56736 56756 D17003_CTCF_R7 354 + - chr7 56775 56795 D17003_CTCF_R4 207 + - - </help> -</tool> |
b |
diff -r e4d75f9efb90 -r 675a8370675b planemo-template/test-data/1.bed --- a/planemo-template/test-data/1.bed Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,65 +0,0 @@ -chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - -chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + -chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - -chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + -chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - -chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - -chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + -chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - -chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + -chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - -chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + -chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - -chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - -chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + -chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 - -chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 + -chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 + -chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 - -chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 + -chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 - -chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 - -chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 + -chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 - -chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 + -chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 + -chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 - -chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 + -chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 - -chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 + -chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 - -chr2 220190202 220190242 
CCDS2441.1_cds_0_0_chr2_220190203_f 0 + -chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 - -chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 - -chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 + -chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 - -chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 + -chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 + -chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 - -chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 + -chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 - -chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 + -chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 - -chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 + -chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 - -chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + -chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 - -chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 + -chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 - -chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 - -chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 + -chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 - -chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 + -chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 + -chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 - -chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 + -chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 - -chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 - -chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 + -chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 - -chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 + -chr9 128937229 128937445 
CCDS6919.1_cds_0_0_chr9_128937230_r 0 - -chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 + -chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 - -chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 + -chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 - |
b |
diff -r e4d75f9efb90 -r 675a8370675b planemo-template/test-data/1_bed_random_lines_1_seed_asdf_out.bed --- a/planemo-template/test-data/1_bed_random_lines_1_seed_asdf_out.bed Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,1 +0,0 @@ -chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + |
b |
diff -r e4d75f9efb90 -r 675a8370675b seqtools.c --- a/seqtools.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,236 +0,0 @@ -#include <stdlib.h> -#include <string.h> -#include <ctype.h> -#include <math.h> - -// ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz -#define TRANS "TVGHEFCDIJMLKNOPQYWAABSXRZ[\\]^_`tvghefcdijmlknopqywaabsxrz" -#define TRANS_OFFSET 65 -#define TRANS_LEN 57 - -char* get_revcomp(char *input); -char get_char_comp(char c); -int *get_diffs_simple(char *cons, char *seqs[], int n_seqs); -double *get_diffs_frac_simple(char *cons, char *seqs[], int n_seqs); -double **get_diffs_frac_binned(char *cons, char *seqs[], int n_seqs, int seq_len, int bins); -char *transfer_gaps(char *gapped_seq, char *inseq, char gap_char1, char gap_char2); -char **transfer_gaps_multi(int n_seqs, char *gapped_seqs[], char *inseqs[], char gap_char1, char gap_char2); - - -// Return the reverse complement of a sequence. -// Makes a new copy of the string, so the original is not modified. -char* get_revcomp(char *input) { - int length = strlen(input); - char *output = malloc(sizeof(char) * length + 1); - int i, j; - for (i = 0, j = length - 1; i < length && j >= 0; i++, j--) { - output[j] = get_char_comp(input[i]); - } - output[length] = '\0'; - return output; -} - - -// Return the complement of a base. -// Uses a simple lookup table: a string with the complements of all possible sequence characters. -char get_char_comp(char c) { - int i = c - TRANS_OFFSET; - if (i < 0 || i > TRANS_LEN) { - return c; - } else { - return TRANS[i]; - } -} - - -/* Take an existing alignment and consensus and compute the number of differences between each - * sequence and the consensus. - * Known bugs: - * 1. Counts no differences in the following sequences: - * consensus: GA---CA - * seq 1: GA----A - * seq 2: GA--ACA - * 2. If a sequence starts with a gap, each base in the gap will be counted as a diff. - */ -int *get_diffs_simple(char *cons, char *seqs[], int n_seqs) { - int *diffs = malloc(sizeof(int) * n_seqs); - int i = 0; - // Uppercase the consensus. 
- while (cons[i] != 0) { - cons[i] = toupper(cons[i]); - i++; - } - // Loop through the sequences in the alignment. - for (i = 0; i < n_seqs; i++) { - int in_gap; - diffs[i] = 0; - int j = 0; - // Compare each base of the sequence to the consensus. - while (seqs[i][j] != 0 && cons[j] != 0) { - if (cons[j] != '-' && seqs[i][j] != '-') { - in_gap = 0; - } - if (toupper(seqs[i][j]) != cons[j]) { - if (!in_gap) { - diffs[i]++; - } - } - if (cons[j] == '-' || seqs[i][j] == '-') { - in_gap = 1; - } - j++; - } - } - return diffs; -} - - -// Convert the output of get_diffs_simple() from raw diff counts to fractions of the total sequence -// lengths. -//TODO: Don't count gaps in sequence length. -double *get_diffs_frac_simple(char *cons, char *seqs[], int n_seqs) { - int *diffs = get_diffs_simple(cons, seqs, n_seqs); - double *fracs = malloc(sizeof(double) * n_seqs); - int i; - for (i = 0; i < n_seqs; i++) { - int j = 0; - while (seqs[i][j] != 0 && cons[j] != 0) { - j++; - } - fracs[i] = (double)diffs[i]/j; - } - return fracs; -} - - -/* Take an existing alignment and consensus and compute the number of differences between each - * sequence and the consensus. Break each sequence into bins and tally the differences in each bin. - * Known bugs: - * 1. counts no differences in the following sequences: - * consensus: GA---CA - * seq 1: GA----A - * seq 2: GA--ACA - * 2. If a bin starts with a gap, each base in the gap will be counted as a diff. - */ -int **get_diffs_binned(char *cons, char *seqs[], int n_seqs, int seq_len, int bins) { - int bin_size = (int)round((float)seq_len/bins); - // Initialize the diffs 2d array. - int **diffs = malloc(sizeof(int*) * n_seqs); - int i, j; - for (i = 0; i < n_seqs; i++) { - diffs[i] = malloc(bins * sizeof(int)); - for (j = 0; j < bins; j++) { - diffs[i][j] = 0; - } - } - // Uppercase the consensus. - while (cons[i] != 0) { - cons[i] = toupper(cons[i]); - i++; - } - int bin, in_gap; - // Loop through the sequences in the alignment. 
- for (i = 0; i < n_seqs; i++) { - j = 0; - // Compare each base of the sequence to the consensus. - while (seqs[i][j] != 0 && cons[j] != 0) { - bin = j/bin_size; - if (bin >= bins) { - break; - } - if (cons[j] != '-' && seqs[i][j] != '-') { - in_gap = 0; - } - if (toupper(seqs[i][j]) != cons[j]) { - if (!in_gap) { - diffs[i][bin]++; - } - } - if (cons[j] == '-' || seqs[i][j] == '-') { - in_gap = 1; - } - j++; - } - } - return diffs; -} - - -// Convert the output of get_diffs_binned() from raw diff counts to fractions of the total bin -// lengths. -//TODO: Don't count gaps in bin length. -double **get_diffs_frac_binned(char *cons, char *seqs[], int n_seqs, int seq_len, int bins) { - int bin_size = (int)round((float)seq_len/bins); - int **diffs = get_diffs_binned(cons, seqs, n_seqs, seq_len, bins); - double **fracs = malloc(sizeof(double*) * n_seqs); - int i; - for (i = 0; i < n_seqs; i++) { - fracs[i] = malloc(sizeof(double) * bins); - // Create and init array of lengths of the bins. - int bin_lengths[bins]; - int bin; - for (bin = 0; bin < bins; bin++) { - bin_lengths[bin] = 0; - } - // Tally size of each bin. - int j = 0; - while (seqs[i][j] != 0 && cons[j] != 0) { - int bin = j/bin_size; - if (bin >= bins) { - break; - } - bin_lengths[bin]++; - j++; - } - // For each bin, calculate the diff fraction = diffs / bin_length. - for (bin = 0; bin < bins; bin++) { - fracs[i][bin] = (double)diffs[i][bin]/bin_lengths[bin]; - // printf("bin %d: %d / %d = %f\t", bin, diffs[i][bin], bin_lengths[bin], fracs[i][bin]); - } - // printf("\n"); - } - return fracs; -} - - -// Take an input sequence and insert gaps according to another, already-aligned sequence with gaps. -// Input strings must be null-terminated. "gap_char1" is the character used for gaps in -// "gapped_seq", and "gap_char2" is the gap character in "inseq". -// N.B.: The ungapped length of "gapped_seq" must be equal to the length of "inseq". 
-char *transfer_gaps(char *gapped_seq, char *inseq, char gap_char1, char gap_char2) { - if (gap_char1 == 0) { - gap_char1 = '-'; - } - if (gap_char2 == 0) { - gap_char2 = '-'; - } - int gapped_len = strlen(gapped_seq); - char *outseq = malloc(sizeof(char) * gapped_len + 1); - - // Transfer characters from inseq to outseq, except when gapped_seq has a gap at that spot - // (insert a gap there instead). - int g, o, i; - for (g = 0, o = 0, i = 0; g < gapped_len; g++, o++) { - if (gapped_seq[g] == gap_char1) { - outseq[o] = gap_char2; - } else { - outseq[o] = inseq[i]; - i++; - } - } - outseq[gapped_len] = '\0'; - - return outseq; -} - - -// Wrapper for transfer_gaps() when operating on a set of sequences at once. -char **transfer_gaps_multi(int n_seqs, char *gapped_seqs[], char *inseqs[], char gap_char1, - char gap_char2) { - char **outseqs = malloc(sizeof(char *) * n_seqs); - int i; - for (i = 0; i < n_seqs; i++) { - outseqs[i] = transfer_gaps(gapped_seqs[i], inseqs[i], gap_char1, gap_char2); - } - return outseqs; -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b seqtools.py --- a/seqtools.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,66 +0,0 @@ -import os -import ctypes - -script_dir = os.path.dirname(os.path.realpath(__file__)) -seqtools = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'libseqtools.so')) -seqtools.get_revcomp.restype = ctypes.c_char_p -seqtools.transfer_gaps.restype = ctypes.c_char_p - - -def get_revcomp(seq): - return seqtools.get_revcomp(seq) - - -def get_diffs_frac_simple(consensus, family): - c_consensus = ctypes.c_char_p(consensus) - c_family = (ctypes.c_char_p * len(family))() - for i, seq in enumerate(family): - c_family[i] = ctypes.c_char_p(seq) - seqtools.get_diffs_frac_simple.restype = ctypes.POINTER(ctypes.c_double * len(c_family)) - diffs = seqtools.get_diffs_frac_simple(c_consensus, c_family, len(c_family)) - return diffs.contents - - -def get_diffs_frac_binned(consensus, family, bins): - seq_len = None - c_consensus = ctypes.c_char_p(consensus) - c_family = (ctypes.c_char_p * len(family))() - for i, seq in enumerate(family): - if seq_len: - if seq_len != len(seq): - return None - else: - seq_len = len(seq) - c_family[i] = ctypes.c_char_p(seq) - double_array_pointer = ctypes.POINTER(ctypes.c_double * bins) - seqtools.get_diffs_frac_binned.restype = ctypes.POINTER(double_array_pointer * len(c_family)) - diffs_binned_c = seqtools.get_diffs_frac_binned(c_consensus, c_family, len(c_family), seq_len, bins) - diffs_binned = [] - for diffs_c in diffs_binned_c.contents: - diffs_binned.append(diffs_c.contents) - return diffs_binned - - -def transfer_gaps(aligned, seq, gap_char_in='-', gap_char_out='-'): - gap_char_in_c = ctypes.c_char(gap_char_in) - gap_char_out_c = ctypes.c_char(gap_char_out) - return seqtools.transfer_gaps(aligned, seq, gap_char_in_c, gap_char_out_c) - - -def transfer_gaps_multi(seqs, aligned, gap_char_in='-', gap_char_out='-'): - gap_char_in_c = ctypes.c_char(gap_char_in) - gap_char_out_c = ctypes.c_char(gap_char_out) - n_seqs = len(seqs) - assert n_seqs == len(aligned), 'Error: Unequal number of gapped and ungapped sequences.' 
- seqs_c = (ctypes.c_char_p * n_seqs)() - for i, seq in enumerate(seqs): - seqs_c[i] = ctypes.c_char_p(seq) - aligned_c = (ctypes.c_char_p * n_seqs)() - for i, seq in enumerate(aligned): - aligned_c[i] = ctypes.c_char_p(seq) - seqtools.transfer_gaps_multi.restype = ctypes.POINTER(ctypes.c_char_p * n_seqs) - output_c = seqtools.transfer_gaps_multi(n_seqs, aligned_c, seqs_c, gap_char_in_c, gap_char_out_c) - output = [] - for seq in output_c.contents: - output.append(seq) - return output |
b |
diff -r e4d75f9efb90 -r 675a8370675b seqtools.pyc |
b |
Binary file seqtools.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b swalign.c --- a/swalign.c Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,353 +0,0 @@\n-/*\n- * Copyright (c) 2010 Nicolaus Lance Hepler\n- * \n- * Permission is hereby granted, free of charge, to any person\n- * obtaining a copy of this software and associated documentation\n- * files (the "Software"), to deal in the Software without\n- * restriction, including without limitation the rights to use,\n- * copy, modify, merge, publish, distribute, sublicense, and/or sell\n- * copies of the Software, and to permit persons to whom the\n- * Software is furnished to do so, subject to the following\n- * conditions:\n- * \n- * The above copyright notice and this permission notice shall be\n- * included in all copies or substantial portions of the Software.\n- * \n- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES\n- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,\n- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR\n- * OTHER DEALINGS IN THE SOFTWARE.\n- */\n-// Note: That\'s an MIT license.\n-// All double-commented comments below are from Nicolaus Lance Hepler.\n-// Original repository: https://code.google.com/archive/p/swalign/\n-\n-#include "swalign.h"\n-\n-// /* reverse a string in place, return str */\n-static char* reverse(char *str) {\n- char *left = str;\n- char *right = left + strlen(str) - 1;\n- char tmp;\n-\n- while (left < right) {\n- tmp = *left;\n- *(left++) = *right;\n- *(right--) = tmp;\n- }\n-\n- return str;\n-}\n-\n-// Return the reverse complement of a sequence.\n-char* revcomp(char *str) {\n- char *left = str;\n- char *right = left + strlen(str) - 1;\n- char tmp;\n-\n- while (left < right) {\n- tmp = get_char_comp(*left);\n- *(left++) = get_char_comp(*right);\n- *(right--) = tmp;\n- }\n-\n- return str;\n-}\n-\n-// 
Return the complement of a base.\n-// Uses a simple lookup table: a string with the complements of all possible sequence characters.\n-static char get_char_comp(char c) {\n- int i = c - TRANS_OFFSET;\n- if (i < 0 || i > 57) {\n- return c;\n- } else {\n- return TRANS[i];\n- }\n-}\n-\n-// // works globally\n-// Note: Currently the "local" flag isn\'t functional. It seems to always do a local alignment.\n-static align_t *traceback(seq_pair_t *problem, matrix_t *S, bool local) {\n- align_t *result = malloc(sizeof(align_t));\n- seq_pair_t *seqs = malloc(sizeof(seq_pair_t));\n- unsigned int i = S->m - 1;\n- unsigned int j = S->n - 1;\n- unsigned int k = 0;\n- // Create output strings. Allocate maximum potential length.\n- char c[S->m + S->n + 1];\n- char d[S->m + S->n + 1];\n-\n- memset(c, \'\\0\', sizeof(c));\n- memset(d, \'\\0\', sizeof(d));\n-\n- // This wasn\'t finished by NLH. Not functioning correctly yet.\n- // It seems the purpose is to start the traceback from the place where the score reaches its\n- // maximum instead of the very end (set i and j to those coordinates).\n- if (local == true) {\n- unsigned int l, m;\n- double max = FLT_MIN;\n-\n- for (l = 0; l < S->m; l++) {\n- for (m = 0; m < S->n; m++) {\n- if (S->mat[l][m].score > max) {\n- i = l;\n- j = m;\n- max = S->mat[l][m].score;\n- } \n- } \n- }\n- }\n-\n- double score = DBL_MIN;\n- int matches = 0;\n- int start_a = 0;\n- int start_b = 0;\n- int end_a = 0;\n- int end_b = 0;\n- bool move_i = false;\n- bool move_j = false;\n- // Walk back through the matrix from the end, taking the path determined by the "prev" values of\n- // each cell. Assemble the sequence along the way.\n- if (S->mat[i][j].prev[0] != 0 && S->mat[i][j].prev[1] != 0) {\n- while (i > 0 || j > 0) {\n- unsigned int new_i = S->mat[i][j].prev[0];\n- unsigned int new_j = S->mat[i][j].prev[1];\n- \n- // If we\'ve moved in the i axis, add the new base to the sequence. 
Otherwise, it\'s a'..b'; \n-}\n-\n-static matrix_t *create_matrix(unsigned int m, unsigned int n) {\n- matrix_t *S = malloc(sizeof(matrix_t));\n- unsigned int i;\n-\n- S->m = m;\n- S->n = n;\n-\n- S->mat = malloc(sizeof(entry_t) * m * n);\n-\n- for (i = 0; i < m; i++) {\n- S->mat[i] = malloc(sizeof(entry_t) * n);\n- }\n-\n- return S;\n-}\n-\n-void destroy_matrix(matrix_t *S) {\n- unsigned int i;\n- for (i = 0; i < S->m; i++) {\n- free(S->mat[i]);\n- }\n- free(S->mat);\n- free(S);\n- return;\n-}\n-\n-// Print a visual representation of the path through the matrix.\n-void print_matrix(matrix_t *matrix, seq_pair_t *seq_pair) {\n- int i, j;\n- for (i = 0; i < matrix->m; i++) {\n- if (i == 0) {\n- printf("\\t\\t");\n- for (j = 0; j < seq_pair->blen; j++) {\n- printf("%c\\t", seq_pair->b[j]);\n- }\n- printf("\\n");\n- printf(" ");\n- for (j = 0; j < matrix->n; j++) {\n- printf("%d\\t", j);\n- }\n- printf("\\n");\n- }\n- if (i == 0) {\n- printf(" 0 ");\n- } else {\n- printf("%c %4d ", seq_pair->a[i-1], i);\n- }\n- for (j = 0; j < matrix->n; j++) {\n- printf("%d,%d|%0.0f\\t", matrix->mat[i][j].prev[0], matrix->mat[i][j].prev[1], matrix->mat[i][j].score);\n- }\n- printf("\\n");\n- }\n-}\n-\n-void destroy_seq_pair(seq_pair_t *pair) {\n- free(pair->a);\n- free(pair->b);\n- free(pair);\n- return;\n-}\n-\n-align_t *smith_waterman(seq_pair_t *problem, bool local) {\n- unsigned int m = problem->alen + 1;\n- unsigned int n = problem->blen + 1;\n- matrix_t *S = create_matrix(m, n);\n- align_t *result;\n- unsigned int i, j, k, l;\n-\n- S->mat[0][0].score = 0;\n- S->mat[0][0].prev[0] = 0;\n- S->mat[0][0].prev[1] = 0;\n-\n- for (i = 1; i <= problem->alen; i++) {\n- S->mat[i][0].score = 0.0;\n- S->mat[i][0].prev[0] = i-1;\n- S->mat[i][0].prev[1] = 0;\n- }\n-\n- for (j = 1; j <= problem->blen; j++) {\n- S->mat[0][j].score = 0.0;\n- S->mat[0][j].prev[0] = 0;\n- S->mat[0][j].prev[1] = j-1;\n- }\n-\n- for (i = 1; i <= problem->alen; i++) {\n- for (j = 1; j <= problem->blen; j++) {\n- int 
nw_score = (strncmp(problem->a+(i-1), problem->b+(j-1), 1) == 0) ? MATCH : MISMATCH;\n-\n- S->mat[i][j].score = DBL_MIN;\n- S->mat[i][j].prev[0] = 0;\n- S->mat[i][j].prev[1] = 0;\n-\n- for (k = 0; k <= 1; k++) {\n- for (l = 0; l <= 1; l++) {\n- int val;\n-\n- if (k == 0 && l == 0) {\n- continue;\n- } else if (k > 0 && l > 0) {\n- val = nw_score; \n- } else if (k > 0 || l > 0) {\n- if ((i == problem->alen && k == 0) ||\n- (j == problem->blen && l == 0))\n- val = 0.0;\n- else\n- val = GAP;\n- } else {\n- // do nothing..\n- }\n-\n- val += S->mat[i-k][j-l].score;\n-\n- if (val > S->mat[i][j].score) {\n- S->mat[i][j].score = val;\n- S->mat[i][j].prev[0] = i-k;\n- S->mat[i][j].prev[1] = j-l;\n- }\n- }\n- }\n- }\n- }\n-\n- result = traceback(problem, S, local);\n-\n- // print_matrix(S, problem);\n-\n- destroy_matrix(S);\n-\n- return result;\n-}\n-\n-void print_alignment(align_t *result, int target_len, int query_len) {\n- printf("Score: %0.0f Matches: %d\\n", result->score, result->matches);\n- printf("Target: %3d %s %-3d\\n", result->start_a, result->seqs->a, result->end_a);\n- printf("Query: %3d %s %-3d\\n", result->start_b, result->seqs->b, result->end_b);\n-}\n-\n-int main(int argc, const char **argv) {\n-\n- if (argc != 3) {\n- printf("usage: swalign TARGET_SEQ QUERY_SEQ\\n");\n- exit(1);\n- }\n-\n- {\n- seq_pair_t problem;\n- align_t *result;\n- char c[strlen(argv[1])], d[strlen(argv[2])];\n- \n- strcpy(c, argv[1]);\n- strcpy(d, argv[2]);\n- \n- problem.a = c;\n- problem.alen = strlen(problem.a);\n- problem.b = d;\n- problem.blen = strlen(problem.b);\n- \n- result = smith_waterman(&problem, false);\n- \n- print_alignment(result, problem.alen, problem.blen);\n- }\n-\n- exit(0);\n-} \n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b swalign.h --- a/swalign.h Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,89 +0,0 @@ -/* - * Copyright (c) 2010 Nicolaus Lance Hepler - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include <float.h> -#include <math.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define GAP -1.0 -#define MATCH 2.0 -#define MISMATCH -0.5 -// ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz -#define TRANS "TVGHEFCDIJMLKNOPQYWAABSXRZ[\\]^_`tvghefcdijmlknopqywaabsxrz" -#define TRANS_OFFSET 65 - -typedef enum { false, true } bool; - -typedef struct { - char *a; - unsigned int alen; - char *b; - unsigned int blen; -} seq_pair_t; - -// An entry is a cell in the matrix. -// prev holds the coordinates of the previous cell in the matrix. 
-typedef struct { - double score; - unsigned int prev[2]; -} entry_t; - -typedef struct { - unsigned int m; - unsigned int n; - entry_t **mat; -} matrix_t; - -typedef struct { - seq_pair_t *seqs; - int start_a; - int start_b; - int end_a; - int end_b; - int matches; - double score; -} align_t; - -static char* reverse(char *str); - -static char get_char_comp(char c); - -char* revcomp(char *str); - -static align_t *traceback(seq_pair_t *problem, matrix_t *S, bool local); - -static matrix_t *create_matrix(unsigned int m, unsigned int n); - -void destroy_matrix(matrix_t *S); - -void print_matrix(matrix_t *matrix, seq_pair_t *seq_pair); - -void destroy_seq_pair(seq_pair_t *pair); - -align_t *smith_waterman(seq_pair_t *problem, bool local); - -void print_alignment(align_t *result, int target_len, int query_len); |
b |
diff -r e4d75f9efb90 -r 675a8370675b swalign.py --- a/swalign.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,95 +0,0 @@ -import os -import ctypes -import string - -script_dir = os.path.dirname(os.path.realpath(__file__)) -swalign = ctypes.cdll.LoadLibrary(os.path.join(script_dir, 'libswalign.so')) - -REVCOMP_TABLE = string.maketrans('acgtrymkbdhvACGTRYMKBDHV', 'tgcayrkmvhdbTGCAYRKMVHDB') - - -# C struct for ctypes -class SeqPairC(ctypes.Structure): - _fields_ = [ - ('a', ctypes.c_char_p), - ('alen', ctypes.c_uint), - ('b', ctypes.c_char_p), - ('blen', ctypes.c_uint), - ] - - -# C struct for ctypes -class AlignC(ctypes.Structure): - _fields_ = [ - ('seqs', ctypes.POINTER(SeqPairC)), - ('start_a', ctypes.c_int), - ('start_b', ctypes.c_int), - ('end_a', ctypes.c_int), - ('end_b', ctypes.c_int), - ('matches', ctypes.c_int), - ('score', ctypes.c_double), - ] - - -# The Python version -class Align(object): - def __init__(self, align_c): - self.target = align_c.seqs.contents.a - self.query = align_c.seqs.contents.b - # Where the first base of the target aligns on the query, in query coordinates (or 1, if <= 0). - self.start_target = align_c.start_a - # Where the first base of the query aligns on the target, in target coordinates (or 1, if <= 0). - self.start_query = align_c.start_b - # Where the last base of the target aligns on the query, in query coordinates. - self.end_target = align_c.end_a - # Where the last base of the query aligns on the target, in target coordinates. - self.end_query = align_c.end_b - self.matches = align_c.matches - self.score = align_c.score - - # Provide this common function. 
- def __str__(self): - """Print a human-readable representation of the alignment.""" - start_query = str(self.start_query) - start_target = str(self.start_target) - start_width = str(max(len(start_query), len(start_target))) - line_format = '{:'+start_width+'} {} {}' - output = line_format.format(start_target, self.target, self.end_target) + '\n' - output += line_format.format(start_query, self.query, self.end_query) - return output - - -# Initialize functions (define types). -swalign.smith_waterman.restype = ctypes.POINTER(AlignC) -swalign.revcomp.restype = ctypes.c_char_p - - -def smith_waterman(target, query): - seq_pair = SeqPairC(target, len(target), query, len(query)) - align_c = swalign.smith_waterman(ctypes.pointer(seq_pair), 1).contents - return Align(align_c) - - -def smith_waterman_duplex(target, query): - """Smith-Waterman align query to target in both orientations and return the best. - Convenience function that calls smith_waterman() twice, and returns the - alignment with the highest score.""" - align = smith_waterman(target, query) - query_rc = revcomp(query) - align_rc = smith_waterman(target, query_rc) - if align_rc.score > align.score: - return align_rc - else: - return align - - -def revcomp(seq): - """Return the reverse complement of the input sequence. - Leaves the input string unaltered.""" - return seq.translate(REVCOMP_TABLE)[::-1] - - -def revcomp_inplace(seq): - """Convert the input sequence to its reverse complement. - WARNING: This will alter the input string in-place!""" - swalign.revcomp(seq) |
b |
diff -r e4d75f9efb90 -r 675a8370675b swalign.pyc |
b |
Binary file swalign.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b test-msa.fa --- a/test-msa.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,14 +0,0 @@ ->1 -ACCGACACAGACTAGGGATCAAAG ->2 -ACCGACACAGACTAGGATCAAAGT ->3 -ACCGACACAGACTAGGGATCAAAG ->4 -ACCGACACTGACTAGGGATCAAAG ->5 -ACCGAACACAGCACTAGATCAAAG ->6 -ACCTACAGCGACTATGGTTCGAAG ->7 -ACCGACACAGACTAGGGATCAAAG |
b |
diff -r e4d75f9efb90 -r 675a8370675b test.fa --- a/test.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ ->one (1) -GATTACA ->two -TAAGTGTT -ACCA ->three -GGGGAAACCT |
b |
diff -r e4d75f9efb90 -r 675a8370675b test.fq --- a/test.fq Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,15 +0,0 @@ -@1 (one) -GATTACA -+1 -asdlknn -@2 -TAAGTGTT -ACCA -+ -sdlkncsa -aknc -@3 -GGGGAAACCT -+three -aslknaoija - |
b |
diff -r e4d75f9efb90 -r 675a8370675b test.py --- a/test.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,42 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import print_function -import sys -import argparse -import subprocess - -OPT_DEFAULTS = {} -USAGE = "%(prog)s [options]" -DESCRIPTION = """""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**OPT_DEFAULTS) - - parser.add_argument('file1') - parser.add_argument('file2') - - args = parser.parse_args(argv[1:]) - - """ - cat $fastq1 | paste - - - - \ - | paste - <(cat $fastq2 | paste - - - -) \ - | awk -f make-barcodes.awk \ - | sort \ - | align_families.py \ - | dunovo.py \ - > $sscs - """ - - cmd1 = subprocess.Popen(['cat', args.file1], stdout=subprocess.PIPE) - cmd2 = subprocess.Popen(['cat', args.file2], stdout=subprocess.PIPE) - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b test.sam --- a/test.sam Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@SQ thing -read1 1 2 3 4 5 6 7 8 GATTACA 10 -read2 1 2 3 4 5 6 7 8 GGC 10 -read3 more info 1 2 3 4 5 6 7 8 TCTAATG 10 \ No newline at end of file |
b |
diff -r e4d75f9efb90 -r 675a8370675b test.sscs.fa --- a/test.sscs.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,16 +0,0 @@ ->ACCGACACAGACTAGGGATCAAAG.ab.1 4 -TAAGGATACTAGTATAAGAG ->ACCGACACAGACTAGGGATCAAAG.ba.2 3 -TAAGGATACTAGTATAAGAG ->ACCGACACAGACTAGGGATCAAAG.ab.2 4 -AGAGTCAGGTTCGTCTTTAG ->ACCGACACAGACTAGGGATCAAAG.ba.1 3 -AGAGTCAGGTTCGTCTTTAG ->ATGATTAAGGCTACTAGTATAAGC.ab.1 3 -TCTATCATTATGTTTTGAGG ->ATGATTAAGGCTACTAGTATAAGC.ab.2 3 -GCCCCCTCTACCCCCTCTAG ->TTGTTGATGAGATATTTGGAGGTA.ba.1 3 -GGTGATTAGTCGGTTGTTGA ->TTGTTGATGAGATATTTGGAGGTA.ba.2 3 -ACTTTACAATGCAATGCCCA |
b |
diff -r e4d75f9efb90 -r 675a8370675b test2.fa --- a/test2.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,9 +0,0 @@ ->1 -TCTGGTTTGTTCTTTGGCTGACAGCACGGAGTATCCGACGGGTAATACG ->2 -CGCTCATGACGCCCAATCCGTCCGCTGACATAGGAAACAGACGGGACTT ->3 -TAATCCAGATACCCTATCGGCCGCTCAACTTCAGGCCGGAACAGCCCGC ->4 -TAACCAGCCGGTCAGTCCTAAACGAATAATATCAAGACGTGTAGTGGAA - |
b |
diff -r e4d75f9efb90 -r 675a8370675b test_1.fa --- a/test_1.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ ->one/1 -GATTACAG ->two/1 -TAAGTGTT ->three/1 -GGGGAAAC |
b |
diff -r e4d75f9efb90 -r 675a8370675b test_2.fa --- a/test_2.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ ->one/2 -TTACAGAT ->two/2 -GTGTTTAA ->three/2 -AACGGGGA |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.cons.fa --- a/tests/families.cons.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ ->ACCGACACAGACTAGGGATCAAAG.1 4/3 -TAAGGATACTAGTATAAGAG ->ACCGACACAGACTAGGGATCAAAG.2 4/3 -AGAGTCAGGTTCGTCTTTAG |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.cons.incl-sscs.fa --- a/tests/families.cons.incl-sscs.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,12 +0,0 @@ ->ACCGACACAGACTAGGGATCAAAG.1 4/3 -TAAGGATACTAGTATAAGAG ->ACCGACACAGACTAGGGATCAAAG.2 4/3 -AGAGTCAGGTTCGTCTTTAG ->ACTAGTATAAGCATGATTAAGGCT.2 3 -TCTATCATTATGTTTTGAGG ->ACTAGTATAAGCATGATTAAGGCT.1 3 -GCCCCCTCTACCCCCTCTAG ->TATTTGGAGGTATTGTTGATGAGA.1 3 -GGTGATTAGTCGGTTGTTGA ->TATTTGGAGGTATTGTTGATGAGA.2 3 -ACTTTACAATGCAATGCCCA |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.cons_1.fa --- a/tests/families.cons_1.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->ACCGACACAGACTAGGGATCAAAG 4/3 -TAAGGATACTAGTATAAGAG |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.cons_2.fa --- a/tests/families.cons_2.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->ACCGACACAGACTAGGGATCAAAG 4/3 -AGAGTCAGGTTCGTCTTTAG |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.in.tsv --- a/tests/families.in.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,16 +0,0 @@ -ACCGACACAGACTAGGGATCAAAG ab pair1.ab.1 TAAGGATACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA pair1.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab pair2.ab.1 TAAGGATACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA pair2.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab pair3.ab.1 TAAGGATACTAGATAAGAGC AAAAAAAAAAAAAAAAAAAA pair3.ab.2 AGAGTCACGTTTCGTCTTTA AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab pair4.ab.1 TAAGGCTACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA pair4.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba pair5.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA pair5.ba.2 TAAGGCTACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba pair6.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA pair6.ba.2 TAAGGATACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba pair7.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA pair7.ba.2 TAAGGATACTAGTAGAAGAG AAAAAAAAAAAAAAAAAAAA -ATGATTAAGGCTACTAGTATAAGC ab pair8.ab.1 TCTATCATTATGTTTTGAGG AAAAAAAAAAAAAAAAAAAA pair8.ab.2 GCCCCCTCTACCCCCTCTAG AAAAAAAAAAAAAAAAAAAA -ATGATTAAGGCTACTAGTATAAGC ab pair9.ab.1 TCTATCATTATGTCTTGAGG AAAAAAAAAAAAAAAAAAAA pair9.ab.2 GCCCCCTCTACCCCCTCTAG AAAAAAAAAAAAAAAAAAAA -ATGATTAAGGCTACTAGTATAAGC ab pair10.ab.1 TCTATCATTATGTTTTGAGG AAAAAAAAAAAAAAAAAAAA pair10.ab.2 GCCCCTCTACCCCCTCTAGC AAAAAAAAAAAAAAAAAAAA -TCTTAATAAGAACCAACACACTGT ab pair11.ab.1 TCGGTTGTTGATGAGATATT AAAAAAAAAAAAAAAAAAAA pair11.ab.2 GATTAAGAGAACCAACACCT AAAAAAAAAAAAAAAAAAAA -TTGTTGATGAGATATTTGGAGGTA ba pair12.ab.1 GGTGATTAGTCGGTTGTTGA AAAAAAAAAAAAAAAAAAAA pair12.ab.2 ACTTTACAATGCAATGCCCA AAAAAAAAAAAAAAAAAAAA -TTGTTGATGAGATATTTGGAGGTA ba pair13.ab.1 GGTGATTAGTCGGATGTTGA AAAAAAAAAAAAAAAAAAAA pair13.ab.2 ACTTTACCATGCAATGCCCA AAAAAAAAAAAAAAAAAAAA -TTGTTGATGAGATATTTGGAGGTA ba pair14.ab.1 GGTGACTAGTCGGTTGTTGA AAAAAAAAAAAAAAAAAAAA pair14.ab.2 ACTTTACAATGCAATGCACA AAAAAAAAAAAAAAAAAAAA -GACTAGGGATCAAAACCGACACAG ba pair15.ba.1 TCAATGCTCTGAAATCTGTG AAAAAAAAAAAAAAAAAAAA 
pair15.ba.2 GTTGATGAGATATTTGGAGG AAAAAAAAAAAAAAAAAAAA -GACTAGGGATCAAAACCGACACAG ab pair16.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA pair16.ab.2 TCAATGCTCTGAAATCTGTG AAAAAAAAAAAAAAAAAAAA |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.raw_1.fq --- a/tests/families.raw_1.fq Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,64 +0,0 @@ -@pair15.ba.1 -AAACCGACACAGGCATCTCAATGCTCTGAAATCTGTG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair12.ab.1 -TATTTGGAGGTAGCATCGGTGATTAGTCGGTTGTTGA -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair1.ab.1 -ACCGACACAGACGCATCTAAGGATACTAGTATAAGAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair4.ab.1 -ACCGACACAGACGCATCTAAGGCTACTAGTATAAGAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair10.ab.1 -ATGATTAAGGCTGCATCTCTATCATTATGTTTTGAGG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair7.ba.1 -TAGGGATCAAAGGCATCAGAGTCAGGTTCGTCTTTAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair16.ab.1 -GACTAGGGATCAGCATCGTTGATGAGATACTTGGAGG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair11.ab.1 -TCTTAATAAGAAGCATCTCGGTTGTTGATGAGATATT -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair9.ab.1 -ATGATTAAGGCTGCATCTCTATCATTATGTCTTGAGG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair14.ab.1 -TATTTGGAGGTAGCATCGGTGACTAGTCGGTTGTTGA -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair13.ab.1 -TATTTGGAGGTAGCATCGGTGATTAGTCGGATGTTGA -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair2.ab.1 -ACCGACACAGACGCATCTAAGGATACTAGTATAAGAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair6.ba.1 -TAGGGATCAAAGGCATCAGAGTCAGGTTCGTCTTTAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair5.ba.1 -TAGGGATCAAAGGCATCAGAGTCAGGTTCGTCTTTAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair3.ab.1 -ACCGACACAGACGCATCTAAGGATACTAGATAAGAGC -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair8.ab.1 -ATGATTAAGGCTGCATCTCTATCATTATGTTTTGAGG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.raw_2.fq --- a/tests/families.raw_2.fq Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,64 +0,0 @@ -@pair15.ba.2 -GACTAGGGATCAGCATCGTTGATGAGATATTTGGAGG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair12.ab.2 -TTGTTGATGAGAGCATCACTTTACAATGCAATGCCCA -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair1.ab.2 -TAGGGATCAAAGGCATCAGAGTCAGGTTCGTCTTTAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair4.ab.2 -TAGGGATCAAAGGCATCAGAGTCAGGTTCGTCTTTAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair10.ab.2 -ACTAGTATAAGCGCATCGCCCCTCTACCCCCTCTAGC -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair7.ba.2 -ACCGACACAGACGCATCTAAGGATACTAGTAGAAGAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair16.ab.2 -AAACCGACACAGGCATCTCAATGCTCTGAAATCTGTG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair11.ab.2 -CCAACACACTGTGCATCGATTAAGAGAACCAACACCT -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair9.ab.2 -ACTAGTATAAGCGCATCGCCCCCTCTACCCCCTCTAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair14.ab.2 -TTGTTGATGAGAGCATCACTTTACAATGCAATGCACA -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair13.ab.2 -TTGTTGATGAGAGCATCACTTTACCATGCAATGCCCA -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair2.ab.2 -TAGGGATCAAAGGCATCAGAGTCAGGTTCGTCTTTAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair6.ba.2 -ACCGACACAGACGCATCTAAGGATACTAGTATAAGAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair5.ba.2 -ACCGACACAGACGCATCTAAGGCTACTAGTATAAGAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair3.ab.2 -TAGGGATCAAAGGCATCAGAGTCACGTTTCGTCTTTA -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA -@pair8.ab.2 -ACTAGTATAAGCGCATCGCCCCCTCTACCCCCTCTAG -+ -TTTTTTTTTTTTCCCCCAAAAAAAAAAAAAAAAAAAA |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.shuf.in.tsv --- a/tests/families.shuf.in.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,16 +0,0 @@ -GACTAGGGATCAAAACCGACACAG ba pair15.ba.1 TCAATGCTCTGAAATCTGTG AAAAAAAAAAAAAAAAAAAA pair15.ba.2 GTTGATGAGATATTTGGAGG AAAAAAAAAAAAAAAAAAAA -TTGTTGATGAGATATTTGGAGGTA ba pair12.ab.1 GGTGATTAGTCGGTTGTTGA AAAAAAAAAAAAAAAAAAAA pair12.ab.2 ACTTTACAATGCAATGCCCA AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab pair1.ab.1 TAAGGATACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA pair1.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab pair4.ab.1 TAAGGCTACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA pair4.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ATGATTAAGGCTACTAGTATAAGC ab pair10.ab.1 TCTATCATTATGTTTTGAGG AAAAAAAAAAAAAAAAAAAA pair10.ab.2 GCCCCTCTACCCCCTCTAGC AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba pair7.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA pair7.ba.2 TAAGGATACTAGTAGAAGAG AAAAAAAAAAAAAAAAAAAA -GACTAGGGATCAAAACCGACACAG ab pair16.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA pair16.ab.2 TCAATGCTCTGAAATCTGTG AAAAAAAAAAAAAAAAAAAA -TCTTAATAAGAACCAACACACTGT ab pair11.ab.1 TCGGTTGTTGATGAGATATT AAAAAAAAAAAAAAAAAAAA pair11.ab.2 GATTAAGAGAACCAACACCT AAAAAAAAAAAAAAAAAAAA -ATGATTAAGGCTACTAGTATAAGC ab pair9.ab.1 TCTATCATTATGTCTTGAGG AAAAAAAAAAAAAAAAAAAA pair9.ab.2 GCCCCCTCTACCCCCTCTAG AAAAAAAAAAAAAAAAAAAA -TTGTTGATGAGATATTTGGAGGTA ba pair14.ab.1 GGTGACTAGTCGGTTGTTGA AAAAAAAAAAAAAAAAAAAA pair14.ab.2 ACTTTACAATGCAATGCACA AAAAAAAAAAAAAAAAAAAA -TTGTTGATGAGATATTTGGAGGTA ba pair13.ab.1 GGTGATTAGTCGGATGTTGA AAAAAAAAAAAAAAAAAAAA pair13.ab.2 ACTTTACCATGCAATGCCCA AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab pair2.ab.1 TAAGGATACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA pair2.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba pair6.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA pair6.ba.2 TAAGGATACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba pair5.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA pair5.ba.2 TAAGGCTACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab pair3.ab.1 TAAGGATACTAGATAAGAGC 
AAAAAAAAAAAAAAAAAAAA pair3.ab.2 AGAGTCACGTTTCGTCTTTA AAAAAAAAAAAAAAAAAAAA -ATGATTAAGGCTACTAGTATAAGC ab pair8.ab.1 TCTATCATTATGTTTTGAGG AAAAAAAAAAAAAAAAAAAA pair8.ab.2 GCCCCCTCTACCCCCTCTAG AAAAAAAAAAAAAAAAAAAA |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.sort.tsv --- a/tests/families.sort.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,16 +0,0 @@ -AAACCGACACAGGACTAGGGATCA ab @pair15.ba.1 TCAATGCTCTGAAATCTGTG AAAAAAAAAAAAAAAAAAAA @pair15.ba.2 GTTGATGAGATATTTGGAGG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba @pair16.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA @pair16.ab.2 TCAATGCTCTGAAATCTGTG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab @pair1.ab.1 TAAGGATACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA @pair1.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab @pair2.ab.1 TAAGGATACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA @pair2.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab @pair3.ab.1 TAAGGATACTAGATAAGAGC AAAAAAAAAAAAAAAAAAAA @pair3.ab.2 AGAGTCACGTTTCGTCTTTA AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab @pair4.ab.1 TAAGGCTACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA @pair4.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba @pair5.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA @pair5.ba.2 TAAGGCTACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba @pair6.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA @pair6.ba.2 TAAGGATACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba @pair7.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA @pair7.ba.2 TAAGGATACTAGTAGAAGAG AAAAAAAAAAAAAAAAAAAA -ACTAGTATAAGCATGATTAAGGCT ba @pair10.ab.1 TCTATCATTATGTTTTGAGG AAAAAAAAAAAAAAAAAAAA @pair10.ab.2 GCCCCTCTACCCCCTCTAGC AAAAAAAAAAAAAAAAAAAA -ACTAGTATAAGCATGATTAAGGCT ba @pair8.ab.1 TCTATCATTATGTTTTGAGG AAAAAAAAAAAAAAAAAAAA @pair8.ab.2 GCCCCCTCTACCCCCTCTAG AAAAAAAAAAAAAAAAAAAA -ACTAGTATAAGCATGATTAAGGCT ba @pair9.ab.1 TCTATCATTATGTCTTGAGG AAAAAAAAAAAAAAAAAAAA @pair9.ab.2 GCCCCCTCTACCCCCTCTAG AAAAAAAAAAAAAAAAAAAA -CCAACACACTGTTCTTAATAAGAA ba @pair11.ab.1 TCGGTTGTTGATGAGATATT AAAAAAAAAAAAAAAAAAAA @pair11.ab.2 GATTAAGAGAACCAACACCT AAAAAAAAAAAAAAAAAAAA -TATTTGGAGGTATTGTTGATGAGA ab @pair12.ab.1 GGTGATTAGTCGGTTGTTGA AAAAAAAAAAAAAAAAAAAA @pair12.ab.2 ACTTTACAATGCAATGCCCA AAAAAAAAAAAAAAAAAAAA -TATTTGGAGGTATTGTTGATGAGA ab @pair13.ab.1 
GGTGATTAGTCGGATGTTGA AAAAAAAAAAAAAAAAAAAA @pair13.ab.2 ACTTTACCATGCAATGCCCA AAAAAAAAAAAAAAAAAAAA -TATTTGGAGGTATTGTTGATGAGA ab @pair14.ab.1 GGTGACTAGTCGGTTGTTGA AAAAAAAAAAAAAAAAAAAA @pair14.ab.2 ACTTTACAATGCAATGCACA AAAAAAAAAAAAAAAAAAAA |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.unequal.fa --- a/tests/families.unequal.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ ->ACCGACACAGACTAGGGATCAAAG.1 4-3 -TAAGGATACTAGTATAAGAGCCTA ->ACCGACACAGACTAGGGATCAAAG.2 4-3 -AGAGTCAGGTTCGTCTTTAG ->AAACCGACACAGGACTAGGGATCA.1 3-4 -TCAATGCTCTGAAATCTGTG ->AAACCGACACAGGACTAGGGATCA.2 3-4 -GTTGATGAGATAYTTGGAGG |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.unequal.msa.tsv --- a/tests/families.unequal.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,28 +0,0 @@ -ACCGACACAGACTAGGGATCAAAG ab 1 @pair1.ab.1 TAAGGATACTAGTATAAGAGCCTA AAAAAAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab 1 @pair2.ab.1 TAAGGATACTAGTATAAGAGCCTA AAAAAAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab 1 @pair3.ab.1 TAAGGATACTAG-ATAAGAGCCTA AAAAAAAAAAAA AAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab 1 @pair4.ab.1 TAAGGCTACTAGTATAAGAG---- AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba 2 @pair5.ba.2 TAAGGCTACTAGTATAAGAGCCTA AAAAAAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba 2 @pair6.ba.2 TAAGGATACTAGTATAAGAGCCTA AAAAAAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba 2 @pair7.ba.2 TAAGGATACTAGTAGAAGAG---- AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab 2 @pair1.ab.2 AGAGTCA-GGTTCGTCTTTAG AAAAAAA AAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab 2 @pair2.ab.2 AGAGTCA-GGTTCGTCTTTAG AAAAAAA AAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab 2 @pair3.ab.2 AGAGTCACGTTTCGTCTTTA- AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab 2 @pair4.ab.2 AGAGTCA-GGTTCGTCTTTAG AAAAAAA AAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba 1 @pair5.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba 1 @pair6.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba 1 @pair7.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ab 1 @pair8.ba.1 TCAATGCTCTGAAATCTGTGGACT AAAAAAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ab 1 @pair9.ba.1 TCAATGCTCTGAAATCTGTGGACT AAAAAAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ab 1 @pair10.ba.1 TCAATGCTCTGAAATCTGTG---- AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba 2 @pair11.ab.2 TCAATGCTCTGAAATCTGTGGACT AAAAAAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba 2 @pair12.ab.2 TCAATGCTCTGAAATCTGTG---- AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba 2 @pair13.ab.2 TCAATGCTCTGAAATCTGTG---- AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba 2 @pair14.ab.2 TCAATGCTCTGAAATCTGTG---- AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ab 2 @pair8.ba.2 GTTGATGAGATATTTGGAGG AAAAAAAAAAAAAAAAAAAA 
-AAACCGACACAGGACTAGGGATCA ab 2 @pair9.ba.2 GTTGATGAGATATTTGGAGG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ab 2 @pair10.ba.2 GTTGATGAGATATTTGGAGG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba 1 @pair11.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba 1 @pair12.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba 1 @pair13.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba 1 @pair14.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.unequal.sscs.fa --- a/tests/families.unequal.sscs.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,16 +0,0 @@ ->ACCGACACAGACTAGGGATCAAAG.ab.1 4 -TAAGGATACTAGTATAAGAGCCTA ->ACCGACACAGACTAGGGATCAAAG.ba.2 3 -TAAGGATACTAGTATAAGAGCCTA ->ACCGACACAGACTAGGGATCAAAG.ab.2 4 -AGAGTCAGGTTCGTCTTTAG ->ACCGACACAGACTAGGGATCAAAG.ba.1 3 -AGAGTCAGGTTCGTCTTTAG ->AAACCGACACAGGACTAGGGATCA.ab.1 3 -TCAATGCTCTGAAATCTGTGGACT ->AAACCGACACAGGACTAGGGATCA.ba.2 4 -TCAATGCTCTGAAATCTGTG ->AAACCGACACAGGACTAGGGATCA.ab.2 3 -GTTGATGAGATATTTGGAGG ->AAACCGACACAGGACTAGGGATCA.ba.1 4 -GTTGATGAGATACTTGGAGG |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/families.unequal.tsv --- a/tests/families.unequal.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,14 +0,0 @@ -ACCGACACAGACTAGGGATCAAAG ab @pair1.ab.1 TAAGGATACTAGTATAAGAGCCTA AAAAAAAAAAAAAAAAAAAAAAAA @pair1.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab @pair2.ab.1 TAAGGATACTAGTATAAGAGCCTA AAAAAAAAAAAAAAAAAAAAAAAA @pair2.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab @pair3.ab.1 TAAGGATACTAGATAAGAGCCTA AAAAAAAAAAAAAAAAAAAAAAA @pair3.ab.2 AGAGTCACGTTTCGTCTTTA AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ab @pair4.ab.1 TAAGGCTACTAGTATAAGAG AAAAAAAAAAAAAAAAAAAA @pair4.ab.2 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba @pair5.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA @pair5.ba.2 TAAGGCTACTAGTATAAGAGCCTA AAAAAAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba @pair6.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA @pair6.ba.2 TAAGGATACTAGTATAAGAGCCTA AAAAAAAAAAAAAAAAAAAAAAAA -ACCGACACAGACTAGGGATCAAAG ba @pair7.ba.1 AGAGTCAGGTTCGTCTTTAG AAAAAAAAAAAAAAAAAAAA @pair7.ba.2 TAAGGATACTAGTAGAAGAG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ab @pair8.ba.1 TCAATGCTCTGAAATCTGTGGACT AAAAAAAAAAAAAAAAAAAAAAAA @pair8.ba.2 GTTGATGAGATATTTGGAGG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ab @pair9.ba.1 TCAATGCTCTGAAATCTGTGGACT AAAAAAAAAAAAAAAAAAAAAAAA @pair9.ba.2 GTTGATGAGATATTTGGAGG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ab @pair10.ba.1 TCAATGCTCTGAAATCTGTG AAAAAAAAAAAAAAAAAAAA @pair10.ba.2 GTTGATGAGATATTTGGAGG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba @pair11.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA @pair11.ab.2 TCAATGCTCTGAAATCTGTGGACT AAAAAAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba @pair12.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA @pair12.ab.2 TCAATGCTCTGAAATCTGTG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba @pair13.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA @pair13.ab.2 TCAATGCTCTGAAATCTGTG AAAAAAAAAAAAAAAAAAAA -AAACCGACACAGGACTAGGGATCA ba @pair14.ab.1 GTTGATGAGATACTTGGAGG AAAAAAAAAAAAAAAAAAAA @pair14.ab.2 TCAATGCTCTGAAATCTGTG 
AAAAAAAAAAAAAAAAAAAA |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/gapqual.cons.fa --- a/tests/gapqual.cons.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ ->ACCGGACAACGACACCCTGCATAA.1 5 -TGGGAGAACACAACTAAACTCGGGAAGT ->ACCGGACAACGACACCCTGCATAA.2 4 -CNCATCACCAGGAACAACTCTGCTGTACTT |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/gapqual.msa.tsv --- a/tests/gapqual.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,9 +0,0 @@ -ACCGGACAACGACACCCTGCATAA ab 1 read1.1 --G-GAGAACACAACTAAACTCGGGAAGT- 0 ?@AAAAAAAAAAAAAAAAAAAAAAA -ACCGGACAACGACACCCTGCATAA ab 1 read2.1 --G-GAGAACACAACTAAACTCGGGAAGT- 0 ?@AAAAAAAAAAAAAAAAAAAAAAA -ACCGGACAACGACACCCTGCATAA ab 1 read3.1 --G-GAGAACACAACTAAACTCGG-AAGTA 0 ?@AAAAAAAAAAAAAAA;;1 1;;AA -ACCGGACAACGACACCCTGCATAA ab 1 read4.1 TGG-GAG-ACACAACT-AACTCGG-AAGTA AAA AAA AAAAAAAA AAAA;;1 1;;AA -ACCGGACAACGACACCCTGCATAA ab 1 read5.1 TGGAGCGAAC-CAACTGAA-TCGG-AAGT- AAAAAAAAAA AAAAAAAA AAAA AAAA -ACCGGACAACGACACCCTGCATAA ab 2 read1.2 CGCGTCACCAGGAACAACTCTGCTGTACTT AAA1AAAAAAAAAAAAAAAAAAAAAAAAAA -ACCGGACAACGACACCCTGCATAA ab 2 read2.2 CGCGTCACCAGGAACAACTCTGCTGTACTT AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -ACCGGACAACGACACCCTGCATAA ab 2 read3.2 CACATCACCAGGAACAACTCTGCTGTACTT AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA -ACCGGACAACGACACCCTGCATAA ab 2 read4.2 CACATCACCAGGAACAACTCTGCTGTACTT AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA \ No newline at end of file |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/gaps-diffs.out.tsv --- a/tests/gaps-diffs.out.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,11 +0,0 @@ -ACCGGA 0.0 CAGCACCCCCTCTACCC------CCCCCTCTAGAG -ACCGGA 0.0 CAGCACCCCCTCTACCC-----ACCCCCTCTAGAG -ACCGGA 0.0 CAGCACCCCCTCTACCC-------CCCCTCTAGAG -ACCGGA 0.0286 CAGCACCCCCTCTACC-------CCCCCTCTAGAG -ACCGGA 0.0286 CAGCACCCCCTCTACC------CCCCCCTCTAGAG -ACCGGA 0.0286 CAGCACCCCCTCTACCCC------CCCCTCTAGAG -ACCGGA 0.0 CAGCACCCCCTCTACCC------CCCCCTCTAGAG -ACCGGA 0.0286 CAGCACCCCCTCTACCCCCTCTA----CTCTAGAG -ACCGGA 0.0571 CAGCACCCCCTCTACCCCCTCTAC----TCTAGAG -ACCGGA 0.0571 CAGCACCCCCTCTACCCCCTCTACC----CTAGAG -ACCGGA 0.0286 CAGCACCCCCTCTACCCCCTCTACCCCCTCTAGAG |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/gaps.msa.tsv --- a/tests/gaps.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,12 +0,0 @@ -ACCGGA CONSENSUS CAGCACCCCCTCTACCC------CCCCCTCTAGAG -ACCGGA 0 cagcaccccctctaccc------ccccctctagag -ACCGGA 1 cagcaccccctctaccc-----accccctctagag -ACCGGA 2 cagcaccccctctaccc-------cccctctagag -ACCGGA 3 cagcaccccctctacc-------ccccctctagag -ACCGGA 4 cagcaccccctctacc------cccccctctagag -ACCGGA 5 cagcaccccctctacccc------cccctctagag -ACCGGA 0 cagcaccccctctaccc------ccccctctagag -ACCGGA 6 cagcaccccctctaccccctcta----ctctagag -ACCGGA 7 cagcaccccctctaccccctctac----tctagag -ACCGGA 8 cagcaccccctctaccccctctacc----ctagag -ACCGGA 9 cagcaccccctctaccccctctaccccctctagag |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/qual.cons.fa --- a/tests/qual.cons.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ ->ACCGGACAACGACACCCTGCATAA.1 4 -TGCAGAGAACACAACTAAACTCGGGAAGTA ->ACCGGACAACGACACCCTGCATAA.2 4 -CGCATCACCAGGAACAACTCTGCTGTACTT |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/qual.msa.tsv --- a/tests/qual.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ -ACCGGACAACGACACCCTGCATAA ab 1 read1.1 TGCAGAGAACACAACTAAACTCGGGAAGTA EEEEEEEEEEEEEEEEEEEEEEEEEEEEEE -ACCGGACAACGACACCCTGCATAA ab 1 read2.1 TGCAGAGAACACAACTAAACTCGGGAAGTA EEEEEEEEEEEEEEEEEEEEEEEEEEEEEE -ACCGGACAACGACACCCTGCATAA ab 1 read3.1 TGGAGAGAACACAACTGAACTCGGGAAGTA EEEEEEEEEEEEEEEE1EEEEEEEEEEEEE -ACCGGACAACGACACCCTGCATAA ab 1 read4.1 TGCAGCGAACACAACTGAACTCGG-AAGTA EEEEEEEEEEEEEEEEEEEEEEEE EEEEE -ACCGGACAACGACACCCTGCATAA ab 2 read1.2 CGCATCACCAGGAACAACTCTGCTGTACTT EEEEEEEEEEEEEEEEEEEEEEEEEEEEEE -ACCGGACAACGACACCCTGCATAA ab 2 read2.2 CGCATCACCAGGAACAACTCTGCTGTACTT EEEEEEEEEEEEEEEEEEEEEEEEEEEEEE -ACCGGACAACGACACCCTGCATAA ab 2 read3.2 CGCATCACCAGGAACAACTCTGCTGTACTT EEEEEEEEEEEEEEEEEEEEEEEEEEEEEE -ACCGGACAACGACACCCTGCATAA ab 2 read4.2 CGCATCACCAGGAACAACTCTGCTGTACTT EEEEEEEEEEEEEEEEEEEEEEEEEEEEEE \ No newline at end of file |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/quirks.msa.tsv --- a/tests/quirks.msa.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,16 +0,0 @@\n-CTGCGACACAATATTGGGCTCCCC\tab\t1\t@M02286:46:000000000-AEG11:1:1116:22967:7077 1:N:0:1\tGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAAC\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCFGGGGGGDGGGGGGGFGGGGGGGGGGGGGFGGGGGGGGGGGGGGGG?FGGGFGGGFGGGFF7BFGDG@EEGGGGGGGCFEG@FGGGGGGGDFFFFGGGGGGGDEGGGGGGGGGGGGGGGFGG76CGGGGD4?DFGFFGGGFFDFFGEDEGFB?EB*-*3;028?)9)46FFAF706>FFFFF::\n-CTGCGACACAATATTGGGCTCCCC\tab\t1\t@M02286:46:000000000-AEG11:1:1118:14605:8689 1:N:0:1\tGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAAC\tFGGGGGCEEEEGGGGGGGGEGGGFGGGEGGGGGGGGGGGGGGGAFGGGDGGGGGGGGGGGGGGGEEDEGAFFDGGGFGGFGGFGGCGGGGGGGGGFGECGCFEGGGGBC7BFGEFGGDGGGGGGGGCGGFCGGGGGEGGGGGGFCFG,7EDCFCFGGF9FGGGFGGGEC7FF@CGGCFCE;BFCGGGG8CEFFBFFFFFFGGGGC7EEEEFGDGCF7C7=CGGGG4<<C<C@EGFD@E0<>?DFGDG55>C335B5=?F?058C4F9FAFF9?::2>9D?24?2\n-CTGCGACACAATATTGGGCTCCCC\tab\t1\t@M02286:46:000000000-AEG11:1:1118:21309:6959 
1:N:0:1\tGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAAC\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGFFGGGGGFGGGGGGGFFFGGGGGDGGGFCFGCFGGF9>DCGFGGGGG;DFFGFGFDCFGFGEGFE@CD0)7@*9F??F?F9@2:>F4)4<<;>FFF>A?8\n-CTGCGACACAATATTGGGCTCCCC\tab\t1\t@M02286:46:000000000-AEG11:1:2101:17733:13519 1:N:0:1\tGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTACGGTTCCCAAC\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGFGEGGGDGGGGGGGGGGGFGGGGGGGGGGGGGGGGFGGGGGGGFFGGGGGGFGCGCFGFGGGGGGGGGGGGFGGGFGGFGGGG=;;FEEGGEFFGGGGCEGGGGGGEGGEGGGGGFFGGGGGGGGF6>CFFFFGGG77>@CFFFFCE75;>>575BGBFFFFF??F9<F>FB4*434F?6>?BF0\n-CTGCGACACAATATTGGGCTCCCC\tab\t1\t@M02286:46:000000000-AEG11:1:2103:23125:15471 1:N:0:1\tGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAAC\tGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGDGGGGGGFGGGCCFGFGGGFGGGDGGFGGFGGGGGGGGGGFGFCFFGGGGGGGGGGGCEBFGG?CFFFGGGGGGGGCFGFGFCFGGGGFGGGGGGFFGFGGGGEFGFG?E@FGFGFEEC>F?G4@FFF@FF:49<F?AFAF?515>:B@??:\n-CTGCGACACAATATTGGGCTCCCC\tab\t1\t@M02286:46:000000000-AEG11:1:2104:14576:24265 
1:N:0:1\tGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTCTCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGCGTAAGTAGTTCGCCAGTTACTAGTTCACGCAACGTTCTTGCCATTGCTACAGCCATCGTGGTGTGTCGCTCGTCGTTTGGTATGGCTTCATTTAGCGCCGGTTCCCCAC\tF9FAFCFE7:EGGFGGFFGF@C9,8FEDEGGGFF9CEFFGGGGFCEGC@C@FGGGGGGGGGFGEGEGCG8E999CFGGCFFFFFGF9FGF@CGGFGDFDBFCEEGDD773=FE7:FCFGGGGGGGGBFFGFE,37DBEGG@FCC;DD9FC;@;=@2DF,<@>AEEFG*=C,CEC,*=*=CF9,3BC8EGE*52;+5;9+2++2**=CEEE**/0+07+3C9907CF6+1*<*557)1*)*)38G/))9C)<*5)11**)*0)00-6)**175(4759D(6,).-\n-CTGCGACACAATATTGGGCTCCCC\tab\t1\t@M02286:46:000000000-AEG'..b').0(344(*(,-((42(.(.)5A)9?0<4<7?+5( (,(,,(((->18:0,((02-92\n-CTGCGACACAATATTGGGCTCCCC\tab\t2\t@M02286:46:000000000-AEG11:1:1118:21309:6959 2:N:0:1\tAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCACTTTTTTGCACAACCTGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGCATGAAGCCATCCCAAA-CGACGACCGTGACACCACCATT\tGGGGGGGGGGFGDCGGGGGGGGDGGGGGGGGFGGGFFFGGGCFGF@CCGFFGGGGGGGGFGGGGFGFCFDFFDEGE?@FFF7FC?FFGGGGGGGAF?FDFFGGGFGFEFGCEFGGGGGGGFEFFGGGGGFDCFGGGGFD@EGGGFEEEFE,EDDEFF5DD@FCFEE>CDCGGD>ED5CDFFGAFFGF@CEEFG4C:A:8?*//C5577?F;FACCFFF4D@EB33=675A1(7284<?9>FB9?B02)6<29???A(23+43 :<767(-(4C<((0)-()(())\n-CTGCGACACAATATTGGGCTCCCC\tab\t2\t@M02286:46:000000000-AEG11:1:2101:17733:13519 2:N:0:1\tAAGGGCACCCGGGTGGCGGCAACCATAATTCTAAGATTGCTTGGGTGGGGTATTACTTAGCACAGGAAAAGAATCTAAGGAAGGGCAGACAGGAAAGGAATTAATGCATTCCTGCATAACCAAGGAGGGAAAAACCGGCGGCCAACTTCCTTCGACAAAGGTAGGGGGGACCAAAGGGGCAAACCGCTTTTTTCCACAAATGGGGGCATAATGTAACGCCCCTTG-TTGTTGGGGACCGGGCCCGAAAGGACCCAAACCAACACGACGACCCTATCACAAAACGG\tB9,,,,:,,,+8+++8C+++++,,,,,,<,,,,,,:9,,,,C,B,B,++++,,,5,,5,,,<E,,,C4E,,+,,,<5,,:,,4B+8+++,,,+,B,,,,:,75,,,,7,,5,,,83,5C3<<+,+,8+++,@,,++++3@*1*1,*,2,,1,,***4:C,,1,,3***28E**;/2***)*1*9*)/9:*7*1*0***)1**1095))))0*00/**))()-80) 
**)0.439>>4*-(,)((-(()((.((,4((-((4(),((-((-(()()/).))(((4-\n-CTGCGACACAATATTGGGCTCCCC\tab\t2\t@M02286:46:000000000-AEG11:1:2103:23125:15471 2:N:0:1\tAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGCAGACATAACAAA-CGCCGAGCGTTACACCCCGATG\tFGGGGGGFGGGGGGGGGGDGGGGGGGGCGGGGGGGGGGGGGGGGGGGGGGGGGGGCGFGGFGGGGGGGGGGGGGGGGGGGGCEGGGGGGGGGGGG9FGGGGGGGGGGFGGGEBEFGFBFFGGGGGFGGFGGGGGGGGGFDEEEGFGDDFGGGGGG,@EEFEFFGGG6CDEGFEC8?*,79CFCFGGGGDGGFGGGGFGGGF4*8*6=7>FD+788FC7:37GEA@<8F?5:?46C),<(9B90??>?4*)1..406B).5)2 4<((49>07()--4/4(2((-(\n-CTGCGACACAATATTGGGCTCCCC\tab\t2\t@M02286:46:000000000-AEG11:1:2104:14576:24265 2:N:0:1\tAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACATTAAGAGAATTCTGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAAAGATTGGAGGAACGAAGTGTATAACCACTTTTTTGCACAACATGCGGGATCGTGTAACTCGCTTTGTTCGTTGCTCACCGGAAGCGATAGCGACCATGCCACC-CGTACCGCGGTCAACACCGTTT\t<<F,;C6CFGGCFG7C:BCD7C:9,,CCFG,,,<,,,<E,<FC6<:DFCAEF,,,66CC<9CFGGGDGCE6=ECFA<F<A7:@FGC4ED,<A,9,:,C,,4,,,,,5@A,?F,55EFFFGGG@9,4=,9E;@DGGF9,E+++++3A@,6,=;DD,6@,@=,6,,7,,+6+,0+4@8+,,,+2257,3@,2,219@+4+*/406**)*02?*/;C)=>8+).**/)1)1):;4(++./26()(((((0)(.,)(0())(.64( ()--()()(.((,0).(((.((\n-CTGCGACACAATATTGGGCTCCCC\tab\t2\t@M02286:46:000000000-AEG11:1:2104:25265:19405 
2:N:0:1\tAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAACGAGCTAACCGCTTTTTTGCACAACCTGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCTGAGCTGAATGAAGCCCTACCAAC-CGACGAGCGTGACACCACGATG\tGFFGGGGGGFGGDEFGGGGGGGGGGGGGGGGFGGAD<FAFGDG9FGGCEGFFFGGCGGGCGGGGGGC@FEFGGFFGGGG?7FGGFGGGGGGGDGDFFFDFFFGDGGGFE<FEFDC@<FFEFFFGCCFAF9FCAFFGG?EFGGGGGGCFFGA@>EF;E?DFGGF?EE@+8@DD6E>*@C574=B:DEG>=*ADGBFGC=D4*;*;76C378;A6CACCDD59CC()+*.8*.)45*3>7((0,,54)/*)426))(.4:())( 4)--6073(8?((633(36(((\n-CTGCGACACAATATTGGGCTCCCC\tab\t2\t@M02286:46:000000000-AEG11:1:2119:22759:6520 2:N:0:1\tAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACCACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGACTGAAGCACTACCCAA-CGACTACCGTCACACCACGATT\tGGGFFCFGGGGEGGGGGGGGGGDFGGGGGGGGGGFGAEGGGFGGFFGGGGFGGGGGGGFGGGGGGGCFGDDFGGGGGGGGGGGEDFGGFGGGGGFGGFFGGGGGGFGFFGFCFFGFCD@?FDGGFFG4EFFFGGDGGFGGGEGGFFGFDA9EFGG=9,@F+8+@>E6@E68:E5*;7C>CCE@FFGD9?96:57DFGFCGBC8?3(:CD3;8:@:+8+;3CDE<+27:FF5,:5A,73*((170(4).*/4+,)(.:?B:<, 8<(-((((-((((*,7(4((((\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/run.sh --- a/tests/run.sh Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,133 +0,0 @@ -#!/usr/bin/env bash -if [ x$BASH = x ] || [ ! $BASH_VERSINFO ] || [ $BASH_VERSINFO -lt 4 ]; then - echo "Error: Must use bash version 4+." >&2 - exit 1 -fi -# get the name of the test directory -dirname=$(dirname $0) - -USAGE="Usage: \$ $(basename $0) [options] [test1 [test2]]" - - -function main { - - do_all=true - verbose=true - # Run the requested tests - for arg in "$@"; do - # Check for options - #TODO: option to keep test data at end instead of removing it. - if [[ ${arg:0:1} == '-' ]]; then - case "$arg" in - -h) - echo "$USAGE" >&2 - echo "Currently valid tests:" >&2 - list_tests >&2 - exit 1;; - -q) - verbose='';; - -v) - verbose=true;; - *) - echo "Unrecognized option \"$arg\"." >&2;; - esac - continue - fi - # Execute valid tests (if they're existing functions). - if [[ $(type -t $arg) == function ]]; then - do_all='' - if [[ $verbose ]]; then - $arg - else - $arg 2>/dev/null - fi - else - echo "Unrecognized test \"$arg\"." >&2 - do_all='' - fi - done - - # If no tests were specified in arguments, do all tests. - if [[ $do_all ]]; then - if [[ $verbose ]]; then - all - else - all 2>/dev/null - fi - fi -} - -function fail { - echo "$@" >&2 - exit 1 -} - -function list_tests { - while read declare f test; do - # Filter out functions that aren't tests. - if echo "$initial_declarations" | grep -qF 'declare -f '"$test"; then - continue - else - echo "$test" - fi - done < <(declare -F) -} - -# Capture a list of all functions defined before the tests, to tell which are actual functions -# and which are tests. -initial_declarations=$(declare -F) - -########## Functional tests ########## - -# Do all tests. 
-function all { - align - align_p3 - duplex - duplex_qual - stats_diffs -} - -function barcodes { - echo -e "\tmake-barcodes.awk ::: families.raw_[12].fq" - paste "$dirname/families.raw_1.fq" "$dirname/families.raw_2.fq" | paste - - - - \ - | awk -f "$dirname/../make-barcodes.awk" -v TAG_LEN=12 -v INVARIANT=5 | sort \ - | diff -s - "$dirname/families.sort.tsv" -} - -# align_families.py -function align { - echo -e "\talign_families.py ::: families.sort.tsv:" - python "$dirname/../align_families.py" "$dirname/families.sort.tsv" | diff -s - "$dirname/families.msa.tsv" -} - -# align_families.py with 3 processes -function align_p3 { - echo -e "\talign_families.py ::: families.sort.tsv:" - python "$dirname/../align_families.py" -p 3 "$dirname/families.sort.tsv" | diff -s - "$dirname/families.msa.tsv" -} - -# dunovo.py defaults on toy data -function duplex { - echo -e "\tdunovo.py ::: families.msa.tsv:" - python "$dirname/../dunovo.py" "$dirname/families.msa.tsv" | diff -s - "$dirname/families.cons.fa" - python "$dirname/../dunovo.py" --incl-sscs "$dirname/families.msa.tsv" | diff -s - "$dirname/families.cons.incl-sscs.fa" -} - -# dunovo.py quality score consideration -function duplex_qual { - echo -e "\tdunovo.py ::: qual.msa.tsv:" - python "$dirname/../dunovo.py" --incl-sscs -q 20 "$dirname/qual.msa.tsv" | diff -s - "$dirname/qual.cons.fa" -} - -function duplex_gapqual { - echo -e "\tdunovo.py ::: gapqual.msa.tsv:" - python "$dirname/../dunovo.py" --incl-sscs -q 25 "$dirname/gapqual.msa.tsv" | diff -s - "$dirname/gapqual.cons.fa" -} - -function stats_diffs { - echo -e "\tstats.py diffs ::: gaps.msa.tsv:" - python "$dirname/../stats.py" diffs "$dirname/gaps.msa.tsv" | diff -s - "$dirname/gaps-diffs.out.tsv" -} - -main "$@" |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/smoke.families.aligned.tsv --- a/tests/smoke.families.aligned.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ -GATTATTT ab 1 @one/1 CATTGGTCAATT nnacaeagasda -GATTATTT ab 2 @one/2 GTGTGATTAACC nnacaeagasda -TAAGCCCT ab 1 @two/1 GTTACGAAGTGG csacaeagadss -TAAGCCCT ab 2 @two/2 ACCCACCTCTTG csacaeagadss -GTGTGCGG ba 1 @three/1 ATCTTTGGGCTA aocasdefadva -GTGTGCGG ba 2 @three/2 CTCTTCATCAAT aocasdefadva |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/smoke.families.i0.tsv --- a/tests/smoke.families.i0.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,3 +0,0 @@ -GATTATTT ab @one/1 ACATTGGTCAATT knnacaeagasda @one/2 CGTGTGATTAACC knnacaeagasda -TAAGCCCT ab @two/1 TGTTACGAAGTGG ncsacaeagadss @two/2 AACCCACCTCTTG ncsacaeagadss -GTGTGCGG ba @three/1 CATCTTTGGGCTA naocasdefadva @three/2 ACTCTTCATCAAT naocasdefadva |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/smoke.families.tsv --- a/tests/smoke.families.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,3 +0,0 @@ -GATTATTT ab @one/1 CATTGGTCAATT nnacaeagasda @one/2 GTGTGATTAACC nnacaeagasda -TAAGCCCT ab @two/1 GTTACGAAGTGG csacaeagadss @two/2 ACCCACCTCTTG csacaeagadss -GTGTGCGG ba @three/1 ATCTTTGGGCTA aocasdefadva @three/2 CTCTTCATCAAT aocasdefadva |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/smoke_1.fq --- a/tests/smoke_1.fq Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,13 +0,0 @@ -@one/1 -GATTACATTGGTCAATT -+ -asdlknnacaeagasda -@two/1 -TAAGTGTTACGAAGTGG -+ -sdlkncsacaeagadss -@three/1 -GCGGCATCTTTGGGCTA -+ -aslknaocasdefadva - |
b |
diff -r e4d75f9efb90 -r 675a8370675b tests/smoke_2.fq --- a/tests/smoke_2.fq Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,13 +0,0 @@ -@one/2 -ATTTCGTGTGATTAACC -+ -asdlknnacaeagasda -@two/2 -CCCTAACCCACCTCTTG -+ -sdlkncsacaeagadss -@three/2 -GTGTACTCTTCATCAAT -+ -aslknaocasdefadva - |
b |
diff -r e4d75f9efb90 -r 675a8370675b tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Thu Feb 02 19:14:13 2017 -0500 |
b |
@@ -0,0 +1,22 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="mafft" version="7.221"> + <repository changeset_revision="dd4a533a0e3c" name="mafft" owner="rnateam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package> + <package name="duplex" version="0.5"> + <install version="1.0"> + <actions> + <action type="download_by_url">https://github.com/galaxyproject/dunovo/archive/v0.5.tar.gz</action> + <action type="shell_command">make</action> + <action type="move_directory_files"> + <source_directory>.</source_directory> + <destination_directory>$INSTALL_DIR</destination_directory> + </action> + <action type="set_environment"> + <environment_variable action="set_to" name="DUPLEX_DIR">$INSTALL_DIR</environment_variable> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR</environment_variable> + </action> + </actions> + </install> + </package> +</tool_dependency> |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/bars.tsv --- a/utils/bars.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,1000 +0,0 @@\n-chrM-0\tTACCTACTAGCA\tCTGGGCTCAACG\n-chrM-1\tGGGAACGGAGGT\tGCCGAATAACAT\n-chrM-2\tAATTGTTGGAGG\tTAACTCCATTTA\n-chrM-3\tGGTAGCGGGCAT\tGATTTGTGAGAT\n-chrM-4\tCACCCCTTGAAT\tTTCTATCGATGA\n-chrM-5\tGCAGATTTCGTT\tTGAGTTCTAAAT\n-chrM-6\tGTTGTCACGACC\tAAGCATCTAAAA\n-chrM-7\tCGTGCAAGGTCC\tATCCAGATCTGA\n-chrM-8\tCGGTCGTTACAG\tCGAGCTTCAGGT\n-chrM-9\tCTACGCTACCGT\tAGTTGGTAATGA\n-chrM-a\tCTGGGTGGGCTA\tACGCCGGACAAG\n-chrM-b\tAATGTACGTCCC\tTGGTTGCCGTGC\n-chrM-c\tTCGGGGTAACTG\tCCTCGACTGACC\n-chrM-d\tCGCGTTGCGCGC\tACGTGATGGCAC\n-chrM-e\tGTACCCCTTTGG\tGTATTGAGGACA\n-chrM-f\tGAACTACTACCT\tGGAGGGAACAAC\n-chrM-10\tTCTAGACGACCT\tCTAGAACGGGCC\n-chrM-11\tCGCGAAAGGGGA\tTGGTATCGGCCT\n-chrM-12\tGCAACGGTTCGT\tGCGCGTCATGGC\n-chrM-13\tGCAAGGGAACAA\tGCCGGTAACGAG\n-chrM-14\tGAGTAATGTGAG\tGTTGTAACAACA\n-chrM-15\tTAGTCAGTCTCC\tCACCGGCAGCCG\n-chrM-16\tAGCCGACCTGTT\tAGCGTTCTGTGG\n-chrM-17\tACTGCGCTCACG\tTACCCCGAAAGA\n-chrM-18\tGCCTTCCGGCCG\tGGAAATCTCCTC\n-chrM-19\tTGAGCCGGCTTC\tCATGTTAATGGG\n-chrM-1a\tAACCGATAATGG\tAGTTTAGCACCC\n-chrM-1b\tTTGCTTAGCTTC\tATCCCTATTTCA\n-chrM-1c\tCCTGTACGATAT\tAGCTGGGGATTA\n-chrM-1d\tAGTCTGTACTCG\tGCCTAATGTCCT\n-chrM-1e\tGAGCCGAAGATA\tAGGGATATCTGA\n-chrM-1f\tGTCTCTCGAATA\tGAGAGCAACGTG\n-chrM-20\tTTTTTCACGGTG\tACGGAACCGGGA\n-chrM-21\tTGCCGCCATTTC\tGAGCCCAAACAA\n-chrM-22\tGTACAAGTGGTC\tGTAAGTTAGTGC\n-chrM-23\tCCGCCCGAAAGT\tTTCGTCTAAAGT\n-chrM-24\tGCGACGCTTAAA\tGACACGTTCGGC\n-chrM-25\tGAGGCAGACTAG\tACAACAACCCGC\n-chrM-26\tGTAGTAGATCAC\tGGCATGGTCAGA\n-chrM-27\tTTAATGTGCTGC\tTCCGACGAAGGA\n-chrM-28\tGGGTTGGAATCT\tACAGTTCGGCAA\n-chrM-29\tACTGCAAGAGTA\tTGCGTATGCCTG\n-chrM-2a\tTCTGATTTGCAA\tCGTATGGCGACA\n-chrM-2b\tATACTCCTACCC\tCACTTAGGCGTT\n-chrM-2c\tCTCGAGTTACGG\tCCTGGTCGTCAT\n-chrM-2d\tGGGTATTCTTAC\tGCCGTCATAGCA\n-chrM-2e\tCCACGCAATTGC\tACGACAGTACTA\n-chrM-2f\tTTGTTGCTTTTA\tGGGATATTACAT\n-chrM-30\tTATTTCCATTCA\tAGGCAAGACCGT\n-chrM-31\tAAAGAATCTGTC\tATCGGATCAACA\n-chrM-32\tTTACCCATTATT\tCGGGCGACCTGG\n-chrM-33\tGGCTCCAAAGCT\tCTCTTTAGCTGG\n-chrM-34\tCGTACCCG
AATT\tGTAAAAACAAAA\n-chrM-35\tACCGACTGGAAG\tTACAGACCACAG\n-chrM-36\tTGGCGTAAAAGC\tAGTGTCTTGTGA\n-chrM-37\tGCTCAGCAATCA\tGCTTACGGGCCA\n-chrM-38\tCGCCACCAACCG\tCAAGCCCTTGGT\n-chrM-39\tATACGAAGATGG\tACGAAACTTAAT\n-chrM-3a\tCGCTGAAGATAC\tACATGTTAGAAA\n-chrM-3b\tCTTTAGTTTTTC\tTTCTTGGATGTT\n-chrM-3c\tGGCGTGTCGGTA\tCATCTAGGATCC\n-chrM-3d\tGAGCTAAATATC\tGGGATAGGTCAG\n-chrM-3e\tAGTCATCAACAA\tTTCGCCCAATGA\n-chrM-3f\tCCATAGGAACCG\tGGATAGCTGCTC\n-chrM-40\tCTAACTAGCGTT\tTAATGTGAATAG\n-chrM-41\tAATTTTCAGAAA\tTCCTTCGGACTT\n-chrM-42\tTCCTGGGATATC\tTTGGCCTTTCGC\n-chrM-43\tACACGATCGAGG\tGTCAACTGACGG\n-chrM-44\tTCGTGTGCAGGT\tAAAGTGATCTTG\n-chrM-45\tGCGGTGGCCCGG\tAATTCTCACCGC\n-chrM-46\tGCACCTGTGTCA\tAAGAGACCTTCA\n-chrM-47\tCCCTATCCCACA\tTATCACGCCAAC\n-chrM-48\tCCGTGGTCTTTC\tACACTATAAATA\n-chrM-49\tTGCAGGAAAGTT\tCAGTCGGAGTCT\n-chrM-4a\tGAAAGACGTGCC\tCTCCAATCTCTG\n-chrM-4b\tATCACAAGACTG\tAACATCTGGAAG\n-chrM-4c\tGCCACGGGCATC\tCTATCTATTTGT\n-chrM-4d\tTAATTCGTTCAA\tTCCGCCCACCGA\n-chrM-4e\tTTAAGATCTTGT\tTCTAGTATAGCG\n-chrM-4f\tTACGCTGACAGC\tTGTGTGACGTGG\n-chrM-50\tGTTTAGTGATAA\tTTATATAGAAAA\n-chrM-51\tTATCGACGAGGA\tTCCCGTCCAAAT\n-chrM-52\tCCTCTCTACGGG\tCTAATTGACCTA\n-chrM-53\tATCACCGGAGGC\tCGGTGTGTCCGA\n-chrM-54\tGAATGGTTCTGT\tTTACCCCGTGGT\n-chrM-55\tCAGTTTTGGACT\tTCTGCCTCGTAG\n-chrM-56\tATTATGCAAAGT\tTGAATCAGGCCG\n-chrM-57\tCATTTGTGTTTA\tAACTGTTCAGAG\n-chrM-58\tCGTCAATCATGA\tTCTGTCGGGACC\n-chrM-59\tAATTGGGTCCGT\tTTAATGCGGGTC\n-chrM-5a\tTCAATGACTTGA\tCCTCTAAAGCGT\n-chrM-5b\tCCGTGCCTCGAA\tACTTGAGGTACT\n-chrM-5c\tACTGTGCGGTAG\tCTTCGTTTGAAG\n-chrM-5d\tGGAGACTTAGGC\tAGGATCGTTTAG\n-chrM-5e\tATCGGAGTTAAA\tGAGGCGTTCAAT\n-chrM-5f\tCGCTCCACAGGT\tGGGCAAAGAACC\n-chrM-60\tGGACAACATCCT\tTTTGACTAATTT\n-chrM-61\tGCAGACATGACC\tTATGCTCCGGGA\n-chrM-62\tTAGACCTTAGTT\tCCAACCCTAACT\n-chrM-63\tGAGGAGCCTGGC\tAACTGTGCAATT\n-chrM-64\tACATGACATGCC\tCATTATGGCACT\n-chrM-65\tTTAACAGCCGCG\tGGTGGGGATGGA\n-chrM-66\tCAGTTAGTTGCT\tGGAGCAAAAGTC\n-chrM-67\tACCGCACCGCTC\tGGACTTACGGAT\n-chrM-68\tATTTTAGGCACA\tTTATACCACGGT\n-chr
M-69\tAGATCCCATGGC\tAAGTTAGGCCTG\n-chrM-6a\tTGGTCCGGCCTT\tTGGCCGTCCTCG\n-chrM-6b\tAAACCCGGTATC\tACCGGTGCTCAA\n-chrM-6c\tCATCCCTGACGT\tGAACAGTCACTG\n-chrM-6d\tTTTTGTAAACCA\tACACATCGAGGG\n-chrM-6e\tCTAGACGCTGGT\tGGTTTGATCAGT\n-chrM-6f\tGTCGCACTACTG\tCAATGCTGATTC\n-chrM-70\tAATGCACGCCGC\tATGCCATGGCAC\n-chrM-71\tGTACACGTCGAA\tGATACATCAATA\n-chrM-'..b'CA\n-chrM-379\tGTTTCATTACTT\tACGGAGGCGAAT\n-chrM-37a\tGTGTTAATATTA\tACATTTATCTAG\n-chrM-37b\tGGCAGATGTCTG\tGTTCATCAACCG\n-chrM-37c\tGATGACAACTCG\tTTCTGATTGCTG\n-chrM-37d\tTCGTGCTATTAA\tCAACGGAGAATA\n-chrM-37e\tGAGACATCCGAG\tGTCCCAAGGGTC\n-chrM-37f\tATAGGCCTTGCA\tCGTTATAGAAGG\n-chrM-380\tAACAGCCATTCC\tGGGCCAGAGATG\n-chrM-381\tCGAGTGCACGTA\tATGACGTCAGTG\n-chrM-382\tGCTTCCAAGGCT\tGCACTTTCCTGC\n-chrM-383\tTGTTGGAAGAAC\tTAGAAGGAGCAG\n-chrM-384\tGGTGTTAAGACT\tAAGAAATGAATT\n-chrM-385\tCGAGTCTGGCGA\tCTAAGGGACGTA\n-chrM-386\tGCCCTGGAGTAT\tCTCCAATCGATA\n-chrM-387\tTAAGTGATAGAG\tCGGCAGTGATGT\n-chrM-388\tCGAAGTATGTAA\tTGCACATGAAGG\n-chrM-389\tATGTCGAACTAC\tACCACGAGGATA\n-chrM-38a\tGGTGTTTAGGAC\tGATACACGCATA\n-chrM-38b\tCTCTTTCCGATT\tCCGGTCGGTCCC\n-chrM-38c\tGTAAGTAGCCCT\tGGGGGCTGCCAG\n-chrM-38d\tTCCTCAAGAGAA\tTATACTTATTTA\n-chrM-38e\tACGAAAACCGTC\tTAGAATGATTTC\n-chrM-38f\tGACAACGGGATC\tATTCGGTTGCCG\n-chrM-390\tGGCGCATTCCCA\tGAGCCTATACGA\n-chrM-391\tTATGCCCACCGC\tGGCCCTCCGCCT\n-chrM-392\tCGTCTTCCAACC\tCTCCCCAGATCC\n-chrM-393\tTGCAGAGGTGTC\tTCCACTCTTACG\n-chrM-394\tTATAGGCACCTC\tCAATTAGCAGAC\n-chrM-395\tAGGTATGCACTC\tAAATTTGTTCAC\n-chrM-396\tGTACAGCAAACC\tTACATATATTGG\n-chrM-397\tGTTCGCTGCGTC\tCCCAGTAATTAC\n-chrM-398\tGGTTCTAGATAC\tAGGATCAACTTT\n-chrM-399\tCAAGACCAATTA\tCTATACCTGTCA\n-chrM-39a\tTTACTCTCTGAC\tCTACATCCCATC\n-chrM-39b\tCCGAAATAGCCA\tGTAGAGTTGTCT\n-chrM-39c\tTTAGGTTTTATA\tAATAAAAGTCCG\n-chrM-39d\tAACGTCCCCTGC\tAGTTGTTTCGCG\n-chrM-39e\tATGGAATCACCC\tTTACGCTCCCTC\n-chrM-39f\tGTACTGATTGCC\tGGGGGTTCCTCG\n-chrM-3a0\tCTTAACGCAGGA\tTTTACCGTCCAT\n-chrM-3a1\tCTATGTTCGTCA\tTCTCAGTGTCAG\n-chrM-3a2\tACAAGCATATCA\tTGGTTGCCGATG\n-chrM-3a3
\tAACCTCTGAGAA\tATGCTGAACTAG\n-chrM-3a4\tCTATACTGGAAG\tGTATTGCCCTCA\n-chrM-3a5\tTTCGATAGCAAG\tGAGCTCGCTTCT\n-chrM-3a6\tTTTTTCACTGCT\tGTCTGCCACGGT\n-chrM-3a7\tCCTGGCGGTGTG\tAGTTGTCATTCG\n-chrM-3a8\tTGTGATCCCGGG\tTTTGAGCTGGTT\n-chrM-3a9\tAATAAACCCCCA\tACGCCTTAGAGA\n-chrM-3aa\tTTACGATTTTTA\tCCGCTCTAACGT\n-chrM-3ab\tACCTAGTTCTCC\tAATCAGCGCGGA\n-chrM-3ac\tGTCACCATTTCA\tAAATCACGACAT\n-chrM-3ad\tCTGGGAGAGATT\tCTTAGCACCTTC\n-chrM-3ae\tATGGCAGCACTT\tGAAATGGCGCCA\n-chrM-3af\tGCGCTGTACGAA\tAGTTTGACCGAG\n-chrM-3b0\tCGAGCCCACCAT\tCTCTCATGAACG\n-chrM-3b1\tACTTCTCGGCGT\tCTTGCATCGACA\n-chrM-3b2\tGACTGAAGACCC\tGCACAAGGCAAT\n-chrM-3b3\tGGGGTGCATCCC\tCGCACATTCCAC\n-chrM-3b4\tCGGCCTGAGGGC\tCGGGACCATTTT\n-chrM-3b5\tTTCCCTATGTTG\tTAGTTCCGGCTA\n-chrM-3b6\tCGCGGTAGATTA\tTGAGATGGGCTC\n-chrM-3b7\tTCCCTACCCTCA\tGCATGGCCAGTA\n-chrM-3b8\tGAATGCGGGCTG\tAACCTAGGTGGA\n-chrM-3b9\tCGTTTTTGCAGT\tCAGCCCAGTTTG\n-chrM-3ba\tTGGCCTTTGCTC\tCTGATCCAGAGC\n-chrM-3bb\tATAATATTCTGA\tCTGTCCTGACTG\n-chrM-3bc\tCAGGATTTGAGA\tTCCCGCGACCGA\n-chrM-3bd\tATACTTAAACTC\tCCCGAGCCCCCT\n-chrM-3be\tAGACCCGGCTTC\tCCAAATAAAAGG\n-chrM-3bf\tTCGTACGGACGC\tCATTATGTGCTT\n-chrM-3c0\tGACTCGCGCACA\tATGTCCGCGAAA\n-chrM-3c1\tTTCTTCCCTTGA\tCGTAAGACTATA\n-chrM-3c2\tGGTCCCCCACGG\tGGAGACAAATTA\n-chrM-3c3\tTGGATTCGGGGT\tGGACGCGTTGGC\n-chrM-3c4\tCTTCTACACGAA\tATGTGAGAAAGT\n-chrM-3c5\tCCGACGACCAGG\tTGTACTACGCTC\n-chrM-3c6\tCAGGATCTACAA\tCTCTATATGTGT\n-chrM-3c7\tCTTGACCAGTAG\tTTCCGGTCGATG\n-chrM-3c8\tGCCCGTGTGACG\tAAAATCGGTCGT\n-chrM-3c9\tTTGCCGTCGGTC\tTATGTTAGTATA\n-chrM-3ca\tGTGGGCGGGTTA\tGATACATATAGC\n-chrM-3cb\tGGAAGCGAAAGT\tTTAAGGAGTAGC\n-chrM-3cc\tTTTAACCGTTCG\tCTACTGAGCATA\n-chrM-3cd\tCTTAAACATGGG\tGAGTCACGTTTC\n-chrM-3ce\tATGCGTCAGACT\tGACTCTCCCTAT\n-chrM-3cf\tCGGCCGCCGCGT\tCTGTGATTTGGT\n-chrM-3d0\tGGTTTGTGTATC\tTAAGTCACAAAC\n-chrM-3d1\tCTTTGAACAAGA\tGATGGCAGCCCT\n-chrM-3d2\tACCGCGCGTGGA\tCATATCGCGCTA\n-chrM-3d3\tGCACTCTTGCCC\tTCAGTGAACAAA\n-chrM-3d4\tTACGTATGTCAA\tAATCGTATGTAA\n-chrM-3d5\tCAAGCCAACGAA\tAACGGTTCGCTG\n-chrM-3d6\tATCATTGAC
GTG\tAGTATCTACTCT\n-chrM-3d7\tATAGGCTTATCT\tAGTTGTTAATAG\n-chrM-3d8\tCCTCTCATTTGT\tAAGATCTGACTT\n-chrM-3d9\tACTCGGTCCTCG\tCTCCGGCCTAGA\n-chrM-3da\tACCGGCCTCTCT\tGGGAAATTAACA\n-chrM-3db\tGACTAACATCCC\tCAAGCGAGATAT\n-chrM-3dc\tTCACCCCTTACA\tCCCGTGCTATTC\n-chrM-3dd\tCCGCTAATACAA\tAAGCAACTGATT\n-chrM-3de\tGCGCTTGCAGAC\tGCTAGAGCATTT\n-chrM-3df\tTGTTCTTGCATG\tGAACGTACAGAG\n-chrM-3e0\tACCACGGACGCT\tCATCTATGAATC\n-chrM-3e1\tCACCGGAACTTT\tCCCACGTACCCC\n-chrM-3e2\tTTGAGCTCCTAA\tTCGCACATACCA\n-chrM-3e3\tCGCTCCAAGATT\tTATTCCGTCTAA\n-chrM-3e4\tAAACATCTTGGA\tTGTTAGCTGTCC\n-chrM-3e5\tAAGTCGACACAC\tAGCCCGTTCATA\n-chrM-3e6\tATCATAGAGACG\tTCCATGGAAGCT\n-chrM-3e7\tGACAAGACTGAC\tAGGCTGCCCGAA\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/chrM-alt.fa --- a/utils/chrM-alt.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,238 +0,0 @@\n->chrM gi|251831106|ref|NC_012920.1| Homo sapiens mitochondrion, complete genome\n-GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGG\n-GTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC\n-CTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTA\n-ATTAATTAATGCTTGTAGGACATAATAATAACAATTGAATGTCTGCACAGCCACTTTCCACACAGACATC\n-ATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA\n-AACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC\n-TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATA\n-CAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCC\n-AAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAGGCAATACACTGAAAATGTTTAGACGGGCTC\n-ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA\n-GCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC\n-AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAA\n-ACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA\n-TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACT\n-CACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAACAC\n-ACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC\n-AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC\n-CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATA\n-CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC\n-AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTAT\n-GAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGA\n-AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA\n-TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA\n-GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA\n-GCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCG\n-ATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA\
n-ATATAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCC\n-AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA\n-AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGAT\n-AGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC\n-CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG\n-TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC\n-ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAG\n-TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC\n-AATATCTACAATCAACCAAAAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA\n-AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC\n-ATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA\n-AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT\n-TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA\n-TGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT\n-AAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG\n-TCAAAGCGAACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA\n-GCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCTGGACATCCCG\n-ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG\n-AGTAATCCAGGTCGGTTTCTATCTACNTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCT\n-ACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGA\n-ACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAAT\n-TCCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCA\n-TTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCC\n-CCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCAC\n-ATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCC\n-CTCCCCATACCCAACCCCCTGGTCAACCTAAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAG\n-CCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGC\n-AGTAGCCCAAACAATCTCATATGAAGT
CACCCTAGCCATCATTCTACTATCAACATTACTAATAAGTGGC\n-TCCTTTAACCT'..b'CAGTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCTAA\n-CAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCATCAGTTGATGA\n-TACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATCCTATACAACCGTATCGGCGATATCGGTT\n-TCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAA\n-CGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGGT\n-CTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTA\n-TAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAAC\n-TCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCCGCAGTCTGCGCCCTTACACAAAATGACATC\n-AAAAAAATCGTAGCCTTCTCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAAC\n-CACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTC\n-CATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTC\n-ACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCTACTCCA\n-AAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCGC\n-TACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCCC\n-ACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCC\n-TATTCGCAGGATTTCTCATTACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTA\n-CCTAAAACTGACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAACC\n-AACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATTCTACCCTAGCA\n-TCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCT\n-AACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCA\n-ACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAA\n-TCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAA\n-CTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGA\n-CCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATAC\n-TCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAATACTCACCAAGACCTCAACCC\n-CTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCC\n-CCCTAAATAAATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATT
CAGAATAATAACACACCCG\n-ACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACC\n-CCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGAC\n-CAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAACTAAC\n-CCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAA\n-ACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTA\n-CTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATC\n-ATCCGCTACCTTCACGCCATTGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCC\n-TATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGC\n-AACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAAC\n-TTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACA\n-GTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACT\n-CCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATC\n-ACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACAT\n-TAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCC\n-TCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCGTAACAAACTA\n-GGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAAC\n-AACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCT\n-AACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATAC\n-TTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTC\n-CTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTTTTCCAAGGACAAATCAGA\n-GAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTC\n-ATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTACA\n-TTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCA\n-ATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCA\n-ACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAG\n-TACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCC\n-TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCG\n-CTCCGGG
CCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTC\n-ATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATG\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/correct-simple.py --- a/utils/correct-simple.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,199 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import print_function -from __future__ import absolute_import -from __future__ import unicode_literals -import sys -import errno -import logging -import argparse -import subprocess - -ARG_DEFAULTS = {'nbarcodes':20, 'mapq_thres':25, 'log':sys.stderr, 'volume':logging.ERROR} -DESCRIPTION = """""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**ARG_DEFAULTS) - - parser.add_argument('nbarcodes', metavar='barcodes to try', type=int, nargs='?', - help='') - parser.add_argument('-s', '--summary', action='store_true', - help='Only print the summary of how many families were rescued.') - parser.add_argument('-m', '--mapq', type=int) - parser.add_argument('-r', '--random', action='store_true') - parser.add_argument('-l', '--log', type=argparse.FileType('w'), - help='Print log messages to this file instead of to stderr. Warning: Will overwrite the file.') - parser.add_argument('-q', '--quiet', dest='volume', action='store_const', const=logging.CRITICAL) - parser.add_argument('-v', '--verbose', dest='volume', action='store_const', const=logging.INFO) - parser.add_argument('-D', '--debug', dest='volume', action='store_const', const=logging.DEBUG) - - args = parser.parse_args(argv[1:]) - - logging.basicConfig(stream=args.log, level=args.volume, format='%(message)s') - tone_down_logger() - - logging.info('Reading random barcodes from border-families.txt..') - rand_arg = '--random-source=border-families.txt' - if args.random: - rand_arg = '' - pipeline = 'cat border-families.txt | paste - - | shuf {} | head -n {}'.format(rand_arg, - args.nbarcodes) - commands = [cmd.split() for cmd in pipeline.split('|')] - process = make_pipeline(*commands) - families_by_barcode = {} - for line_raw in process.stdout: - line = line_raw.rstrip('\r\n') - fields = line.split() - family = {} - count1, barcode, order1, count2, barcode2, order2 = fields - 
assert barcode == barcode2, (barcode, barcode2) - if order1 == 'ab': - assert order2 == 'ba', barcode - elif order1 == 'ba': - assert order2 == 'ab', barcode - count1, count2 = count2, count1 - else: - fail(order1, order2, barcode) - family['count1'] = int(count1) - family['count2'] = int(count2) - family['barcode'] = barcode - families_by_barcode[barcode] = family - - logging.info('Reading barcodes.fq to find read names..') - hits = 0 - families_by_read_name = {} - line_num = 0 - with open('barcodes.fq', 'rU') as barcodes_fq: - for line in barcodes_fq: - line_num += 1 - if line_num % 4 == 1: - read_name = line[1:].rstrip('\r\n') - elif line_num % 4 == 2: - seq = line.rstrip('\r\n') - family = families_by_barcode.get(seq) - if family: - hits += 1 - family['read_name'] = read_name - families_by_read_name[read_name] = family - logging.info('hits: {}'.format(hits)) - - logging.info('Reading barcodes.bam to find similar barcodes..') - hits = 0 - neighbors_by_read_name = {} - # samtools view -f 256 barcodes.bam | awk '$1 == '$read_name' && $5 > 25 {print $3}' - process = subprocess.Popen(('samtools', 'view', '-f', '256', 'barcodes.bam'), stdout=subprocess.PIPE) - for line in process.stdout: - fields = line.split() - mapq = int(fields[4]) - if mapq >= args.mapq_thres: - read_name = fields[0] - family = families_by_read_name.get(read_name) - if family: - hits += 1 - read_name2 = fields[2] - neighbor = {'read_name':read_name2} - neighbors = family.get('neighbors', []) - neighbors.append(neighbor) - family['neighbors'] = neighbors - neighbors_by_read_name[read_name2] = neighbor - logging.info('hits: {}'.format(hits)) - - logging.info('Reading barcodes.fq to find sequences of neighbors..') - hits = 0 - line_num = 0 - neighbors_by_barcode = {} - with open('barcodes.fq', 'rU') as barcodes_fq: - for line in barcodes_fq: - line_num += 1 - if line_num % 4 == 1: - read_name = line[1:].rstrip('\r\n') - neighbor = neighbors_by_read_name.get(read_name) - if line_num % 4 == 2 and 
neighbor: - seq = line.rstrip('\r\n') - neighbor['barcode'] = seq - neighbors_by_barcode[seq] = neighbor - logging.info('hits: {}'.format(hits)) - - logging.info('Reading families.uniq.txt to get counts of neighbors..') - hits = 0 - with open('families.uniq.txt', 'rU') as families_uniq: - for line in families_uniq: - fields = line.split() - barcode = fields[1] - neighbor = neighbors_by_barcode.get(barcode) - if neighbor: - hits += 1 - count = int(fields[0]) - order = fields[2].rstrip('\r\n') - alpha = barcode[:len(seq)//2] - beta = barcode[len(seq)//2:] - swap = alpha >= beta - if (not swap and order == 'ab') or (swap and order == 'ba'): - neighbor['count1'] = count - neighbor['count2'] = 0 - elif (not swap and order == 'ba') or (swap and order == 'ab'): - neighbor['count1'] = 0 - neighbor['count2'] = count - else: - fail(order, barcode, swap) - logging.info('hits: {}'.format(hits)) - - logging.info('Printing results..') - total = 0 - passing = 0 - for family in families_by_barcode.values(): - total += 1 - count1 = family['count1'] - count2 = family['count2'] - if not args.summary: - print('{barcode}\t{count1}\t{count2}\t{read_name}'.format(**family)) - neighbors = family.get('neighbors') - if neighbors: - for neighbor in neighbors: - if not args.summary: - print('{barcode}\t{count1}\t{count2}\t{read_name}'.format(**neighbor)) - count1 += neighbor['count1'] - count2 += neighbor['count2'] - if count1 >= 3 and count2 >= 3: - if not args.summary: - print('PASS!') - passing += 1 - elif not args.summary: - print('fail') - - print('{} families rescued out of {} ({:0.2f}%)'.format(passing, total, 100*passing/total)) - - -def make_pipeline(*commands): - processes = [] - for command in commands: - if not processes: - processes.append(subprocess.Popen(command, stdout=subprocess.PIPE)) - else: - processes.append(subprocess.Popen(command, stdin=processes[-1].stdout, stdout=subprocess.PIPE)) - processes[0].stdout.close() - return processes[-1] - - -def tone_down_logger(): - 
"""Change the logging level names from all-caps to capitalized lowercase. - E.g. "WARNING" -> "Warning" (turn down the volume a bit in your log files)""" - for level in (logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG): - level_name = logging.getLevelName(level) - logging.addLevelName(level, level_name.capitalize()) - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - - -if __name__ == '__main__': - try: - sys.exit(main(sys.argv)) - except IOError as ioe: - if ioe.errno != errno.EPIPE: - raise |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/fastareader.py --- a/utils/fastareader.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,157 +0,0 @@ -#!/usr/bin/env python -import os -__version__ = '0.9' - - -class FastaReadGenerator(object): - """Read FASTA files and return one whole sequence at a time.""" - - def __init__(self, filepath): - self.line_generator = FastaLineGenerator(filepath) - - def __iter__(self): - return self.reads() - - def reads(self): - read = Read() - read.name = None - for line in self.line_generator: - if self.line_generator.name != read.name: - if read.name is not None: - yield read - read = Read() - read.name = self.line_generator.name - read.id = self.line_generator.id - read.seq += line - if read.name is not None: - yield read - - -class FastaLineGenerator(object): - """A simple FASTA parser that only reads a line at a time into memory. - Usage: - fasta = FastaLineGenerator('/home/user/sequence.fasta') - for line in fasta: - print "There is a sequence with this FASTA identifier: "+fasta.id - print "(Its full name is "+fasta.name+".)" - print "It has a line with this sequence: "+line - All strings (the line, id, and name) are stripped, and should not end in a - newline. - """ - - def __init__(self, filepath): - if not os.path.isfile(filepath): - raise IOError('File not found: "'+filepath+'"') - self.filepath = filepath - self.name = None - self.id = None - - def __iter__(self): - return self.lines() - - #TODO: Give some signal that we just finished a sequence. Otherwise, we can't validate that there - # aren't sequences with identical names one after another. - def lines(self): - with open(self.filepath, 'rU') as filehandle: - for line_raw in filehandle: - line = line_raw.strip() - if not line: - continue # allow empty lines - if line.startswith('>'): - self.name = line[1:] # remove ">" - if self.name: - self.id = self.name.split()[0] - else: - self.id = '' - continue - else: - yield line - - def bases(self): - """Generator that yields single bases, while still reading a whole line at - a time underneath. 
- This should be the best of both worlds: it yields a base at a time, but it - reads a line at a time from the file so it's not slow as molasses.""" - for line in self.lines(): - for base in line: - yield base - - def extract(self, start, end, chrom=None): - """Extract a subsequence based on a start and end coordinate. - The start and end are inclusive, 1-based. If chrom is not supplied, it will - default to the first chromosome (record) encountered in the FASTA file. - If the end coordinate is beyond the end of the chromosome, the returned - sequence will be truncated to the end of the chromosome. If the start - coordinate is beyond the end of the chromosome, an empty string will be - returned.""" - outseq = '' - line_start = 1 - for line in self: - if chrom is not None and self.id != chrom: - continue - line_end = line_start + len(line) - 1 - # if we haven't encountered the start yet, keep searching - if line_end < start: - line_start = line_end + 1 - continue - slice_start = max(start, line_start) - line_start - slice_end = min(end, line_end) - line_start + 1 - outseq += line[slice_start:slice_end] - # done? (on the last line?) - if line_end >= end: - break - line_start = line_end + 1 - return outseq - - -#TODO: see 0notes.txt -class FastaBaseGenerator(object): - """For when you absolutely have to read one base at a time. VERY SLOW. 
- Usage: - fasta = FastaBaseGenerator('/home/user/sequence.fasta') - for base in fasta: - print "There is a sequence with this FASTA identifier: "+fasta.id - print "This is the next base from it: "+base - """ - - def __init__(self, filepath): - self.filehandle = open(filepath, 'rU') - self.header = False - self.name = None - self.id = None - self._in_id = None - - def __iter__(self): - return self.new() - - def new(self): - newline = True - while True: - base = self.filehandle.read(1) - if not base: - raise StopIteration - elif base == '\n': - newline = True - self.header = False - elif newline and base == '>': - newline = False - self.header = True - self._in_id = True - self.name = '' - self.id = '' - elif self.header: - if self._in_id: - if base.isspace(): - self._in_id = False - else: - self.id += base - self.name += base - else: - newline = False - yield base - -class Read(object): - def __init__(self): - self.seq = '' - self.id = '' - self.name = '' |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/fastareader.pyc |
b |
Binary file utils/fastareader.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/fastqreader.py --- a/utils/fastqreader.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,99 +0,0 @@ -#!/usr/bin/env python -import os -__version__ = '0.5' - - -class FastqReadGenerator(object): - """A simple FASTQ parser that returns reads one at a time. - Handles multi-line read/quality values. - Usage: - fastq = FastqReadGenerator('/home/user/sequence.fq') - for read in fastq: - print "There is a read with this identifier: "+read.id - print "(Its full name is "+read.name+".)" - print "Its sequence is: "+read.seq - print "Its quality is: "+read.qual - All values (id, name, seq, qual) are whitespace-stripped. - """ - - def __init__(self, filepath): - if not os.path.isfile(filepath): - raise IOError('File not found: "'+filepath+'"') - self.filepath = filepath - self.name = None - self.id = None - - def __iter__(self): - return self.reads() - - def reads(self): - with open(self.filepath, 'rU') as filehandle: - read = None - line_type = 'first' - for line_raw in filehandle: - line = line_raw.strip() - if not line: - continue # allow empty lines - # Determine what kind of line we're in - if line.startswith('@'): - if line_type == 'first': - line_type = 'name' - elif line_type == 'plus': - line_type = 'qual' - elif line_type == 'qual': - # Determine if it's another qual line or a name line. - # If the quality scores observed so far already cover the whole read, we've seen all - # the quality information already. It should be a name line. 
- if len(read.qual) >= len(read.seq): - line_type = 'name' - else: - line_type = 'qual' - else: - raise FormatError('"@" starts line in wrong context:\n'+line_raw) - elif line.startswith('+'): - if line_type == 'seq': - line_type = 'plus' - elif line_type == 'qual': - pass - else: - raise FormatError('"+" starts line in wrong context:\n'+line_raw) - elif line_type == 'name': - line_type = 'seq' - elif line_type == 'plus': - line_type = 'qual' - elif line_type == 'first': - raise FormatError('First line must start with a "@":\n'+line_raw) - else: - raise FormatError('Invalid parser state: line_type "{}", first char "{}":\n{}' - .format(line_type, line[0], line_raw)) - if line_type == 'name': - # Return the previous read. - if read is not None: - yield read - read = Read() - read.name = line[1:] # remove ">" - if read.name: - read.id = read.name.split()[0] - else: - read.id = '' - elif line_type == 'seq': - read.seq += line - elif line_type == 'qual': - read.qual += line - # Return the last read. - if read is not None: - yield read - - -class Read(object): - def __init__(self): - self.seq = '' - self.qual = '' - self.id = '' - self.name = '' - - -class FormatError(Exception): - def __init__(self, message=None): - if message: - Exception.__init__(self, message) |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/fastqreader.pyc |
b |
Binary file utils/fastqreader.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/frags.fq --- a/utils/frags.fq Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,40000 +0,0 @@\n-@chrM_10006_10405_0:0:0_0:0:0_0/1\n-AATTCGGTTCAGTCTAATCCTTTTTGTAGTCACTCATAGGCCAGACTTAGGGCTAGGATGATGATTAATAAGAGGGATGACATAACTATTAGTGGCAGGTTAGTTGTTTGTAGGGCTCATGGTAGGGGTAAAAGGAGGGCAATTTCTAGATCAAATAATAAGAAGGTAATAGCTACTAAGAAGAATTTTATGGAGAAAGGGACGCGGGCGGGGGATATAGGGTCGAAGCCGCACTCGTAAGGGGTGGATTTTTCTATGTAGCCGTTGAGTTGTGGTAGTCAAAATGTAATAATTATTAGTAGTAAGGCTAGGAGGGTGTTGATTATTAAAATTAAGGCGAAGTTTATTACTCTTTTTTGAATGTTGTCAAAACTAGTTAATTGGAAGTTAACGGTACTAT\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_15329_15728_0:0:0_0:0:0_1/1\n-GGAGTCAATAAAGTGATTGGCTTAGTGGGCGAAATATTATGCTTTGTTGTTTGGATATATGGAGGATGGGGATTATTGCTAGGATGAGGATGGATAGTAATAGGGCAAGGACGCCTCCTAGTTTGTTAGGGACGGATCGGAGAATTGTGTAGGCGAATAGGAAATATCATTCGGGCTTGATGTGGGGAGGGGTGTTTAAGGGGTTGGCTAGGGTATAATTGTCTGGGTCGCCTAGGAGGTCTGGTGAGAATAGTGTTAATGTCATTAAGGAGAGAAGGAAGAGAAGTAAGCCGAGGGCGTCTTTGATTGTGTAGTAAGGGTGGAAGGTGATTTTATCGGAATGGGAGGTGATTCCTAGGGGGTTGTTTGATCCCGTTTCGTGCAAGAATAGGAGGTGGAG\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_5038_5437_0:0:0_0:0:0_2/1\n-TAATAGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATCCTAACTACTACCGCATTCCTACTACTCAACTTAAACTCCAGCACCACGACCCTACTACTATCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAA
CAATAGCCTCATCATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCAC\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_8121_8520_0:0:0_0:0:0_3/1\n-TAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTAACCTTTTAAGTTAAAGATTAAGAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCATAATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATATTAAACACAAACTACCACCTACCTCCCTCACCAAAGCCCATAAAAATAAAAAATTATAACAAACCCTGAGA\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_9366_9765_0:0:0_0:0:0_4/1\n-TGAAGGGAGACTCGAAGTACTCTGAGGCTTGTAGGAGGGTAAAATAGAGACCCAGTAAAATTGTAATAAGCAGTGCTTGAATTATTTGGTTTCGGTTGTTTTCTATTAGACTATGGTGAGCTCAGGTGATTGATACTCCTGATGCGAGTAATACGGATGTGTTTAGGAGTGGGACTTCTAGGGGATTTAGCGGGGTGATGCCTGTTGGGGGCCAGTGCCCTCCTAATTGGGGGGTAGGGGCTAGGCTGGAGTGGTAAAAGGCTCAGAAAAATCCTGCGAAGAAAAAAACTTCTGAGGTAATAAATAGGATTATCCCGTATCGAAGGCCTTTTTGGACAGGTGGTGTGTGGTGGCCTTGGTATGTGCTTTCTCGTGTTACATCGCGCCATCATTGGTATAT\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
IIIIIIIIIIIIIIIIIIIII'..b'GTCAACTGGAGTTTTTTACAACTCAGGTGAGTTTTAGCTTTATTGGGGAGGGGGTGATCTAAAACACTCTTTACGCCGGCTTCTATTGACTTGGGTTAATCGTGTGACCGCGGTGGCTGGCACGAAATTGACCAACCCTGGGGTTAGTATAGCTTAGTTAAACTTTCGTTTATTGCTAAAGGTTAATCACTGCTGTTTCCCGTGGGGGTGTGGCT\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_9883_10282_0:0:0_0:0:0_270c/1\n-AGGGGTAAAAGGAGGGCAATTTCTAGATCAAATAATAAGAAGGTAATAGCTACTAAGAAGAATTTTATGGAGAAAGGGACGCGGGCGGGGGATATAGGGTCGAAGCCGCACTCGTAAGGGGTGGATTTTTCTATGTAGCCGTTGAGTTGTGGTAGTCAAAATGTAATAATTATTAGTAGTAAGGCTAGGAGGGTGTTGATTATTAAAATTAAGGCGAAGTTTATTACTCTTTTTTGAATGTTGTCAAAACTAGTTAATTGGAAGTTAACGGTACTATTTATACTAAAAGAGTAAGACCCTCATCAATAGATGGAGACATACAGAAATAGTCAAACCACATCTACAAAATGCCAGTATCAGGCGGCGGCTTCGAAGCCAAAGTGATGTTTGGATGTAAAGT\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_13932_14331_0:0:0_0:0:0_270d/1\n-CACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCTAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAACTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCAC\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_9921_10320_0:0:0_0:0:0_270e/1\n-GCCTGATACTGGCATTTTGTAGATGTGGTTTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTAGTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCTAGCCTTACTACTAATAATTATTACATTTTGACTACCACAACTCAACGGCTACATAGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCCCGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTACCTTCTTATTATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAACAACTAACCTGCCACTAATAG\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_11983_12382_0:0:0_0:0:0_270f/1\n-TTAGGGAAGTCAGGGTTAGGGTGGTTATAGTAGTGTGCATGGTTATTACTTTTATTTGGAGTTGCACCAAAATTTTTGGGGCCTAAGACCAATGGATAGCTGTTATCCTTTAAAAGTTGAGAAAGCCATGTTGTTAGACATGGGGGCATGAGTTAGCAGTTCTTGTGAGCTTTCTCGGTAAATAAGGGGTCGTAAGCCTCTGTTGTCAGATTCACAATCTGATGTTTTGGTTAAACTATATTTACAAGAGGAAAACCCGGTAATGATGTCGGGGTTGAGGGATAGGAGGAGAATGGGGGATAGGTGTATGAACATGAGGGTGTTTTCTCGTGTGAATGAGGGTTTTATGTTGTTAATGTGGTGGGTGAGTGAGCCCCATTGTGTTGTGGTAAATATGTAG\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/frags1.fq --- a/utils/frags1.fq Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,40000 +0,0 @@\n-@chrM_10006_10405_0:0:0_0:0:0_0/1\n-AATTCGGTTCAGTCTAATCCTTTTTGTAGTCACTCATAGGCCAGACTTAGGGCTAGGATGATGATTAATAAGAGGGATGACATAACTATTAGTGGCAGGTTAGTTGTTTGTAGGGCTCATGGTAGGGGTAAAAGGAGGGCAATTTCTAGATCAAATAATAAGAAGGTAATAGCTACTAAGAAGAATTTTATGGAGAAAGGGACGCGGGCGGGGGATATAGGGTCGAAGCCGCACTCGTAAGGGGTGGATTTTTCTATGTAGCCGTTGAGTTGTGGTAGTCAAAATGTAATAATTATTAGTAGTAAGGCTAGGAGGGTGTTGATTATTAAAATTAAGGCGAAGTTTATTACTCTTTTTTGAATGTTGTCAAAACTAGTTAATTGGAAGTTAACGGTACTAT\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_15329_15728_0:0:0_0:0:0_1/1\n-GGAGTCAATAAAGTGATTGGCTTAGTGGGCGAAATATTATGCTTTGTTGTTTGGATATATGGAGGATGGGGATTATTGCTAGGATGAGGATGGATAGTAATAGGGCAAGGACGCCTCCTAGTTTGTTAGGGACGGATCGGAGAATTGTGTAGGCGAATAGGAAATATCATTCGGGCTTGATGTGGGGAGGGGTGTTTAAGGGGTTGGCTAGGGTATAATTGTCTGGGTCGCCTAGGAGGTCTGGTGAGAATAGTGTTAATGTCATTAAGGAGAGAAGGAAGAGAAGTAAGCCGAGGGCGTCTTTGATTGTGTAGTAAGGGTGGAAGGTGATTTTATCGGAATGGGAGGTGATTCCTAGGGGGTTGTTTGATCCCGTTTCGTGCAAGAATAGGAGGTGGAG\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_5038_5437_0:0:0_0:0:0_2/1\n-TAATAGCAGTTCTACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATCCTAACTACTACCGCATTCCTACTACTCAACTTAAACTCCAGCACCACGACCCTACTACTATCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAATTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTTTTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAA
CAATAGCCTCATCATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACCTACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAACGTAAAAATAAAATGACAGTTTGAACATACAAAACCCAC\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_8121_8520_0:0:0_0:0:0_3/1\n-TAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAGGGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTAGAGCCCACTGTAAAGCTAACTTAGCATTAACCTTTTAAGTTAAAGATTAAGAGAACCAACACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCATAATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATATTAAACACAAACTACCACCTACCTCCCTCACCAAAGCCCATAAAAATAAAAAATTATAACAAACCCTGAGA\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_9366_9765_0:0:0_0:0:0_4/1\n-TGAAGGGAGACTCGAAGTACTCTGAGGCTTGTAGGAGGGTAAAATAGAGACCCAGTAAAATTGTAATAAGCAGTGCTTGAATTATTTGGTTTCGGTTGTTTTCTATTAGACTATGGTGAGCTCAGGTGATTGATACTCCTGATGCGAGTAATACGGATGTGTTTAGGAGTGGGACTTCTAGGGGATTTAGCGGGGTGATGCCTGTTGGGGGCCAGTGCCCTCCTAATTGGGGGGTAGGGGCTAGGCTGGAGTGGTAAAAGGCTCAGAAAAATCCTGCGAAGAAAAAAACTTCTGAGGTAATAAATAGGATTATCCCGTATCGAAGGCCTTTTTGGACAGGTGGTGTGTGGTGGCCTTGGTATGTGCTTTCTCGTGTTACATCGCGCCATCATTGGTATAT\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
IIIIIIIIIIIIIIIIIIIII'..b'GTCAACTGGAGTTTTTTACAACTCAGGTGAGTTTTAGCTTTATTGGGGAGGGGGTGATCTAAAACACTCTTTACGCCGGCTTCTATTGACTTGGGTTAATCGTGTGACCGCGGTGGCTGGCACGAAATTGACCAACCCTGGGGTTAGTATAGCTTAGTTAAACTTTCGTTTATTGCTAAAGGTTAATCACTGCTGTTTCCCGTGGGGGTGTGGCT\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_9883_10282_0:0:0_0:0:0_270c/1\n-AGGGGTAAAAGGAGGGCAATTTCTAGATCAAATAATAAGAAGGTAATAGCTACTAAGAAGAATTTTATGGAGAAAGGGACGCGGGCGGGGGATATAGGGTCGAAGCCGCACTCGTAAGGGGTGGATTTTTCTATGTAGCCGTTGAGTTGTGGTAGTCAAAATGTAATAATTATTAGTAGTAAGGCTAGGAGGGTGTTGATTATTAAAATTAAGGCGAAGTTTATTACTCTTTTTTGAATGTTGTCAAAACTAGTTAATTGGAAGTTAACGGTACTATTTATACTAAAAGAGTAAGACCCTCATCAATAGATGGAGACATACAGAAATAGTCAAACCACATCTACAAAATGCCAGTATCAGGCGGCGGCTTCGAAGCCAAAGTGATGTTTGGATGTAAAGT\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_13932_14331_0:0:0_0:0:0_270d/1\n-CACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCTAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAACTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCAC\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_9921_10320_0:0:0_0:0:0_270e/1\n-GCCTGATACTGGCATTTTGTAGATGTGGTTTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTAGTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAAAAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCTAGCCTTACTACTAATAATTATTACATTTTGACTACCACAACTCAACGGCTACATAGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCCCGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTACCTTCTTATTATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAACAACTAACCTGCCACTAATAG\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n-@chrM_11983_12382_0:0:0_0:0:0_270f/1\n-TTAGGGAAGTCAGGGTTAGGGTGGTTATAGTAGTGTGCATGGTTATTACTTTTATTTGGAGTTGCACCAAAATTTTTGGGGCCTAAGACCAATGGATAGCTGTTATCCTTTAAAAGTTGAGAAAGCCATGTTGTTAGACATGGGGGCATGAGTTAGCAGTTCTTGTGAGCTTTCTCGGTAAATAAGGGGTCGTAAGCCTCTGTTGTCAGATTCACAATCTGATGTTTTGGTTAAACTATATTTACAAGAGGAAAACCCGGTAATGATGTCGGGGTTGAGGGATAGGAGGAGAATGGGGGATAGGTGTATGAACATGAGGGTGTTTTCTCGTGTGAATGAGGGTTTTATGTTGTTAATGTGGTGGGTGAGTGAGCCCCATTGTGTTGTGGTAAATATGTAG\n-+\n-IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/fuzzy-match.py --- a/utils/fuzzy-match.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,238 +0,0 @@\n-#!/usr/bin/env python\n-from __future__ import division\n-from __future__ import print_function\n-import os\n-import sys\n-import logging\n-import argparse\n-import tempfile\n-import subprocess\n-import multiprocessing\n-import consensus\n-import swalign\n-\n-ARG_DEFAULTS = {\'bar_len\':24, \'win_len\':5, \'shift\':3, \'processes\':1, \'loglevel\':logging.ERROR}\n-USAGE = "%(prog)s [options]"\n-DESCRIPTION = """Try to match barcodes with sequencing errors.\n-Match based on a small window in the middle of each half of the barcode.\n-Then it will align all the unique barcodes which match and then print the similarity of each to the\n-consensus."""\n-EPILOG = """This will print each kmer observed, the barcodes which contained it, and their\n-similarities. The output is 4 tab-delimited columns: 1. whether the kmer was in the first or second\n-half of the barcode (0 for first half, 1 for second) 2. the kmer 3. the barcode 4. its similarity to\n-the consensus"""\n-\n-# Algorithm from Paul Medvedev (email from 2015-12-16)\n-\n-def main(argv):\n-\n- parser = argparse.ArgumentParser(description=DESCRIPTION)\n- parser.set_defaults(**ARG_DEFAULTS)\n-\n- parser.add_argument(\'infile\', metavar=\'families.tsv\', nargs=\'?\',\n- help=\'Input file (sorted output of make-barcodes.awk).\')\n- parser.add_argument(\'-n\', \'--num-barcodes\', type=int,\n- help=\'Only read in this many different barcodes.\')\n- parser.add_argument(\'-c\', \'--consensus\', action=\'store_true\',\n- help=\'Include consensus sequences in the output. They will appear the same as normal barcodes, \'\n- \'but they will be printed before each set of barcodes matching a kmer. (So you can filter \'\n- \'them out by looking for when either column 1 or 2 change, then discard the line after \'\n- \'the change.\')\n- parser.add_argument(\'-b\', \'--bar-len\', type=int,\n- help=\'Barcode length. 
Default: %(default)s\')\n- parser.add_argument(\'-w\', \'--win-len\', type=int,\n- help=\'Window (k-mer) size. Default: %(default)s\')\n- parser.add_argument(\'-s\', \'--shift\', type=int,\n- help=\'Bases to shift the window (number of k-mers to check). Default: %(default)s\')\n- parser.add_argument(\'-q\', \'--quiet\', dest=\'loglevel\', action=\'store_const\', const=logging.CRITICAL)\n- parser.add_argument(\'-v\', \'--verbose\', dest=\'loglevel\', action=\'store_const\', const=logging.INFO)\n- parser.add_argument(\'--debug\', dest=\'loglevel\', action=\'store_const\', const=logging.DEBUG)\n- parser.add_argument(\'-p\', \'--processes\', type=int,\n- help=\'Number of worker processes to use. Default: %(default)s\')\n-\n- args = parser.parse_args(argv[1:])\n-\n- assert args.processes > 0, \'-p must be greater than zero\'\n- logging.basicConfig(stream=sys.stderr, level=args.loglevel, format=\'%(message)s\')\n-\n- starts = calc_starts(args.bar_len, args.win_len, args.shift)\n-\n- if args.infile:\n- infile = open(args.infile)\n- else:\n- infile = sys.stdin\n-\n- logging.info(\'Beginning to read in data.\')\n- # For each window sequence (kmer), build a set of barcodes which contained it, in any of the shift\n- # positions. 
Do this for both halves of the barcode (independently, at the moment).\n- kmer_dicts = [{}, {}]\n- last_barcode = None\n- barcode_count = 0\n- for line in infile:\n- fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n- if len(fields) != 8:\n- logging.warn(\'Line contains incorrect number of fields.\')\n- continue\n- barcode = fields[0]\n- # Only do it for each unique barcode (in the sorted output, there will be runs of lines with\n- # the same barcode).\n- if barcode == last_barcode:\n- continue\n- barcode_count += 1\n- # for each half of the barcode\n- for kmer_dict, start in zip(kmer_dicts, starts):\n- # for each shift position (trying kmers at each of args.shift offsets)\n- for i in range(args.shift):\n- kmer = barcode[start+i:start+i+args.win_len]\n- kmer_set = kmer_dict.get(kmer, set())\n- kmer_set.add(barcode)\n- kmer_dict[kmer] = kmer_set\n- last_barcode = barcode\n- if args'..b'worker = workers[worker_i]\n- results = worker.recv()\n- if results:\n- process_results(*results, print_consensus=args.consensus)\n- worker.send(None)\n-\n-\n-def calc_starts(bar_len, win_len, shift):\n- half_len = bar_len//2\n- assert win_len < half_len, \'Window length must be less than half the barcode length.\'\n- # Place the window right in the middle of the first half of the barcode.\n- # Offset is where it should start.\n- offset = (half_len-win_len)/2\n- # Move it backward by half the shift length so that the average kmer start is at the offset\n- # calculated above.\n- start1 = int(offset - shift/2)\n- start2 = start1 + half_len\n- return start1, start2\n-\n-\n-def process_results(dict_num, kmer, consensus_seq, barcodes, similarities, print_consensus=False):\n- if print_consensus:\n- print(dict_num, kmer, consensus_seq, 1.0, sep=\'\\t\')\n- for barcode, similarity in zip(barcodes, similarities):\n- print(dict_num, kmer, barcode, similarity, sep=\'\\t\')\n-\n-\n-def open_workers(num_workers):\n- """Open the required number of worker processes."""\n- workers = []\n- for i in 
range(num_workers):\n- parent_pipe, child_pipe = multiprocessing.Pipe()\n- process = multiprocessing.Process(target=worker_function, args=(child_pipe,))\n- process.start()\n- workers.append(parent_pipe)\n- return workers\n-\n-\n-def delegate(workers, run_num, dict_num, kmer, barcodes):\n- worker_i = run_num % len(workers)\n- worker = workers[worker_i]\n- if run_num >= len(workers):\n- logging.info(\'Parent: Trying to receive results from worker..\')\n- results = worker.recv()\n- else:\n- results = None\n- args = (dict_num, kmer, barcodes)\n- logging.info(\'Parent: Sending new data to worker..\')\n- worker.send(args)\n- return results\n-\n-\n-##### HAPPENS IN CHILD PROCESSES #####\n-\n-def worker_function(child_pipe):\n- while True:\n- # logging.info(\'Worker: Listening for new data from parent..\')\n- args = child_pipe.recv()\n- if args is None:\n- break\n- # logging.info(\'Worker: Sending results back to parent..\')\n- child_pipe.send(process_barcodes(*args))\n-\n-\n-def process_barcodes(dict_num, kmer, barcodes):\n- """Perform a multiple sequence alignment on a set of barcodes and parse the result.\n- Uses MAFFT."""\n- # If there\'s only one barcode, we don\'t have to do an alignment.\n- if len(barcodes) == 1:\n- return dict_num, kmer, barcodes[0], barcodes, [1.0]\n- with tempfile.NamedTemporaryFile(\'w\', delete=False, prefix=\'align.msa.\') as family_file:\n- for i, barcode in enumerate(barcodes):\n- family_file.write(\'>{}\\n\'.format(i))\n- family_file.write(barcode+\'\\n\')\n- with open(os.devnull, \'w\') as devnull:\n- try:\n- command = [\'mafft\', \'--nuc\', \'--quiet\', family_file.name]\n- output = subprocess.check_output(command, stderr=devnull)\n- except (OSError, subprocess.CalledProcessError):\n- return None\n- os.remove(family_file.name)\n- alignment = read_fasta(output, upper=True)\n- consensus_seq = consensus.get_consensus(alignment)\n- similarities = []\n- for barcode in barcodes:\n- similarities.append(get_similarity(consensus_seq, barcode))\n- 
return dict_num, kmer, consensus_seq, barcodes, similarities\n-\n-\n-def read_fasta(fasta, upper=False):\n- """Quick and dirty FASTA parser. Return a list of the sequences only (no names)."""\n- sequences = []\n- sequence = \'\'\n- for line in fasta.splitlines():\n- if line.startswith(\'>\'):\n- if upper:\n- sequence = sequence.upper()\n- if sequence:\n- sequences.append(sequence)\n- sequence = \'\'\n- continue\n- sequence += line.strip()\n- if upper:\n- sequence = sequence.upper()\n- if sequence:\n- sequences.append(sequence)\n- return sequences\n-\n-\n-def get_similarity(seq1, seq2):\n- align = swalign.smith_waterman(seq1, seq2)\n- logging.debug(align.target+\'\\n\'+align.query)\n- return align.matches / len(align.query)\n-\n-\n-def fail(message):\n- sys.stderr.write(message+"\\n")\n- sys.exit(1)\n-\n-if __name__ == \'__main__\':\n- sys.exit(main(sys.argv))\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/get_msa.py --- a/utils/get_msa.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,156 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -import os -import sys -import argparse -import tempfile -import subprocess -sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) -import consensus -import seqtools - -OPT_DEFAULTS = {'format':'plain', 'qual':20, 'qual_format':'sanger'} -USAGE = "%(prog)s [options]" -DESCRIPTION = """""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**OPT_DEFAULTS) - - parser.add_argument('seqs', metavar='sequence', nargs='*', - help='The alignment.') - parser.add_argument('-i', '--input', - help='Provide the sequences in this input file instead of as command-line arguments. ' - 'Give "-" to use stdin.') - parser.add_argument('-f', '--format', choices=('plain', 'duplex'), - help='Input format. "plain" is a simple list of the sequences, one on each line. "duplex" is ' - 'the 8-column format of the family-sorted read data from the duplex pipeline. It must be ' - 'the read pairs from a single alpha/beta barcode combination (both the alpha-beta and ' - 'beta-alpha strands). If "duplex" is given, you must also specify which of the four ' - 'possible alignments to output with --mate and --order.') - parser.add_argument('-m', '--mate', type=int, choices=(1, 2)) - parser.add_argument('-o', '--order', choices=('ab', 'ba')) - parser.add_argument('-F', '--qual-format', choices=('sanger',)) - parser.add_argument('-q', '--qual', type=int, - help='Quality threshold: Default: %(default)s') - - args = parser.parse_args(argv[1:]) - - qual_thres = ' ' - if args.qual_format == 'sanger': - qual_thres = chr(args.qual + 33) - else: - fail('Error: Unsupported FASTQ quality format "{}".'.format(args.qual_format)) - # Check arguments. 
- if not (args.seqs or args.input): - fail('Error: You must provide sequences either in a file with --input or as arguments.') - elif args.seqs and args.input: - fail('Error: You cannot provide sequences in both a file and command-line arguments.') - if args.format == 'duplex' and not (args.mate and args.order): - fail('Error: If the --format is duplex, you must specify a --mate and --order.') - - # Read input. - quals = [] - if args.input: - if args.format == 'plain': - if args.input == '-': - seqs = [line.strip() for line in sys.stdin] - else: - with open(args.input) as infile: - seqs = [line.strip() for line in infile] - elif args.format == 'duplex': - if args.input == '-': - (seqs, quals) = parse_duplex(sys.stdin, args.mate, args.order) - else: - with open(args.input) as infile: - (seqs, quals) = parse_duplex(infile, args.mate, args.order) - else: - seqs = args.seqs - - align = make_msa(seqs) - if quals: - quals = seqtools.transfer_gaps_multi(quals, align, gap_char_out=' ') - cons = consensus.get_consensus(align, quals, qual_thres=qual_thres, gapped=True) - - output = format_alignment(cons, align, quals, qual_thres=ord(qual_thres)) - - for seq in output: - print seq - - -def parse_duplex(infile, mate, order): - seqs = [] - quals = [] - for line in infile: - (bar, this_order, name1, seq1, qual1, name2, seq2, qual2) = line.rstrip('\r\n').split('\t') - if this_order == order: - if mate == 1: - seqs.append(seq1) - quals.append(qual1) - elif mate == 2: - seqs.append(seq2) - quals.append(qual2) - return seqs, quals - - -def make_msa(seqs): - """Perform a multiple sequence alignment on a set of sequences. - Uses MAFFT.""" - i = 0 - #TODO: Replace with tempfile.mkstemp()? 
- with tempfile.NamedTemporaryFile('w', delete=False, prefix='msa.') as family_file: - for seq in seqs: - i+=1 - header = '>{}\n'.format(i) - family_file.write(header) - family_file.write(seq+'\n') - with open(os.devnull, 'w') as devnull: - try: - command = ['mafft', '--nuc', '--quiet', family_file.name] - output = subprocess.check_output(command, stderr=devnull) - except (OSError, subprocess.CalledProcessError): - return None - os.remove(family_file.name) - return read_fasta(output) - - -def read_fasta(fasta): - """Quick and dirty FASTA parser. Return only the list of sequences (no names). - Warning: Reads the entire contents of the file into memory at once.""" - sequences = [] - sequence = '' - for line in fasta.splitlines(): - if line.startswith('>'): - if sequence: - sequences.append(sequence) - sequence = '' - continue - sequence += line.strip() - if sequence: - sequences.append(sequence) - return sequences - - -def format_alignment(cons, seqs, quals=(), qual_thres=32, id_char='.'): - output = [cons.upper()] - for i, seq in enumerate(seqs): - outseq = '' - for j, seq_base in enumerate(seq.upper()): - if quals and seq_base != '-' and ord(quals[i][j]) < qual_thres: - outseq += ' ' - elif cons[j] == seq_base: - outseq += id_char - else: - outseq += seq_base - output.append(outseq) - return output - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/getreads.py --- a/utils/getreads.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,156 +0,0 @@ -"""A simple parser for FASTA, FASTQ, SAM, etc. Create generators that just return the read name and -sequence. -All format parsers follow this API: - with open('sequence.fasta') as fasta: - for read in getreads.getparser(fasta, filetype='fasta'): - print "There is a sequence with this FASTA identifier: "+read.id - print "Its sequence is "+read.seq -The properties of Read are: - name: The entire FASTA header line, SAM column 1, etc. - id: The first whitespace-delimited part of the name. - seq: The sequence. - qual: The quality scores (unless the format is FASTA). -""" - - -def getparser(filehandle, filetype='fasta'): - if filetype == 'fasta': - return FastaReader(filehandle) - elif filetype == 'fastq': - return FastqReader(filehandle) - elif filetype == 'sam': - return SamReader(filehandle) - elif filetype == 'tsv': - return TsvReader(filehandle) - else: - raise ValueError('Illegal argument: filetype=\''+filetype+'\'') - - -class FormatError(Exception): - def __init__(self, message=None): - if message: - Exception.__init__(self, message) - - -class Read(object): - def __init__(self, name='', seq='', id_='', qual=''): - self.name = name - self.seq = seq - self.id = id_ - self.qual = qual - - -class Reader(object): - """Base class for all other parsers.""" - def __init__(self, filehandle): - self.filehandle = filehandle - def __iter__(self): - return self.parser() - - -class TsvReader(Reader): - """A parser for a simple tab-delimited format. - Column 1: name - Column 2: sequence - Column 3: quality scores (optional)""" - def parser(self): - for line in self.filehandle: - fields = line.rstrip('\r\n').split('\t') - if len(fields) < 2: - continue - read = Read() - read.name = fields[0] - if read.name: - read.id = read.name.split()[0] - read.seq = fields[1] - if len(fields) >= 3: - read.qual = fields[2] - yield read - - -class SamReader(Reader): - """A simple SAM parser. - Assumptions: - Lines starting with "@" with 3 fields are headers. 
All others are alignments. - All alignment lines have 11 or more fields. Other lines will be skipped. - """ - def parser(self): - for line in self.filehandle: - fields = line.split('\t') - if len(fields) < 11: - continue - # Skip headers. - if fields[0].startswith('@') and len(fields[0]) == 3: - continue - read = Read() - read.name = fields[0] - if read.name: - read.id = read.name.split()[0] - read.seq = fields[9] - read.qual = fields[10].rstrip('\r\n') - yield read - - -class FastaReader(Reader): - """A simple FASTA parser that reads one sequence at a time into memory.""" - def parser(self): - read = Read() - while True: - line_raw = self.filehandle.readline() - if not line_raw: - if read.seq: - yield read - raise StopIteration - line = line_raw.strip() - # Allow empty lines. - if not line: - continue - if line.startswith('>'): - if read.seq: - yield read - read = Read() - read.name = line[1:] # remove ">" - if read.name: - read.id = read.name.split()[0] - continue - else: - read.seq += line - - -class FastqReader(Reader): - """A simple FASTQ parser. Can handle multi-line sequences, though.""" - def parser(self): - read = Read() - state = 'header' - while True: - line_raw = self.filehandle.readline() - if not line_raw: - if read.seq: - yield read - raise StopIteration - line = line_raw.strip() - # Allow empty lines. - if not line: - continue - if state == 'header': - if not line.startswith('@'): - raise FormatError('line state = "header" but line does not start with "@"') - if read.seq: - yield read - read = Read() - read.name = line[1:] # remove '@' - if read.name: - read.id = read.name.split()[0] - state = 'sequence' - elif state == 'sequence': - if line.startswith('+'): - state = 'plus' - else: - read.seq += line - elif state == 'plus' or state == 'quality': - state = 'quality' - togo = len(read.seq) - len(read.qual) - read.qual += line[:togo] - # The end of the quality lines is when we have a quality string as long as the sequence. 
- if len(read.qual) >= len(read.seq): - state = 'header' |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/getreads.pyc |
b |
Binary file utils/getreads.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/msa2fa.awk --- a/utils/msa2fa.awk Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,22 +0,0 @@ -# A quick script to convert the .msa.tsv output of sscs.py back into FASTA format. - -BEGIN { - FS = "\t"; - OFS = "\t"; -} - -$2 == "CONSENSUS" { - if ($1 == last) { - mate = 2; - } else { - mate = 1; - } - printf(">%s.%d:%d\n", $1, mate, pairs); - print $3; - pairs = 0; - last = $1; -} - -$2 != "CONSENSUS" { - pairs++; -} \ No newline at end of file |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/muts.genome.tsv --- a/utils/muts.genome.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,27 +0,0 @@ -. chrM . . 600 snv G -. chrM . . 1200 snv G -. chrM . . 1800 snv G -. chrM . . 2400 snv A -. chrM . . 3000 snv T -. chrM . . 3600 snv A -. chrM . . 4200 snv A -. chrM . . 4800 snv A -. chrM . . 5400 snv C -. chrM . . 6000 snv G -. chrM . . 6600 snv A -. chrM . . 7200 snv G -. chrM . . 7800 snv T -. chrM . . 8400 snv A -. chrM . . 9000 snv A -. chrM . . 9600 snv A -. chrM . . 10200 snv A -. chrM . . 10800 snv A -. chrM . . 11400 snv G -. chrM . . 12000 snv T -. chrM . . 12600 snv C -. chrM . . 13200 snv C -. chrM . . 13800 snv G -. chrM . . 14400 snv T -. chrM . . 15000 snv T -. chrM . . 15600 snv G -. chrM . . 16200 snv A |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/muts.old.tsv --- a/utils/muts.old.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,167 +0,0 @@ -chrM-0-0 chrM 13830 14229 60 del 1 -chrM-1-0 chrM 6646 7045 93 snv T -chrM-1-0 chrM 6646 7045 4 snv G -chrM-1-0 chrM 6646 7045 64 snv T -chrM-1-0 chrM 6646 7045 82 snv G -chrM-1-0 chrM 6646 7045 73 del 3 -chrM-1-1 chrM 6646 7045 69 snv A -chrM-1-2 chrM 6646 7045 93 snv T -chrM-1-2 chrM 6646 7045 77 snv G -chrM-1-2 chrM 6646 7045 46 del 3 -chrM-1-2 chrM 6646 7045 76 snv C -chrM-1-2 chrM 6646 7045 54 snv G -chrM-1-3 chrM 6646 7045 17 snv C -chrM-1-4 chrM 6646 7045 91 snv A -chrM-1-5 chrM 6646 7045 47 snv C -chrM-1-5 chrM 6646 7045 86 snv C -chrM-1-5 chrM 6646 7045 95 snv C -chrM-1-5 chrM 6646 7045 70 snv A -chrM-1-6 chrM 6646 7045 93 snv T -chrM-1-6 chrM 6646 7045 77 snv G -chrM-1-7 chrM 6646 7045 34 snv C -chrM-2-0 chrM 7111 7510 69 snv A -chrM-2-0 chrM 7111 7510 3 snv A -chrM-2-0 chrM 7111 7510 13 snv G -chrM-3-0 chrM 16093 16492 45 snv G -chrM-3-0 chrM 16093 16492 9 snv G -chrM-4-0 chrM 9743 10142 33 snv C -chrM-4-0 chrM 9743 10142 48 snv A -chrM-5-0 chrM 6250 6649 37 snv T -chrM-5-1 chrM 6250 6649 56 snv C -chrM-5-1 chrM 6250 6649 58 snv T -chrM-5-1 chrM 6250 6649 5 snv C -chrM-5-2 chrM 6250 6649 39 snv A -chrM-5-3 chrM 6250 6649 1 del 4 -chrM-5-3 chrM 6250 6649 11 snv A -chrM-5-3 chrM 6250 6649 1 snv G -chrM-5-4 chrM 6250 6649 59 snv T -chrM-5-4 chrM 6250 6649 24 snv A -chrM-5-5 chrM 6250 6649 42 snv A -chrM-5-7 chrM 6250 6649 92 snv G -chrM-5-7 chrM 6250 6649 60 del 1 -chrM-5-7 chrM 6250 6649 92 snv T -chrM-5-9 chrM 6250 6649 47 snv C -chrM-5-9 chrM 6250 6649 94 snv A -chrM-5-10 chrM 6250 6649 92 snv C -chrM-6-0 chrM 8176 8575 34 ins CG -chrM-6-1 chrM 8176 8575 23 snv C -chrM-6-1 chrM 8176 8575 14 snv A -chrM-6-1 chrM 8176 8575 31 snv T -chrM-6-1 chrM 8176 8575 95 snv C -chrM-6-1 chrM 8176 8575 3 snv G -chrM-6-2 chrM 8176 8575 23 snv C -chrM-6-2 chrM 8176 8575 14 snv A -chrM-6-2 chrM 8176 8575 75 snv G -chrM-6-2 chrM 8176 8575 41 snv T -chrM-6-3 chrM 8176 8575 23 snv C -chrM-6-3 chrM 8176 8575 43 snv G -chrM-6-3 chrM 8176 8575 97 snv G 
-chrM-6-3 chrM 8176 8575 28 snv G -chrM-6-4 chrM 8176 8575 21 ins T -chrM-6-5 chrM 8176 8575 23 snv C -chrM-6-5 chrM 8176 8575 51 snv T -chrM-6-6 chrM 8176 8575 23 snv C -chrM-6-6 chrM 8176 8575 88 snv G -chrM-6-7 chrM 8176 8575 1 snv A -chrM-6-8 chrM 8176 8575 23 snv C -chrM-6-8 chrM 8176 8575 39 del 1 -chrM-6-8 chrM 8176 8575 52 snv A -chrM-6-9 chrM 8176 8575 23 snv C -chrM-6-9 chrM 8176 8575 89 snv T -chrM-6-9 chrM 8176 8575 22 snv G -chrM-6-9 chrM 8176 8575 94 snv G -chrM-6-10 chrM 8176 8575 34 snv A -chrM-6-10 chrM 8176 8575 52 del 1 -chrM-6-11 chrM 8176 8575 23 snv C -chrM-6-11 chrM 8176 8575 89 snv T -chrM-6-11 chrM 8176 8575 80 snv G -chrM-6-12 chrM 8176 8575 13 snv T -chrM-6-12 chrM 8176 8575 57 snv A -chrM-6-13 chrM 8176 8575 81 snv A -chrM-6-13 chrM 8176 8575 67 snv C -chrM-6-14 chrM 8176 8575 23 snv C -chrM-6-14 chrM 8176 8575 1 snv G -chrM-6-14 chrM 8176 8575 99 snv G -chrM-6-14 chrM 8176 8575 31 del 1 -chrM-6-14 chrM 8176 8575 73 snv C -chrM-6-15 chrM 8176 8575 34 snv A -chrM-6-15 chrM 8176 8575 91 snv G -chrM-6-16 chrM 8176 8575 1 snv A -chrM-6-17 chrM 8176 8575 43 ins A -chrM-6-17 chrM 8176 8575 9 snv A -chrM-6-18 chrM 8176 8575 33 snv A -chrM-6-18 chrM 8176 8575 62 snv T -chrM-6-18 chrM 8176 8575 71 ins T -chrM-6-18 chrM 8176 8575 71 snv G -chrM-6-19 chrM 8176 8575 23 snv C -chrM-6-19 chrM 8176 8575 9 snv G -chrM-6-20 chrM 8176 8575 71 snv C -chrM-6-21 chrM 8176 8575 34 snv A -chrM-6-21 chrM 8176 8575 89 snv G -chrM-6-23 chrM 8176 8575 55 snv C -chrM-6-23 chrM 8176 8575 15 snv G -chrM-6-23 chrM 8176 8575 29 snv C -chrM-6-24 chrM 8176 8575 71 snv C -chrM-6-24 chrM 8176 8575 93 snv T -chrM-6-24 chrM 8176 8575 83 snv C -chrM-6-24 chrM 8176 8575 55 snv C -chrM-6-27 chrM 8176 8575 23 snv C -chrM-6-27 chrM 8176 8575 89 snv T -chrM-6-27 chrM 8176 8575 13 snv C -chrM-6-27 chrM 8176 8575 26 ins T -chrM-6-27 chrM 8176 8575 36 snv G -chrM-6-28 chrM 8176 8575 66 snv A -chrM-6-28 chrM 8176 8575 94 del 1 -chrM-6-28 chrM 8176 8575 0 snv T -chrM-6-29 chrM 8176 
8575 69 snv G -chrM-6-30 chrM 8176 8575 23 snv C -chrM-6-30 chrM 8176 8575 27 snv C -chrM-6-30 chrM 8176 8575 83 snv C -chrM-6-30 chrM 8176 8575 34 snv T -chrM-6-30 chrM 8176 8575 71 snv A -chrM-6-30 chrM 8176 8575 94 snv A -chrM-6-31 chrM 8176 8575 34 snv A -chrM-6-31 chrM 8176 8575 43 snv G -chrM-6-32 chrM 8176 8575 1 snv A -chrM-6-32 chrM 8176 8575 21 snv C -chrM-6-32 chrM 8176 8575 55 snv C -chrM-7-0 chrM 7538 7937 81 del 1 -chrM-7-0 chrM 7538 7937 79 snv G -chrM-7-0 chrM 7538 7937 34 snv C -chrM-7-0 chrM 7538 7937 82 snv G -chrM-7-0 chrM 7538 7937 99 snv T -chrM-7-0 chrM 7538 7937 86 snv G -chrM-7-1 chrM 7538 7937 2 snv A -chrM-7-1 chrM 7538 7937 33 snv C -chrM-7-1 chrM 7538 7937 86 snv G -chrM-7-1 chrM 7538 7937 86 snv C -chrM-7-3 chrM 7538 7937 25 snv C -chrM-7-3 chrM 7538 7937 28 del 1 -chrM-7-3 chrM 7538 7937 58 ins A -chrM-7-4 chrM 7538 7937 29 snv G -chrM-7-4 chrM 7538 7937 21 snv T -chrM-7-4 chrM 7538 7937 71 snv G -chrM-7-5 chrM 7538 7937 8 snv C -chrM-7-6 chrM 7538 7937 94 snv G -chrM-7-7 chrM 7538 7937 29 del 1 -chrM-7-7 chrM 7538 7937 83 snv A -chrM-7-7 chrM 7538 7937 9 snv T -chrM-7-8 chrM 7538 7937 18 snv A -chrM-7-8 chrM 7538 7937 57 snv G -chrM-7-8 chrM 7538 7937 15 del 1 -chrM-7-8 chrM 7538 7937 30 snv A -chrM-7-8 chrM 7538 7937 75 snv C -chrM-7-9 chrM 7538 7937 8 ins AATATTG -chrM-7-9 chrM 7538 7937 9 snv A -chrM-7-9 chrM 7538 7937 24 snv A -chrM-7-9 chrM 7538 7937 97 snv G -chrM-7-9 chrM 7538 7937 50 del 1 -chrM-7-10 chrM 7538 7937 58 snv A -chrM-7-10 chrM 7538 7937 10 snv C -chrM-8-0 chrM 15893 16292 63 snv T -chrM-8-0 chrM 15893 16292 8 snv A -chrM-8-0 chrM 15893 16292 39 snv C -chrM-8-0 chrM 15893 16292 14 snv G -chrM-8-0 chrM 15893 16292 14 snv G -chrM-8-0 chrM 15893 16292 95 snv C -chrM-9-0 chrM 4396 4795 26 ins A |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/muts.wgsim.tsv --- a/utils/muts.wgsim.tsv Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,14 +0,0 @@ -chrM 2685 - A - -chrM 2787 A G - -chrM 4379 C T - -chrM 6208 T W + -chrM 6519 - A - -chrM 8027 G K + -chrM 8793 T W + -chrM 9693 C Y + -chrM 10399 CCG - - -chrM 12331 A W + -chrM 13620 T A - -chrM 15497 - A - -chrM 15831 T W + -chrM 16482 A G - |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/outconv.awk --- a/utils/outconv.awk Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,16 +0,0 @@ - -substr($0, 1, 1) == ">" { - header = $0 - split(header, fields1, ".") - split(fields1[2], fields2) - mate = fields2[1] - if (mate == target) { - print fields1[1]" "fields2[2] - } - next -} -{ - if (mate == target) { - } -} |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/outconv.py --- a/utils/outconv.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,107 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import print_function -from __future__ import absolute_import -from __future__ import unicode_literals -import sys -import argparse - -ARG_DEFAULTS = {'input':sys.stdin} -USAGE = "%(prog)s [options]" -DESCRIPTION = """Split interleaved outputs of dunovo.py into pairs of paired-end files.""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**ARG_DEFAULTS) - - parser.add_argument('input', metavar='consensuses.fa', type=argparse.FileType('r'), - help='Interleaved consensus reads (DCS or SSCS) from dunovo.py.') - parser.add_argument('-1', '--out1', metavar='out_1.fa', type=argparse.FileType('w'), required=True, - help='Output filename for mate 1 reads. CAUTION: Will overwrite any existing file.') - parser.add_argument('-2', '--out2', metavar='out_2.fa', type=argparse.FileType('w'), required=True, - help='Output filename for mate 2 reads. CAUTION: Will overwrite any existing file.') - - args = parser.parse_args(argv[1:]) - - """ -SSCS: ->AAAAAAAAAAAACTAAAATACAAA.ab.1 15 -ACTGATGAAAAGGCTGTTATTGTATCTGATGTGTAGTGTATGGCTAAGAAAAGACCTGTAATGATTTGGACTATTAGGCAGACTCCTAGAAGGGACCCAA ->AAAAAAAAAAAACTAAAATACAAA.ab.2 15 -ATCCAAACACAACCAACATCCCCCCTAAATAAATTAAAAAAACTATTAAACCTAAAAACGATCCACCAAACCCTAAAACCATTAAACAACCAACAAACCC ->AAAAAAAAAAAAGACCACGTTTCT.ab.1 15 -TGGAATTCAGCCTACTAGCAATTATCCCCATACTAATCAACAAAAAAAACCCACGATCAACTGAAGCAGCAACAAAATACTTCGTCACACAAGCAACAGC ->AAAAAAAAAAAAGACCACGTTTCT.ba.2 18 -TGGAATTCAGCCTACTAGCAATTATCCCCATACTAATCAACAAAAAAAACCCACGATCAACTGAAGCAGCAACAAAATACTTCGTCACACAAGCAACAGC ->AAAAAAAAAAAAGACCACGTTTCT.ab.2 15 -GTAGAGTTGAGTAGCGGGTAAATTTGAATTAAAATTGATAGGGGAGCAATTTTTTGTCATGTAAGAAGAATAAGTCCTATGTGCAGTGGGATCCCTTGAG ->AAAAAAAAAAAAGACCACGTTTCT.ba.1 18 -GTAGAGTTGAGTAGCGGGTAAATTTGAATTAAAATTGATAGGGGAGCAATTTTTTGTCATGTAAGAAGAATAAGTCCTATGTGCAGTGGGATCCCTTGAG - -DCS: ->AAAAAAAACACCAAATACGCCTAC.1 28-39 
-GCTATGAATATAGGGGCTGTAAGAATAATATAGATTATGAGGTTGAGTAGAGTGAGGGATGGGTTGTAAGGAAGAATTGCTAATATTCATCCTATGTGGG ->AAAAAAAACACCAAATACGCCTAC.2 28-39 -TACTTCGTCACACAAGCAACAGCCTCAATAATTATCCTCCTGGCCATCGTACTCAACTATAAACAACTAGGAACATGAATATTTCAACAACAAACAAACG - """ - - intype = None - line_num = 0 - sscs_buffer = {} - for line_raw in args.input: - line_num += 1 - line = line_raw.rstrip('\r\n') - if line.startswith('>'): - # We're in a header line. - barcode, strand, mate, famsizes = parse_header(line) - if intype is None: - if strand is None: - intype = 'DCS' - else: - intype = 'SSCS' - else: - # We're in a sequence line. - seq = line - if intype == 'DCS': - if mate == '1': - args.out1.write('>{} {}\n{}\n'.format(barcode, famsizes, seq)) - elif mate == '2': - args.out2.write('>{} {}\n{}\n'.format(barcode, famsizes, seq)) - else: - fail('Error: Invalid mate "{}" on line {}.'.format(mate, line_num)) - elif intype == 'SSCS': - # Need to get SSCS in correct order. - # Collect reads until they're properly paired, then print both. 
- if (barcode, strand) in sscs_buffer: - stored_read = sscs_buffer[(barcode, strand)] - new_read = {'mate':mate, 'famsizes':famsizes, 'seq':seq} - if stored_read['mate'] == '1' and mate == '2': - read1, read2 = stored_read, new_read - elif stored_read['mate'] == '2' and mate == '1': - read1, read2 = new_read, stored_read - args.out1.write('>{}.{} {famsizes}\n{seq}\n'.format(barcode, strand, **read1)) - args.out2.write('>{}.{} {famsizes}\n{seq}\n'.format(barcode, strand, **read2)) - del sscs_buffer[(barcode, strand)] - else: - sscs_buffer[(barcode, strand)] = {'mate':mate, 'famsizes':famsizes, 'seq':seq} - - -def parse_header(header): - read_id, famsizes = header.split() - id_fields = read_id[1:].split('.') - barcode = id_fields[0] - strand = None - mate = id_fields[-1] - if len(id_fields) == 3: - strand = id_fields[1] - return barcode, strand, mate, famsizes - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/precheck.py --- a/utils/precheck.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,123 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -import sys -import argparse -import getreads - -OPT_DEFAULTS = {'tag_len':12, 'const_len':5, 'min_reads':3, 'human':True} -USAGE = "%(prog)s [options]" -DESCRIPTION = """Print statistics on the raw duplex sequencing reads.""" -EPILOG = """Warning: This tracks all barcodes in a dict, so it can take a lot of memory. A guideline -is about 200 bytes per (12bp) tag. For example, it took about 800MB for a 10GB, 32 million read -dataset with an average of 4 pairs per barcode.""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION, epilog=EPILOG) - parser.set_defaults(**OPT_DEFAULTS) - - parser.add_argument('infile1', metavar='reads_1.fq', - help='The first mates in the read pairs.') - parser.add_argument('infile2', metavar='reads_2.fq', - help='The second mates in the read pairs.') - parser.add_argument('-t', '--tag-length', dest='tag_len', type=int) - parser.add_argument('-c', '--constant-length', dest='const_len', type=int) - parser.add_argument('-C', '--computer', dest='human', action='store_false', - help='Print results in computer-readable format. This will be a tab-delimited version of the ' - 'output, in the same order, but with two columns: stat name and value.') - parser.add_argument('-m', '--min-reads', type=int, - help='The minimum number of reads required in each single-stranded family. 
Default: ' - '%(default)s') - parser.add_argument('-v', '--validate', action='store_true', - help='Check the id\'s of the reads to make sure the correct reads are mated into pairs (the ' - 'id\'s of mates must be identical).') - - args = parser.parse_args(argv[1:]) - - with open(args.infile1) as infileh1: - with open(args.infile2) as infileh2: - barcodes = read_files(infileh1, infileh2, tag_len=args.tag_len, validate=args.validate) - - stats = get_stats(barcodes, tag_len=args.tag_len, min_reads=args.min_reads) - print_stats(stats, min_reads=args.min_reads, human=args.human) - - -def read_files(infileh1, infileh2, tag_len=12, validate=False): - reader1 = getreads.getparser(infileh1, filetype='fastq').parser() - reader2 = getreads.getparser(infileh2, filetype='fastq').parser() - barcodes = {} - while True: - try: - read1 = reader1.next() - read2 = reader2.next() - except StopIteration: - break - if validate and read1.id != read2.id: - raise getreads.FormatError('Read pair mismatch: "{}" and "{}"'.format(read1.id, read2.id)) - alpha = read1.seq[:tag_len] - beta = read2.seq[:tag_len] - barcode = alpha + beta - if barcode in barcodes: - barcodes[barcode] += 1 - else: - barcodes[barcode] = 1 - return barcodes - - -def get_stats(barcodes, tag_len=12, min_reads=3): - passed_sscs = 0 - duplexes = 0 - passed_duplexes = 0 - singletons = 0 - total_pairs = 0 - for barcode, count in barcodes.items(): - total_pairs += count - if count == 1: - singletons += 1 - if count >= min_reads: - passed_sscs += 1 - alpha = barcode[:tag_len] - beta = barcode[tag_len:] - reverse = beta + alpha - if reverse in barcodes: - duplexes += 1 - if count >= min_reads and barcodes[reverse] >= min_reads: - passed_duplexes += 1 - # Each full duplex ends up being counted twice. Halve it to get the real total. 
- stats = { - 'pairs':total_pairs, - 'barcodes':len(barcodes), - 'avg_pairs':total_pairs/len(barcodes), - 'singletons':singletons, - 'duplexes':duplexes//2, - 'passed_sscs':passed_sscs*2, - 'passed_duplexes':passed_duplexes, - } - return stats - - -def print_stats(stats, min_reads=3, human=True): - all_stats = stats.copy() - all_stats.update({'min_reads':min_reads}) - if human: - print """Total read pairs:\t{pairs} -Unique barcodes:\t{barcodes} -Avg # of read pairs per barcode:\t{avg_pairs} -Singletons:\t{singletons} -Barcodes with reverse (other strand) present:\t{duplexes} -Passing threshold of {min_reads} reads per single-strand consensus: -\tSingle-strand consensus sequences:\t{passed_sscs} -\tDuplex consensus sequences:\t{passed_duplexes}""".format(**all_stats) - else: - for stat in ('pairs', 'barcodes', 'avg_pairs', 'singletons', 'duplexes', 'min_reads', - 'passed_sscs', 'passed_duplexes'): - print '{}\t{}'.format(stat, all_stats[stat]) - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/query.fa --- a/utils/query.fa Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->chrM_12350_12821_0:0:0_0:0:0_0/1 -GCTCGGGCGTATCATCAACTGATGAGCAAGAAGGATATAATTCCTACGCCCTCTCAGCCGATGAACAGTTGGAATAGGTTGTTAGCGGTAACTAAGATTAGTATGGTAATTAGGAAGATGAGTAGATATTTGAAGAACTGATTAATGTTTGGGTCTGAGTTTATATATCACAGTGAGAATTCTATGATGGACCATGTAACGAACAATGCTACAGGGATGAATATTATGGAGAAGTAGTCTAGTTTGAAGCTTAGGGAGAGCTGGGTTGTTTGGGTTGTGGCTCAGTGTCAGTTCGAGATAATAACTTCTTGGTCTAGGCACATGAATATTGTTGTGGGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTTTACATAATGGGGGTATGAGTTTT |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/read.fq --- a/utils/read.fq Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@chrM_12350_12821_0:0:0_0:0:0_0/1 -GCTCGGGCGTATCATCAACTGATGAGCAAGAAGGATATAATTCCTACGCCCTCTCAGCCGATGAACAGTTGGAATAGGTTGTTAGCGGTAACTAAGATTAGTATGGTAATTAGGAAGATGAGTAGATATTTGAAGAACTGATTAATGTTTGGGTCTGAGTTTATATATCACAGTGAGAATTCTATGATGGACCATGTAACGAACAATGCTACAGGGATGAATATTATGGAGAAGTAGTCTAGTTTGAAGCTTAGGGAGAGCTGGGTTGTTTGGGTTGTGGCTCAGTGTCAGTTCGAGATAATAACTTCTTGGTCTAGGCACATGAATATTGTTGTGGGGAAGAGACTGATAATAAAGGTGGATGCGACAATGGATTTTACATAATGGGGGTATGAGTTTT -+ -IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/sim-check.py --- a/utils/sim-check.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,142 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import print_function -import re -import os -import sys -import argparse -import fastqreader -script_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.dirname(script_dir)) -import swalign - -CANON = 'ACGT-' - -WGSIM_ID_REGEX = r'^(.+)_\d+_\d+_\d+:\d+:\d+_\d+:\d+:\d+_([0-9a-f]+)/[12]$' -ARG_DEFAULTS = {'print_stats':True} -USAGE = "%(prog)s [options]" -DESCRIPTION = """Correlate (labeled) reads from duplex pipeline with truth from simulator input, -and print the number of errors.""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**ARG_DEFAULTS) - - parser.add_argument('reads', type=argparse.FileType('r'), - help='Output from duplex pipeline. Should be the tsv produced by sim-label.py.') - parser.add_argument('frags', type=fastqreader.FastqReadGenerator, - help='--frag-file from sim.py.') - parser.add_argument('-i', '--ignore-ambiguous', action='store_true', - help='Don\'t consider ambiguous bases ("N", "R", etc.) in SNV errors. Specifically, it will ' - 'ignore any mismatch between a non-gap base in the fragment and read base that isn\'t ' - 'one of "'+CANON+'".') - parser.add_argument('-a', '--print-alignments', action='store_true', - help='Print the alignments of each read with each fragment. Mostly for debug purposes.') - parser.add_argument('-S', '--no-stats', dest='print_stats', action='store_false', - help='Don\'t print the normal output of statistics on differences.') - - args = parser.parse_args(argv[1:]) - - reads = iter(args.reads) - frags = iter(args.frags) - while True: - # Read in the next output read. 
- try: - read_line = next(reads) - except StopIteration: - break - fields = read_line.rstrip('\r\n').split('\t') - assert len(fields) == 7, fields - read = dict(zip(('chrom', 'frag_num', 'frag_id', 'bar', 'reads+', 'reads-', 'seq'), fields)) - # Read in fragments until we find the one corresponding to the output read. - frag_chrom = None - frag_frag_id = None - while not (read['chrom'] == frag_chrom and read['frag_id'] == frag_frag_id): - try: - frag = next(frags) - except StopIteration: - break - match = re.search(WGSIM_ID_REGEX, frag.id) - if match: - frag_chrom = match.group(1) - frag_frag_id = match.group(2) - else: - sys.stderr.write('Invalid wgsim read name: {}\n'.format(frag.id)) - if frag_chrom is None and frag_frag_id is None: - break - # Align the output read to the fragment. - align = swalign.smith_waterman_duplex(frag.seq, read['seq']) - assert len(align.target) == len(align.query) - if args.print_alignments: - print(align.target) - diffs = get_diffs(align.target, align.query, print_mid=args.print_alignments, - ignore_ambig=args.ignore_ambiguous) - if args.print_alignments: - print(align.query) - read_len = len(read['seq']) - snvs = ins = dels = 0 - for diff in diffs: - if diff['type'] == 'snv': - snvs += 1 - elif diff['type'] == 'ins': - ins += 1 - elif diff['type'] == 'del': - dels += 1 - match_rate = round(align.matches/read_len, 2) - if args.print_stats: - print(read['bar'], read['frag_id'], read['reads+'], read['reads-'], read_len, - read_len-align.matches, match_rate, len(diffs), snvs, ins, dels, sep='\t') - - -def get_diffs(target, query, print_mid=False, ignore_ambig=False): - diffs = [] - diff = None - coord = 0 - for base1, base2 in zip(target, query): - if base1 != '-': - coord += 1 - if base1 == base2: - # Finish ongoing indels and add them to the list. - if diff is not None: - # But omit the "indel" that's just the unaligned portion at the start. 
- if diff['coord'] > 1: - diffs.append(diff) - diff = None - if print_mid: - sys.stdout.write('|') - elif ignore_ambig and base1 != '-' and base2 not in CANON: - if print_mid: - sys.stdout.write(' ') - elif base1 == '-': - if diff is None: - diff = {'coord':coord, 'type':'ins', 'alt':base2} - else: - diff['alt'] += base2 - if print_mid: - sys.stdout.write(' ') - elif base2 == '-': - if diff is None: - diff = {'coord':coord-1, 'type':'del', 'alt':1} - else: - diff['alt'] += 1 - if print_mid: - sys.stdout.write(' ') - else: - diffs.append({'coord':coord, 'type':'snv', 'alt':base2}) - if print_mid: - sys.stdout.write(' ') - if diff is not None: - diffs.append(diff) - if print_mid: - print() - return diffs - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/sim-genome.py --- a/utils/sim-genome.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,94 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import print_function -import sys -import random -import argparse -import numpy -import sim -import fastareader - -ARG_DEFAULTS = {'spacing':600, 'indel_rate':0.15, 'ext_rate':0.3} -USAGE = "%(prog)s [options]" -DESCRIPTION = """Generate a version of the input reference with randomly added variants.""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**ARG_DEFAULTS) - - parser.add_argument('ref', type=fastareader.FastaLineGenerator, - help='The original reference genome (FASTA).') - parser.add_argument('-m', '--mutations', type=argparse.FileType('w'), - help='Write inserted mutations here.') - parser.add_argument('-s', '--spacing', type=int, - help='Average (or exact) spacing between variants, in bp. Note: must be less than the line ' - 'width in the reference file. Default: %(default)s') - parser.add_argument('-r', '--random', action='store_true', - help='Randomly distribute the variants instead of evenly spacing them.') - parser.add_argument('-N', '--ref-name', - help='Name the output sequence this.') - parser.add_argument('-i', '--indel-rate', type=float, - help='Default: %(default)s') - parser.add_argument('-E', '--extension-rate', dest='ext_rate', type=float, - help='Default: %(default)s') - parser.add_argument('-S', '--seed', type=int, - help='Default: random.') - - args = parser.parse_args(argv[1:]) - - if args.seed is None: - seed = random.randint(0, 2**31-1) - sys.stderr.write('seed: {}\n'.format(seed)) - else: - seed = args.seed - random.seed(seed) - - var_prob = 1/args.spacing - - next_var = args.spacing - coord = 0 - chr_name = None - for line in args.ref: - if args.ref.name != chr_name: - chr_name = args.ref.name - if args.ref_name: - chr_id = args.ref_name.split()[0] - print('>'+args.ref_name) - else: - chr_id = args.ref.id - print('>'+chr_name) - end_coord = coord + len(line) - new_line = line - if 
args.random: - n_vars = numpy.random.binomial(len(line), var_prob) - for i in range(n_vars): - vcoord = random.randint(coord+1, end_coord) - vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate) - var = {'coord':vcoord-coord-1, 'type':vtype, 'alt':alt} - # sys.stderr.write('Adding var at {}: {} ({})\n'.format(vcoord, vtype, alt)) - new_line = sim.apply_mutation(var, new_line) - if args.mutations: - var['coord'] = vcoord - sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.') - else: - if next_var <= end_coord: - vtype, alt = sim.make_mutation(args.indel_rate, args.ext_rate) - var = {'coord':next_var-coord-1, 'type':vtype, 'alt':alt} - # sys.stderr.write('Adding var at {}: {} ({})\n'.format(next_var, vtype, alt)) - new_line = sim.apply_mutation(var, new_line) - if args.mutations: - var['coord'] = next_var - sim.log_mutations(args.mutations, [var], '.', chr_id, '.', '.') - next_var += args.spacing - print(new_line) - coord += len(line) - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/sim-label.py --- a/utils/sim-label.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,78 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -from __future__ import print_function -import sys -import argparse -import fastareader - -ARG_DEFAULTS = {} -USAGE = "%(prog)s [options]" -DESCRIPTION = """Convert the results of the duplex pipeline on simulated data to a single-line tsv -format, and label them with their original fragments.""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**ARG_DEFAULTS) - - parser.add_argument('reads', type=fastareader.FastaReadGenerator, - help='Output fasta file from duplex pipeline. This script depends on their names being the ' - 'exact format produced by sim.py.') - parser.add_argument('families', type=argparse.FileType('r'), - help='Families tsv file (output of make-barcodes.awk).') - - args = parser.parse_args(argv[1:]) - - bars_to_frags = {} - for line in args.families: - barcode, order, read_name = line.rstrip('\r\n').split('\t')[:3] - if read_name.startswith('@'): - read_name = read_name[1:] - chrom, frag_id, read_num = read_name.split('-') - bars_to_frags[barcode] = (chrom, frag_id) - - reads = iter(args.reads) - while True: - try: - read = next(reads) - except StopIteration: - break - barcode = read.id - try: - chrom, frag_id = bars_to_frags[barcode] - except KeyError: - sys.stderr.write('Missing barcode: {}\n'.format(barcode)) - continue - try: - frag_num = int(frag_id, 16) - except ValueError: - sys.stderr.write('Invalid fragment id: {}\n'.format(frag_id)) - continue - fam1size, fam2size = get_famsizes(read.name) - print(chrom, frag_num, frag_id, barcode, fam1size, fam2size, read.seq, sep='\t') - - -def get_famsizes(read_name): - try: - faminfo = read_name.split()[1] - except IndexError: - faminfo = '' - famsizes = faminfo.split('-') - try: - fam1size = famsizes[0] - except IndexError: - fam1size = '' - try: - fam2size = famsizes[1] - except IndexError: - fam2size = '' - return fam1size, fam2size - - -def fail(message): - 
sys.stderr.write(message+"\n") - sys.exit(1) - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/sim.py --- a/utils/sim.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,706 +0,0 @@\n-#!/usr/bin/env python\n-from __future__ import division\n-from __future__ import print_function\n-import re\n-import os\n-import sys\n-import copy\n-import numpy\n-import bisect\n-import random\n-import string\n-import numbers\n-import tempfile\n-import argparse\n-import subprocess\n-import fastqreader\n-\n-REVCOMP_TABLE = string.maketrans(\'acgtrymkbdhvACGTRYMKBDHV\', \'tgcayrkmvhdbTGCAYRKMVHDB\')\n-WGSIM_ID_REGEX = r\'^(.+)_(\\d+)_(\\d+)_\\d+:\\d+:\\d+_\\d+:\\d+:\\d+_([0-9a-f]+)/[12]$\'\n-ARG_DEFAULTS = {\'read_len\':100, \'frag_len\':400, \'n_frags\':1000, \'out_format\':\'fasta\',\n- \'seq_error\':0.001, \'pcr_error\':0.001, \'cycles\':25, \'indel_rate\':0.15,\n- \'ext_rate\':0.3, \'seed\':None, \'invariant\':\'TGACT\', \'bar_len\':12, \'fastq_qual\':\'I\'}\n-USAGE = "%(prog)s [options]"\n-DESCRIPTION = """Simulate a duplex sequencing experiment."""\n-\n-RAW_DISTRIBUTION = (\n- # 0 1 2 3 4 5 6 7 8 9\n- # Low singletons, but then constant drop-off. From pML113 (see 2015-09-28 report).\n- # 0, 100, 36, 31, 27, 22, 17, 12, 7, 4.3,\n- #2.4, 1.2, 0.6, 0.3, 0.2, 0.15, 0.1, 0.07, 0.05, 0.03,\n- # High singletons, but then a second peak around 10. From Christine plasmid (2015-10-06 report).\n- # 0, 100, 5.24, 3.67, 3.50, 3.67, 3.85, 4.02, 4.11, 4.20,\n- # 4.17, 4.10, 4.00, 3.85, 3.69, 3.55, 3.38, 3.15, 2.92, 2.62,\n- # 2.27, 2.01, 1.74, 1.56, 1.38, 1.20, 1.02, 0.85,\n- # Same as above, but low singletons, 2\'s, and 3\'s (rely on errors to fill out those).\n- 0, 1, 2, 3, 3.50, 3.67, 3.85, 4.02, 4.11, 4.20,\n- 4.17, 4.10, 4.00, 3.85, 3.69, 3.55, 3.38, 3.15, 2.92, 2.62,\n- 2.27, 2.01, 1.74, 1.56, 1.38, 1.20, 1.02, 0.85,\n-)\n-\n-\n-def main(argv):\n-\n- parser = argparse.ArgumentParser(description=DESCRIPTION)\n- parser.set_defaults(**ARG_DEFAULTS)\n-\n- parser.add_argument(\'ref\', metavar=\'ref.fa\', nargs=\'?\',\n- help=\'Reference sequence. 
Omit if giving --frag-file.\')\n- parser.add_argument(\'out1\', type=argparse.FileType(\'w\'),\n- help=\'Write final mate 1 reads to this file.\')\n- parser.add_argument(\'out2\', type=argparse.FileType(\'w\'),\n- help=\'Write final mate 2 reads to this file.\')\n- parser.add_argument(\'-o\', \'--out-format\', choices=(\'fastq\', \'fasta\'))\n- parser.add_argument(\'--stdout\', action=\'store_true\',\n- help=\'Print interleaved output reads to stdout.\')\n- parser.add_argument(\'-m\', \'--mutations\', type=argparse.FileType(\'w\'),\n- help=\'Write a log of the PCR and sequencing errors introduced to this file. Will overwrite any \'\n- \'existing file at this path.\')\n- parser.add_argument(\'-b\', \'--barcodes\', type=argparse.FileType(\'w\'),\n- help=\'Write a log of which barcodes were ligated to which fragments. Will overwrite any \'\n- \'existing file at this path.\')\n- parser.add_argument(\'--frag-file\',\n- help=\'The path of the FASTQ file of fragments. If --ref is given, these will be generated with \'\n- \'wgsim and kept (normally a temporary file is used, then deleted). Note: the file will be \'\n- \'overwritten! If --ref is not given, then this should be a file of already generated \'\n- \'fragments, and they will be used instead of generating new ones.\')\n- parser.add_argument(\'-Q\', \'--fastq-qual\',\n- help=\'The quality score to assign to all bases in FASTQ output. Give a character or PHRED \'\n- \'score (integer). A PHRED score will be converted using the Sanger offset (33). Default: \'\n- \'"%(default)s"\')\n- parser.add_argument(\'-S\', \'--seed\', type=int,\n- help=\'Random number generator seed. By default, a random, 32-bit seed will be generated and \'\n- \'logged to stdout.\')\n- params = parser.add_argument_group(\'simulation parameters\')\n- params.add_argument(\'-n\', \'--n-frags\', type=int,\n- help=\'The number of original fragment molecules to simulate. 
The final number of reads will be \'\n- \'this multiplied by the average number of reads per family. If you provide fragments with \'\n- \'--frag-file, the script will s'..b'n(branches), len(candidates), shared))\n- branches[frag_i] = {\'cycle\':cycle, \'child1\':current_root, \'child2\':relative}\n- # Remove the relative from the list of lineages.\n- del(branches[relative_i])\n- if relative_i < frag_i:\n- frag_i -= 1\n- frag_i += 1\n- return branches\n-\n-\n-def get_depth(tree):\n- depth = 0\n- node = tree\n- while node:\n- depth += 1\n- node = node.get(\'child1\')\n- return depth\n-\n-\n-def convert_tree(tree_orig):\n- # Let\'s operate on a copy only.\n- tree = copy.deepcopy(tree_orig)\n- # Turn the tree vertical.\n- tree[\'line\'] = 1\n- tree[\'children\'] = 0\n- levels = [[tree]]\n- done = False\n- while not done:\n- last_level = levels[-1]\n- this_level = []\n- done = True\n- for node in last_level:\n- for child_name in (\'child1\', \'child2\'):\n- child = node.get(child_name)\n- if child:\n- done = False\n- child[\'parent\'] = node\n- child[\'branch\'] = child[\'parent\'][\'branch\']\n- if child_name == \'child2\':\n- child[\'branch\'] += 1\n- this_level.append(child)\n- this_level.sort(key=lambda node: node[\'branch\'])\n- levels.append(this_level)\n- return levels\n-\n-\n-def print_levels(levels):\n- last_level = []\n- for level in levels:\n- for node in level:\n- child = 1\n- for parent in last_level:\n- if parent.get(\'child2\') is node:\n- child = 2\n- if child == 1:\n- sys.stdout.write(\'| \')\n- else:\n- sys.stdout.write(\'\\ \')\n- last_level = level\n- print()\n-\n-\n-def label_branches(tree):\n- """Label each vertical branch (line of \'child1\'s) with an id number."""\n- counter = 1\n- tree[\'branch\'] = counter\n- nodes = [tree]\n- while nodes:\n- node = nodes.pop(0)\n- child1 = node.get(\'child1\')\n- if child1:\n- child1[\'branch\'] = node[\'branch\']\n- nodes.append(child1)\n- child2 = node.get(\'child2\')\n- if child2:\n- counter += 1\n- 
child2[\'branch\'] = counter\n- nodes.append(child2)\n-\n-\n-def print_tree(tree_orig):\n- # We "write" strings to an output buffer instead of directly printing, so we can post-process the\n- # output. The buffer is a matrix of cells, each holding a string representing one element.\n- lines = [[]]\n- # Let\'s operate on a copy only.\n- tree = copy.deepcopy(tree_orig)\n- # Add some bookkeeping data.\n- label_branches(tree)\n- tree[\'level\'] = 0\n- branches = [tree]\n- while branches:\n- line = lines[-1]\n- branch = branches.pop()\n- level = branch[\'level\']\n- while level > 0:\n- line.append(\' \')\n- level -= 1\n- node = branch\n- while node:\n- # Is it the root node? (Have we written anything yet?)\n- if lines[0]:\n- # Are we at the start of the line? (Is it only spaces so far?)\n- if line[-1] == \' \':\n- line.append(\'\\-\')\n- elif line[-1].endswith(\'-\'):\n- line.append(\'=-\')\n- else:\n- line.append(\'*-\')\n- child2 = node.get(\'child2\')\n- if child2:\n- child2[\'level\'] = node[\'level\'] + 1\n- branches.append(child2)\n- parent = node\n- node = node.get(\'child1\')\n- if node:\n- node[\'level\'] = parent[\'level\'] + 1\n- else:\n- line.append(\' {}\'.format(parent[\'branch\']))\n- lines.append([])\n- # Post-process output: Add lines connecting branches to parents.\n- x = 0\n- done = False\n- while not done:\n- # Draw vertical lines upward from branch points.\n- drawing = False\n- for line in reversed(lines):\n- done = True\n- if x < len(line):\n- done = False\n- cell = line[x]\n- if cell == \'\\-\':\n- drawing = True\n- elif cell == \' \' and drawing:\n- line[x] = \'| \'\n- elif cell == \'=-\' and drawing:\n- drawing = False\n- x += 1\n- # Print the final output.\n- for line in lines:\n- print(\'\'.join(line))\n-\n-\n-def fail(message):\n- sys.stderr.write(message+"\\n")\n- sys.exit(1)\n-\n-if __name__ == \'__main__\':\n- sys.exit(main(sys.argv))\n' |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/sim.pyc |
b |
Binary file utils/sim.pyc has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/stats.py --- a/utils/stats.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,154 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -import os -import sys -import math -import argparse -sys.path.append(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))) -import seqtools -import swalign - -INF = float('inf') -STATS = ('diffs', 'diffs-binned', 'seqlen', 'strand') -OPT_DEFAULTS = {'bins':10, 'probes':'', 'thres':0.75} -USAGE = "%(prog)s [options]" -DESCRIPTION = """""" - - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**OPT_DEFAULTS) - - parser.add_argument('stats', - help='The type of statistics to compute and print. Give a comma-separated list of stat names, ' - 'choosing from "{}".'.format('", "'.join(STATS))) - parser.add_argument('infile', metavar='read-families.msa.tsv', nargs='?', - help='The --msa output of sscs.py. Will read from stdin if not provided.') - parser.add_argument('-b', '--bins', type=int, - help='The number of bins to segment reads into when doing "diffs-binned".') - parser.add_argument('-p', '--probes', - help='Sequence excerpts from the sense strand. Required for "strand" statistic. ' - 'Comma-separated.') - parser.add_argument('-t', '--thres', type=int, - help='Alignment identity threshold (in fraction, not decimal). Default: %(default)s') - - args = parser.parse_args(argv[1:]) - - stats = args.stats.split(',') - for stat in stats: - if stat not in STATS: - fail('Error: invalid statistic "{}". 
Must choose one of "{}".'.format(stat, '", "'.join(STATS))) - if 'strand' in stats and not args.probes: - fail('Error: must provide a probe if requesting "strand" statistic.') - - if args.infile: - infile = open(args.infile) - else: - infile = sys.stdin - - family = [] - consensus = None - barcode = None - for line in infile: - fields = line.rstrip('\r\n').split('\t') - if len(fields) != 3: - continue - (this_barcode, name, seq) = fields - if fields[1] == 'CONSENSUS': - if family and consensus: - process_family(stats, barcode, consensus, family, args) - barcode = this_barcode - consensus = seq - family = [] - else: - family.append(seq) - if family and consensus: - process_family(stats, barcode, consensus, family, args) - - if infile is not sys.stdin: - infile.close() - - -#TODO: Maybe print the number of N's in the consensus? -def process_family(stats, barcode, consensus, family, args): - # Compute stats requiring the whole family at once. - for stat in stats: - if stat == 'diffs': - diffs = seqtools.get_diffs_frac_simple(consensus, family) - elif stat == 'diffs-binned': - diffs_binned = seqtools.get_diffs_frac_binned(consensus, family, args.bins) - elif stat == 'strand': - probes = args.probes.split(',') - strand = get_strand(consensus, probes, args.thres) - # Print the requested stats for each read. - # Columns: barcode, [stat columns], read sequence. 
- for (i, read) in enumerate(family): - sys.stdout.write(barcode+'\t') - for stat in stats: - if stat == 'diffs': - sys.stdout.write('{}\t'.format(round_sig_figs(diffs[i], 3))) - elif stat == 'diffs-binned': - if diffs_binned is None: - sys.stdout.write('\t' * args.bins) - else: - for diff in diffs_binned[i]: - sys.stdout.write(str(round_sig_figs(diff, 3))+'\t') - elif stat == 'seqlen': - sys.stdout.write('{}\t'.format(len(read))) - elif stat == 'strand': - sys.stdout.write('{}\t'.format(strand)) - print read.upper() - - -def get_strand(seq, probes, thres): - """Determine which strand the sequence comes from by trying to align probes from the sense strand. - Returns 'sense', 'anti', or None. - Algorithm: This tries each probe in both directions. - If at least one of the alignments has an identity above the threshold, a vote is cast for the - direction with a higher identity. - If the votes that were cast are unanimous for one direction, that strand is returned. - Else, return None.""" - votes = [] - for probe in probes: - alignment = swalign.smith_waterman(seq, probe) - sense_id = alignment.matches/len(probe) - alignment = swalign.smith_waterman(seq, seqtools.get_revcomp(probe)) - anti_id = alignment.matches/len(probe) - # print '{}: sense: {}, anti: {}'.format(probe, sense_id, anti_id) - if sense_id > thres or anti_id > thres: - if sense_id > anti_id: - votes.append('sense') - else: - votes.append('anti') - strand = None - for vote in votes: - if strand: - if strand != vote: - return None - else: - strand = vote - return strand - - -def round_sig_figs(n, figs): - if n == 0: - return n - elif n < 0: - n = -n - sign = -1 - elif n > 0: - sign = 1 - elif math.isnan(n) or n == INF: - return n - magnitude = int(math.floor(math.log10(n))) - return sign * round(n, figs - 1 - magnitude) - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/subsample.py --- a/utils/subsample.py Thu Feb 02 18:44:31 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,55 +0,0 @@ -#!/usr/bin/env python -from __future__ import division -import sys -import random -import argparse - -OPT_DEFAULTS = {'fraction':0.1, 'seed':1} -USAGE = "%(prog)s [options]" -DESCRIPTION = """""" - -def main(argv): - - parser = argparse.ArgumentParser(description=DESCRIPTION) - parser.set_defaults(**OPT_DEFAULTS) - - parser.add_argument('infile', metavar='read-families.tsv', nargs='?', - help='The input reads, sorted into families.') - parser.add_argument('-f', '--fraction', type=float, - help='Fraction of families to output. Default: %(default)s') - parser.add_argument('-s', '--seed', type=int, - help='Random number generator seed. Default: %(default)s') - - args = parser.parse_args(argv[1:]) - - random.seed(args.seed) - - if args.infile: - infile = open(args.infile) - else: - infile = sys.stdin - - family = [] - last_barcode = None - for line in infile: - fields = line.rstrip('\r\n').split('\t') - if not fields: - continue - barcode = fields[0] - if barcode != last_barcode: - if random.random() <= args.fraction: - sys.stdout.write(''.join(family)) - family = [] - family.append(line) - last_barcode = barcode - - if infile is not sys.stdin: - infile.close() - - -def fail(message): - sys.stderr.write(message+"\n") - sys.exit(1) - -if __name__ == '__main__': - sys.exit(main(sys.argv)) |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/test_1.fq.gz |
b |
Binary file utils/test_1.fq.gz has changed |
b |
diff -r e4d75f9efb90 -r 675a8370675b utils/test_2.fq.gz |
b |
Binary file utils/test_2.fq.gz has changed |