annotate fasta2shrep_gspan.pl @ 0:b01beb170290 draft default tip

Uploaded
author bgruening
date Tue, 29 Oct 2013 11:10:19 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/perl
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2 #use feature ':5.10';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
3 use File::Basename;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
4 use lib dirname($0); # search skript directory for modules
b01beb170290 Uploaded
bgruening
parents:
diff changeset
5 use strict 'vars';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
6 use warnings;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
7 use Getopt::Long;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
8 use Pod::Usage;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
9 #use List::Util qw/ min max /;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
10
b01beb170290 Uploaded
bgruening
parents:
diff changeset
11 #use StructureLibrary::RNAtoolsConfig;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
12 #use StructureLibrary::Sequence;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
13
b01beb170290 Uploaded
bgruening
parents:
diff changeset
14 use Cwd qw(getcwd abs_path);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
15 use File::Temp qw(tempdir);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
16 use File::Copy;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
17 use POSIX qw(ceil);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
18
b01beb170290 Uploaded
bgruening
parents:
diff changeset
19 use FindBin;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
20 use lib "$FindBin::Bin";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
21 #use GraphClust;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
22
b01beb170290 Uploaded
bgruening
parents:
diff changeset
23 #use vars qw(%CONFIG);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
24 #*CONFIG = \%GraphClust::CONFIG;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
25
b01beb170290 Uploaded
bgruening
parents:
diff changeset
26 =head1 NAME
b01beb170290 Uploaded
bgruening
parents:
diff changeset
27
b01beb170290 Uploaded
bgruening
parents:
diff changeset
28
b01beb170290 Uploaded
bgruening
parents:
diff changeset
29 fasta2shrep_gspan.pl -fasta mysequences.fasta -wins "50,100,150,200" -shift 5 -M 8
b01beb170290 Uploaded
bgruening
parents:
diff changeset
30
b01beb170290 Uploaded
bgruening
parents:
diff changeset
31 =head1 SYNOPSIS
b01beb170290 Uploaded
bgruening
parents:
diff changeset
32
b01beb170290 Uploaded
bgruening
parents:
diff changeset
33 Options:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
34
b01beb170290 Uploaded
bgruening
parents:
diff changeset
35 HELP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
36 -help brief help message
b01beb170290 Uploaded
bgruening
parents:
diff changeset
37 -man full documentation
b01beb170290 Uploaded
bgruening
parents:
diff changeset
38
b01beb170290 Uploaded
bgruening
parents:
diff changeset
39 COMPULSORY
b01beb170290 Uploaded
bgruening
parents:
diff changeset
40 -fasta <STRING> e.g. "sequence.fasta"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
41 All sequences in fasta format.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
42
b01beb170290 Uploaded
bgruening
parents:
diff changeset
43 OPTIONS
b01beb170290 Uploaded
bgruening
parents:
diff changeset
44 -wins [INTEGER] e.g. "50,100,200"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
45 A list of window sizes to use.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
46 If none are given (empty string ''), then the entire sequence is
b01beb170290 Uploaded
bgruening
parents:
diff changeset
47 taken with no windows. Each window > 1 required!
b01beb170290 Uploaded
bgruening
parents:
diff changeset
48 -shift <INTEGER> e.g. 20
b01beb170290 Uploaded
bgruening
parents:
diff changeset
49 The shift of the window, relative to the window size given in
b01beb170290 Uploaded
bgruening
parents:
diff changeset
50 percent. So you give which percent of the window size shall be
b01beb170290 Uploaded
bgruening
parents:
diff changeset
51 used for the shift. Of course the shift is rounded down to the
b01beb170290 Uploaded
bgruening
parents:
diff changeset
52 nearest whole number.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
53 Example 20 % of a window 150 would result in a step size of 30 nt.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
54 It is a relative parameter, as you can give different window sizes.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
55 If you do not give this parameter there is a default shift of 1 nt.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
56 -cue Crop unpaired ends.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
57 If you give this flag, then the unpaired ends of each
b01beb170290 Uploaded
bgruening
parents:
diff changeset
58 single structure are ignored. E.g. the structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
59 ...(((...))).. becomes just (((...)))
b01beb170290 Uploaded
bgruening
parents:
diff changeset
60 -stack Adds stacking information to graphs. This adds an additional
b01beb170290 Uploaded
bgruening
parents:
diff changeset
61 vertex (type P) for each pair of stacked base-pairs and four edges
b01beb170290 Uploaded
bgruening
parents:
diff changeset
62 (type p) from each of the involved bases to the new vertex.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
63 -e <FLOAT> e.g. 5.0
b01beb170290 Uploaded
bgruening
parents:
diff changeset
64 Energy range in kcal/mol (RNAshapes)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
65 Use only one of -e and -c!
b01beb170290 Uploaded
bgruening
parents:
diff changeset
66 -c <INTEGER> e.g. 10
b01beb170290 Uploaded
bgruening
parents:
diff changeset
67 Relative energy range, i.e. percentage (%) of MFE energy (RNAshapes)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
68 Use only one of -e and -c!
b01beb170290 Uploaded
bgruening
parents:
diff changeset
69 -t <INTEGER> [1-5] e.g. 3 OR "3=0,4=100,5=200"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
70 The shape type (RNAshapes). Default is 3.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
71 With the list format, the shape level can be changed for different window length
b01beb170290 Uploaded
bgruening
parents:
diff changeset
72 "4=100" means that shape level 4 is used from length 100nt (window length)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
73 The first given length has to be 0! Not continuous given levels are allowed!
b01beb170290 Uploaded
bgruening
parents:
diff changeset
74 -M <INTEGER> e.g. 10
b01beb170290 Uploaded
bgruening
parents:
diff changeset
75 Max number of shreps that should be taken per window.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
76 -u Ignore unstable structures (RNAshapes).
b01beb170290 Uploaded
bgruening
parents:
diff changeset
77 This option filters out closed structures with positive free energy.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
78 -r Calculate structure probabilities for shreps (RNAshapes)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
79 -i <INT> e.g. 10
b01beb170290 Uploaded
bgruening
parents:
diff changeset
80 Turn on structure sampling and gives number of sampling iterations.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
81 Default no sampling (i=0)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
82 -sample-len <INT> e.g. 100
b01beb170290 Uploaded
bgruening
parents:
diff changeset
83 Only in sampling mode: Sampling is only used for seqs/windows >= given length,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
84 Default: sample all lengths (0), if -i > 0
b01beb170290 Uploaded
bgruening
parents:
diff changeset
85 -q Turn on shape probabilities for RNAshapes, no sampling mode allowed
b01beb170290 Uploaded
bgruening
parents:
diff changeset
86 -Tp <FLOAT> e.g 0.001
b01beb170290 Uploaded
bgruening
parents:
diff changeset
87 Filter cutoff for shape probabilities, applied before -M filter!
b01beb170290 Uploaded
bgruening
parents:
diff changeset
88 -seq-graph-win add for each window a graph which contains no structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
89 -seq-graph-t add for each 't #' a graph which contains no structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
90 -seq-graph-alph change the alphabet of unstructured graphs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
91 -annotate <STRING> annotation.tab
b01beb170290 Uploaded
bgruening
parents:
diff changeset
92 A file with annotations to be added as abstract graphs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
93 on the sequence leven (if given) and on the structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
94 (SHREP) level. The format is has the following TAB-delimited
b01beb170290 Uploaded
bgruening
parents:
diff changeset
95 columns: SEQID, START, END, NAMESPACE#LABEL.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
96 Labels with the same name-space and SEQID form connected
b01beb170290 Uploaded
bgruening
parents:
diff changeset
97 components, which is a sequence of label vertices ordered
b01beb170290 Uploaded
bgruening
parents:
diff changeset
98 by the START position in the sequence.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
99 -abstr Add abstract structure graphs to the single shrep graph
b01beb170290 Uploaded
bgruening
parents:
diff changeset
100 instances.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
101 -nostr Calculate no structures, only add sequence information,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
102 if this is given, then -seq-graph-win AND/OR -seq-graph-t
b01beb170290 Uploaded
bgruening
parents:
diff changeset
103 are required.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
104 -match-shape <SHAPE>
b01beb170290 Uploaded
bgruening
parents:
diff changeset
105 all seqs/windows will be constraint folded into that shape via
b01beb170290 Uploaded
bgruening
parents:
diff changeset
106 RNAshapes (if structure is given in another way this struct will be kept),
b01beb170290 Uploaded
bgruening
parents:
diff changeset
107 if this shape is not possible within given energy range, produce a
b01beb170290 Uploaded
bgruening
parents:
diff changeset
108 specific t graph with only one vertex 'X'. By this the instance
b01beb170290 Uploaded
bgruening
parents:
diff changeset
109 becomes very unsimilar to all other graphs (for knn)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
110 -vp enable graph computation with viewpoints:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
111 svmsgdnspdk will center on those nucleotides that are given
b01beb170290 Uploaded
bgruening
parents:
diff changeset
112 via capital letters and ignore those given as lowercase letters
b01beb170290 Uploaded
bgruening
parents:
diff changeset
113 -tmp <STRING> e.g. "/scratch/1/sita/tmp"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
114 A directory for writing temporary files
b01beb170290 Uploaded
bgruening
parents:
diff changeset
115 -o <STRING> e.g. "ProjectX/MySequences/GSPAN/"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
116 Output directory for gspan files containing graphs.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
117 -group <INTEGER> e.g. 5
b01beb170290 Uploaded
bgruening
parents:
diff changeset
118 Combine/group that number of input seqs into 1 gspan file
b01beb170290 Uploaded
bgruening
parents:
diff changeset
119 output name is then '<INT>.group.gspan.bz2'
b01beb170290 Uploaded
bgruening
parents:
diff changeset
120 -sge Use SGE cluster for each sequence separately
b01beb170290 Uploaded
bgruening
parents:
diff changeset
121 -sge-logdir stdout directory for SGE call
b01beb170290 Uploaded
bgruening
parents:
diff changeset
122 -sge-errdir sdterr directory for SGE call
b01beb170290 Uploaded
bgruening
parents:
diff changeset
123 -stdout send graphs to stdout instead of writing to files
b01beb170290 Uploaded
bgruening
parents:
diff changeset
124 -ignore-header don't write fasta id part after first space to gspan
b01beb170290 Uploaded
bgruening
parents:
diff changeset
125 -debug additional debug output
b01beb170290 Uploaded
bgruening
parents:
diff changeset
126
b01beb170290 Uploaded
bgruening
parents:
diff changeset
127
b01beb170290 Uploaded
bgruening
parents:
diff changeset
128 DEFAULT VALUES
b01beb170290 Uploaded
bgruening
parents:
diff changeset
129 -wins ""
b01beb170290 Uploaded
bgruening
parents:
diff changeset
130 -shift 1 nt
b01beb170290 Uploaded
bgruening
parents:
diff changeset
131 -c 10
b01beb170290 Uploaded
bgruening
parents:
diff changeset
132 -t 3
b01beb170290 Uploaded
bgruening
parents:
diff changeset
133 -M 0 # selects all shreps
b01beb170290 Uploaded
bgruening
parents:
diff changeset
134 -tmp "/var/tmp/fasta2shrep"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
135 -o "CURRENT_DIR/GSPAN/"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
136
b01beb170290 Uploaded
bgruening
parents:
diff changeset
137 SGE mode
b01beb170290 Uploaded
bgruening
parents:
diff changeset
138 -sge-logdir "CURRENT_DIR/GSPAN/SGE_log"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
139 -sge-errdir "CURRENT_DIR/GSPAN/SGE_log"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
140 -task-id <NUM>
b01beb170290 Uploaded
bgruening
parents:
diff changeset
141
b01beb170290 Uploaded
bgruening
parents:
diff changeset
142 =head1 DESCRIPTION
b01beb170290 Uploaded
bgruening
parents:
diff changeset
143
b01beb170290 Uploaded
bgruening
parents:
diff changeset
144 =cut
b01beb170290 Uploaded
bgruening
parents:
diff changeset
145
b01beb170290 Uploaded
bgruening
parents:
diff changeset
146 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
147 # end handler for temporary directory
b01beb170290 Uploaded
bgruening
parents:
diff changeset
148 # adds an error handler that deletes the directory in case of error
b01beb170290 Uploaded
bgruening
parents:
diff changeset
149 # SIGUSR{1/2} are sent by the sge prior to the uncatchable SIGKILL if the
b01beb170290 Uploaded
bgruening
parents:
diff changeset
150 # option -notify was set
b01beb170290 Uploaded
bgruening
parents:
diff changeset
151 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
152 $SIG{'INT'} = 'end_handler';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
153 $SIG{'TERM'} = 'end_handler';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
154 $SIG{'ABRT'} = 'end_handler';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
155 $SIG{'USR1'} = 'end_handler';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
156 $SIG{'USR2'} = 'end_handler';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
157
b01beb170290 Uploaded
bgruening
parents:
diff changeset
158 sub end_handler {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
159 print STDERR "signal ", $_[0], " caught, cleaning up temporary files\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
160
b01beb170290 Uploaded
bgruening
parents:
diff changeset
161 # change into home directory. deletion of the temporary directory will
b01beb170290 Uploaded
bgruening
parents:
diff changeset
162 # fail if it is the current working directory
b01beb170290 Uploaded
bgruening
parents:
diff changeset
163 chdir();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
164 File::Temp::cleanup();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
165 die();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
166 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
167
b01beb170290 Uploaded
bgruening
parents:
diff changeset
168 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
169 # PARSE COMMAND LINE OPTIONS
b01beb170290 Uploaded
bgruening
parents:
diff changeset
170 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
171
b01beb170290 Uploaded
bgruening
parents:
diff changeset
172 # command line options
b01beb170290 Uploaded
bgruening
parents:
diff changeset
173 my ( $i_help, $i_man, $i_debug, $i_fas, $i_wins, $i_shift, $i_crop_unpaired_ends,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
174 $i_r, $i_e, $i_c, $i_t, $i_u, $i_M, $i_o, $i_sge, $i_jobid, $i_tmp,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
175 $i_ignore_seq_header, $i_stacks, $i_stdout, $i_q, $i_T, $i_i,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
176 $i_sample_min_length, $i_sge_logDir, $i_sge_errDir, $i_groupsize, $i_annotate,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
177 $i_abstr, $i_no_structure, $i_vp, $i_matchShape, $i_rnashapes_binpath );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
178
b01beb170290 Uploaded
bgruening
parents:
diff changeset
179 my ( $i_add_seq_graph_win, $i_add_seq_graph_t, $i_change_seq_graph_alph );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
180
b01beb170290 Uploaded
bgruening
parents:
diff changeset
181 my $options = GetOptions(
b01beb170290 Uploaded
bgruening
parents:
diff changeset
182 "help" => \$i_help,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
183 "man" => \$i_man,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
184 "debug" => \$i_debug,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
185 "fasta=s" => \$i_fas,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
186 "wins=s" => \$i_wins,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
187 "shift=f" => \$i_shift,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
188 "cue" => \$i_crop_unpaired_ends,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
189 "stack" => \$i_stacks,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
190 "r" => \$i_r,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
191 "e=f" => \$i_e,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
192 "c=i" => \$i_c,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
193 "t=s" => \$i_t,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
194 "u" => \$i_u,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
195 "M=i" => \$i_M,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
196 "tmp=s" => \$i_tmp,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
197 "o=s" => \$i_o,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
198 "i=i" => \$i_i,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
199 "sample-len=i" => \$i_sample_min_length,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
200 "q" => \$i_q,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
201 "Tp=f" => \$i_T,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
202 "seq-graph-win" => \$i_add_seq_graph_win,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
203 "seq-graph-t" => \$i_add_seq_graph_t,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
204 "-seq-graph-alph" => \$i_change_seq_graph_alph,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
205 "sge" => \$i_sge,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
206 "task-id=i" => \$i_jobid,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
207 "ignore-header" => \$i_ignore_seq_header,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
208 "stdout" => \$i_stdout,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
209 "sge-logdir=s" => \$i_sge_logDir,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
210 "sge-errdir=s" => \$i_sge_errDir,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
211 "group=i" => \$i_groupsize,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
212 "annotate=s" => \$i_annotate,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
213 "abstr" => \$i_abstr,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
214 "nostr" => \$i_no_structure,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
215 "vp" => \$i_vp,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
216 "match-shape=s" => \$i_matchShape,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
217 "rnashapes-bin=s" => \$i_rnashapes_binpath,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
218 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
219
b01beb170290 Uploaded
bgruening
parents:
diff changeset
220 pod2usage( -exitstatus => 1, -verbose => 1 ) if $i_help;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
221 pod2usage( -exitstatus => 0, -verbose => 2 ) if $i_man;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
222 ($options) or pod2usage(2);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
223
b01beb170290 Uploaded
bgruening
parents:
diff changeset
224 # check compulsory options
b01beb170290 Uploaded
bgruening
parents:
diff changeset
225 ($i_fas) or pod2usage("Error: the option -fasta is compulsory!\n");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
226 ( -e $i_fas ) or pod2usage("Error: no such file - $i_fas!\n");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
227 $i_fas = abs_path($i_fas);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
228
b01beb170290 Uploaded
bgruening
parents:
diff changeset
229 # check other options and set default values
b01beb170290 Uploaded
bgruening
parents:
diff changeset
230 pod2usage("Error: either -e OR -c can be given, but NOT both!\n") if ( $i_e && $i_c );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
231 ( $i_e or $i_c ) or $i_c = 10; # set -c 10, if neither -e or -c are given
b01beb170290 Uploaded
bgruening
parents:
diff changeset
232 ($i_M) or $i_M = 0; # max number of shreps is 0 (=means take all computed)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
233 ($i_i) or $i_i = 0; # default no sampling else sampling iterations
b01beb170290 Uploaded
bgruening
parents:
diff changeset
234 ($i_sample_min_length) or $i_sample_min_length = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
235 pod2usage("\nError: use --sample-len <INT> only with -i <1..INT> !\n") if ( not $i_i and $i_sample_min_length );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
236 ($i_T) or $i_T = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
237 ($i_q) or $i_q = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
238 pod2usage("\nError: Sampling (-i) not possible with shape probabilities (-q)!\n") if ( $i_i and $i_q );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
239 ($i_add_seq_graph_win) or $i_add_seq_graph_win = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
240 ($i_add_seq_graph_t) or $i_add_seq_graph_t = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
241 ($i_change_seq_graph_alph) or $i_change_seq_graph_alph = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
242 ($i_no_structure) or $i_no_structure = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
243
b01beb170290 Uploaded
bgruening
parents:
diff changeset
244 if ($i_change_seq_graph_alph) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
245 ($i_add_seq_graph_t) or ($i_add_seq_graph_win) or pod2usage( "Error: " .
b01beb170290 Uploaded
bgruening
parents:
diff changeset
246 "When giving the parameter -seq-graph-alph, then either -seq-graph-t or -seq-graph-win" .
b01beb170290 Uploaded
bgruening
parents:
diff changeset
247 " must also be given!\n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
248 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
249 ( -e $i_annotate ) or pod2usage("Error: no such file - $i_annotate!\n")
b01beb170290 Uploaded
bgruening
parents:
diff changeset
250 if ($i_annotate);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
251
b01beb170290 Uploaded
bgruening
parents:
diff changeset
252 $i_add_seq_graph_t = 1 if ($i_no_structure);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
253
b01beb170290 Uploaded
bgruening
parents:
diff changeset
254 ($i_t) or $i_t = 3; # default abstraction type is 3
b01beb170290 Uploaded
bgruening
parents:
diff changeset
255 my $change_shape_level = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
256 my @level_lens = ( -1, -1, -1, -1, -1 ); ## array_idx-1=shape level, value=start length of this level
b01beb170290 Uploaded
bgruening
parents:
diff changeset
257
b01beb170290 Uploaded
bgruening
parents:
diff changeset
258 if ( $i_t !~ /^\d+$/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
259
b01beb170290 Uploaded
bgruening
parents:
diff changeset
260 my @t_minlens = split( ",", $i_t ); ## -t "3=100,4=200"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
261
b01beb170290 Uploaded
bgruening
parents:
diff changeset
262 foreach my $idx ( 1 .. @t_minlens ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
263
b01beb170290 Uploaded
bgruening
parents:
diff changeset
264 my $level = $t_minlens[ $idx - 1 ]; ## $level = "3=100"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
265
b01beb170290 Uploaded
bgruening
parents:
diff changeset
266 die "$level Wrong -t format! Example: -t 3=0,4=100,5=200\n" if ( $level !~ /^\d+\=\d+$/ );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
267 my @lev_len = split( "=", $level ); ## $level = "3=100"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
268
b01beb170290 Uploaded
bgruening
parents:
diff changeset
269 die "Wrong -t format! First level given needs to be length 0! Example: -t 3=0,4=100,5=200\n" if ( $idx == 1 && $lev_len[1] != 0 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
270 die "Wrong -t format! Only level 1-5 allowed! Example: -t 3=0,4=100,5=200\n" if ( $lev_len[0] < 1 or $lev_len[0] > 5 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
271 die "Wrong -t format! Length >= 0 expected! Example: -t 3=0,4=100,5=200\n" if ( $lev_len[1] < 0 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
272 $change_shape_level = 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
273 $level_lens[ $lev_len[0] - 1 ] = $lev_len[1];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
274 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
275
b01beb170290 Uploaded
bgruening
parents:
diff changeset
276 ($i_debug) and print STDERR "i_t = $i_t - change $change_shape_level - shape level lengths#" . join( ":", @level_lens ) . "#" . join( ":", @t_minlens ) . "#\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
277 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
278
b01beb170290 Uploaded
bgruening
parents:
diff changeset
279 ## checks for match-shape
b01beb170290 Uploaded
bgruening
parents:
diff changeset
280 if ($i_matchShape){
b01beb170290 Uploaded
bgruening
parents:
diff changeset
281 die "Please provide correct match shape string like '[]'! Exit...\n\n"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
282 if ($i_matchShape !~ /^[\[\]_]+$/);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
283 ## RNAshapes prodices anyway only 1 structure, no suboptimal structs in match-shape folding
b01beb170290 Uploaded
bgruening
parents:
diff changeset
284 $i_M = 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
285 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
286
b01beb170290 Uploaded
bgruening
parents:
diff changeset
287 my $CURRDIR = getcwd;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
288
b01beb170290 Uploaded
bgruening
parents:
diff changeset
289 # set up tmp directory
b01beb170290 Uploaded
bgruening
parents:
diff changeset
290 # default tmp is /var/tmp, usually not NFS mounted!
b01beb170290 Uploaded
bgruening
parents:
diff changeset
291 ( defined $i_tmp ) or $i_tmp = '/var/tmp/';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
292 my $tmp_template = 'fasta2shrep-XXXXXX';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
293
b01beb170290 Uploaded
bgruening
parents:
diff changeset
294 # CLEANUP => 1 : automatically delete at exit
b01beb170290 Uploaded
bgruening
parents:
diff changeset
295 $i_tmp = tempdir( $tmp_template, DIR => $i_tmp, CLEANUP => 1 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
296
b01beb170290 Uploaded
bgruening
parents:
diff changeset
297 # create GSPAN directory when not printing to stdout
b01beb170290 Uploaded
bgruening
parents:
diff changeset
298 if ( not $i_stdout ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
299 if ($i_o) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
300 ( -e $i_o ) or system("mkdir -p $i_o");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
301 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
302 system("mkdir -p GSPAN");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
303 $i_o = $CURRDIR . "/GSPAN/";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
304 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
305 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
306
b01beb170290 Uploaded
bgruening
parents:
diff changeset
307 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
308 # GLOBAL VARIABLES
b01beb170290 Uploaded
bgruening
parents:
diff changeset
309 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
310 $i_rnashapes_binpath or $i_rnashapes_binpath = "RNAshapes";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
311 die "Please provide full RNAshapes path (e.g. /usr/bin/RNAshapes) !\n" if (!-f "$i_rnashapes_binpath");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
312 my $rnashapes_loc = $i_rnashapes_binpath;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
313 if ( !$rnashapes_loc || !-e $rnashapes_loc ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
314 my $loc = `which RNAshapes`;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
315 chomp($loc);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
316 die "\nCannot find RNAshapes binary! Exit...\n\n" if ( !$loc );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
317 die "\nCannot find RNAshapes binary! Exit...\n\n" if ( !-e $loc );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
318 $rnashapes_loc = $loc;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
319 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
320
b01beb170290 Uploaded
bgruening
parents:
diff changeset
321 my @WINDOWS = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
322 @WINDOWS = split( ",", $i_wins ) if ($i_wins);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
323 my $globalFolding;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
324 $globalFolding = 1 unless @WINDOWS;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
325 my $CURRUSER = getlogin;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
326 my $SEQNO = 0; # used to id the sequences
b01beb170290 Uploaded
bgruening
parents:
diff changeset
327 my $GSPANNO = 0; # used for gspan filenames
b01beb170290 Uploaded
bgruening
parents:
diff changeset
328 # minimum length of sequence that will work with RNAshapes
b01beb170290 Uploaded
bgruening
parents:
diff changeset
329 # sequences with one or two nucleotides will be restricted to sequence-only graphs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
330 # this ensures that no sequences are skipped and external info kept synchronized
b01beb170290 Uploaded
bgruening
parents:
diff changeset
331 my $GSPAN_SEQ_MINLEN = 3;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
332
b01beb170290 Uploaded
bgruening
parents:
diff changeset
333 # name spaces
b01beb170290 Uploaded
bgruening
parents:
diff changeset
334 my $ABSTRUCT = "AS";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
335 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
336 # EXECUTION CODE
b01beb170290 Uploaded
bgruening
parents:
diff changeset
337 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
338
b01beb170290 Uploaded
bgruening
parents:
diff changeset
339 # read fasta file into hash
b01beb170290 Uploaded
bgruening
parents:
diff changeset
340 my ( $headers_aref, $sequences_aref, $metainfo_aref ) = read_fasta_with_nonunique_headers_meta($i_fas);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
341
b01beb170290 Uploaded
bgruening
parents:
diff changeset
342 # call script again on the sge cluster in a batch job
b01beb170290 Uploaded
bgruening
parents:
diff changeset
343 if ($i_sge) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
344 my $SCRIPTNAME = $0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
345 my $CLUSTERSUBMIT = $FindBin::Bin."/fasta2shrep_gspan.sge";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
346
b01beb170290 Uploaded
bgruening
parents:
diff changeset
347 die "Cannot find SGE submit script $CLUSTERSUBMIT! Exit...\n\n" if ( !-e $CLUSTERSUBMIT );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
348
b01beb170290 Uploaded
bgruening
parents:
diff changeset
349 my $SGE_jobs = @{$sequences_aref};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
350 $SGE_jobs = ceil( @{$sequences_aref} / $i_groupsize ) if ($i_groupsize);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
351
b01beb170290 Uploaded
bgruening
parents:
diff changeset
352 my $params = "-fasta $i_fas ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
353 $params .= "-wins $i_wins " if ($i_wins);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
354 $params .= "-shift $i_shift " if ($i_shift);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
355 $params .= "-e $i_e " if ($i_e);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
356 $params .= "-c $i_c " if ($i_c);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
357 $params .= "-t $i_t " if ($i_t);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
358 $params .= "-u " if ($i_u);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
359 $params .= "-r " if ($i_r);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
360 $params .= "-M $i_M " if ($i_M);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
361 $params .= "-o $i_o " if ($i_o);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
362 $params .= "-i $i_i " if ($i_i);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
363 $params .= "--sample-len $i_sample_min_length" if ($i_sample_min_length);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
364 $params .= "-q " if ($i_q);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
365 $params .= "-Tp $i_T " if ($i_T);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
366 $params .= "--seq-grap-win " if ($i_add_seq_graph_win);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
367 $params .= "--seq-graph-t " if ($i_add_seq_graph_t);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
368 $params .= "--seq-graph-alph " if ($i_change_seq_graph_alph);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
369 $params .= "-ignore-header " if ($i_ignore_seq_header);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
370 $params .= "-cue " if ($i_crop_unpaired_ends);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
371 $params .= "-stack " if ($i_stacks);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
372 $params .= "--group $i_groupsize " if ($i_groupsize);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
373 $params .= "-annotate $i_annotate" if ($i_annotate);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
374 $params .= "-abstr" if ($i_abstr);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
375 $params .= "-nostr " if ($i_no_structure);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
376 $params .= "--debug " if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
377 $params .= "-vp " if ($i_vp);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
378 $params .= "--match-shape $i_matchShape " if ($i_matchShape);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
379
b01beb170290 Uploaded
bgruening
parents:
diff changeset
380 print "used script:" . $SCRIPTNAME . "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
381 print "used submit script:" . $CLUSTERSUBMIT . "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
382
b01beb170290 Uploaded
bgruening
parents:
diff changeset
383 $i_sge_logDir = "$i_o/SGE_log" if ( !$i_sge_logDir );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
384 mkdir($i_sge_logDir);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
385
b01beb170290 Uploaded
bgruening
parents:
diff changeset
386 $i_sge_errDir = $i_sge_logDir if ( !$i_sge_errDir );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
387 mkdir($i_sge_errDir);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
388
b01beb170290 Uploaded
bgruening
parents:
diff changeset
389 my $ssh = 1; ## can be used for debug shell script call
b01beb170290 Uploaded
bgruening
parents:
diff changeset
390 if ($ssh) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
391 system( "ssh $CURRUSER\@biui.informatik.uni-freiburg.de "
b01beb170290 Uploaded
bgruening
parents:
diff changeset
392 . "'export SGE_ROOT=/opt/sge-6.0/; cd $CURRDIR; "
b01beb170290 Uploaded
bgruening
parents:
diff changeset
393 . "/opt/sge-6.0/bin/lx24-amd64/qsub -t 1-$SGE_jobs -o $i_sge_errDir/ -e $i_sge_errDir/ "
b01beb170290 Uploaded
bgruening
parents:
diff changeset
394 . "$CLUSTERSUBMIT $CURRDIR $SCRIPTNAME \"$params\" ' " );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
395 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
396 system("$CLUSTERSUBMIT $CURRDIR $SCRIPTNAME '$params' ");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
397 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
398 exit;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
399 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
400
b01beb170290 Uploaded
bgruening
parents:
diff changeset
401 ## compute shreps and gspan, either local or after SGE submission
b01beb170290 Uploaded
bgruening
parents:
diff changeset
402
b01beb170290 Uploaded
bgruening
parents:
diff changeset
403 # TODO read and process annotations
b01beb170290 Uploaded
bgruening
parents:
diff changeset
404
b01beb170290 Uploaded
bgruening
parents:
diff changeset
405 my @used_seq_headers;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
406 my @used_seqs;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
407 my @used_meta;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
408 my $group_idx;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
409
b01beb170290 Uploaded
bgruening
parents:
diff changeset
410 if ($i_jobid) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
411 ## just process the one sequence as given by the jobid number
b01beb170290 Uploaded
bgruening
parents:
diff changeset
412 my $used_grouping = 1; ## if no group is given, make 1 seq per job = group=1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
413 $used_grouping = $i_groupsize if ($i_groupsize);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
414 my $st = ( $i_jobid - 1 ) * $used_grouping + 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
415 my $end = $st + $used_grouping - 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
416 $end = @{$sequences_aref} if ( $end > @{$sequences_aref} );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
417
b01beb170290 Uploaded
bgruening
parents:
diff changeset
418 foreach my $idx ( $st .. $end ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
419 push( @used_seq_headers, $headers_aref->[ $idx - 1 ] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
420 push( @used_seqs, $sequences_aref->[ $idx - 1 ] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
421 push( @used_meta, $metainfo_aref->[ $idx - 1 ] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
422 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
423
b01beb170290 Uploaded
bgruening
parents:
diff changeset
424 $group_idx = $i_jobid;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
425
b01beb170290 Uploaded
bgruening
parents:
diff changeset
426 #($i_debug) and print STDERR "st $st end $end gr $group_idx job $i_jobid gs $i_groupsize\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
427
b01beb170290 Uploaded
bgruening
parents:
diff changeset
428 $GSPANNO = $i_jobid-1 if (!$i_groupsize);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
429
b01beb170290 Uploaded
bgruening
parents:
diff changeset
430 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
431 ## process all sequences at once
b01beb170290 Uploaded
bgruening
parents:
diff changeset
432 @used_seq_headers = @{$headers_aref};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
433 @used_seqs = @{$sequences_aref};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
434 @used_meta = @{$metainfo_aref};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
435 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
436
b01beb170290 Uploaded
bgruening
parents:
diff changeset
437 my $out;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
438 my $gspanfile;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
439 my $out_no_match_shape;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
440
b01beb170290 Uploaded
bgruening
parents:
diff changeset
441 if ($i_matchShape && !$i_stdout && !$i_groupsize){
b01beb170290 Uploaded
bgruening
parents:
diff changeset
442 open($out_no_match_shape,">$i_o/fasta2shrep.no_match");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
443 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
444
b01beb170290 Uploaded
bgruening
parents:
diff changeset
445 # for each sequence in the fasta file
b01beb170290 Uploaded
bgruening
parents:
diff changeset
446 while ( my $seq = shift @used_seqs ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
447 my $tmp_header = shift @used_seq_headers;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
448 my $tmp_meta = shift @used_meta;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
449
b01beb170290 Uploaded
bgruening
parents:
diff changeset
450 my ( $seq_id, $seq_header ) = ( $tmp_header =~ /(\S+)\s*([\S*\s*]*)/ );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
451 $i_ignore_seq_header and $seq_header = '';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
452
b01beb170290 Uploaded
bgruening
parents:
diff changeset
453 my $seq_fasta = generate_single_fasta_from_sequence_X( $seq_id, $seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
454 my $seq_len = length($seq);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
455
b01beb170290 Uploaded
bgruening
parents:
diff changeset
456 # only print sequence graphs for sequences below this threshold
b01beb170290 Uploaded
bgruening
parents:
diff changeset
457 my $no_structure_override = ($seq_len < $GSPAN_SEQ_MINLEN) ? 1 : 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
458
b01beb170290 Uploaded
bgruening
parents:
diff changeset
459 $GSPANNO++;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
460
b01beb170290 Uploaded
bgruening
parents:
diff changeset
461 # set outstream for gspan output to correct file/STDOUT
b01beb170290 Uploaded
bgruening
parents:
diff changeset
462 if ($i_stdout) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
463 $out = \*STDOUT;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
464 } elsif ( !$i_groupsize ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
465 $gspanfile = $i_tmp . '/' . $GSPANNO . '.gspan';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
466 open( $out, "| bzip2 -f > $gspanfile.bz2" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
467 } elsif ( ( $GSPANNO - 1 ) % $i_groupsize == 0 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
468
b01beb170290 Uploaded
bgruening
parents:
diff changeset
469 if ( $GSPANNO > 1 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
470 close($out);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
471 system("mv $gspanfile.bz2 $i_o/$group_idx.group.gspan.bz2");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
472 if ($out_no_match_shape){
b01beb170290 Uploaded
bgruening
parents:
diff changeset
473 close($out_no_match_shape);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
474 system("mv $gspanfile.no_match $i_o/$group_idx.group.gspan.no_match");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
475 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
476 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
477
b01beb170290 Uploaded
bgruening
parents:
diff changeset
478 if ( !$i_jobid ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
479 $group_idx = int( ( $GSPANNO - 1 ) / $i_groupsize ) + 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
480 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
481
b01beb170290 Uploaded
bgruening
parents:
diff changeset
482 $gspanfile = "$i_tmp/$group_idx.group.gspan";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
483 open( $out, "| bzip2 -f > $gspanfile.bz2" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
484 open( $out_no_match_shape, ">$gspanfile.no_match" ) if ($out_no_match_shape);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
485 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
486
b01beb170290 Uploaded
bgruening
parents:
diff changeset
487 ## do not use folding windows in special cases
b01beb170290 Uploaded
bgruening
parents:
diff changeset
488 if ( $globalFolding || $i_no_structure || $no_structure_override) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
489 @WINDOWS = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
490 push( @WINDOWS, $seq_len );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
491 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
492
b01beb170290 Uploaded
bgruening
parents:
diff changeset
493 ##check win sizes for global folding
b01beb170290 Uploaded
bgruening
parents:
diff changeset
494 my @WINDOWS_used = sort { $a <=> $b } @WINDOWS;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
495 foreach my $w_idx ( 0 .. ( @WINDOWS_used - 1 ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
496 if ( $WINDOWS_used[$w_idx] >= $seq_len && $WINDOWS_used[$w_idx] > 1 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
497 if ( $w_idx < $#WINDOWS_used ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
498 @WINDOWS_used = @WINDOWS_used[ 0 .. $w_idx ];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
499 last;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
500 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
501 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
502 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
503
b01beb170290 Uploaded
bgruening
parents:
diff changeset
504 ## use seq graph only if no shape folding wanted
b01beb170290 Uploaded
bgruening
parents:
diff changeset
505 $i_add_seq_graph_t = 1 if ($i_no_structure || $no_structure_override);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
506
b01beb170290 Uploaded
bgruening
parents:
diff changeset
507 ## no shape info in graphheader if we have fixed structure (according LocaRNA handling)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
508 ## use tags #FS or #S for provided structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
509 my $graph_header;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
510 my @struct_meta = grep { $_ =~ /#FS/ || $_ =~ /#S/ } keys %{$tmp_meta};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
511
b01beb170290 Uploaded
bgruening
parents:
diff changeset
512 if (@struct_meta) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
513 $graph_header = getGraphHeader( $seq_id, $seq_header, \@WINDOWS, $i_shift, $i_e, $i_c, $i_t, $i_u, $i_r, $i_M, $i_crop_unpaired_ends, $i_i, $i_sample_min_length, $i_q, $i_T, $seq_len, 1 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
514 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
515 $graph_header = getGraphHeader( $seq_id, $seq_header, \@WINDOWS, $i_shift, $i_e, $i_c, $i_t, $i_u, $i_r, $i_M, $i_crop_unpaired_ends, $i_i, $i_sample_min_length, $i_q, $i_T, $seq_len, $i_no_structure || $no_structure_override );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
516 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
517
b01beb170290 Uploaded
bgruening
parents:
diff changeset
518 print $out $graph_header;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
519
b01beb170290 Uploaded
bgruening
parents:
diff changeset
520 my $gi = 0; # current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
521
b01beb170290 Uploaded
bgruening
parents:
diff changeset
522 ## add graph with no structure at all depending on $i_add_seq_graph_t
b01beb170290 Uploaded
bgruening
parents:
diff changeset
523 if ($i_add_seq_graph_t) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
524
b01beb170290 Uploaded
bgruening
parents:
diff changeset
525 ($gi) = convertSeqWindow( $seq, $seq_len, 1, $gi, $graph_header, $out, $i_annotate, $seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
526
b01beb170290 Uploaded
bgruening
parents:
diff changeset
527 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
528
b01beb170290 Uploaded
bgruening
parents:
diff changeset
529 ## encode fixed structure only if provided and structures wanted in general
b01beb170290 Uploaded
bgruening
parents:
diff changeset
530 if ( @struct_meta && !$i_no_structure && !$no_structure_override) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
531
b01beb170290 Uploaded
bgruening
parents:
diff changeset
532 my $struct_meta = "#FS";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
533 $struct_meta = "#S" if ( !exists $tmp_meta->{$struct_meta} );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
534
b01beb170290 Uploaded
bgruening
parents:
diff changeset
535 my $seq_shrep = [ $tmp_meta->{$struct_meta}, "ENERGY", "0.00", "SHAPE", $struct_meta ];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
536 $gi = convertShapeWindow( [$seq_shrep], $seq, $seq_len, 1, $gi, $out,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
537 $graph_header, $i_annotate, $i_abstr, $i_crop_unpaired_ends, $i_stacks, $seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
538 @WINDOWS_used = (); ## no shape folding if we have a fixed structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
539
b01beb170290 Uploaded
bgruening
parents:
diff changeset
540 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
541
b01beb170290 Uploaded
bgruening
parents:
diff changeset
542 ## ignore RNAshapes folding if wanted, but do correct file move afterwards
b01beb170290 Uploaded
bgruening
parents:
diff changeset
543 ## (just "next" does not work due to output)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
544 @WINDOWS_used = () if ($i_no_structure or $no_structure_override);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
545
b01beb170290 Uploaded
bgruening
parents:
diff changeset
546 #for each window size in list
b01beb170290 Uploaded
bgruening
parents:
diff changeset
547 foreach my $win_size (@WINDOWS_used) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
548
b01beb170290 Uploaded
bgruening
parents:
diff changeset
549 # calculate shift size from percentage
b01beb170290 Uploaded
bgruening
parents:
diff changeset
550 my $curr_shift = 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
551 if ($i_shift) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
552 $curr_shift = ( $i_shift / 100 ) * $win_size;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
553 $curr_shift = int($curr_shift); #round down
b01beb170290 Uploaded
bgruening
parents:
diff changeset
554 $curr_shift = 1 unless ($curr_shift); # just in case it is 0
b01beb170290 Uploaded
bgruening
parents:
diff changeset
555 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
556 ($i_debug) and print STDERR "winsize: $win_size curr_shift: $curr_shift\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
557 ($i_debug) and print STDERR "\nNext: $seq_id\t winsize:$win_size \n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
558
b01beb170290 Uploaded
bgruening
parents:
diff changeset
559 # choose current shape level, depending on $i_t
b01beb170290 Uploaded
bgruening
parents:
diff changeset
560 my $curr_t = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
561 if ($change_shape_level) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
562 for ( my $i = 0 ; $i < @level_lens ; $i++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
563 $curr_t = $i + 1 if ( $level_lens[$i] != -1 && ( $level_lens[$i] <= $win_size ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
564 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
565 ($i_debug) and print STDERR "$win_size curr type $curr_t\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
566 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
567 $curr_t = $i_t;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
568 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
569
b01beb170290 Uploaded
bgruening
parents:
diff changeset
570 my $rnashapesoutput_fh;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
571
b01beb170290 Uploaded
bgruening
parents:
diff changeset
572 # call RNAshapes and write to $rnashapesoutput_fh
b01beb170290 Uploaded
bgruening
parents:
diff changeset
573 $rnashapesoutput_fh = call_RNAshapes( $seq_fasta, $rnashapes_loc, $win_size,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
574 $curr_shift, $i_e, $i_c, $curr_t, $i_u, $i_r, $i_q, $i_T, $i_i, $i_sample_min_length, $seq_len, $i_matchShape );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
575
b01beb170290 Uploaded
bgruening
parents:
diff changeset
576 # read RNAshapes output from $rnashapesoutput_fh and write subgraph
b01beb170290 Uploaded
bgruening
parents:
diff changeset
577 # to gspan file
b01beb170290 Uploaded
bgruening
parents:
diff changeset
578 my $gi_old = $gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
579 $gi = convert_RNAshapes_output( $rnashapesoutput_fh, $gi, $i_M, $out, $graph_header,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
580 $win_size, $seq_len, $curr_t, $i_annotate, $i_abstr, $i_crop_unpaired_ends, $i_stacks, $seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
581
b01beb170290 Uploaded
bgruening
parents:
diff changeset
582 ## no (match) shape found at all for this seq
b01beb170290 Uploaded
bgruening
parents:
diff changeset
583 if ($gi == $gi_old+1){
b01beb170290 Uploaded
bgruening
parents:
diff changeset
584 $gi = convertSeqWindow( "X", 1, 1, $gi, $graph_header, $out, $i_annotate, $seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
585 print $out_no_match_shape $seq_id."\n" if (!$i_stdout && $out_no_match_shape);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
586 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
587 } ## foreach WINDOW_used
b01beb170290 Uploaded
bgruening
parents:
diff changeset
588
b01beb170290 Uploaded
bgruening
parents:
diff changeset
589 if ( !$i_stdout && !$i_groupsize ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
590 close($out);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
591 move "$gspanfile.bz2", "$i_o/$GSPANNO.gspan.bz2";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
592 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
593
b01beb170290 Uploaded
bgruening
parents:
diff changeset
594 system("rm $seq_fasta");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
595
b01beb170290 Uploaded
bgruening
parents:
diff changeset
596 } ## while @used_seqs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
597
b01beb170290 Uploaded
bgruening
parents:
diff changeset
598 if ($i_groupsize) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
599 close($out);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
600 system("mv $gspanfile.bz2 $i_o/$group_idx.group.gspan.bz2");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
601 close($out_no_match_shape) if ($out_no_match_shape);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
602 move "$gspanfile.no_match", "$i_o/$group_idx.group.gspan.no_match" if ($out_no_match_shape);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
603 } elsif ( $out_no_match_shape && !$i_stdout && !$i_groupsize ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
604 close($out_no_match_shape);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
605 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
606
b01beb170290 Uploaded
bgruening
parents:
diff changeset
607 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
608 # METHODS
b01beb170290 Uploaded
bgruening
parents:
diff changeset
609 ###############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
610
b01beb170290 Uploaded
bgruening
parents:
diff changeset
611 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
612 # Generates fasta file for a single sequence (one-lined-fasta). This
b01beb170290 Uploaded
bgruening
parents:
diff changeset
613 # fasta is stored in the temp directory and should be deleted at the end.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
614 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
615 # seq_id : the sequence ID
b01beb170290 Uploaded
bgruening
parents:
diff changeset
616 # seq : the sequence
b01beb170290 Uploaded
bgruening
parents:
diff changeset
617 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
618 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
619 # The fasta file name
b01beb170290 Uploaded
bgruening
parents:
diff changeset
620 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
621 sub generate_single_fasta_from_sequence_X {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
622 my ( $seq_id, $seq ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
623
b01beb170290 Uploaded
bgruening
parents:
diff changeset
624 $seq = uc($seq);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
625 $seq =~ tr/T/U/;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
626 $seq =~ s/[^AUCGN]/N/g;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
627
b01beb170290 Uploaded
bgruening
parents:
diff changeset
628 my $outfas = $i_tmp . "/seq_" . $SEQNO++ . ".fasta";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
629 my $host = readpipe("hostname");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
630 open( FAS, ">$outfas" ) or die "$host Cannot open file $outfas! Exit...\n\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
631 print FAS ">$seq_id\n$seq";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
632 close(FAS);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
633
b01beb170290 Uploaded
bgruening
parents:
diff changeset
634 return $outfas;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
635 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
636
b01beb170290 Uploaded
bgruening
parents:
diff changeset
637 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
638 # RNAshapes is called with the given input or default parameters.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
639 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
640 # seq_fasta : the sequence fasta file
b01beb170290 Uploaded
bgruening
parents:
diff changeset
641 # rnashapes_location : the location of the installation files for RNAshapes
b01beb170290 Uploaded
bgruening
parents:
diff changeset
642 # win_size : the current window size
b01beb170290 Uploaded
bgruening
parents:
diff changeset
643 # shift : the input parameter -shift
b01beb170290 Uploaded
bgruening
parents:
diff changeset
644 # e : the input parameter -e
b01beb170290 Uploaded
bgruening
parents:
diff changeset
645 # c : the input parameter -c
b01beb170290 Uploaded
bgruening
parents:
diff changeset
646 # t : the input parameter -t
b01beb170290 Uploaded
bgruening
parents:
diff changeset
647 # u : the input parameter -u
b01beb170290 Uploaded
bgruening
parents:
diff changeset
648 # r : the input parameter -r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
649 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
650 # Output: none
b01beb170290 Uploaded
bgruening
parents:
diff changeset
651 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
652 sub call_RNAshapes {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
653 my ( $seq_fasta, $rnashapes_location, $win_size, $shift, $e, $c, $t, $u, $r, $q, $T, $i, $sample_length, $seqLen, $matchShape ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
654 my $FUNCTION = "call_RNAshapes in fasta2shrep_gspan.pl";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
655
b01beb170290 Uploaded
bgruening
parents:
diff changeset
656 ($seq_fasta) or die("INPUT ERROR in $FUNCTION: the fasta file is compulsory!\n");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
657 ($rnashapes_location) or die( "INPUT ERROR in $FUNCTION: the RNAshapes location" . " is compulsory!\n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
658 die "$rnashapes_location does not exist! Exit...\n\n" if ( !-e $rnashapes_location );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
659 my $call = $rnashapes_location . " -o 1 "; # the output format is of type 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
660 $call .= "-q " if ($q);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
661 $call .= "-T $T " if ( $q and $T );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
662 $call .= "-w $win_size ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
663 $call .= "-W $shift " if ($i_shift);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
664 die("ERROR in $FUNCTION: Give only one of the options -c or -e (RNAshapes)!\n")
b01beb170290 Uploaded
bgruening
parents:
diff changeset
665
b01beb170290 Uploaded
bgruening
parents:
diff changeset
666 if ( $e && $c );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
667 $call .= "-e $e " if ($e);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
668 $call .= "-c $c " if ($c);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
669 $call .= "-t $t " if ($t);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
670 $call .= "-u " if ( $u and not $i ); ## not possible in sampling mode
b01beb170290 Uploaded
bgruening
parents:
diff changeset
671 $call .= "-r " if ($r);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
672 $call .= "-m $matchShape " if ($matchShape);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
673
b01beb170290 Uploaded
bgruening
parents:
diff changeset
674 ## check is a bit long but : we want to sample if the window is larger than $sample_length and full seq is larger than window or sample_len
b01beb170290 Uploaded
bgruening
parents:
diff changeset
675 ## necessary to do sampling a large window is given, sample_length is shorter than window, but seq is longer than sample_len
b01beb170290 Uploaded
bgruening
parents:
diff changeset
676 $call .= "-i $i -A " if ( not $q and $i and ( $win_size >= $sample_length and ( $win_size <= $seqLen or $seqLen >= $sample_length ) ) ); ## -A is to omit samples and print only combined shape probs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
677 $call .= " < $seq_fasta";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
678
b01beb170290 Uploaded
bgruening
parents:
diff changeset
679 ($i_debug) and print STDERR "$seqLen $sample_length $win_size $call\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
680
b01beb170290 Uploaded
bgruening
parents:
diff changeset
681 open my $rnashapesoutput, "$call |" or die( "ERROR in $FUNCTION: The following call " . "could not be carried out!\n$call\n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
682
b01beb170290 Uploaded
bgruening
parents:
diff changeset
683 return $rnashapesoutput;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
684 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
685
b01beb170290 Uploaded
bgruening
parents:
diff changeset
686 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
687 # The output of RNAshapes for one sequence and one window size is read
b01beb170290 Uploaded
bgruening
parents:
diff changeset
688 # and converted into graph format.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
689 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
690 # rnashapeoutput : filehandle for RNAshapes output in format -o 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
691 # curr_gi : current graph index (for vertices)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
692 # maxShreps : max number of shreps to convert to graphs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
693 # graph_file_hdl : the output handler for the graph file
b01beb170290 Uploaded
bgruening
parents:
diff changeset
694 # graphHead : the header line for the complete graph (for sequence)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
695 # winSize : current window size in input for RNAshapes
b01beb170290 Uploaded
bgruening
parents:
diff changeset
696 # seqLen : full input seq length
b01beb170290 Uploaded
bgruening
parents:
diff changeset
697 # used_t :
b01beb170290 Uploaded
bgruening
parents:
diff changeset
698 # annotate:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
699 # abstr :
b01beb170290 Uploaded
bgruening
parents:
diff changeset
700 # cue :
b01beb170290 Uploaded
bgruening
parents:
diff changeset
701 # stacks :
b01beb170290 Uploaded
bgruening
parents:
diff changeset
702 # orig_seq : the nucleotide sequence as read from fasta
b01beb170290 Uploaded
bgruening
parents:
diff changeset
703 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
704 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
705 # The current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
706 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
707 sub convert_RNAshapes_output {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
708 my ( $rnashapesoutput, $curr_gi, $maxShreps, $graph_file_hdl, $graphHead, $winSize, $seqLen, $used_t, $annotate, $abstr, $cue, $stacks, $orig_seq ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
709
b01beb170290 Uploaded
bgruening
parents:
diff changeset
710 ## omit first line in output, contains fasta header line
b01beb170290 Uploaded
bgruening
parents:
diff changeset
711 my $line = <$rnashapesoutput>;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
712 my $win_shreps = [];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
713 my $win_start;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
714 my $win_end;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
715 my $win_seq;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
716 my $win_shrep_count = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
717 my $win_sample = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
718 my $winHead;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
719 my $win_globalFolding;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
720 my $win_size_real;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
721
b01beb170290 Uploaded
bgruening
parents:
diff changeset
722 # reading RNAshapes output
b01beb170290 Uploaded
bgruening
parents:
diff changeset
723 while ( $line = <$rnashapesoutput> ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
724
b01beb170290 Uploaded
bgruening
parents:
diff changeset
725 if ( $line =~ /^(\d+)\s+(\d+)$/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
726 ## line: "<start> <end>"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
727
b01beb170290 Uploaded
bgruening
parents:
diff changeset
728 if ( @{$win_shreps} > 0 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
729
b01beb170290 Uploaded
bgruening
parents:
diff changeset
730 print $graph_file_hdl $winHead;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
731
b01beb170290 Uploaded
bgruening
parents:
diff changeset
732 ## remove SHAPE="_" shrep if it exists in $win_shreps
b01beb170290 Uploaded
bgruening
parents:
diff changeset
733 ## do this only when have a sequence graph already
b01beb170290 Uploaded
bgruening
parents:
diff changeset
734 if ($i_add_seq_graph_t) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
735 my @new_win_shreps = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
736 map { push( @new_win_shreps, $_ ) if ( $_->[ $#{$_} ] ne "_" ) } @{$win_shreps};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
737 $win_shreps = \@new_win_shreps;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
738 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
739
b01beb170290 Uploaded
bgruening
parents:
diff changeset
740 ## add graph with no structure depending on $i_add_seq_graph_win to win
b01beb170290 Uploaded
bgruening
parents:
diff changeset
741 if ($i_add_seq_graph_win ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
742 ( $curr_gi ) = convertSeqWindow( $win_seq, $win_size_real,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
743 $win_start, $curr_gi, $winHead, $graph_file_hdl, $annotate, $orig_seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
744 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
745
b01beb170290 Uploaded
bgruening
parents:
diff changeset
746 $curr_gi = convertShapeWindow( $win_shreps, $win_seq, $win_size_real,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
747 $win_start, $curr_gi, $graph_file_hdl, $winHead, $annotate, $abstr,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
748 $cue, $stacks, $orig_seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
749 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
750
b01beb170290 Uploaded
bgruening
parents:
diff changeset
751 ## set new window params
b01beb170290 Uploaded
bgruening
parents:
diff changeset
752 $win_shreps = [];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
753 $win_start = $1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
754 $win_end = $2;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
755 $win_shrep_count = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
756 $win_size_real = $win_end - $win_start + 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
757
b01beb170290 Uploaded
bgruening
parents:
diff changeset
758 if ( ($win_size_real) >= $seqLen ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
759 $win_globalFolding = 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
760 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
761 $win_globalFolding = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
762 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
763 my $win_center = $win_start + ( ( $win_size_real + 1 ) / 2 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
764
b01beb170290 Uploaded
bgruening
parents:
diff changeset
765 $winHead = getWindowHeader( $graphHead, $winSize, $win_start, $win_end, $win_center, $win_globalFolding, $win_sample, $used_t );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
766
b01beb170290 Uploaded
bgruening
parents:
diff changeset
767 } elsif ( $line =~ /^(\S+)$/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
768 ## line: "CUUAUGAGUAAGGAAAAUAACGAUUCGGGGUGACGCCCGAAUCCUCACUG"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
769 $win_seq = uc($1); ## to be 101% sure that we have by default uppercase chars
b01beb170290 Uploaded
bgruening
parents:
diff changeset
770
b01beb170290 Uploaded
bgruening
parents:
diff changeset
771 } elsif ( $line =~ /^Results for (\d+) iterations:$/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
772 ## line: "Results for 10 iterations:"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
773 $win_sample = $1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
774
b01beb170290 Uploaded
bgruening
parents:
diff changeset
775 } elsif ( $line =~ /^Shape\s+\S+\s+not found within energy range.*$/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
776 ## line: "Shape [] not found within energy range (-24.75 to -27.50). Try -c or -e to increase range."
b01beb170290 Uploaded
bgruening
parents:
diff changeset
777 $win_shreps = [];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
778
b01beb170290 Uploaded
bgruening
parents:
diff changeset
779 } elsif ( $line =~ /^([\(\)\.]+)\s+\((\S+)\)\s+(\S+)$/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
780 ## line:"...((((..(((....)))))))...........(((((......))))) (-10.10) [[]][]"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
781 ## take only $maxShreps shreps per window if set
b01beb170290 Uploaded
bgruening
parents:
diff changeset
782 next if ( $maxShreps && $win_shrep_count >= $maxShreps );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
783 push( @{$win_shreps}, [ $1, "ENERGY", $2, "SHAPE", $3 ] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
784 $win_shrep_count++;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
785
b01beb170290 Uploaded
bgruening
parents:
diff changeset
786 } elsif ( $line =~ /^([\(\)\.]+)\s+\((\S+)\)\s+\((\S+)\)\s+(\S+)$/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
787 ## line:"((((..((((...((.((.((.....)).)).))...))))..))))... (-10.60) (0.7795360) [[[[[]]]]]"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
788 ## take only $maxShreps shreps per window if set
b01beb170290 Uploaded
bgruening
parents:
diff changeset
789 next if ( $maxShreps && $win_shrep_count >= $maxShreps );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
790 push( @{$win_shreps}, [ $1, "ENERGY", $2, "PROB", $3, "SHAPE", $4 ] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
791 $win_shrep_count++;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
792
b01beb170290 Uploaded
bgruening
parents:
diff changeset
793 } elsif ( $line =~ /^([\(\)\.]+)\s+\((\S+)\)\s+(\S+)\s+(\S+)$/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
794 ## line:"((((..((((...((.((.((.....)).)).))...))))..))))... (-10.60) 0.3000000 [[[[[]]]]]"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
795 ## take only $maxShreps shreps per window if set
b01beb170290 Uploaded
bgruening
parents:
diff changeset
796 next if ( $maxShreps && $win_shrep_count >= $maxShreps );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
797 push( @{$win_shreps}, [ $1, "ENERGY", $2, "SHAPEPROB", $3, "SHAPE", $4 ] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
798 $win_shrep_count++;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
799
b01beb170290 Uploaded
bgruening
parents:
diff changeset
800 } elsif ( $line =~ /^([\(\)\.]+)\s+\((\S+)\)\s+\((\S+)\)\s+(\S+)\s+(\S+)$/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
801 ## line:"((((..((((...((.((.((.....)).)).))...))))..))))... (-10.60) (0.7795360) 0.3000000 [[[[[]]]]]"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
802 ## take only $maxShreps shreps per window if set
b01beb170290 Uploaded
bgruening
parents:
diff changeset
803 next if ( $maxShreps && $win_shrep_count >= $maxShreps );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
804 push( @{$win_shreps}, [ $1, "ENERGY", $2, "PROB", $3, "SHAPEPROB", $4, "SHAPE", $5 ] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
805 $win_shrep_count++;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
806
b01beb170290 Uploaded
bgruening
parents:
diff changeset
807 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
808 next if ( $line =~ /^$/ );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
809 die "Unexpected shape output format!\nline=$line\n\nExit...\n\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
810 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
811 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
812
b01beb170290 Uploaded
bgruening
parents:
diff changeset
813 ## convert last windows
b01beb170290 Uploaded
bgruening
parents:
diff changeset
814 if ( @{$win_shreps} > 0 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
815
b01beb170290 Uploaded
bgruening
parents:
diff changeset
816 print $graph_file_hdl $winHead;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
817
b01beb170290 Uploaded
bgruening
parents:
diff changeset
818 ## remove SHAPE="_" shrep if it exists in $win_shreps
b01beb170290 Uploaded
bgruening
parents:
diff changeset
819 ## do this only when have a sequence graph already
b01beb170290 Uploaded
bgruening
parents:
diff changeset
820 if ($i_add_seq_graph_t) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
821 my @new_win_shreps = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
822 map { push( @new_win_shreps, $_ ) if ( $_->[ $#{$_} ] ne "_" ) } @{$win_shreps};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
823 $win_shreps = \@new_win_shreps;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
824 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
825
b01beb170290 Uploaded
bgruening
parents:
diff changeset
826 ## add graph with no structure depending on $i_add_seq_graph_win to win
b01beb170290 Uploaded
bgruening
parents:
diff changeset
827 if ($i_add_seq_graph_win) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
828 ($curr_gi) = convertSeqWindow( $win_seq, $win_size_real, $win_start,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
829 $curr_gi, $winHead, $graph_file_hdl, $annotate, $orig_seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
830 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
831
b01beb170290 Uploaded
bgruening
parents:
diff changeset
832 $curr_gi = convertShapeWindow( $win_shreps, $win_seq, $win_size_real,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
833 $win_start, $curr_gi, $graph_file_hdl, $winHead, $annotate, $abstr, $cue,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
834 $stacks, $orig_seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
835 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
836
b01beb170290 Uploaded
bgruening
parents:
diff changeset
837 close($rnashapesoutput);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
838
b01beb170290 Uploaded
bgruening
parents:
diff changeset
839 return $curr_gi + 1; # return the gi (graph index) for the next subgraph
b01beb170290 Uploaded
bgruening
parents:
diff changeset
840 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
841
b01beb170290 Uploaded
bgruening
parents:
diff changeset
842 ## Sub to create graph for a complete unstructured sequence
b01beb170290 Uploaded
bgruening
parents:
diff changeset
843 # TODO: document function
b01beb170290 Uploaded
bgruening
parents:
diff changeset
844 sub convertSeqWindow {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
845 my ( $win_seq, $win_size_real, $win_start, $curr_gi, $winHead, $graph_file_hdl, $annotate, $orig_seq ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
846
b01beb170290 Uploaded
bgruening
parents:
diff changeset
847 my $seq_shrep;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
848
b01beb170290 Uploaded
bgruening
parents:
diff changeset
849 # use a different alphabet (lowercase) if option seq-graph-alph set
b01beb170290 Uploaded
bgruening
parents:
diff changeset
850 my $seq_graph_sequence = $i_change_seq_graph_alph ? lc($win_seq) : uc($win_seq);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
851 $seq_shrep = [ "." x $win_size_real, "ENERGY", "0.00", "SHAPE", "_", "STRUCT", "." x $win_size_real, "SEQ", $seq_graph_sequence ];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
852 my $backboneGraph_ref = getBackboneGraph( $seq_graph_sequence, $curr_gi, $win_start, 0, ( $win_size_real - 1 ), $orig_seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
853
b01beb170290 Uploaded
bgruening
parents:
diff changeset
854 print $graph_file_hdl getSeqHeader( $winHead, $seq_shrep );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
855 print $graph_file_hdl join( "\n", @{$backboneGraph_ref} ) . "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
856
b01beb170290 Uploaded
bgruening
parents:
diff changeset
857 $curr_gi += $win_size_real;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
858
b01beb170290 Uploaded
bgruening
parents:
diff changeset
859 if ($annotate) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
860 ## TODO add annotation graphs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
861 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
862
b01beb170290 Uploaded
bgruening
parents:
diff changeset
863 return ($curr_gi);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
864 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
865
b01beb170290 Uploaded
bgruening
parents:
diff changeset
866 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
867 # The results for one window are converted in this method to GSPAN graphs.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
868 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
869 # win_shreps_aref : the array ref of shreps (dot-bracket structures) for the
b01beb170290 Uploaded
bgruening
parents:
diff changeset
870 # current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
871 # win_seq : the nucleotide sequence for the current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
872 # win_size_real : the current (true) window size
b01beb170290 Uploaded
bgruening
parents:
diff changeset
873 # win_start : the starting position of win_seq in the original seq (1-n)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
874 # curr_gi : the current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
875 # graph_file_hdl : the graph file handler
b01beb170290 Uploaded
bgruening
parents:
diff changeset
876 # winHead : the header line for the current sequence window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
877 # TODO annotate
b01beb170290 Uploaded
bgruening
parents:
diff changeset
878 # abstr : input parameter -abstr
b01beb170290 Uploaded
bgruening
parents:
diff changeset
879 # cue : input parameter -cue
b01beb170290 Uploaded
bgruening
parents:
diff changeset
880 # stacks : input parameter -stacks
b01beb170290 Uploaded
bgruening
parents:
diff changeset
881 # orig_seq : the nucleotide sequence as read from fasta
b01beb170290 Uploaded
bgruening
parents:
diff changeset
882 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
883 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
884 # The current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
885 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
886 sub convertShapeWindow {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
887 my ( $win_shreps_aref, $win_seq, $win_size_real, $win_start, $curr_gi,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
888 $graph_file_hdl, $winHead, $annotate, $abstr, $cue, $stacks, $orig_seq ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
889
b01beb170290 Uploaded
bgruening
parents:
diff changeset
890 ## generate for each shrep a connected component in gspan file
b01beb170290 Uploaded
bgruening
parents:
diff changeset
891 foreach my $shrep ( @{$win_shreps_aref} ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
892
b01beb170290 Uploaded
bgruening
parents:
diff changeset
893 # get the current gi as it was at the beginning of this shrep
b01beb170290 Uploaded
bgruening
parents:
diff changeset
894 my $shrep_gi = $curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
895
b01beb170290 Uploaded
bgruening
parents:
diff changeset
896 # cut off unpaired ends, if option is given
b01beb170290 Uploaded
bgruening
parents:
diff changeset
897 my $shrep_struct = $shrep->[0];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
898 my $crop_index_left = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
899 my $crop_index_right = length($shrep_struct) - 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
900
b01beb170290 Uploaded
bgruening
parents:
diff changeset
901 # find croping indices with numbering from 0 to n-1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
902 if ($cue) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
903 $crop_index_left = index( $shrep_struct, "(" ); # find 1st occ of "("
b01beb170290 Uploaded
bgruening
parents:
diff changeset
904 $crop_index_right = rindex( $shrep_struct, ")" ); # find last occ of ")"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
905
b01beb170290 Uploaded
bgruening
parents:
diff changeset
906 # if the complete window is unpaired, then don't crop
b01beb170290 Uploaded
bgruening
parents:
diff changeset
907 if ( $crop_index_left == -1 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
908 $crop_index_left = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
909 $crop_index_right = length($shrep_struct) - 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
910 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
911 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
912
b01beb170290 Uploaded
bgruening
parents:
diff changeset
913 # create structure graph
b01beb170290 Uploaded
bgruening
parents:
diff changeset
914 my $backboneGraph_ref = getBackboneGraph( $win_seq, $curr_gi, $win_start, $crop_index_left, $crop_index_right, $orig_seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
915 my $structGraph_ref;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
916
b01beb170290 Uploaded
bgruening
parents:
diff changeset
917 # add both basic edges, stacks and abstract graph
b01beb170290 Uploaded
bgruening
parents:
diff changeset
918 if ($abstr) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
919 ( $structGraph_ref, $curr_gi ) = getStructPlusAbstractStructGraph( $shrep, $curr_gi, $win_size_real, $cue, $stacks );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
920
b01beb170290 Uploaded
bgruening
parents:
diff changeset
921 # just add basic structure graph edges, and stacks
b01beb170290 Uploaded
bgruening
parents:
diff changeset
922 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
923 ( $structGraph_ref, $curr_gi ) = getStructGraph( $shrep, $curr_gi, $win_size_real, $stacks );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
924 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
925
b01beb170290 Uploaded
bgruening
parents:
diff changeset
926 # additional information for shrep header: sequence and dot-bracket
b01beb170290 Uploaded
bgruening
parents:
diff changeset
927 my $crop_length = $crop_index_right - $crop_index_left + 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
928 my $shrepheader_struct = substr( $shrep_struct, $crop_index_left, $crop_length );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
929 my $shrepheader_seq = substr( $win_seq, $crop_index_left, $crop_length );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
930 push( @{$shrep}, 'STRUCT', $shrepheader_struct, 'SEQ', $shrepheader_seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
931
b01beb170290 Uploaded
bgruening
parents:
diff changeset
932 # print structure graph to file
b01beb170290 Uploaded
bgruening
parents:
diff changeset
933 print $graph_file_hdl getShapeHeader( $winHead, $shrep );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
934 print $graph_file_hdl join( "\n", @{$backboneGraph_ref} ) . "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
935
b01beb170290 Uploaded
bgruening
parents:
diff changeset
936 # don't print empty array; sgdnspdk breaks with empty lines
b01beb170290 Uploaded
bgruening
parents:
diff changeset
937 if ( @{$structGraph_ref} > 0 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
938 print $graph_file_hdl join( "\n", @{$structGraph_ref} ) . "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
939
b01beb170290 Uploaded
bgruening
parents:
diff changeset
940 if ($annotate) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
941
b01beb170290 Uploaded
bgruening
parents:
diff changeset
942 # TODO add annotations to shrep structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
943 # $win_start, $win_end (1-n)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
944 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
945
b01beb170290 Uploaded
bgruening
parents:
diff changeset
946 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
947 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
948
b01beb170290 Uploaded
bgruening
parents:
diff changeset
949 return $curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
950 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
951
b01beb170290 Uploaded
bgruening
parents:
diff changeset
952 ###########################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
953 # Here the vertices for the nucleotides are generated and also the backbone
b01beb170290 Uploaded
bgruening
parents:
diff changeset
954 # edges, which connect the nucleotides in their correct order (i.e order
b01beb170290 Uploaded
bgruening
parents:
diff changeset
955 # of the given sequence)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
956 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
957 # win_seq : the sequence for the current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
958 # win_start : the starting position for the current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
959 # curr_crop_i_left : the cropping index for the left end of sequence (-cue)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
960 # curr_crop_i_right : the cropping index for the right end of sequence (-cue)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
961 # orig_seq : the nucleotide sequence as read from fasta
b01beb170290 Uploaded
bgruening
parents:
diff changeset
962 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
963 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
964 # The lines of the graph in an array reference to be printed to a file with
b01beb170290 Uploaded
bgruening
parents:
diff changeset
965 # one element per line.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
966 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
967 sub getBackboneGraph {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
968 my ( $win_seq, $curr_gi, $win_start, $curr_crop_i_left, $curr_crop_i_right, $orig_seq ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
969
b01beb170290 Uploaded
bgruening
parents:
diff changeset
970 # RNAshapes substitutes T -> U, sequence only does not
b01beb170290 Uploaded
bgruening
parents:
diff changeset
971 # thus, just transform all t/T -> u/U to remain consistent
b01beb170290 Uploaded
bgruening
parents:
diff changeset
972 $orig_seq =~ tr /tT/uU/;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
973 $win_seq =~ tr /tT/uU/;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
974
b01beb170290 Uploaded
bgruening
parents:
diff changeset
975 my @seq;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
976 @seq = split( "", $win_seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
977
b01beb170290 Uploaded
bgruening
parents:
diff changeset
978 # if the vp option is set we need to obtain the original capitalization from
b01beb170290 Uploaded
bgruening
parents:
diff changeset
979 # $orig_seq and extract the windowed sequence manually
b01beb170290 Uploaded
bgruening
parents:
diff changeset
980 my @seq_vp;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
981 if ( defined $i_vp ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
982 my $win_len = length($win_seq);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
983 my $capitalized_win_seq = substr( $orig_seq, $win_start - 1, $win_len );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
984 ( uc($capitalized_win_seq) eq uc($win_seq) ) or
b01beb170290 Uploaded
bgruening
parents:
diff changeset
985 die( "error: windowed sequence generated due to vp option not equal " .
b01beb170290 Uploaded
bgruening
parents:
diff changeset
986 "to sequence reported by RNASHAPES.\n" .
b01beb170290 Uploaded
bgruening
parents:
diff changeset
987 "'${capitalized_win_seq}' != '${win_seq}'" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
988 @seq_vp = split( "", $capitalized_win_seq );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
989 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
990
b01beb170290 Uploaded
bgruening
parents:
diff changeset
991 my @vert = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
992 my @edg = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
993
b01beb170290 Uploaded
bgruening
parents:
diff changeset
994 my $curr_abs_pos = $win_start + $curr_crop_i_left;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
995 $curr_gi += $curr_crop_i_left;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
996
b01beb170290 Uploaded
bgruening
parents:
diff changeset
997 # set vertice labeled with 'v' or 'V' according to sequence and vp option
b01beb170290 Uploaded
bgruening
parents:
diff changeset
998 # when vp set,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
999 # uppercase nucleotides are annotated with 'v'
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1000 # lowercase nucleotides are annotated with 'V'
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1001 my $vertice_label = ( defined $i_vp and ( $seq_vp[$curr_crop_i_left] =~ /[a-z]/ ) ) ? 'V' : 'v';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1002
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1003 # create backbone vertice of first nucleotide
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1004 push( @vert, join( ' ', $vertice_label, $curr_gi, $seq[$curr_crop_i_left], $curr_abs_pos ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1005
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1006 foreach my $idx ( ( $curr_crop_i_left + 1 ) .. $curr_crop_i_right ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1007 $curr_abs_pos++;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1008 $curr_gi++;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1009
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1010 # set vertice label as described above
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1011 $vertice_label = ( defined $i_vp and ( $seq_vp[$idx] =~ /[a-z]/ ) ) ? 'V' : 'v';
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1012 push( @vert, join( ' ', $vertice_label, $curr_gi, $seq[$idx], $curr_abs_pos ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1013 push( @edg, join( ' ', "e", $curr_gi - 1, $curr_gi, '> 1' ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1014 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1015
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1016 my @ret = ( @vert, @edg );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1017
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1018 return \@ret;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1019 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1020
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1021 ###########################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1022 # This method does the same as getStructGraph, but is extended to identify
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1023 # the abstract parts of the structure and also add this to the graph
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1024 # via abstract relations, see information below.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1025 # We already have the backbone graph, now the base-pair edges need to be
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1026 # added to this graph and the abstract graph is added after this. Finally
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1027 # the abstract relations, pointing from the abstract level to the basic
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1028 # structure level is added. All the while, we keep track of the current
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1029 # index graph. Furthermore, in the basic structure graph, we have vertices
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1030 # stacks that connects the four stacked base-pairs involved. (symbols P
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1031 # for vertex and p for edges). The abstract vertices are labelled according
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1032 # to the given name-space, then #, then the abstract structure type, e.g.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1033 # HL for hairpin-loop. The edges between the structure types are denoted
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1034 # with n, and the relation vertex is ^R, with ^r going from the abstract
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1035 # structure to the relation and from the relation to the basic structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1036 # is @r.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1037 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1038 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1039 # curr_shrep : the current shrep as dot-bracket structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1040 # curr_gi : the current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1041 # win_size : the window size
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1042 # cue : the input parameter -cue (whether to crop unpaired ends)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1043 # stacks : the input parameter -stacks (whether to add stack information)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1044 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1045 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1046 # The structure graph information line-by-line as an array reference and
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1047 # the current graph index.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1048 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1049 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1050 sub getStructPlusAbstractStructGraph {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1051 my ( $curr_shrep, $curr_gi, $win_size, $cue, $stacks ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1052
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1053 # $win_size = length($curr_shrep->[0]);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1054
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1055 my @struct = split( "", $curr_shrep->[0] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1056
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1057 # print "=================testing: new shrep : gi=$curr_gi ===============================\n" if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1058
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1059 # OBJECTS and VARIABLES
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1060 # all indices are saved according to the current graph index, $curr_gi,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1061 # which is at the beginning of the current window, so that the nucleotide
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1062 # index can be inferred using $idx + $curr_gi
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1063 # opening brackets
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1064 my @open_blks = (); # open blocks of consecutive opening brackets (array of arrays)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1065 my @p_open = (); # currently open block of the last identified complete block
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1066 my @c_open = (); # current "incomplete" block of consecutive opening brackets
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1067 # base-pair objects
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1068 my %bps = (); # hash of all base-pairs with stem for a value, if it is at
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1069 # the BP is opening or closing a stem object
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1070 my @c_bp = (); # current BP, just being closed at current index pos (i,j)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1071 my @p_bp = (); # previously closed BP at position before current index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1072 # stem objects
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1073 my %stems = (); # all stems, key="i:j,k:l", value=stem array, (i,j) outer BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1074 my @c_stem = (); # current incomplete stem object
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1075 my @stmbrks = (); # all stem-breaks in the form of (i,k,l), where i is the remaining
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1076 # open base and (k,l) is the subsequent closing base-pair
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1077
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1078 # my @p_stmbrk = (); # previous stem-break in the form of (i,k,l), where i is the remaining
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1079 # # open base and (k,l) is the subsequent closing base-pair
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1080 # loop objects
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1081 my @up = (); # all unpaired regions (i,j) that are of unkown type
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1082 my @c_up = (); # current incomplete unpaired object
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1083 my @hls = (); # all hairpin loop objects
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1084 my @bls = (); # all bulge-loop objects
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1085 my @ils = (); # all internal loops
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1086 my @mls = (); # all multi-loops
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1087 my @c_mls = (); # current incomplete multi-loops! (there can be more than one)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1088 my @els = (); # all external loops
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1089 my @c_el = (); # current incomplete external loop
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1090
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1091 # iterate through shrep structure an identify abstract parts
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1092 foreach my $idx ( 0 .. @struct - 1 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1093
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1094 #===================================================
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1095 # current char is the opening bracket of a base-pair
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1096 if ( $struct[$idx] eq "(" ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1097
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1098 #===================================================
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1099
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1100 # update current window index to current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1101 $idx += $curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1102
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1103 # case: '((' currently there is an open block, extend it
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1104 if (@c_open) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1105 push( @c_open, $idx );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1106
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1107 # case: '.(' or ')(' or BOS'(' there is no open block, create a new one ".(" or ")("
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1108 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1109
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1110 # begin a new current open block
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1111 push( @c_open, $idx );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1112
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1113 # case: ')('
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1114 if (@c_stem) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1115
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1116 # CLOSE STEM
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1117 print STDERR "TEST - closed stem:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@c_stem ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1118 close_stem( \@c_stem, \%stems, \@p_bp, \@c_bp, \@p_open, \@stmbrks, \@open_blks, \%bps );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1119
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1120 # case (x(x)(
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1121 if (@open_blks) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1122
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1123 # EXTEND_ML, if one exists
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1124 if (@c_mls) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1125 extend_or_open_nested_ML( $curr_shrep, \@c_mls, \@p_bp, \@up, \@stmbrks, \@open_blks, $idx, $curr_gi );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1126
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1127 # OPEN_ML
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1128 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1129
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1130 # case 'x((x)(' the opening of the ML was a stem-break
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1131 if ( @stmbrks
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1132 && $stmbrks[-1]->[1] == $p_bp[0]
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1133 && $stmbrks[-1]->[2] == $p_bp[1] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1134 my @c_stmbrk = @{ pop(@stmbrks) };
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1135 my @a = ( $c_stmbrk[0], -1 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1136 my @ml = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1137 push( @ml, \@a ); # 1st base-pair is in-waiting
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1138 my @a2 = ( $c_stmbrk[1], $c_stmbrk[2] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1139 push( @ml, \@a2 ); #2nd BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1140 my @a3 = ($idx);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1141 push( @ml, \@a3 ); #first of current BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1142 push( @c_mls, \@ml );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1143 print STDERR "TEST - opened ML with SB:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@ml, 1 ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1144
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1145 # case 'x(.(x)('
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1146 } elsif (@up) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1147 my @tmp_up = @{ pop(@up) };
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1148
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1149 # compare adjacent BP to UP to see if they fit
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1150 if ( $tmp_up[1] + 1 == $p_bp[0]
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1151 && $open_blks[-1]->[-1] == $tmp_up[0] - 1 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1152 my @a = ( $open_blks[-1]->[-1], -1 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1153 my @ml = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1154 push( @ml, \@a ); # 1st BP awaits closing
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1155 my @newbp = @p_bp;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1156 push( @ml, \@newbp ); # 2nd BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1157 my @a2 = ($idx);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1158 push( @ml, \@a2 ); # current open BP, awaits closing
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1159 push( @c_mls, \@ml );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1160 print STDERR "TEST - opened ML with prev UP1:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@ml, 1 ) )
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1161
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1162 if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1163 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1164 die "ERROR: the base-pairs to not match the " . "previous unpaired region?!\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1165 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1166 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1167 die "ERROR: in case ML, but there is no initial" . "part of the ML to be identified\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1168 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1169 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1170
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1171 # case: 'x(x)('
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1172 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1173
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1174 # case: '.(x)(' => CLOSE_EL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1175 if (@c_el) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1176 if ( $c_el[-1] == $p_bp[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1177 push( @c_el, $p_bp[1] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1178 my @newel = @c_el;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1179 push( @els, \@newel );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1180 @c_el = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1181 print STDERR "TEST - closed EL1:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newel ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1182 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1183 die "ERROR: the previous BP must match the current EL\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1184 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1185 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1186
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1187 # in both cases OPEN_EL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1188 push( @c_el, @p_bp );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1189 push( @c_el, $idx );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1190 print STDERR "TEST - opened EL:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@c_el ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1191 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1192
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1193 # case '.('
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1194 } elsif (@c_up) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1195
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1196 # case (x(x).( OR 'x(.('
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1197 if (@open_blks) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1198
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1199 # case 'x(x).(' ML
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1200 if ( @p_bp && ( $p_bp[1] == $c_up[0] - 1 ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1201
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1202 # case '((x)x(x).(' EXTEND_ML, if one exists
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1203 if (@c_mls) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1204
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1205 extend_or_open_nested_ML( $curr_shrep, \@c_mls, \@p_bp, \@up, \@stmbrks, \@open_blks, $idx, $curr_gi );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1206
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1207 # case 'x((x).(' or 'x(.(x).(' OPEN_ML
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1208 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1209
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1210 # case 'x((x).(' the opening of the ML was a stem-break
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1211 if ( @stmbrks
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1212 && $stmbrks[-1]->[1] == $p_bp[0]
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1213 && $stmbrks[-1]->[2] == $p_bp[1] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1214 my @c_stmbrk = @{ pop(@stmbrks) };
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1215 my @a = ( $c_stmbrk[0], -1 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1216 my @ml = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1217 push( @ml, \@a ); # 1st base-pair is in-waiting
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1218 my @a2 = ( $c_stmbrk[1], $c_stmbrk[2] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1219 push( @ml, \@a2 ); #2nd BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1220 my @a3 = ($idx);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1221 push( @ml, \@a3 ); #first of current BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1222 push( @c_mls, \@ml );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1223 print STDERR "TEST - open ML with SB:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@ml, 1 ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1224
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1225 # case 'x(.(x).(' opening the ML with initial UP region
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1226 } elsif (@up) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1227
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1228 # get previous unpaired region
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1229 my $tmp = pop(@up);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1230 my @p_up = @{$tmp};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1231
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1232 # case: '(.(x).(' compare adjacent BP to UP to see if they create a ML
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1233 if ( $p_up[1] + 1 == $p_bp[0]
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1234 && $open_blks[-1]->[-1] == $p_up[0] - 1 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1235 my @a = ( $open_blks[-1]->[-1], -1 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1236 my @ml = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1237 push( @ml, \@a ); # 1st BP awaits closing
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1238 my @newbp = @p_bp;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1239 push( @ml, \@newbp ); # 2nd BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1240 my @a2 = ($idx);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1241 push( @ml, \@a2 ); # current open BP, awaits closing
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1242 push( @c_mls, \@ml );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1243 print STDERR "TEST - opened ML with prev UP2:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@ml, 1 ) )
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1244
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1245 if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1246 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1247
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1248 # has to be an ML, but can't find first part
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1249 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1250 die "ERROR: in case ML, but there is no initial " . "part of the ML to be identified\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1251 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1252 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1253
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1254 # case '(.(
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1255 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1256 my @newup = @c_up;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1257 push( @up, \@newup );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1258 print STDERR "TEST - closed UP:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newup ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1259 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1260
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1261 # case: 'x(x).(' OR 'EOS.('
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1262 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1263
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1264 # case: '.(x)(' => CLOSE_EL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1265 if (@c_el) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1266 if ( $c_el[-1] == $c_bp[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1267 push( @c_el, $p_bp[1] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1268 my @newel = @c_el;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1269 push( @els, \@newel );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1270 @c_el = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1271 print STDERR "TEST - closed EL2:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newel ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1272 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1273 die "ERROR: the previous BP must match the current EL\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1274 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1275 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1276
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1277 # in both cases OPEN_EL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1278 if (@p_bp) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1279 push( @c_el, @p_bp );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1280 push( @c_el, $idx );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1281 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1282
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1283 # ignore this if -cue is given
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1284 # TODO check the gi index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1285 if ($cue) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1286 @c_el = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1287 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1288 push( @c_el, ( $curr_gi, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1289 push( @c_el, $idx );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1290 print STDERR "TEST - opened EL:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@c_el ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1291 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1292 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1293 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1294 @c_up = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1295
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1296 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1297
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1298 # no more cases except opening bracket at beginning of sequence
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1299 # in this case do nothing, as the index has already been added
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1300 die "ERROR: '((', ')(', '.(' have all been covered\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) )
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1301 unless ( $idx == $curr_gi );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1302
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1303 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1304 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1305
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1306 #===================================================
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1307 # current char is the unpaired base
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1308 } elsif ( $struct[$idx] eq "." ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1309
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1310 #===================================================
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1311
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1312 # update current window index to current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1313 $idx += $curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1314
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1315 # case ').' => CLOSE_STEM
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1316 if (@c_stem) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1317 print STDERR "TEST - closed stem:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@c_stem ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1318 close_stem( \@c_stem, \%stems, \@p_bp, \@c_bp, \@p_open, \@stmbrks, \@open_blks, \%bps );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1319 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1320
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1321 # case '..' extend UP, @c_up
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1322 if (@c_up) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1323 $c_up[1] = $idx;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1324
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1325 # case 'x.' open new UP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1326 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1327
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1328 # OPEN_UP, @c_up
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1329 @c_up = ( $idx, $idx );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1330
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1331 # case '(.' just come to the end of an open block, push it onto stack
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1332 if (@c_open) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1333 my @newopen = @c_open;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1334 push( @open_blks, \@newopen );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1335 @c_open = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1336
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1337 # case ').' or '.'
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1338 # (have already closed stem and created stem-break and emptied p_open)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1339 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1340
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1341 # case 'x(x).' or '.' => CLOSE_EL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1342 if ( !@open_blks ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1343
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1344 # case 'x().().'
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1345 if (@c_el) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1346
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1347 # double check
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1348 if ( $c_el[-1] == $p_bp[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1349 push( @c_el, $p_bp[1] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1350 my @newel = @c_el;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1351 push( @els, \@newel );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1352 @c_el = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1353 print STDERR "TEST - closed EL3:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newel ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1354 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1355 die "This case should not occur!\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1356 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1357 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1358 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1359 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1360 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1361
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1362 #===================================================
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1363 # current char is the closing bracket of a base-pair
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1364 } elsif ( $struct[$idx] eq ")" ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1365
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1366 #===================================================
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1367
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1368 # update current window index to current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1369 $idx += $curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1370
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1371 # save previous base-pair
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1372 if (@c_bp) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1373 @p_bp = @c_bp;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1374 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1375
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1376 # case: '((x))', extend stem
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1377 if (@p_open) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1378
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1379 # get current base-pair
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1380 @c_bp = ( pop(@p_open), $idx );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1381
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1382 # add it to the base-pair hash
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1383 $bps{"$c_bp[0]:$c_bp[1]"} = "";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1384
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1385 # EXTEND_STEM
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1386 if (@c_stem) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1387 $c_stem[0] = $c_bp[0];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1388 $c_stem[1] = $c_bp[1];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1389 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1390 die "ERROR: in case '))' there has to be an open stem object!\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1391 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1392
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1393 # case: '(x(x))' or '(x.)', '()' not allowed
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1394 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1395
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1396 # case: '(x(x))' CLOSE_STEM close previous stem
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1397 if (@c_stem) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1398 print STDERR "TEST - closed stem:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@c_stem ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1399 close_stem( \@c_stem, \%stems, \@p_bp, \@c_bp, \@p_open, \@stmbrks, \@open_blks, \%bps );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1400
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1401 # case '(x(x))' OPEN_STEM
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1402 if (@open_blks) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1403 my $tmp = pop(@open_blks);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1404 @p_open = @{$tmp};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1405
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1406 # get current base-pair
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1407 @c_bp = ( pop(@p_open), $idx );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1408
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1409 # add it to the base-pair hash
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1410 # as the stem is not finished yet, cannot add stem information
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1411 $bps{"$c_bp[0]:$c_bp[1]"} = "";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1412
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1413 # open stem
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1414 $c_stem[0] = $c_bp[0];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1415 $c_stem[1] = $c_bp[1];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1416 $c_stem[2] = $c_bp[0];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1417 $c_stem[3] = $c_bp[1];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1418 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1419 die "ERROR: there have to be open blocks to match " . "current closing bracket!\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1420 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1421
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1422 # case: '(.(x))' => CLOSE_BL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1423 if ( @up && $up[-1]->[0] - 1 == $c_bp[0] && $up[-1]->[1] + 1 == $p_bp[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1424 my @newBL = ( @c_bp, @p_bp );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1425 push( @bls, \@newBL );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1426 pop(@up);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1427 print STDERR "TEST - closed BL:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newBL ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1428
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1429 # case: '(x(x).(x))' => CLOSE_ML
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1430 } elsif ( @c_mls && $c_mls[-1]->[-1]->[-1] == $p_bp[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1431 my @newml = @{ pop(@c_mls) };
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1432 push( @{ $newml[-1] }, $p_bp[1] ); # close last BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1433 $newml[0]->[1] = $c_bp[1]; # close 1st BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1434 push( @mls, \@newml );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1435 print STDERR "TEST - closed ML1:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newml, 1 ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1436 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1437 die "ERROR: what is this case?\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1438 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1439
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1440 # case: (x.)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1441 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1442
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1443 # OPEN_STEM open new stem
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1444 if (@open_blks) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1445 @p_open = @{ pop(@open_blks) };
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1446
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1447 # get current base-pair
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1448 @c_bp = ( pop(@p_open), $idx );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1449
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1450 # add it to the base-pair hash
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1451 # as the stem is not finished yet, cannot add stem information
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1452 $bps{"$c_bp[0]:$c_bp[1]"} = "";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1453
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1454 # open stem
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1455 $c_stem[0] = $c_bp[0];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1456 $c_stem[1] = $c_bp[1];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1457 $c_stem[2] = $c_bp[0];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1458 $c_stem[3] = $c_bp[1];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1459 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1460 die "ERROR: there have to be open blocks to match " . "current closing bracket!$curr_shrep->[0]\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1461 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1462
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1463 # CLOSE_C_UP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1464 if (@c_up) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1465
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1466 # case: (.) => close_HL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1467 if ( $c_up[0] - 1 == $c_bp[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1468 my @newhl = @c_bp;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1469 push( @hls, \@newhl );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1470 @c_up = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1471 print STDERR "TEST - closed HL:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newhl ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1472
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1473 # case: (x(x).)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1474 } elsif ( @p_bp && $c_up[0] - 1 == $p_bp[1] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1475
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1476 # case: ((x).) => CLOSE_BL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1477 if ( @stmbrks
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1478 && $stmbrks[-1]->[0] == $c_bp[0]
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1479 && $stmbrks[-1]->[1] == $p_bp[0]
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1480 && $stmbrks[-1]->[2] == $p_bp[1] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1481 my @newbl = ( @c_bp, @p_bp );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1482 push( @bls, \@newbl );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1483 pop(@stmbrks);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1484 @c_up = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1485 print STDERR "TEST - closed BL:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newbl ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1486
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1487 # case: (x(x).), either CLOSE_ML or CLOSE_IL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1488 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1489
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1490 # case '(.(x).)' => CLOSE_IL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1491 if ( @up
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1492 && ( $up[-1]->[0] - 1 == $c_bp[0] )
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1493 && ( $up[-1]->[1] + 1 == $p_bp[0] ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1494 my @p_up = @{ pop(@up) };
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1495 my @newil = ( @c_bp, @p_bp );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1496 push( @ils, \@newil );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1497 @c_up = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1498 print STDERR "TEST - closed IL:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newil ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1499
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1500 # case (x(x)x(x).) => CLOSE_ML remember, array of tuples
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1501 } elsif ( @c_mls && $c_mls[-1]->[-1]->[0] == $p_bp[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1502 my @newml = @{ pop(@c_mls) };
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1503 push( @{ $newml[-1] }, $p_bp[1] ); # close final BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1504 $newml[0]->[1] = $c_bp[1]; # close 1st BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1505 push( @mls, \@newml );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1506 @c_up = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1507 print STDERR "TEST - closed ML2:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@newml, 1 ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1508 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1509 die "There should be no such case!\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1510 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1511 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1512
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1513 # there should be no more unknown unpaired regions left!
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1514 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1515 die "There shouldn't be any unknown unpaired " . "regions left\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1516 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1517
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1518 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1519 die "ERROR: '()' is not allowed in $curr_shrep->[0]\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1520 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1521 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1522 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1523 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1524 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1525
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1526 # EOS
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1527
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1528 # CLOSE_STEM
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1529 print STDERR "TEST - closed stem EOS:\n" . ( test( $curr_shrep->[0], ( $win_size + $curr_gi ), $curr_gi, \@c_stem ) )
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1530 if ( @c_stem && $i_debug );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1531 close_stem( \@c_stem, \%stems, \@p_bp, \@c_bp, \@p_open, \@stmbrks, \@open_blks, \%bps ) if (@c_stem);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1532 die "ERROR: there should not be an open ML!\n" . $curr_shrep . "\n"
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1533 if (@c_mls);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1534
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1535 # case ().() CLOSE_EL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1536 if (@c_el) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1537 if ( $c_el[-1] == $p_bp[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1538 push( @c_el, $p_bp[1] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1539 my @newel = @c_el;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1540 push( @els, \@newel );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1541 @c_el = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1542
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1543 print STDERR "TEST - closed EL4 EOS:\n" . ( test( $curr_shrep->[0], ( $win_size + $curr_gi ), $curr_gi, \@newel ) )
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1544 if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1545 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1546 die "ERROR: the base-pairs in the EL don't match" . $curr_shrep . "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1547 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1548 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1549
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1550 # case (). CLOSE_EL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1551 if (@c_up) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1552 unless ($cue) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1553 if (@p_bp) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1554 push( @c_el, @p_bp );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1555 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1556 push( @c_el, ( $curr_gi, $curr_gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1557 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1558
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1559 push( @c_el, ( ( $curr_gi + $win_size - 1 ), ( $curr_gi + $win_size - 1 ) ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1560 my @newel = @c_el;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1561 push( @els, \@newel );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1562 @c_el = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1563 print STDERR "TEST - closed EL5 EOS:\n" . ( test( $curr_shrep->[0], ( $win_size + $curr_gi ), $curr_gi, \@newel ) )
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1564 if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1565 ## check case where cue is given, but window is completely unstructured
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1566 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1567 unless (@p_bp) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1568 my @newel = ( $curr_gi, $curr_gi, ( $curr_gi + $win_size - 1 ), ( $curr_gi + $win_size - 1 ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1569 push( @els, \@newel );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1570 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1571 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1572 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1573
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1574 # # update current graph index (since all shrep indices have been read)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1575 $curr_gi += $win_size;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1576
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1577 # ===================================================
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1578 # build graph lines
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1579 my @graph_lines = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1580
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1581 # ===================================================
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1582
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1583 # graph line objects
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1584 my @edg = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1585 my @stackgraph = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1586 my @abstrgraph = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1587
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1588 #-------------------> stems,stacks
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1589 # build basic graph edges, stacks, and vertices for stems in the abstract graph
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1590 my @tmp = keys %stems;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1591 foreach my $sk (@tmp) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1592 my @stem = @{ $stems{$sk} }; # stem (i,j,k,l)=> (i,j)= (0,1) and (k,l)=(2,3)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1593 die "ERROR: There are not enough elements in this stem: ", ( join( ",", @stem ) ), "\n" unless ( scalar(@stem) == 4 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1594
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1595 # infer base-pairs from stem
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1596 @p_bp = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1597 @c_bp = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1598 my @stem_relation_idxs = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1599 my @stack_relation_idxs = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1600 my $stacksize = $stem[2] - $stem[0] + 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1601 for ( my $x = 0 ; $x < $stacksize ; $x++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1602
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1603 # for (my $i = $stem[0]; $i <= $stem[2] ; $i++){
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1604 # for(my $j = $stem[1]; $j >= $stem[3]; $j--){
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1605 my $i = $stem[0] + $x;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1606 my $j = $stem[1] - $x;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1607 @p_bp = @c_bp;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1608 @c_bp = ( $i, $j );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1609 if ( defined $bps{"$i:$j"} ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1610
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1611 # add indices to stem relation object
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1612 push( @stem_relation_idxs, $i );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1613 push( @stem_relation_idxs, $j );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1614
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1615 # add edge to graph edges
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1616 push( @edg, "e $i $j s" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1617
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1618 # add stack if option given
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1619 if ($stacks) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1620 if (@p_bp) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1621 push( @stackgraph, "v $curr_gi P" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1622 push( @stackgraph, "e $curr_gi $p_bp[0] p" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1623 push( @stackgraph, "e $curr_gi $p_bp[1] p" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1624 push( @stackgraph, "e $c_bp[0] $curr_gi p" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1625 push( @stackgraph, "e $c_bp[1] $curr_gi p" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1626 push( @stack_relation_idxs, $curr_gi );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1627
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1628 # update graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1629 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1630 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1631 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1632 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1633 die "ERROR: There is no such base-pair in the base-pair hash:" . " ($i, $j)\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1634 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1635 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1636
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1637 # }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1638 # }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1639 # add stem to abstract graph
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1640 # (graph indices become a bit jumbled, but saves time)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1641 push( @abstrgraph, "v $curr_gi ${ABSTRUCT}#S" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1642
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1643 # stem object now saves the curr_gi index for that stem
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1644 $stems{$sk} = $curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1645 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1646
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1647 # add relations
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1648 push( @abstrgraph, "v $curr_gi ^R" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1649 push( @abstrgraph, "e " . ( $curr_gi - 1 ) . " $curr_gi ^r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1650 foreach my $r (@stem_relation_idxs) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1651 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1652 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1653 if ($stacks) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1654 foreach my $r (@stack_relation_idxs) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1655 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1656 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1657 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1658 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1659 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1660
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1661 #-------------------> add HLs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1662 foreach my $hl (@hls) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1663
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1664 # check size
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1665 die "ERROR: the HL object is the incorrect size\n" unless ( @{$hl} == 2 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1666
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1667 # add vertex
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1668 push( @abstrgraph, "v $curr_gi ${ABSTRUCT}#HL" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1669
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1670 # add edge from stem to hl
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1671 if ( defined( $stems{ $bps{"$hl->[0]:$hl->[1]"} } ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1672 push( @abstrgraph, "e " . $stems{ $bps{"$hl->[0]:$hl->[1]"} } . " $curr_gi n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1673 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1674 die "ERROR: stem not defined in base-pair hash for $hl->[0]:$hl->[1]\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1675 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1676 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1677
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1678 # add relations
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1679 push( @abstrgraph, "v $curr_gi ^R" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1680 push( @abstrgraph, "e " . ( $curr_gi - 1 ) . " $curr_gi ^r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1681
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1682 # add all nodes between i to j, inclusively
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1683 for ( my $r = $hl->[0] ; $r <= $hl->[1] ; $r++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1684 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1685 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1686 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1687 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1688
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1689 #------------------> add BLs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1690 foreach my $bl (@bls) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1691
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1692 # check size
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1693 die "ERROR: the BL object is the incorrect size\n" unless ( scalar( @{$bl} ) == 4 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1694
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1695 # add vertex
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1696 push( @abstrgraph, "v $curr_gi ${ABSTRUCT}#BL" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1697
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1698 # add edges from bl to both adjacent stems
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1699 if ( defined( $stems{ $bps{"$bl->[0]:$bl->[1]"} } )
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1700 && defined( $stems{ $bps{"$bl->[2]:$bl->[3]"} } ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1701
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1702 # add outer stem: S->BL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1703 push( @abstrgraph, "e " . $stems{ $bps{"$bl->[0]:$bl->[1]"} } . " $curr_gi n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1704
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1705 # add inner stem: BL->S
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1706 push( @abstrgraph, "e $curr_gi " . $stems{ $bps{"$bl->[2]:$bl->[3]"} } . " n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1707 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1708 die "ERROR: stems not defined for BL\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1709 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1710 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1711
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1712 # add relations
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1713 push( @abstrgraph, "v $curr_gi ^R" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1714 push( @abstrgraph, "e " . ( $curr_gi - 1 ) . " $curr_gi ^r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1715
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1716 # add all nodes between i to k and l to j, inclusively
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1717 for ( my $r = $bl->[0] ; $r <= $bl->[2] ; $r++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1718 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1719 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1720 for ( my $r = $bl->[3] ; $r <= $bl->[1] ; $r++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1721 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1722 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1723 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1724 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1725
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1726 #------------------> add ILs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1727 foreach my $il (@ils) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1728
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1729 # check size
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1730 die "ERROR: the IL object is the incorrect size\n" unless ( scalar( @{$il} ) == 4 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1731
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1732 # add vertex
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1733 push( @abstrgraph, "v $curr_gi ${ABSTRUCT}#IL" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1734
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1735 # add edges from il to both adjacent stems
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1736 if ( defined( $stems{ $bps{"$il->[0]:$il->[1]"} } )
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1737 && defined( $stems{ $bps{"$il->[2]:$il->[3]"} } ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1738
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1739 # add outer stem: S->IL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1740 push( @abstrgraph, "e " . $stems{ $bps{"$il->[0]:$il->[1]"} } . " $curr_gi n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1741
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1742 # add inner stem: IL->S
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1743 push( @abstrgraph, "e $curr_gi " . $stems{ $bps{"$il->[2]:$il->[3]"} } . " n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1744 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1745 die "ERROR: stems not defined for IL\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1746 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1747 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1748
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1749 # add relations
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1750 push( @abstrgraph, "v $curr_gi ^R" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1751 push( @abstrgraph, "e " . ( $curr_gi - 1 ) . " $curr_gi ^r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1752
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1753 # add all nodes between i to k and l to j, inclusively
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1754 for ( my $r = $il->[0] ; $r <= $il->[2] ; $r++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1755 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1756 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1757 for ( my $r = $il->[3] ; $r <= $il->[1] ; $r++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1758 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1759 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1760 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1761 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1762
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1763 #------------------> add ELs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1764 foreach my $el (@els) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1765
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1766 #check size
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1767 die "ERROR: the EL object is the incorrect size\n" unless ( scalar( @{$el} ) == 4 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1768
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1769 # add vertex
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1770 push( @abstrgraph, "v $curr_gi ${ABSTRUCT}#EL" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1771
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1772 # add edges from el to both adjacent stems, if available
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1773 unless ( $el->[0] == $el->[1] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1774 if ( defined( $stems{ $bps{"$el->[0]:$el->[1]"} } ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1775 push( @abstrgraph, "e " . $stems{ $bps{"$el->[0]:$el->[1]"} } . " $curr_gi n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1776 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1777 die "ERROR: stems not defined for left BP of EL\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1778 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1779 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1780 unless ( $el->[2] == $el->[3] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1781 if ( defined( $stems{ $bps{"$el->[2]:$el->[3]"} } ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1782 push( @abstrgraph, "e $curr_gi " . $stems{ $bps{"$el->[2]:$el->[3]"} } . " n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1783 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1784 die "ERROR: stems not defined for right BP of EL\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1785 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1786 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1787 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1788
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1789 # add relations
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1790 push( @abstrgraph, "v $curr_gi ^R" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1791 push( @abstrgraph, "e " . ( $curr_gi - 1 ) . " $curr_gi ^r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1792
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1793 # @r relation edges
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1794 for ( my $r = $el->[1] ; $r <= $el->[2] ; $r++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1795 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1796 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1797
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1798 # if BP, not EOS
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1799 unless ( $el->[0] == $el->[1] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1800 push( @abstrgraph, "e $curr_gi $el->[0] \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1801 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1802
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1803 # if BP, not EOS
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1804 unless ( $el->[2] == $el->[3] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1805 push( @abstrgraph, "e $curr_gi $el->[3] \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1806 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1807 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1808 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1809
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1810 #------------------> add MLs
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1811 foreach my $ml (@mls) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1812
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1813 #check size
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1814 die "ERROR: the ML object is not big enough!\n" unless ( scalar( @{$ml} ) >= 3 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1815
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1816 # add vertex
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1817 push( @abstrgraph, "v $curr_gi ${ABSTRUCT}#ML" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1818
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1819 # add edge from outer stem to first BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1820 my $closing_bp = shift( @{$ml} );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1821 if ( defined( $stems{ $bps{"$closing_bp->[0]:$closing_bp->[1]"} } ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1822 push( @abstrgraph, "e " . $stems{ $bps{"$closing_bp->[0]:$closing_bp->[1]"} } . " $curr_gi n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1823 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1824 die "ERROR: ML is not defined for BP $closing_bp->[0]:$closing_bp->[1]\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1825 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1826
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1827 # add remaining stems
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1828 foreach my $bp ( @{$ml} ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1829 if ( defined( $stems{ $bps{"$bp->[0]:$bp->[1]"} } ) ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1830 push( @abstrgraph, "e $curr_gi " . $stems{ $bps{"$bp->[0]:$bp->[1]"} } . " n" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1831 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1832 die "ERROR: ML is not defined for BP $bp->[0]:$bp->[1]\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1833 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1834 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1835 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1836
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1837 # add relations
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1838 push( @abstrgraph, "v $curr_gi ^R" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1839 push( @abstrgraph, "e " . ( $curr_gi - 1 ) . " $curr_gi ^r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1840
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1841 # add @r relation edges between i and k, where (i,j) is the first BP and (k,l)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1842 # the second BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1843 my $pbp_aref = $closing_bp;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1844 my $cbp_aref = shift( @{$ml} );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1845 for ( my $r = $pbp_aref->[0] ; $r <= $cbp_aref->[0] ; $r++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1846 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1847 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1848
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1849 # rest of @r relation edges
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1850 foreach my $bp ( @{$ml} ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1851 $pbp_aref = $cbp_aref;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1852 $cbp_aref = $bp;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1853 for ( my $r = $pbp_aref->[1] ; $r <= $cbp_aref->[0] ; $r++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1854 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1855 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1856 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1857
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1858 # add final stretch between last BP (m,n) and first closing base-pair (i,j)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1859 for ( my $r = $cbp_aref->[1] ; $r <= $closing_bp->[1] ; $r++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1860 push( @abstrgraph, "e $curr_gi $r \@r" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1861 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1862 ++$curr_gi;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1863 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1864
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1865 @graph_lines = ( @edg, @stackgraph, @abstrgraph );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1866 return ( \@graph_lines, $curr_gi );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1867 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1868
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1869 # just to test the current shrep dot-bracket structure parsing
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1870 sub test {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1871 my ( $shrep, $idx, $gi, $mark_aref, $isML ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1872
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1873 my $result = "";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1874
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1875 $result .= "$shrep\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1876 my @a = split( "", $shrep );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1877 for ( my $i = 0 ; $i <= @a ; $i++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1878 my $mark = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1879 foreach my $j ( @{$mark_aref} ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1880 if ($isML) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1881 $mark = 1 if ( $i == $j->[0] - $gi );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1882 $mark = 1 if ( scalar( @{$j} == 2 && $i == $j->[1] - $gi ) );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1883 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1884 $mark = 1 if ( $i == $j - $gi );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1885 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1886
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1887 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1888 if ( $i == $idx - $gi ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1889 $result .= "^";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1890 } elsif ($mark) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1891 $result .= "'";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1892 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1893 $result .= " ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1894 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1895 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1896 $result .= "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1897
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1898 return $result;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1899 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1900
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1901 ###########################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1902 # Extends or closes a multi-loop. This means a ML already exists.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1903 # We can have the case ((x)( or ((x).(
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1904 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1905 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1906 # curr_shrep The current shrep object, with shrep string at pos 0
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1907 # c_ml_aref The current open MLs array reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1908 # p_bp_aref The previous base-pair array reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1909 # stmbrks_aref The stem-breaks array reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1910 # open_blks_aref The open bracket blocks array reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1911 # idx The current index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1912 # #curr_gi The current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1913 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1914 # None, it should just modify the ML accordingly
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1915 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1916 sub extend_or_open_nested_ML {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1917 my ( $curr_shrep, $c_mls_aref, $p_bp_aref, $up_aref, $stmbrks_aref, $open_blks_aref, $idx, $curr_gi ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1918
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1919 # case '(x(x)x(x)x(' extend current ML
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1920 if ( $c_mls_aref->[-1]->[-1]->[-1] == $p_bp_aref->[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1921 push( @{ $c_mls_aref->[-1]->[-1] }, $p_bp_aref->[1] ); #close previous BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1922 my @a = ($idx);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1923 push( @{ $c_mls_aref->[-1] }, \@a ); # add new open base-pair
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1924 print STDERR "TEST - extended ML:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, $c_mls_aref->[-1], 1 ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1925
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1926 # case '(x(x)x((x)(' nested ML, => OPEN_ML with stem-break
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1927 } elsif ( @$stmbrks_aref
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1928 && $stmbrks_aref->[-1]->[1] == $p_bp_aref->[0]
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1929 && $stmbrks_aref->[-1]->[2] == $p_bp_aref->[1] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1930
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1931 my @c_stmbrk = @{ pop( @{$stmbrks_aref} ) };
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1932 my @a = ( $c_stmbrk[0], -1 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1933 my @ml = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1934 push( @ml, \@a ); # 1st base-pair is in-waiting
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1935 my @a2 = ( $c_stmbrk[1], $c_stmbrk[2] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1936 push( @ml, \@a2 ); #2nd BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1937 my @a3 = ($idx);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1938 push( @ml, \@a3 ); #first of current BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1939 push( @{$c_mls_aref}, \@ml );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1940 print STDERR "TEST - opened nested ML with SB:\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@ml, 1 ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1941
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1942 # case '(x(x)x(.(x)(' nested ML, => OPEN_ML with UP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1943 } elsif ( @$up_aref
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1944 && $up_aref->[-1]->[0] - 1 == $open_blks_aref->[-1]->[-1]
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1945 && $up_aref->[-1]->[1] + 1 == $p_bp_aref->[0] ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1946 my @tmp_up = @{ pop( @{$up_aref} ) };
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1947 my @a = ( $open_blks_aref->[-1]->[-1], -1 );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1948 my @ml = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1949 push( @ml, \@a ); # 1st BP awaits closing
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1950 my @newbp = @{$p_bp_aref};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1951 push( @ml, \@newbp ); #2nd BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1952 my @a2 = ($idx);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1953 push( @ml, \@a2 ); # current open BP, awaits closing
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1954 push( @{$c_mls_aref}, \@ml );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1955 print STDERR "TEST - opened nested ML with prev UP\n" . ( test( $curr_shrep->[0], $idx, $curr_gi, \@ml, 1 ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1956 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1957 die "ERROR: in the case extend or open nested ML, " . "but the base-pairs don't fit for either!\n" . ( test( $curr_shrep->[0], $idx, $curr_gi ) ) if ($i_debug);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1958 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1959 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1960
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1961 ###########################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1962 # Closes a stem-object for the abstract graph. If necessary, it also
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1963 # creates a stem-break because the current open block is still open,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1964 # which can only mean that there is a stem-break between two consecutive
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1965 # opening brackets.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1966 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1967 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1968 # c_stem_aref : current stem array reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1969 # stems_href : all stems hash reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1970 # p_bp_aref : previous base-pair array reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1971 # c_cp_aref : current base-pair array reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1972 # p_open_aref : previously opened open-bracket block
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1973 # p_stmbrk_aref : the previous stem-break
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1974 # open_blks_aref : all old open blocks array reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1975 # bps_href : all base-pairs hash reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1976 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1977 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1978 # None, it should just modify the input variables accordingly
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1979 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1980 sub close_stem {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1981 my ( $c_stem_aref, $stems_href, $p_bp_aref, $c_bp_aref, $p_open_aref, $stmbrks_aref, $open_blks_aref, $bps_href ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1982
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1983 my @newstem = @$c_stem_aref;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1984 my $stemkey = "$newstem[0]:$newstem[1],$newstem[2]:$newstem[3]";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1985 $stems_href->{$stemkey} = \@newstem;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1986
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1987 # reset and clean running variables
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1988 @$c_stem_aref = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1989 @$p_bp_aref = @$c_bp_aref;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1990 @$c_bp_aref = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1991
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1992 # add stem to base-pair hash
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1993 $bps_href->{"$newstem[0]:$newstem[1]"} = $stemkey;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1994 $bps_href->{"$newstem[2]:$newstem[3]"} = $stemkey;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1995
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1996 # there are still bases open in the previous open block, '[[]x',
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1997 # where [] is a closed stem and [ is an open block,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1998 # must create STEM-BREAK
b01beb170290 Uploaded
bgruening
parents:
diff changeset
1999 # this cannot be the case ((x)), because there p_open is empty
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2000 if (@$p_open_aref) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2001
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2002 # check: the next open base is adjacent to the previous BP
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2003 if ( $p_open_aref->[-1] == $p_bp_aref->[0] - 1 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2004 my @stmbrk = ( $p_open_aref->[-1], @$p_bp_aref );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2005 push( @$stmbrks_aref, \@stmbrk );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2006
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2007 # push the previous open block back onto stack
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2008 my @newopen = @$p_open_aref;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2009 push( @$open_blks_aref, \@newopen );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2010 @$p_open_aref = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2011 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2012
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2013 # error, this should not be possible
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2014 die("ERROR in close_stem(), [.[]x should not occur here\n");
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2015 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2016 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2017 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2018
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2019 ###########################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2020 # Here the information about the structure is added to the graph. In a
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2021 # first step each base-pair is added by adding edges between the vertices
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2022 # of the respective nucleotides. Then, if two base-pairs are stacked,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2023 # we add an extra vertex and connect the four nucleotide vertices that
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2024 # are involved with extra edges (symbols P used for vertex and p for edges).
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2025 # While adding new vertices we keep track of the current graph index.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2026 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2027 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2028 # curr_shrep : the current shrep as dot-bracket structure
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2029 # curr_gi : the current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2030 # win_size : the window size
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2031 # stacks : the input parameter -stacks (whether to add stacking information)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2032 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2033 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2034 # The structure graph information line-by-line as an array reference and
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2035 # the current graph index.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2036 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2037 sub getStructGraph {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2038 my ( $curr_shrep, $curr_gi, $win_size, $stacks ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2039
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2040 my @struct = split( "", $curr_shrep->[0] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2041
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2042 my @edg = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2043 my @starts = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2044 my @pairs = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2045
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2046 foreach my $idx ( 0 .. @struct - 1 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2047
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2048 push( @starts, $idx ) if ( $struct[$idx] eq "(" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2049
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2050 if ( $struct[$idx] eq ")" ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2051 my $start = $curr_gi + pop(@starts);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2052 my $end = $curr_gi + $idx;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2053 push( @edg, "e " . $start . " " . $end . " s " );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2054 my @pair = ( $start, $end );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2055 push( @pairs, \@pair );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2056 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2057 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2058
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2059 # update current graph index
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2060 $curr_gi = $curr_gi + $win_size;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2061
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2062 # initiate structure lines array
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2063 my @stacking_info = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2064
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2065 # add stacking information to graph unless input option tells us not to
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2066 if ($stacks) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2067
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2068 my $stacked_pairs = 0;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2069
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2070 # get stacked base-pairs (they are ordered according to
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2071 # position of closing nucleotide)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2072 for ( my $i = 1 ; $i < @pairs ; $i++ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2073
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2074 # add stacked base-pairs (vertices+edges)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2075 my $curr_bp_aref = $pairs[$i];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2076 my $prev_bp_aref = $pairs[ $i - 1 ];
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2077
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2078 # if the current base-pair is stacked on the previous base-pair,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2079 # when curr_start = prev_start - 1 AND curr_end = prev_end + 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2080 if ( $curr_bp_aref->[0] == $prev_bp_aref->[0] - 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2081 && $curr_bp_aref->[1] == $prev_bp_aref->[1] + 1 ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2082
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2083 # add stacking vertex P
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2084 push( @stacking_info, "v $curr_gi P" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2085
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2086 # add four edges from involved nucleotids
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2087 push( @stacking_info, "e $curr_gi $prev_bp_aref->[0] p" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2088 push( @stacking_info, "e $curr_gi $prev_bp_aref->[1] p" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2089 push( @stacking_info, "e $curr_bp_aref->[0] $curr_gi p" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2090 push( @stacking_info, "e $curr_bp_aref->[1] $curr_gi p" );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2091 ++$curr_gi; # add one to the index, ready for next vertex
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2092
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2093 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2094 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2095 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2096
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2097 my @str_graphlines = ( @edg, @stacking_info );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2098 return ( \@str_graphlines, $curr_gi );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2099 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2100
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2101 ###########################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2102 # This method gives us the overall graph header for this sequence. This
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2103 # graph includes all window calculations and all shreps for all windows.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2104 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2105 # seq_id : the sequence ID
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2106 # seq_head : the header information for the sequence
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2107 # wins_aref : the windows sizes for folding
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2108 # h_shift : input parameter -shift
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2109 # h_e : input parameter -e
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2110 # h_c : input parameter -c
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2111 # h_t : input parameter -t
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2112 # h_u : input parameter -u
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2113 # h_r : input parameter -r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2114 # h_M : input parameter -M
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2115 # h_crop_unpaired_ends : input parameter -cue
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2116 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2117 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2118 # The header line as a string
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2119 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2120 sub getGraphHeader {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2121 my ( $seq_id, $seq_head, $wins_aref, $h_shift, $h_e, $h_c, $h_t, $h_u, $h_r, $h_M, $h_crop_unpaired_ends, $h_i, $h_sample_len, $h_q, $h_Tp, $h_seqlen, $h_noStr ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2122
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2123 my $ret;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2124
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2125 $ret = "t # SEQID $seq_id ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2126 $ret .= "$seq_head " if ( defined $seq_head );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2127 $ret .= "SEQLEN $h_seqlen ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2128
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2129 return $ret . "\n" if ($h_noStr);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2130
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2131 $ret .= "MAXWINSHREPS $h_M ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2132 $ret .= "CUE 1 " if ($h_crop_unpaired_ends);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2133 $ret .= "RNASHAPES -w " . ( join( ",", @{$wins_aref} ) ) . " ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2134 $ret .= "-SHIFT%w $h_shift " if ( not $globalFolding and $h_shift );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2135 $ret .= "-SHIFT 1 " if ( not $globalFolding and not $h_shift );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2136 $ret .= "-e $h_e " if ( defined $h_e );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2137 $ret .= "-c $h_c " if ( defined $h_c );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2138 $ret .= "-t $h_t " if ( defined $h_t );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2139 $ret .= "-u 1 " if ($h_u);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2140 $ret .= "-r 1 " if ($h_r);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2141 $ret .= "-i $h_i SAMPLE_LEN $h_sample_len" if ( not $h_q and $h_i );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2142 $ret .= "-q 1 " if ($h_q);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2143 $ret .= "-T $h_Tp" if ( $h_q and defined $h_Tp );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2144 $ret .= "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2145
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2146 return $ret;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2147 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2148
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2149 ###########################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2150 # This method gives us the window header for the subgraph that includes
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2151 # all shreps for the current window.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2152 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2153 # graphHead : the header line for the entire sequence graph
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2154 # win_size : the size of the current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2155 # win_start : the starting position of the current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2156 # win_end : the end position of the current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2157 # win_center : the centre position of the current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2158 # win_global_fold : (boolean) whether the actual number of nucleotides
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2159 # is smaller than the given window size (end of sequence?)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2160 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2161 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2162 # The header line as a string
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2163 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2164 sub getWindowHeader {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2165 my ( $graphHead, $win_size, $win_start, $win_end, $win_center, $win_global_fold, $win_sample, $used_t ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2166
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2167 chomp $graphHead;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2168
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2169 $graphHead =~ s/t # //;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2170
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2171 my $ret = "w # $graphHead ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2172 $ret .= "SHAPE_TYPE $used_t ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2173 $ret .= "GLOBALFOLD $win_global_fold ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2174 $ret .= "WSIZE $win_size ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2175 $ret .= "WSTART $win_start ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2176 $ret .= "WEND $win_end ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2177 $ret .= "WCENT $win_center ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2178 $ret .= "WSAMPLE $win_sample ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2179 $ret .= "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2180
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2181 return $ret;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2182 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2183
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2184 ###########################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2185 # This method gives us the shape header that includes the single shrep
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2186 # connected components.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2187 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2188 # winHead : the header for the current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2189 # shrep : information about the current shrep
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2190 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2191 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2192 # The header line as a string
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2193 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2194 sub getShapeHeader {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2195 my ( $winHead, $shrep ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2196
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2197 chomp $winHead;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2198
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2199 $winHead =~ s/w # //;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2200 $winHead =~ s/t # //;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2201
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2202 my $ret = "s # $winHead ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2203
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2204 my @info = @{$shrep};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2205 $ret .= join( " ", @info[ 1 .. $#info ] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2206 $ret .= "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2207
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2208 return $ret;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2209 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2210
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2211 ###########################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2212 # This method gives us the sequence header that includes the unstructured
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2213 # sequence connected components.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2214 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2215 # winHead : the header for the current window
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2216 # shrep : information about the current shrep
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2217 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2218 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2219 # The header line as a string
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2220 ############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2221 sub getSeqHeader {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2222 my ( $winHead, $shrep ) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2223
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2224 chomp $winHead;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2225
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2226 $winHead =~ s/w # //;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2227 $winHead =~ s/t # //;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2228
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2229 my $ret = "u # $winHead ";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2230
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2231 my @info = @{$shrep};
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2232 $ret .= join( " ", @info[ 1 .. $#info ] );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2233 $ret .= "\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2234
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2235 return $ret;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2236 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2237
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2238
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2239 ##################################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2240 # This method parses a fasta file and is useful if the header ID is not-unique!!
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2241 # It returns the header lines in an array and the sequence lines in the same
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2242 # order. It is then up to the user to extract the parts from the header that is
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2243 # necessary for the script.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2244 # Furthermore, the method deals with multiple lines, and returns a single sequence
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2245 # without line breaks.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2246 # Input:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2247 # file The name of the fasta file
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2248 # Output:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2249 # (1) An array reference for each header line
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2250 # (2) An array reference for each sequence in same order as the headers
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2251 ##################################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2252 sub read_fasta_with_nonunique_headers_meta {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2253 my ($file) = @_;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2254 my $FUNCTION = "read_fasta_file in Sequences.pm";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2255
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2256 my $header = "";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2257 my $seqstring = "";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2258 my @headers = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2259 my @sequences = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2260 my @meta = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2261 my %seq_meta = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2262 open( IN_HANDLE, "<$file" ) || die "ERROR in $FUNCTION:\n" . "Couldn't open the following file in package Tool," . " sub read_fasta_file: $file\n";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2263
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2264 while ( my $line = <IN_HANDLE> ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2265 chomp($line);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2266
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2267 # header (can contain one space after > symbol)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2268 if ( $line =~ /^\>(.*)/ ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2269 if ($header) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2270 $seqstring =~ s/\s*//g; ## do not allow spaces in sequence
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2271 push( @headers, $header );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2272 push( @sequences, $seqstring );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2273 push( @meta, +{%seq_meta} ); ## anonymous hash reference
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2274 #print keys %seq_meta;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2275 $seqstring = "";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2276 undef(%seq_meta);
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2277 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2278 $header = $1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2279
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2280 } elsif ( $line =~ /(.+)\s+(#\S+)$/ && $header ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2281
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2282 if ( exists $seq_meta{$2} ) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2283 $seq_meta{$2} .= $1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2284 } else {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2285 $seq_meta{$2} = $1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2286 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2287
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2288 } elsif ($header) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2289 $seqstring .= $line
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2290 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2291 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2292
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2293 if ($header) {
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2294 $seqstring =~ s/\s*//g; ## do not allow spaces in sequence
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2295 push( @headers, $header );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2296 push( @sequences, $seqstring );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2297 push( @meta, +{%seq_meta} );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2298 $seqstring = "";
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2299 %seq_meta = ();
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2300 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2301 return ( \@headers, \@sequences, \@meta );
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2302 }
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2303
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2304
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2305 1;
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2306
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2307 #############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2308 # Programming description
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2309 #############################################################################
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2310 # Substructure graphs for machine learning with Fabrizio
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2311 # -------------------------------------------------------
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2312 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2313 # (1) Parameters (RNAshapes parameter):
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2314 # - Window sizes [] (-w)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2315 # - window shift size (-W)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2316 # - calculate structure probabilities (-r)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2317 # - energy range kcal/mol (-e) OR energy relative percentage (%) to MFE (-c)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2318 # - shape type 1 to 5 (-t)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2319 # - ignore unstable substructures (-u)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2320 # - max shreps
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2321 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2322 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2323 # (2) For each sequence, generate one graph/file that consists of all windows. The general format for one graph is as follows:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2324 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2325 # t # seq_id parameters
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2326 # v graph_index nt_type window_size window_centre abs_seq_pos
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2327 # ...
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2328 # e m m+1 > 1 (backbone)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2329 # ...
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2330 # e base_i_graph_index base_j_graph_index s shrep_e shrep_p ...
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2331 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2332 # For each window (subgraph) we create a subgraph (of the subgraph) for each substructure.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2333 # We have a running index (gi) for each subgraph. All vertex and edge indices of the subgraph add
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2334 # the running graph index to the actual window position. For example
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2335 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2336 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2337 # Sequence: AAACC CUUUG GG
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2338 # 01234 56789 01
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2339 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2340 # Window=10 substructure1 = (((...))). centre 5.5
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2341 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2342 # v 0 A 10 5.5 0
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2343 # v 1 A 10 5.5 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2344 # v 2 A 10 5.5 2
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2345 # v 3 C 10 5.5 3
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2346 # v 4 C 10 5.5 4
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2347 # v 5 C 10 5.5 5
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2348 # v 6 U 10 5.5 6
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2349 # v 7 U 10 5.5 7
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2350 # v 8 U 10 5.5 8
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2351 # v 9 G 10 5.5 9
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2352 # e 0 1 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2353 # e 1 2 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2354 # e 2 3 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2355 # e 3 4 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2356 # e 4 5 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2357 # e 5 6 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2358 # e 6 7 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2359 # e 7 8 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2360 # e 8 9 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2361 # e 0 8 s -15.0 0.1223
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2362 # e 1 7 s -15.0 0.1223
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2363 # e 2 6 s -15.0 0.1223
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2364 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2365 # gi = 9+1 = 10
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2366 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2367 # Window = 10 substructure2 = .(((...))) centre 6.5
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2368 # v 10 A 10 6.5 2
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2369 # v 11 C 10 6.5 3
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2370 # v 12 C 10 6.5 4
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2371 # v 13 C 10 6.5 5
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2372 # v 14 U 10 6.5 6
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2373 # v 15 U 10 6.5 7
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2374 # v 16 U 10 6.5 8
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2375 # v 17 G 10 6.5 9
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2376 # v 18 G 10 6.5 10
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2377 # v 19 G 10 6.5 11
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2378 # e 10 11 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2379 # e 11 12 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2380 # e 12 13 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2381 # e 13 14 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2382 # e 14 15 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2383 # e 15 16 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2384 # e 16 17 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2385 # e 17 18 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2386 # e 18 19 > 1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2387 # e 11 19 s -17.0 0.156
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2388 # e 12 18 s -17.0 0.156
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2389 # e 13 17 s -17.0 0.156
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2390 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2391 # gi = 19+1 = 20
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2392 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2393 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2394 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2395 # Write one perl script to create graphs for a set of sequences, fasta2shrep_gspan.pl.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2396 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2397 # INPUT:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2398 # -f fasta file with all sequences to compute
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2399 # parameters as above
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2400 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2401 # OUTPUT:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2402 # one file per sequence that contains graph called seq_id.gspan
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2403 #
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2404 # (1) for each window size call RNAshapes and write to a tmp file
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2405 # (2) parse result of RNAshapes (catch RNAshapes error - sequence too long?) check for max shreps.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2406 # (3) convert RNAshapes result to subgraph -> write to file (readpipe) look at efficiency and errors
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2407 # (4) repeat (1) to (3) for each sequence
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2408
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2409 # ABSTRACT STRUCTURE GRAPH
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2410 # To each shrep graph (s #) add an abstract structure graph with special
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2411 # abstract relations. For example, to add the abstract structure graph to
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2412 # the previous shrep structure with gi numbers from 10 to 19, we first
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2413 # identify the abstract shape, i.e. EL-S-HL and then create nodes (labelled
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2414 # with a given name-space followed the actual node name and separated by a
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2415 # hash) and edges for this graph as follows:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2416 # NOTE: at the moment we add the adjacent base-pairs to the loop definitions
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2417 # but these can be removed if necessary in the future
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2418 # v 20 abstruct#EL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2419 # v 21 abstruct#S
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2420 # v 22 abstruct#HL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2421 # e 20 21 n
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2422 # e 21 22 n
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2423 # v 23 ^R
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2424 # e 20 23 ^r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2425 # e 23 10 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2426 # e 23 11 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2427 # e 23 19 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2428 # v 24 ^R
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2429 # e 21 24 ^r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2430 # e 24 11 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2431 # e 24 12 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2432 # e 24 13 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2433 # e 24 17 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2434 # e 24 18 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2435 # e 24 19 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2436 # v 25 ^R
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2437 # e 22 25 ^r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2438 # e 25 13 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2439 # e 25 14 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2440 # e 25 15 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2441 # e 25 16 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2442 # e 25 17 @r
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2443
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2444 # gi = 25+1 = 26
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2445
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2446 # ANOTATION FILE
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2447 # This is a file that labels a sequence region with a given annotation. In one
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2448 # file we can have annotations within different name-spaces, for example target
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2449 # sites predicted with different tools.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2450 # File format is a tab-delimited file with the following columns:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2451 # SEQID (same ID as in the fasta file header)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2452 # i (left positon of region)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2453 # j (right position of region, if one position i=j)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2454 # NAMESPACE#LABEL (give each annotation type one name-space and choose a label depending on the task)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2455
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2456 # E.g we have 2 different miRNAs and we predict the target-sites with (1) IntaRNA and (2) TargetSearch,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2457 # then this could be IntaRNA#miR1, IntaRNA#miR2, TargetSearch#miR1, and TargetSearch#miR2.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2458 # All labels can be used more than once for one or more sequences.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2459 # all labels with the same namespace and the same sequence ID are grouped into
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2460 # one abstract graph, according to the order of i.
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2461 # Example
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2462 # SEQID i j NAMESPACE#LABEL
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2463 # s1 10 20 IntaRNA#miR1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2464 # s1 54 60 IntaRNA#miR2
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2465 # s1 15 25 TargetSearch#miR1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2466 # s1 54 60 TargetSearch#miR2
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2467 # s2 ...
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2468
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2469 # We create connected component per sequence ID per name-space,
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2470 # so that all labels with
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2471 # the same namespace are grouped together (for one sequence) as follows:
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2472 # v 26 IntaRNA#miR1
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2473 # v 27 IntaRNA#miR2
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2474 # e 26 27 n
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2475 # then create R nodes and ^r and @r relations as before (from i to j)
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2476
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2477 # If a sequence graph option is given, then these graphs (u #) have to
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2478 # be labelled with this annotation
b01beb170290 Uploaded
bgruening
parents:
diff changeset
2479