0
|
1 #!/usr/bin/perl
|
|
2
|
|
3 #===============================================================================
|
|
4 # Author: Robert SCHMIEDER, Computational Science Research Center @ SDSU, CA
|
|
5 #
|
|
6 # File: prinseq-graphs
|
|
7 # Date: 2012-12-22
|
|
8 # Version: 0.6 graphs
|
|
9 #
|
|
10 # Usage:
|
|
11 # prinseq-graphs [options]
|
|
12 #
|
|
13 # Try 'prinseq-graphs-noPCA -h' for more information.
|
|
14 #
|
|
15 # Purpose: PRINSEQ will help you to preprocess your genomic or metagenomic
|
|
16 # sequence data in FASTA or FASTQ format. The graphs version allows
|
|
17 # users of the lite version to generate graphs similar to the web
|
|
18 # version.
|
|
19 #
|
|
20 # Bugs: Please use http://sourceforge.net/tracker/?group_id=315449
|
|
21 #
|
|
22 #===============================================================================
|
|
23
|
|
24 use strict;
|
|
25 use warnings;
|
|
26
|
|
27 use Getopt::Long;
|
|
28 use Pod::Usage;
|
|
29 use File::Temp qw(tempfile); #for output files
|
|
30 use Fcntl qw(:flock SEEK_END); #for log file
|
|
31 use Cwd;
|
|
32 use JSON;
|
|
33 use Cairo;
|
|
34 #use Statistics::PCA;
|
|
35 use MIME::Base64;
|
|
36 use File::Basename;
|
|
37 use Data::Dumper; ###
|
|
38
|
|
39 $| = 1; # Do not buffer output
|
|
40
|
|
41 my $PI = 4 * atan2(1, 1);
|
|
42 my $LOG62 = log(62);
|
|
43 my $DINUCODDS_VIR = [
|
|
44 [qw(1.086940308 0.98976932 1.034167044 0.880024041 1.070421277 0.990687084 0.890945575 1.069957074 0.92465631 0.803973303)],
|
|
45 [qw(1.101064857 0.986812783 1.038299155 0.896162618 1.081652847 0.976365237 0.867445186 1.06727283 0.94688543 0.768007295)],
|
|
46 [qw(1.071548411 0.912204166 1.196914981 0.80628184 1.294201511 1.148517794 0.269295791 1.033948026 0.895951033 0.623192149)],
|
|
47 [qw(1.090253719 0.907428629 1.203991784 0.786359294 1.281499107 1.145421568 0.235974709 1.033437274 0.899580091 0.631699771)],
|
|
48 [qw(1.075864745 1.003413074 1.01872902 0.897841689 0.980373171 1.05854979 0.934262259 1.052477953 0.88145851 0.889239724)],
|
|
49 [qw(1.101890467 1.030028291 1.019912674 0.84191395 1.0015174 1.069546264 0.900151602 0.996269395 0.889195343 0.904039022)],
|
|
50 [qw(1.152417359 0.855028574 0.91164793 1.017415486 1.114163672 1.128353311 0.846355573 0.916745489 1.206820475 0.811014651)],
|
|
51 [qw(1.142454218 0.8635465 0.923406967 1.026242747 1.134445058 1.131747833 0.79793368 0.920767641 1.179468556 0.799770057)],
|
|
52 [qw(1.124462747 0.873556143 0.945627041 1.013755408 1.159866153 1.096259526 0.757315047 0.972924919 1.105562567 0.772731886)],
|
|
53 [qw(1.143826972 0.866968779 0.995740249 0.945859278 1.109590621 1.089305083 0.76048874 0.971561388 1.157101408 0.792923027)],
|
|
54 [qw(1.131900141 0.82776996 0.996204924 0.999433455 1.024692372 1.071176333 0.921026216 1.088936699 1.054010776 0.773498892)],
|
|
55 [qw(1.042180476 0.930180412 1.019242897 0.98909997 1.006666828 1.046708539 0.959492164 1.011183418 1.055168776 0.937433818)],
|
|
56 [qw(1.086515695 0.985345815 0.930914307 0.969581792 1.043010232 1.087463712 0.939482285 0.990551965 0.954752469 0.893972874)],
|
|
57 [qw(1.096657826 0.950117614 0.936195529 0.965619788 1.114975275 1.077011195 0.843153131 0.989128406 1.043790912 0.840634731)],
|
|
58 [qw(1.158030995 0.935307365 0.874812261 1.056236525 1.117171274 0.937484692 1.057442372 0.970079538 1.174848738 0.725071711)],
|
|
59 [qw(1.15591506 0.93000227 0.883538923 1.0567652 1.095730954 0.944489906 1.074229471 0.983993745 1.156051409 0.726688465)],
|
|
60 [qw(1.205726473 0.924439339 1.049457756 0.805718412 0.975472778 1.07581991 0.726992211 1.075025787 0.8704929 0.726672843)],
|
|
61 [qw(1.188544681 0.95239611 1.049066985 0.790031334 1.038632598 1.056749787 0.665197397 1.057566244 0.862429061 0.708982398)],
|
|
62 [qw(1.063631482 0.925593715 1.014869316 0.944904401 1.119690731 1.325971834 0.273781451 0.943347677 1.06438014 0.920825904)],
|
|
63 [qw(1.077560287 0.911888545 1.044147857 0.927758054 1.058535939 1.296838544 0.421514996 0.945722451 1.128317986 0.926419928)],
|
|
64 [qw(1.163753415 0.989905668 0.893599328 0.955641844 1.176047687 0.941559156 0.950641089 0.959741692 1.100815282 0.72491925)],
|
|
65 [qw(1.139253929 0.946297517 0.922096125 1.024801537 1.205206793 0.968818717 0.915801342 0.971626058 1.107569276 0.627623404)]
|
|
66 ];
|
|
67 my $DINUCODDS_MIC = [
|
|
68 [qw(1.13127323 0.853587195 0.911041047 1.104520778 1.065586428 1.021434164 0.999734139 1.063684014 1.078035184 0.733596552)],
|
|
69 [qw(1.173267344 0.840539337 0.919534602 1.068050141 1.062394214 1.051999071 0.96770576 1.035511729 1.095600433 0.72328141)],
|
|
70 [qw(1.172939786 0.84567902 0.911836259 1.106288994 1.05351787 1.026143368 1.002308358 1.066319771 1.094918797 0.710733535)],
|
|
71 [qw(1.073527689 0.850290918 0.978455025 1.080882178 1.111174765 1.010754115 0.895668707 1.072980666 1.079304608 0.754057386)],
|
|
72 [qw(1.08807747 0.837444678 0.95824965 1.097310298 1.118897971 1.030863881 0.886827263 1.072349394 1.07406322 0.733440096)],
|
|
73 [qw(1.071685485 0.861055813 0.966566865 1.090268118 1.112945761 1.012538936 0.909535491 1.063745603 1.071156598 0.755770377)],
|
|
74 [qw(1.142698587 0.867936867 1.000612099 0.977934257 1.111801746 1.018318601 0.788556794 0.987763594 1.184649653 0.784776176)],
|
|
75 [qw(1.134560074 0.876651844 0.998190253 0.995723123 1.128448077 1.014172324 0.781776188 0.971020602 1.182411449 0.786449476)],
|
|
76 [qw(1.180029632 0.787899325 1.01316945 0.932268406 1.077837263 1.211699678 0.612128817 1.033036699 1.157314398 0.74940288)],
|
|
77 [qw(1.160925546 0.788308899 1.003702496 0.965371236 1.076051693 1.188304271 0.641536444 1.070331188 1.124067192 0.740126813)],
|
|
78 [qw(1.173873006 0.790118011 1.014718833 0.937979878 1.07453725 1.207167373 0.622279064 1.046150047 1.145627707 0.742212886)],
|
|
79 [qw(1.128383111 0.870541389 0.987269741 0.98353238 1.115643879 1.040107028 0.774505865 1.010896432 1.164757274 0.775254395)],
|
|
80 [qw(1.15297511 0.853883985 0.956393231 1.000027661 1.139915472 1.01355294 0.838843622 1.015553125 1.216219741 0.70447264)],
|
|
81 [qw(1.148264236 0.852123859 0.974568293 0.985455546 1.13192373 1.015879393 0.828987111 1.016820786 1.216647853 0.71634006)],
|
|
82 [qw(1.12933788 0.831777975 1.005434367 0.991081409 1.126146895 1.07421504 0.69343913 1.054032466 1.14809591 0.728541157)],
|
|
83 [qw(1.124157235 0.828112691 1.022348424 0.983822386 1.143028487 1.081830005 0.672594435 1.05685982 1.149537403 0.684432106)],
|
|
84 [qw(1.128029586 0.841853305 1.00983936 0.967179139 1.122524003 1.094555807 0.659238308 1.061578854 1.1243601 0.740148171)],
|
|
85 [qw(1.093521636 0.855071052 0.929160818 1.203773691 1.178257185 0.881341255 1.078305505 1.051988532 1.169143967 0.555057308)],
|
|
86 [qw(1.073737278 0.877396537 0.968017446 1.124155374 1.166244435 0.909044208 0.999147578 1.071098934 1.120156138 0.607444953)],
|
|
87 [qw(1.092150184 0.863407008 0.927040387 1.185387013 1.171670826 0.882276859 1.083058605 1.048379554 1.168635365 0.580337997)]
|
|
88 ];
|
|
89 my $DATA_VIR = [
|
|
90 [2,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
|
|
91 [3,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
|
|
92 [42,2,'Human (nasal)',[127/255, 127/255, 255/255,1]],
|
|
93 [43,2,'Human (nasal)',[127/255, 127/255, 255/255,1]],
|
|
94 [45,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
|
|
95 [49,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
|
|
96 [52,3,'Human (sputum)',[127/255, 127/255, 255/255,1]],
|
|
97 [54,3,'Human (sputum)',[127/255, 127/255, 255/255,1]],
|
|
98 [55,4,'Human (sputum, CF)',[127/255, 127/255, 255/255,1]],
|
|
99 [57,4,'Human (sputum, CF)',[127/255, 127/255, 255/255,1]],
|
|
100 [88,5,'Freshwater (Hot spring)',[127/255, 127/255, 255/255,1]],
|
|
101 [89,5,'Freshwater (Hot spring)',[127/255, 127/255, 255/255,1]],
|
|
102 [98,6,'Freshwater (Antartic lake)',[127/255, 127/255, 255/255,1]],
|
|
103 [99,6,'Freshwater (Antartic lake)',[127/255, 127/255, 255/255,1]],
|
|
104 [100,7,'Freshwater (reclaimed)',[127/255, 127/255, 255/255,1]],
|
|
105 [102,7,'Freshwater (reclaimed)',[127/255, 127/255, 255/255,1]],
|
|
106 [153,8,'Mouse (brain tissue)',[127/255, 127/255, 255/255,1]],
|
|
107 [154,8,'Mouse (brain tissue)',[127/255, 127/255, 255/255,1]],
|
|
108 [202,9,'Fish (gut)',[127/255, 127/255, 255/255,1]],
|
|
109 [206,9,'Fish (gut)',[127/255, 127/255, 255/255,1]],
|
|
110 [209,10,'Mosquito',[127/255, 127/255, 255/255,1]],
|
|
111 [211,10,'Mosquito',[127/255, 127/255, 255/255,1]],
|
|
112 ['U',0,'User input',[255/255, 127/255, 127/255,1]]
|
|
113 ];
|
|
114 my $DATA_MIC = [
|
|
115 [17,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
|
|
116 [20,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
|
|
117 [22,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
|
|
118 [63,2,'Mouse (fecal)',[127/255, 127/255, 255/255,1]],
|
|
119 [65,2,'Mouse (fecal)',[127/255, 127/255, 255/255,1]],
|
|
120 [68,2,'Mouse (fecal)',[127/255, 127/255, 255/255,1]],
|
|
121 [93,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
|
|
122 [95,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
|
|
123 [109,4,'Marine (open ocean)',[127/255, 127/255, 255/255,1]],
|
|
124 [110,4,'Marine (open ocean)',[127/255, 127/255, 255/255,1]],
|
|
125 [111,4,'Marine (open ocean)',[127/255, 127/255, 255/255,1]],
|
|
126 [120,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
|
|
127 [124,5,'Marine (estuary)',[127/255, 127/255, 255/255,1]],
|
|
128 [125,5,'Marine (estuary)',[127/255, 127/255, 255/255,1]],
|
|
129 [134,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
|
|
130 [146,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
|
|
131 [148,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
|
|
132 [201,6,'Fish (gut)',[127/255, 127/255, 255/255,1]],
|
|
133 [203,7,'Fish (slime)',[127/255, 127/255, 255/255,1]],
|
|
134 [205,6,'Fish (gut)',[127/255, 127/255, 255/255,1]],
|
|
135 ['U',0,'User input',[255/255, 127/255, 127/255,1]]
|
|
136 ];
|
|
137 my $BASE64_BASES = {A => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
|
|
138 bWFnZVJlYWR5ccllPAAAAzVJREFUeNrsnMFxo0AQRWe7fJcyMBnYGawyMIe9a0JQJtbefDPOAB33
|
|
139 JmdgZyBlsIpgl9lCLkwJA/N7uhu0XTXlkstI8Oh+agbG355+/XDC8VaNu8htf1ZjI73DJPx59wCg
|
|
140 EN4phDQkNAsWGqCkIeUM7zFrSL7OBDS+VyObMyQrZWsSUlZnACfw5dwgcZ/5BZPfTEHyEwCvColL
|
|
141 2O24q/uuWUDKJ1TGKpCCsB8Sn4Dl1CGlbvxEBD51SCIlR4lL4VYAUnKB08SzSCSbUkFKLWxRgdMM
|
|
142 sii5wK1BOlksuRSQVoCwA9wjIPDVVCAhWVTWw1SZc0MK8lxHblvUP7fA569TCJyMZFET0qEa75ay
|
|
143 iRtSrDwDlLfG663CPohAQoRdtF4jXrrlFjgZKbU2lN/VeLFSclyQlkAzt6s95BiziVXgXJByFz/7
|
|
144 WH7x+6OFbOKCFCvL0wUffeUqFYFzQELu7/eVFAKJTeCkmEVDIARXvWqXHAoJEXbwzZ4BZJ/AM21I
|
|
145 iLCLESV50swmMlxqzZ6pnCqkDBD2a0dvlErguRYkiSw6x16zZyKlDy4FwDbjARE4AYBihf1Se0YS
|
|
146 EnRSaSJZpNozxUAKaRv7QNYR/KZSEXgMpI1CFjUhifdMMZBypUzgAB0lcIoAFDv72J6ijY0tuL1P
|
|
147 DckrZ5GrQSM90yYlpMxh9/cfq/GHaSBPq4xeVUBCWWQt/kMaEKNWFQyFJPVAlmRsuCF5N7/wnJCW
|
|
148 TvaBLKkYLHC60iwadWzEWbtzFXgfpNUMhT06CeiKS23wMVKPsNdXAKlX4HTlWTToWG8SQdoxXK3H
|
|
149 zA7E3r0JAr/vmqXogoSu3w87vFeA9AwK3I8pN+Rr/6gAKAQ669m5qoA6hJ0r7mxsoE/Hda4qoA6i
|
|
150 CzDttaJI0TMRc6mFKdqDIqS9w2YtLy4LowTC1o4tdzYR83VaaQASu8Dpwh/ERuzta+441H0am8Cp
|
|
151 1TwuJp5FSQROTB32yRgk9Om4TwI/Q8oc9g9XCmcv2LKJmIRtERL6LfexqoAYSo3r9nUKgb+D7+HP
|
|
152 kFBhW8wi1p6JHL4KujQMCRX4v1UFARJyu2infBky5KIXPYn+rwADAOL8qKxS08x7AAAAAElFTkSu
|
|
153 QmCC',
|
|
154 C => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
|
|
155 bWFnZVJlYWR5ccllPAAAA7BJREFUeNrsnM1xqzAQxxUNDfBKwCWQCt7g+7vgEkgJ5pRDTnYJpgRz
|
|
156 eXeYVBBKCCU8SvAzM6sZxuMPaXclQaydYYKTGPBv/7tagdYvp9NJTO3Px6dwZPl5S2A/hdf3rD9v
|
|
157 1eT1nvuC/r7/vvr7SLizDGAUEzgmNr5nN3mt9ksAWNu6cNuQYoCyhX0bpmANoK4K9tlMWrrw0euH
|
|
158 8/YPPkTsQKkxnIv9nNKSZ79BQb5sy3kNkjnnfMMFzsFiUHNDVZVk9FyDTMguBowvGDS8QTpejDpz
|
|
159 tARAZT4gNRr1zZyswYCSrk84Azuahp58MkAqoR9NkjkG0m7BgG5V76yQcgtD/B6mFqvz9nJlW8Pf
|
|
160 uacdha6zI0P6B6YLbGH6UGv+b3tRbnCNpgdwDpuSOEr9cU61AXXUBOX9YlJWolOVS4MwyxnUs2L6
|
|
161 cAr2G1MhzAKJKu8K1DMw55UKYFHVlFMhYe//KKuZPH7v+CXxGCyQsNZbBjTNUzURUoyFlFEmhhAK
|
|
162 g3BjVDUVWEg5MV90DgvEy3vgppZi66ScGAKurTJMDxXAvXuPPMLGqUYy7T1A6mBLHxSlRg6MMPLT
|
|
163 hOTLWnBuNVELKS9GD5I2ttDzCalkSOJaiTsmKKkVP8wks4qE4xHNKyRKhd0HSCHcyCPb4LDC9g4p
|
|
164 DqFmL9yGZ4EUkrbhBDeYBSWJoKQAKViAFCAFSLOERKl1kqCkoKSgJFMl9QGSPUijpQHSE6rppypJ
|
|
165 tU5Y7Qig3IL1vZ5ydNJ403BcdzSuZBt71Rp4ncxJSbFHSNmN36melxMAK6iQhgWrSWf9wu6KylBL
|
|
166 byiQCo+hliIcqlTmFFLmaZSjOKfCQFIrNLDmuqUrIULqsHO3muhVl+UAxSl3F3lIDQlSHhMZ9XAQ
|
|
167 w9tKqOlAUs2/lBA4OAgz6jlIkDjUlFsEpTqOqGsXeiokqppUfmqYQy+BY1Lz3sPPJg0O1DPkDXSL
|
|
168 5xV1fjEAanVKHZM7kxtG72ObCjN4L9eAoLUQ36SVqwNFcdQ/GWzTUL6V+7aTn5zhqh0dpl/DUYLE
|
|
169 ueZm6lshhHDbEd4Lg8WnmAcBG7H8dZFGqQMDSfWa9QsG1NmGpOS6XiAoVC+vJMb164JCr8TWe9SH
|
|
170 kwOAqmcO6I1SEEvGON/MEI5KC5QWL9bH3KOaVjNSVQXXQ15XLi14TrW0+1r03kIKYGtrlRYvdM0h
|
|
171 dUPlvMI5WQeTyIFXW/Cqeu5VMIPpheUuTZdfobifjDTTXvxYcz5YXsBxtrD+vwADADoA0kx0ZQr1
|
|
172 AAAAAElFTkSuQmCC',
|
|
173 G => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
|
|
174 bWFnZVJlYWR5ccllPAAAA6lJREFUeNrsnN2RqjAUgANjA9wS3CefsQQsAUtgS8AStAQsQUuQEuR5
|
|
175 nzYlSAkumTnZy/UKJpyTEANn5syqs0L8cn4hIbjf70zI19eaWZS40aT1Pm80Uvhe1ei59b6Ez8hl
|
|
176 tbr+vl5YhpLCa8xx4h54RqCZhCQsI9OwEkYEr2700OgRXqMlNARn3+gN/kbMrrTPXzS6dA2SHFzO
|
|
177 3BBhyd8wrtEhJTAYV+A8Sg7ji8eCJGbpQmHWhkWM7wrJwxqkCODk7L3kpDvmBWJW3sF6+qyfQRY0
|
|
178 YknvDqgNKjUByRdAUgqVYK4DKQJ/9gWQ/E0FJSQl6gNExIVdo9tGgw5dw/8cDJw/fhXIA8UGN8cW
|
|
179 ZA9ybPVaQ4vEjHDSapgI/qzBDRXjEBUgAeaj0U8EIAl5Dcepidwux7hbQTRTG3ApTmyRa6LOP+3q
|
|
180 M0OFLybIk1fwQ0pmRjhMQEVgTdkQSHsCQBti6+mzVE5gTVqQMmS6l4BqZkckKGymi3UhYQa8tQio
|
|
181 7Xo7gisaSpASZHrdWXCxvrqLI61JqcFNkW52HLmSPmrG0yOA5ezfGw2dxaSI8t9s+GXXjcFMppOp
|
|
182 bj21WgWhoHMyX90tSRCAuAOAZEws4XecdS6LPJOFik9qmq0rsqE6UEic1VyCxExBWiJcrRoh5Y8C
|
|
183 CeNqJfNUKCFVU4GEaUP4DGm2JDQkb63oEVKEyGz1lCCxGZJaMemKiKL2PpJeuiDNme0NLck7SNFU
|
|
184 INUzJLOQ2AzptSxnSLO7kaTyyGdQVJC8drmQsJOPpwJpDt4KkDCXYBPisYmbCgFSuSl3qxHuFk3B
|
|
185 krDWlE0FEiZ4p1OBdEZmuHgKkDjSmrIpQMJaU2Yg0zkJCXtPfz8FSDUSVOwTqL4rk9gtCvnI2Y6s
|
|
186 6e6DRLEg6zRSfBLnvNqAJOST4BwXyxZVMOLtZq8gcUazMOtkIUaJrHozUYKo3C2hWm6cgwtQu5/c
|
|
187 qV2Y6h1VINUMv4C8nfUuoBnyOALOHSzU6GWaQOOBLntmZue2XDLMe4rYpHWVwcbu8XK1uv4uTNXZ
|
|
188 zb1j/z+thkJS1xtj3Tu4W+bxYq22JWEgyZ1APoPaPhbSQ9YCSFC+rbYVE//xLC4OXTAhQR08ASTi
|
|
189 7bqr1AkJDr59YziiUP7zarIplt6cu8zUcTjKu8Gp1idxsCjXg/qB/d1yrzxO6pVuJcyQS6VCBWEh
|
|
190 GNpiBYYfoSiLz/0IYM6gg/rO9qbAwOJzJmVrgd0l3pdEGFXGbUP6EWAA2LwDwtC8jpAAAAAASUVO
|
|
191 RK5CYII=',
|
|
192 T => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
|
|
193 bWFnZVJlYWR5ccllPAAAALRJREFUeNrs09ENQDAUQFHEXlhAYgJWMJnEBLqBUWxQFkCC/si5yftq
|
|
194 mzYnaR5jzM4KXXu++J9CNc311YYi022QIEGCBAkSJEiCBCll5c16k+DO4Zj+4dnxmPXj92xvkZYE
|
|
195 SPWLs2uiN/lukCBBggQJkiBBggQJEiRIggQJEiRIkCBBEiRIkCBBggRJkCBBggQJEiRICCBBggQJ
|
|
196 EiRIggQJEiRIkCAJEiRIkCBBggRJ1+0CDAAzsw5U48snWgAAAABJRU5ErkJggg==',
|
|
197 N => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAACXBIWXMAAAsTAAALEwEAmpwYAAAK
|
|
198 T2lDQ1BQaG90b3Nob3AgSUNDIHByb2ZpbGUAAHjanVNnVFPpFj333vRCS4iAlEtvUhUIIFJCi4AU
|
|
199 kSYqIQkQSoghodkVUcERRUUEG8igiAOOjoCMFVEsDIoK2AfkIaKOg6OIisr74Xuja9a89+bN/rXX
|
|
200 Pues852zzwfACAyWSDNRNYAMqUIeEeCDx8TG4eQuQIEKJHAAEAizZCFz/SMBAPh+PDwrIsAHvgAB
|
|
201 eNMLCADATZvAMByH/w/qQplcAYCEAcB0kThLCIAUAEB6jkKmAEBGAYCdmCZTAKAEAGDLY2LjAFAt
|
|
202 AGAnf+bTAICd+Jl7AQBblCEVAaCRACATZYhEAGg7AKzPVopFAFgwABRmS8Q5ANgtADBJV2ZIALC3
|
|
203 AMDOEAuyAAgMADBRiIUpAAR7AGDIIyN4AISZABRG8lc88SuuEOcqAAB4mbI8uSQ5RYFbCC1xB1dX
|
|
204 Lh4ozkkXKxQ2YQJhmkAuwnmZGTKBNA/g88wAAKCRFRHgg/P9eM4Ors7ONo62Dl8t6r8G/yJiYuP+
|
|
205 5c+rcEAAAOF0ftH+LC+zGoA7BoBt/qIl7gRoXgugdfeLZrIPQLUAoOnaV/Nw+H48PEWhkLnZ2eXk
|
|
206 5NhKxEJbYcpXff5nwl/AV/1s+X48/Pf14L7iJIEyXYFHBPjgwsz0TKUcz5IJhGLc5o9H/LcL//wd
|
|
207 0yLESWK5WCoU41EScY5EmozzMqUiiUKSKcUl0v9k4t8s+wM+3zUAsGo+AXuRLahdYwP2SycQWHTA
|
|
208 4vcAAPK7b8HUKAgDgGiD4c93/+8//UegJQCAZkmScQAAXkQkLlTKsz/HCAAARKCBKrBBG/TBGCzA
|
|
209 BhzBBdzBC/xgNoRCJMTCQhBCCmSAHHJgKayCQiiGzbAdKmAv1EAdNMBRaIaTcA4uwlW4Dj1wD/ph
|
|
210 CJ7BKLyBCQRByAgTYSHaiAFiilgjjggXmYX4IcFIBBKLJCDJiBRRIkuRNUgxUopUIFVIHfI9cgI5
|
|
211 h1xGupE7yAAygvyGvEcxlIGyUT3UDLVDuag3GoRGogvQZHQxmo8WoJvQcrQaPYw2oefQq2gP2o8+
|
|
212 Q8cwwOgYBzPEbDAuxsNCsTgsCZNjy7EirAyrxhqwVqwDu4n1Y8+xdwQSgUXACTYEd0IgYR5BSFhM
|
|
213 WE7YSKggHCQ0EdoJNwkDhFHCJyKTqEu0JroR+cQYYjIxh1hILCPWEo8TLxB7iEPENyQSiUMyJ7mQ
|
|
214 AkmxpFTSEtJG0m5SI+ksqZs0SBojk8naZGuyBzmULCAryIXkneTD5DPkG+Qh8lsKnWJAcaT4U+Io
|
|
215 UspqShnlEOU05QZlmDJBVaOaUt2ooVQRNY9aQq2htlKvUYeoEzR1mjnNgxZJS6WtopXTGmgXaPdp
|
|
216 r+h0uhHdlR5Ol9BX0svpR+iX6AP0dwwNhhWDx4hnKBmbGAcYZxl3GK+YTKYZ04sZx1QwNzHrmOeZ
|
|
217 D5lvVVgqtip8FZHKCpVKlSaVGyovVKmqpqreqgtV81XLVI+pXlN9rkZVM1PjqQnUlqtVqp1Q61Mb
|
|
218 U2epO6iHqmeob1Q/pH5Z/YkGWcNMw09DpFGgsV/jvMYgC2MZs3gsIWsNq4Z1gTXEJrHN2Xx2KruY
|
|
219 /R27iz2qqaE5QzNKM1ezUvOUZj8H45hx+Jx0TgnnKKeX836K3hTvKeIpG6Y0TLkxZVxrqpaXllir
|
|
220 SKtRq0frvTau7aedpr1Fu1n7gQ5Bx0onXCdHZ4/OBZ3nU9lT3acKpxZNPTr1ri6qa6UbobtEd79u
|
|
221 p+6Ynr5egJ5Mb6feeb3n+hx9L/1U/W36p/VHDFgGswwkBtsMzhg8xTVxbzwdL8fb8VFDXcNAQ6Vh
|
|
222 lWGX4YSRudE8o9VGjUYPjGnGXOMk423GbcajJgYmISZLTepN7ppSTbmmKaY7TDtMx83MzaLN1pk1
|
|
223 mz0x1zLnm+eb15vft2BaeFostqi2uGVJsuRaplnutrxuhVo5WaVYVVpds0atna0l1rutu6cRp7lO
|
|
224 k06rntZnw7Dxtsm2qbcZsOXYBtuutm22fWFnYhdnt8Wuw+6TvZN9un2N/T0HDYfZDqsdWh1+c7Ry
|
|
225 FDpWOt6azpzuP33F9JbpL2dYzxDP2DPjthPLKcRpnVOb00dnF2e5c4PziIuJS4LLLpc+Lpsbxt3I
|
|
226 veRKdPVxXeF60vWdm7Obwu2o26/uNu5p7ofcn8w0nymeWTNz0MPIQ+BR5dE/C5+VMGvfrH5PQ0+B
|
|
227 Z7XnIy9jL5FXrdewt6V3qvdh7xc+9j5yn+M+4zw33jLeWV/MN8C3yLfLT8Nvnl+F30N/I/9k/3r/
|
|
228 0QCngCUBZwOJgUGBWwL7+Hp8Ib+OPzrbZfay2e1BjKC5QRVBj4KtguXBrSFoyOyQrSH355jOkc5p
|
|
229 DoVQfujW0Adh5mGLw34MJ4WHhVeGP45wiFga0TGXNXfR3ENz30T6RJZE3ptnMU85ry1KNSo+qi5q
|
|
230 PNo3ujS6P8YuZlnM1VidWElsSxw5LiquNm5svt/87fOH4p3iC+N7F5gvyF1weaHOwvSFpxapLhIs
|
|
231 OpZATIhOOJTwQRAqqBaMJfITdyWOCnnCHcJnIi/RNtGI2ENcKh5O8kgqTXqS7JG8NXkkxTOlLOW5
|
|
232 hCepkLxMDUzdmzqeFpp2IG0yPTq9MYOSkZBxQqohTZO2Z+pn5mZ2y6xlhbL+xW6Lty8elQfJa7OQ
|
|
233 rAVZLQq2QqboVFoo1yoHsmdlV2a/zYnKOZarnivN7cyzytuQN5zvn//tEsIS4ZK2pYZLVy0dWOa9
|
|
234 rGo5sjxxedsK4xUFK4ZWBqw8uIq2Km3VT6vtV5eufr0mek1rgV7ByoLBtQFr6wtVCuWFfevc1+1d
|
|
235 T1gvWd+1YfqGnRs+FYmKrhTbF5cVf9go3HjlG4dvyr+Z3JS0qavEuWTPZtJm6ebeLZ5bDpaql+aX
|
|
236 Dm4N2dq0Dd9WtO319kXbL5fNKNu7g7ZDuaO/PLi8ZafJzs07P1SkVPRU+lQ27tLdtWHX+G7R7ht7
|
|
237 vPY07NXbW7z3/T7JvttVAVVN1WbVZftJ+7P3P66Jqun4lvttXa1ObXHtxwPSA/0HIw6217nU1R3S
|
|
238 PVRSj9Yr60cOxx++/p3vdy0NNg1VjZzG4iNwRHnk6fcJ3/ceDTradox7rOEH0x92HWcdL2pCmvKa
|
|
239 RptTmvtbYlu6T8w+0dbq3nr8R9sfD5w0PFl5SvNUyWna6YLTk2fyz4ydlZ19fi753GDborZ752PO
|
|
240 32oPb++6EHTh0kX/i+c7vDvOXPK4dPKy2+UTV7hXmq86X23qdOo8/pPTT8e7nLuarrlca7nuer21
|
|
241 e2b36RueN87d9L158Rb/1tWeOT3dvfN6b/fF9/XfFt1+cif9zsu72Xcn7q28T7xf9EDtQdlD3YfV
|
|
242 P1v+3Njv3H9qwHeg89HcR/cGhYPP/pH1jw9DBY+Zj8uGDYbrnjg+OTniP3L96fynQ89kzyaeF/6i
|
|
243 /suuFxYvfvjV69fO0ZjRoZfyl5O/bXyl/erA6xmv28bCxh6+yXgzMV70VvvtwXfcdx3vo98PT+R8
|
|
244 IH8o/2j5sfVT0Kf7kxmTk/8EA5jz/GMzLdsAAAAgY0hSTQAAeiUAAICDAAD5/wAAgOkAAHUwAADq
|
|
245 YAAAOpgAABdvkl/FRgAAAh1JREFUeNrsmk1xwzAQRr8RgYRBwqBhkDJoGbQMagZ1GbgMVAYNA5dB
|
|
246 wsBm4CBwL9Wx0Uwk7593Z3z0SHmRn3fXi3me8d8FoAUw33kdQB/9PXu9xWCeZ4QFN9zBSCwJ6Qig
|
|
247 cUj5aAFsHdLt2Fh47ALBGi8AHh2ScYlTQXrQLPFAuJZaiVNC2gCIDikfTxolHhjWjA4pH7s/Pzmk
|
|
248 TDQA9g7JUCYeGNdWI/HAvH50SEYkHgTs4V26xIOQfUSHlI8jgGeHlI9OagEsCdIOQtspQdh+REo8
|
|
249 CPzjokNSKPGlIJ0qnKatdUgdgJ/CArhdw+NW+qZ6A888ASmkM4DPCifSvLhbANdCib9ahzRV+JHs
|
|
250 mThFCvCtXeJUeVLpaWKVOBWkAcCH1kycMuPuAIwF97PNE1BCqiHxlkPi1LVbX1iysHyK4ihwm8Lc
|
|
251 iXwojAPSUOE0dNYhJbdctEics5/UVAC9tQ6pB/BVKPFoHVINiZPME3BDmirUZdE6pPSmKimAF58n
|
|
252 kPIhoKlw/946pDPKupiLZuKSPim1FSR+sA6pRgG8sQ4JKO9iYg2QAAGNfw2QBpR3Mc1DSrnT6JCW
|
|
253 l7h5SKkAPjmk5QvgVUAaIGAeQDqklImPDkl47qQFUo+yLuYqILFKXBOkCUzTJZogpUz84pAESlwj
|
|
254 pDPKZzHNQ0q509Uh5SXeOKR8RBB1MTVDIpO4dkgDCLqY2iGl3Gl0SMwS/x0AsYSfWCRqIfIAAAAA
|
|
255 SUVORK5CYII='
|
|
256 };
|
|
257 my $MMCHART_B2 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAAGCAYAAAACEPQxAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
|
|
258 bWFnZVJlYWR5ccllPAAAABdJREFUeNpiYGBg+M/w//9/BmwEQIABANxBD/HRDNRSAAAAAElFTkSu
|
|
259 QmCC';
|
|
260 my $FREQCHART_L = 'iVBORw0KGgoAAAANSUhEUgAAAC8AAABvCAIAAADzHQ6XAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
|
|
261 bWFnZVJlYWR5ccllPAAABWNJREFUeNrkm91V7DoMhTOsaQBKgBKgBCjhUgKUAI/wBiUwJUAJUAKU
|
|
262 ACVACed+K/tcYxz5J5nYk3uOHmYFxokVWdrakj17v1rJ6+trdkw3y0x3d3dd172/v5vfPj8/d/8J
|
|
263 Iytq488U0+bw8PDq6oqLh4cHhn1+fta1jXQytWGB+ErLhB5cPz4+xp6z11WWr68vPvf39/WJnT4+
|
|
264 PmKD17W10dwoYX57c3Oji9vbW2xTXRvpgYVknoQ2fFZfKSkhC6ETFzE7tdDm+PiY6V9eXrh+enri
|
|
265 8/T0NDq6RkxxwZ/EczneSJNuIVgsbVa62rmsVqsfeHN5eanV3aU4W8n5cTqWNobx9ST0G2AbPZzD
|
|
266 c+HcsJk2ht8ACYTiSy8Y7J9eUmG5hQRYbMQUy+SMhDasnUyVyL1NV4rgRCcs1E6btBdDUPiqnTYX
|
|
267 FxeA5q8dyZxYXMJ5W2jDmrqcHHMsTO7GxFDD0OauF/cI1i4bR4yRPylTmixTMOHIvPlMafMjM1xf
|
|
268 X/tgAOqcnZ2JSsaEMcwkVoVab29vJhlVqOozwUQ7n0sHb8br8lqZmsMzPnZivuEYgWc6MKXJ2mev
|
|
269 AeAe9pKwTZrzOkHdo14Yr9ceYvEP24gQDUODmbK2cRY1bYOX8BDZj1kwtpmSQy8+7oXbPnvhTiwc
|
|
270 u9n3UKcut6uK80UVXfb1Qm0wTGDzrNP49ojFlJZGz9EKmEFu4w1Dr3rh/pI0aeJNwIt5lCtfhsZb
|
|
271 LhYvixev/XC9v78fwpdPM2rLtzbALgqJW+2YpRMLXO+cUayDKrWxLWwsFnZl0aWdbYAEcjiuE7Q2
|
|
272 wLdsJprZb9QDMyXRGKtYT8VytdkFql6H7/eCThR1akVJSp5lkqyt6nAWSyxO2YQFwq+zkF/Cixnj
|
|
273 orU0T4k/EFYiBjwCCOY/6dxZwot5DmN4jskwDW38cY6mcH+WVGT5TbZHbLB0k4mqvZtm6X6z02Tp
|
|
274 6gltNptVL6TCfJ5ynUsfjtXCTDhyCS/WyzCGV0cnKhOmcK+dwmJEzovBHRNN+02WFysDuofElt5g
|
|
275 okGe4s7semf9Rh7p8nGpNtOYaJYXi5lrjBh7Uc0wYx0e8GIfb0bU4QvixScnJ2ZQtGGiYZ4ys08a
|
|
276 bCr2i83dSnyiavNxnN80I4QGL06AaRte/O3FQStJfkOWAUIc06jtxSkm6naOF8FE23BQ3zZL3Z/a
|
|
277 Rubixd8xFcNiJ/gybjQkUxTwDrWHA3wydHR0RJ4i0eb3fMWtxmIx7MmdkSAqE+PPz89HYPGwI6ee
|
|
278 VAKLy89ICNZLc3jA0AqxONh4jnUY1XfWFGlt1sHSDtsU2/eLWUEVOiN6FGrN8wbq0GJ/uVuiqSPb
|
|
279 OIuatsEYSr0BBRvdoc0eJSrxm2EETeTFJYzO2SN77micbbY/KeVe2px4ijbcg7XH7s7Pv3c3rUcx
|
|
280 bw4Pz1GoR6GiGpDI7pZV4cWTexRVbDO5R1Gllz6tR1G3XzyhR1GrgpnWo6h74mWz2ciHUAVQb0aN
|
|
281 F8yLcVgdxtstL86wrWl5akj8Rp/3U0CN1SZ7dli4qmSpMWa2Cb2YzMBiDelIYg+GBaLSUDCy1gcH
|
|
282 B8MymTF+qOIfZtmQr3yzezCFvNhkZyleLJQbuwdTyItdoGB+LOebKorFk6lWmhf7NNc8gWIzinpn
|
|
283 h3EdmAmq+AEYPf/rVwVjgzzLi0VLsp2X3xHu++kE1MnyYh9s8jXDltrMy4vn6ZjMtZG+1y1JlqXN
|
|
284 KrBzAOSNO/vrYO7GRymi/eI/pwv5Z3rx36pNEXduUCst63cwJb+7a6RNYU95zqyZ3Wz7n/3uTgDY
|
|
285 tXHhEu7cYqX++t/dbY83vyOr2WnHEu78rwADABaBbeIZChwYAAAAAElFTkSuQmCC';
|
|
286
|
|
287 my $CSS_STYLE = '
|
|
288 html, body, div, span, p, img {
|
|
289 margin: 0;
|
|
290 padding: 0;
|
|
291 border: 0;
|
|
292 outline: 0;
|
|
293 font-size: 100%;
|
|
294 vertical-align: baseline;
|
|
295 background: transparent;
|
|
296 }
|
|
297
|
|
298 html, body {
|
|
299 font-family: Arial, Verdana;
|
|
300 color: #40454b;
|
|
301 font-size: 12px;
|
|
302 text-align: center;
|
|
303 }
|
|
304
|
|
305 img {
|
|
306 padding: 0px; margin: 0px; border: none;
|
|
307 }
|
|
308
|
|
309 .info-panel {
|
|
310 margin-top: 10px;
|
|
311 margin-bottom: 10px;
|
|
312 width: 740px;
|
|
313 text-align: left;
|
|
314 }
|
|
315
|
|
316 .info-header {
|
|
317 padding-top: 20px;
|
|
318 padding-top: 10px;
|
|
319 }
|
|
320
|
|
321 .info-header-title {
|
|
322 color: #126499;
|
|
323 text-decoration: none;
|
|
324 font-family: sans-serif;
|
|
325 font-weight: bold;
|
|
326 font-size: 16px;
|
|
327 vertical-align: baseline;
|
|
328 margin-right: 20px;
|
|
329 margin-bottom: 25px;
|
|
330 margin-top: 15px;
|
|
331 }
|
|
332
|
|
333 .info-content {
|
|
334 padding: 2px;
|
|
335 font-family: "lucida grande",sans-serif,arial;
|
|
336 margin-top: 15px;
|
|
337 margin-bottom: 15px;
|
|
338 }
|
|
339
|
|
340 .info-table-type {
|
|
341 min-width: 70px;
|
|
342 padding: 4px;
|
|
343 vertical-align: top;
|
|
344 }
|
|
345
|
|
346 .info-table-value {
|
|
347 font-weight: bold;
|
|
348 padding-top: 4px;
|
|
349 padding-left: 10px;
|
|
350 padding-right: 10px;
|
|
351 vertical-align: top;
|
|
352 }
|
|
353
|
|
354 hr {
|
|
355 background-color: #E0E0E0;
|
|
356 border: medium none;
|
|
357 color: #E0E0E0;
|
|
358 height: 1px;
|
|
359 outline: medium none;
|
|
360 }
|
|
361
|
|
362 .sequencetext {
|
|
363 font-family: courier, "courier new";
|
|
364 font-weight: normal;
|
|
365 }
|
|
366 ';
|
|
367
|
|
368 my $VERSION = '0.6';
|
|
369 my $WHAT = 'graphs-noPCA';
|
|
370
|
|
371 my $man = 0;
|
|
372 my $help = 0;
|
|
373 my %params = ('help' => \$help, 'h' => \$help, 'man' => \$man);
|
|
374 GetOptions( \%params,
|
|
375 'help|h',
|
|
376 'man',
|
|
377 'verbose',
|
|
378 'version' => sub { print "PRINSEQ-$WHAT $VERSION\n"; exit; },
|
|
379 'i=s',
|
|
380 'o=s',
|
|
381 'png_all',
|
|
382 'html_all',
|
|
383 'log:s',
|
|
384 'web:s'
|
|
385 ) or pod2usage(2);
|
|
386 pod2usage(1) if $help;
|
|
387 pod2usage(-exitstatus => 0, -verbose => 2) if $man;
|
|
388
|
|
389 =head1 NAME
|
|
390
|
|
391 PRINSEQ - PReprocessing and INformation of SEQuence data
|
|
392
|
|
393 =head1 VERSION
|
|
394
|
|
395 PRINSEQ-graphs 0.6
|
|
396
|
|
397 =head1 SYNOPSIS
|
|
398
|
|
399 perl prinseq-graphs.pl [-h] [-help] [-version] [-man] [-verbose] [-i input_graph_data_file] [-png_all] [-html_all] [-log file]
|
|
400
|
|
401 =head1 DESCRIPTION
|
|
402
|
|
403 PRINSEQ will help you to preprocess your genomic or metagenomic sequence data in FASTA (and QUAL) or FASTQ format. The graphs version allows users of the lite version to generate graphs similar to the web version.
|
|
404
|
|
405 =head1 OPTIONS
|
|
406
|
|
407 =over 8
|
|
408
|
|
409 =item B<-help> | B<-h>
|
|
410
|
|
411 Print the help message; ignore other arguments.
|
|
412
|
|
413 =item B<-man>
|
|
414
|
|
415 Print the full documentation; ignore other arguments.
|
|
416
|
|
417 =item B<-version>
|
|
418
|
|
419 Print program version; ignore other arguments.
|
|
420
|
|
421 =item B<-verbose>
|
|
422
|
|
423 Prints status and info messages during processing.
|
|
424
|
|
425 =item B<***** INPUT OPTIONS *****>
|
|
426
|
|
427 =item B<-i> <file>
|
|
428
|
|
429 Input file containing the graph data generated by the lite version.
|
|
430
|
|
431 =item B<***** OUTPUT OPTIONS *****>
|
|
432
|
|
433 =item B<-o> <string>
|
|
434
|
|
435 By default, the output files are created in the same directory as the input file with an additional "_prinseq_graphs_XXXX" in their name (where XXXX is replaced by random characters to prevent overwriting previous files). To change the output filename and location, specify the filename using this option. The file extension will be added automatically.
|
|
436
|
|
437 =item B<-png_all>
|
|
438
|
|
439 Use this option to generate PNG files with the graphs.
|
|
440
|
|
441 =item B<-html_all>
|
|
442
|
|
443 Use this option to generate a HTML file with the graphs and tables.
|
|
444
|
|
445 =item B<-log> <file>
|
|
446
|
|
447 Log file to keep track of parameters, errors, etc. The log file name is optional. If no file name is given, the log file name will be "inputname.log". If the log file already exists, new content will be added to the file.
|
|
448
|
|
449 =back
|
|
450
|
|
451 =head1 AUTHOR
|
|
452
|
|
453 Robert SCHMIEDER, C<< <rschmieder_at_gmail_dot_com> >>
|
|
454
|
|
455 =head1 BUGS
|
|
456
|
|
457 If you find a bug please email me at C<< <rschmieder_at_gmail_dot_com> >> or use http://sourceforge.net/tracker/?group_id=315449 so that I can make PRINSEQ better.
|
|
458
|
|
459 =head1 COPYRIGHT
|
|
460
|
|
461 Copyright (C) 2011-2012 Robert SCHMIEDER
|
|
462
|
|
463 =head1 LICENSE
|
|
464
|
|
465 This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
|
|
466
|
|
467 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
468
|
|
469 You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
470
|
|
471 =cut
|
|
472
|
|
473 #
|
|
474 ################################################################################
|
|
475 ## DATA AND PARAMETER CHECKING
|
|
476 ################################################################################
|
|
477 #
|
|
478
|
|
479 my ($file1,$command,@dataread);
|
|
480
|
|
481 #Check if input file exists and check if file format is correct
|
|
482 if(exists $params{i}) {
|
|
483 $command .= ' -i '.$params{i};
|
|
484 $file1 = $params{i};
|
|
485 if($params{i} eq 'stdin') {
|
|
486 my $format = &checkInputFormat();
|
|
487 unless($format eq 'gd') {
|
|
488 &printError('input data for -i is in '.uc($format).' format not in graph data format');
|
|
489 }
|
|
490 } elsif(-e $params{i}) {
|
|
491 #check for file format
|
|
492 my $format = &checkFileFormat($file1);
|
|
493 unless($format eq 'gd') {
|
|
494 &printError('input file for -i is in '.uc($format).' format not in graph data format');
|
|
495 }
|
|
496 } else {
|
|
497 &printError("could not find input file \"".$params{i}."\"");
|
|
498 }
|
|
499 } else {
|
|
500 &printError("you did not specify an input file containing the graph data");
|
|
501 }
|
|
502
|
|
503 #check output file name prefix
|
|
504 if(exists $params{o}) {
|
|
505 $command .= ' -o '.$params{o};
|
|
506 }
|
|
507
|
|
508 #check for output format
|
|
509 unless(exists $params{png_all} || exists $params{html_all}) {
|
|
510 &printError("No output format specified. Use -png_all and/or -html_all to generate graphs.");
|
|
511 }
|
|
512 if(exists $params{png_all}) {
|
|
513 $command .= ' -png_all';
|
|
514 }
|
|
515 if(exists $params{html_all}) {
|
|
516 $command .= ' -html_all';
|
|
517 }
|
|
518 if(exists $params{web}) {
|
|
519 $command .= ' -web'.($params{web} ? ' '.$params{web} : '');
|
|
520 }
|
|
521
|
|
522 #add remaining to log command
|
|
523 if(exists $params{log}) {
|
|
524 $command .= ' -log'.($params{log} ? ' '.$params{log} : '');
|
|
525
|
|
526 unless($params{log}) {
|
|
527 $params{log} = join("__",$file1||'nonamegiven').'.log';
|
|
528 }
|
|
529 $params{log} = cwd().'/'.$params{log} unless($params{log} =~ /^\//);
|
|
530 &printLog("Executing PRINSEQ with command: \"perl prinseq-".$WHAT.".pl".$command."\"");
|
|
531 }
|
|
532
|
|
533 #
|
|
534 ################################################################################
|
|
535 ## DATA PROCESSING
|
|
536 ################################################################################
|
|
537 #
|
|
538
|
|
539 my $filename = $file1;
|
|
540 while($filename =~ /[\w\d]+\.[\w\d]+$/) {
|
|
541 $filename =~ s/\.[\w\d]+$//;
|
|
542 last if($filename =~ /\/[^\.]+$/);
|
|
543 }
|
|
544
|
|
545 if(exists $params{png_all}) {
|
|
546 my $graphs = &generateGraphs($params{i},$params{o});
|
|
547 if(exists $params{web} && $params{web} ne 'nozip') {
|
|
548 #png files
|
|
549 if(scalar(@$graphs)) {
|
|
550 system("zip -j -r ".dirname($params{o})."/png_graphs.zip ".dirname($params{o}).' -i \*.png') == 0 or &printError("Cannot generate graphs ZIP file");
|
|
551 }
|
|
552 }
|
|
553 }
|
|
554 if(exists $params{html_all}) {
|
|
555 &generateHtml($params{i},$params{o});
|
|
556 }
|
|
557
|
|
558 &printWeb("STATUS: done");
|
|
559
|
|
560 ##
|
|
561 #################################################################################
|
|
562 ### MISC FUNCTIONS
|
|
563 #################################################################################
|
|
564 ##
|
|
565
|
|
566 sub printError {
|
|
567 my $msg = shift;
|
|
568 print STDERR "\nERROR: ".$msg.".\n\nTry \'perl prinseq-".$WHAT.".pl -h\' for more information.\nExit program.\n";
|
|
569 &printLog("ERROR: ".$msg.". Exit program.\n");
|
|
570 exit(0);
|
|
571 }
|
|
572
|
|
573 sub printWarning {
|
|
574 my $msg = shift;
|
|
575 print STDERR "WARNING: ".$msg.".\n";
|
|
576 &printLog("WARNING: ".$msg.".\n");
|
|
577 }
|
|
578
|
|
579 sub printWeb {
|
|
580 my $msg = shift;
|
|
581 if(exists $params{web}) {
|
|
582 print STDERR "\n".&getTime()."$msg\n";
|
|
583 }
|
|
584 }
|
|
585
|
|
586 sub getTime {
|
|
587 return sprintf("[%02d/%02d/%04d %02d:%02d:%02d] ",sub {($_[4]+1,$_[3],$_[5]+1900,$_[2],$_[1],$_[0])}->(localtime));
|
|
588 }
|
|
589
|
|
590 sub printLog {
|
|
591 my $msg = shift;
|
|
592 if(exists $params{log}) {
|
|
593 my $time = sprintf("%02d/%02d/%04d %02d:%02d:%02d",sub {($_[4]+1,$_[3],$_[5]+1900,$_[2],$_[1],$_[0])}->(localtime));
|
|
594 open(FH, ">>", $params{log}) or die "ERROR: Can't open file ".$params{log}.": $! \n";
|
|
595 flock(FH, LOCK_EX) or die "ERROR: Cannot lock file ".$params{log}.": $! \n";
|
|
596 print FH "[prinseq-".$WHAT."-$VERSION] [$time] $msg\n";
|
|
597 flock(FH, LOCK_UN) or die "ERROR: cannot unlock ".$params{log}.": $! \n";
|
|
598 close(FH);
|
|
599 }
|
|
600 }
|
|
601
|
|
602 sub addCommas {
|
|
603 my $num = shift;
|
|
604 return unless(defined $num);
|
|
605 return $num if($num < 1000);
|
|
606 $num = scalar reverse $num;
|
|
607 $num =~ s/(\d{3})/$1\,/g;
|
|
608 $num =~ s/\,$//;
|
|
609 $num = scalar reverse $num;
|
|
610 return $num;
|
|
611 }
|
|
612
|
|
613 sub checkFileFormat {
|
|
614 my $file = shift;
|
|
615
|
|
616 my ($format,$count,$id,$fasta,$fastq,$qual,$gd,$aa);
|
|
617 $count = 3;
|
|
618 $fasta = $fastq = $qual = $gd = $aa = 0;
|
|
619 $format = 'unknown';
|
|
620
|
|
621 open(FILE,"perl -p -e 's/\r/\n/g;s/\n\n/\n/g' < $file |") or die "ERROR: Could not open file $file: $! \n";
|
|
622 while (<FILE>) {
|
|
623 # chomp();
|
|
624 # next unless(length($_));
|
|
625 if($count-- == 0) {
|
|
626 last;
|
|
627 } elsif(!$fasta && /^\>\S+\s*/) {
|
|
628 $fasta = 1;
|
|
629 $qual = 1;
|
|
630 } elsif($fasta == 1 && (($aa && /^[ABCDEFGHIKLMNOPQRSTUVWYZXabcdefghiklmmopqrstuvwyzx*-]+/) || (!$aa && /^[ACGTURYKMSWBDHVNXacgturykmswbdhvnx-]+/))) {
|
|
631 $fasta = 2;
|
|
632 } elsif($qual == 1 && /^\s*\d+/) {
|
|
633 $qual = 2;
|
|
634 } elsif(!$fastq && /^\@(\S+)\s*/) {
|
|
635 $id = $1;
|
|
636 $fastq = 1;
|
|
637 } elsif($fastq == 1 && (($aa && /^[ABCDEFGHIKLMNOPQRSTUVWYZXabcdefghiklmmopqrstuvwyzx*-]+/) || (!$aa && /^[ACGTURYKMSWBDHVNXacgturykmswbdhvnx-]+/))) {
|
|
638 $fastq = 2;
|
|
639 } elsif($fastq == 2 && /^\+(\S*)\s*/) {
|
|
640 $fastq = 3 if($id eq $1 || /^\+\s*$/);
|
|
641 } elsif(!$gd && /^\{\"numseqs\"\:/) {
|
|
642 $gd = 1;
|
|
643 }
|
|
644 }
|
|
645 close(FILE);
|
|
646 if($fasta == 2) {
|
|
647 $format = 'fasta';
|
|
648 } elsif($qual == 2) {
|
|
649 $format = 'qual';
|
|
650 } elsif($fastq == 3) {
|
|
651 $format = 'fastq';
|
|
652 } elsif($gd == 1) {
|
|
653 $format = 'gd';
|
|
654 }
|
|
655
|
|
656 return $format;
|
|
657 }
|
|
658
|
|
659 sub checkInputFormat {
|
|
660 my ($format,$count,$id,$fasta,$fastq,$qual,$gd,$aa);
|
|
661 $count = 3;
|
|
662 $fasta = $fastq = $qual = $gd = $aa = 0;
|
|
663 $format = 'unknown';
|
|
664
|
|
665 while (<STDIN>) {
|
|
666 push(@dataread,$_);
|
|
667 # chomp();
|
|
668 # next unless(length($_));
|
|
669 if($count-- == 0) {
|
|
670 last;
|
|
671 } elsif(!$fasta && /^\>\S+\s*/) {
|
|
672 $fasta = 1;
|
|
673 $qual = 1;
|
|
674 } elsif($fasta == 1 && (($aa && /^[ABCDEFGHIKLMNOPQRSTUVWYZXabcdefghiklmmopqrstuvwyzx*-]+/) || (!$aa && /^[ACGTURYKMSWBDHVNXacgturykmswbdhvnx-]+/))) {
|
|
675 $fasta = 2;
|
|
676 } elsif($qual == 1 && /^\s*\d+/) {
|
|
677 $qual = 2;
|
|
678 } elsif(!$fastq && /^\@(\S+)\s*/) {
|
|
679 $id = $1;
|
|
680 $fastq = 1;
|
|
681 } elsif($fastq == 1 && (($aa && /^[ABCDEFGHIKLMNOPQRSTUVWYZXabcdefghiklmmopqrstuvwyzx*-]+/) || (!$aa && /^[ACGTURYKMSWBDHVNXacgturykmswbdhvnx-]+/))) {
|
|
682 $fastq = 2;
|
|
683 } elsif($fastq == 2 && /^\+(\S*)\s*/) {
|
|
684 $fastq = 3 if($id eq $1 || /^\+\s*$/);
|
|
685 } elsif(!$gd && /^\{\"numseqs\"\:/) {
|
|
686 $gd = 1;
|
|
687 }
|
|
688 }
|
|
689
|
|
690 if($fasta == 2) {
|
|
691 $format = 'fasta';
|
|
692 } elsif($qual == 2) {
|
|
693 $format = 'qual';
|
|
694 } elsif($fastq == 3) {
|
|
695 $format = 'fastq';
|
|
696 } elsif($gd == 1) {
|
|
697 $format = 'gd';
|
|
698 }
|
|
699
|
|
700 return $format;
|
|
701 }
|
|
702
|
|
703 sub readGdFile {
|
|
704 my $file = shift;
|
|
705 my $data;
|
|
706
|
|
707 open(DATA,"<$file") or &printError("Could not open file $file: $!");
|
|
708 while(<DATA>) {
|
|
709 next if(/^\#/);
|
|
710 chomp();
|
|
711 if(length($_)) {
|
|
712 $data = from_json($_);
|
|
713 }
|
|
714 }
|
|
715 close(DATA);
|
|
716
|
|
717 return $data;
|
|
718 }
|
|
719
|
|
720 sub getFileName {
|
|
721 my $ext = shift;
|
|
722 my ($file,$fh);
|
|
723 if(exists $params{o}) {
|
|
724 $file = $params{o}.$ext;
|
|
725 open(OUT,">$file") or &printError('cannot open output file');
|
|
726 close(OUT);
|
|
727 } else {
|
|
728 $fh = File::Temp->new( TEMPLATE => $filename.'_prinseq_graphs_XXXX',
|
|
729 SUFFIX => $ext,
|
|
730 UNLINK => 0);
|
|
731 $file = $fh->filename;
|
|
732 $fh->close();
|
|
733 }
|
|
734 return $file;
|
|
735 }
|
|
736
|
|
737 sub generateGraphs {
|
|
738 my ($in,$out) = @_;
|
|
739 my ($file,$data,$surface,@graphs);
|
|
740 $data = &readGdFile($in);
|
|
741
|
|
742 #length plot
|
|
743 if(exists $data->{counts}->{length}) {
|
|
744 $file = &getFileName('_ld.png');
|
|
745 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{length},1),$data->{stats}->{length},'Length Distribution','Read Length in bp','# Sequences',$file,0,' bp');
|
|
746 $surface->write_to_png($file);
|
|
747 push(@graphs,$file);
|
|
748 }
|
|
749 if(exists $data->{counts2} && exists $data->{counts2}->{length}) {
|
|
750 $file = &getFileName('_ld-2.png');
|
|
751 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{length},1),$data->{stats2}->{length},'Length Distribution','Read Length in bp','# Sequences',$file,0,' bp');
|
|
752 $surface->write_to_png($file);
|
|
753 push(@graphs,$file);
|
|
754 }
|
|
755
|
|
756 #tail plot
|
|
757 if(exists $data->{tail}) {
|
|
758 $file = &getFileName('_td5.png');
|
|
759 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{tail5},1),undef,'Poly-A/T Tail Distribution (> 4bp)','5\' Tail Length in bp','# Sequences',$file,0,' bp');
|
|
760 $surface->write_to_png($file);
|
|
761 push(@graphs,$file);
|
|
762 $file = &getFileName('_td3.png');
|
|
763 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{tail3},1),undef,'Poly-A/T Tail Distribution (> 4bp)','3\' Tail Length in bp','# Sequences',$file,0,' bp');
|
|
764 $surface->write_to_png($file);
|
|
765 push(@graphs,$file);
|
|
766 }
|
|
767 if(exists $data->{tail2}) {
|
|
768 $file = &getFileName('_td5-2.png');
|
|
769 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{tail5},1),undef,'Poly-A/T Tail Distribution (> 4bp)','5\' Tail Length in bp','# Sequences',$file,0,' bp');
|
|
770 $surface->write_to_png($file);
|
|
771 push(@graphs,$file);
|
|
772 $file = &getFileName('_td3-2.png');
|
|
773 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{tail3},1),undef,'Poly-A/T Tail Distribution (> 4bp)','3\' Tail Length in bp','# Sequences',$file,0,' bp');
|
|
774 $surface->write_to_png($file);
|
|
775 push(@graphs,$file);
|
|
776 }
|
|
777
|
|
778 #Ns plot
|
|
779 if(exists $data->{counts}->{ns}) {
|
|
780 $file = &getFileName('_ns.png');
|
|
781 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{ns},1),undef,'Percentage of N\'s (> 0%)','Percentage of N\'s per Read (1-100%)','# Sequences',$file,0);
|
|
782 $surface->write_to_png($file);
|
|
783 push(@graphs,$file);
|
|
784 }
|
|
785 if(exists $data->{counts2} && exists $data->{counts2}->{ns}) {
|
|
786 $file = &getFileName('_ns-2.png');
|
|
787 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{ns},1),undef,'Percentage of N\'s (> 0%)','Percentage of N\'s per Read (1-100%)','# Sequences',$file,0);
|
|
788 $surface->write_to_png($file);
|
|
789 push(@graphs,$file);
|
|
790 }
|
|
791
|
|
792 #GC content plot
|
|
793 if(exists $data->{counts}->{gc}) {
|
|
794 $file = &getFileName('_gc.png');
|
|
795 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{gc},0),$data->{stats}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences',$file,1);
|
|
796 $surface->write_to_png($file);
|
|
797 push(@graphs,$file);
|
|
798 }
|
|
799 if(exists $data->{counts2} && exists $data->{counts2}->{gc}) {
|
|
800 $file = &getFileName('_gc-2.png');
|
|
801 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{gc},0),$data->{stats2}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences',$file,1);
|
|
802 $surface->write_to_png($file);
|
|
803 push(@graphs,$file);
|
|
804 }
|
|
805
|
|
806 #Sequence complexity plot - dust
|
|
807 if(exists $data->{compldust}) {
|
|
808 $file = &getFileName('_cd.png');
|
|
809 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{compldust},0),undef,'Sequence complexity distribution','Mean sequence complexity (DUST scores)','Number of sequences',$file,1);
|
|
810 $surface->write_to_png($file);
|
|
811 push(@graphs,$file);
|
|
812 }
|
|
813
|
|
814 #Sequence complexity plot - entropy
|
|
815 if(exists $data->{complentropy}) {
|
|
816 $file = &getFileName('_ce.png');
|
|
817 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{complentropy},0),undef,'Sequence complexity distribution','Mean sequence complexity (Entropy values)','Number of sequences',$file,1);
|
|
818 $surface->write_to_png($file);
|
|
819 push(@graphs,$file);
|
|
820 }
|
|
821
|
|
822 #Dinucleotide odd ratio PCA plot - microbial/viral
|
|
823 #Odds ratio plot
|
|
824 if(exists $data->{dinucodds}) {
|
|
825 my @new = map {$data->{dinucodds}->{$_}} sort keys %{$data->{dinucodds}};
|
|
826 # $file = &getFileName('_pm.png');
|
|
827 # $surface = &createPCAPlot(&convertToPCAValues(\@new,'m'),'PCA','1st Principal Component Score','2nd Principal Component Score',$file);
|
|
828 # $surface->write_to_png($file);
|
|
829 # push(@graphs,$file);
|
|
830 # $file = &getFileName('_pv.png');
|
|
831 # $surface = &createPCAPlot(&convertToPCAValues(\@new,'v'),'PCA','1st Principal Component Score','2nd Principal Component Score',$file);
|
|
832 # $surface->write_to_png($file);
|
|
833 # push(@graphs,$file);
|
|
834 $file = &getFileName('_or.png');
|
|
835 $surface = &createOddsRatioPlot($data->{dinucodds},'Odds ratios','Dinucleotide','Odds ratio',$file);
|
|
836 $surface->write_to_png($file);
|
|
837 push(@graphs,$file);
|
|
838 }
|
|
839
|
|
840 #Qual plot
|
|
841 if(exists $data->{quals}) {
|
|
842 $file = &getFileName('_qd.png');
|
|
843 $surface = &createBoxPlot(&convertToBoxValues($data->{quals},4),'Base Quality Distribution','Read position in %','Quality score',$file);
|
|
844 $surface->write_to_png($file);
|
|
845 push(@graphs,$file);
|
|
846 }
|
|
847 if(exists $data->{quals2}) {
|
|
848 $file = &getFileName('_qd-2.png');
|
|
849 $surface = &createBoxPlot(&convertToBoxValues($data->{quals2},4),'Base Quality Distribution','Read position in %','Quality score',$file);
|
|
850 $surface->write_to_png($file);
|
|
851 push(@graphs,$file);
|
|
852 }
|
|
853
|
|
854 #Qualbin plot
|
|
855 if(exists $data->{qualsbin}) {
|
|
856 $file = &getFileName('_qd2.png');
|
|
857 $surface = &createBoxPlot(&convertToBoxValues($data->{qualsbin},4),'Base Quality Distribution','Read position in bp','Quality score',$file,0,'bp',$data->{binval});
|
|
858 $surface->write_to_png($file);
|
|
859 push(@graphs,$file);
|
|
860 }
|
|
861 if(exists $data->{qualsbin2}) {
|
|
862 $file = &getFileName('_qd2-2.png');
|
|
863 $surface = &createBoxPlot(&convertToBoxValues($data->{qualsbin2},4),'Base Quality Distribution','Read position in bp','Quality score',$file,0,'bp',$data->{binval});
|
|
864 $surface->write_to_png($file);
|
|
865 push(@graphs,$file);
|
|
866 }
|
|
867
|
|
868 #Qualmean plot
|
|
869 if(exists $data->{qualsmean}) {
|
|
870 $file = &getFileName('_qd3.png');
|
|
871 $surface = &createBarPlot(&convertToBarValues($data->{qualsmean},5,1),'Sequence Quality Distribution','Mean of quality scores per sequence','Number of sequences',$file,0);
|
|
872 $surface->write_to_png($file);
|
|
873 push(@graphs,$file);
|
|
874 }
|
|
875 if(exists $data->{qualsmean2}) {
|
|
876 $file = &getFileName('_qd3-2.png');
|
|
877 $surface = &createBarPlot(&convertToBarValues($data->{qualsmean2},5,1),'Sequence Quality Distribution','Mean of quality scores per sequence','Number of sequences',$file,0);
|
|
878 $surface->write_to_png($file);
|
|
879 push(@graphs,$file);
|
|
880 }
|
|
881
|
|
882 #Sequence duplicate plots
|
|
883 if(exists $data->{dubscounts}) {
|
|
884 $file = &getFileName('_df.png');
|
|
885 $surface = &createStackBarPlot(&convertOdToStackBinMatrix($data->{dubscounts},5,1,100),'Sequence duplication level','Number of duplicates','Number of sequences',$file,0);
|
|
886 $surface->write_to_png($file);
|
|
887 push(@graphs,$file);
|
|
888 }
|
|
889 if(exists $data->{dubslength}) {
|
|
890 $file = &getFileName('_dl.png');
|
|
891 $surface = &createStackBarPlot(&convertOdToStackBinMatrix($data->{dubslength},5,1),'Sequence duplication level','Read Length in bp','Number of duplicates',$file,0,' bp');
|
|
892 $surface->write_to_png($file);
|
|
893 push(@graphs,$file);
|
|
894 }
|
|
895 if(exists $data->{dubscounts}) {
|
|
896 my %dubsmax;
|
|
897 my $count = 1;
|
|
898 foreach my $n (sort {$b <=> $a} keys %{$data->{dubscounts}}) {
|
|
899 foreach my $s (keys %{$data->{dubscounts}->{$n}}) {
|
|
900 foreach my $i (1..$data->{dubscounts}->{$n}->{$s}) {
|
|
901 $dubsmax{$count++}->{$s} = $n;
|
|
902 last unless($count <= 100);
|
|
903 }
|
|
904 last unless($count <= 100);
|
|
905 }
|
|
906 last unless($count <= 100);
|
|
907 }
|
|
908 $file = &getFileName('_dm.png');
|
|
909 $surface = &createStackBarPlot(&convertOdToStackBinMatrix(\%dubsmax,5,1,100),'Sequence duplication level','Sequence','Number of duplicates',$file,0);
|
|
910 $surface->write_to_png($file);
|
|
911 push(@graphs,$file);
|
|
912 }
|
|
913
|
|
914 return \@graphs;
|
|
915 }
|
|
916
|
|
917 sub convertOdToBinMatrix {
|
|
918 my ($data,$min,$max,$nonice) = @_;
|
|
919
|
|
920 my ($num,$ymax,$xmax,$xmin,$step,%vals,$tmp,@matrix,$bin,$tmpbin);
|
|
921
|
|
922 #make nice xmax value
|
|
923 if(defined $max) {
|
|
924 $xmax = $max;
|
|
925 } else {
|
|
926 $xmax = (sort {$b <=> $a} keys %$data)[0];
|
|
927 }
|
|
928 $bin = &getBinVal($xmax);
|
|
929 $xmax = $bin*100;
|
|
930 $xmin = (defined $min ? $min : 0);
|
|
931
|
|
932 #get data to bin and find y axis max value
|
|
933 $ymax = 0;
|
|
934 $tmp = 0;
|
|
935 $tmpbin = $bin;
|
|
936 foreach my $i ($xmin..$xmax) {
|
|
937 if(exists $data->{$i}) {
|
|
938 $tmp += $data->{$i};
|
|
939 }
|
|
940 if(--$tmpbin <= 0) {
|
|
941 $tmpbin = $bin;
|
|
942 $ymax = &max($ymax,$tmp);
|
|
943 push(@matrix,$tmp);
|
|
944 $tmp = 0;
|
|
945 }
|
|
946 }
|
|
947
|
|
948 #make nice ymax value
|
|
949 unless($nonice) {
|
|
950 $ymax = sprintf("%d",($ymax/4)+1)*4 if($ymax % 4);
|
|
951 # $step = ($ymax <= 10 ? 10 : ($ymax < 40 ? 40 : ($ymax < 100 ? 100 : ($ymax < 1000 ? 100 : 100))));
|
|
952 # $ymax = sprintf("%d",($ymax/$step)+1)*$step if($ymax % $step);
|
|
953 }
|
|
954
|
|
955 return (\@matrix,$xmax,$ymax);
|
|
956 }
|
|
957
|
|
958 sub getBinVal {
|
|
959 my $val = shift;
|
|
960 my $step;
|
|
961 if(!$val || $val <= 100) {
|
|
962 return 1;
|
|
963 } elsif($val < 10000) {
|
|
964 return int($val/100)+($val % 100 ? 1 : 0);
|
|
965 } elsif($val < 100000) {
|
|
966 return 1000;
|
|
967 } else {
|
|
968 $step = 1000000;
|
|
969 my $xmax = ($val % $step ? sprintf("%d",($val/$step+1))*$step : $val);
|
|
970 return ($xmax/100);
|
|
971 }
|
|
972 }
|
|
973
|
|
974 sub max {
|
|
975 my ($a,$b) = @_;
|
|
976 return ($a < $b ? $b : $a);
|
|
977 }
|
|
978
|
|
979 sub min {
|
|
980 my ($a,$b) = @_;
|
|
981 return ($a > $b ? $b : $a);
|
|
982 }
|
|
983
|
|
984 sub createAnnotBarPlot {
|
|
985 my ($matrix,$xmax,$ymax,$annot,$title,$xlab,$ylab,$file,$zero,$add) = @_;
|
|
986
|
|
987 my $bin = 1;
|
|
988 if($xmax > 100) {
|
|
989 $bin = $xmax / 100;
|
|
990 $xmax = 100;
|
|
991 }
|
|
992
|
|
993 my @barcol = (127/255, 127/255, 255/255, 1); #b2b2ff
|
|
994 my @meancol = (255/255, 127/255, 127/255, 1); #ffb2b2
|
|
995 my @stdcol = (178/255, 178/255, 255/255, 0.8); #7f7fff
|
|
996 my @std1col = (0, 0, 0, 0.04); #ff7f7f
|
|
997 my @std2col = (0, 0, 0, 0.03); #ff7f7f
|
|
998 my @linecol = (0, 0, 0, 0.4);
|
|
999 my @helplinecol = (1, 1, 1, 0.9);
|
|
1000 my @background = (0.95, 0.95, 0.95, 1);
|
|
1001 my @tickcol = (0, 0, 0, 0.8);
|
|
1002 my @labelcol = (0, 0, 0, 1);
|
|
1003
|
|
1004 #create new image
|
|
1005 my $size = 6;
|
|
1006 my $offset = 20;
|
|
1007 my $left = 40;
|
|
1008 my $bottom = 15;
|
|
1009 my $top = 20;
|
|
1010 my $height = 200;
|
|
1011 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+($xmax+$zero)*$size,$bottom+$top+$offset*2+$height); #format, width, height
|
|
1012 my $cr = Cairo::Context->create($surface);
|
|
1013
|
|
1014 my ($font_extents,$extents,$fontheight,$fontdescent);
|
|
1015
|
|
1016 #background
|
|
1017 $cr->rectangle(0, 0, $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+2*200+20);
|
|
1018 $cr->set_source_rgba(1, 1, 1, 1);
|
|
1019 $cr->fill;
|
|
1020
|
|
1021 #fonts
|
|
1022 $cr->select_font_face ('sans', 'normal', 'normal');
|
|
1023
|
|
1024 $cr->save;
|
|
1025
|
|
1026 #set up work space
|
|
1027 $cr->set_antialias('none');
|
|
1028 $cr->set_line_width(1);
|
|
1029
|
|
1030 #background for plot
|
|
1031 $cr->rectangle($left+$offset, $top+$offset, ($xmax+$zero)*$size-1, $height);
|
|
1032 $cr->set_source_rgba(@background);
|
|
1033 $cr->fill;
|
|
1034
|
|
1035 #draw ticks
|
|
1036 #x-axis
|
|
1037 $cr->set_source_rgba(@tickcol);
|
|
1038 foreach my $i (($zero ? 0 : 1)..$xmax) {
|
|
1039 if(($i%5) == 0 && $i > 1 && $i < $xmax) {
|
|
1040 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
|
|
1041 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+3);
|
|
1042 } else {
|
|
1043 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
|
|
1044 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+1);
|
|
1045 }
|
|
1046 }
|
|
1047 $cr->stroke;
|
|
1048
|
|
1049 #y-axis
|
|
1050 $cr->move_to($left+$offset, $top+$offset);
|
|
1051 $cr->line_to($left+$offset-3, $top+$offset);
|
|
1052 $cr->move_to($left+$offset, $top+$offset+$height-1);
|
|
1053 $cr->line_to($left+$offset-3, $top+$offset+$height-1);
|
|
1054 $cr->stroke;
|
|
1055
|
|
1056 #helplines
|
|
1057 $cr->set_source_rgba(@helplinecol);
|
|
1058 foreach my $j (1..3) {
|
|
1059 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1060 $cr->line_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1061 }
|
|
1062 $cr->stroke;
|
|
1063
|
|
1064 $cr->set_antialias('default');
|
|
1065
|
|
1066 #tick labels
|
|
1067 $cr->set_source_rgba(@tickcol);
|
|
1068 $cr->set_font_size(10);
|
|
1069 $font_extents = $cr->font_extents;
|
|
1070 $fontheight = $font_extents->{height};
|
|
1071 #x-axis
|
|
1072 foreach my $i (($zero ? 0 : 1)..$xmax) {
|
|
1073 if(($i%10) == 0 && $i > 1 && $i < $xmax) {
|
|
1074 $extents = $cr->text_extents($i*$bin);
|
|
1075 $cr->move_to($left+$offset+int($size/2+1)+$size*$i-($zero ? 0 : $size)-$extents->{width}/2-1-($i == 1 ? 1 : 0), $top+$offset+$height+$fontheight+2);
|
|
1076 $cr->show_text($i*$bin);
|
|
1077 }
|
|
1078 }
|
|
1079 #y-axis
|
|
1080 $extents = $cr->text_extents(&addCommas($ymax));
|
|
1081 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2);
|
|
1082 $cr->show_text(&addCommas($ymax));
|
|
1083 $extents = $cr->text_extents(0);
|
|
1084 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2+$height);
|
|
1085 $cr->show_text(0);
|
|
1086
|
|
1087 $cr->save;
|
|
1088
|
|
1089 #labels
|
|
1090 $cr->set_font_size (14);
|
|
1091 $font_extents = $cr->font_extents;
|
|
1092 $fontheight = $font_extents->{height};
|
|
1093
|
|
1094 #axis labels
|
|
1095 $cr->set_source_rgba(@labelcol);
|
|
1096 $extents = $cr->text_extents($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
|
|
1097 $cr->move_to($left+$offset+($xmax+$zero)*$size/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
|
|
1098 $cr->show_text($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
|
|
1099 $cr->rotate($PI * 3 / 2);
|
|
1100 $extents = $cr->text_extents($ylab.($bin>1 ? ' (per bin)' : ''));
|
|
1101 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2),$offset+10);
|
|
1102 $cr->show_text($ylab.($bin>1 ? ' (per bin)' : ''));
|
|
1103
|
|
1104 $cr->restore;
|
|
1105
|
|
1106 #draw annotations
|
|
1107 if($annot) {
|
|
1108 $cr->set_antialias('none');
|
|
1109 my ($std1l,$std2l,$std1r,$std2r);
|
|
1110 #std boxes
|
|
1111 $std1l = int($annot->{mean})-int($annot->{std});
|
|
1112 $std2l = int($annot->{mean})-2*int($annot->{std});
|
|
1113 $std1r = int($annot->{mean})+int($annot->{std});
|
|
1114 $std2r = int($annot->{mean})+2*int($annot->{std});
|
|
1115 unless($std1l == $std1r) {
|
|
1116 if($std1l < 0) {
|
|
1117 $std1l = 0;
|
|
1118 } else {
|
|
1119 $std1l = int($std1l/$bin);
|
|
1120 }
|
|
1121 if($std2l < 0) {
|
|
1122 $std2l = 0;
|
|
1123 } else {
|
|
1124 $std2l = int($std2l/$bin);
|
|
1125 }
|
|
1126 if($std1r/$bin > 100) {
|
|
1127 $std1r = 100;
|
|
1128 } else {
|
|
1129 $std1r = int($std1r/$bin);
|
|
1130 }
|
|
1131 if($std2r/$bin > 100) {
|
|
1132 $std2r = 100;
|
|
1133 } else {
|
|
1134 $std2r = int($std2r/$bin);
|
|
1135 }
|
|
1136 $cr->rectangle($left+$offset+$std2l*$size+2, $top+$offset, ($std2r-$std2l)*$size, $height);
|
|
1137 $cr->set_source_rgba(@std2col);
|
|
1138 $cr->fill;
|
|
1139 $cr->rectangle($left+$offset+$std1l*$size+2, $top+$offset, ($std1r-$std1l)*$size, $height);
|
|
1140 $cr->set_source_rgba(@std1col);
|
|
1141 $cr->fill;
|
|
1142 #mean line
|
|
1143 $cr->set_source_rgba(@meancol);
|
|
1144 $cr->move_to($left+$offset+int(int($annot->{mean})/$bin)*$size+2, $top+$offset-5);
|
|
1145 $cr->line_to($left+$offset+int(int($annot->{mean})/$bin)*$size+2, $top+$offset+$height);
|
|
1146 $cr->stroke;
|
|
1147 #std lines
|
|
1148 $cr->set_source_rgba(@stdcol);
|
|
1149 if($std1l > 0) {
|
|
1150 $cr->move_to($left+$offset+$std1l*$size+2, $top+$offset-5);
|
|
1151 $cr->line_to($left+$offset+$std1l*$size+2, $top+$offset+$height);
|
|
1152 }
|
|
1153 if($std2l > 0) {
|
|
1154 $cr->move_to($left+$offset+$std2l*$size+2, $top+$offset-5);
|
|
1155 $cr->line_to($left+$offset+$std2l*$size+2, $top+$offset+$height);
|
|
1156 }
|
|
1157 if($std1r < 100) {
|
|
1158 $cr->move_to($left+$offset+$std1r*$size+2, $top+$offset-5);
|
|
1159 $cr->line_to($left+$offset+$std1r*$size+2, $top+$offset+$height);
|
|
1160 }
|
|
1161 if($std2r < 100) {
|
|
1162 $cr->move_to($left+$offset+$std2r*$size+2, $top+$offset-5);
|
|
1163 $cr->line_to($left+$offset+$std2r*$size+2, $top+$offset+$height);
|
|
1164 }
|
|
1165 $cr->stroke;
|
|
1166 #labels
|
|
1167 $cr->set_antialias('default');
|
|
1168 $cr->set_source_rgba(@tickcol);
|
|
1169 $extents = $cr->text_extents('M');
|
|
1170 $cr->move_to($left+$offset+int(int($annot->{mean})/$bin)*$size+2-$extents->{width}/2, $top+$offset-10);
|
|
1171 $cr->show_text('M');
|
|
1172 if($std1l > 0) {
|
|
1173 $extents = $cr->text_extents('1SD');
|
|
1174 $cr->move_to($left+$offset+$std1l*$size-$extents->{width}/2+2, $top+$offset-10);
|
|
1175 $cr->show_text('1SD');
|
|
1176 }
|
|
1177 if($std2l > 0) {
|
|
1178 $extents = $cr->text_extents('2SD');
|
|
1179 $cr->move_to($left+$offset+$std2l*$size-$extents->{width}/2+3, $top+$offset-10);
|
|
1180 $cr->show_text('2SD');
|
|
1181 }
|
|
1182 if($std1r < 100) {
|
|
1183 $extents = $cr->text_extents('1SD');
|
|
1184 $cr->move_to($left+$offset+$std1r*$size-$extents->{width}/2+2, $top+$offset-10);
|
|
1185 $cr->show_text('1SD');
|
|
1186 }
|
|
1187 if($std2r < 100) {
|
|
1188 $extents = $cr->text_extents('2SD');
|
|
1189 $cr->move_to($left+$offset+$std2r*$size-$extents->{width}/2+3, $top+$offset-10);
|
|
1190 $cr->show_text('2SD');
|
|
1191 }
|
|
1192 }
|
|
1193 }
|
|
1194
|
|
1195 #draw boxes
|
|
1196 $cr->set_antialias('none');
|
|
1197 $cr->set_source_rgba(@barcol);
|
|
1198 foreach my $pos (0..$xmax-($zero ? 0 : 1)) {
|
|
1199 next unless($matrix->[$pos]);
|
|
1200 my $tmp = $matrix->[$pos] / $ymax;
|
|
1201 #unique
|
|
1202 if($tmp) {
|
|
1203 $cr->rectangle($left+$offset+$pos*$size, $top+$offset+$height, $size-1, -$tmp*$height);
|
|
1204 $cr->fill;
|
|
1205 }
|
|
1206 }
|
|
1207
|
|
1208 #write image
|
|
1209 $cr->show_page;
|
|
1210 return $surface;
|
|
1211 }
|
|
1212
|
|
1213 #sub convertToPCAValues {
|
|
1214 # my ($new,$type) = @_;
|
|
1215 #
|
|
1216 # my @data = ($type eq 'v' ? @$DINUCODDS_VIR : @$DINUCODDS_MIC);
|
|
1217 #
|
|
1218 # push(@data,$new);
|
|
1219 #
|
|
1220 # my $pca = Statistics::PCA->new;
|
|
1221 # $pca->load_data({format => 'table', data => \@data});
|
|
1222 # $pca->pca();
|
|
1223 #
|
|
1224 # my @variances = $pca->results('proportion');
|
|
1225 # my @list = $pca->results('transformed');
|
|
1226 #
|
|
1227 # my ($xmin,$xmax,$ymin,$ymax);
|
|
1228 # $xmax = $ymax = -100;
|
|
1229 # $xmin = $ymin = 100;
|
|
1230 #
|
|
1231 # #get min/max values for PC1
|
|
1232 # foreach my $v (@{$list[0]}) {
|
|
1233 # $xmax = &max($xmax,$v);
|
|
1234 # $xmin = &min($xmin,$v);
|
|
1235 # }
|
|
1236 # #get min/max values for PC2
|
|
1237 # foreach my $v (@{$list[1]}) {
|
|
1238 # $ymax = &max($ymax,$v);
|
|
1239 # $ymin = &min($ymin,$v);
|
|
1240 # }
|
|
1241 #
|
|
1242 # return ([$list[0],$list[1]],sprintf("%d",$variances[0]*100),sprintf("%d",$variances[1]*100),$xmin,$xmax,$ymin,$ymax,$type);
|
|
1243 #}
|
|
1244
|
|
1245 sub createPCAPlot {
|
|
1246 my ($data,$var1,$var2,$xmin,$xmax,$ymin,$ymax,$type,$title,$xlab,$ylab,$file) = @_;
|
|
1247
|
|
1248 my @linecol = (0, 0, 0, 0.4);
|
|
1249 my @helplinecol1 = (1,1,1, 0.9);
|
|
1250 my @helplinecol2 = (1,1,1, 0.5);
|
|
1251
|
|
1252 #create new image
|
|
1253 my $size = 5;
|
|
1254 my $offset = 20;
|
|
1255 my $left = 25;
|
|
1256 my $bottom = 15;
|
|
1257 my $top = ($type eq 'v' ? 35 : 20);
|
|
1258 my $height = 500;
|
|
1259 my $space = 10;
|
|
1260 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+$height+2*$space,$top+$bottom+$offset*2+$height+2*$space); #format, width, height
|
|
1261 my $cr = Cairo::Context->create($surface);
|
|
1262
|
|
1263 my ($font_extents,$extents,$fontheight,$fontdescent);
|
|
1264
|
|
1265 #background
|
|
1266 $cr->rectangle(0, 0, $left+$offset*2+$height+2*$space,$top+$bottom+$offset*2+$height+2*$space);
|
|
1267 $cr->set_source_rgba(1, 1, 1, 1);
|
|
1268 $cr->fill;
|
|
1269
|
|
1270 #fonts
|
|
1271 $cr->select_font_face ('sans-serif', 'normal', 'normal');
|
|
1272
|
|
1273 $cr->save;
|
|
1274
|
|
1275 #set up work space
|
|
1276 my ($dx, $dy);
|
|
1277 $cr->set_antialias('none');
|
|
1278 $cr->set_line_width(1);
|
|
1279
|
|
1280 #background for plot
|
|
1281 $cr->rectangle($left+$offset, $top+$offset, $height+2*$space, $height+2*$space);
|
|
1282 $cr->set_source_rgba(0.95, 0.95, 0.95, 1);
|
|
1283 $cr->fill;
|
|
1284
|
|
1285 #get infos
|
|
1286 my $num = scalar(@{$data->[0]})-1;
|
|
1287 my $xrange = ($xmax-$xmin);
|
|
1288 my $yrange = ($ymax-$ymin);
|
|
1289 my $data_info = ($type eq 'v' ? $DATA_VIR : $DATA_MIC);
|
|
1290
|
|
1291 #draw ticks
|
|
1292 #x-axis
|
|
1293 $cr->set_source_rgba(0, 0, 0, 0.8);
|
|
1294 $cr->move_to($left+$offset+$space, $top+$offset+$height+2*$space);
|
|
1295 $cr->line_to($left+$offset+$space, $top+$offset+$height+2*$space+3);
|
|
1296 $cr->move_to($left+$offset+$space+$height, $top+$offset+$height+2*$space);
|
|
1297 $cr->line_to($left+$offset+$space+$height, $top+$offset+$height+2*$space+3);
|
|
1298 $cr->move_to($left+$offset+$space+int(abs($xmin)/$xrange*$height), $top+$offset+$height+2*$space);
|
|
1299 $cr->line_to($left+$offset+$space+int(abs($xmin)/$xrange*$height), $top+$offset+$height+2*$space+3);
|
|
1300 $cr->stroke;
|
|
1301 #y-axis
|
|
1302 $cr->move_to($left+$offset, $top+$offset+$space);
|
|
1303 $cr->line_to($left+$offset-3, $top+$offset+$space);
|
|
1304 $cr->move_to($left+$offset, $top+$offset+$height+$space);
|
|
1305 $cr->line_to($left+$offset-3, $top+$offset+$height+$space);
|
|
1306 $cr->move_to($left+$offset, $top+$offset+$space+int(abs($ymax)/$yrange*$height));
|
|
1307 $cr->line_to($left+$offset-3, $top+$offset+$space+int(abs($ymax)/$yrange*$height));
|
|
1308 $cr->stroke;
|
|
1309
|
|
1310 #helplines
|
|
1311 $cr->set_source_rgba(@helplinecol1);
|
|
1312 $cr->move_to($left+$offset+$space+int(abs($xmin)/$xrange*$height), $top+$offset);
|
|
1313 $cr->line_to($left+$offset+$space+int(abs($xmin)/$xrange*$height), $top+$offset+$height+2*$space);
|
|
1314 $cr->stroke;
|
|
1315 $cr->move_to($left+$offset, $top+$offset+$space+int(abs($ymax)/$yrange*$height));
|
|
1316 $cr->line_to($left+$offset+2*$space+$height, $top+$offset+$space+int(abs($ymax)/$yrange*$height));
|
|
1317 $cr->stroke;
|
|
1318
|
|
1319
|
|
1320 $cr->set_antialias('default');
|
|
1321
|
|
1322 #tick labels
|
|
1323 $cr->set_source_rgba(0, 0, 0, 0.8);
|
|
1324 $cr->set_font_size(10);
|
|
1325 $font_extents = $cr->font_extents;
|
|
1326 $fontheight = $font_extents->{height};
|
|
1327 #x-axis
|
|
1328 $extents = $cr->text_extents(sprintf("%.2f",$xmin));
|
|
1329 $cr->move_to($left+$offset+$space-$extents->{width}/2-1, $top+$offset+$height+2*$space+$fontheight+2);
|
|
1330 $cr->show_text(sprintf("%.2f",$xmin));
|
|
1331 $extents = $cr->text_extents(sprintf("%.2f",$xmax));
|
|
1332 $cr->move_to($left+$offset+$space+$height-$extents->{width}/2-1, $top+$offset+$height+2*$space+$fontheight+2);
|
|
1333 $cr->show_text(sprintf("%.2f",$xmax));
|
|
1334 $extents = $cr->text_extents(0);
|
|
1335 $cr->move_to($left+$offset+$space+int(abs($xmin)/$xrange*$height)-$extents->{width}/2, $top+$offset+$height+2*$space+$fontheight+2);
|
|
1336 $cr->show_text(0);
|
|
1337 #y-axis
|
|
1338 $extents = $cr->text_extents(sprintf("%.2f",$ymax));
|
|
1339 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$space+$fontheight/2-2);
|
|
1340 $cr->show_text(sprintf("%.2f",$ymax));
|
|
1341 $extents = $cr->text_extents(sprintf("%.2f",$ymin));
|
|
1342 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$height+$space+$fontheight/2-2);
|
|
1343 $cr->show_text(sprintf("%.2f",$ymin));
|
|
1344 $extents = $cr->text_extents(0);
|
|
1345 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$space+int(abs($ymax)/$yrange*$height)+$fontheight/2-2);
|
|
1346 $cr->show_text(0);
|
|
1347
|
|
1348 $cr->save;
|
|
1349
|
|
1350 #labels
|
|
1351 $cr->set_font_size (14);
|
|
1352 $font_extents = $cr->font_extents;
|
|
1353 $fontheight = $font_extents->{height};
|
|
1354
|
|
1355 #add type
|
|
1356 $cr->set_source_rgba(0, 0, 0, 0.5);
|
|
1357 $extents = $cr->text_extents(uc($type));
|
|
1358 $cr->arc($offset/2+$extents->{width}/2, $offset-5, 10, 0, 2*$PI);
|
|
1359 $cr->fill;
|
|
1360 $cr->set_source_rgba(1, 1, 1, 1);
|
|
1361 $cr->move_to($offset/2-($type eq 'm' ? 1 : 0), $offset);
|
|
1362 $cr->show_text(uc($type));
|
|
1363
|
|
1364 #axis labels
|
|
1365 $cr->set_source_rgba(0, 0, 0, 1);
|
|
1366 $extents = $cr->text_extents($xlab.' ('.$var1.'%)');
|
|
1367 $cr->move_to($left+$offset+$height/2-$extents->{width}/2+$space, $top+$offset+$height+$fontheight+15+2*$space);
|
|
1368 $cr->show_text($xlab.' ('.$var1.'%)');
|
|
1369 $cr->rotate($PI * 3 / 2);
|
|
1370 $extents = $cr->text_extents($ylab.' ('.$var2.'%)');
|
|
1371 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2)+$space,$offset);
|
|
1372 $cr->show_text($ylab.' ('.$var2.'%)');
|
|
1373
|
|
1374 $cr->restore;
|
|
1375
|
|
1376 #draw dots
|
|
1377 $cr->set_antialias('default');
|
|
1378 $cr->set_font_size (10);
|
|
1379 foreach my $i (0..$num) {
|
|
1380 $cr->set_source_rgba(@{$data_info->[$i]->[3]});
|
|
1381 $cr->arc(($left+$offset+$space+int(($data->[0]->[$i]+abs($xmin))/$xrange*$height)), ($space+$top+$offset+int(($data->[1]->[$i]+abs($ymin))/$yrange*$height)), $size, 0, 2*$PI);
|
|
1382 $cr->fill;
|
|
1383 }
|
|
1384 $cr->set_source_rgba(0, 0, 0, 1);
|
|
1385 foreach my $i (0..$num) {
|
|
1386 $extents = $cr->text_extents($data_info->[$i]->[1]);
|
|
1387 $cr->move_to(($left+$offset+$space+int(($data->[0]->[$i]+abs($xmin))/$xrange*$height))+$size+1, ($space+$top+$offset+int(($data->[1]->[$i]+abs($ymin))/$yrange*$height))+$size*2);
|
|
1388 $cr->show_text($data_info->[$i]->[1]);
|
|
1389 }
|
|
1390
|
|
1391 #draw legend
|
|
1392 my %labels;
|
|
1393 foreach my $i (0..$num) {
|
|
1394 $labels{$data_info->[$i]->[1]} = $data_info->[$i]->[2];
|
|
1395 }
|
|
1396 $cr->set_font_size(10);
|
|
1397 $fontheight = $font_extents->{height};
|
|
1398 $cr->set_source_rgba(0, 0, 0, 1);
|
|
1399 my $x = $left+$offset+$space;
|
|
1400 my $y = int($offset/2);
|
|
1401 foreach my $n (sort {$a <=> $b} keys %labels) {
|
|
1402 if($x+$cr->text_extents($n.' - '.$labels{$n})->{width}+15 >= $left+$offset+$space+$height) {
|
|
1403 $x = $left+$offset+$space;
|
|
1404 $y += $fontheight;
|
|
1405 }
|
|
1406 $cr->move_to($x,$y);
|
|
1407 $cr->show_text($n.' - '.$labels{$n});
|
|
1408 $x += $cr->text_extents($n.' - '.$labels{$n})->{width}+15;
|
|
1409
|
|
1410 }
|
|
1411
|
|
1412 #write image
|
|
1413 $cr->show_page;
|
|
1414 return $surface;
|
|
1415 }
|
|
1416
|
|
1417 sub createOddsRatioPlot {
|
|
1418 my ($data,$title,$xlab,$ylab,$file) = @_;
|
|
1419
|
|
1420 my @yvalues = (0.5,0.78,1.00,1.23,1.5);
|
|
1421
|
|
1422 my @linecol = (0, 0, 0, 0.4);
|
|
1423 my @helplinecol1 = (1,1,1, 0.9);
|
|
1424 my @helplinecol2 = (1,1,1, 0.5);
|
|
1425
|
|
1426 #create new image
|
|
1427 my $size = 40;
|
|
1428 my $offset = 20;
|
|
1429 my $left = 35;
|
|
1430 my $right = 90;
|
|
1431 my $bottom = 20;
|
|
1432 my $top = 0;
|
|
1433 my $height = 100;
|
|
1434 my $width = $size*10;
|
|
1435 my $space = 20;
|
|
1436 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+$width+$right,$top+$bottom+$offset*2+$height); #format, width, height
|
|
1437 my $cr = Cairo::Context->create($surface);
|
|
1438
|
|
1439 my ($font_extents,$extents,$fontheight,$fontdescent);
|
|
1440
|
|
1441 #background
|
|
1442 $cr->rectangle(0, 0, $left+$offset*2+$width+$right,$top+$bottom+$offset*2+$height);
|
|
1443 $cr->set_source_rgba(1, 1, 1, 1);
|
|
1444 $cr->fill;
|
|
1445
|
|
1446 #fonts
|
|
1447 $cr->select_font_face ('sans', 'normal', 'normal');
|
|
1448
|
|
1449 $cr->save;
|
|
1450
|
|
1451 #set up work space
|
|
1452 my ($dx, $dy);
|
|
1453 $cr->set_antialias('none');
|
|
1454 $cr->set_line_width(1);
|
|
1455
|
|
1456 #background for plot
|
|
1457 $cr->rectangle($left+$offset, $top+$offset, $width, $height);
|
|
1458 $cr->set_source_rgba(0.95, 0.95, 0.95, 1);
|
|
1459 $cr->fill;
|
|
1460
|
|
1461 #right side marks
|
|
1462 $cr->set_source_rgba(255/255, 127/255, 127/255, 0.6);
|
|
1463 $cr->rectangle($left+$offset+$width+8, $top+$offset, 3, 0.77/2*$height);
|
|
1464 $cr->fill;
|
|
1465 $cr->rectangle($left+$offset+$width+8, $top+$offset+$height-0.78/2*$height, 3, 0.78/2*$height);
|
|
1466 $cr->fill;
|
|
1467
|
|
1468 #get infos
|
|
1469 my $num = scalar(keys %$data)-1;
|
|
1470
|
|
1471 #draw ticks
|
|
1472 #x-axis
|
|
1473 $cr->set_source_rgba(0, 0, 0, 0.8);
|
|
1474 foreach my $i (0..$num) {
|
|
1475 $cr->move_to($left+$offset+$size/2+$i*$size, $top+$offset+$height);
|
|
1476 $cr->line_to($left+$offset+$size/2+$i*$size, $top+$offset+$height+3);
|
|
1477 }
|
|
1478 $cr->stroke;
|
|
1479 #y-axis
|
|
1480 foreach my $i (@yvalues) {
|
|
1481 $cr->move_to($left+$offset, $top+$offset+$height-$i/2*$height);
|
|
1482 $cr->line_to($left+$offset-3, $top+$offset+$height-$i/2*$height);
|
|
1483 }
|
|
1484 $cr->stroke;
|
|
1485
|
|
1486 #helplines
|
|
1487 #x-axis
|
|
1488 $cr->set_source_rgba(@helplinecol1);
|
|
1489 foreach my $i (0..$num) {
|
|
1490 $cr->move_to($left+$offset+$size/2+$i*$size, $top+$offset);
|
|
1491 $cr->line_to($left+$offset+$size/2+$i*$size, $top+$offset+$height);
|
|
1492 }
|
|
1493 $cr->stroke;
|
|
1494 #yaxis
|
|
1495 foreach my $i (@yvalues) {
|
|
1496 $cr->set_source_rgba(0, 0, 0, ($i == 0.5 || $i == 1.00 || $i == 1.50 ? 0.1 : 0.3));
|
|
1497 $cr->move_to($left+$offset, $top+$offset+$height-$i/2*$height);
|
|
1498 $cr->line_to($left+$offset+$width, $top+$offset+$height-$i/2*$height);
|
|
1499 $cr->stroke;
|
|
1500 }
|
|
1501
|
|
1502 $cr->set_antialias('default');
|
|
1503
|
|
1504 #tick labels
|
|
1505 $cr->set_source_rgba(0, 0, 0, 0.8);
|
|
1506 $cr->set_font_size(10);
|
|
1507 $font_extents = $cr->font_extents;
|
|
1508 $fontheight = $font_extents->{height};
|
|
1509 #x-axis
|
|
1510 my $xcur = 0;
|
|
1511 foreach my $dn (map {join("/",(m/../g ))} sort keys %$data) {
|
|
1512 $extents = $cr->text_extents($dn);
|
|
1513 $cr->move_to($left+$offset+$size/2-$extents->{width}/2-1+$size*$xcur++, $top+$offset+$height+$fontheight+2);
|
|
1514 $cr->show_text($dn);
|
|
1515 }
|
|
1516 #y-axis
|
|
1517 foreach my $i (@yvalues) {
|
|
1518 $cr->set_source_rgba(0, 0, 0, ($i == 0.5 || $i == 1.00 || $i == 1.50 ? 0.5 : 0.8));
|
|
1519 $extents = $cr->text_extents(sprintf("%.2f",$i));
|
|
1520 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$height-$i/2*$height+$fontheight/2-2);
|
|
1521 $cr->show_text(sprintf("%.2f",$i));
|
|
1522 }
|
|
1523
|
|
1524 #label on right side
|
|
1525 $cr->set_source_rgba(0, 0, 0, 1);
|
|
1526 $extents = $cr->text_extents('Over-represented');
|
|
1527 $cr->move_to($left+$offset+$width+15, $top+$offset+$height-1.6/2*$height+$fontheight/2-2);
|
|
1528 $cr->show_text('Over-represented');
|
|
1529 $extents = $cr->text_extents('Under-represented');
|
|
1530 $cr->move_to($left+$offset+$width+15, $top+$offset+$height-0.4/2*$height+$fontheight/2-2);
|
|
1531 $cr->show_text('Under-represented');
|
|
1532
|
|
1533 $cr->save;
|
|
1534
|
|
1535 #labels
|
|
1536 $cr->set_font_size (14);
|
|
1537 $font_extents = $cr->font_extents;
|
|
1538 $fontheight = $font_extents->{height};
|
|
1539
|
|
1540 #axis labels
|
|
1541 #x-axis
|
|
1542 $cr->set_source_rgba(0, 0, 0, 1);
|
|
1543 $extents = $cr->text_extents($xlab);
|
|
1544 $cr->move_to($left+$offset+$width/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
|
|
1545 $cr->show_text($xlab);
|
|
1546 #y-axis
|
|
1547 $cr->rotate($PI * 3 / 2);
|
|
1548 $extents = $cr->text_extents($ylab);
|
|
1549 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2),$offset);
|
|
1550 $cr->show_text($ylab);
|
|
1551
|
|
1552 $cr->restore;
|
|
1553
|
|
1554 #draw dots
|
|
1555 $cr->set_antialias('default');
|
|
1556 $xcur = 0;
|
|
1557 foreach my $dn (sort keys %$data) {
|
|
1558 if($data->{$dn} > 1.23 || $data->{$dn} < 0.78) {
|
|
1559 $cr->set_source_rgba(255/255, 127/255, 127/255, 1);
|
|
1560 } else {
|
|
1561 $cr->set_source_rgba(127/255, 127/255, 255/255, 1);
|
|
1562 }
|
|
1563 $cr->arc($left+$offset+$size/2+$size*$xcur++, $top+$offset+$height-$data->{$dn}/2*$height, 5, 0, 2*$PI);
|
|
1564 $cr->fill;
|
|
1565 }
|
|
1566
|
|
1567 #write image
|
|
1568 $cr->show_page;
|
|
1569 return $surface;
|
|
1570 }
|
|
1571
|
|
1572 sub convertToBoxValues {
|
|
1573 my ($data,$niceval) = @_;
|
|
1574 my ($xmax,$ymax,@matrix);
|
|
1575 $xmax = $ymax = 0;
|
|
1576 foreach my $i (sort {$a <=> $b} keys %$data) {
|
|
1577 $xmax++;
|
|
1578 push(@matrix,[$i,$data->{$i}->{min},$data->{$i}->{p25},$data->{$i}->{median},$data->{$i}->{p75},$data->{$i}->{max}]);
|
|
1579 $ymax = &max($ymax,$data->{$i}->{max});
|
|
1580 }
|
|
1581
|
|
1582 if($niceval) {
|
|
1583 $ymax = sprintf("%d",($ymax/$niceval)+1)*$niceval if($ymax % $niceval);
|
|
1584 }
|
|
1585
|
|
1586 return (\@matrix,$xmax,$ymax);
|
|
1587 }
|
|
1588
|
|
1589 sub createBoxPlot {
|
|
1590 my ($matrix,$xmax,$ymax,$title,$xlab,$ylab,$file,$zero,$add,$bin) = @_;
|
|
1591 $bin = ($bin ? $bin : 1);
|
|
1592 $zero = 0 unless($zero);
|
|
1593 $add = '' unless(defined $add);
|
|
1594 if($xmax != 100) {
|
|
1595 $xmax = 100;
|
|
1596 }
|
|
1597 $ymax = 1 unless($ymax);
|
|
1598 # die Dumper $matrix;
|
|
1599
|
|
1600
|
|
1601 my @col0 = (178/255, 178/255, 255/255); #b2b2ff
|
|
1602 my @col1 = (255/255, 178/255, 178/255); #ffb2b2
|
|
1603 my @col3 = (127/255, 127/255, 255/255); #7f7fff
|
|
1604 my @col4 = (255/255, 127/255, 127/255); #ff7f7f
|
|
1605 my @linecol = (0, 0, 0, 0.4);
|
|
1606 my @linecol0 = (@col3, 1);
|
|
1607 my @linecol1 = (@col4, 1);
|
|
1608 my @boxcol = (@col3, 1);
|
|
1609 my @whiscol = (@col0, 0.9);
|
|
1610 my @medcol = (0,0,0, 0.5);
|
|
1611 my @helplinecol1 = (1,1,1, 0.9);
|
|
1612 my @helplinecol2 = (1,1,1, 0.5);
|
|
1613
|
|
1614 #create new image
|
|
1615 my $size = 6;
|
|
1616 my $offset = 20;
|
|
1617 my $left = 25;
|
|
1618 my $bottom = 25;
|
|
1619 my $top = 5;
|
|
1620 my $height = 300;
|
|
1621 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+$height); #format, width, height
|
|
1622 my $cr = Cairo::Context->create($surface);
|
|
1623
|
|
1624 my ($font_extents,$extents,$fontheight,$fontdescent);
|
|
1625
|
|
1626 #background
|
|
1627 $cr->rectangle(0, 0, $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+2*200+20);
|
|
1628 $cr->set_source_rgba(1, 1, 1, 1);
|
|
1629 $cr->fill;
|
|
1630
|
|
1631 #fonts
|
|
1632 $cr->select_font_face ('sans', 'normal', 'normal');
|
|
1633 # $cr->set_font_size (30);
|
|
1634
|
|
1635 $cr->save;
|
|
1636
|
|
1637 #set up work space
|
|
1638 $cr->set_antialias('none');
|
|
1639 $cr->set_line_width(1);
|
|
1640
|
|
1641 #background for plot
|
|
1642 $cr->rectangle($left+$offset, $top+$offset, ($xmax+$zero)*$size-1, $height);
|
|
1643 $cr->set_source_rgba(0.95, 0.95, 0.95, 1);
|
|
1644 $cr->fill;
|
|
1645
|
|
1646 #draw legend
|
|
1647 $cr->set_font_size(10);
|
|
1648 # $font_extents = $cr->font_extents;
|
|
1649 my $x = $left+$offset+$size*50;
|
|
1650 foreach my $v ([\@whiscol,'Min/Max value'],[\@boxcol,'25th to 75th percentile'],[\@medcol,'Median']) {
|
|
1651 $cr->set_antialias('none');
|
|
1652 $cr->set_source_rgba(@{$v->[0]});
|
|
1653 $cr->rectangle($x, $top+5, 10, 10);
|
|
1654 $cr->fill;
|
|
1655 $x += 15;
|
|
1656 $cr->set_antialias('default');
|
|
1657 $cr->move_to($x,$top+5+9);
|
|
1658 $cr->set_source_rgba(0, 0, 0, 0.8);
|
|
1659 $cr->show_text($v->[1]);
|
|
1660 $x += $cr->text_extents($v->[1])->{width}+15;
|
|
1661 }
|
|
1662
|
|
1663 $cr->set_antialias('none');
|
|
1664
|
|
1665 #draw ticks
|
|
1666 #x-axis
|
|
1667 $cr->set_source_rgba(0, 0, 0, 0.8);
|
|
1668 # $cr->move_to($left+$offset+int($size/2+1), $top+$offset+$height);
|
|
1669 # $cr->line_to($left+$offset+int($size/2+1), $top+$offset+$height+3);
|
|
1670 # $cr->move_to($left+$offset+int($size/2+1), $top+$offset+$height+$space);
|
|
1671 # $cr->line_to($left+$offset+int($size/2+1), $top+$offset+$height+$space-3);
|
|
1672 foreach my $i (1..9) {
|
|
1673 $cr->move_to($left+$offset+int($size/2)+$size*10*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
|
|
1674 $cr->line_to($left+$offset+int($size/2)+$size*10*$i-($zero ? 0 : $size)-1, $top+$offset+$height+3);
|
|
1675 # $cr->move_to($left+$offset+int($size/2)+$size*10*$i-($zero ? 0 : $size)-1, $top+$offset);
|
|
1676 # $cr->line_to($left+$offset+int($size/2)+$size*10*$i-($zero ? 0 : $size)-1, $top+$offset-3);
|
|
1677 }
|
|
1678 $cr->stroke;
|
|
1679 #y-axis
|
|
1680 foreach my $j (0..4) {
|
|
1681 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1682 $cr->line_to($left+$offset-3, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1683 # $cr->move_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1684 # $cr->line_to($left+$offset+($xmax+$zero)*$size+3, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1685 }
|
|
1686 $cr->stroke;
|
|
1687
|
|
1688 #helplines
|
|
1689 $cr->set_source_rgba(@helplinecol1);
|
|
1690 foreach my $j (1..3) {
|
|
1691 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1692 $cr->line_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1693 }
|
|
1694 $cr->stroke;
|
|
1695
|
|
1696 $cr->set_antialias('default');
|
|
1697
|
|
1698 #tick labels
|
|
1699 $cr->set_source_rgba(0, 0, 0, 0.8);
|
|
1700 $cr->set_font_size(10);
|
|
1701 $font_extents = $cr->font_extents;
|
|
1702 $fontheight = $font_extents->{height};
|
|
1703 #x-axis
|
|
1704 # $extents = $cr->text_extents(1);
|
|
1705 # $cr->move_to($left+$offset+int($size/2+1)-$extents->{width}, $top+$offset+$height+$fontheight+2);
|
|
1706 # $cr->show_text(1);
|
|
1707 foreach my $i (1..9) {
|
|
1708 $extents = $cr->text_extents($i*10*$bin);
|
|
1709 $cr->move_to($left+$offset+int($size/2+1)+$size*10*$i-($zero ? 0 : $size)-$extents->{width}/2-1-($i == 1 ? 1 : 0), $top+$offset+$height+$fontheight+2);
|
|
1710 $cr->show_text($i*10*$bin);
|
|
1711 }
|
|
1712 #y-axis
|
|
1713 foreach my $j (0..4) {
|
|
1714 $extents = $cr->text_extents(&addCommas($ymax*$j/4));
|
|
1715 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2+$height*(4-$j)/4);
|
|
1716 $cr->show_text(&addCommas($ymax*$j/4));
|
|
1717 }
|
|
1718
|
|
1719 $cr->save;
|
|
1720
|
|
1721 #axis labels
|
|
1722 $cr->set_source_rgba(0, 0, 0, 1);
|
|
1723 $cr->set_font_size (14);
|
|
1724 $font_extents = $cr->font_extents;
|
|
1725 $fontheight = $font_extents->{height};
|
|
1726 $extents = $cr->text_extents($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
|
|
1727 $cr->move_to($left+$offset+($xmax+$zero)*$size/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
|
|
1728 $cr->show_text($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
|
|
1729 $cr->rotate($PI * 3 / 2);
|
|
1730 $extents = $cr->text_extents($ylab);
|
|
1731 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2),$offset);
|
|
1732 $cr->show_text($ylab);
|
|
1733
|
|
1734 $cr->restore;
|
|
1735
|
|
1736 #draw boxes
|
|
1737 my $factor = $height/$ymax;
|
|
1738 $cr->set_antialias('none');
|
|
1739 foreach my $v (@$matrix) {
|
|
1740 #wiskers
|
|
1741 $cr->set_source_rgba(@whiscol);
|
|
1742 if($v->[1] != $v->[2]) {
|
|
1743 $cr->move_to($left+$offset+$size*$v->[0]+1, $top+$offset+$height-$v->[1]*$factor-1);
|
|
1744 $cr->line_to($left+$offset+$size*$v->[0]+$size-2, $top+$offset+$height-$v->[1]*$factor-1);
|
|
1745 $cr->stroke;
|
|
1746 }
|
|
1747 if($v->[4] != $v->[5]) {
|
|
1748 $cr->move_to($left+$offset+$size*$v->[0]+1, $top+$offset+$height-$v->[5]*$factor);
|
|
1749 $cr->line_to($left+$offset+$size*$v->[0]+$size-2, $top+$offset+$height-$v->[5]*$factor);
|
|
1750 $cr->stroke;
|
|
1751 }
|
|
1752 $cr->save;
|
|
1753 $cr->set_dash(1,4,3);
|
|
1754 if($v->[1] != $v->[2]) {
|
|
1755 $cr->move_to($left+$offset+$size*$v->[0]+int($size/2)-1, $top+$offset+$height-$v->[2]*$factor);
|
|
1756 $cr->line_to($left+$offset+$size*$v->[0]+int($size/2)-1, $top+$offset+$height-$v->[1]*$factor);
|
|
1757 $cr->stroke;
|
|
1758 }
|
|
1759 if($v->[4] != $v->[5]) {
|
|
1760 $cr->move_to($left+$offset+$size*$v->[0]+int($size/2)-1, $top+$offset+$height-$v->[5]*$factor);
|
|
1761 $cr->line_to($left+$offset+$size*$v->[0]+int($size/2)-1, $top+$offset+$height-$v->[4]*$factor-1);
|
|
1762 $cr->stroke;
|
|
1763 }
|
|
1764 $cr->restore;
|
|
1765 #box
|
|
1766 if(($v->[2] != $v->[3]) || ($v->[4] != $v->[3])) {
|
|
1767 $cr->set_source_rgba(@whiscol);
|
|
1768 $cr->rectangle($left+$offset+$size*$v->[0], $top+$offset+$height-$v->[2]*$factor, $size-1, -($v->[4]-$v->[2])*$factor);
|
|
1769 $cr->fill;
|
|
1770 $cr->stroke;
|
|
1771 $cr->set_source_rgba(@boxcol);
|
|
1772 $cr->rectangle($left+$offset+$size*$v->[0], $top+$offset+$height-$v->[2]*$factor, $size-2, -($v->[4]-$v->[2])*$factor);
|
|
1773 $cr->stroke;
|
|
1774 } else {
|
|
1775 $cr->set_source_rgba(@boxcol);
|
|
1776 $cr->move_to($left+$offset+$size*$v->[0], $top+$offset+$height-$v->[3]*$factor);
|
|
1777 $cr->line_to($left+$offset+$size*$v->[0]+$size-1, $top+$offset+$height-$v->[3]*$factor);
|
|
1778 $cr->stroke;
|
|
1779 }
|
|
1780 #median
|
|
1781 $cr->set_source_rgba(@medcol);
|
|
1782 $cr->move_to($left+$offset+$size*$v->[0]+1, $top+$offset+$height-$v->[3]*$factor);
|
|
1783 $cr->line_to($left+$offset+$size*$v->[0]+$size-2, $top+$offset+$height-$v->[3]*$factor);
|
|
1784 $cr->stroke;
|
|
1785 }
|
|
1786
|
|
1787 #write image
|
|
1788 $cr->show_page;
|
|
1789 return $surface;
|
|
1790 }
|
|
1791
|
|
1792 sub convertToBarValues {
|
|
1793 my ($data,$niceval,$start,$max) = @_;
|
|
1794 my ($xmax,$ymax,@matrix,$tmp);
|
|
1795 $xmax = $ymax = 0;
|
|
1796
|
|
1797 #get xmax value
|
|
1798 if($max) {
|
|
1799 $xmax = $max;
|
|
1800 } else {
|
|
1801 foreach my $q (keys %$data) {
|
|
1802 $xmax = &max($xmax,$q);
|
|
1803 }
|
|
1804 }
|
|
1805 if($niceval) {
|
|
1806 $xmax = sprintf("%d",($xmax/$niceval)+1)*$niceval if($xmax % $niceval);
|
|
1807 }
|
|
1808
|
|
1809 #get matrix values
|
|
1810 foreach my $q ($start..$xmax) {
|
|
1811 $tmp = (exists $data->{$q} ? $data->{$q} : 0);
|
|
1812 $ymax = &max($ymax,$tmp);
|
|
1813 push(@matrix,$tmp);
|
|
1814 }
|
|
1815
|
|
1816 $ymax = sprintf("%d",($ymax/4)+1)*4 if($ymax % 4);
|
|
1817
|
|
1818 return (\@matrix,$xmax,$ymax);
|
|
1819 }
|
|
1820
|
|
1821 sub createBarPlot {
|
|
1822 my ($matrix,$xmax,$ymax,$title,$xlab,$ylab,$file,$zero) = @_;
|
|
1823
|
|
1824 my @col0 = (178/255, 178/255, 255/255); #b2b2ff
|
|
1825 my @col1 = (255/255, 178/255, 178/255); #ffb2b2
|
|
1826 my @col3 = (127/255, 127/255, 255/255); #7f7fff
|
|
1827 my @col4 = (255/255, 127/255, 127/255); #ff7f7f
|
|
1828 my @linecol = (0, 0, 0, 0.4);
|
|
1829 my @linecol0 = (@col3, 1);
|
|
1830 my @linecol1 = (@col4, 1);
|
|
1831 my @barcol0 = (@col3, 1);
|
|
1832 my @barcol1 = (@col4, 1);
|
|
1833 my @helplinecol1 = (1,1,1, 0.9);
|
|
1834 my @helplinecol2 = (1,1,1, 0.5);
|
|
1835
|
|
1836 #create new image
|
|
1837 my $size = ($xmax <= 50 ? 10 : ($xmax <= 100 ? 6 : 3));
|
|
1838 my $offset = 20;
|
|
1839 my $left = 25;
|
|
1840 my $bottom = 15;
|
|
1841 my $top = 0;
|
|
1842 my $height = 200;
|
|
1843 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+$height); #format, width, height
|
|
1844 my $cr = Cairo::Context->create($surface);
|
|
1845
|
|
1846 my ($font_extents,$extents,$fontheight,$fontdescent);
|
|
1847
|
|
1848 #background
|
|
1849 $cr->rectangle(0, 0, $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+2*200+20);
|
|
1850 $cr->set_source_rgba(1, 1, 1, 1);
|
|
1851 $cr->fill;
|
|
1852
|
|
1853 #fonts
|
|
1854 $cr->select_font_face ('sans', 'normal', 'normal');
|
|
1855 # $cr->set_font_size (30);
|
|
1856
|
|
1857 $cr->save;
|
|
1858
|
|
1859 #set up work space
|
|
1860 my ($dx, $dy);
|
|
1861 $cr->set_antialias('none');
|
|
1862 $cr->set_line_width(1);
|
|
1863
|
|
1864 #background for plot
|
|
1865 $cr->rectangle($left+$offset, $top+$offset, ($xmax+$zero)*$size-1, $height);
|
|
1866 $cr->set_source_rgba(0.95, 0.95, 0.95, 1);
|
|
1867 $cr->fill;
|
|
1868
|
|
1869 #draw ticks
|
|
1870 #x-axis
|
|
1871 $cr->set_source_rgba(0, 0, 0, 0.8);
|
|
1872 foreach my $i (($zero ? 0 : 1)..$xmax) {
|
|
1873 if(($i%5) == 0 && $i > 1 && $i < $xmax) {
|
|
1874 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
|
|
1875 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+3);
|
|
1876 } else {
|
|
1877 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
|
|
1878 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+1);
|
|
1879 }
|
|
1880 }
|
|
1881 $cr->stroke;
|
|
1882
|
|
1883 #y-axis
|
|
1884 $cr->move_to($left+$offset, $top+$offset);
|
|
1885 $cr->line_to($left+$offset-3, $top+$offset);
|
|
1886 $cr->move_to($left+$offset, $top+$offset+$height-1);
|
|
1887 $cr->line_to($left+$offset-3, $top+$offset+$height-1);
|
|
1888 $cr->stroke;
|
|
1889
|
|
1890 #helplines
|
|
1891 $cr->set_source_rgba(@helplinecol1);
|
|
1892 foreach my $j (1..3) {
|
|
1893 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1894 $cr->line_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
1895 }
|
|
1896 $cr->stroke;
|
|
1897
|
|
1898 $cr->set_antialias('default');
|
|
1899
|
|
1900 #tick labels
|
|
1901 $cr->set_source_rgba(0, 0, 0, 0.8);
|
|
1902 $cr->set_font_size(10);
|
|
1903 $font_extents = $cr->font_extents;
|
|
1904 $fontheight = $font_extents->{height};
|
|
1905 #x-axis
|
|
1906 foreach my $i (($zero ? 0 : 1)..$xmax) {
|
|
1907 if(($i%5) == 0 && $i > 1 && $i < $xmax) {
|
|
1908 $extents = $cr->text_extents($i);
|
|
1909 $cr->move_to($left+$offset+int($size/2+1)+$size*$i-($zero ? 0 : $size)-$extents->{width}/2-1-($i == 1 ? 1 : 0), $top+$offset+$height+$fontheight+2);
|
|
1910 $cr->show_text($i);
|
|
1911 }
|
|
1912 }
|
|
1913 #y-axis
|
|
1914 $extents = $cr->text_extents(&addCommas($ymax));
|
|
1915 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2);
|
|
1916 $cr->show_text(&addCommas($ymax));
|
|
1917 $extents = $cr->text_extents(0);
|
|
1918 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2+$height);
|
|
1919 $cr->show_text(0);
|
|
1920
|
|
1921 $cr->save;
|
|
1922
|
|
1923 #labels
|
|
1924 $cr->set_font_size (14);
|
|
1925 $font_extents = $cr->font_extents;
|
|
1926 $fontheight = $font_extents->{height};
|
|
1927
|
|
1928 #axis labels
|
|
1929 $cr->set_source_rgba(0, 0, 0, 1);
|
|
1930 $extents = $cr->text_extents($xlab);
|
|
1931 $cr->move_to($left+$offset+($xmax+$zero)*$size/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
|
|
1932 $cr->show_text($xlab);
|
|
1933 $cr->rotate($PI * 3 / 2);
|
|
1934 $extents = $cr->text_extents($ylab);
|
|
1935 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2),$offset);
|
|
1936 $cr->show_text($ylab);
|
|
1937
|
|
1938 $cr->restore;
|
|
1939
|
|
1940 #draw boxes
|
|
1941 $cr->set_antialias('none');
|
|
1942 foreach my $pos (0..$xmax-($zero ? 0 : 1)) {
|
|
1943 next unless($matrix->[$pos+($zero ? 0 : 1)]);
|
|
1944 my $tmp = $matrix->[$pos+($zero ? 0 : 1)] / $ymax;
|
|
1945 #unique
|
|
1946 if($tmp) {
|
|
1947 $cr->set_source_rgba(@barcol0);
|
|
1948 $cr->rectangle($left+$offset+($pos+($zero ? 0 : 1))*$size, $top+$offset+$height, $size-1, -$tmp*$height);
|
|
1949 $cr->fill;
|
|
1950 }
|
|
1951 }
|
|
1952
|
|
1953 #write image
|
|
1954 $cr->show_page;
|
|
1955 return $surface;
|
|
1956 }
|
|
1957
|
|
1958 sub convertOdToStackBinMatrix {
|
|
1959 my ($data,$stacks,$min,$max,$nonice) = @_;
|
|
1960
|
|
1961 my ($num,$ymax,$xmax,$xmin,$step,%vals,%sums,$sum,@matrix,$bin,$tmpbin);
|
|
1962
|
|
1963 #make nice xmax value
|
|
1964 if(defined $max) {
|
|
1965 $xmax = $max;
|
|
1966 } else {
|
|
1967 $xmax = (sort {$b <=> $a} keys %$data)[0];
|
|
1968 }
|
|
1969 $bin = &getBinVal($xmax);
|
|
1970 $xmax = $bin*100;
|
|
1971 $xmin = (defined $min ? $min : 0);
|
|
1972
|
|
1973 #get data to bin and find y axis max value
|
|
1974 $ymax = 0;
|
|
1975 foreach my $s (0..$stacks-1) {
|
|
1976 $sums{$s} = 0;
|
|
1977 }
|
|
1978 $sum = 0;
|
|
1979 $tmpbin = $bin;
|
|
1980 foreach my $i ($xmin..$xmax) {
|
|
1981 foreach my $s (0..$stacks-1) {
|
|
1982 next unless(exists $data->{$i}->{$s});
|
|
1983 $sums{$s} += $data->{$i}->{$s};
|
|
1984 $sum += $data->{$i}->{$s};
|
|
1985 }
|
|
1986 if(--$tmpbin <= 0) {
|
|
1987 $tmpbin = $bin;
|
|
1988 $ymax = &max($ymax,$sum);
|
|
1989 $sum = 0;
|
|
1990 foreach my $s (0..$stacks-1) {
|
|
1991 push(@{$matrix[$s]},$sums{$s});
|
|
1992 $sums{$s} = 0;
|
|
1993 }
|
|
1994 }
|
|
1995 }
|
|
1996
|
|
1997 #make nice ymax value
|
|
1998 unless($nonice) {
|
|
1999 $ymax = sprintf("%d",($ymax/4)+1)*4 if($ymax % 4);
|
|
2000 # $step = ($ymax <= 10 ? 10 : ($ymax < 40 ? 40 : ($ymax < 100 ? 100 : ($ymax < 1000 ? 100 : 100))));
|
|
2001 # $ymax = sprintf("%d",($ymax/$step)+1)*$step if($ymax % $step);
|
|
2002 }
|
|
2003
|
|
2004 return (\@matrix,$xmax,$ymax,$stacks);
|
|
2005 }
|
|
2006
|
|
2007 sub createStackBarPlot {
|
|
2008 my ($matrix,$xmax,$ymax,$stacks,$title,$xlab,$ylab,$file,$zero,$add) = @_;
|
|
2009
|
|
2010 my $bin = 1;
|
|
2011 if($xmax > 100) {
|
|
2012 $bin = $xmax / 100;
|
|
2013 $xmax = 100;
|
|
2014 }
|
|
2015
|
|
2016 my @legend = ('Exact dupl.','5\' dupl.','3\' dupl.','Rev. compl. exact dupl.','Rev. compl. 5\'/3\' dupl.');
|
|
2017 my @cols = ([69/255, 114/255, 167/255, 1],
|
|
2018 [137/255, 1165/255, 78/255, 1],
|
|
2019 [170/255, 70/255, 67/255, 1],
|
|
2020 [147/255, 169/255, 207/255, 1],
|
|
2021 [51/255, 102/255, 102/255, 1]);
|
|
2022 my @barcol = (127/255, 127/255, 255/255, 1); #b2b2ff
|
|
2023 my @meancol = (255/255, 127/255, 127/255, 1); #ffb2b2
|
|
2024 my @stdcol = (178/255, 178/255, 255/255, 0.8); #7f7fff
|
|
2025 my @std1col = (0, 0, 0, 0.02); #ff7f7f
|
|
2026 my @std2col = (0, 0, 0, 0.02); #ff7f7f
|
|
2027 my @linecol = (0, 0, 0, 0.4);
|
|
2028 my @helplinecol = (1, 1, 1, 0.9);
|
|
2029 my @background = (0.95, 0.95, 0.95, 1);
|
|
2030 my @tickcol = (0, 0, 0, 0.8);
|
|
2031 my @labelcol = (0, 0, 0, 1);
|
|
2032
|
|
2033 #create new image
|
|
2034 my $size = 6;
|
|
2035 my $offset = 20;
|
|
2036 my $left = 40;
|
|
2037 my $bottom = 15;
|
|
2038 my $top = 20;
|
|
2039 my $height = 200;
|
|
2040 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+($xmax+$zero)*$size,$bottom+$top+$offset*2+$height); #format, width, height
|
|
2041 my $cr = Cairo::Context->create($surface);
|
|
2042
|
|
2043 my ($font_extents,$extents,$fontheight,$fontdescent);
|
|
2044
|
|
2045 #background
|
|
2046 $cr->rectangle(0, 0, $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+2*200+20);
|
|
2047 $cr->set_source_rgba(1, 1, 1, 1);
|
|
2048 $cr->fill;
|
|
2049
|
|
2050 #fonts
|
|
2051 $cr->select_font_face ('sans', 'normal', 'normal');
|
|
2052
|
|
2053 $cr->save;
|
|
2054
|
|
2055 #set up work space
|
|
2056 $cr->set_antialias('none');
|
|
2057 $cr->set_line_width(1);
|
|
2058
|
|
2059 #background for plot
|
|
2060 $cr->rectangle($left+$offset, $top+$offset, ($xmax+$zero)*$size-1, $height);
|
|
2061 $cr->set_source_rgba(@background);
|
|
2062 $cr->fill;
|
|
2063
|
|
2064 #draw legend
|
|
2065 $cr->set_font_size(10);
|
|
2066 # $font_extents = $cr->font_extents;
|
|
2067 my $x = $left+$offset+$size*100-5;
|
|
2068 foreach my $i (reverse (0..scalar(@legend)-1)) {
|
|
2069 $cr->set_antialias('default');
|
|
2070 $x -= $cr->text_extents($legend[$i])->{width};
|
|
2071 $cr->move_to($x,$top+5+9);
|
|
2072 $cr->set_source_rgba(@tickcol);
|
|
2073 $cr->show_text($legend[$i]);
|
|
2074 $x -= 15;
|
|
2075 $cr->set_antialias('none');
|
|
2076 $cr->set_source_rgba(@{$cols[$i]});
|
|
2077 $cr->rectangle($x, $top+5, 10, 10);
|
|
2078 $cr->fill;
|
|
2079 $x -= 15;
|
|
2080 }
|
|
2081
|
|
2082 #draw ticks
|
|
2083 $cr->set_antialias('none');
|
|
2084 #x-axis
|
|
2085 $cr->set_source_rgba(@tickcol);
|
|
2086 foreach my $i (($zero ? 0 : 1)..$xmax) {
|
|
2087 if(($i%5) == 0 && $i > 1 && $i < $xmax) {
|
|
2088 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
|
|
2089 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+3);
|
|
2090 } else {
|
|
2091 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
|
|
2092 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+1);
|
|
2093 }
|
|
2094 }
|
|
2095 $cr->stroke;
|
|
2096
|
|
2097 #y-axis
|
|
2098 $cr->move_to($left+$offset, $top+$offset);
|
|
2099 $cr->line_to($left+$offset-3, $top+$offset);
|
|
2100 $cr->move_to($left+$offset, $top+$offset+$height-1);
|
|
2101 $cr->line_to($left+$offset-3, $top+$offset+$height-1);
|
|
2102 $cr->stroke;
|
|
2103
|
|
2104 #helplines
|
|
2105 $cr->set_source_rgba(@helplinecol);
|
|
2106 foreach my $j (1..3) {
|
|
2107 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
2108 $cr->line_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
|
|
2109 }
|
|
2110 $cr->stroke;
|
|
2111
|
|
2112 $cr->set_antialias('default');
|
|
2113
|
|
2114 #tick labels
|
|
2115 $cr->set_source_rgba(@tickcol);
|
|
2116 $cr->set_font_size(10);
|
|
2117 $font_extents = $cr->font_extents;
|
|
2118 $fontheight = $font_extents->{height};
|
|
2119 #x-axis
|
|
2120 foreach my $i (($zero ? 0 : 1)..$xmax) {
|
|
2121 if(($i%10) == 0 && $i > 1 && $i < $xmax) {
|
|
2122 $extents = $cr->text_extents($i*$bin);
|
|
2123 $cr->move_to($left+$offset+int($size/2+1)+$size*$i-($zero ? 0 : $size)-$extents->{width}/2-1-($i == 1 ? 1 : 0), $top+$offset+$height+$fontheight+2);
|
|
2124 $cr->show_text($i*$bin);
|
|
2125 }
|
|
2126 }
|
|
2127 #y-axis
|
|
2128 $extents = $cr->text_extents(&addCommas($ymax));
|
|
2129 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2);
|
|
2130 $cr->show_text(&addCommas($ymax));
|
|
2131 $extents = $cr->text_extents(0);
|
|
2132 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2+$height);
|
|
2133 $cr->show_text(0);
|
|
2134
|
|
2135 $cr->save;
|
|
2136
|
|
2137 #labels
|
|
2138 $cr->set_font_size(14);
|
|
2139 $font_extents = $cr->font_extents;
|
|
2140 $fontheight = $font_extents->{height};
|
|
2141
|
|
2142 #axis labels
|
|
2143 $cr->set_source_rgba(@labelcol);
|
|
2144 $extents = $cr->text_extents($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
|
|
2145 $cr->move_to($left+$offset+($xmax+$zero)*$size/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
|
|
2146 $cr->show_text($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
|
|
2147 $cr->rotate($PI * 3 / 2);
|
|
2148 $extents = $cr->text_extents($ylab.($bin>1 ? ' (per bin)' : ''));
|
|
2149 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2+($bin>1 ? 12 : 0)),$offset+10);
|
|
2150 $cr->show_text($ylab.($bin>1 ? ' (per bin)' : ''));
|
|
2151
|
|
2152 $cr->restore;
|
|
2153
|
|
2154 #draw boxes
|
|
2155 $cr->set_antialias('none');
|
|
2156 foreach my $pos (0..$xmax-($zero ? 0 : 1)) {
|
|
2157 my $tmp = 0;
|
|
2158 foreach my $s (0..$stacks-1) {
|
|
2159 next unless($matrix->[$s]->[$pos]);
|
|
2160 my $cur = $matrix->[$s]->[$pos] / $ymax;
|
|
2161 $cr->set_source_rgba(@{$cols[$s]});
|
|
2162 if($cur) {
|
|
2163 $cr->rectangle($left+$offset+$pos*$size, $top+$offset+$height-$tmp*$height, $size-1, -$cur*$height);
|
|
2164 $cr->fill;
|
|
2165 }
|
|
2166 $tmp += $cur;
|
|
2167 }
|
|
2168 }
|
|
2169
|
|
2170 #write image
|
|
2171 $cr->show_page;
|
|
2172 return $surface;
|
|
2173 }
|
|
2174
|
|
2175 sub header {
|
|
2176 return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
|
2177 <html>
|
|
2178 <head>
|
|
2179 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
|
2180 <title>PRINSEQ-'.$WHAT.' Report</title>
|
|
2181 <style type="text/css">
|
|
2182 <!--/* <![CDATA[ */
|
|
2183 '.$CSS_STYLE.'
|
|
2184 /* ]]> */-->
|
|
2185 </style>
|
|
2186 </head>
|
|
2187 <body>
|
|
2188 <center>';
|
|
2189 }
|
|
2190
|
|
2191 sub footer {
|
|
2192 return '</center></body></html>';
|
|
2193 }
|
|
2194
|
|
2195 sub generateHtml {
|
|
2196 my ($in,$out) = @_;
|
|
2197 my ($file,$data,$surface,$html,$png);
|
|
2198 $data = &readGdFile($in);
|
|
2199 my $time = sprintf("%02d/%02d/%04d %02d:%02d:%02d",sub {($_[4]+1,$_[3],$_[5]+1900,$_[2],$_[1],$_[0])}->(localtime));
|
|
2200
|
|
2201 $html .= &header();
|
|
2202 $html .= '<h2>PRINSEQ-'.$WHAT.' v'.$VERSION.' HTML Report </h2>[Generated: '.$time.']<br /><br />';
|
|
2203 $html .= '<div class="info-panel">';
|
|
2204
|
|
2205 #input info
|
|
2206 if(exists $data->{numseqs}) {
|
|
2207 $html .= '<div class="info-header"><span class="info-header-title">Input Information</span></div>';
|
|
2208 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Input file(s):</td><td class="info-table-value">'.($data->{filename1} ? &convertIntToString($data->{filename1}) : '-').($data->{filename2} ? ' and '.&convertIntToString($data->{filename2}) : '').'</td></tr><tr><td class="info-table-type">Input format(s):</td><td class="info-table-value">'.($data->{format1} ? uc($data->{format1}) : '-').($data->{format2} ? ' and '.uc($data->{format2}) : '').'</td></tr>';
|
|
2209 if(exists $data->{pairedend} && $data->{pairedend}) {
|
|
2210 my $singletons1 = ($data->{numseqs}||0)-($data->{pairs}||0);
|
|
2211 my $singletons2 = ($data->{numseqs2}||0)-($data->{pairs}||0);
|
|
2212 $html .= '<tr><td class="info-table-type"># Sequences (file 1):</td><td class="info-table-value">'.&addCommas($data->{numseqs}||'-').'</td></tr><tr><td class="info-table-type">Total bases (file 1):</td><td class="info-table-value">'.&addCommas($data->{numbases}||'-').'</td></tr><tr><td class="info-table-type"># Sequences (file 2):</td><td class="info-table-value">'.&addCommas($data->{numseqs2}||'-').'</td></tr><tr><td class="info-table-type">Total bases (file 2):</td><td class="info-table-value">'.&addCommas($data->{numbases2}||'-').'</td></tr><tr><td class="info-table-type"># Pairs:</td><td class="info-table-value">'.&addCommas($data->{pairs}||'-').($data->{pairs} ? ' ('.sprintf("%.2f",(100*(2*$data->{pairs})/(($data->{numseqs}||0)+($data->{numseqs2}||0)))).'% of sequences)' : '').'</td></tr></tr><tr><td class="info-table-type"># Singletons (file 1):</td><td class="info-table-value">'.&addCommas($singletons1).($singletons1 ? ' ('.sprintf("%.2f",(100*$singletons1/$data->{numseqs})).'%)' : '').'</td></tr><tr><td class="info-table-type"># Singletons (file 2):</td><td class="info-table-value">'.&addCommas($singletons2).($singletons2 ? ' ('.sprintf("%.2f",(100*$singletons2/$data->{numseqs2})).'%)' : '').'</td></tr>';
|
|
2213 } else {
|
|
2214 $html .= '<tr><td class="info-table-type"># Sequences:</td><td class="info-table-value">'.&addCommas($data->{numseqs}||'-').'</td></tr><tr><td class="info-table-type">Total bases:</td><td class="info-table-value">'.&addCommas($data->{numbases}||'-').'</td></tr>';
|
|
2215 }
|
|
2216 $html .= '</tbody></table></div><hr>';
|
|
2217 }
|
|
2218
|
|
2219 #length plot
|
|
2220 if(exists $data->{counts}->{length} && keys %{$data->{counts}->{length}}) {
|
|
2221 $html .= '<div class="info-header"><span class="info-header-title">Length Distribution</span></div>';
|
|
2222 if(exists $data->{pairedend} && $data->{pairedend}) {
|
|
2223 $html .= '<div class="info-content"><b>File 1</b><br /><table border="0" cellpadding="0" cellspacing="0"> <tbody><tr><td class="info-table-type">Mean sequence length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{mean} ? sprintf("%.2f",$data->{stats}->{length}->{mean}) : '-').' ± '.(exists $data->{stats}->{length}->{std} ? sprintf("%.2f",$data->{stats}->{length}->{std}) : '-').' bp</td></tr><tr><td class="info-table-type">Minimum length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{min} ? &addCommas($data->{stats}->{length}->{min}) : '-').' bp</td></tr> <tr><td class="info-table-type">Maximum length:</td><td class="info-table-value">'.(exists $data->{stats}->{length}->{max} ? &addCommas($data->{stats}->{length}->{max}) : '-').' bp</td></tr> <tr><td class="info-table-type">Length range:</td><td class="info-table-value">'.(exists $data->{stats}->{length}->{range} ? &addCommas($data->{stats}->{length}->{range}) : '-').' bp</td></tr> <tr><td class="info-table-type">Mode length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{mode} ? &addCommas($data->{stats}->{length}->{mode}) : '-').' bp with '.(exists $data->{stats}->{length}->{modeval} ? &addCommas($data->{stats}->{length}->{modeval}) : '-').' sequences</td></tr></tbody></table><br>';
|
|
2224 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{length},1),$data->{stats}->{length},'Length Distribution','Read Length in bp','# Sequences','',0,' bp');
|
|
2225 $png = '';
|
|
2226 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2227 $html .= &insert_image($png);
|
|
2228 $html .= '</div>';
|
|
2229 $html .= '<div class="info-content"><br /><b>File 2</b><br /><table border="0" cellpadding="0" cellspacing="0"> <tbody><tr><td class="info-table-type">Mean sequence length:</td> <td class="info-table-value">'.(exists $data->{stats2}->{length}->{mean} ? sprintf("%.2f",$data->{stats2}->{length}->{mean}) : '-').' ± '.(exists $data->{stats2}->{length}->{std} ? sprintf("%.2f",$data->{stats2}->{length}->{std}) : '-').' bp</td></tr><tr><td class="info-table-type">Minimum length:</td> <td class="info-table-value">'.(exists $data->{stats2}->{length}->{min} ? &addCommas($data->{stats2}->{length}->{min}) : '-').' bp</td></tr> <tr><td class="info-table-type">Maximum length:</td><td class="info-table-value">'.(exists $data->{stats2}->{length}->{max} ? &addCommas($data->{stats2}->{length}->{max}) : '-').' bp</td></tr> <tr><td class="info-table-type">Length range:</td><td class="info-table-value">'.(exists $data->{stats2}->{length}->{range} ? &addCommas($data->{stats2}->{length}->{range}) : '-').' bp</td></tr> <tr><td class="info-table-type">Mode length:</td> <td class="info-table-value">'.(exists $data->{stats2}->{length}->{mode} ? &addCommas($data->{stats2}->{length}->{mode}) : '-').' bp with '.(exists $data->{stats2}->{length}->{modeval} ? &addCommas($data->{stats2}->{length}->{modeval}) : '-').' sequences</td></tr></tbody></table><br>';
|
|
2230 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{length},1),$data->{stats2}->{length},'Length Distribution','Read Length in bp','# Sequences','',0,' bp');
|
|
2231 $png = '';
|
|
2232 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2233 $html .= &insert_image($png);
|
|
2234 $html .= '</div>';
|
|
2235 } else {
|
|
2236 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"> <tbody><tr><td class="info-table-type">Mean sequence length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{mean} ? sprintf("%.2f",$data->{stats}->{length}->{mean}) : '-').' ± '.(exists $data->{stats}->{length}->{std} ? sprintf("%.2f",$data->{stats}->{length}->{std}) : '-').' bp</td></tr><tr><td class="info-table-type">Minimum length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{min} ? &addCommas($data->{stats}->{length}->{min}) : '-').' bp</td></tr> <tr><td class="info-table-type">Maximum length:</td><td class="info-table-value">'.(exists $data->{stats}->{length}->{max} ? &addCommas($data->{stats}->{length}->{max}) : '-').' bp</td></tr> <tr><td class="info-table-type">Length range:</td><td class="info-table-value">'.(exists $data->{stats}->{length}->{range} ? &addCommas($data->{stats}->{length}->{range}) : '-').' bp</td></tr> <tr><td class="info-table-type">Mode length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{mode} ? &addCommas($data->{stats}->{length}->{mode}) : '-').' bp with '.(exists $data->{stats}->{length}->{modeval} ? &addCommas($data->{stats}->{length}->{modeval}) : '-').' sequences</td></tr></tbody></table><br>';
|
|
2237 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{length},1),$data->{stats}->{length},'Length Distribution','Read Length in bp','# Sequences','',0,' bp');
|
|
2238 $png = '';
|
|
2239 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2240 $html .= &insert_image($png);
|
|
2241 $html .= '</div>';
|
|
2242 }
|
|
2243 $html .= '<hr>';
|
|
2244 }
|
|
2245
|
|
2246 #GC content
|
|
2247 if(exists $data->{counts}->{gc} && keys %{$data->{counts}->{gc}}) {
|
|
2248 $html .= '<div class="info-header"><span class="info-header-title">GC Content Distribution</span></div>';
|
|
2249 if(exists $data->{pairedend} && $data->{pairedend}) {
|
|
2250 $html .= '<div class="info-content"><b>File 1</b><br /><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Mean GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{mean} ? sprintf("%.2f",$data->{stats}->{gc}->{mean}) : '-').' ± '.(exists $data->{stats}->{gc}->{std} ? sprintf("%.2f",$data->{stats}->{gc}->{std}) : '-').' %</td></tr> <tr><td class="info-table-type">Minimum GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{min} ? $data->{stats}->{gc}->{min} : '-').' %</td></tr> <tr><td class="info-table-type">Maximum GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{max} ? $data->{stats}->{gc}->{max} : '-').' %</td></tr> <tr><td class="info-table-type">GC content range:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{range} ? $data->{stats}->{gc}->{range} : '-').' %</td></tr> <tr><td class="info-table-type">Mode GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{mode} ? $data->{stats}->{gc}->{mode} : '-').' % with '.(exists $data->{stats}->{gc}->{modeval} ? &addCommas($data->{stats}->{gc}->{modeval}) : '-').' sequences</td></tr> </tbody></table><br>';
|
|
2251 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{gc},0),$data->{stats}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences','',1);
|
|
2252 $png = '';
|
|
2253 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2254 $html .= &insert_image($png);
|
|
2255 $html .= '</div>';
|
|
2256 $html .= '<div class="info-content"><br /><b>File 2</b><br /><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Mean GC content:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{mean} ? sprintf("%.2f",$data->{stats2}->{gc}->{mean}) : '-').' ± '.(exists $data->{stats2}->{gc}->{std} ? sprintf("%.2f",$data->{stats2}->{gc}->{std}) : '-').' %</td></tr> <tr><td class="info-table-type">Minimum GC content:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{min} ? $data->{stats2}->{gc}->{min} : '-').' %</td></tr> <tr><td class="info-table-type">Maximum GC content:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{max} ? $data->{stats2}->{gc}->{max} : '-').' %</td></tr> <tr><td class="info-table-type">GC content range:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{range} ? $data->{stats2}->{gc}->{range} : '-').' %</td></tr> <tr><td class="info-table-type">Mode GC content:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{mode} ? $data->{stats2}->{gc}->{mode} : '-').' % with '.(exists $data->{stats2}->{gc}->{modeval} ? &addCommas($data->{stats2}->{gc}->{modeval}) : '-').' sequences</td></tr> </tbody></table><br>';
|
|
2257 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{gc},0),$data->{stats2}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences','',1);
|
|
2258 $png = '';
|
|
2259 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2260 $html .= &insert_image($png);
|
|
2261 $html .= '</div>';
|
|
2262 } else {
|
|
2263 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Mean GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{mean} ? sprintf("%.2f",$data->{stats}->{gc}->{mean}) : '-').' ± '.(exists $data->{stats}->{gc}->{std} ? sprintf("%.2f",$data->{stats}->{gc}->{std}) : '-').' %</td></tr> <tr><td class="info-table-type">Minimum GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{min} ? $data->{stats}->{gc}->{min} : '-').' %</td></tr> <tr><td class="info-table-type">Maximum GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{max} ? $data->{stats}->{gc}->{max} : '-').' %</td></tr> <tr><td class="info-table-type">GC content range:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{range} ? $data->{stats}->{gc}->{range} : '-').' %</td></tr> <tr><td class="info-table-type">Mode GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{mode} ? $data->{stats}->{gc}->{mode} : '-').' % with '.(exists $data->{stats}->{gc}->{modeval} ? &addCommas($data->{stats}->{gc}->{modeval}) : '-').' sequences</td></tr> </tbody></table><br>';
|
|
2264 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{gc},0),$data->{stats}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences','',1);
|
|
2265 $png = '';
|
|
2266 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2267 $html .= &insert_image($png);
|
|
2268 $html .= '</div>';
|
|
2269 }
|
|
2270 $html .= '<hr>';
|
|
2271 }
|
|
2272
|
|
2273 #Base quality
|
|
2274 if(exists $data->{quals} || exists $data->{qualsmean} || exists $data->{qualsbin}) {
|
|
2275 $html .= '<div class="info-header"><span class="info-header-title">Base Quality Distribution</span></div><div class="info-content">';
|
|
2276 if(exists $data->{pairedend} && $data->{pairedend}) {
|
|
2277 $html .= '<b>File 1</b><br />';
|
|
2278 }
|
|
2279 }
|
|
2280 if(exists $data->{quals} && keys %{$data->{quals}}) {
|
|
2281 $surface = &createBoxPlot(&convertToBoxValues($data->{quals},4),'Base Quality Distribution','Read position in %','Quality score','');
|
|
2282 $png = '';
|
|
2283 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2284 $html .= &insert_image($png);
|
|
2285 }
|
|
2286 if(exists $data->{qualsbin} && keys %{$data->{qualsbin}}) {
|
|
2287 $surface = &createBoxPlot(&convertToBoxValues($data->{qualsbin},4),'Base Quality Distribution','Read position in bp','Quality score','',0,'bp',$data->{binval});
|
|
2288 $png = '';
|
|
2289 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2290 $html .= '<br /><br />' if(exists $data->{quals});
|
|
2291 $html .= &insert_image($png);
|
|
2292 }
|
|
2293 if(exists $data->{qualsmean} && keys %{$data->{qualsmean}}) {
|
|
2294 $surface = &createBarPlot(&convertToBarValues($data->{qualsmean},5,1),'Sequence Quality Distribution','Mean of quality scores per sequence','Number of sequences','',0);
|
|
2295 $png = '';
|
|
2296 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2297 $html .= '<br /><br />' if(exists $data->{qualsbin});
|
|
2298 $html .= &insert_image($png);
|
|
2299 }
|
|
2300 if(exists $data->{pairedend} && $data->{pairedend}) {
|
|
2301 if(exists $data->{quals} || exists $data->{qualsmean} || exists $data->{qualsbin}) {
|
|
2302 $html .= '<br /><br /><br /><b>File 2</b><br />';
|
|
2303 }
|
|
2304 if(exists $data->{quals2} && keys %{$data->{quals2}}) {
|
|
2305 $surface = &createBoxPlot(&convertToBoxValues($data->{quals2},4),'Base Quality Distribution','Read position in %','Quality score','');
|
|
2306 $png = '';
|
|
2307 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2308 $html .= &insert_image($png);
|
|
2309 }
|
|
2310 if(exists $data->{qualsbin2} && keys %{$data->{qualsbin2}}) {
|
|
2311 $surface = &createBoxPlot(&convertToBoxValues($data->{qualsbin2},4),'Base Quality Distribution','Read position in bp','Quality score','',0,'bp',$data->{binval});
|
|
2312 $png = '';
|
|
2313 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2314 $html .= '<br /><br />' if(exists $data->{quals2});
|
|
2315 $html .= &insert_image($png);
|
|
2316 }
|
|
2317 if(exists $data->{qualsmean2} && keys %{$data->{qualsmean2}}) {
|
|
2318 $surface = &createBarPlot(&convertToBarValues($data->{qualsmean2},5,1),'Sequence Quality Distribution','Mean of quality scores per sequence','Number of sequences','',0);
|
|
2319 $png = '';
|
|
2320 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2321 $html .= '<br /><br />' if(exists $data->{qualsbin2});
|
|
2322 $html .= &insert_image($png);
|
|
2323 }
|
|
2324 }
|
|
2325 if(exists $data->{quals} || exists $data->{qualsmean} || exists $data->{qualsbin}) {
|
|
2326 $html .= '</div><hr>';
|
|
2327 }
|
|
2328
|
|
2329 #Ns
|
|
2330 if((exists $data->{counts}->{ns} && keys %{$data->{counts}->{ns}}) || (exists $data->{counts2} && exists $data->{counts2}->{ns} && keys %{$data->{counts2}->{ns}})) {
|
|
2331 $html .= '<div class="info-header"><span class="info-header-title">Occurence of N</span></div><div class="info-content">';
|
|
2332 if(exists $data->{pairedend} && $data->{pairedend}) {
|
|
2333 $html .= '<b>File 1</b><br />';
|
|
2334 }
|
|
2335 }
|
|
2336 if(exists $data->{counts}->{ns} && keys %{$data->{counts}->{ns}}) {
|
|
2337 my $nscount = 0;
|
|
2338 foreach my $n (values %{$data->{counts}->{ns}}) {
|
|
2339 $nscount += $n;
|
|
2340 }
|
|
2341 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Sequences with N:</td> <td class="info-table-value">'.($nscount ? &addCommas($nscount).' ('.sprintf("%.2f",100/$data->{numseqs}*$nscount).' %)' : 0).'</td></tr><tr><td class="info-table-type">Max percentage of Ns per sequence:</td> <td class="info-table-value">'.(exists $data->{stats}->{ns}->{max} ? $data->{stats}->{ns}->{max} : 0).' %</td></tr></tbody></table>';
|
|
2342 if($nscount) {
|
|
2343 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{ns},1),undef,'Percentage of N\'s (> 0%)','Percentage of N\'s per Read (1-100%)','# Sequences','',0);
|
|
2344 $png = '';
|
|
2345 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2346 $html .= '<br />'.&insert_image($png);
|
|
2347 }
|
|
2348 }
|
|
2349 if(exists $data->{pairedend} && $data->{pairedend} && exists $data->{counts2}->{ns} && keys %{$data->{counts2}->{ns}}) {
|
|
2350 $html .= '<br /><br /><br /><b>File 2</b><br />';
|
|
2351 my $nscount = 0;
|
|
2352 foreach my $n (values %{$data->{counts2}->{ns}}) {
|
|
2353 $nscount += $n;
|
|
2354 }
|
|
2355 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Sequences with N:</td> <td class="info-table-value">'.($nscount ? &addCommas($nscount).' ('.sprintf("%.2f",100/$data->{numseqs2}*$nscount).' %)' : 0).'</td></tr><tr><td class="info-table-type">Max percentage of Ns per sequence:</td> <td class="info-table-value">'.(exists $data->{stats2}->{ns}->{max} ? $data->{stats2}->{ns}->{max} : 0).' %</td></tr></tbody></table>';
|
|
2356 if($nscount) {
|
|
2357 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{ns},1),undef,'Percentage of N\'s (> 0%)','Percentage of N\'s per Read (1-100%)','# Sequences','',0);
|
|
2358 $png = '';
|
|
2359 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2360 $html .= '<br />'.&insert_image($png);
|
|
2361 }
|
|
2362 }
|
|
2363 if((exists $data->{counts}->{ns} && keys %{$data->{counts}->{ns}}) || (exists $data->{counts2} && exists $data->{counts2}->{ns} && keys %{$data->{counts2}->{ns}})) {
|
|
2364 $html .= '</div><hr>';
|
|
2365 }
|
|
2366
|
|
2367 #tails
|
|
2368 if(exists $data->{tail} || exists $data->{tail2}) {
|
|
2369 $html .= '<div class="info-header"><span class="info-header-title">Poly-A/T Tails</span></div><div class="info-content">';
|
|
2370 }
|
|
2371 if(exists $data->{tail}) {
|
|
2372 my $tail5count = 0;
|
|
2373 foreach my $n (values %{$data->{counts}->{tail5}}) {
|
|
2374 $tail5count += $n;
|
|
2375 }
|
|
2376 my $tail3count = 0;
|
|
2377 foreach my $n (values %{$data->{counts}->{tail3}}) {
|
|
2378 $tail3count += $n;
|
|
2379 }
|
|
2380 if(exists $data->{pairedend} && $data->{pairedend}) {
|
|
2381 $html .= '<b>File 1</b><br />';
|
|
2382 }
|
|
2383 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value">5\'-end</td> <td class="info-table-value">3\'-end</td></tr> <tr><td class="info-table-type">Sequences with tail:</td><td class="info-table-value">'.($tail5count ? &addCommas($tail5count).' ('.sprintf("%.2f",100/$data->{numseqs}*$tail5count).' %)' : 0).'</td> <td class="info-table-value">'.($tail3count ? &addCommas($tail3count).' ('.sprintf("%.2f",100/$data->{numseqs}*$tail3count).' %)' : 0).'</td></tr> <tr><td class="info-table-type">Maximum tail length:</td> <td class="info-table-value">'.(exists $data->{stats}->{tail5}->{max} ? $data->{stats}->{tail5}->{max} : 0).'</td> <td class="info-table-value">'.(exists $data->{stats}->{tail3}->{max} ? $data->{stats}->{tail3}->{max} : 0).'</td></tr></tbody></table>';
|
|
2384 if($tail5count) {
|
|
2385 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{tail5},1),undef,'Poly-A/T Tail Distribution (> 4bp)','5\' Tail Length in bp','# Sequences','',0,' bp');
|
|
2386 $png = '';
|
|
2387 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2388 $html .= '<br />'.&insert_image($png);
|
|
2389 if($tail3count) {
|
|
2390 $html .= '<br />';
|
|
2391 }
|
|
2392 }
|
|
2393 if($tail3count) {
|
|
2394 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{tail3},1),undef,'Poly-A/T Tail Distribution (> 4bp)','3\' Tail Length in bp','# Sequences','',0,' bp');
|
|
2395 $png = '';
|
|
2396 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2397 $html .= '<br />'.&insert_image($png);
|
|
2398 }
|
|
2399 }
|
|
2400 if(exists $data->{pairedend} && $data->{pairedend} && exists $data->{tail2}) {
|
|
2401 my $tail5count = 0;
|
|
2402 foreach my $n (values %{$data->{counts2}->{tail5}}) {
|
|
2403 $tail5count += $n;
|
|
2404 }
|
|
2405 my $tail3count = 0;
|
|
2406 foreach my $n (values %{$data->{counts2}->{tail3}}) {
|
|
2407 $tail3count += $n;
|
|
2408 }
|
|
2409 $html .= '<br /><br /><br /><b>File 2</b><br />';
|
|
2410 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value">5\'-end</td> <td class="info-table-value">3\'-end</td></tr> <tr><td class="info-table-type">Sequences with tail:</td><td class="info-table-value">'.($tail5count ? &addCommas($tail5count).' ('.sprintf("%.2f",100/$data->{numseqs2}*$tail5count).' %)' : 0).'</td> <td class="info-table-value">'.($tail3count ? &addCommas($tail3count).' ('.sprintf("%.2f",100/$data->{numseqs2}*$tail3count).' %)' : 0).'</td></tr> <tr><td class="info-table-type">Maximum tail length:</td> <td class="info-table-value">'.(exists $data->{stats2}->{tail5}->{max} ? $data->{stats2}->{tail5}->{max} : 0).'</td> <td class="info-table-value">'.(exists $data->{stats2}->{tail3}->{max} ? $data->{stats2}->{tail3}->{max} : 0).'</td></tr></tbody></table>';
|
|
2411 if($tail5count) {
|
|
2412 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{tail5},1),undef,'Poly-A/T Tail Distribution (> 4bp)','5\' Tail Length in bp','# Sequences','',0,' bp');
|
|
2413 $png = '';
|
|
2414 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2415 $html .= '<br />'.&insert_image($png);
|
|
2416 if($tail3count) {
|
|
2417 $html .= '<br />';
|
|
2418 }
|
|
2419 }
|
|
2420 if($tail3count) {
|
|
2421 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{tail3},1),undef,'Poly-A/T Tail Distribution (> 4bp)','3\' Tail Length in bp','# Sequences','',0,' bp');
|
|
2422 $png = '';
|
|
2423 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2424 $html .= '<br />'.&insert_image($png);
|
|
2425 }
|
|
2426 }
|
|
2427 if(exists $data->{tail} || exists $data->{tail2}) {
|
|
2428 $html .= '</div><hr>';
|
|
2429 }
|
|
2430
|
|
2431
|
|
2432 #tag sequence check
|
|
2433 if(exists $data->{freqs} || exists $data->{freqs2}) {
|
|
2434 $html .= '<div class="info-header"><span class="info-header-title">Tag Sequence Check</span></div><div class="info-content">';
|
|
2435 }
|
|
2436 if(exists $data->{freqs}) {
|
|
2437 my $tagmidseq;
|
|
2438 if(exists $data->{tagmidseq}) {
|
|
2439 $tagmidseq = $data->{tagmidseq};
|
|
2440 $tagmidseq =~ s/\,/\<br \/\>/g;
|
|
2441 }
|
|
2442 if(exists $data->{pairedend} && $data->{pairedend}) {
|
|
2443 $html .= '<b>File 1</b><br />';
|
|
2444 }
|
|
2445 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value">5\'-end</td><td class="info-table-value">3\'-end</td></tr><tr><td class="info-table-type">Probability of tag sequence:</td><td class="info-table-value">'.(exists $data->{tagprob}->{5} ? $data->{tagprob}->{5}.' %' : '-').'</td><td class="info-table-value">'.(exists $data->{tagprob}->{3} ? $data->{tagprob}->{3}.' %' : '-').'</td></tr><tr><td class="info-table-type">GSMIDs or RLMIDs:</td><td class="info-table-value">'.(exists $data->{tagmidnum} ? ($data->{tagmidnum} == 0 ? 'none' : ($tagmidseq ? $tagmidseq : $data->{tagmidnum})) : '-').'</td><td class="info-table-value"> </td></tr></tbody></table><br>';
|
|
2446 $html .= '<table border="0" cellspacing="0" cellpadding="0"><tr><td>'.&insert_image($FREQCHART_L,undef,undef,1).'</td>';
|
|
2447 foreach my $pos (sort {$a <=> $b} keys %{$data->{freqs}->{5}}) {
|
|
2448 $html .= '<td valign="bottom" align="center">';
|
|
2449 foreach my $base (qw(A C G T N)) {
|
|
2450 if($data->{freqs}->{5}->{$pos}->{$base}) {
|
|
2451 $html .= &insert_image($BASE64_BASES->{$base},$data->{freqs}->{5}->{$pos}->{$base},14,1).'<br />';
|
|
2452 #'<img height="'.$data->{freqs}->{5}->{pos}->{$base}.'px" border="0" src="'..'" alt="'.$base.'" width="14px" /><br />';
|
|
2453 }
|
|
2454 }
|
|
2455 $html .= &insert_image($MMCHART_B2,6,16,1).'</td>';
|
|
2456 }
|
|
2457 $html .= '<td align="center" valign="middle"> ... </td>';
|
|
2458 foreach my $pos (sort {$a <=> $b} keys %{$data->{freqs}->{3}}) {
|
|
2459 $html .= '<td valign="bottom" align="center">';
|
|
2460 foreach my $base (qw(A C G T N)) {
|
|
2461 if($data->{freqs}->{3}->{$pos}->{$base}) {
|
|
2462 $html .= &insert_image($BASE64_BASES->{$base},$data->{freqs}->{3}->{$pos}->{$base},14,1).'<br />';
|
|
2463 }
|
|
2464 }
|
|
2465 $html .= &insert_image($MMCHART_B2,6,16,1).'</td>';
|
|
2466 }
|
|
2467 $html .= '</tr>';
|
|
2468 $html .= '<tr><td> </td>';
|
|
2469 foreach my $num (1,0,0,0,5,0,0,0,0,10,0,0,0,0,15,0,0,0,0,20,0,20,0,0,0,0,15,0,0,0,0,10,0,0,0,0,5,0,0,0,1) {
|
|
2470 $html .= '<td valign="top" align="center" style="font-size: 10px;margin: 0;">'.($num ? $num : '').' </td>';
|
|
2471 }
|
|
2472 $html .= '</tr><tr><td align="left" valign="middle"> </td><td align="center" valign="middle" colspan="41" class="pinfo"><b>Position from Sequence Ends</b></td></tr>';
|
|
2473 $html .= '</table>';
|
|
2474 }
|
|
2475 if(exists $data->{pairedend} && $data->{pairedend} && exists $data->{freqs2}) {
|
|
2476 $html .= '<br /><br /><br /><b>File 2</b><br />';
|
|
2477 $html .= '<table border="0" cellpadding="0" cellspacing="0"> <tbody><tr><td class="info-table-type"></td><td class="info-table-value">5\'-end</td><td class="info-table-value">3\'-end</td></tr><tr><td class="info-table-type">Probability of tag sequence:</td><td class="info-table-value">'.(exists $data->{tagprob2}->{5} ? $data->{tagprob2}->{5}.' %' : '-').'</td><td class="info-table-value">'.(exists $data->{tagprob2}->{3} ? $data->{tagprob2}->{3}.' %' : '-').'</td></tr></tbody></table><br>';
|
|
2478 $html .= '<table border="0" cellspacing="0" cellpadding="0"><tr><td>'.&insert_image($FREQCHART_L,undef,undef,1).'</td>';
|
|
2479 foreach my $pos (sort {$a <=> $b} keys %{$data->{freqs2}->{5}}) {
|
|
2480 $html .= '<td valign="bottom" align="center">';
|
|
2481 foreach my $base (qw(A C G T N)) {
|
|
2482 if($data->{freqs2}->{5}->{$pos}->{$base}) {
|
|
2483 $html .= &insert_image($BASE64_BASES->{$base},$data->{freqs2}->{5}->{$pos}->{$base},14,1).'<br />';
|
|
2484 #'<img height="'.$data->{freqs}->{5}->{pos}->{$base}.'px" border="0" src="'..'" alt="'.$base.'" width="14px" /><br />';
|
|
2485 }
|
|
2486 }
|
|
2487 $html .= &insert_image($MMCHART_B2,6,16,1).'</td>';
|
|
2488 }
|
|
2489 $html .= '<td align="center" valign="middle"> ... </td>';
|
|
2490 foreach my $pos (sort {$a <=> $b} keys %{$data->{freqs2}->{3}}) {
|
|
2491 $html .= '<td valign="bottom" align="center">';
|
|
2492 foreach my $base (qw(A C G T N)) {
|
|
2493 if($data->{freqs2}->{3}->{$pos}->{$base}) {
|
|
2494 $html .= &insert_image($BASE64_BASES->{$base},$data->{freqs2}->{3}->{$pos}->{$base},14,1).'<br />';
|
|
2495 }
|
|
2496 }
|
|
2497 $html .= &insert_image($MMCHART_B2,6,16,1).'</td>';
|
|
2498 }
|
|
2499 $html .= '</tr>';
|
|
2500 $html .= '<tr><td> </td>';
|
|
2501 foreach my $num (1,0,0,0,5,0,0,0,0,10,0,0,0,0,15,0,0,0,0,20,0,20,0,0,0,0,15,0,0,0,0,10,0,0,0,0,5,0,0,0,1) {
|
|
2502 $html .= '<td valign="top" align="center" style="font-size: 10px;margin: 0;">'.($num ? $num : '').' </td>';
|
|
2503 }
|
|
2504 $html .= '</tr><tr><td align="left" valign="middle"> </td><td align="center" valign="middle" colspan="41" class="pinfo"><b>Position from Sequence Ends</b></td></tr>';
|
|
2505 $html .= '</table>';
|
|
2506 }
|
|
2507 if(exists $data->{freqs} || exists $data->{freqs2}) {
|
|
2508 $html .= '</div><hr>';
|
|
2509 }
|
|
2510
|
|
2511 #Sequence duplicates
|
|
2512 if(exists $data->{dubslength} || exists $data->{dubscounts}) {
|
|
2513 $html .= '<div class="info-header"><span class="info-header-title">Sequence Duplication</span></div>';
|
|
2514 }
|
|
2515 my %dubs;
|
|
2516 if(exists $data->{dubscounts} && keys %{$data->{dubscounts}}) {
|
|
2517 my $exactonly = $data->{exactonly}||0;
|
|
2518 foreach my $n (keys %{$data->{dubscounts}}) {
|
|
2519 foreach my $s (keys %{$data->{dubscounts}->{$n}}) {
|
|
2520 $dubs{$s}->{count} += $data->{dubscounts}->{$n}->{$s} * $n;
|
|
2521 $dubs{$s}->{max} = $n unless(exists $dubs{$s}->{max} && $dubs{$s}->{max} > $n);
|
|
2522 $dubs{all} += $data->{dubscounts}->{$n}->{$s} * $n;
|
|
2523 }
|
|
2524 }
|
|
2525 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value"># Sequences</td> <td class="info-table-value">Max duplicates</td></tr><tr><td class="info-table-type">Exact duplicates:</td><td class="info-table-value">'.(exists $dubs{0}->{count} ? &addCommas($dubs{0}->{count}).' ('.sprintf("%.2f",100/$data->{numseqs}*$dubs{0}->{count}).' %)' : 0).'</td><td class="info-table-value">'.($dubs{0}->{max}||0).'</td></tr><tr><td class="info-table-type">Exact duplicates with reverse complements:</td><td class="info-table-value">'.(exists $dubs{3}->{count} ? &addCommas($dubs{3}->{count}).' ('.sprintf("%.2f",100/$data->{numseqs}*$dubs{3}->{count}).' %)' : 0).'</td> <td class="info-table-value">'.($dubs{3}->{max}||0).'</td></tr>';
|
|
2526 unless($exactonly) {
|
|
2527 $html .= '<tr><td class="info-table-type">5\' duplicates</td><td class="info-table-value">'.(exists $dubs{1}->{count} ? &addCommas($dubs{1}->{count}).' ('.sprintf("%.2f",100/$data->{numseqs}*$dubs{1}->{count}).' %)' : 0).'</td> <td class="info-table-value">'.($dubs{1}->{max}||0).'</td></tr><tr><td class="info-table-type">3\' duplicates</td><td class="info-table-value">'.(exists $dubs{2}->{count} ? &addCommas($dubs{2}->{count}).' ('.sprintf("%.2f",100/$data->{numseqs}*$dubs{2}->{count}).' %)' : 0).'</td> <td class="info-table-value">'.($dubs{2}->{max}||0).'</td></tr><tr><td class="info-table-type">5\'/3\' duplicates with reverse complements</td><td class="info-table-value">'.(exists $dubs{4}->{count} ? &addCommas($dubs{4}->{count}).' ('.sprintf("%.2f",100/$data->{numseqs}*$dubs{4}->{count}).' %)' : 0).'</td> <td class="info-table-value">'.($dubs{4}->{max}||0).'</td></tr>';
|
|
2528 }
|
|
2529 $html .= '<tr><td class="info-table-type">Total:</td><td class="info-table-value">'.(exists $dubs{all} ? &addCommas($dubs{all}).' ('.sprintf("%.2f",100/$data->{numseqs}*$dubs{all}).' %)' : 0).'</td><td class="info-table-value">-</td></tr></tbody></table>';
|
|
2530 }
|
|
2531 if(exists $dubs{all} && $dubs{all}) {
|
|
2532 if(exists $data->{dubslength} && keys %{$data->{dubslength}}) {
|
|
2533 $surface = &createStackBarPlot(&convertOdToStackBinMatrix($data->{dubslength},5,1),'Sequence duplication level','Read Length in bp','Number of duplicates','',0,' bp');
|
|
2534 $png = '';
|
|
2535 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2536 $html .= '<br />'.&insert_image($png);
|
|
2537 }
|
|
2538 if(exists $data->{dubscounts} && keys %{$data->{dubscounts}}) {
|
|
2539 $surface = &createStackBarPlot(&convertOdToStackBinMatrix($data->{dubscounts},5,1,100),'Sequence duplication level','Number of duplicates','Number of sequences','',0);
|
|
2540 $png = '';
|
|
2541 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2542 $html .= '<br /><br />' if(exists $data->{dubslength});
|
|
2543 $html .= &insert_image($png);
|
|
2544 my %dubsmax;
|
|
2545 my $count = 1;
|
|
2546 foreach my $n (sort {$b <=> $a} keys %{$data->{dubscounts}}) {
|
|
2547 foreach my $s (keys %{$data->{dubscounts}->{$n}}) {
|
|
2548 foreach my $i (1..$data->{dubscounts}->{$n}->{$s}) {
|
|
2549 $dubsmax{$count++}->{$s} = $n;
|
|
2550 last unless($count <= 100);
|
|
2551 }
|
|
2552 last unless($count <= 100);
|
|
2553 }
|
|
2554 last unless($count <= 100);
|
|
2555 }
|
|
2556 $surface = &createStackBarPlot(&convertOdToStackBinMatrix(\%dubsmax,5,1,100),'Sequence duplication level','Sequence','Number of duplicates','',0);
|
|
2557 $png = '';
|
|
2558 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2559 $html .= '<br /><br />' if(exists $data->{dubslength});
|
|
2560 $html .= &insert_image($png);
|
|
2561 }
|
|
2562 }
|
|
2563 if(exists $data->{dubslength} || exists $data->{dubscounts}) {
|
|
2564 $html .= '</div><hr>';
|
|
2565 }
|
|
2566
|
|
2567 #Sequence complexity
|
|
2568 if(exists $data->{compldust} || exists $data->{complentropy}) {
|
|
2569 $html .= '<div class="info-header"><span class="info-header-title">Sequence Complexity</span></div>';
|
|
2570 if(exists $data->{complvals}) {
|
|
2571 my $complseq;
|
|
2572 foreach my $d (keys %{$data->{complvals}}) {
|
|
2573 foreach my $m ('minseq','maxseq') {
|
|
2574 $complseq = $data->{complvals}->{$d}->{$m};
|
|
2575 $complseq = substr($complseq,0,797).'...' if(length($complseq) > 800);
|
|
2576 $complseq =~ s/(.{60})/$1\<br \/\>/g;
|
|
2577 $data->{complvals}->{$d}->{$m} = $complseq;
|
|
2578 }
|
|
2579 }
|
|
2580 }
|
|
2581 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value">Value</td><td class="info-table-value">Sequence</td></tr><tr><td class="info-table-type">Minimum DUST score:</td><td class="info-table-value">'.(exists $data->{complvals}->{dust}->{minval} ? $data->{complvals}->{dust}->{minval} : '-').'</td><td class="info-table-value sequencetext">'.(exists $data->{complvals}->{dust}->{minseq} ? $data->{complvals}->{dust}->{minseq} : '').'</td></tr><tr><td class="info-table-type">Maximum DUST score:</td><td class="info-table-value">'.(exists $data->{complvals}->{dust}->{maxval} ? $data->{complvals}->{dust}->{maxval} : '').'</td><td class="info-table-value sequencetext">'.(exists $data->{complvals}->{dust}->{maxseq} ? $data->{complvals}->{dust}->{maxseq} : '').'</td></tr><tr><td class="info-table-type">Minimum Entropy value:</td><td class="info-table-value">'.(exists $data->{complvals}->{entropy}->{minval} ? $data->{complvals}->{entropy}->{minval} : '').'</td><td class="info-table-value sequencetext">'.(exists $data->{complvals}->{entropy}->{minseq} ? $data->{complvals}->{entropy}->{minseq} : '').'</td></tr><tr><td class="info-table-type">Maximum Entropy value:</td><td class="info-table-value">'.(exists $data->{complvals}->{entropy}->{maxval} ? $data->{complvals}->{entropy}->{maxval} : '').'</td><td class="info-table-value sequencetext">'.(exists $data->{complvals}->{entropy}->{maxseq} ? $data->{complvals}->{entropy}->{maxseq} : '').'</td></tr> </tbody></table><br />';
|
|
2582 }
|
|
2583 if(exists $data->{compldust}) {
|
|
2584 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{compldust},0),undef,'Sequence complexity distribution','Mean sequence complexity (DUST scores)','Number of sequences','',1);
|
|
2585 $png = '';
|
|
2586 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2587 $html .= &insert_image($png);
|
|
2588 }
|
|
2589 if(exists $data->{complentropy}) {
|
|
2590 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{complentropy},0),undef,'Sequence complexity distribution','Mean sequence complexity (Entropy values)','Number of sequences','',1);
|
|
2591 $png = '';
|
|
2592 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2593 $html .= '<br /><br />' if(exists $data->{compldust});
|
|
2594 $html .= &insert_image($png);
|
|
2595 }
|
|
2596 if(exists $data->{compldust} || exists $data->{complentropy}) {
|
|
2597 $html .= '</div><hr>';
|
|
2598 }
|
|
2599
|
|
2600 #Dinucleotide odd ratio PCA - microbial/viral
|
|
2601 if(exists $data->{dinucodds} && keys %{$data->{dinucodds}}) {
|
|
2602 $html .= '<div class="info-header"><span class="info-header-title">Dinucleotide Odds Ratios</span></div>';
|
|
2603 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"> </td>';
|
|
2604 foreach my $d (map {join("/",(m/../g ))} sort keys %{$data->{dinucodds}}) {
|
|
2605 $html .= '<td class="info-table-value">'.$d.'</td>';
|
|
2606 }
|
|
2607 $html .= '</tr><tr><td class="info-table-type">Odds ratio</td>';
|
|
2608 foreach my $d (map {sprintf("%.4f",$data->{dinucodds}->{$_})} sort keys %{$data->{dinucodds}}) {
|
|
2609 $html .= '<td class="info-table-value">'.$d.'</td>';
|
|
2610 }
|
|
2611 $html .= '</tr></tbody></table><br />';
|
|
2612 my @new = map {$data->{dinucodds}->{$_}} sort keys %{$data->{dinucodds}};
|
|
2613 $surface = &createOddsRatioPlot($data->{dinucodds},'Odds ratios','Dinucleotide','Odds ratio','');
|
|
2614 $png = '';
|
|
2615 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2616 $html .= &insert_image($png);
|
|
2617 #$surface = &createPCAPlot(&convertToPCAValues(\@new,'m'),'PCA','1st Principal Component Score','2nd Principal Component Score','');
|
|
2618 #$png = '';
|
|
2619 #$surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2620 #$html .= '<br /><br />';
|
|
2621 #$html .= &insert_image($png);
|
|
2622 #$surface = &createPCAPlot(&convertToPCAValues(\@new,'v'),'PCA','1st Principal Component Score','2nd Principal Component Score','');
|
|
2623 #$png = '';
|
|
2624 #$surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
|
|
2625 #$html .= '<br /><br />';
|
|
2626 #$html .= &insert_image($png);
|
|
2627 $html .= '</div>';
|
|
2628 }
|
|
2629
|
|
2630 $html .= '</div>';
|
|
2631 $html .= &footer();
|
|
2632
|
|
2633 #write html to file
|
|
2634 $file = &getFileName('.html');
|
|
2635 open(FH, ">$file") or &printError("Can't open file ".$file.": $!");
|
|
2636 print FH $html;
|
|
2637 close(FH);
|
|
2638 &printLog("Done with HTML data");
|
|
2639 }
|
|
2640
|
|
2641 sub insert_image {
|
|
2642 my ($data, $height, $width, $noencode) = @_;
|
|
2643 my $content .= '<img border="0" '.($height ? 'height="'.$height.'"' : '').($width ? 'width="'.$width.'"' : '').' src="'.($noencode ? "data:image/png;base64,".$data : &inline_image($data)).'" />'."\n";
|
|
2644 return $content;
|
|
2645 }
|
|
2646
|
|
2647 sub inline_image {
|
|
2648 return "data:image/png;base64,".MIME::Base64::encode_base64($_[0]);
|
|
2649 }
|
|
2650
|
|
2651 sub convertIntToString {
|
|
2652 my $int = shift;
|
|
2653 $int =~ s/(.{2})/chr(hex($1))/eg;
|
|
2654 return $int;
|
|
2655 }
|