comparison prinseq-graphs-noPCA.pl @ 0:9790cfb46d03 draft default tip

Uploaded
author bgruening
date Mon, 07 Oct 2013 15:34:32 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9790cfb46d03
1 #!/usr/bin/perl
2
3 #===============================================================================
4 # Author: Robert SCHMIEDER, Computational Science Research Center @ SDSU, CA
5 #
6 # File: prinseq-graphs
7 # Date: 2012-12-22
8 # Version: 0.6 graphs
9 #
10 # Usage:
11 # prinseq-graphs [options]
12 #
13 # Try 'prinseq-graphs-noPCA -h' for more information.
14 #
15 # Purpose: PRINSEQ will help you to preprocess your genomic or metagenomic
16 # sequence data in FASTA or FASTQ format. The graphs version allows
17 # users of the lite version to generate graphs similar to the web
18 # version.
19 #
20 # Bugs: Please use http://sourceforge.net/tracker/?group_id=315449
21 #
22 #===============================================================================
23
24 use strict;
25 use warnings;
26
27 use Getopt::Long;
28 use Pod::Usage;
29 use File::Temp qw(tempfile); #for output files
30 use Fcntl qw(:flock SEEK_END); #for log file
31 use Cwd;
32 use JSON;
33 use Cairo;
34 #use Statistics::PCA;
35 use MIME::Base64;
36 use File::Basename;
37 use Data::Dumper; ###
38
39 $| = 1; # Do not buffer output
40
41 my $PI = 4 * atan2(1, 1);
42 my $LOG62 = log(62);
43 my $DINUCODDS_VIR = [
44 [qw(1.086940308 0.98976932 1.034167044 0.880024041 1.070421277 0.990687084 0.890945575 1.069957074 0.92465631 0.803973303)],
45 [qw(1.101064857 0.986812783 1.038299155 0.896162618 1.081652847 0.976365237 0.867445186 1.06727283 0.94688543 0.768007295)],
46 [qw(1.071548411 0.912204166 1.196914981 0.80628184 1.294201511 1.148517794 0.269295791 1.033948026 0.895951033 0.623192149)],
47 [qw(1.090253719 0.907428629 1.203991784 0.786359294 1.281499107 1.145421568 0.235974709 1.033437274 0.899580091 0.631699771)],
48 [qw(1.075864745 1.003413074 1.01872902 0.897841689 0.980373171 1.05854979 0.934262259 1.052477953 0.88145851 0.889239724)],
49 [qw(1.101890467 1.030028291 1.019912674 0.84191395 1.0015174 1.069546264 0.900151602 0.996269395 0.889195343 0.904039022)],
50 [qw(1.152417359 0.855028574 0.91164793 1.017415486 1.114163672 1.128353311 0.846355573 0.916745489 1.206820475 0.811014651)],
51 [qw(1.142454218 0.8635465 0.923406967 1.026242747 1.134445058 1.131747833 0.79793368 0.920767641 1.179468556 0.799770057)],
52 [qw(1.124462747 0.873556143 0.945627041 1.013755408 1.159866153 1.096259526 0.757315047 0.972924919 1.105562567 0.772731886)],
53 [qw(1.143826972 0.866968779 0.995740249 0.945859278 1.109590621 1.089305083 0.76048874 0.971561388 1.157101408 0.792923027)],
54 [qw(1.131900141 0.82776996 0.996204924 0.999433455 1.024692372 1.071176333 0.921026216 1.088936699 1.054010776 0.773498892)],
55 [qw(1.042180476 0.930180412 1.019242897 0.98909997 1.006666828 1.046708539 0.959492164 1.011183418 1.055168776 0.937433818)],
56 [qw(1.086515695 0.985345815 0.930914307 0.969581792 1.043010232 1.087463712 0.939482285 0.990551965 0.954752469 0.893972874)],
57 [qw(1.096657826 0.950117614 0.936195529 0.965619788 1.114975275 1.077011195 0.843153131 0.989128406 1.043790912 0.840634731)],
58 [qw(1.158030995 0.935307365 0.874812261 1.056236525 1.117171274 0.937484692 1.057442372 0.970079538 1.174848738 0.725071711)],
59 [qw(1.15591506 0.93000227 0.883538923 1.0567652 1.095730954 0.944489906 1.074229471 0.983993745 1.156051409 0.726688465)],
60 [qw(1.205726473 0.924439339 1.049457756 0.805718412 0.975472778 1.07581991 0.726992211 1.075025787 0.8704929 0.726672843)],
61 [qw(1.188544681 0.95239611 1.049066985 0.790031334 1.038632598 1.056749787 0.665197397 1.057566244 0.862429061 0.708982398)],
62 [qw(1.063631482 0.925593715 1.014869316 0.944904401 1.119690731 1.325971834 0.273781451 0.943347677 1.06438014 0.920825904)],
63 [qw(1.077560287 0.911888545 1.044147857 0.927758054 1.058535939 1.296838544 0.421514996 0.945722451 1.128317986 0.926419928)],
64 [qw(1.163753415 0.989905668 0.893599328 0.955641844 1.176047687 0.941559156 0.950641089 0.959741692 1.100815282 0.72491925)],
65 [qw(1.139253929 0.946297517 0.922096125 1.024801537 1.205206793 0.968818717 0.915801342 0.971626058 1.107569276 0.627623404)]
66 ];
67 my $DINUCODDS_MIC = [
68 [qw(1.13127323 0.853587195 0.911041047 1.104520778 1.065586428 1.021434164 0.999734139 1.063684014 1.078035184 0.733596552)],
69 [qw(1.173267344 0.840539337 0.919534602 1.068050141 1.062394214 1.051999071 0.96770576 1.035511729 1.095600433 0.72328141)],
70 [qw(1.172939786 0.84567902 0.911836259 1.106288994 1.05351787 1.026143368 1.002308358 1.066319771 1.094918797 0.710733535)],
71 [qw(1.073527689 0.850290918 0.978455025 1.080882178 1.111174765 1.010754115 0.895668707 1.072980666 1.079304608 0.754057386)],
72 [qw(1.08807747 0.837444678 0.95824965 1.097310298 1.118897971 1.030863881 0.886827263 1.072349394 1.07406322 0.733440096)],
73 [qw(1.071685485 0.861055813 0.966566865 1.090268118 1.112945761 1.012538936 0.909535491 1.063745603 1.071156598 0.755770377)],
74 [qw(1.142698587 0.867936867 1.000612099 0.977934257 1.111801746 1.018318601 0.788556794 0.987763594 1.184649653 0.784776176)],
75 [qw(1.134560074 0.876651844 0.998190253 0.995723123 1.128448077 1.014172324 0.781776188 0.971020602 1.182411449 0.786449476)],
76 [qw(1.180029632 0.787899325 1.01316945 0.932268406 1.077837263 1.211699678 0.612128817 1.033036699 1.157314398 0.74940288)],
77 [qw(1.160925546 0.788308899 1.003702496 0.965371236 1.076051693 1.188304271 0.641536444 1.070331188 1.124067192 0.740126813)],
78 [qw(1.173873006 0.790118011 1.014718833 0.937979878 1.07453725 1.207167373 0.622279064 1.046150047 1.145627707 0.742212886)],
79 [qw(1.128383111 0.870541389 0.987269741 0.98353238 1.115643879 1.040107028 0.774505865 1.010896432 1.164757274 0.775254395)],
80 [qw(1.15297511 0.853883985 0.956393231 1.000027661 1.139915472 1.01355294 0.838843622 1.015553125 1.216219741 0.70447264)],
81 [qw(1.148264236 0.852123859 0.974568293 0.985455546 1.13192373 1.015879393 0.828987111 1.016820786 1.216647853 0.71634006)],
82 [qw(1.12933788 0.831777975 1.005434367 0.991081409 1.126146895 1.07421504 0.69343913 1.054032466 1.14809591 0.728541157)],
83 [qw(1.124157235 0.828112691 1.022348424 0.983822386 1.143028487 1.081830005 0.672594435 1.05685982 1.149537403 0.684432106)],
84 [qw(1.128029586 0.841853305 1.00983936 0.967179139 1.122524003 1.094555807 0.659238308 1.061578854 1.1243601 0.740148171)],
85 [qw(1.093521636 0.855071052 0.929160818 1.203773691 1.178257185 0.881341255 1.078305505 1.051988532 1.169143967 0.555057308)],
86 [qw(1.073737278 0.877396537 0.968017446 1.124155374 1.166244435 0.909044208 0.999147578 1.071098934 1.120156138 0.607444953)],
87 [qw(1.092150184 0.863407008 0.927040387 1.185387013 1.171670826 0.882276859 1.083058605 1.048379554 1.168635365 0.580337997)]
88 ];
89 my $DATA_VIR = [
90 [2,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
91 [3,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
92 [42,2,'Human (nasal)',[127/255, 127/255, 255/255,1]],
93 [43,2,'Human (nasal)',[127/255, 127/255, 255/255,1]],
94 [45,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
95 [49,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
96 [52,3,'Human (sputum)',[127/255, 127/255, 255/255,1]],
97 [54,3,'Human (sputum)',[127/255, 127/255, 255/255,1]],
98 [55,4,'Human (sputum, CF)',[127/255, 127/255, 255/255,1]],
99 [57,4,'Human (sputum, CF)',[127/255, 127/255, 255/255,1]],
100 [88,5,'Freshwater (Hot spring)',[127/255, 127/255, 255/255,1]],
101 [89,5,'Freshwater (Hot spring)',[127/255, 127/255, 255/255,1]],
102 [98,6,'Freshwater (Antartic lake)',[127/255, 127/255, 255/255,1]],
103 [99,6,'Freshwater (Antartic lake)',[127/255, 127/255, 255/255,1]],
104 [100,7,'Freshwater (reclaimed)',[127/255, 127/255, 255/255,1]],
105 [102,7,'Freshwater (reclaimed)',[127/255, 127/255, 255/255,1]],
106 [153,8,'Mouse (brain tissue)',[127/255, 127/255, 255/255,1]],
107 [154,8,'Mouse (brain tissue)',[127/255, 127/255, 255/255,1]],
108 [202,9,'Fish (gut)',[127/255, 127/255, 255/255,1]],
109 [206,9,'Fish (gut)',[127/255, 127/255, 255/255,1]],
110 [209,10,'Mosquito',[127/255, 127/255, 255/255,1]],
111 [211,10,'Mosquito',[127/255, 127/255, 255/255,1]],
112 ['U',0,'User input',[255/255, 127/255, 127/255,1]]
113 ];
114 my $DATA_MIC = [
115 [17,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
116 [20,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
117 [22,1,'Human (fecal)',[127/255, 127/255, 255/255,1]],
118 [63,2,'Mouse (fecal)',[127/255, 127/255, 255/255,1]],
119 [65,2,'Mouse (fecal)',[127/255, 127/255, 255/255,1]],
120 [68,2,'Mouse (fecal)',[127/255, 127/255, 255/255,1]],
121 [93,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
122 [95,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
123 [109,4,'Marine (open ocean)',[127/255, 127/255, 255/255,1]],
124 [110,4,'Marine (open ocean)',[127/255, 127/255, 255/255,1]],
125 [111,4,'Marine (open ocean)',[127/255, 127/255, 255/255,1]],
126 [120,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
127 [124,5,'Marine (estuary)',[127/255, 127/255, 255/255,1]],
128 [125,5,'Marine (estuary)',[127/255, 127/255, 255/255,1]],
129 [134,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
130 [146,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
131 [148,3,'Marine (coastal)',[127/255, 127/255, 255/255,1]],
132 [201,6,'Fish (gut)',[127/255, 127/255, 255/255,1]],
133 [203,7,'Fish (slime)',[127/255, 127/255, 255/255,1]],
134 [205,6,'Fish (gut)',[127/255, 127/255, 255/255,1]],
135 ['U',0,'User input',[255/255, 127/255, 127/255,1]]
136 ];
137 my $BASE64_BASES = {A => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
138 bWFnZVJlYWR5ccllPAAAAzVJREFUeNrsnMFxo0AQRWe7fJcyMBnYGawyMIe9a0JQJtbefDPOAB33
139 JmdgZyBlsIpgl9lCLkwJA/N7uhu0XTXlkstI8Oh+agbG355+/XDC8VaNu8htf1ZjI73DJPx59wCg
140 EN4phDQkNAsWGqCkIeUM7zFrSL7OBDS+VyObMyQrZWsSUlZnACfw5dwgcZ/5BZPfTEHyEwCvColL
141 2O24q/uuWUDKJ1TGKpCCsB8Sn4Dl1CGlbvxEBD51SCIlR4lL4VYAUnKB08SzSCSbUkFKLWxRgdMM
142 sii5wK1BOlksuRSQVoCwA9wjIPDVVCAhWVTWw1SZc0MK8lxHblvUP7fA569TCJyMZFET0qEa75ay
143 iRtSrDwDlLfG663CPohAQoRdtF4jXrrlFjgZKbU2lN/VeLFSclyQlkAzt6s95BiziVXgXJByFz/7
144 WH7x+6OFbOKCFCvL0wUffeUqFYFzQELu7/eVFAKJTeCkmEVDIARXvWqXHAoJEXbwzZ4BZJ/AM21I
145 iLCLESV50swmMlxqzZ6pnCqkDBD2a0dvlErguRYkiSw6x16zZyKlDy4FwDbjARE4AYBihf1Se0YS
146 EnRSaSJZpNozxUAKaRv7QNYR/KZSEXgMpI1CFjUhifdMMZBypUzgAB0lcIoAFDv72J6ijY0tuL1P
147 DckrZ5GrQSM90yYlpMxh9/cfq/GHaSBPq4xeVUBCWWQt/kMaEKNWFQyFJPVAlmRsuCF5N7/wnJCW
148 TvaBLKkYLHC60iwadWzEWbtzFXgfpNUMhT06CeiKS23wMVKPsNdXAKlX4HTlWTToWG8SQdoxXK3H
149 zA7E3r0JAr/vmqXogoSu3w87vFeA9AwK3I8pN+Rr/6gAKAQ669m5qoA6hJ0r7mxsoE/Hda4qoA6i
150 CzDttaJI0TMRc6mFKdqDIqS9w2YtLy4LowTC1o4tdzYR83VaaQASu8Dpwh/ERuzta+441H0am8Cp
151 1TwuJp5FSQROTB32yRgk9Om4TwI/Q8oc9g9XCmcv2LKJmIRtERL6LfexqoAYSo3r9nUKgb+D7+HP
152 kFBhW8wi1p6JHL4KujQMCRX4v1UFARJyu2infBky5KIXPYn+rwADAOL8qKxS08x7AAAAAElFTkSu
153 QmCC',
154 C => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
155 bWFnZVJlYWR5ccllPAAAA7BJREFUeNrsnM1xqzAQxxUNDfBKwCWQCt7g+7vgEkgJ5pRDTnYJpgRz
156 eXeYVBBKCCU8SvAzM6sZxuMPaXclQaydYYKTGPBv/7tagdYvp9NJTO3Px6dwZPl5S2A/hdf3rD9v
157 1eT1nvuC/r7/vvr7SLizDGAUEzgmNr5nN3mt9ksAWNu6cNuQYoCyhX0bpmANoK4K9tlMWrrw0euH
158 8/YPPkTsQKkxnIv9nNKSZ79BQb5sy3kNkjnnfMMFzsFiUHNDVZVk9FyDTMguBowvGDS8QTpejDpz
159 tARAZT4gNRr1zZyswYCSrk84Azuahp58MkAqoR9NkjkG0m7BgG5V76yQcgtD/B6mFqvz9nJlW8Pf
160 uacdha6zI0P6B6YLbGH6UGv+b3tRbnCNpgdwDpuSOEr9cU61AXXUBOX9YlJWolOVS4MwyxnUs2L6
161 cAr2G1MhzAKJKu8K1DMw55UKYFHVlFMhYe//KKuZPH7v+CXxGCyQsNZbBjTNUzURUoyFlFEmhhAK
162 g3BjVDUVWEg5MV90DgvEy3vgppZi66ScGAKurTJMDxXAvXuPPMLGqUYy7T1A6mBLHxSlRg6MMPLT
163 hOTLWnBuNVELKS9GD5I2ttDzCalkSOJaiTsmKKkVP8wks4qE4xHNKyRKhd0HSCHcyCPb4LDC9g4p
164 DqFmL9yGZ4EUkrbhBDeYBSWJoKQAKViAFCAFSLOERKl1kqCkoKSgJFMl9QGSPUijpQHSE6rppypJ
165 tU5Y7Qig3IL1vZ5ydNJ403BcdzSuZBt71Rp4ncxJSbFHSNmN36melxMAK6iQhgWrSWf9wu6KylBL
166 byiQCo+hliIcqlTmFFLmaZSjOKfCQFIrNLDmuqUrIULqsHO3muhVl+UAxSl3F3lIDQlSHhMZ9XAQ
167 w9tKqOlAUs2/lBA4OAgz6jlIkDjUlFsEpTqOqGsXeiokqppUfmqYQy+BY1Lz3sPPJg0O1DPkDXSL
168 5xV1fjEAanVKHZM7kxtG72ObCjN4L9eAoLUQ36SVqwNFcdQ/GWzTUL6V+7aTn5zhqh0dpl/DUYLE
169 ueZm6lshhHDbEd4Lg8WnmAcBG7H8dZFGqQMDSfWa9QsG1NmGpOS6XiAoVC+vJMb164JCr8TWe9SH
170 kwOAqmcO6I1SEEvGON/MEI5KC5QWL9bH3KOaVjNSVQXXQ15XLi14TrW0+1r03kIKYGtrlRYvdM0h
171 dUPlvMI5WQeTyIFXW/Cqeu5VMIPpheUuTZdfobifjDTTXvxYcz5YXsBxtrD+vwADADoA0kx0ZQr1
172 AAAAAElFTkSuQmCC',
173 G => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
174 bWFnZVJlYWR5ccllPAAAA6lJREFUeNrsnN2RqjAUgANjA9wS3CefsQQsAUtgS8AStAQsQUuQEuR5
175 nzYlSAkumTnZy/UKJpyTEANn5syqs0L8cn4hIbjf70zI19eaWZS40aT1Pm80Uvhe1ei59b6Ez8hl
176 tbr+vl5YhpLCa8xx4h54RqCZhCQsI9OwEkYEr2700OgRXqMlNARn3+gN/kbMrrTPXzS6dA2SHFzO
177 3BBhyd8wrtEhJTAYV+A8Sg7ji8eCJGbpQmHWhkWM7wrJwxqkCODk7L3kpDvmBWJW3sF6+qyfQRY0
178 YknvDqgNKjUByRdAUgqVYK4DKQJ/9gWQ/E0FJSQl6gNExIVdo9tGgw5dw/8cDJw/fhXIA8UGN8cW
179 ZA9ybPVaQ4vEjHDSapgI/qzBDRXjEBUgAeaj0U8EIAl5Dcepidwux7hbQTRTG3ApTmyRa6LOP+3q
180 M0OFLybIk1fwQ0pmRjhMQEVgTdkQSHsCQBti6+mzVE5gTVqQMmS6l4BqZkckKGymi3UhYQa8tQio
181 7Xo7gisaSpASZHrdWXCxvrqLI61JqcFNkW52HLmSPmrG0yOA5ezfGw2dxaSI8t9s+GXXjcFMppOp
182 bj21WgWhoHMyX90tSRCAuAOAZEws4XecdS6LPJOFik9qmq0rsqE6UEic1VyCxExBWiJcrRoh5Y8C
183 CeNqJfNUKCFVU4GEaUP4DGm2JDQkb63oEVKEyGz1lCCxGZJaMemKiKL2PpJeuiDNme0NLck7SNFU
184 INUzJLOQ2AzptSxnSLO7kaTyyGdQVJC8drmQsJOPpwJpDt4KkDCXYBPisYmbCgFSuSl3qxHuFk3B
185 krDWlE0FEiZ4p1OBdEZmuHgKkDjSmrIpQMJaU2Yg0zkJCXtPfz8FSDUSVOwTqL4rk9gtCvnI2Y6s
186 6e6DRLEg6zRSfBLnvNqAJOST4BwXyxZVMOLtZq8gcUazMOtkIUaJrHozUYKo3C2hWm6cgwtQu5/c
187 qV2Y6h1VINUMv4C8nfUuoBnyOALOHSzU6GWaQOOBLntmZue2XDLMe4rYpHWVwcbu8XK1uv4uTNXZ
188 zb1j/z+thkJS1xtj3Tu4W+bxYq22JWEgyZ1APoPaPhbSQ9YCSFC+rbYVE//xLC4OXTAhQR08ASTi
189 7bqr1AkJDr59YziiUP7zarIplt6cu8zUcTjKu8Gp1idxsCjXg/qB/d1yrzxO6pVuJcyQS6VCBWEh
190 GNpiBYYfoSiLz/0IYM6gg/rO9qbAwOJzJmVrgd0l3pdEGFXGbUP6EWAA2LwDwtC8jpAAAAAASUVO
191 RK5CYII=',
192 T => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
193 bWFnZVJlYWR5ccllPAAAALRJREFUeNrs09ENQDAUQFHEXlhAYgJWMJnEBLqBUWxQFkCC/si5yftq
194 mzYnaR5jzM4KXXu++J9CNc311YYi022QIEGCBAkSJEiCBCll5c16k+DO4Zj+4dnxmPXj92xvkZYE
195 SPWLs2uiN/lukCBBggQJkiBBggQJEiRIggQJEiRIkCBBEiRIkCBBggRJkCBBggQJEiRICCBBggQJ
196 EiRIggQJEiRIkCAJEiRIkCBBggRJ1+0CDAAzsw5U48snWgAAAABJRU5ErkJggg==',
197 N => 'iVBORw0KGgoAAAANSUhEUgAAAEkAAABJCAYAAABxcwvcAAAACXBIWXMAAAsTAAALEwEAmpwYAAAK
198 T2lDQ1BQaG90b3Nob3AgSUNDIHByb2ZpbGUAAHjanVNnVFPpFj333vRCS4iAlEtvUhUIIFJCi4AU
199 kSYqIQkQSoghodkVUcERRUUEG8igiAOOjoCMFVEsDIoK2AfkIaKOg6OIisr74Xuja9a89+bN/rXX
200 Pues852zzwfACAyWSDNRNYAMqUIeEeCDx8TG4eQuQIEKJHAAEAizZCFz/SMBAPh+PDwrIsAHvgAB
201 eNMLCADATZvAMByH/w/qQplcAYCEAcB0kThLCIAUAEB6jkKmAEBGAYCdmCZTAKAEAGDLY2LjAFAt
202 AGAnf+bTAICd+Jl7AQBblCEVAaCRACATZYhEAGg7AKzPVopFAFgwABRmS8Q5ANgtADBJV2ZIALC3
203 AMDOEAuyAAgMADBRiIUpAAR7AGDIIyN4AISZABRG8lc88SuuEOcqAAB4mbI8uSQ5RYFbCC1xB1dX
204 Lh4ozkkXKxQ2YQJhmkAuwnmZGTKBNA/g88wAAKCRFRHgg/P9eM4Ors7ONo62Dl8t6r8G/yJiYuP+
205 5c+rcEAAAOF0ftH+LC+zGoA7BoBt/qIl7gRoXgugdfeLZrIPQLUAoOnaV/Nw+H48PEWhkLnZ2eXk
206 5NhKxEJbYcpXff5nwl/AV/1s+X48/Pf14L7iJIEyXYFHBPjgwsz0TKUcz5IJhGLc5o9H/LcL//wd
207 0yLESWK5WCoU41EScY5EmozzMqUiiUKSKcUl0v9k4t8s+wM+3zUAsGo+AXuRLahdYwP2SycQWHTA
208 4vcAAPK7b8HUKAgDgGiD4c93/+8//UegJQCAZkmScQAAXkQkLlTKsz/HCAAARKCBKrBBG/TBGCzA
209 BhzBBdzBC/xgNoRCJMTCQhBCCmSAHHJgKayCQiiGzbAdKmAv1EAdNMBRaIaTcA4uwlW4Dj1wD/ph
210 CJ7BKLyBCQRByAgTYSHaiAFiilgjjggXmYX4IcFIBBKLJCDJiBRRIkuRNUgxUopUIFVIHfI9cgI5
211 h1xGupE7yAAygvyGvEcxlIGyUT3UDLVDuag3GoRGogvQZHQxmo8WoJvQcrQaPYw2oefQq2gP2o8+
212 Q8cwwOgYBzPEbDAuxsNCsTgsCZNjy7EirAyrxhqwVqwDu4n1Y8+xdwQSgUXACTYEd0IgYR5BSFhM
213 WE7YSKggHCQ0EdoJNwkDhFHCJyKTqEu0JroR+cQYYjIxh1hILCPWEo8TLxB7iEPENyQSiUMyJ7mQ
214 AkmxpFTSEtJG0m5SI+ksqZs0SBojk8naZGuyBzmULCAryIXkneTD5DPkG+Qh8lsKnWJAcaT4U+Io
215 UspqShnlEOU05QZlmDJBVaOaUt2ooVQRNY9aQq2htlKvUYeoEzR1mjnNgxZJS6WtopXTGmgXaPdp
216 r+h0uhHdlR5Ol9BX0svpR+iX6AP0dwwNhhWDx4hnKBmbGAcYZxl3GK+YTKYZ04sZx1QwNzHrmOeZ
217 D5lvVVgqtip8FZHKCpVKlSaVGyovVKmqpqreqgtV81XLVI+pXlN9rkZVM1PjqQnUlqtVqp1Q61Mb
218 U2epO6iHqmeob1Q/pH5Z/YkGWcNMw09DpFGgsV/jvMYgC2MZs3gsIWsNq4Z1gTXEJrHN2Xx2KruY
219 /R27iz2qqaE5QzNKM1ezUvOUZj8H45hx+Jx0TgnnKKeX836K3hTvKeIpG6Y0TLkxZVxrqpaXllir
220 SKtRq0frvTau7aedpr1Fu1n7gQ5Bx0onXCdHZ4/OBZ3nU9lT3acKpxZNPTr1ri6qa6UbobtEd79u
221 p+6Ynr5egJ5Mb6feeb3n+hx9L/1U/W36p/VHDFgGswwkBtsMzhg8xTVxbzwdL8fb8VFDXcNAQ6Vh
222 lWGX4YSRudE8o9VGjUYPjGnGXOMk423GbcajJgYmISZLTepN7ppSTbmmKaY7TDtMx83MzaLN1pk1
223 mz0x1zLnm+eb15vft2BaeFostqi2uGVJsuRaplnutrxuhVo5WaVYVVpds0atna0l1rutu6cRp7lO
224 k06rntZnw7Dxtsm2qbcZsOXYBtuutm22fWFnYhdnt8Wuw+6TvZN9un2N/T0HDYfZDqsdWh1+c7Ry
225 FDpWOt6azpzuP33F9JbpL2dYzxDP2DPjthPLKcRpnVOb00dnF2e5c4PziIuJS4LLLpc+Lpsbxt3I
226 veRKdPVxXeF60vWdm7Obwu2o26/uNu5p7ofcn8w0nymeWTNz0MPIQ+BR5dE/C5+VMGvfrH5PQ0+B
227 Z7XnIy9jL5FXrdewt6V3qvdh7xc+9j5yn+M+4zw33jLeWV/MN8C3yLfLT8Nvnl+F30N/I/9k/3r/
228 0QCngCUBZwOJgUGBWwL7+Hp8Ib+OPzrbZfay2e1BjKC5QRVBj4KtguXBrSFoyOyQrSH355jOkc5p
229 DoVQfujW0Adh5mGLw34MJ4WHhVeGP45wiFga0TGXNXfR3ENz30T6RJZE3ptnMU85ry1KNSo+qi5q
230 PNo3ujS6P8YuZlnM1VidWElsSxw5LiquNm5svt/87fOH4p3iC+N7F5gvyF1weaHOwvSFpxapLhIs
231 OpZATIhOOJTwQRAqqBaMJfITdyWOCnnCHcJnIi/RNtGI2ENcKh5O8kgqTXqS7JG8NXkkxTOlLOW5
232 hCepkLxMDUzdmzqeFpp2IG0yPTq9MYOSkZBxQqohTZO2Z+pn5mZ2y6xlhbL+xW6Lty8elQfJa7OQ
233 rAVZLQq2QqboVFoo1yoHsmdlV2a/zYnKOZarnivN7cyzytuQN5zvn//tEsIS4ZK2pYZLVy0dWOa9
234 rGo5sjxxedsK4xUFK4ZWBqw8uIq2Km3VT6vtV5eufr0mek1rgV7ByoLBtQFr6wtVCuWFfevc1+1d
235 T1gvWd+1YfqGnRs+FYmKrhTbF5cVf9go3HjlG4dvyr+Z3JS0qavEuWTPZtJm6ebeLZ5bDpaql+aX
236 Dm4N2dq0Dd9WtO319kXbL5fNKNu7g7ZDuaO/PLi8ZafJzs07P1SkVPRU+lQ27tLdtWHX+G7R7ht7
237 vPY07NXbW7z3/T7JvttVAVVN1WbVZftJ+7P3P66Jqun4lvttXa1ObXHtxwPSA/0HIw6217nU1R3S
238 PVRSj9Yr60cOxx++/p3vdy0NNg1VjZzG4iNwRHnk6fcJ3/ceDTradox7rOEH0x92HWcdL2pCmvKa
239 RptTmvtbYlu6T8w+0dbq3nr8R9sfD5w0PFl5SvNUyWna6YLTk2fyz4ydlZ19fi753GDborZ752PO
240 32oPb++6EHTh0kX/i+c7vDvOXPK4dPKy2+UTV7hXmq86X23qdOo8/pPTT8e7nLuarrlca7nuer21
241 e2b36RueN87d9L158Rb/1tWeOT3dvfN6b/fF9/XfFt1+cif9zsu72Xcn7q28T7xf9EDtQdlD3YfV
242 P1v+3Njv3H9qwHeg89HcR/cGhYPP/pH1jw9DBY+Zj8uGDYbrnjg+OTniP3L96fynQ89kzyaeF/6i
243 /suuFxYvfvjV69fO0ZjRoZfyl5O/bXyl/erA6xmv28bCxh6+yXgzMV70VvvtwXfcdx3vo98PT+R8
244 IH8o/2j5sfVT0Kf7kxmTk/8EA5jz/GMzLdsAAAAgY0hSTQAAeiUAAICDAAD5/wAAgOkAAHUwAADq
245 YAAAOpgAABdvkl/FRgAAAh1JREFUeNrsmk1xwzAQRr8RgYRBwqBhkDJoGbQMagZ1GbgMVAYNA5dB
246 wsBm4CBwL9Wx0Uwk7593Z3z0SHmRn3fXi3me8d8FoAUw33kdQB/9PXu9xWCeZ4QFN9zBSCwJ6Qig
247 cUj5aAFsHdLt2Fh47ALBGi8AHh2ScYlTQXrQLPFAuJZaiVNC2gCIDikfTxolHhjWjA4pH7s/Pzmk
248 TDQA9g7JUCYeGNdWI/HAvH50SEYkHgTs4V26xIOQfUSHlI8jgGeHlI9OagEsCdIOQtspQdh+REo8
249 CPzjokNSKPGlIJ0qnKatdUgdgJ/CArhdw+NW+qZ6A888ASmkM4DPCifSvLhbANdCib9ahzRV+JHs
250 mThFCvCtXeJUeVLpaWKVOBWkAcCH1kycMuPuAIwF97PNE1BCqiHxlkPi1LVbX1iysHyK4ihwm8Lc
251 iXwojAPSUOE0dNYhJbdctEics5/UVAC9tQ6pB/BVKPFoHVINiZPME3BDmirUZdE6pPSmKimAF58n
252 kPIhoKlw/946pDPKupiLZuKSPim1FSR+sA6pRgG8sQ4JKO9iYg2QAAGNfw2QBpR3Mc1DSrnT6JCW
253 l7h5SKkAPjmk5QvgVUAaIGAeQDqklImPDkl47qQFUo+yLuYqILFKXBOkCUzTJZogpUz84pAESlwj
254 pDPKZzHNQ0q509Uh5SXeOKR8RBB1MTVDIpO4dkgDCLqY2iGl3Gl0SMwS/x0AsYSfWCRqIfIAAAAA
255 SUVORK5CYII='
256 };
257 my $MMCHART_B2 = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAAGCAYAAAACEPQxAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
258 bWFnZVJlYWR5ccllPAAAABdJREFUeNpiYGBg+M/w//9/BmwEQIABANxBD/HRDNRSAAAAAElFTkSu
259 QmCC';
260 my $FREQCHART_L = 'iVBORw0KGgoAAAANSUhEUgAAAC8AAABvCAIAAADzHQ6XAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJ
261 bWFnZVJlYWR5ccllPAAABWNJREFUeNrkm91V7DoMhTOsaQBKgBKgBCjhUgKUAI/wBiUwJUAJUAKU
262 ACVACed+K/tcYxz5J5nYk3uOHmYFxokVWdrakj17v1rJ6+trdkw3y0x3d3dd172/v5vfPj8/d/8J
263 Iytq488U0+bw8PDq6oqLh4cHhn1+fta1jXQytWGB+ErLhB5cPz4+xp6z11WWr68vPvf39/WJnT4+
264 PmKD17W10dwoYX57c3Oji9vbW2xTXRvpgYVknoQ2fFZfKSkhC6ETFzE7tdDm+PiY6V9eXrh+enri
265 8/T0NDq6RkxxwZ/EczneSJNuIVgsbVa62rmsVqsfeHN5eanV3aU4W8n5cTqWNobx9ST0G2AbPZzD
266 c+HcsJk2ht8ACYTiSy8Y7J9eUmG5hQRYbMQUy+SMhDasnUyVyL1NV4rgRCcs1E6btBdDUPiqnTYX
267 FxeA5q8dyZxYXMJ5W2jDmrqcHHMsTO7GxFDD0OauF/cI1i4bR4yRPylTmixTMOHIvPlMafMjM1xf
268 X/tgAOqcnZ2JSsaEMcwkVoVab29vJhlVqOozwUQ7n0sHb8br8lqZmsMzPnZivuEYgWc6MKXJ2mev
269 AeAe9pKwTZrzOkHdo14Yr9ceYvEP24gQDUODmbK2cRY1bYOX8BDZj1kwtpmSQy8+7oXbPnvhTiwc
270 u9n3UKcut6uK80UVXfb1Qm0wTGDzrNP49ojFlJZGz9EKmEFu4w1Dr3rh/pI0aeJNwIt5lCtfhsZb
271 LhYvixev/XC9v78fwpdPM2rLtzbALgqJW+2YpRMLXO+cUayDKrWxLWwsFnZl0aWdbYAEcjiuE7Q2
272 wLdsJprZb9QDMyXRGKtYT8VytdkFql6H7/eCThR1akVJSp5lkqyt6nAWSyxO2YQFwq+zkF/Cixnj
273 orU0T4k/EFYiBjwCCOY/6dxZwot5DmN4jskwDW38cY6mcH+WVGT5TbZHbLB0k4mqvZtm6X6z02Tp
274 6gltNptVL6TCfJ5ynUsfjtXCTDhyCS/WyzCGV0cnKhOmcK+dwmJEzovBHRNN+02WFysDuofElt5g
275 okGe4s7semf9Rh7p8nGpNtOYaJYXi5lrjBh7Uc0wYx0e8GIfb0bU4QvixScnJ2ZQtGGiYZ4ys08a
276 bCr2i83dSnyiavNxnN80I4QGL06AaRte/O3FQStJfkOWAUIc06jtxSkm6naOF8FE23BQ3zZL3Z/a
277 Rubixd8xFcNiJ/gybjQkUxTwDrWHA3wydHR0RJ4i0eb3fMWtxmIx7MmdkSAqE+PPz89HYPGwI6ee
278 VAKLy89ICNZLc3jA0AqxONh4jnUY1XfWFGlt1sHSDtsU2/eLWUEVOiN6FGrN8wbq0GJ/uVuiqSPb
279 OIuatsEYSr0BBRvdoc0eJSrxm2EETeTFJYzO2SN77micbbY/KeVe2px4ijbcg7XH7s7Pv3c3rUcx
280 bw4Pz1GoR6GiGpDI7pZV4cWTexRVbDO5R1Gllz6tR1G3XzyhR1GrgpnWo6h74mWz2ciHUAVQb0aN
281 F8yLcVgdxtstL86wrWl5akj8Rp/3U0CN1SZ7dli4qmSpMWa2Cb2YzMBiDelIYg+GBaLSUDCy1gcH
282 B8MymTF+qOIfZtmQr3yzezCFvNhkZyleLJQbuwdTyItdoGB+LOebKorFk6lWmhf7NNc8gWIzinpn
283 h3EdmAmq+AEYPf/rVwVjgzzLi0VLsp2X3xHu++kE1MnyYh9s8jXDltrMy4vn6ZjMtZG+1y1JlqXN
284 KrBzAOSNO/vrYO7GRymi/eI/pwv5Z3rx36pNEXduUCst63cwJb+7a6RNYU95zqyZ3Wz7n/3uTgDY
285 tXHhEu7cYqX++t/dbY83vyOr2WnHEu78rwADABaBbeIZChwYAAAAAElFTkSuQmCC';
286
287 my $CSS_STYLE = '
288 html, body, div, span, p, img {
289 margin: 0;
290 padding: 0;
291 border: 0;
292 outline: 0;
293 font-size: 100%;
294 vertical-align: baseline;
295 background: transparent;
296 }
297
298 html, body {
299 font-family: Arial, Verdana;
300 color: #40454b;
301 font-size: 12px;
302 text-align: center;
303 }
304
305 img {
306 padding: 0px; margin: 0px; border: none;
307 }
308
309 .info-panel {
310 margin-top: 10px;
311 margin-bottom: 10px;
312 width: 740px;
313 text-align: left;
314 }
315
316 .info-header {
317 padding-top: 20px;
318 padding-top: 10px;
319 }
320
321 .info-header-title {
322 color: #126499;
323 text-decoration: none;
324 font-family: sans-serif;
325 font-weight: bold;
326 font-size: 16px;
327 vertical-align: baseline;
328 margin-right: 20px;
329 margin-bottom: 25px;
330 margin-top: 15px;
331 }
332
333 .info-content {
334 padding: 2px;
335 font-family: "lucida grande",sans-serif,arial;
336 margin-top: 15px;
337 margin-bottom: 15px;
338 }
339
340 .info-table-type {
341 min-width: 70px;
342 padding: 4px;
343 vertical-align: top;
344 }
345
346 .info-table-value {
347 font-weight: bold;
348 padding-top: 4px;
349 padding-left: 10px;
350 padding-right: 10px;
351 vertical-align: top;
352 }
353
354 hr {
355 background-color: #E0E0E0;
356 border: medium none;
357 color: #E0E0E0;
358 height: 1px;
359 outline: medium none;
360 }
361
362 .sequencetext {
363 font-family: courier, "courier new";
364 font-weight: normal;
365 }
366 ';
367
368 my $VERSION = '0.6';
369 my $WHAT = 'graphs-noPCA';
370
371 my $man = 0;
372 my $help = 0;
373 my %params = ('help' => \$help, 'h' => \$help, 'man' => \$man);
374 GetOptions( \%params,
375 'help|h',
376 'man',
377 'verbose',
378 'version' => sub { print "PRINSEQ-$WHAT $VERSION\n"; exit; },
379 'i=s',
380 'o=s',
381 'png_all',
382 'html_all',
383 'log:s',
384 'web:s'
385 ) or pod2usage(2);
386 pod2usage(1) if $help;
387 pod2usage(-exitstatus => 0, -verbose => 2) if $man;
388
389 =head1 NAME
390
391 PRINSEQ - PReprocessing and INformation of SEQuence data
392
393 =head1 VERSION
394
395 PRINSEQ-graphs 0.6
396
397 =head1 SYNOPSIS
398
399 perl prinseq-graphs.pl [-h] [-help] [-version] [-man] [-verbose] [-i input_graph_data_file] [-png_all] [-html_all] [-log file]
400
401 =head1 DESCRIPTION
402
403 PRINSEQ will help you to preprocess your genomic or metagenomic sequence data in FASTA (and QUAL) or FASTQ format. The graphs version allows users of the lite version to generate graphs similar to the web version.
404
405 =head1 OPTIONS
406
407 =over 8
408
409 =item B<-help> | B<-h>
410
411 Print the help message; ignore other arguments.
412
413 =item B<-man>
414
415 Print the full documentation; ignore other arguments.
416
417 =item B<-version>
418
419 Print program version; ignore other arguments.
420
421 =item B<-verbose>
422
423 Prints status and info messages during processing.
424
425 =item B<***** INPUT OPTIONS *****>
426
427 =item B<-i> <file>
428
429 Input file containing the graph data generated by the lite version.
430
431 =item B<***** OUTPUT OPTIONS *****>
432
433 =item B<-o> <string>
434
435 By default, the output files are created in the same directory as the input file with an additional "_prinseq_graphs_XXXX" in their name (where XXXX is replaced by random characters to prevent overwriting previous files). To change the output filename and location, specify the filename using this option. The file extension will be added automatically.
436
437 =item B<-png_all>
438
439 Use this option to generate PNG files with the graphs.
440
441 =item B<-html_all>
442
443 Use this option to generate a HTML file with the graphs and tables.
444
445 =item B<-log> <file>
446
447 Log file to keep track of parameters, errors, etc. The log file name is optional. If no file name is given, the log file name will be "inputname.log". If the log file already exists, new content will be added to the file.
448
449 =back
450
451 =head1 AUTHOR
452
453 Robert SCHMIEDER, C<< <rschmieder_at_gmail_dot_com> >>
454
455 =head1 BUGS
456
457 If you find a bug please email me at C<< <rschmieder_at_gmail_dot_com> >> or use http://sourceforge.net/tracker/?group_id=315449 so that I can make PRINSEQ better.
458
459 =head1 COPYRIGHT
460
461 Copyright (C) 2011-2012 Robert SCHMIEDER
462
463 =head1 LICENSE
464
465 This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
466
467 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
468
469 You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>.
470
471 =cut
472
473 #
474 ################################################################################
475 ## DATA AND PARAMETER CHECKING
476 ################################################################################
477 #
478
479 my ($file1,$command,@dataread);
480
481 #Check if input file exists and check if file format is correct
482 if(exists $params{i}) {
483 $command .= ' -i '.$params{i};
484 $file1 = $params{i};
485 if($params{i} eq 'stdin') {
486 my $format = &checkInputFormat();
487 unless($format eq 'gd') {
488 &printError('input data for -i is in '.uc($format).' format not in graph data format');
489 }
490 } elsif(-e $params{i}) {
491 #check for file format
492 my $format = &checkFileFormat($file1);
493 unless($format eq 'gd') {
494 &printError('input file for -i is in '.uc($format).' format not in graph data format');
495 }
496 } else {
497 &printError("could not find input file \"".$params{i}."\"");
498 }
499 } else {
500 &printError("you did not specify an input file containing the graph data");
501 }
502
503 #check output file name prefix
504 if(exists $params{o}) {
505 $command .= ' -o '.$params{o};
506 }
507
508 #check for output format
509 unless(exists $params{png_all} || exists $params{html_all}) {
510 &printError("No output format specified. Use -png_all and/or -html_all to generate graphs.");
511 }
512 if(exists $params{png_all}) {
513 $command .= ' -png_all';
514 }
515 if(exists $params{html_all}) {
516 $command .= ' -html_all';
517 }
518 if(exists $params{web}) {
519 $command .= ' -web'.($params{web} ? ' '.$params{web} : '');
520 }
521
522 #add remaining to log command
523 if(exists $params{log}) {
524 $command .= ' -log'.($params{log} ? ' '.$params{log} : '');
525
526 unless($params{log}) {
527 $params{log} = join("__",$file1||'nonamegiven').'.log';
528 }
529 $params{log} = cwd().'/'.$params{log} unless($params{log} =~ /^\//);
530 &printLog("Executing PRINSEQ with command: \"perl prinseq-".$WHAT.".pl".$command."\"");
531 }
532
533 #
534 ################################################################################
535 ## DATA PROCESSING
536 ################################################################################
537 #
538
539 my $filename = $file1;
540 while($filename =~ /[\w\d]+\.[\w\d]+$/) {
541 $filename =~ s/\.[\w\d]+$//;
542 last if($filename =~ /\/[^\.]+$/);
543 }
544
545 if(exists $params{png_all}) {
546 my $graphs = &generateGraphs($params{i},$params{o});
547 if(exists $params{web} && $params{web} ne 'nozip') {
548 #png files
549 if(scalar(@$graphs)) {
550 system("zip -j -r ".dirname($params{o})."/png_graphs.zip ".dirname($params{o}).' -i \*.png') == 0 or &printError("Cannot generate graphs ZIP file");
551 }
552 }
553 }
554 if(exists $params{html_all}) {
555 &generateHtml($params{i},$params{o});
556 }
557
558 &printWeb("STATUS: done");
559
560 ##
561 #################################################################################
562 ### MISC FUNCTIONS
563 #################################################################################
564 ##
565
566 sub printError {
567 my $msg = shift;
568 print STDERR "\nERROR: ".$msg.".\n\nTry \'perl prinseq-".$WHAT.".pl -h\' for more information.\nExit program.\n";
569 &printLog("ERROR: ".$msg.". Exit program.\n");
570 exit(0);
571 }
572
573 sub printWarning {
574 my $msg = shift;
575 print STDERR "WARNING: ".$msg.".\n";
576 &printLog("WARNING: ".$msg.".\n");
577 }
578
579 sub printWeb {
580 my $msg = shift;
581 if(exists $params{web}) {
582 print STDERR "\n".&getTime()."$msg\n";
583 }
584 }
585
586 sub getTime {
587 return sprintf("[%02d/%02d/%04d %02d:%02d:%02d] ",sub {($_[4]+1,$_[3],$_[5]+1900,$_[2],$_[1],$_[0])}->(localtime));
588 }
589
590 sub printLog {
591 my $msg = shift;
592 if(exists $params{log}) {
593 my $time = sprintf("%02d/%02d/%04d %02d:%02d:%02d",sub {($_[4]+1,$_[3],$_[5]+1900,$_[2],$_[1],$_[0])}->(localtime));
594 open(FH, ">>", $params{log}) or die "ERROR: Can't open file ".$params{log}.": $! \n";
595 flock(FH, LOCK_EX) or die "ERROR: Cannot lock file ".$params{log}.": $! \n";
596 print FH "[prinseq-".$WHAT."-$VERSION] [$time] $msg\n";
597 flock(FH, LOCK_UN) or die "ERROR: cannot unlock ".$params{log}.": $! \n";
598 close(FH);
599 }
600 }
601
602 sub addCommas {
603 my $num = shift;
604 return unless(defined $num);
605 return $num if($num < 1000);
606 $num = scalar reverse $num;
607 $num =~ s/(\d{3})/$1\,/g;
608 $num =~ s/\,$//;
609 $num = scalar reverse $num;
610 return $num;
611 }
612
613 sub checkFileFormat {
614 my $file = shift;
615
616 my ($format,$count,$id,$fasta,$fastq,$qual,$gd,$aa);
617 $count = 3;
618 $fasta = $fastq = $qual = $gd = $aa = 0;
619 $format = 'unknown';
620
621 open(FILE,"perl -p -e 's/\r/\n/g;s/\n\n/\n/g' < $file |") or die "ERROR: Could not open file $file: $! \n";
622 while (<FILE>) {
623 # chomp();
624 # next unless(length($_));
625 if($count-- == 0) {
626 last;
627 } elsif(!$fasta && /^\>\S+\s*/) {
628 $fasta = 1;
629 $qual = 1;
630 } elsif($fasta == 1 && (($aa && /^[ABCDEFGHIKLMNOPQRSTUVWYZXabcdefghiklmmopqrstuvwyzx*-]+/) || (!$aa && /^[ACGTURYKMSWBDHVNXacgturykmswbdhvnx-]+/))) {
631 $fasta = 2;
632 } elsif($qual == 1 && /^\s*\d+/) {
633 $qual = 2;
634 } elsif(!$fastq && /^\@(\S+)\s*/) {
635 $id = $1;
636 $fastq = 1;
637 } elsif($fastq == 1 && (($aa && /^[ABCDEFGHIKLMNOPQRSTUVWYZXabcdefghiklmmopqrstuvwyzx*-]+/) || (!$aa && /^[ACGTURYKMSWBDHVNXacgturykmswbdhvnx-]+/))) {
638 $fastq = 2;
639 } elsif($fastq == 2 && /^\+(\S*)\s*/) {
640 $fastq = 3 if($id eq $1 || /^\+\s*$/);
641 } elsif(!$gd && /^\{\"numseqs\"\:/) {
642 $gd = 1;
643 }
644 }
645 close(FILE);
646 if($fasta == 2) {
647 $format = 'fasta';
648 } elsif($qual == 2) {
649 $format = 'qual';
650 } elsif($fastq == 3) {
651 $format = 'fastq';
652 } elsif($gd == 1) {
653 $format = 'gd';
654 }
655
656 return $format;
657 }
658
659 sub checkInputFormat {
660 my ($format,$count,$id,$fasta,$fastq,$qual,$gd,$aa);
661 $count = 3;
662 $fasta = $fastq = $qual = $gd = $aa = 0;
663 $format = 'unknown';
664
665 while (<STDIN>) {
666 push(@dataread,$_);
667 # chomp();
668 # next unless(length($_));
669 if($count-- == 0) {
670 last;
671 } elsif(!$fasta && /^\>\S+\s*/) {
672 $fasta = 1;
673 $qual = 1;
674 } elsif($fasta == 1 && (($aa && /^[ABCDEFGHIKLMNOPQRSTUVWYZXabcdefghiklmmopqrstuvwyzx*-]+/) || (!$aa && /^[ACGTURYKMSWBDHVNXacgturykmswbdhvnx-]+/))) {
675 $fasta = 2;
676 } elsif($qual == 1 && /^\s*\d+/) {
677 $qual = 2;
678 } elsif(!$fastq && /^\@(\S+)\s*/) {
679 $id = $1;
680 $fastq = 1;
681 } elsif($fastq == 1 && (($aa && /^[ABCDEFGHIKLMNOPQRSTUVWYZXabcdefghiklmmopqrstuvwyzx*-]+/) || (!$aa && /^[ACGTURYKMSWBDHVNXacgturykmswbdhvnx-]+/))) {
682 $fastq = 2;
683 } elsif($fastq == 2 && /^\+(\S*)\s*/) {
684 $fastq = 3 if($id eq $1 || /^\+\s*$/);
685 } elsif(!$gd && /^\{\"numseqs\"\:/) {
686 $gd = 1;
687 }
688 }
689
690 if($fasta == 2) {
691 $format = 'fasta';
692 } elsif($qual == 2) {
693 $format = 'qual';
694 } elsif($fastq == 3) {
695 $format = 'fastq';
696 } elsif($gd == 1) {
697 $format = 'gd';
698 }
699
700 return $format;
701 }
702
703 sub readGdFile {
704 my $file = shift;
705 my $data;
706
707 open(DATA,"<$file") or &printError("Could not open file $file: $!");
708 while(<DATA>) {
709 next if(/^\#/);
710 chomp();
711 if(length($_)) {
712 $data = from_json($_);
713 }
714 }
715 close(DATA);
716
717 return $data;
718 }
719
720 sub getFileName {
721 my $ext = shift;
722 my ($file,$fh);
723 if(exists $params{o}) {
724 $file = $params{o}.$ext;
725 open(OUT,">$file") or &printError('cannot open output file');
726 close(OUT);
727 } else {
728 $fh = File::Temp->new( TEMPLATE => $filename.'_prinseq_graphs_XXXX',
729 SUFFIX => $ext,
730 UNLINK => 0);
731 $file = $fh->filename;
732 $fh->close();
733 }
734 return $file;
735 }
736
737 sub generateGraphs {
738 my ($in,$out) = @_;
739 my ($file,$data,$surface,@graphs);
740 $data = &readGdFile($in);
741
742 #length plot
743 if(exists $data->{counts}->{length}) {
744 $file = &getFileName('_ld.png');
745 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{length},1),$data->{stats}->{length},'Length Distribution','Read Length in bp','# Sequences',$file,0,' bp');
746 $surface->write_to_png($file);
747 push(@graphs,$file);
748 }
749 if(exists $data->{counts2} && exists $data->{counts2}->{length}) {
750 $file = &getFileName('_ld-2.png');
751 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{length},1),$data->{stats2}->{length},'Length Distribution','Read Length in bp','# Sequences',$file,0,' bp');
752 $surface->write_to_png($file);
753 push(@graphs,$file);
754 }
755
756 #tail plot
757 if(exists $data->{tail}) {
758 $file = &getFileName('_td5.png');
759 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{tail5},1),undef,'Poly-A/T Tail Distribution (> 4bp)','5\' Tail Length in bp','# Sequences',$file,0,' bp');
760 $surface->write_to_png($file);
761 push(@graphs,$file);
762 $file = &getFileName('_td3.png');
763 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{tail3},1),undef,'Poly-A/T Tail Distribution (> 4bp)','3\' Tail Length in bp','# Sequences',$file,0,' bp');
764 $surface->write_to_png($file);
765 push(@graphs,$file);
766 }
767 if(exists $data->{tail2}) {
768 $file = &getFileName('_td5-2.png');
769 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{tail5},1),undef,'Poly-A/T Tail Distribution (> 4bp)','5\' Tail Length in bp','# Sequences',$file,0,' bp');
770 $surface->write_to_png($file);
771 push(@graphs,$file);
772 $file = &getFileName('_td3-2.png');
773 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{tail3},1),undef,'Poly-A/T Tail Distribution (> 4bp)','3\' Tail Length in bp','# Sequences',$file,0,' bp');
774 $surface->write_to_png($file);
775 push(@graphs,$file);
776 }
777
778 #Ns plot
779 if(exists $data->{counts}->{ns}) {
780 $file = &getFileName('_ns.png');
781 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{ns},1),undef,'Percentage of N\'s (> 0%)','Percentage of N\'s per Read (1-100%)','# Sequences',$file,0);
782 $surface->write_to_png($file);
783 push(@graphs,$file);
784 }
785 if(exists $data->{counts2} && exists $data->{counts2}->{ns}) {
786 $file = &getFileName('_ns-2.png');
787 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{ns},1),undef,'Percentage of N\'s (> 0%)','Percentage of N\'s per Read (1-100%)','# Sequences',$file,0);
788 $surface->write_to_png($file);
789 push(@graphs,$file);
790 }
791
792 #GC content plot
793 if(exists $data->{counts}->{gc}) {
794 $file = &getFileName('_gc.png');
795 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{gc},0),$data->{stats}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences',$file,1);
796 $surface->write_to_png($file);
797 push(@graphs,$file);
798 }
799 if(exists $data->{counts2} && exists $data->{counts2}->{gc}) {
800 $file = &getFileName('_gc-2.png');
801 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{gc},0),$data->{stats2}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences',$file,1);
802 $surface->write_to_png($file);
803 push(@graphs,$file);
804 }
805
806 #Sequence complexity plot - dust
807 if(exists $data->{compldust}) {
808 $file = &getFileName('_cd.png');
809 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{compldust},0),undef,'Sequence complexity distribution','Mean sequence complexity (DUST scores)','Number of sequences',$file,1);
810 $surface->write_to_png($file);
811 push(@graphs,$file);
812 }
813
814 #Sequence complexity plot - entropy
815 if(exists $data->{complentropy}) {
816 $file = &getFileName('_ce.png');
817 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{complentropy},0),undef,'Sequence complexity distribution','Mean sequence complexity (Entropy values)','Number of sequences',$file,1);
818 $surface->write_to_png($file);
819 push(@graphs,$file);
820 }
821
822 #Dinucleotide odd ratio PCA plot - microbial/viral
823 #Odds ratio plot
824 if(exists $data->{dinucodds}) {
825 my @new = map {$data->{dinucodds}->{$_}} sort keys %{$data->{dinucodds}};
826 # $file = &getFileName('_pm.png');
827 # $surface = &createPCAPlot(&convertToPCAValues(\@new,'m'),'PCA','1st Principal Component Score','2nd Principal Component Score',$file);
828 # $surface->write_to_png($file);
829 # push(@graphs,$file);
830 # $file = &getFileName('_pv.png');
831 # $surface = &createPCAPlot(&convertToPCAValues(\@new,'v'),'PCA','1st Principal Component Score','2nd Principal Component Score',$file);
832 # $surface->write_to_png($file);
833 # push(@graphs,$file);
834 $file = &getFileName('_or.png');
835 $surface = &createOddsRatioPlot($data->{dinucodds},'Odds ratios','Dinucleotide','Odds ratio',$file);
836 $surface->write_to_png($file);
837 push(@graphs,$file);
838 }
839
840 #Qual plot
841 if(exists $data->{quals}) {
842 $file = &getFileName('_qd.png');
843 $surface = &createBoxPlot(&convertToBoxValues($data->{quals},4),'Base Quality Distribution','Read position in %','Quality score',$file);
844 $surface->write_to_png($file);
845 push(@graphs,$file);
846 }
847 if(exists $data->{quals2}) {
848 $file = &getFileName('_qd-2.png');
849 $surface = &createBoxPlot(&convertToBoxValues($data->{quals2},4),'Base Quality Distribution','Read position in %','Quality score',$file);
850 $surface->write_to_png($file);
851 push(@graphs,$file);
852 }
853
854 #Qualbin plot
855 if(exists $data->{qualsbin}) {
856 $file = &getFileName('_qd2.png');
857 $surface = &createBoxPlot(&convertToBoxValues($data->{qualsbin},4),'Base Quality Distribution','Read position in bp','Quality score',$file,0,'bp',$data->{binval});
858 $surface->write_to_png($file);
859 push(@graphs,$file);
860 }
861 if(exists $data->{qualsbin2}) {
862 $file = &getFileName('_qd2-2.png');
863 $surface = &createBoxPlot(&convertToBoxValues($data->{qualsbin2},4),'Base Quality Distribution','Read position in bp','Quality score',$file,0,'bp',$data->{binval});
864 $surface->write_to_png($file);
865 push(@graphs,$file);
866 }
867
868 #Qualmean plot
869 if(exists $data->{qualsmean}) {
870 $file = &getFileName('_qd3.png');
871 $surface = &createBarPlot(&convertToBarValues($data->{qualsmean},5,1),'Sequence Quality Distribution','Mean of quality scores per sequence','Number of sequences',$file,0);
872 $surface->write_to_png($file);
873 push(@graphs,$file);
874 }
875 if(exists $data->{qualsmean2}) {
876 $file = &getFileName('_qd3-2.png');
877 $surface = &createBarPlot(&convertToBarValues($data->{qualsmean2},5,1),'Sequence Quality Distribution','Mean of quality scores per sequence','Number of sequences',$file,0);
878 $surface->write_to_png($file);
879 push(@graphs,$file);
880 }
881
882 #Sequence duplicate plots
883 if(exists $data->{dubscounts}) {
884 $file = &getFileName('_df.png');
885 $surface = &createStackBarPlot(&convertOdToStackBinMatrix($data->{dubscounts},5,1,100),'Sequence duplication level','Number of duplicates','Number of sequences',$file,0);
886 $surface->write_to_png($file);
887 push(@graphs,$file);
888 }
889 if(exists $data->{dubslength}) {
890 $file = &getFileName('_dl.png');
891 $surface = &createStackBarPlot(&convertOdToStackBinMatrix($data->{dubslength},5,1),'Sequence duplication level','Read Length in bp','Number of duplicates',$file,0,' bp');
892 $surface->write_to_png($file);
893 push(@graphs,$file);
894 }
895 if(exists $data->{dubscounts}) {
896 my %dubsmax;
897 my $count = 1;
898 foreach my $n (sort {$b <=> $a} keys %{$data->{dubscounts}}) {
899 foreach my $s (keys %{$data->{dubscounts}->{$n}}) {
900 foreach my $i (1..$data->{dubscounts}->{$n}->{$s}) {
901 $dubsmax{$count++}->{$s} = $n;
902 last unless($count <= 100);
903 }
904 last unless($count <= 100);
905 }
906 last unless($count <= 100);
907 }
908 $file = &getFileName('_dm.png');
909 $surface = &createStackBarPlot(&convertOdToStackBinMatrix(\%dubsmax,5,1,100),'Sequence duplication level','Sequence','Number of duplicates',$file,0);
910 $surface->write_to_png($file);
911 push(@graphs,$file);
912 }
913
914 return \@graphs;
915 }
916
917 sub convertOdToBinMatrix {
918 my ($data,$min,$max,$nonice) = @_;
919
920 my ($num,$ymax,$xmax,$xmin,$step,%vals,$tmp,@matrix,$bin,$tmpbin);
921
922 #make nice xmax value
923 if(defined $max) {
924 $xmax = $max;
925 } else {
926 $xmax = (sort {$b <=> $a} keys %$data)[0];
927 }
928 $bin = &getBinVal($xmax);
929 $xmax = $bin*100;
930 $xmin = (defined $min ? $min : 0);
931
932 #get data to bin and find y axis max value
933 $ymax = 0;
934 $tmp = 0;
935 $tmpbin = $bin;
936 foreach my $i ($xmin..$xmax) {
937 if(exists $data->{$i}) {
938 $tmp += $data->{$i};
939 }
940 if(--$tmpbin <= 0) {
941 $tmpbin = $bin;
942 $ymax = &max($ymax,$tmp);
943 push(@matrix,$tmp);
944 $tmp = 0;
945 }
946 }
947
948 #make nice ymax value
949 unless($nonice) {
950 $ymax = sprintf("%d",($ymax/4)+1)*4 if($ymax % 4);
951 # $step = ($ymax <= 10 ? 10 : ($ymax < 40 ? 40 : ($ymax < 100 ? 100 : ($ymax < 1000 ? 100 : 100))));
952 # $ymax = sprintf("%d",($ymax/$step)+1)*$step if($ymax % $step);
953 }
954
955 return (\@matrix,$xmax,$ymax);
956 }
957
958 sub getBinVal {
959 my $val = shift;
960 my $step;
961 if(!$val || $val <= 100) {
962 return 1;
963 } elsif($val < 10000) {
964 return int($val/100)+($val % 100 ? 1 : 0);
965 } elsif($val < 100000) {
966 return 1000;
967 } else {
968 $step = 1000000;
969 my $xmax = ($val % $step ? sprintf("%d",($val/$step+1))*$step : $val);
970 return ($xmax/100);
971 }
972 }
973
974 sub max {
975 my ($a,$b) = @_;
976 return ($a < $b ? $b : $a);
977 }
978
979 sub min {
980 my ($a,$b) = @_;
981 return ($a > $b ? $b : $a);
982 }
983
984 sub createAnnotBarPlot {
985 my ($matrix,$xmax,$ymax,$annot,$title,$xlab,$ylab,$file,$zero,$add) = @_;
986
987 my $bin = 1;
988 if($xmax > 100) {
989 $bin = $xmax / 100;
990 $xmax = 100;
991 }
992
993 my @barcol = (127/255, 127/255, 255/255, 1); #b2b2ff
994 my @meancol = (255/255, 127/255, 127/255, 1); #ffb2b2
995 my @stdcol = (178/255, 178/255, 255/255, 0.8); #7f7fff
996 my @std1col = (0, 0, 0, 0.04); #ff7f7f
997 my @std2col = (0, 0, 0, 0.03); #ff7f7f
998 my @linecol = (0, 0, 0, 0.4);
999 my @helplinecol = (1, 1, 1, 0.9);
1000 my @background = (0.95, 0.95, 0.95, 1);
1001 my @tickcol = (0, 0, 0, 0.8);
1002 my @labelcol = (0, 0, 0, 1);
1003
1004 #create new image
1005 my $size = 6;
1006 my $offset = 20;
1007 my $left = 40;
1008 my $bottom = 15;
1009 my $top = 20;
1010 my $height = 200;
1011 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+($xmax+$zero)*$size,$bottom+$top+$offset*2+$height); #format, width, height
1012 my $cr = Cairo::Context->create($surface);
1013
1014 my ($font_extents,$extents,$fontheight,$fontdescent);
1015
1016 #background
1017 $cr->rectangle(0, 0, $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+2*200+20);
1018 $cr->set_source_rgba(1, 1, 1, 1);
1019 $cr->fill;
1020
1021 #fonts
1022 $cr->select_font_face ('sans', 'normal', 'normal');
1023
1024 $cr->save;
1025
1026 #set up work space
1027 $cr->set_antialias('none');
1028 $cr->set_line_width(1);
1029
1030 #background for plot
1031 $cr->rectangle($left+$offset, $top+$offset, ($xmax+$zero)*$size-1, $height);
1032 $cr->set_source_rgba(@background);
1033 $cr->fill;
1034
1035 #draw ticks
1036 #x-axis
1037 $cr->set_source_rgba(@tickcol);
1038 foreach my $i (($zero ? 0 : 1)..$xmax) {
1039 if(($i%5) == 0 && $i > 1 && $i < $xmax) {
1040 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
1041 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+3);
1042 } else {
1043 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
1044 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+1);
1045 }
1046 }
1047 $cr->stroke;
1048
1049 #y-axis
1050 $cr->move_to($left+$offset, $top+$offset);
1051 $cr->line_to($left+$offset-3, $top+$offset);
1052 $cr->move_to($left+$offset, $top+$offset+$height-1);
1053 $cr->line_to($left+$offset-3, $top+$offset+$height-1);
1054 $cr->stroke;
1055
1056 #helplines
1057 $cr->set_source_rgba(@helplinecol);
1058 foreach my $j (1..3) {
1059 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
1060 $cr->line_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
1061 }
1062 $cr->stroke;
1063
1064 $cr->set_antialias('default');
1065
1066 #tick labels
1067 $cr->set_source_rgba(@tickcol);
1068 $cr->set_font_size(10);
1069 $font_extents = $cr->font_extents;
1070 $fontheight = $font_extents->{height};
1071 #x-axis
1072 foreach my $i (($zero ? 0 : 1)..$xmax) {
1073 if(($i%10) == 0 && $i > 1 && $i < $xmax) {
1074 $extents = $cr->text_extents($i*$bin);
1075 $cr->move_to($left+$offset+int($size/2+1)+$size*$i-($zero ? 0 : $size)-$extents->{width}/2-1-($i == 1 ? 1 : 0), $top+$offset+$height+$fontheight+2);
1076 $cr->show_text($i*$bin);
1077 }
1078 }
1079 #y-axis
1080 $extents = $cr->text_extents(&addCommas($ymax));
1081 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2);
1082 $cr->show_text(&addCommas($ymax));
1083 $extents = $cr->text_extents(0);
1084 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2+$height);
1085 $cr->show_text(0);
1086
1087 $cr->save;
1088
1089 #labels
1090 $cr->set_font_size (14);
1091 $font_extents = $cr->font_extents;
1092 $fontheight = $font_extents->{height};
1093
1094 #axis labels
1095 $cr->set_source_rgba(@labelcol);
1096 $extents = $cr->text_extents($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
1097 $cr->move_to($left+$offset+($xmax+$zero)*$size/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
1098 $cr->show_text($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
1099 $cr->rotate($PI * 3 / 2);
1100 $extents = $cr->text_extents($ylab.($bin>1 ? ' (per bin)' : ''));
1101 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2),$offset+10);
1102 $cr->show_text($ylab.($bin>1 ? ' (per bin)' : ''));
1103
1104 $cr->restore;
1105
1106 #draw annotations
1107 if($annot) {
1108 $cr->set_antialias('none');
1109 my ($std1l,$std2l,$std1r,$std2r);
1110 #std boxes
1111 $std1l = int($annot->{mean})-int($annot->{std});
1112 $std2l = int($annot->{mean})-2*int($annot->{std});
1113 $std1r = int($annot->{mean})+int($annot->{std});
1114 $std2r = int($annot->{mean})+2*int($annot->{std});
1115 unless($std1l == $std1r) {
1116 if($std1l < 0) {
1117 $std1l = 0;
1118 } else {
1119 $std1l = int($std1l/$bin);
1120 }
1121 if($std2l < 0) {
1122 $std2l = 0;
1123 } else {
1124 $std2l = int($std2l/$bin);
1125 }
1126 if($std1r/$bin > 100) {
1127 $std1r = 100;
1128 } else {
1129 $std1r = int($std1r/$bin);
1130 }
1131 if($std2r/$bin > 100) {
1132 $std2r = 100;
1133 } else {
1134 $std2r = int($std2r/$bin);
1135 }
1136 $cr->rectangle($left+$offset+$std2l*$size+2, $top+$offset, ($std2r-$std2l)*$size, $height);
1137 $cr->set_source_rgba(@std2col);
1138 $cr->fill;
1139 $cr->rectangle($left+$offset+$std1l*$size+2, $top+$offset, ($std1r-$std1l)*$size, $height);
1140 $cr->set_source_rgba(@std1col);
1141 $cr->fill;
1142 #mean line
1143 $cr->set_source_rgba(@meancol);
1144 $cr->move_to($left+$offset+int(int($annot->{mean})/$bin)*$size+2, $top+$offset-5);
1145 $cr->line_to($left+$offset+int(int($annot->{mean})/$bin)*$size+2, $top+$offset+$height);
1146 $cr->stroke;
1147 #std lines
1148 $cr->set_source_rgba(@stdcol);
1149 if($std1l > 0) {
1150 $cr->move_to($left+$offset+$std1l*$size+2, $top+$offset-5);
1151 $cr->line_to($left+$offset+$std1l*$size+2, $top+$offset+$height);
1152 }
1153 if($std2l > 0) {
1154 $cr->move_to($left+$offset+$std2l*$size+2, $top+$offset-5);
1155 $cr->line_to($left+$offset+$std2l*$size+2, $top+$offset+$height);
1156 }
1157 if($std1r < 100) {
1158 $cr->move_to($left+$offset+$std1r*$size+2, $top+$offset-5);
1159 $cr->line_to($left+$offset+$std1r*$size+2, $top+$offset+$height);
1160 }
1161 if($std2r < 100) {
1162 $cr->move_to($left+$offset+$std2r*$size+2, $top+$offset-5);
1163 $cr->line_to($left+$offset+$std2r*$size+2, $top+$offset+$height);
1164 }
1165 $cr->stroke;
1166 #labels
1167 $cr->set_antialias('default');
1168 $cr->set_source_rgba(@tickcol);
1169 $extents = $cr->text_extents('M');
1170 $cr->move_to($left+$offset+int(int($annot->{mean})/$bin)*$size+2-$extents->{width}/2, $top+$offset-10);
1171 $cr->show_text('M');
1172 if($std1l > 0) {
1173 $extents = $cr->text_extents('1SD');
1174 $cr->move_to($left+$offset+$std1l*$size-$extents->{width}/2+2, $top+$offset-10);
1175 $cr->show_text('1SD');
1176 }
1177 if($std2l > 0) {
1178 $extents = $cr->text_extents('2SD');
1179 $cr->move_to($left+$offset+$std2l*$size-$extents->{width}/2+3, $top+$offset-10);
1180 $cr->show_text('2SD');
1181 }
1182 if($std1r < 100) {
1183 $extents = $cr->text_extents('1SD');
1184 $cr->move_to($left+$offset+$std1r*$size-$extents->{width}/2+2, $top+$offset-10);
1185 $cr->show_text('1SD');
1186 }
1187 if($std2r < 100) {
1188 $extents = $cr->text_extents('2SD');
1189 $cr->move_to($left+$offset+$std2r*$size-$extents->{width}/2+3, $top+$offset-10);
1190 $cr->show_text('2SD');
1191 }
1192 }
1193 }
1194
1195 #draw boxes
1196 $cr->set_antialias('none');
1197 $cr->set_source_rgba(@barcol);
1198 foreach my $pos (0..$xmax-($zero ? 0 : 1)) {
1199 next unless($matrix->[$pos]);
1200 my $tmp = $matrix->[$pos] / $ymax;
1201 #unique
1202 if($tmp) {
1203 $cr->rectangle($left+$offset+$pos*$size, $top+$offset+$height, $size-1, -$tmp*$height);
1204 $cr->fill;
1205 }
1206 }
1207
1208 #write image
1209 $cr->show_page;
1210 return $surface;
1211 }
1212
1213 #sub convertToPCAValues {
1214 # my ($new,$type) = @_;
1215 #
1216 # my @data = ($type eq 'v' ? @$DINUCODDS_VIR : @$DINUCODDS_MIC);
1217 #
1218 # push(@data,$new);
1219 #
1220 # my $pca = Statistics::PCA->new;
1221 # $pca->load_data({format => 'table', data => \@data});
1222 # $pca->pca();
1223 #
1224 # my @variances = $pca->results('proportion');
1225 # my @list = $pca->results('transformed');
1226 #
1227 # my ($xmin,$xmax,$ymin,$ymax);
1228 # $xmax = $ymax = -100;
1229 # $xmin = $ymin = 100;
1230 #
1231 # #get min/max values for PC1
1232 # foreach my $v (@{$list[0]}) {
1233 # $xmax = &max($xmax,$v);
1234 # $xmin = &min($xmin,$v);
1235 # }
1236 # #get min/max values for PC2
1237 # foreach my $v (@{$list[1]}) {
1238 # $ymax = &max($ymax,$v);
1239 # $ymin = &min($ymin,$v);
1240 # }
1241 #
1242 # return ([$list[0],$list[1]],sprintf("%d",$variances[0]*100),sprintf("%d",$variances[1]*100),$xmin,$xmax,$ymin,$ymax,$type);
1243 #}
1244
1245 sub createPCAPlot {
1246 my ($data,$var1,$var2,$xmin,$xmax,$ymin,$ymax,$type,$title,$xlab,$ylab,$file) = @_;
1247
1248 my @linecol = (0, 0, 0, 0.4);
1249 my @helplinecol1 = (1,1,1, 0.9);
1250 my @helplinecol2 = (1,1,1, 0.5);
1251
1252 #create new image
1253 my $size = 5;
1254 my $offset = 20;
1255 my $left = 25;
1256 my $bottom = 15;
1257 my $top = ($type eq 'v' ? 35 : 20);
1258 my $height = 500;
1259 my $space = 10;
1260 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+$height+2*$space,$top+$bottom+$offset*2+$height+2*$space); #format, width, height
1261 my $cr = Cairo::Context->create($surface);
1262
1263 my ($font_extents,$extents,$fontheight,$fontdescent);
1264
1265 #background
1266 $cr->rectangle(0, 0, $left+$offset*2+$height+2*$space,$top+$bottom+$offset*2+$height+2*$space);
1267 $cr->set_source_rgba(1, 1, 1, 1);
1268 $cr->fill;
1269
1270 #fonts
1271 $cr->select_font_face ('sans-serif', 'normal', 'normal');
1272
1273 $cr->save;
1274
1275 #set up work space
1276 my ($dx, $dy);
1277 $cr->set_antialias('none');
1278 $cr->set_line_width(1);
1279
1280 #background for plot
1281 $cr->rectangle($left+$offset, $top+$offset, $height+2*$space, $height+2*$space);
1282 $cr->set_source_rgba(0.95, 0.95, 0.95, 1);
1283 $cr->fill;
1284
1285 #get infos
1286 my $num = scalar(@{$data->[0]})-1;
1287 my $xrange = ($xmax-$xmin);
1288 my $yrange = ($ymax-$ymin);
1289 my $data_info = ($type eq 'v' ? $DATA_VIR : $DATA_MIC);
1290
1291 #draw ticks
1292 #x-axis
1293 $cr->set_source_rgba(0, 0, 0, 0.8);
1294 $cr->move_to($left+$offset+$space, $top+$offset+$height+2*$space);
1295 $cr->line_to($left+$offset+$space, $top+$offset+$height+2*$space+3);
1296 $cr->move_to($left+$offset+$space+$height, $top+$offset+$height+2*$space);
1297 $cr->line_to($left+$offset+$space+$height, $top+$offset+$height+2*$space+3);
1298 $cr->move_to($left+$offset+$space+int(abs($xmin)/$xrange*$height), $top+$offset+$height+2*$space);
1299 $cr->line_to($left+$offset+$space+int(abs($xmin)/$xrange*$height), $top+$offset+$height+2*$space+3);
1300 $cr->stroke;
1301 #y-axis
1302 $cr->move_to($left+$offset, $top+$offset+$space);
1303 $cr->line_to($left+$offset-3, $top+$offset+$space);
1304 $cr->move_to($left+$offset, $top+$offset+$height+$space);
1305 $cr->line_to($left+$offset-3, $top+$offset+$height+$space);
1306 $cr->move_to($left+$offset, $top+$offset+$space+int(abs($ymax)/$yrange*$height));
1307 $cr->line_to($left+$offset-3, $top+$offset+$space+int(abs($ymax)/$yrange*$height));
1308 $cr->stroke;
1309
1310 #helplines
1311 $cr->set_source_rgba(@helplinecol1);
1312 $cr->move_to($left+$offset+$space+int(abs($xmin)/$xrange*$height), $top+$offset);
1313 $cr->line_to($left+$offset+$space+int(abs($xmin)/$xrange*$height), $top+$offset+$height+2*$space);
1314 $cr->stroke;
1315 $cr->move_to($left+$offset, $top+$offset+$space+int(abs($ymax)/$yrange*$height));
1316 $cr->line_to($left+$offset+2*$space+$height, $top+$offset+$space+int(abs($ymax)/$yrange*$height));
1317 $cr->stroke;
1318
1319
1320 $cr->set_antialias('default');
1321
1322 #tick labels
1323 $cr->set_source_rgba(0, 0, 0, 0.8);
1324 $cr->set_font_size(10);
1325 $font_extents = $cr->font_extents;
1326 $fontheight = $font_extents->{height};
1327 #x-axis
1328 $extents = $cr->text_extents(sprintf("%.2f",$xmin));
1329 $cr->move_to($left+$offset+$space-$extents->{width}/2-1, $top+$offset+$height+2*$space+$fontheight+2);
1330 $cr->show_text(sprintf("%.2f",$xmin));
1331 $extents = $cr->text_extents(sprintf("%.2f",$xmax));
1332 $cr->move_to($left+$offset+$space+$height-$extents->{width}/2-1, $top+$offset+$height+2*$space+$fontheight+2);
1333 $cr->show_text(sprintf("%.2f",$xmax));
1334 $extents = $cr->text_extents(0);
1335 $cr->move_to($left+$offset+$space+int(abs($xmin)/$xrange*$height)-$extents->{width}/2, $top+$offset+$height+2*$space+$fontheight+2);
1336 $cr->show_text(0);
1337 #y-axis
1338 $extents = $cr->text_extents(sprintf("%.2f",$ymax));
1339 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$space+$fontheight/2-2);
1340 $cr->show_text(sprintf("%.2f",$ymax));
1341 $extents = $cr->text_extents(sprintf("%.2f",$ymin));
1342 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$height+$space+$fontheight/2-2);
1343 $cr->show_text(sprintf("%.2f",$ymin));
1344 $extents = $cr->text_extents(0);
1345 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$space+int(abs($ymax)/$yrange*$height)+$fontheight/2-2);
1346 $cr->show_text(0);
1347
1348 $cr->save;
1349
1350 #labels
1351 $cr->set_font_size (14);
1352 $font_extents = $cr->font_extents;
1353 $fontheight = $font_extents->{height};
1354
1355 #add type
1356 $cr->set_source_rgba(0, 0, 0, 0.5);
1357 $extents = $cr->text_extents(uc($type));
1358 $cr->arc($offset/2+$extents->{width}/2, $offset-5, 10, 0, 2*$PI);
1359 $cr->fill;
1360 $cr->set_source_rgba(1, 1, 1, 1);
1361 $cr->move_to($offset/2-($type eq 'm' ? 1 : 0), $offset);
1362 $cr->show_text(uc($type));
1363
1364 #axis labels
1365 $cr->set_source_rgba(0, 0, 0, 1);
1366 $extents = $cr->text_extents($xlab.' ('.$var1.'%)');
1367 $cr->move_to($left+$offset+$height/2-$extents->{width}/2+$space, $top+$offset+$height+$fontheight+15+2*$space);
1368 $cr->show_text($xlab.' ('.$var1.'%)');
1369 $cr->rotate($PI * 3 / 2);
1370 $extents = $cr->text_extents($ylab.' ('.$var2.'%)');
1371 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2)+$space,$offset);
1372 $cr->show_text($ylab.' ('.$var2.'%)');
1373
1374 $cr->restore;
1375
1376 #draw dots
1377 $cr->set_antialias('default');
1378 $cr->set_font_size (10);
1379 foreach my $i (0..$num) {
1380 $cr->set_source_rgba(@{$data_info->[$i]->[3]});
1381 $cr->arc(($left+$offset+$space+int(($data->[0]->[$i]+abs($xmin))/$xrange*$height)), ($space+$top+$offset+int(($data->[1]->[$i]+abs($ymin))/$yrange*$height)), $size, 0, 2*$PI);
1382 $cr->fill;
1383 }
1384 $cr->set_source_rgba(0, 0, 0, 1);
1385 foreach my $i (0..$num) {
1386 $extents = $cr->text_extents($data_info->[$i]->[1]);
1387 $cr->move_to(($left+$offset+$space+int(($data->[0]->[$i]+abs($xmin))/$xrange*$height))+$size+1, ($space+$top+$offset+int(($data->[1]->[$i]+abs($ymin))/$yrange*$height))+$size*2);
1388 $cr->show_text($data_info->[$i]->[1]);
1389 }
1390
1391 #draw legend
1392 my %labels;
1393 foreach my $i (0..$num) {
1394 $labels{$data_info->[$i]->[1]} = $data_info->[$i]->[2];
1395 }
1396 $cr->set_font_size(10);
1397 $fontheight = $font_extents->{height};
1398 $cr->set_source_rgba(0, 0, 0, 1);
1399 my $x = $left+$offset+$space;
1400 my $y = int($offset/2);
1401 foreach my $n (sort {$a <=> $b} keys %labels) {
1402 if($x+$cr->text_extents($n.' - '.$labels{$n})->{width}+15 >= $left+$offset+$space+$height) {
1403 $x = $left+$offset+$space;
1404 $y += $fontheight;
1405 }
1406 $cr->move_to($x,$y);
1407 $cr->show_text($n.' - '.$labels{$n});
1408 $x += $cr->text_extents($n.' - '.$labels{$n})->{width}+15;
1409
1410 }
1411
1412 #write image
1413 $cr->show_page;
1414 return $surface;
1415 }
1416
1417 sub createOddsRatioPlot {
1418 my ($data,$title,$xlab,$ylab,$file) = @_;
1419
1420 my @yvalues = (0.5,0.78,1.00,1.23,1.5);
1421
1422 my @linecol = (0, 0, 0, 0.4);
1423 my @helplinecol1 = (1,1,1, 0.9);
1424 my @helplinecol2 = (1,1,1, 0.5);
1425
1426 #create new image
1427 my $size = 40;
1428 my $offset = 20;
1429 my $left = 35;
1430 my $right = 90;
1431 my $bottom = 20;
1432 my $top = 0;
1433 my $height = 100;
1434 my $width = $size*10;
1435 my $space = 20;
1436 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+$width+$right,$top+$bottom+$offset*2+$height); #format, width, height
1437 my $cr = Cairo::Context->create($surface);
1438
1439 my ($font_extents,$extents,$fontheight,$fontdescent);
1440
1441 #background
1442 $cr->rectangle(0, 0, $left+$offset*2+$width+$right,$top+$bottom+$offset*2+$height);
1443 $cr->set_source_rgba(1, 1, 1, 1);
1444 $cr->fill;
1445
1446 #fonts
1447 $cr->select_font_face ('sans', 'normal', 'normal');
1448
1449 $cr->save;
1450
1451 #set up work space
1452 my ($dx, $dy);
1453 $cr->set_antialias('none');
1454 $cr->set_line_width(1);
1455
1456 #background for plot
1457 $cr->rectangle($left+$offset, $top+$offset, $width, $height);
1458 $cr->set_source_rgba(0.95, 0.95, 0.95, 1);
1459 $cr->fill;
1460
1461 #right side marks
1462 $cr->set_source_rgba(255/255, 127/255, 127/255, 0.6);
1463 $cr->rectangle($left+$offset+$width+8, $top+$offset, 3, 0.77/2*$height);
1464 $cr->fill;
1465 $cr->rectangle($left+$offset+$width+8, $top+$offset+$height-0.78/2*$height, 3, 0.78/2*$height);
1466 $cr->fill;
1467
1468 #get infos
1469 my $num = scalar(keys %$data)-1;
1470
1471 #draw ticks
1472 #x-axis
1473 $cr->set_source_rgba(0, 0, 0, 0.8);
1474 foreach my $i (0..$num) {
1475 $cr->move_to($left+$offset+$size/2+$i*$size, $top+$offset+$height);
1476 $cr->line_to($left+$offset+$size/2+$i*$size, $top+$offset+$height+3);
1477 }
1478 $cr->stroke;
1479 #y-axis
1480 foreach my $i (@yvalues) {
1481 $cr->move_to($left+$offset, $top+$offset+$height-$i/2*$height);
1482 $cr->line_to($left+$offset-3, $top+$offset+$height-$i/2*$height);
1483 }
1484 $cr->stroke;
1485
1486 #helplines
1487 #x-axis
1488 $cr->set_source_rgba(@helplinecol1);
1489 foreach my $i (0..$num) {
1490 $cr->move_to($left+$offset+$size/2+$i*$size, $top+$offset);
1491 $cr->line_to($left+$offset+$size/2+$i*$size, $top+$offset+$height);
1492 }
1493 $cr->stroke;
1494 #yaxis
1495 foreach my $i (@yvalues) {
1496 $cr->set_source_rgba(0, 0, 0, ($i == 0.5 || $i == 1.00 || $i == 1.50 ? 0.1 : 0.3));
1497 $cr->move_to($left+$offset, $top+$offset+$height-$i/2*$height);
1498 $cr->line_to($left+$offset+$width, $top+$offset+$height-$i/2*$height);
1499 $cr->stroke;
1500 }
1501
1502 $cr->set_antialias('default');
1503
1504 #tick labels
1505 $cr->set_source_rgba(0, 0, 0, 0.8);
1506 $cr->set_font_size(10);
1507 $font_extents = $cr->font_extents;
1508 $fontheight = $font_extents->{height};
1509 #x-axis
1510 my $xcur = 0;
1511 foreach my $dn (map {join("/",(m/../g ))} sort keys %$data) {
1512 $extents = $cr->text_extents($dn);
1513 $cr->move_to($left+$offset+$size/2-$extents->{width}/2-1+$size*$xcur++, $top+$offset+$height+$fontheight+2);
1514 $cr->show_text($dn);
1515 }
1516 #y-axis
1517 foreach my $i (@yvalues) {
1518 $cr->set_source_rgba(0, 0, 0, ($i == 0.5 || $i == 1.00 || $i == 1.50 ? 0.5 : 0.8));
1519 $extents = $cr->text_extents(sprintf("%.2f",$i));
1520 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$height-$i/2*$height+$fontheight/2-2);
1521 $cr->show_text(sprintf("%.2f",$i));
1522 }
1523
1524 #label on right side
1525 $cr->set_source_rgba(0, 0, 0, 1);
1526 $extents = $cr->text_extents('Over-represented');
1527 $cr->move_to($left+$offset+$width+15, $top+$offset+$height-1.6/2*$height+$fontheight/2-2);
1528 $cr->show_text('Over-represented');
1529 $extents = $cr->text_extents('Under-represented');
1530 $cr->move_to($left+$offset+$width+15, $top+$offset+$height-0.4/2*$height+$fontheight/2-2);
1531 $cr->show_text('Under-represented');
1532
1533 $cr->save;
1534
1535 #labels
1536 $cr->set_font_size (14);
1537 $font_extents = $cr->font_extents;
1538 $fontheight = $font_extents->{height};
1539
1540 #axis labels
1541 #x-axis
1542 $cr->set_source_rgba(0, 0, 0, 1);
1543 $extents = $cr->text_extents($xlab);
1544 $cr->move_to($left+$offset+$width/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
1545 $cr->show_text($xlab);
1546 #y-axis
1547 $cr->rotate($PI * 3 / 2);
1548 $extents = $cr->text_extents($ylab);
1549 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2),$offset);
1550 $cr->show_text($ylab);
1551
1552 $cr->restore;
1553
1554 #draw dots
1555 $cr->set_antialias('default');
1556 $xcur = 0;
1557 foreach my $dn (sort keys %$data) {
1558 if($data->{$dn} > 1.23 || $data->{$dn} < 0.78) {
1559 $cr->set_source_rgba(255/255, 127/255, 127/255, 1);
1560 } else {
1561 $cr->set_source_rgba(127/255, 127/255, 255/255, 1);
1562 }
1563 $cr->arc($left+$offset+$size/2+$size*$xcur++, $top+$offset+$height-$data->{$dn}/2*$height, 5, 0, 2*$PI);
1564 $cr->fill;
1565 }
1566
1567 #write image
1568 $cr->show_page;
1569 return $surface;
1570 }
1571
1572 sub convertToBoxValues {
1573 my ($data,$niceval) = @_;
1574 my ($xmax,$ymax,@matrix);
1575 $xmax = $ymax = 0;
1576 foreach my $i (sort {$a <=> $b} keys %$data) {
1577 $xmax++;
1578 push(@matrix,[$i,$data->{$i}->{min},$data->{$i}->{p25},$data->{$i}->{median},$data->{$i}->{p75},$data->{$i}->{max}]);
1579 $ymax = &max($ymax,$data->{$i}->{max});
1580 }
1581
1582 if($niceval) {
1583 $ymax = sprintf("%d",($ymax/$niceval)+1)*$niceval if($ymax % $niceval);
1584 }
1585
1586 return (\@matrix,$xmax,$ymax);
1587 }
1588
1589 sub createBoxPlot {
1590 my ($matrix,$xmax,$ymax,$title,$xlab,$ylab,$file,$zero,$add,$bin) = @_;
1591 $bin = ($bin ? $bin : 1);
1592 $zero = 0 unless($zero);
1593 $add = '' unless(defined $add);
1594 if($xmax != 100) {
1595 $xmax = 100;
1596 }
1597 $ymax = 1 unless($ymax);
1598 # die Dumper $matrix;
1599
1600
1601 my @col0 = (178/255, 178/255, 255/255); #b2b2ff
1602 my @col1 = (255/255, 178/255, 178/255); #ffb2b2
1603 my @col3 = (127/255, 127/255, 255/255); #7f7fff
1604 my @col4 = (255/255, 127/255, 127/255); #ff7f7f
1605 my @linecol = (0, 0, 0, 0.4);
1606 my @linecol0 = (@col3, 1);
1607 my @linecol1 = (@col4, 1);
1608 my @boxcol = (@col3, 1);
1609 my @whiscol = (@col0, 0.9);
1610 my @medcol = (0,0,0, 0.5);
1611 my @helplinecol1 = (1,1,1, 0.9);
1612 my @helplinecol2 = (1,1,1, 0.5);
1613
1614 #create new image
1615 my $size = 6;
1616 my $offset = 20;
1617 my $left = 25;
1618 my $bottom = 25;
1619 my $top = 5;
1620 my $height = 300;
1621 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+$height); #format, width, height
1622 my $cr = Cairo::Context->create($surface);
1623
1624 my ($font_extents,$extents,$fontheight,$fontdescent);
1625
1626 #background
1627 $cr->rectangle(0, 0, $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+2*200+20);
1628 $cr->set_source_rgba(1, 1, 1, 1);
1629 $cr->fill;
1630
1631 #fonts
1632 $cr->select_font_face ('sans', 'normal', 'normal');
1633 # $cr->set_font_size (30);
1634
1635 $cr->save;
1636
1637 #set up work space
1638 $cr->set_antialias('none');
1639 $cr->set_line_width(1);
1640
1641 #background for plot
1642 $cr->rectangle($left+$offset, $top+$offset, ($xmax+$zero)*$size-1, $height);
1643 $cr->set_source_rgba(0.95, 0.95, 0.95, 1);
1644 $cr->fill;
1645
1646 #draw legend
1647 $cr->set_font_size(10);
1648 # $font_extents = $cr->font_extents;
1649 my $x = $left+$offset+$size*50;
1650 foreach my $v ([\@whiscol,'Min/Max value'],[\@boxcol,'25th to 75th percentile'],[\@medcol,'Median']) {
1651 $cr->set_antialias('none');
1652 $cr->set_source_rgba(@{$v->[0]});
1653 $cr->rectangle($x, $top+5, 10, 10);
1654 $cr->fill;
1655 $x += 15;
1656 $cr->set_antialias('default');
1657 $cr->move_to($x,$top+5+9);
1658 $cr->set_source_rgba(0, 0, 0, 0.8);
1659 $cr->show_text($v->[1]);
1660 $x += $cr->text_extents($v->[1])->{width}+15;
1661 }
1662
1663 $cr->set_antialias('none');
1664
1665 #draw ticks
1666 #x-axis
1667 $cr->set_source_rgba(0, 0, 0, 0.8);
1668 # $cr->move_to($left+$offset+int($size/2+1), $top+$offset+$height);
1669 # $cr->line_to($left+$offset+int($size/2+1), $top+$offset+$height+3);
1670 # $cr->move_to($left+$offset+int($size/2+1), $top+$offset+$height+$space);
1671 # $cr->line_to($left+$offset+int($size/2+1), $top+$offset+$height+$space-3);
1672 foreach my $i (1..9) {
1673 $cr->move_to($left+$offset+int($size/2)+$size*10*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
1674 $cr->line_to($left+$offset+int($size/2)+$size*10*$i-($zero ? 0 : $size)-1, $top+$offset+$height+3);
1675 # $cr->move_to($left+$offset+int($size/2)+$size*10*$i-($zero ? 0 : $size)-1, $top+$offset);
1676 # $cr->line_to($left+$offset+int($size/2)+$size*10*$i-($zero ? 0 : $size)-1, $top+$offset-3);
1677 }
1678 $cr->stroke;
1679 #y-axis
1680 foreach my $j (0..4) {
1681 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
1682 $cr->line_to($left+$offset-3, $top+$offset+$height*$j/4-($j ? 1 : 0));
1683 # $cr->move_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
1684 # $cr->line_to($left+$offset+($xmax+$zero)*$size+3, $top+$offset+$height*$j/4-($j ? 1 : 0));
1685 }
1686 $cr->stroke;
1687
1688 #helplines
1689 $cr->set_source_rgba(@helplinecol1);
1690 foreach my $j (1..3) {
1691 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
1692 $cr->line_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
1693 }
1694 $cr->stroke;
1695
1696 $cr->set_antialias('default');
1697
1698 #tick labels
1699 $cr->set_source_rgba(0, 0, 0, 0.8);
1700 $cr->set_font_size(10);
1701 $font_extents = $cr->font_extents;
1702 $fontheight = $font_extents->{height};
1703 #x-axis
1704 # $extents = $cr->text_extents(1);
1705 # $cr->move_to($left+$offset+int($size/2+1)-$extents->{width}, $top+$offset+$height+$fontheight+2);
1706 # $cr->show_text(1);
1707 foreach my $i (1..9) {
1708 $extents = $cr->text_extents($i*10*$bin);
1709 $cr->move_to($left+$offset+int($size/2+1)+$size*10*$i-($zero ? 0 : $size)-$extents->{width}/2-1-($i == 1 ? 1 : 0), $top+$offset+$height+$fontheight+2);
1710 $cr->show_text($i*10*$bin);
1711 }
1712 #y-axis
1713 foreach my $j (0..4) {
1714 $extents = $cr->text_extents(&addCommas($ymax*$j/4));
1715 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2+$height*(4-$j)/4);
1716 $cr->show_text(&addCommas($ymax*$j/4));
1717 }
1718
1719 $cr->save;
1720
1721 #axis labels
1722 $cr->set_source_rgba(0, 0, 0, 1);
1723 $cr->set_font_size (14);
1724 $font_extents = $cr->font_extents;
1725 $fontheight = $font_extents->{height};
1726 $extents = $cr->text_extents($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
1727 $cr->move_to($left+$offset+($xmax+$zero)*$size/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
1728 $cr->show_text($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
1729 $cr->rotate($PI * 3 / 2);
1730 $extents = $cr->text_extents($ylab);
1731 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2),$offset);
1732 $cr->show_text($ylab);
1733
1734 $cr->restore;
1735
1736 #draw boxes
1737 my $factor = $height/$ymax;
1738 $cr->set_antialias('none');
1739 foreach my $v (@$matrix) {
1740 #wiskers
1741 $cr->set_source_rgba(@whiscol);
1742 if($v->[1] != $v->[2]) {
1743 $cr->move_to($left+$offset+$size*$v->[0]+1, $top+$offset+$height-$v->[1]*$factor-1);
1744 $cr->line_to($left+$offset+$size*$v->[0]+$size-2, $top+$offset+$height-$v->[1]*$factor-1);
1745 $cr->stroke;
1746 }
1747 if($v->[4] != $v->[5]) {
1748 $cr->move_to($left+$offset+$size*$v->[0]+1, $top+$offset+$height-$v->[5]*$factor);
1749 $cr->line_to($left+$offset+$size*$v->[0]+$size-2, $top+$offset+$height-$v->[5]*$factor);
1750 $cr->stroke;
1751 }
1752 $cr->save;
1753 $cr->set_dash(1,4,3);
1754 if($v->[1] != $v->[2]) {
1755 $cr->move_to($left+$offset+$size*$v->[0]+int($size/2)-1, $top+$offset+$height-$v->[2]*$factor);
1756 $cr->line_to($left+$offset+$size*$v->[0]+int($size/2)-1, $top+$offset+$height-$v->[1]*$factor);
1757 $cr->stroke;
1758 }
1759 if($v->[4] != $v->[5]) {
1760 $cr->move_to($left+$offset+$size*$v->[0]+int($size/2)-1, $top+$offset+$height-$v->[5]*$factor);
1761 $cr->line_to($left+$offset+$size*$v->[0]+int($size/2)-1, $top+$offset+$height-$v->[4]*$factor-1);
1762 $cr->stroke;
1763 }
1764 $cr->restore;
1765 #box
1766 if(($v->[2] != $v->[3]) || ($v->[4] != $v->[3])) {
1767 $cr->set_source_rgba(@whiscol);
1768 $cr->rectangle($left+$offset+$size*$v->[0], $top+$offset+$height-$v->[2]*$factor, $size-1, -($v->[4]-$v->[2])*$factor);
1769 $cr->fill;
1770 $cr->stroke;
1771 $cr->set_source_rgba(@boxcol);
1772 $cr->rectangle($left+$offset+$size*$v->[0], $top+$offset+$height-$v->[2]*$factor, $size-2, -($v->[4]-$v->[2])*$factor);
1773 $cr->stroke;
1774 } else {
1775 $cr->set_source_rgba(@boxcol);
1776 $cr->move_to($left+$offset+$size*$v->[0], $top+$offset+$height-$v->[3]*$factor);
1777 $cr->line_to($left+$offset+$size*$v->[0]+$size-1, $top+$offset+$height-$v->[3]*$factor);
1778 $cr->stroke;
1779 }
1780 #median
1781 $cr->set_source_rgba(@medcol);
1782 $cr->move_to($left+$offset+$size*$v->[0]+1, $top+$offset+$height-$v->[3]*$factor);
1783 $cr->line_to($left+$offset+$size*$v->[0]+$size-2, $top+$offset+$height-$v->[3]*$factor);
1784 $cr->stroke;
1785 }
1786
1787 #write image
1788 $cr->show_page;
1789 return $surface;
1790 }
1791
1792 sub convertToBarValues {
1793 my ($data,$niceval,$start,$max) = @_;
1794 my ($xmax,$ymax,@matrix,$tmp);
1795 $xmax = $ymax = 0;
1796
1797 #get xmax value
1798 if($max) {
1799 $xmax = $max;
1800 } else {
1801 foreach my $q (keys %$data) {
1802 $xmax = &max($xmax,$q);
1803 }
1804 }
1805 if($niceval) {
1806 $xmax = sprintf("%d",($xmax/$niceval)+1)*$niceval if($xmax % $niceval);
1807 }
1808
1809 #get matrix values
1810 foreach my $q ($start..$xmax) {
1811 $tmp = (exists $data->{$q} ? $data->{$q} : 0);
1812 $ymax = &max($ymax,$tmp);
1813 push(@matrix,$tmp);
1814 }
1815
1816 $ymax = sprintf("%d",($ymax/4)+1)*4 if($ymax % 4);
1817
1818 return (\@matrix,$xmax,$ymax);
1819 }
1820
1821 sub createBarPlot {
1822 my ($matrix,$xmax,$ymax,$title,$xlab,$ylab,$file,$zero) = @_;
1823
1824 my @col0 = (178/255, 178/255, 255/255); #b2b2ff
1825 my @col1 = (255/255, 178/255, 178/255); #ffb2b2
1826 my @col3 = (127/255, 127/255, 255/255); #7f7fff
1827 my @col4 = (255/255, 127/255, 127/255); #ff7f7f
1828 my @linecol = (0, 0, 0, 0.4);
1829 my @linecol0 = (@col3, 1);
1830 my @linecol1 = (@col4, 1);
1831 my @barcol0 = (@col3, 1);
1832 my @barcol1 = (@col4, 1);
1833 my @helplinecol1 = (1,1,1, 0.9);
1834 my @helplinecol2 = (1,1,1, 0.5);
1835
1836 #create new image
1837 my $size = ($xmax <= 50 ? 10 : ($xmax <= 100 ? 6 : 3));
1838 my $offset = 20;
1839 my $left = 25;
1840 my $bottom = 15;
1841 my $top = 0;
1842 my $height = 200;
1843 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+$height); #format, width, height
1844 my $cr = Cairo::Context->create($surface);
1845
1846 my ($font_extents,$extents,$fontheight,$fontdescent);
1847
1848 #background
1849 $cr->rectangle(0, 0, $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+2*200+20);
1850 $cr->set_source_rgba(1, 1, 1, 1);
1851 $cr->fill;
1852
1853 #fonts
1854 $cr->select_font_face ('sans', 'normal', 'normal');
1855 # $cr->set_font_size (30);
1856
1857 $cr->save;
1858
1859 #set up work space
1860 my ($dx, $dy);
1861 $cr->set_antialias('none');
1862 $cr->set_line_width(1);
1863
1864 #background for plot
1865 $cr->rectangle($left+$offset, $top+$offset, ($xmax+$zero)*$size-1, $height);
1866 $cr->set_source_rgba(0.95, 0.95, 0.95, 1);
1867 $cr->fill;
1868
1869 #draw ticks
1870 #x-axis
1871 $cr->set_source_rgba(0, 0, 0, 0.8);
1872 foreach my $i (($zero ? 0 : 1)..$xmax) {
1873 if(($i%5) == 0 && $i > 1 && $i < $xmax) {
1874 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
1875 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+3);
1876 } else {
1877 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
1878 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+1);
1879 }
1880 }
1881 $cr->stroke;
1882
1883 #y-axis
1884 $cr->move_to($left+$offset, $top+$offset);
1885 $cr->line_to($left+$offset-3, $top+$offset);
1886 $cr->move_to($left+$offset, $top+$offset+$height-1);
1887 $cr->line_to($left+$offset-3, $top+$offset+$height-1);
1888 $cr->stroke;
1889
1890 #helplines
1891 $cr->set_source_rgba(@helplinecol1);
1892 foreach my $j (1..3) {
1893 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
1894 $cr->line_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
1895 }
1896 $cr->stroke;
1897
1898 $cr->set_antialias('default');
1899
1900 #tick labels
1901 $cr->set_source_rgba(0, 0, 0, 0.8);
1902 $cr->set_font_size(10);
1903 $font_extents = $cr->font_extents;
1904 $fontheight = $font_extents->{height};
1905 #x-axis
1906 foreach my $i (($zero ? 0 : 1)..$xmax) {
1907 if(($i%5) == 0 && $i > 1 && $i < $xmax) {
1908 $extents = $cr->text_extents($i);
1909 $cr->move_to($left+$offset+int($size/2+1)+$size*$i-($zero ? 0 : $size)-$extents->{width}/2-1-($i == 1 ? 1 : 0), $top+$offset+$height+$fontheight+2);
1910 $cr->show_text($i);
1911 }
1912 }
1913 #y-axis
1914 $extents = $cr->text_extents(&addCommas($ymax));
1915 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2);
1916 $cr->show_text(&addCommas($ymax));
1917 $extents = $cr->text_extents(0);
1918 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2+$height);
1919 $cr->show_text(0);
1920
1921 $cr->save;
1922
1923 #labels
1924 $cr->set_font_size (14);
1925 $font_extents = $cr->font_extents;
1926 $fontheight = $font_extents->{height};
1927
1928 #axis labels
1929 $cr->set_source_rgba(0, 0, 0, 1);
1930 $extents = $cr->text_extents($xlab);
1931 $cr->move_to($left+$offset+($xmax+$zero)*$size/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
1932 $cr->show_text($xlab);
1933 $cr->rotate($PI * 3 / 2);
1934 $extents = $cr->text_extents($ylab);
1935 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2),$offset);
1936 $cr->show_text($ylab);
1937
1938 $cr->restore;
1939
1940 #draw boxes
1941 $cr->set_antialias('none');
1942 foreach my $pos (0..$xmax-($zero ? 0 : 1)) {
1943 next unless($matrix->[$pos+($zero ? 0 : 1)]);
1944 my $tmp = $matrix->[$pos+($zero ? 0 : 1)] / $ymax;
1945 #unique
1946 if($tmp) {
1947 $cr->set_source_rgba(@barcol0);
1948 $cr->rectangle($left+$offset+($pos+($zero ? 0 : 1))*$size, $top+$offset+$height, $size-1, -$tmp*$height);
1949 $cr->fill;
1950 }
1951 }
1952
1953 #write image
1954 $cr->show_page;
1955 return $surface;
1956 }
1957
1958 sub convertOdToStackBinMatrix {
1959 my ($data,$stacks,$min,$max,$nonice) = @_;
1960
1961 my ($num,$ymax,$xmax,$xmin,$step,%vals,%sums,$sum,@matrix,$bin,$tmpbin);
1962
1963 #make nice xmax value
1964 if(defined $max) {
1965 $xmax = $max;
1966 } else {
1967 $xmax = (sort {$b <=> $a} keys %$data)[0];
1968 }
1969 $bin = &getBinVal($xmax);
1970 $xmax = $bin*100;
1971 $xmin = (defined $min ? $min : 0);
1972
1973 #get data to bin and find y axis max value
1974 $ymax = 0;
1975 foreach my $s (0..$stacks-1) {
1976 $sums{$s} = 0;
1977 }
1978 $sum = 0;
1979 $tmpbin = $bin;
1980 foreach my $i ($xmin..$xmax) {
1981 foreach my $s (0..$stacks-1) {
1982 next unless(exists $data->{$i}->{$s});
1983 $sums{$s} += $data->{$i}->{$s};
1984 $sum += $data->{$i}->{$s};
1985 }
1986 if(--$tmpbin <= 0) {
1987 $tmpbin = $bin;
1988 $ymax = &max($ymax,$sum);
1989 $sum = 0;
1990 foreach my $s (0..$stacks-1) {
1991 push(@{$matrix[$s]},$sums{$s});
1992 $sums{$s} = 0;
1993 }
1994 }
1995 }
1996
1997 #make nice ymax value
1998 unless($nonice) {
1999 $ymax = sprintf("%d",($ymax/4)+1)*4 if($ymax % 4);
2000 # $step = ($ymax <= 10 ? 10 : ($ymax < 40 ? 40 : ($ymax < 100 ? 100 : ($ymax < 1000 ? 100 : 100))));
2001 # $ymax = sprintf("%d",($ymax/$step)+1)*$step if($ymax % $step);
2002 }
2003
2004 return (\@matrix,$xmax,$ymax,$stacks);
2005 }
2006
2007 sub createStackBarPlot {
2008 my ($matrix,$xmax,$ymax,$stacks,$title,$xlab,$ylab,$file,$zero,$add) = @_;
2009
2010 my $bin = 1;
2011 if($xmax > 100) {
2012 $bin = $xmax / 100;
2013 $xmax = 100;
2014 }
2015
2016 my @legend = ('Exact dupl.','5\' dupl.','3\' dupl.','Rev. compl. exact dupl.','Rev. compl. 5\'/3\' dupl.');
2017 my @cols = ([69/255, 114/255, 167/255, 1],
2018 [137/255, 1165/255, 78/255, 1],
2019 [170/255, 70/255, 67/255, 1],
2020 [147/255, 169/255, 207/255, 1],
2021 [51/255, 102/255, 102/255, 1]);
2022 my @barcol = (127/255, 127/255, 255/255, 1); #b2b2ff
2023 my @meancol = (255/255, 127/255, 127/255, 1); #ffb2b2
2024 my @stdcol = (178/255, 178/255, 255/255, 0.8); #7f7fff
2025 my @std1col = (0, 0, 0, 0.02); #ff7f7f
2026 my @std2col = (0, 0, 0, 0.02); #ff7f7f
2027 my @linecol = (0, 0, 0, 0.4);
2028 my @helplinecol = (1, 1, 1, 0.9);
2029 my @background = (0.95, 0.95, 0.95, 1);
2030 my @tickcol = (0, 0, 0, 0.8);
2031 my @labelcol = (0, 0, 0, 1);
2032
2033 #create new image
2034 my $size = 6;
2035 my $offset = 20;
2036 my $left = 40;
2037 my $bottom = 15;
2038 my $top = 20;
2039 my $height = 200;
2040 my $surface = Cairo::ImageSurface->create('argb32', $left+$offset*2+($xmax+$zero)*$size,$bottom+$top+$offset*2+$height); #format, width, height
2041 my $cr = Cairo::Context->create($surface);
2042
2043 my ($font_extents,$extents,$fontheight,$fontdescent);
2044
2045 #background
2046 $cr->rectangle(0, 0, $left+$offset*2+($xmax+$zero)*$size,$bottom+$offset*2+2*200+20);
2047 $cr->set_source_rgba(1, 1, 1, 1);
2048 $cr->fill;
2049
2050 #fonts
2051 $cr->select_font_face ('sans', 'normal', 'normal');
2052
2053 $cr->save;
2054
2055 #set up work space
2056 $cr->set_antialias('none');
2057 $cr->set_line_width(1);
2058
2059 #background for plot
2060 $cr->rectangle($left+$offset, $top+$offset, ($xmax+$zero)*$size-1, $height);
2061 $cr->set_source_rgba(@background);
2062 $cr->fill;
2063
2064 #draw legend
2065 $cr->set_font_size(10);
2066 # $font_extents = $cr->font_extents;
2067 my $x = $left+$offset+$size*100-5;
2068 foreach my $i (reverse (0..scalar(@legend)-1)) {
2069 $cr->set_antialias('default');
2070 $x -= $cr->text_extents($legend[$i])->{width};
2071 $cr->move_to($x,$top+5+9);
2072 $cr->set_source_rgba(@tickcol);
2073 $cr->show_text($legend[$i]);
2074 $x -= 15;
2075 $cr->set_antialias('none');
2076 $cr->set_source_rgba(@{$cols[$i]});
2077 $cr->rectangle($x, $top+5, 10, 10);
2078 $cr->fill;
2079 $x -= 15;
2080 }
2081
2082 #draw ticks
2083 $cr->set_antialias('none');
2084 #x-axis
2085 $cr->set_source_rgba(@tickcol);
2086 foreach my $i (($zero ? 0 : 1)..$xmax) {
2087 if(($i%5) == 0 && $i > 1 && $i < $xmax) {
2088 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
2089 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+3);
2090 } else {
2091 $cr->move_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height);
2092 $cr->line_to($left+$offset+int($size/2)+$size*$i-($zero ? 0 : $size)-1, $top+$offset+$height+1);
2093 }
2094 }
2095 $cr->stroke;
2096
2097 #y-axis
2098 $cr->move_to($left+$offset, $top+$offset);
2099 $cr->line_to($left+$offset-3, $top+$offset);
2100 $cr->move_to($left+$offset, $top+$offset+$height-1);
2101 $cr->line_to($left+$offset-3, $top+$offset+$height-1);
2102 $cr->stroke;
2103
2104 #helplines
2105 $cr->set_source_rgba(@helplinecol);
2106 foreach my $j (1..3) {
2107 $cr->move_to($left+$offset, $top+$offset+$height*$j/4-($j ? 1 : 0));
2108 $cr->line_to($left+$offset+($xmax+$zero)*$size, $top+$offset+$height*$j/4-($j ? 1 : 0));
2109 }
2110 $cr->stroke;
2111
2112 $cr->set_antialias('default');
2113
2114 #tick labels
2115 $cr->set_source_rgba(@tickcol);
2116 $cr->set_font_size(10);
2117 $font_extents = $cr->font_extents;
2118 $fontheight = $font_extents->{height};
2119 #x-axis
2120 foreach my $i (($zero ? 0 : 1)..$xmax) {
2121 if(($i%10) == 0 && $i > 1 && $i < $xmax) {
2122 $extents = $cr->text_extents($i*$bin);
2123 $cr->move_to($left+$offset+int($size/2+1)+$size*$i-($zero ? 0 : $size)-$extents->{width}/2-1-($i == 1 ? 1 : 0), $top+$offset+$height+$fontheight+2);
2124 $cr->show_text($i*$bin);
2125 }
2126 }
2127 #y-axis
2128 $extents = $cr->text_extents(&addCommas($ymax));
2129 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2);
2130 $cr->show_text(&addCommas($ymax));
2131 $extents = $cr->text_extents(0);
2132 $cr->move_to($left+$offset-5-$extents->{width}, $top+$offset+$fontheight/2-2+$height);
2133 $cr->show_text(0);
2134
2135 $cr->save;
2136
2137 #labels
2138 $cr->set_font_size(14);
2139 $font_extents = $cr->font_extents;
2140 $fontheight = $font_extents->{height};
2141
2142 #axis labels
2143 $cr->set_source_rgba(@labelcol);
2144 $extents = $cr->text_extents($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
2145 $cr->move_to($left+$offset+($xmax+$zero)*$size/2-$extents->{width}/2, $top+$offset+$height+$fontheight+15);
2146 $cr->show_text($xlab.($bin>1 ? ' (Bin size: '.$bin.($add ? $add.')' : '') : ''));
2147 $cr->rotate($PI * 3 / 2);
2148 $extents = $cr->text_extents($ylab.($bin>1 ? ' (per bin)' : ''));
2149 $cr->move_to(-($top+$offset+$height/2+$extents->{width}/2+($bin>1 ? 12 : 0)),$offset+10);
2150 $cr->show_text($ylab.($bin>1 ? ' (per bin)' : ''));
2151
2152 $cr->restore;
2153
2154 #draw boxes
2155 $cr->set_antialias('none');
2156 foreach my $pos (0..$xmax-($zero ? 0 : 1)) {
2157 my $tmp = 0;
2158 foreach my $s (0..$stacks-1) {
2159 next unless($matrix->[$s]->[$pos]);
2160 my $cur = $matrix->[$s]->[$pos] / $ymax;
2161 $cr->set_source_rgba(@{$cols[$s]});
2162 if($cur) {
2163 $cr->rectangle($left+$offset+$pos*$size, $top+$offset+$height-$tmp*$height, $size-1, -$cur*$height);
2164 $cr->fill;
2165 }
2166 $tmp += $cur;
2167 }
2168 }
2169
2170 #write image
2171 $cr->show_page;
2172 return $surface;
2173 }
2174
2175 sub header {
2176 return '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
2177 <html>
2178 <head>
2179 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
2180 <title>PRINSEQ-'.$WHAT.' Report</title>
2181 <style type="text/css">
2182 <!--/* <![CDATA[ */
2183 '.$CSS_STYLE.'
2184 /* ]]> */-->
2185 </style>
2186 </head>
2187 <body>
2188 <center>';
2189 }
2190
2191 sub footer {
2192 return '</center></body></html>';
2193 }
2194
2195 sub generateHtml {
2196 my ($in,$out) = @_;
2197 my ($file,$data,$surface,$html,$png);
2198 $data = &readGdFile($in);
2199 my $time = sprintf("%02d/%02d/%04d %02d:%02d:%02d",sub {($_[4]+1,$_[3],$_[5]+1900,$_[2],$_[1],$_[0])}->(localtime));
2200
2201 $html .= &header();
2202 $html .= '<h2>PRINSEQ-'.$WHAT.' v'.$VERSION.' HTML Report&nbsp;&nbsp;&nbsp;</h2>[Generated: '.$time.']<br /><br />';
2203 $html .= '<div class="info-panel">';
2204
2205 #input info
2206 if(exists $data->{numseqs}) {
2207 $html .= '<div class="info-header"><span class="info-header-title">Input Information</span></div>';
2208 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Input file(s):</td><td class="info-table-value">'.($data->{filename1} ? &convertIntToString($data->{filename1}) : '-').($data->{filename2} ? ' and '.&convertIntToString($data->{filename2}) : '').'</td></tr><tr><td class="info-table-type">Input format(s):</td><td class="info-table-value">'.($data->{format1} ? uc($data->{format1}) : '-').($data->{format2} ? ' and '.uc($data->{format2}) : '').'</td></tr>';
2209 if(exists $data->{pairedend} && $data->{pairedend}) {
2210 my $singletons1 = ($data->{numseqs}||0)-($data->{pairs}||0);
2211 my $singletons2 = ($data->{numseqs2}||0)-($data->{pairs}||0);
2212 $html .= '<tr><td class="info-table-type"># Sequences (file 1):</td><td class="info-table-value">'.&addCommas($data->{numseqs}||'-').'</td></tr><tr><td class="info-table-type">Total bases (file 1):</td><td class="info-table-value">'.&addCommas($data->{numbases}||'-').'</td></tr><tr><td class="info-table-type"># Sequences (file 2):</td><td class="info-table-value">'.&addCommas($data->{numseqs2}||'-').'</td></tr><tr><td class="info-table-type">Total bases (file 2):</td><td class="info-table-value">'.&addCommas($data->{numbases2}||'-').'</td></tr><tr><td class="info-table-type"># Pairs:</td><td class="info-table-value">'.&addCommas($data->{pairs}||'-').($data->{pairs} ? '&nbsp;&nbsp;('.sprintf("%.2f",(100*(2*$data->{pairs})/(($data->{numseqs}||0)+($data->{numseqs2}||0)))).'% of sequences)' : '').'</td></tr></tr><tr><td class="info-table-type"># Singletons (file 1):</td><td class="info-table-value">'.&addCommas($singletons1).($singletons1 ? '&nbsp;&nbsp;('.sprintf("%.2f",(100*$singletons1/$data->{numseqs})).'%)' : '').'</td></tr><tr><td class="info-table-type"># Singletons (file 2):</td><td class="info-table-value">'.&addCommas($singletons2).($singletons2 ? '&nbsp;&nbsp;('.sprintf("%.2f",(100*$singletons2/$data->{numseqs2})).'%)' : '').'</td></tr>';
2213 } else {
2214 $html .= '<tr><td class="info-table-type"># Sequences:</td><td class="info-table-value">'.&addCommas($data->{numseqs}||'-').'</td></tr><tr><td class="info-table-type">Total bases:</td><td class="info-table-value">'.&addCommas($data->{numbases}||'-').'</td></tr>';
2215 }
2216 $html .= '</tbody></table></div><hr>';
2217 }
2218
2219 #length plot
2220 if(exists $data->{counts}->{length} && keys %{$data->{counts}->{length}}) {
2221 $html .= '<div class="info-header"><span class="info-header-title">Length Distribution</span></div>';
2222 if(exists $data->{pairedend} && $data->{pairedend}) {
2223 $html .= '<div class="info-content"><b>File 1</b><br /><table border="0" cellpadding="0" cellspacing="0"> <tbody><tr><td class="info-table-type">Mean sequence length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{mean} ? sprintf("%.2f",$data->{stats}->{length}->{mean}) : '-').' &plusmn; '.(exists $data->{stats}->{length}->{std} ? sprintf("%.2f",$data->{stats}->{length}->{std}) : '-').' bp</td></tr><tr><td class="info-table-type">Minimum length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{min} ? &addCommas($data->{stats}->{length}->{min}) : '-').' bp</td></tr> <tr><td class="info-table-type">Maximum length:</td><td class="info-table-value">'.(exists $data->{stats}->{length}->{max} ? &addCommas($data->{stats}->{length}->{max}) : '-').' bp</td></tr> <tr><td class="info-table-type">Length range:</td><td class="info-table-value">'.(exists $data->{stats}->{length}->{range} ? &addCommas($data->{stats}->{length}->{range}) : '-').' bp</td></tr> <tr><td class="info-table-type">Mode length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{mode} ? &addCommas($data->{stats}->{length}->{mode}) : '-').' bp with '.(exists $data->{stats}->{length}->{modeval} ? &addCommas($data->{stats}->{length}->{modeval}) : '-').' sequences</td></tr></tbody></table><br>';
2224 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{length},1),$data->{stats}->{length},'Length Distribution','Read Length in bp','# Sequences','',0,' bp');
2225 $png = '';
2226 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2227 $html .= &insert_image($png);
2228 $html .= '</div>';
2229 $html .= '<div class="info-content"><br /><b>File 2</b><br /><table border="0" cellpadding="0" cellspacing="0"> <tbody><tr><td class="info-table-type">Mean sequence length:</td> <td class="info-table-value">'.(exists $data->{stats2}->{length}->{mean} ? sprintf("%.2f",$data->{stats2}->{length}->{mean}) : '-').' &plusmn; '.(exists $data->{stats2}->{length}->{std} ? sprintf("%.2f",$data->{stats2}->{length}->{std}) : '-').' bp</td></tr><tr><td class="info-table-type">Minimum length:</td> <td class="info-table-value">'.(exists $data->{stats2}->{length}->{min} ? &addCommas($data->{stats2}->{length}->{min}) : '-').' bp</td></tr> <tr><td class="info-table-type">Maximum length:</td><td class="info-table-value">'.(exists $data->{stats2}->{length}->{max} ? &addCommas($data->{stats2}->{length}->{max}) : '-').' bp</td></tr> <tr><td class="info-table-type">Length range:</td><td class="info-table-value">'.(exists $data->{stats2}->{length}->{range} ? &addCommas($data->{stats2}->{length}->{range}) : '-').' bp</td></tr> <tr><td class="info-table-type">Mode length:</td> <td class="info-table-value">'.(exists $data->{stats2}->{length}->{mode} ? &addCommas($data->{stats2}->{length}->{mode}) : '-').' bp with '.(exists $data->{stats2}->{length}->{modeval} ? &addCommas($data->{stats2}->{length}->{modeval}) : '-').' sequences</td></tr></tbody></table><br>';
2230 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{length},1),$data->{stats2}->{length},'Length Distribution','Read Length in bp','# Sequences','',0,' bp');
2231 $png = '';
2232 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2233 $html .= &insert_image($png);
2234 $html .= '</div>';
2235 } else {
2236 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"> <tbody><tr><td class="info-table-type">Mean sequence length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{mean} ? sprintf("%.2f",$data->{stats}->{length}->{mean}) : '-').' &plusmn; '.(exists $data->{stats}->{length}->{std} ? sprintf("%.2f",$data->{stats}->{length}->{std}) : '-').' bp</td></tr><tr><td class="info-table-type">Minimum length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{min} ? &addCommas($data->{stats}->{length}->{min}) : '-').' bp</td></tr> <tr><td class="info-table-type">Maximum length:</td><td class="info-table-value">'.(exists $data->{stats}->{length}->{max} ? &addCommas($data->{stats}->{length}->{max}) : '-').' bp</td></tr> <tr><td class="info-table-type">Length range:</td><td class="info-table-value">'.(exists $data->{stats}->{length}->{range} ? &addCommas($data->{stats}->{length}->{range}) : '-').' bp</td></tr> <tr><td class="info-table-type">Mode length:</td> <td class="info-table-value">'.(exists $data->{stats}->{length}->{mode} ? &addCommas($data->{stats}->{length}->{mode}) : '-').' bp with '.(exists $data->{stats}->{length}->{modeval} ? &addCommas($data->{stats}->{length}->{modeval}) : '-').' sequences</td></tr></tbody></table><br>';
2237 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{length},1),$data->{stats}->{length},'Length Distribution','Read Length in bp','# Sequences','',0,' bp');
2238 $png = '';
2239 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2240 $html .= &insert_image($png);
2241 $html .= '</div>';
2242 }
2243 $html .= '<hr>';
2244 }
2245
2246 #GC content
2247 if(exists $data->{counts}->{gc} && keys %{$data->{counts}->{gc}}) {
2248 $html .= '<div class="info-header"><span class="info-header-title">GC Content Distribution</span></div>';
2249 if(exists $data->{pairedend} && $data->{pairedend}) {
2250 $html .= '<div class="info-content"><b>File 1</b><br /><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Mean GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{mean} ? sprintf("%.2f",$data->{stats}->{gc}->{mean}) : '-').' &plusmn; '.(exists $data->{stats}->{gc}->{std} ? sprintf("%.2f",$data->{stats}->{gc}->{std}) : '-').' %</td></tr> <tr><td class="info-table-type">Minimum GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{min} ? $data->{stats}->{gc}->{min} : '-').' %</td></tr> <tr><td class="info-table-type">Maximum GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{max} ? $data->{stats}->{gc}->{max} : '-').' %</td></tr> <tr><td class="info-table-type">GC content range:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{range} ? $data->{stats}->{gc}->{range} : '-').' %</td></tr> <tr><td class="info-table-type">Mode GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{mode} ? $data->{stats}->{gc}->{mode} : '-').' % with '.(exists $data->{stats}->{gc}->{modeval} ? &addCommas($data->{stats}->{gc}->{modeval}) : '-').' sequences</td></tr> </tbody></table><br>';
2251 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{gc},0),$data->{stats}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences','',1);
2252 $png = '';
2253 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2254 $html .= &insert_image($png);
2255 $html .= '</div>';
2256 $html .= '<div class="info-content"><br /><b>File 2</b><br /><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Mean GC content:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{mean} ? sprintf("%.2f",$data->{stats2}->{gc}->{mean}) : '-').' &plusmn; '.(exists $data->{stats2}->{gc}->{std} ? sprintf("%.2f",$data->{stats2}->{gc}->{std}) : '-').' %</td></tr> <tr><td class="info-table-type">Minimum GC content:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{min} ? $data->{stats2}->{gc}->{min} : '-').' %</td></tr> <tr><td class="info-table-type">Maximum GC content:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{max} ? $data->{stats2}->{gc}->{max} : '-').' %</td></tr> <tr><td class="info-table-type">GC content range:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{range} ? $data->{stats2}->{gc}->{range} : '-').' %</td></tr> <tr><td class="info-table-type">Mode GC content:</td> <td class="info-table-value">'.(exists $data->{stats2}->{gc}->{mode} ? $data->{stats2}->{gc}->{mode} : '-').' % with '.(exists $data->{stats2}->{gc}->{modeval} ? &addCommas($data->{stats2}->{gc}->{modeval}) : '-').' sequences</td></tr> </tbody></table><br>';
2257 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{gc},0),$data->{stats2}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences','',1);
2258 $png = '';
2259 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2260 $html .= &insert_image($png);
2261 $html .= '</div>';
2262 } else {
2263 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Mean GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{mean} ? sprintf("%.2f",$data->{stats}->{gc}->{mean}) : '-').' &plusmn; '.(exists $data->{stats}->{gc}->{std} ? sprintf("%.2f",$data->{stats}->{gc}->{std}) : '-').' %</td></tr> <tr><td class="info-table-type">Minimum GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{min} ? $data->{stats}->{gc}->{min} : '-').' %</td></tr> <tr><td class="info-table-type">Maximum GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{max} ? $data->{stats}->{gc}->{max} : '-').' %</td></tr> <tr><td class="info-table-type">GC content range:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{range} ? $data->{stats}->{gc}->{range} : '-').' %</td></tr> <tr><td class="info-table-type">Mode GC content:</td> <td class="info-table-value">'.(exists $data->{stats}->{gc}->{mode} ? $data->{stats}->{gc}->{mode} : '-').' % with '.(exists $data->{stats}->{gc}->{modeval} ? &addCommas($data->{stats}->{gc}->{modeval}) : '-').' sequences</td></tr> </tbody></table><br>';
2264 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{gc},0),$data->{stats}->{gc},'GC Content Distribution','GC Content (0-100%)','Number of Sequences','',1);
2265 $png = '';
2266 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2267 $html .= &insert_image($png);
2268 $html .= '</div>';
2269 }
2270 $html .= '<hr>';
2271 }
2272
2273 #Base quality
2274 if(exists $data->{quals} || exists $data->{qualsmean} || exists $data->{qualsbin}) {
2275 $html .= '<div class="info-header"><span class="info-header-title">Base Quality Distribution</span></div><div class="info-content">';
2276 if(exists $data->{pairedend} && $data->{pairedend}) {
2277 $html .= '<b>File 1</b><br />';
2278 }
2279 }
2280 if(exists $data->{quals} && keys %{$data->{quals}}) {
2281 $surface = &createBoxPlot(&convertToBoxValues($data->{quals},4),'Base Quality Distribution','Read position in %','Quality score','');
2282 $png = '';
2283 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2284 $html .= &insert_image($png);
2285 }
2286 if(exists $data->{qualsbin} && keys %{$data->{qualsbin}}) {
2287 $surface = &createBoxPlot(&convertToBoxValues($data->{qualsbin},4),'Base Quality Distribution','Read position in bp','Quality score','',0,'bp',$data->{binval});
2288 $png = '';
2289 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2290 $html .= '<br /><br />' if(exists $data->{quals});
2291 $html .= &insert_image($png);
2292 }
2293 if(exists $data->{qualsmean} && keys %{$data->{qualsmean}}) {
2294 $surface = &createBarPlot(&convertToBarValues($data->{qualsmean},5,1),'Sequence Quality Distribution','Mean of quality scores per sequence','Number of sequences','',0);
2295 $png = '';
2296 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2297 $html .= '<br /><br />' if(exists $data->{qualsbin});
2298 $html .= &insert_image($png);
2299 }
2300 if(exists $data->{pairedend} && $data->{pairedend}) {
2301 if(exists $data->{quals} || exists $data->{qualsmean} || exists $data->{qualsbin}) {
2302 $html .= '<br /><br /><br /><b>File 2</b><br />';
2303 }
2304 if(exists $data->{quals2} && keys %{$data->{quals2}}) {
2305 $surface = &createBoxPlot(&convertToBoxValues($data->{quals2},4),'Base Quality Distribution','Read position in %','Quality score','');
2306 $png = '';
2307 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2308 $html .= &insert_image($png);
2309 }
2310 if(exists $data->{qualsbin2} && keys %{$data->{qualsbin2}}) {
2311 $surface = &createBoxPlot(&convertToBoxValues($data->{qualsbin2},4),'Base Quality Distribution','Read position in bp','Quality score','',0,'bp',$data->{binval});
2312 $png = '';
2313 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2314 $html .= '<br /><br />' if(exists $data->{quals2});
2315 $html .= &insert_image($png);
2316 }
2317 if(exists $data->{qualsmean2} && keys %{$data->{qualsmean2}}) {
2318 $surface = &createBarPlot(&convertToBarValues($data->{qualsmean2},5,1),'Sequence Quality Distribution','Mean of quality scores per sequence','Number of sequences','',0);
2319 $png = '';
2320 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2321 $html .= '<br /><br />' if(exists $data->{qualsbin2});
2322 $html .= &insert_image($png);
2323 }
2324 }
2325 if(exists $data->{quals} || exists $data->{qualsmean} || exists $data->{qualsbin}) {
2326 $html .= '</div><hr>';
2327 }
2328
2329 #Ns
2330 if((exists $data->{counts}->{ns} && keys %{$data->{counts}->{ns}}) || (exists $data->{counts2} && exists $data->{counts2}->{ns} && keys %{$data->{counts2}->{ns}})) {
2331 $html .= '<div class="info-header"><span class="info-header-title">Occurence of N</span></div><div class="info-content">';
2332 if(exists $data->{pairedend} && $data->{pairedend}) {
2333 $html .= '<b>File 1</b><br />';
2334 }
2335 }
2336 if(exists $data->{counts}->{ns} && keys %{$data->{counts}->{ns}}) {
2337 my $nscount = 0;
2338 foreach my $n (values %{$data->{counts}->{ns}}) {
2339 $nscount += $n;
2340 }
2341 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Sequences with N:</td> <td class="info-table-value">'.($nscount ? &addCommas($nscount).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs}*$nscount).' %)' : 0).'</td></tr><tr><td class="info-table-type">Max percentage of Ns per sequence:</td> <td class="info-table-value">'.(exists $data->{stats}->{ns}->{max} ? $data->{stats}->{ns}->{max} : 0).' %</td></tr></tbody></table>';
2342 if($nscount) {
2343 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{ns},1),undef,'Percentage of N\'s (> 0%)','Percentage of N\'s per Read (1-100%)','# Sequences','',0);
2344 $png = '';
2345 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2346 $html .= '<br />'.&insert_image($png);
2347 }
2348 }
2349 if(exists $data->{pairedend} && $data->{pairedend} && exists $data->{counts2}->{ns} && keys %{$data->{counts2}->{ns}}) {
2350 $html .= '<br /><br /><br /><b>File 2</b><br />';
2351 my $nscount = 0;
2352 foreach my $n (values %{$data->{counts2}->{ns}}) {
2353 $nscount += $n;
2354 }
2355 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">Sequences with N:</td> <td class="info-table-value">'.($nscount ? &addCommas($nscount).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs2}*$nscount).' %)' : 0).'</td></tr><tr><td class="info-table-type">Max percentage of Ns per sequence:</td> <td class="info-table-value">'.(exists $data->{stats2}->{ns}->{max} ? $data->{stats2}->{ns}->{max} : 0).' %</td></tr></tbody></table>';
2356 if($nscount) {
2357 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{ns},1),undef,'Percentage of N\'s (> 0%)','Percentage of N\'s per Read (1-100%)','# Sequences','',0);
2358 $png = '';
2359 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2360 $html .= '<br />'.&insert_image($png);
2361 }
2362 }
2363 if((exists $data->{counts}->{ns} && keys %{$data->{counts}->{ns}}) || (exists $data->{counts2} && exists $data->{counts2}->{ns} && keys %{$data->{counts2}->{ns}})) {
2364 $html .= '</div><hr>';
2365 }
2366
2367 #tails
2368 if(exists $data->{tail} || exists $data->{tail2}) {
2369 $html .= '<div class="info-header"><span class="info-header-title">Poly-A/T Tails</span></div><div class="info-content">';
2370 }
2371 if(exists $data->{tail}) {
2372 my $tail5count = 0;
2373 foreach my $n (values %{$data->{counts}->{tail5}}) {
2374 $tail5count += $n;
2375 }
2376 my $tail3count = 0;
2377 foreach my $n (values %{$data->{counts}->{tail3}}) {
2378 $tail3count += $n;
2379 }
2380 if(exists $data->{pairedend} && $data->{pairedend}) {
2381 $html .= '<b>File 1</b><br />';
2382 }
2383 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value">5\'-end</td> <td class="info-table-value">3\'-end</td></tr> <tr><td class="info-table-type">Sequences with tail:</td><td class="info-table-value">'.($tail5count ? &addCommas($tail5count).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs}*$tail5count).' %)' : 0).'</td> <td class="info-table-value">'.($tail3count ? &addCommas($tail3count).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs}*$tail3count).' %)' : 0).'</td></tr> <tr><td class="info-table-type">Maximum tail length:</td> <td class="info-table-value">'.(exists $data->{stats}->{tail5}->{max} ? $data->{stats}->{tail5}->{max} : 0).'</td> <td class="info-table-value">'.(exists $data->{stats}->{tail3}->{max} ? $data->{stats}->{tail3}->{max} : 0).'</td></tr></tbody></table>';
2384 if($tail5count) {
2385 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{tail5},1),undef,'Poly-A/T Tail Distribution (> 4bp)','5\' Tail Length in bp','# Sequences','',0,' bp');
2386 $png = '';
2387 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2388 $html .= '<br />'.&insert_image($png);
2389 if($tail3count) {
2390 $html .= '<br />';
2391 }
2392 }
2393 if($tail3count) {
2394 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts}->{tail3},1),undef,'Poly-A/T Tail Distribution (> 4bp)','3\' Tail Length in bp','# Sequences','',0,' bp');
2395 $png = '';
2396 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2397 $html .= '<br />'.&insert_image($png);
2398 }
2399 }
2400 if(exists $data->{pairedend} && $data->{pairedend} && exists $data->{tail2}) {
2401 my $tail5count = 0;
2402 foreach my $n (values %{$data->{counts2}->{tail5}}) {
2403 $tail5count += $n;
2404 }
2405 my $tail3count = 0;
2406 foreach my $n (values %{$data->{counts2}->{tail3}}) {
2407 $tail3count += $n;
2408 }
2409 $html .= '<br /><br /><br /><b>File 2</b><br />';
2410 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value">5\'-end</td> <td class="info-table-value">3\'-end</td></tr> <tr><td class="info-table-type">Sequences with tail:</td><td class="info-table-value">'.($tail5count ? &addCommas($tail5count).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs2}*$tail5count).' %)' : 0).'</td> <td class="info-table-value">'.($tail3count ? &addCommas($tail3count).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs2}*$tail3count).' %)' : 0).'</td></tr> <tr><td class="info-table-type">Maximum tail length:</td> <td class="info-table-value">'.(exists $data->{stats2}->{tail5}->{max} ? $data->{stats2}->{tail5}->{max} : 0).'</td> <td class="info-table-value">'.(exists $data->{stats2}->{tail3}->{max} ? $data->{stats2}->{tail3}->{max} : 0).'</td></tr></tbody></table>';
2411 if($tail5count) {
2412 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{tail5},1),undef,'Poly-A/T Tail Distribution (> 4bp)','5\' Tail Length in bp','# Sequences','',0,' bp');
2413 $png = '';
2414 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2415 $html .= '<br />'.&insert_image($png);
2416 if($tail3count) {
2417 $html .= '<br />';
2418 }
2419 }
2420 if($tail3count) {
2421 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{counts2}->{tail3},1),undef,'Poly-A/T Tail Distribution (> 4bp)','3\' Tail Length in bp','# Sequences','',0,' bp');
2422 $png = '';
2423 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2424 $html .= '<br />'.&insert_image($png);
2425 }
2426 }
2427 if(exists $data->{tail} || exists $data->{tail2}) {
2428 $html .= '</div><hr>';
2429 }
2430
2431
2432 #tag sequence check
2433 if(exists $data->{freqs} || exists $data->{freqs2}) {
2434 $html .= '<div class="info-header"><span class="info-header-title">Tag Sequence Check</span></div><div class="info-content">';
2435 }
2436 if(exists $data->{freqs}) {
2437 my $tagmidseq;
2438 if(exists $data->{tagmidseq}) {
2439 $tagmidseq = $data->{tagmidseq};
2440 $tagmidseq =~ s/\,/\<br \/\>/g;
2441 }
2442 if(exists $data->{pairedend} && $data->{pairedend}) {
2443 $html .= '<b>File 1</b><br />';
2444 }
2445 $html .= '<table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value">5\'-end</td><td class="info-table-value">3\'-end</td></tr><tr><td class="info-table-type">Probability of tag sequence:</td><td class="info-table-value">'.(exists $data->{tagprob}->{5} ? $data->{tagprob}->{5}.' %' : '-').'</td><td class="info-table-value">'.(exists $data->{tagprob}->{3} ? $data->{tagprob}->{3}.' %' : '-').'</td></tr><tr><td class="info-table-type">GSMIDs or RLMIDs:</td><td class="info-table-value">'.(exists $data->{tagmidnum} ? ($data->{tagmidnum} == 0 ? 'none' : ($tagmidseq ? $tagmidseq : $data->{tagmidnum})) : '-').'</td><td class="info-table-value">&nbsp;</td></tr></tbody></table><br>';
2446 $html .= '<table border="0" cellspacing="0" cellpadding="0"><tr><td>'.&insert_image($FREQCHART_L,undef,undef,1).'</td>';
2447 foreach my $pos (sort {$a <=> $b} keys %{$data->{freqs}->{5}}) {
2448 $html .= '<td valign="bottom" align="center">';
2449 foreach my $base (qw(A C G T N)) {
2450 if($data->{freqs}->{5}->{$pos}->{$base}) {
2451 $html .= &insert_image($BASE64_BASES->{$base},$data->{freqs}->{5}->{$pos}->{$base},14,1).'<br />';
2452 #'<img height="'.$data->{freqs}->{5}->{pos}->{$base}.'px" border="0" src="'..'" alt="'.$base.'" width="14px" /><br />';
2453 }
2454 }
2455 $html .= &insert_image($MMCHART_B2,6,16,1).'</td>';
2456 }
2457 $html .= '<td align="center" valign="middle">&nbsp;...&nbsp;</td>';
2458 foreach my $pos (sort {$a <=> $b} keys %{$data->{freqs}->{3}}) {
2459 $html .= '<td valign="bottom" align="center">';
2460 foreach my $base (qw(A C G T N)) {
2461 if($data->{freqs}->{3}->{$pos}->{$base}) {
2462 $html .= &insert_image($BASE64_BASES->{$base},$data->{freqs}->{3}->{$pos}->{$base},14,1).'<br />';
2463 }
2464 }
2465 $html .= &insert_image($MMCHART_B2,6,16,1).'</td>';
2466 }
2467 $html .= '</tr>';
2468 $html .= '<tr><td>&nbsp;</td>';
2469 foreach my $num (1,0,0,0,5,0,0,0,0,10,0,0,0,0,15,0,0,0,0,20,0,20,0,0,0,0,15,0,0,0,0,10,0,0,0,0,5,0,0,0,1) {
2470 $html .= '<td valign="top" align="center" style="font-size: 10px;margin: 0;">'.($num ? $num : '').'&nbsp;</td>';
2471 }
2472 $html .= '</tr><tr><td align="left" valign="middle">&nbsp;</td><td align="center" valign="middle" colspan="41" class="pinfo"><b>Position from Sequence Ends</b></td></tr>';
2473 $html .= '</table>';
2474 }
2475 if(exists $data->{pairedend} && $data->{pairedend} && exists $data->{freqs2}) {
2476 $html .= '<br /><br /><br /><b>File 2</b><br />';
2477 $html .= '<table border="0" cellpadding="0" cellspacing="0"> <tbody><tr><td class="info-table-type"></td><td class="info-table-value">5\'-end</td><td class="info-table-value">3\'-end</td></tr><tr><td class="info-table-type">Probability of tag sequence:</td><td class="info-table-value">'.(exists $data->{tagprob2}->{5} ? $data->{tagprob2}->{5}.' %' : '-').'</td><td class="info-table-value">'.(exists $data->{tagprob2}->{3} ? $data->{tagprob2}->{3}.' %' : '-').'</td></tr></tbody></table><br>';
2478 $html .= '<table border="0" cellspacing="0" cellpadding="0"><tr><td>'.&insert_image($FREQCHART_L,undef,undef,1).'</td>';
2479 foreach my $pos (sort {$a <=> $b} keys %{$data->{freqs2}->{5}}) {
2480 $html .= '<td valign="bottom" align="center">';
2481 foreach my $base (qw(A C G T N)) {
2482 if($data->{freqs2}->{5}->{$pos}->{$base}) {
2483 $html .= &insert_image($BASE64_BASES->{$base},$data->{freqs2}->{5}->{$pos}->{$base},14,1).'<br />';
2484 #'<img height="'.$data->{freqs}->{5}->{pos}->{$base}.'px" border="0" src="'..'" alt="'.$base.'" width="14px" /><br />';
2485 }
2486 }
2487 $html .= &insert_image($MMCHART_B2,6,16,1).'</td>';
2488 }
2489 $html .= '<td align="center" valign="middle">&nbsp;...&nbsp;</td>';
2490 foreach my $pos (sort {$a <=> $b} keys %{$data->{freqs2}->{3}}) {
2491 $html .= '<td valign="bottom" align="center">';
2492 foreach my $base (qw(A C G T N)) {
2493 if($data->{freqs2}->{3}->{$pos}->{$base}) {
2494 $html .= &insert_image($BASE64_BASES->{$base},$data->{freqs2}->{3}->{$pos}->{$base},14,1).'<br />';
2495 }
2496 }
2497 $html .= &insert_image($MMCHART_B2,6,16,1).'</td>';
2498 }
2499 $html .= '</tr>';
2500 $html .= '<tr><td>&nbsp;</td>';
2501 foreach my $num (1,0,0,0,5,0,0,0,0,10,0,0,0,0,15,0,0,0,0,20,0,20,0,0,0,0,15,0,0,0,0,10,0,0,0,0,5,0,0,0,1) {
2502 $html .= '<td valign="top" align="center" style="font-size: 10px;margin: 0;">'.($num ? $num : '').'&nbsp;</td>';
2503 }
2504 $html .= '</tr><tr><td align="left" valign="middle">&nbsp;</td><td align="center" valign="middle" colspan="41" class="pinfo"><b>Position from Sequence Ends</b></td></tr>';
2505 $html .= '</table>';
2506 }
2507 if(exists $data->{freqs} || exists $data->{freqs2}) {
2508 $html .= '</div><hr>';
2509 }
2510
2511 #Sequence duplicates
2512 if(exists $data->{dubslength} || exists $data->{dubscounts}) {
2513 $html .= '<div class="info-header"><span class="info-header-title">Sequence Duplication</span></div>';
2514 }
2515 my %dubs;
2516 if(exists $data->{dubscounts} && keys %{$data->{dubscounts}}) {
2517 my $exactonly = $data->{exactonly}||0;
2518 foreach my $n (keys %{$data->{dubscounts}}) {
2519 foreach my $s (keys %{$data->{dubscounts}->{$n}}) {
2520 $dubs{$s}->{count} += $data->{dubscounts}->{$n}->{$s} * $n;
2521 $dubs{$s}->{max} = $n unless(exists $dubs{$s}->{max} && $dubs{$s}->{max} > $n);
2522 $dubs{all} += $data->{dubscounts}->{$n}->{$s} * $n;
2523 }
2524 }
2525 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value"># Sequences</td> <td class="info-table-value">Max duplicates</td></tr><tr><td class="info-table-type">Exact duplicates:</td><td class="info-table-value">'.(exists $dubs{0}->{count} ? &addCommas($dubs{0}->{count}).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs}*$dubs{0}->{count}).' %)' : 0).'</td><td class="info-table-value">'.($dubs{0}->{max}||0).'</td></tr><tr><td class="info-table-type">Exact duplicates with reverse complements:</td><td class="info-table-value">'.(exists $dubs{3}->{count} ? &addCommas($dubs{3}->{count}).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs}*$dubs{3}->{count}).' %)' : 0).'</td> <td class="info-table-value">'.($dubs{3}->{max}||0).'</td></tr>';
2526 unless($exactonly) {
2527 $html .= '<tr><td class="info-table-type">5\' duplicates</td><td class="info-table-value">'.(exists $dubs{1}->{count} ? &addCommas($dubs{1}->{count}).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs}*$dubs{1}->{count}).' %)' : 0).'</td> <td class="info-table-value">'.($dubs{1}->{max}||0).'</td></tr><tr><td class="info-table-type">3\' duplicates</td><td class="info-table-value">'.(exists $dubs{2}->{count} ? &addCommas($dubs{2}->{count}).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs}*$dubs{2}->{count}).' %)' : 0).'</td> <td class="info-table-value">'.($dubs{2}->{max}||0).'</td></tr><tr><td class="info-table-type">5\'/3\' duplicates with reverse complements</td><td class="info-table-value">'.(exists $dubs{4}->{count} ? &addCommas($dubs{4}->{count}).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs}*$dubs{4}->{count}).' %)' : 0).'</td> <td class="info-table-value">'.($dubs{4}->{max}||0).'</td></tr>';
2528 }
2529 $html .= '<tr><td class="info-table-type">Total:</td><td class="info-table-value">'.(exists $dubs{all} ? &addCommas($dubs{all}).' &nbsp;('.sprintf("%.2f",100/$data->{numseqs}*$dubs{all}).' %)' : 0).'</td><td class="info-table-value">-</td></tr></tbody></table>';
2530 }
2531 if(exists $dubs{all} && $dubs{all}) {
2532 if(exists $data->{dubslength} && keys %{$data->{dubslength}}) {
2533 $surface = &createStackBarPlot(&convertOdToStackBinMatrix($data->{dubslength},5,1),'Sequence duplication level','Read Length in bp','Number of duplicates','',0,' bp');
2534 $png = '';
2535 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2536 $html .= '<br />'.&insert_image($png);
2537 }
2538 if(exists $data->{dubscounts} && keys %{$data->{dubscounts}}) {
2539 $surface = &createStackBarPlot(&convertOdToStackBinMatrix($data->{dubscounts},5,1,100),'Sequence duplication level','Number of duplicates','Number of sequences','',0);
2540 $png = '';
2541 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2542 $html .= '<br /><br />' if(exists $data->{dubslength});
2543 $html .= &insert_image($png);
2544 my %dubsmax;
2545 my $count = 1;
2546 foreach my $n (sort {$b <=> $a} keys %{$data->{dubscounts}}) {
2547 foreach my $s (keys %{$data->{dubscounts}->{$n}}) {
2548 foreach my $i (1..$data->{dubscounts}->{$n}->{$s}) {
2549 $dubsmax{$count++}->{$s} = $n;
2550 last unless($count <= 100);
2551 }
2552 last unless($count <= 100);
2553 }
2554 last unless($count <= 100);
2555 }
2556 $surface = &createStackBarPlot(&convertOdToStackBinMatrix(\%dubsmax,5,1,100),'Sequence duplication level','Sequence','Number of duplicates','',0);
2557 $png = '';
2558 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2559 $html .= '<br /><br />' if(exists $data->{dubslength});
2560 $html .= &insert_image($png);
2561 }
2562 }
2563 if(exists $data->{dubslength} || exists $data->{dubscounts}) {
2564 $html .= '</div><hr>';
2565 }
2566
2567 #Sequence complexity
2568 if(exists $data->{compldust} || exists $data->{complentropy}) {
2569 $html .= '<div class="info-header"><span class="info-header-title">Sequence Complexity</span></div>';
2570 if(exists $data->{complvals}) {
2571 my $complseq;
2572 foreach my $d (keys %{$data->{complvals}}) {
2573 foreach my $m ('minseq','maxseq') {
2574 $complseq = $data->{complvals}->{$d}->{$m};
2575 $complseq = substr($complseq,0,797).'...' if(length($complseq) > 800);
2576 $complseq =~ s/(.{60})/$1\<br \/\>/g;
2577 $data->{complvals}->{$d}->{$m} = $complseq;
2578 }
2579 }
2580 }
2581 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type"></td><td class="info-table-value">Value</td><td class="info-table-value">Sequence</td></tr><tr><td class="info-table-type">Minimum DUST score:</td><td class="info-table-value">'.(exists $data->{complvals}->{dust}->{minval} ? $data->{complvals}->{dust}->{minval} : '-').'</td><td class="info-table-value sequencetext">'.(exists $data->{complvals}->{dust}->{minseq} ? $data->{complvals}->{dust}->{minseq} : '').'</td></tr><tr><td class="info-table-type">Maximum DUST score:</td><td class="info-table-value">'.(exists $data->{complvals}->{dust}->{maxval} ? $data->{complvals}->{dust}->{maxval} : '').'</td><td class="info-table-value sequencetext">'.(exists $data->{complvals}->{dust}->{maxseq} ? $data->{complvals}->{dust}->{maxseq} : '').'</td></tr><tr><td class="info-table-type">Minimum Entropy value:</td><td class="info-table-value">'.(exists $data->{complvals}->{entropy}->{minval} ? $data->{complvals}->{entropy}->{minval} : '').'</td><td class="info-table-value sequencetext">'.(exists $data->{complvals}->{entropy}->{minseq} ? $data->{complvals}->{entropy}->{minseq} : '').'</td></tr><tr><td class="info-table-type">Maximum Entropy value:</td><td class="info-table-value">'.(exists $data->{complvals}->{entropy}->{maxval} ? $data->{complvals}->{entropy}->{maxval} : '').'</td><td class="info-table-value sequencetext">'.(exists $data->{complvals}->{entropy}->{maxseq} ? $data->{complvals}->{entropy}->{maxseq} : '').'</td></tr> </tbody></table><br />';
2582 }
2583 if(exists $data->{compldust}) {
2584 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{compldust},0),undef,'Sequence complexity distribution','Mean sequence complexity (DUST scores)','Number of sequences','',1);
2585 $png = '';
2586 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2587 $html .= &insert_image($png);
2588 }
2589 if(exists $data->{complentropy}) {
2590 $surface = &createAnnotBarPlot(&convertOdToBinMatrix($data->{complentropy},0),undef,'Sequence complexity distribution','Mean sequence complexity (Entropy values)','Number of sequences','',1);
2591 $png = '';
2592 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2593 $html .= '<br /><br />' if(exists $data->{compldust});
2594 $html .= &insert_image($png);
2595 }
2596 if(exists $data->{compldust} || exists $data->{complentropy}) {
2597 $html .= '</div><hr>';
2598 }
2599
2600 #Dinucleotide odd ratio PCA - microbial/viral
2601 if(exists $data->{dinucodds} && keys %{$data->{dinucodds}}) {
2602 $html .= '<div class="info-header"><span class="info-header-title">Dinucleotide Odds Ratios</span></div>';
2603 $html .= '<div class="info-content"><table border="0" cellpadding="0" cellspacing="0"><tbody><tr><td class="info-table-type">&nbsp;</td>';
2604 foreach my $d (map {join("/",(m/../g ))} sort keys %{$data->{dinucodds}}) {
2605 $html .= '<td class="info-table-value">'.$d.'</td>';
2606 }
2607 $html .= '</tr><tr><td class="info-table-type">Odds ratio</td>';
2608 foreach my $d (map {sprintf("%.4f",$data->{dinucodds}->{$_})} sort keys %{$data->{dinucodds}}) {
2609 $html .= '<td class="info-table-value">'.$d.'</td>';
2610 }
2611 $html .= '</tr></tbody></table><br />';
2612 my @new = map {$data->{dinucodds}->{$_}} sort keys %{$data->{dinucodds}};
2613 $surface = &createOddsRatioPlot($data->{dinucodds},'Odds ratios','Dinucleotide','Odds ratio','');
2614 $png = '';
2615 $surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2616 $html .= &insert_image($png);
2617 #$surface = &createPCAPlot(&convertToPCAValues(\@new,'m'),'PCA','1st Principal Component Score','2nd Principal Component Score','');
2618 #$png = '';
2619 #$surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2620 #$html .= '<br /><br />';
2621 #$html .= &insert_image($png);
2622 #$surface = &createPCAPlot(&convertToPCAValues(\@new,'v'),'PCA','1st Principal Component Score','2nd Principal Component Score','');
2623 #$png = '';
2624 #$surface->write_to_png_stream(sub { my ($closure, $data) = @_; $png .= $data; });
2625 #$html .= '<br /><br />';
2626 #$html .= &insert_image($png);
2627 $html .= '</div>';
2628 }
2629
2630 $html .= '</div>';
2631 $html .= &footer();
2632
2633 #write html to file
2634 $file = &getFileName('.html');
2635 open(FH, ">$file") or &printError("Can't open file ".$file.": $!");
2636 print FH $html;
2637 close(FH);
2638 &printLog("Done with HTML data");
2639 }
2640
2641 sub insert_image {
2642 my ($data, $height, $width, $noencode) = @_;
2643 my $content .= '<img border="0" '.($height ? 'height="'.$height.'"' : '').($width ? 'width="'.$width.'"' : '').' src="'.($noencode ? "data:image/png;base64,".$data : &inline_image($data)).'" />'."\n";
2644 return $content;
2645 }
2646
2647 sub inline_image {
2648 return "data:image/png;base64,".MIME::Base64::encode_base64($_[0]);
2649 }
2650
2651 sub convertIntToString {
2652 my $int = shift;
2653 $int =~ s/(.{2})/chr(hex($1))/eg;
2654 return $int;
2655 }