Mercurial > repos > elixir-it > corgat_funct_annot
comparison FunAnn/annotate.pl @ 1:40b81942976a draft default tip
Uploaded
author | elixir-it |
---|---|
date | Tue, 16 Feb 2021 09:14:20 +0000 |
parents | 7e7168ebc150 |
children |
comparison
equal
deleted
inserted
replaced
0:7e7168ebc150 | 1:40b81942976a |
---|---|
1 $fss=13468; | 1 $fss=13468; |
2 | 2 |
3 unless (-e "CorGAT") | 3 unless (-e "GCF_009858895.2_ASM985889v3_genomic.fna") |
4 | 4 { |
5 { | 5 |
6 system("wget -i https://raw.githubusercontent.com/matteo14c/CorGAT_galaxy/dev/ann.txt"); | 6 system("wget -i https://raw.githubusercontent.com/matteo14c/CorGAT_galaxy/master/ann.txt"); |
7 system("gzip -d GCF_009858895.2_ASM985889v3_genomic.fna.gz"); | 7 system("gzip -d GCF_009858895.2_ASM985889v3_genomic.fna.gz"); |
8 } | 8 } |
9 | 9 |
10 $gen_code="genetic_code"; | 10 $gen_code="genetic_code"; |
11 die("need genetic code file in the current folder\n") unless -e "genetic_code"; | 11 die("need genetic code file in the current folder\n") unless -e "genetic_code"; |
12 open(IN,$gen_code); | 12 open(IN,$gen_code); |
14 { | 14 { |
15 ($triplet,$oneL)=(split()); | 15 ($triplet,$oneL)=(split()); |
16 $code{$triplet}=$oneL; | 16 $code{$triplet}=$oneL; |
17 } | 17 } |
18 | 18 |
19 $genome="GCF_009858895.2_ASM985889v3_genomic.fna"; | 19 $genome="GCF_009858895.2_ASM985889v3_genomic.fna "; |
20 die("need reference genome file in the current folder\n") unless -e "GCF_009858895.2_ASM985889v3_genomic.fna"; | 20 die("need reference genome file in the current folder\n") unless -e "GCF_009858895.2_ASM985889v3_genomic.fna"; |
21 open(IN,$genome); | 21 open(IN,$genome); |
22 while(<IN>) | 22 while(<IN>) |
23 { | 23 { |
24 next if $_=~/^>/; | 24 next if $_=~/^>/; |
56 { | 56 { |
57 $pos=$i+1; | 57 $pos=$i+1; |
58 $res=$seq_res[$i]; | 58 $res=$seq_res[$i]; |
59 } | 59 } |
60 } | 60 } |
61 %AF_data=%{read_simple_table("af_data_new.csv")}; | 61 %AF_data=%{read_simple_table("AF_current.csv")}; |
62 %MFE_data=%{read_simple_table("MFE_annot.csv")}; | 62 %MFE_data=%{read_simple_table("MFE_annot.csv")}; |
63 %epi_data=%{read_epitopes("epitopes_annot.csv")}; | 63 %epi_data=%{read_epitopes("epitopes_annot.csv")}; |
64 %hyphy_data=%{read_hyphy("hyphy_novel.csv")}; | 64 %hyphy_data=%{read_hyphy("hyphy_current.csv")}; |
65 | 65 |
66 | 66 |
67 | 67 |
68 $var_File=shift;#""cl7.csv";#"phenetic_indels_sars_cov2.csv"; | 68 $var_File=shift;#""cl7.csv";#"phenetic_indels_sars_cov2.csv"; |
69 $out_File=shift; | 69 $out_File=shift; |
170 die("2\n $triplet b:$Bs[1] r:$ref")unless ($Bs[1] eq $ref); | 170 die("2\n $triplet b:$Bs[1] r:$ref")unless ($Bs[1] eq $ref); |
171 $Bs[1]=$alt; | 171 $Bs[1]=$alt; |
172 }elsif ($mod==0){ | 172 }elsif ($mod==0){ |
173 $triplet=substr($seq,$pos-3,3); | 173 $triplet=substr($seq,$pos-3,3); |
174 @Bs=split('',$triplet); | 174 @Bs=split('',$triplet); |
175 die("3\n $triplet b:$Bs[2] r:$ref")unless ($Bs[2] eq $ref); | 175 die("$_ 3\n $triplet b:$Bs[2] r:$ref")unless ($Bs[2] eq $ref); |
176 $Bs[2]=$alt; | 176 $Bs[2]=$alt; |
177 } | 177 } |
178 #print "$pos_inG $relpos $mod @Bs\n"; | 178 #print "$pos_inG $relpos $mod @Bs\n"; |
179 $Atriplet=join("",@Bs); | 179 $Atriplet=join("",@Bs); |
180 $A1=$code{$triplet}; | 180 $A1=$code{$triplet}; |
278 $eff="inframe"; | 278 $eff="inframe"; |
279 if ($ref=~/\./) | 279 if ($ref=~/\./) |
280 { | 280 { |
281 $eff.="Ins"; | 281 $eff.="Ins"; |
282 $Talt=(translate($Calt,\%code))[1]; | 282 $Talt=(translate($Calt,\%code))[1]; |
283 if ($Cref=~/[ACTG]{1,}/) | |
284 { | |
285 $Cref=~s/\.//g; | |
286 $Tref=(translate($Cref,\%code))[1]; | |
287 } | |
288 | |
283 } | 289 } |
284 if ($alt=~/\./) | 290 if ($alt=~/\./) |
285 { | 291 { |
286 $eff.="Del"; | 292 $eff.="Del"; |
287 $Tref=(translate($Cref,\%code))[1]; | 293 $Tref=(translate($Cref,\%code))[1]; |
294 if ($Calt=~/[ACTG]{1,}/) | |
295 { | |
296 $Calt=~s/\.//g; | |
297 $Talt=(translate($Calt,\%code))[1]; | |
298 } | |
288 } | 299 } |
289 if ($eff=~/Del/) | 300 if ($eff=~/Del/) |
290 { | 301 { |
291 @Tref=split('',$Ttref); | 302 @Tref=split('',$Ttref); |
292 for ($i=0;$i<=$#Tref;$i++) | 303 for ($i=0;$i<=$#Tref;$i++) |
386 { | 397 { |
387 chomp(); | 398 chomp(); |
388 ($gene,$pos,@annot)=(split(/\,/)); | 399 ($gene,$pos,@annot)=(split(/\,/)); |
389 foreach $a (@annot) | 400 foreach $a (@annot) |
390 { | 401 { |
402 $a=~s/{//g; | |
403 $a=~s/}//g; | |
391 ($ref,$key)=(split(/\:/,$a)); | 404 ($ref,$key)=(split(/\:/,$a)); |
392 $ref=~s/{//g; | |
393 $ref=~s/}//g; | |
394 $ref=~s/"//g; | 405 $ref=~s/"//g; |
395 $key=~s/"//g; | 406 $key=~s/"//g; |
396 #print "$gene $pos $ref $key\n"; | 407 #print "$gene $pos $ref $key\n"; |
397 if ($kw{$ref}) | 408 if ($kw{$ref}) |
398 { | 409 { |