changeset 7:d1917662231c draft

Uploaded
author morinlab
date Wed, 30 Nov 2016 13:37:20 -0500
parents d248caf924d3
children 198b83bf871e
files bin/make.circos.data
diffstat 1 files changed, 79 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/bin/make.circos.data	Wed Nov 30 13:36:59 2016 -0500
+++ b/bin/make.circos.data	Wed Nov 30 13:37:20 2016 -0500
@@ -58,6 +58,23 @@
 # read and parse configuration file
 parse_config();
 
+
+
+my %affected_genes;
+open(GENES, $CONF{files}{genes}) or die "#! $CONF{files}{genes}\n";
+while(<GENES>){
+    chomp;
+    $affected_genes{$_}++;
+}
+close GENES;
+my %mask_genes;
+open(GENES, $CONF{files}{mask}) or die "#! $CONF{files}{mask}\n";
+while(<GENES>){
+    chomp;
+    $mask_genes{$_}++;
+}
+close GENES;
+
 sub validateconfiguration {
 
 }
@@ -76,11 +93,46 @@
 }
 close(F);
 
+# large scale CNV
+open(FCNVLG,">$path/cnv.tiles.txt");
+# collect CNV region idx
+my $cnvlg;
+for my $gene (@$table) {
+	next unless $gene->{cnvlg};
+	for my $idx (keys %{$gene->{cnvlg}}) {
+		$cnvlg->{$idx} ||= { chr  => $gene->{chr},
+												 type => $gene->{cnvlg}{$idx}{type} };
+		push @{$cnvlg->{$idx}{pos}}, $gene->{pos};
+	}
+}
+my $cnvlg_seen;
+for my $idx (sort {$a <=> $b} keys %{$cnvlg}) {
+	my $cnv = $cnvlg->{$idx};
+	my $key = join(",",
+								 scalar(min @{$cnv->{pos}}),
+								 scalar(max @{$cnv->{pos}}),
+								 $cnv->{type});
+	next if $cnvlg_seen->{$key}++;
+	my $start = scalar(min @{$cnv->{pos}});
+	my $end   = 1 + scalar(max @{$cnv->{pos}});
+	printf FCNVLG ("hs%s %d %d %s idx=%d\n",
+								 $cnv->{chr},
+								 $start,
+								 $end,
+								 $cnv->{type},
+								 $idx);
+}
+close(FCNVLG);
+
 # number of CNV
-open(F,">$path/mutations.txt");
-open(FSV,">$path/mutations.stacked.sv.txt");
-open(FCNV,">$path/mutations.stacked.cnv.txt");
+open(F,     ">$path/mutations.txt");
+open(FSV,   ">$path/mutations.stacked.sv.txt");
+open(FCNV,  ">$path/mutations.stacked.cnv.txt");
 for my $gene (@$table) {
+    my $name = $gene->{name};
+    if(defined $mask_genes{$name}){
+	next;
+    }
 	my @sv;
 	my @sv_vals;
 	my @cnv;
@@ -98,7 +150,19 @@
 	}
 	my $cnv_plus  = ($gene->{cnv}{amp}{n} ||0) + ($gene->{cnv}{gain}{n} ||0);
 	my $cnv_minus = ($gene->{cnv}{homd}{n}||0) + ($gene->{cnv}{hetd}{n} ||0);
-	printf F ("hs%s %d %d %s size=%d,sv_top_type=%s,sv_top_n=%d,sv_tot=%d,svaa_max_pos=%s,svaa_max_n=%d,cnv_top_type=%s,cnv_top_n=%d,cnv_top_avg=%f,cnv_top_med=%f,cnv_plus=%d,cnv_minus=%d,%s,%s\n",
+	my $label_flag = "label_gene=0";
+	
+
+	if(defined $affected_genes{$name}){
+	    $label_flag = "label_gene=1";
+	    print "will label $name\n";
+	}
+	unless(defined $gene->{sv_top}){
+	    $gene->{sv_top} = {"missense_mutation"=>0};
+	    $gene->{sv} = {"missense_mutation"=>0};
+	    $gene->{svaa_top} = {"*"=>0};
+	}
+	printf F ("hs%s %d %d %s size=%d,sv_top_type=%s,sv_top_n=%d,sv_tot=%d,svaa_max_pos=%s,svaa_max_n=%d,cnv_top_type=%s,cnv_top_n=%d,cnv_top_avg=%f,cnv_top_med=%f,cnv_plus=%d,cnv_minus=%d,%s,%s",
 						$gene->{chr},
 						$gene->{pos},
 						$gene->{pos}+1,
@@ -116,7 +180,8 @@
 						$cnv_plus||0,
 						$cnv_minus||0,
 						join(",",@sv),
-						join(",",@cnv));
+		  join(",",@cnv));
+	print F ",$label_flag\n";
 	# stacked histograms of number of samples with each SV type
 	printf FSV ("hs%s %d %d %s name=%s,sv_top_type=%s,sv_top_n=%d\n",
 						$gene->{chr},
@@ -149,12 +214,15 @@
 	my @data;
 	my $chrpos;
 	while(<$fh>) {
+	    
 		chomp;
 		next if /^\#/;
+		
 		my @tok = split;
 		my $gene = list2hash([qw(i id name chr start end size)],
 												 [splice(@tok,0,7)]);
 		$gene->{pos} = $chrpos->{ $gene->{chr} }++;
+		
 		# remaining tokens
 		for my $tok (@tok) {
 			my @subtok = split(":",$tok);
@@ -162,6 +230,10 @@
 			my $type   = lc shift @subtok;
 			if($event =~ /sv/) {
 				$gene->{$event}{$type} = shift @subtok;
+			} elsif($event =~ /cnvlg/) {
+				my $h = { idx => $type,
+									type => shift @subtok };
+				$gene->{$event}{$type} = $h;
 			} elsif($event =~ /cnv/) {
 				my $h = { class=> $type,
 									n=>     shift @subtok,
@@ -176,9 +248,10 @@
 				}
 			}
 		}
-		printdumper($gene);
+		#printdumper($gene);
 		push @data, $gene;
 	}
+	
 	return \@data;
 }