diff snpEff_2_1a/scripts/sift2vcf.sh @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpEff_2_1a/scripts/sift2vcf.sh	Fri Apr 20 14:47:09 2012 -0400
@@ -0,0 +1,47 @@
+#!/bin/sh -e
+
+SQLITE=$HOME/tools/sqlite3
+SQLITE=sqlite3
+OUT_FILE="sift.txt"
+OUT_SORT_FILE="sift.sort.txt"
+OUT_VCF="sift.vcf"
+
+# Download files
+for f in Human_CHR1.sqlite.gz Human_CHR10.sqlite.gz Human_CHR11.sqlite.gz Human_CHR12.sqlite.gz Human_CHR13.sqlite.gz Human_CHR14.sqlite.gz Human_CHR15.sqlite.gz Human_CHR16.sqlite.gz Human_CHR17.sqlite.gz Human_CHR18.sqlite.gz Human_CHR19.sqlite.gz Human_CHR2.sqlite.gz Human_CHR20.sqlite.gz Human_CHR21.sqlite.gz Human_CHR22.sqlite.gz Human_CHR3.sqlite.gz Human_CHR4.sqlite.gz Human_CHR5.sqlite.gz Human_CHR6.sqlite.gz Human_CHR7.sqlite.gz Human_CHR8.sqlite.gz Human_CHR9.sqlite.gz Human_CHRX.sqlite.gz Human_CHRY.sqlite.gz Human_Supp.sqlite.gz Human_enst.sqlite.gz 
+do
+	url=ftp://ftp.jcvi.org/pub/data/sift/Human_db_37_ensembl_63/$f
+	echo Getting file $url 
+	#wget $url
+done
+
+# Unzip files
+for f in Human_CHR*.sqlite.gz
+do
+	echo Decompressing file $f 
+	#gunzip $f
+done
+
+# Dumping data
+rm -f $OUT_FILE
+for f in Human_CHR*.sqlite
+do
+	echo Dumping Database $f to $OUT_FILE
+
+	TABLES=`$SQLITE $f ".tables" | tr "\n" "\t"`
+	for t in $TABLES
+	do
+		echo "    Dumping Table $t"
+
+		$SQLITE $f "select CHR, COORD1, NT1, NT2, SCORE, MEDIAN, SEQS_REP from $t where SCORE != '' AND NT1 != NT2;" \
+			| tr "|" "\t" \
+			| sed "s/^chr//" \
+			>> $OUT_FILE
+	done
+done
+
+echo Sorting file
+sort -k 1 -n -k 2 -n -o $OUT_SORT_FILE $OUT_FILE
+
+echo Creating VCF 
+cat $OUT_SORT_FILE | sift2vcf.pl > $OUT_VCF
+