annotate snpEff_2_1a/scripts/sift2vcf.sh @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
1 #!/bin/sh -e
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
2
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
3 SQLITE=$HOME/tools/sqlite3
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
4 SQLITE=sqlite3
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
5 OUT_FILE="sift.txt"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
6 OUT_SORT_FILE="sift.sort.txt"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
7 OUT_VCF="sift.vcf"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
8
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
9 # Download files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
10 for f in Human_CHR1.sqlite.gz Human_CHR10.sqlite.gz Human_CHR11.sqlite.gz Human_CHR12.sqlite.gz Human_CHR13.sqlite.gz Human_CHR14.sqlite.gz Human_CHR15.sqlite.gz Human_CHR16.sqlite.gz Human_CHR17.sqlite.gz Human_CHR18.sqlite.gz Human_CHR19.sqlite.gz Human_CHR2.sqlite.gz Human_CHR20.sqlite.gz Human_CHR21.sqlite.gz Human_CHR22.sqlite.gz Human_CHR3.sqlite.gz Human_CHR4.sqlite.gz Human_CHR5.sqlite.gz Human_CHR6.sqlite.gz Human_CHR7.sqlite.gz Human_CHR8.sqlite.gz Human_CHR9.sqlite.gz Human_CHRX.sqlite.gz Human_CHRY.sqlite.gz Human_Supp.sqlite.gz Human_enst.sqlite.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
11 do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
12 url=ftp://ftp.jcvi.org/pub/data/sift/Human_db_37_ensembl_63/$f
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
13 echo Getting file $url
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
14 #wget $url
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
15 done
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
16
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
17 # Unzip files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
18 for f in Human_CHR*.sqlite.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
19 do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
20 echo Decompressing file $f
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
21 #gunzip $f
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
22 done
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
23
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
24 # Dumping data
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
25 rm -f $OUT_FILE
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
26 for f in Human_CHR*.sqlite
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
27 do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
28 echo Dumping Database $f to $OUT_FILE
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
29
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
30 TABLES=`$SQLITE $f ".tables" | tr "\n" "\t"`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
31 for t in $TABLES
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
32 do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
33 echo " Dumping Table $t"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
34
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
35 $SQLITE $f "select CHR, COORD1, NT1, NT2, SCORE, MEDIAN, SEQS_REP from $t where SCORE != '' AND NT1 != NT2;" \
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
36 | tr "|" "\t" \
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
37 | sed "s/^chr//" \
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
38 >> $OUT_FILE
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
39 done
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
40 done
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
41
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
42 echo Sorting file
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
43 sort -k 1 -n -k 2 -n -o $OUT_SORT_FILE $OUT_FILE
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
44
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
45 echo Creating VCF
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
46 cat $OUT_SORT_FILE | sift2vcf.pl > $OUT_VCF
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
47