#!/bin/sh
#
# Dump SIFT prediction records from the per-chromosome SQLite databases
# (Human_CHR*.sqlite) into a tab-separated text file, sort it, and convert
# it to VCF with sift2vcf.pl.
#
# Steps:
#   1. Download  - lists the database archives (the wget call is disabled).
#   2. Unzip     - lists the chromosome archives (the gunzip call is disabled).
#   3. Dump      - writes selected columns of every table to $OUT_FILE.
#   4. Sort      - sorts by chromosome and coordinate into $OUT_SORT_FILE.
#   5. Convert   - pipes the sorted file through sift2vcf.pl into $OUT_VCF.
#
# Because steps 1 and 2 are disabled, the Human_CHR*.sqlite files must
# already be present in the current directory.
#
# Requirements: sqlite3 (or the program named by $SQLITE) and sift2vcf.pl
# must be reachable through PATH.

# Options are set here rather than on the shebang line so they still apply
# when the script is started as "sh script".
set -eu

# sqlite3 executable. Defaults to the one found on PATH; set SQLITE in the
# environment to use another build, e.g. SQLITE=$HOME/tools/sqlite3.
SQLITE=${SQLITE:-sqlite3}
OUT_FILE="sift.txt"
OUT_SORT_FILE="sift.sort.txt"
OUT_VCF="sift.vcf"

# Literal tab character, used as the field separator for sort.
TAB=$(printf '\t')

# Download files
# NOTE: the actual download is commented out; this loop only prints the URLs.
for f in \
  Human_CHR1.sqlite.gz Human_CHR10.sqlite.gz Human_CHR11.sqlite.gz \
  Human_CHR12.sqlite.gz Human_CHR13.sqlite.gz Human_CHR14.sqlite.gz \
  Human_CHR15.sqlite.gz Human_CHR16.sqlite.gz Human_CHR17.sqlite.gz \
  Human_CHR18.sqlite.gz Human_CHR19.sqlite.gz Human_CHR2.sqlite.gz \
  Human_CHR20.sqlite.gz Human_CHR21.sqlite.gz Human_CHR22.sqlite.gz \
  Human_CHR3.sqlite.gz Human_CHR4.sqlite.gz Human_CHR5.sqlite.gz \
  Human_CHR6.sqlite.gz Human_CHR7.sqlite.gz Human_CHR8.sqlite.gz \
  Human_CHR9.sqlite.gz Human_CHRX.sqlite.gz Human_CHRY.sqlite.gz \
  Human_Supp.sqlite.gz Human_enst.sqlite.gz
do
  url="ftp://ftp.jcvi.org/pub/data/sift/Human_db_37_ensembl_63/$f"
  echo "Getting file $url"
  # wget "$url"
done

# Unzip files
# NOTE: the actual decompression is commented out; this loop only reports
# the chromosome archives it finds.
for f in Human_CHR*.sqlite.gz; do
  # An unmatched glob is left as the literal pattern; skip it.
  [ -e "$f" ] || continue
  echo "Decompressing file $f"
  # gunzip "$f"
done

# Dumping data
# Start from a clean output file because the loop below appends to it.
rm -f -- "$OUT_FILE"
for f in Human_CHR*.sqlite; do
  # An unmatched glob is left as the literal pattern; skip it so sqlite3 is
  # never asked to open a file with that name.
  [ -e "$f" ] || continue
  echo "Dumping Database $f to $OUT_FILE"

  # A plain assignment (no pipeline) lets "set -e" abort if sqlite3 fails.
  tables=$("$SQLITE" "$f" ".tables")

  # Unquoted on purpose: split the table list on whitespace/newlines.
  for t in $tables; do
    echo " Dumping Table $t"

    # sqlite3 separates columns with "|"; turn that into tabs and strip a
    # leading "chr" from the first column.
    # NOTE: POSIX sh has no portable pipefail, so a failure of sqlite3 in
    # this pipeline is not detected by "set -e".
    "$SQLITE" "$f" "select CHR, COORD1, NT1, NT2, SCORE, MEDIAN, SEQS_REP from $t where SCORE != '' AND NT1 != NT2;" \
      | tr '|' '\t' \
      | sed 's/^chr//' \
      >> "$OUT_FILE"
  done
done

# Without any dumped table there is nothing to sort; fail with a clear
# message instead of letting sort complain about a missing file.
if [ ! -f "$OUT_FILE" ]; then
  echo "Error: $OUT_FILE was not created; no Human_CHR*.sqlite tables were dumped" >&2
  exit 1
fi

echo "Sorting file"
# Sort numerically on column 1 only, then on column 2 only, using tab as the
# explicit field separator. LC_ALL=C makes the tie-breaking byte-wise and
# independent of the caller's locale.
# NOTE(review): non-numeric values in column 1 (e.g. X, Y) compare as 0 under
# a numeric sort, as they did before - confirm that sift2vcf.pl accepts that
# ordering.
LC_ALL=C sort -t "$TAB" -k1,1n -k2,2n -o "$OUT_SORT_FILE" "$OUT_FILE"

echo "Creating VCF"
sift2vcf.pl < "$OUT_SORT_FILE" > "$OUT_VCF"