annotate snpEff_2_1a/scripts/download.sh @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
1 #!/bin/sh -e
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
2
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
3 RELEASE=66
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
4
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
5 # mkdir download
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
6 cd download
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
7
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
8 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
9 # Download
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
10 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
11
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
12 # # Download GTF files (annotations)
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
13 # wget -r -A "*gtf.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
14 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
15 # # Download FASTA files (reference genomes)
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
16 # wget -r -A "*toplevel.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
17 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
18 # # Download CDS sequences
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
19 # wget -r -A "*cdna.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
20 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
21 # # Download PROTEIN sequences
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
22 # wget -r -A "*.pep.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
23
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
24 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
25 # Create directory structure
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
26 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
27
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
28 # # Move all downloaded files to this directory
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
29 # mv `find ftp.ensembl.org -type f` .
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
30
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
31 # # Gene annotations files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
32 # for gtf in *.gtf.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
33 # do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
34 # short=`../scripts/file2GenomeName.pl $gtf | cut -f 5`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
35 # echo ANNOTATIONS: $short
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
36 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
37 # mkdir -p data/$short
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
38 # cp $gtf data/$short/genes.gtf.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
39 # done
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
40 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
41 # # Reference genomes files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
42 # mkdir -p data/genomes
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
43 # for fasta in *.dna.toplevel.fa.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
44 # do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
45 # genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
46 # echo REFERENCE: $genome
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
47 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
48 # cp $fasta data/genomes/$genome.fa.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
49 # done
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
50 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
51 # # CDS genomes files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
52 # for fasta in *.cdna.all.fa.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
53 # do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
54 # genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
55 # echo CDS: $genome
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
56 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
57 # cp $fasta data/$genome/cds.fa.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
58 # done
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
59 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
60 # # Protein sequence files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
61 # for pep in *.pep.all.fa.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
62 # do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
63 # short=`../scripts/file2GenomeName.pl $pep | cut -f 5`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
64 # echo PROTEIN: $short
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
65 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
66 # mkdir -p data/$short
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
67 # cp $pep data/$short/protein.fa.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
68 # done
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
69
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
70 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
71 # Config file entries
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
72 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
73
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
74 # for fasta in *.cdna.all.fa.gz
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
75 # do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
76 # genome=`../scripts/file2GenomeName.pl $fasta | cut -f 4`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
77 # short=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
78 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
79 # # Individual genome entry
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
80 # echo -e "$short.genome : $genome"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
81 # echo -e "$short.reference : ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
82 # echo
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
83 # done
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
84
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
85 # Back to parent dir
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
86 cd - > /dev/null
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
87
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
88 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
89 # Create build queue entries
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
90 #---
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
91
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
92 # rm -vf queue_build.txt
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
93 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
94 # # Build from TXT files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
95 # for genes in data/*/genes.txt*
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
96 # do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
97 # dir=`dirname $genes`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
98 # genomeName=`basename $dir`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
99 # echo "./scripts/snpEffXL.sh build -v $genomeName"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
100 # done | sort >> queue_build.txt
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
101 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
102 # # Build from GFF2 files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
103 # echo "./scripts/snpEffXL.sh build -v -gff2 amel2" >> queue_build.txt
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
104 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
105 # # Build from GFF3 files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
106 # for genes in `ls data/*/genes.gff* | grep -v amel2`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
107 # do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
108 # dir=`dirname $genes`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
109 # genomeName=`basename $dir`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
110 # echo "./scripts/snpEffXL.sh build -v -gff3 $genomeName"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
111 # done | sort >> queue_build.txt
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
112 #
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
113 # # Build from GTF22 files
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
114 # for genes in data/*/genes.gtf*
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
115 # do
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
116 # dir=`dirname $genes`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
117 # genomeName=`basename $dir`
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
118 # echo "./scripts/snpEffXL.sh build -v -gtf22 $genomeName"
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
119 # done | sort >> queue_build.txt
f8eaa3f8194b Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
greg
parents:
diff changeset
120