comparison snpEff_2_1a/scripts/download.sh @ 0:f8eaa3f8194b default tip

Uploaded snpEff_v2_1a_core.tgz from Pablo Cingolani
author greg
date Fri, 20 Apr 2012 14:47:09 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:f8eaa3f8194b
1 #!/bin/sh -e
2
3 RELEASE=66  # Ensembl release number; referenced by the (currently commented-out) ftp.ensembl.org URLs below
4
5 # mkdir download
6 cd download  # NOTE: the mkdir above is disabled, so download/ must already exist or this cd fails
7
8 #---
9 # Download
10 #---
11
12 # # Download GTF files (annotations)
13 # wget -r -A "*gtf.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
14 #
15 # # Download FASTA files (reference genomes)
16 # wget -r -A "*toplevel.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
17 #
18 # # Download CDS sequences
19 # wget -r -A "*cdna.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
20 #
21 # # Download PROTEIN sequences
22 # wget -r -A "*.pep.all.fa.gz" "ftp://ftp.ensembl.org/pub/release-$RELEASE/fasta/"
23
24 #---
25 # Create directory structure
26 #---
27
28 # # Move all downloaded files to this directory
29 # mv `find ftp.ensembl.org -type f` .
30
31 # # Gene annotation files
32 # for gtf in *.gtf.gz
33 # do
34 # short=`../scripts/file2GenomeName.pl $gtf | cut -f 5`
35 # echo ANNOTATIONS: $short
36 #
37 # mkdir -p data/$short
38 # cp $gtf data/$short/genes.gtf.gz
39 # done
40 #
41 # # Reference genome files
42 # mkdir -p data/genomes
43 # for fasta in *.dna.toplevel.fa.gz
44 # do
45 # genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
46 # echo REFERENCE: $genome
47 #
48 # cp $fasta data/genomes/$genome.fa.gz
49 # done
50 #
51 # # CDS genome files
52 # for fasta in *.cdna.all.fa.gz
53 # do
54 # genome=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
55 # echo CDS: $genome
56 #
57 # cp $fasta data/$genome/cds.fa.gz
58 # done
59 #
60 # # Protein sequence files
61 # for pep in *.pep.all.fa.gz
62 # do
63 # short=`../scripts/file2GenomeName.pl $pep | cut -f 5`
64 # echo PROTEIN: $short
65 #
66 # mkdir -p data/$short
67 # cp $pep data/$short/protein.fa.gz
68 # done
69
70 #---
71 # Config file entries
72 #---
73
74 # for fasta in *.cdna.all.fa.gz
75 # do
76 # genome=`../scripts/file2GenomeName.pl $fasta | cut -f 4`
77 # short=`../scripts/file2GenomeName.pl $fasta | cut -f 5`
78 #
79 # # Individual genome entry
80 # echo -e "$short.genome : $genome"
81 # echo -e "$short.reference : ftp://ftp.ensembl.org/pub/release-$RELEASE/gtf/"
82 # echo
83 # done
84
85 # Back to the directory we were in before `cd download` (`cd -` prints that path, hence the redirect)
86 cd - > /dev/null
87
88 #---
89 # Create build queue entries
90 #---
91
92 # rm -vf queue_build.txt
93 #
94 # # Build from TXT files
95 # for genes in data/*/genes.txt*
96 # do
97 # dir=`dirname $genes`
98 # genomeName=`basename $dir`
99 # echo "./scripts/snpEffXL.sh build -v $genomeName"
100 # done | sort >> queue_build.txt
101 #
102 # # Build from GFF2 files
103 # echo "./scripts/snpEffXL.sh build -v -gff2 amel2" >> queue_build.txt
104 #
105 # # Build from GFF3 files
106 # for genes in `ls data/*/genes.gff* | grep -v amel2`
107 # do
108 # dir=`dirname $genes`
109 # genomeName=`basename $dir`
110 # echo "./scripts/snpEffXL.sh build -v -gff3 $genomeName"
111 # done | sort >> queue_build.txt
112 #
113 # # Build from GTF22 files
114 # for genes in data/*/genes.gtf*
115 # do
116 # dir=`dirname $genes`
117 # genomeName=`basename $dir`
118 # echo "./scripts/snpEffXL.sh build -v -gtf22 $genomeName"
119 # done | sort >> queue_build.txt
120