annotate dnp-subset-dinuc-profile.sh @ 0:b45de206654d draft default tip

"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author erinija
date Fri, 01 May 2020 12:08:23 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
1 #!/bin/sh
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
2 if test "$#" -ne 3; then
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
3
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
4 echo " CALL "
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
5 echo " sh subset_dinuc_profile.sh input.fasta dinucleotides output"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
6 echo ""
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
7 echo " INPUT"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
8 echo " input.fasta - a batch of nucleosome (or any DNA) DNA sequences "
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
9 echo " dinucleotides - any subset of dinucleotides enclosed by quotes as 'AA AC AG AT CA CC' "
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
10 echo ""
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
11 echo " OUTPUT"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
12 echo " output - file name to write the output in tabular format, columns have names as AA.f AA.r ..."
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
13 echo ""
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
14 echo " DESCRIPTION"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
15 echo " Compute dinucleotide frequency profiles on forward and its complementary "
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
16 echo " sequences from a batch of fasta sequences. Output columns are labelled by AA.f, AA.r ... "
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
17 echo ""
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
18 echo " Example of input fasta lines"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
19 echo " >chr9:42475963-42476182"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
20 echo " CCAGGCAGACCCCATATTCAAGCTGCTGCCCCAGGGTGGTGTACAGATCTGGGGAGAAGAAGGATGA"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
21 echo " >chr9:42476175-42476394"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
22 echo " TCTGCACTCCAGCATGCCTGAGGAGAGGAGGGAATGCAGGATCCTAGTGGAAAGAGTACCAAGCTGG"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
23 echo ""
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
24 echo " Example of output table"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
25 echo " AA.f AA.r AC.f AC.r ..."
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
26 echo " 0.076000 0.059000 0.065000 0.078000 ..."
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
27 echo " 0.082000 0.060000 0.057000 0.076000 ..."
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
28 echo " 0.067000 0.075000 0.049000 0.071000 ..."
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
29 echo ""
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
30 echo ""
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
31 echo " REQUIREMENT"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
32 echo " dnp-diprofile installed"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
33 echo " conda install -c bioconda dnp-diprofile"
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
34
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
35 exit 0
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
36 fi
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
37
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
38 name=$1
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
39 diset=$2
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
40 out=$3
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
41
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
42 call=dnp-diprofile
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
43
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
44 ## the dinucleotide profiles are computed for the subset of dinucleotides listed in $diset
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
45 ## the profiles are outputs as columns of a table
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
46
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
47 # prepare fasta, we copy here because
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
48 # in galaxy we don't have fa ending which is required by the dinuc
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
49 cp ${name} ${name}.fa
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
50
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
51 # compute length of the fasta sequence
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
52 seq=`head -n2 $name | tail -n1`
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
53 len=${#seq}
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
54 #echo "Sequence length = " $len
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
55
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
56
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
57 # for each dinucleotide compute the forward
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
58 # and complementary profile and save
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
59 # in separate columns that will be merged in the end
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
60 for di in ${diset}
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
61 do
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
62 #echo ${di}
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
63 echo ${di}.f > ${di}.f
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
64 ${call} ${name}.fa -di ${di} -sl ${len} >> ${di}.f
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
65 echo ${di}.r > ${di}.r
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
66 ${call} ${name}.fa -di ${di} -sl ${len} -c >> ${di}.r
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
67 echo ${di}.f >> names
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
68 echo ${di}.r >> names
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
69 done;
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
70 paste `cat names` > ${out}
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
71 rm names
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
72 rm ${name}.fa
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
73 rm *.f *.r
b45de206654d "planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff changeset
74 exit 0