Mercurial > repos > erinija > dnp_correlation_between_profiles
annotate dnp-subset-dinuc-profile.sh @ 0:b45de206654d draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:08:23 +0000 |
parents | |
children |
rev | line source |
---|---|
0
b45de206654d
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
erinija
parents:
diff
changeset
|
#!/bin/sh
#
# Compute forward and complementary dinucleotide frequency profiles for a
# chosen subset of dinucleotides and write them as columns of one table.
#
# Arguments:
#   $1  input fasta file (a batch of DNA sequences)
#   $2  whitespace-separated list of dinucleotides, e.g. 'AA AC AG AT CA CC'
#   $3  output file; receives a table with columns AA.f AA.r AC.f AC.r ...
#
# Exit status:
#   0  success, or help shown because no arguments were given
#   1  runtime failure (unreadable input, missing tool, tool error, ...)
#   2  wrong number of arguments
#
# Requires the external program dnp-diprofile on PATH.

# Print the help text on stdout.
usage() {
  cat <<'EOF'
 CALL
 sh subset_dinuc_profile.sh input.fasta dinucleotides output

 INPUT
 input.fasta - a batch of nucleosome (or any DNA) DNA sequences
 dinucleotides - any subset of dinucleotides enclosed by quotes as 'AA AC AG AT CA CC'

 OUTPUT
 output - file name to write the output in tabular format, columns have names as AA.f AA.r ...

 DESCRIPTION
 Compute dinucleotide frequency profiles on forward and its complementary
 sequences from a batch of fasta sequences. Output columns are labelled by AA.f, AA.r ...

 Example of input fasta lines
 >chr9:42475963-42476182
 CCAGGCAGACCCCATATTCAAGCTGCTGCCCCAGGGTGGTGTACAGATCTGGGGAGAAGAAGGATGA
 >chr9:42476175-42476394
 TCTGCACTCCAGCATGCCTGAGGAGAGGAGGGAATGCAGGATCCTAGTGGAAAGAGTACCAAGCTGG

 Example of output table
 AA.f AA.r AC.f AC.r ...
 0.076000 0.059000 0.065000 0.078000 ...
 0.082000 0.060000 0.057000 0.076000 ...
 0.067000 0.075000 0.049000 0.071000 ...


 REQUIREMENT
 dnp-diprofile installed
 conda install -c bioconda dnp-diprofile
EOF
}

# Report an error on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# No arguments at all: the user is asking for help (kept as a success).
if test "$#" -eq 0; then
  usage
  exit 0
fi

# Any other wrong argument count is a calling error, not a success.
if test "$#" -ne 3; then
  usage >&2
  exit 2
fi

name=$1
diset=$2
out=$3

call=dnp-diprofile

test -r "$name" || die "cannot read input fasta: $name"
command -v "$call" >/dev/null 2>&1 ||
  die "$call not found; install it with: conda install -c bioconda dnp-diprofile"

## the dinucleotide profiles are computed for the subset of dinucleotides
## listed in $diset; the profiles are output as columns of a table

# All intermediate files live in a private directory, so nothing in the
# caller's working directory or next to the input file is created or deleted.
workdir=$(mktemp -d) || die "cannot create temporary directory"
trap 'rm -rf -- "${workdir:?}"' EXIT
# Make signals go through the EXIT trap as well (needed for plain sh).
trap 'exit 1' HUP INT TERM

# prepare fasta: we copy here because in galaxy the dataset does not have
# the .fa ending which is required by the dinuc tool
fasta=$workdir/input.fa
cp -- "$name" "$fasta" || die "cannot copy $name"

# compute length of the fasta sequence
# assumes the first record's sequence sits entirely on line 2 of the file
seq=$(sed -n '2{p;q;}' "$fasta")
len=${#seq}
test "$len" -gt 0 || die "no sequence found on line 2 of $name"

# for each dinucleotide compute the forward and complementary profile and
# save them in separate column files that are merged in the end
set -f  # $diset is split on whitespace on purpose; never glob its tokens
set --  # positional parameters now collect the column files, in order
for di in $diset; do
  fwd=$workdir/$di.f
  rev=$workdir/$di.r

  printf '%s\n' "$di.f" > "$fwd" || die "cannot write column file for $di"
  "$call" "$fasta" -di "$di" -sl "$len" >> "$fwd" ||
    die "$call failed for $di (forward)"

  printf '%s\n' "$di.r" > "$rev" || die "cannot write column file for $di"
  "$call" "$fasta" -di "$di" -sl "$len" -c >> "$rev" ||
    die "$call failed for $di (complementary)"

  set -- "$@" "$fwd" "$rev"
done
set +f

test "$#" -gt 0 || die "no dinucleotides given"

paste "$@" > "$out" || die "cannot write output table: $out"
exit 0