comparison dnp-subset-dinuc-profile.sh @ 0:b45de206654d draft default tip

"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author erinija
date Fri, 01 May 2020 12:08:23 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b45de206654d
#!/bin/sh
#
# subset_dinuc_profile.sh - dinucleotide frequency profiles for a chosen
# subset of dinucleotides.
#
# Usage:
#   sh subset_dinuc_profile.sh input.fasta 'AA AC AG ...' output
#
# Arguments:
#   $1  fasta file with a batch of DNA sequences
#   $2  whitespace separated list of dinucleotides (one quoted argument)
#   $3  file that receives the resulting table
#
# For every dinucleotide in $2 the external tool dnp-diprofile is run twice
# on the input: once as is and once with -c (the complementary profile,
# according to the usage text below). Each run becomes one column of the
# output table, headed "<di>.f" or "<di>.r"; the columns are joined with
# paste(1) in the order forward, complementary, per dinucleotide.
#
# All intermediate files live in a private temporary directory (created by
# mktemp, so it honours TMPDIR) that is removed on every exit path. Nothing
# is created or deleted in the current directory or next to the input file.
#
# Exit status:
#   0  success, or help requested by calling the script without arguments
#   1  runtime failure (unreadable input, tool failure, ...)
#   2  wrong number of arguments (usage is printed on stderr)

set -eu

call=dnp-diprofile

# Print the help text on stdout.
usage() {
  printf '%s\n' \
    " CALL " \
    " sh subset_dinuc_profile.sh input.fasta dinucleotides output" \
    "" \
    " INPUT" \
    " input.fasta - a batch of nucleosome (or any DNA) DNA sequences " \
    " dinucleotides - any subset of dinucleotides enclosed by quotes as 'AA AC AG AT CA CC' " \
    "" \
    " OUTPUT" \
    " output - file name to write the output in tabular format, columns have names as AA.f AA.r ..." \
    "" \
    " DESCRIPTION" \
    " Compute dinucleotide frequency profiles on forward and its complementary " \
    " sequences from a batch of fasta sequences. Output columns are labelled by AA.f, AA.r ... " \
    "" \
    " Example of input fasta lines" \
    " >chr9:42475963-42476182" \
    " CCAGGCAGACCCCATATTCAAGCTGCTGCCCCAGGGTGGTGTACAGATCTGGGGAGAAGAAGGATGA" \
    " >chr9:42476175-42476394" \
    " TCTGCACTCCAGCATGCCTGAGGAGAGGAGGGAATGCAGGATCCTAGTGGAAAGAGTACCAAGCTGG" \
    "" \
    " Example of output table" \
    " AA.f AA.r AC.f AC.r ..." \
    " 0.076000 0.059000 0.065000 0.078000 ..." \
    " 0.082000 0.060000 0.057000 0.076000 ..." \
    " 0.067000 0.075000 0.049000 0.071000 ..." \
    "" \
    "" \
    " REQUIREMENT" \
    " dnp-diprofile installed" \
    " conda install -c bioconda dnp-diprofile"
}

# Print an error message on stderr and abort with status 1.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

if test "$#" -ne 3; then
  # A bare call is a request for help; any other wrong argument count is
  # an invocation error and must not look like success to the caller.
  if test "$#" -eq 0; then
    usage
    exit 0
  fi
  usage >&2
  exit 2
fi

name=$1
diset=$2
out=$3

[ -r "$name" ] || die "cannot read input file: $name"
command -v "$call" >/dev/null 2>&1 \
  || die "$call not found (conda install -c bioconda dnp-diprofile)"

# Private scratch space, removed however the script ends.
tmpdir=$(mktemp -d) || die "cannot create a temporary directory"
cleanup() { rm -rf -- "$tmpdir"; }
trap cleanup EXIT
# Not every sh runs the EXIT trap on a signal; turn signals into an exit.
trap 'exit 1' HUP INT TERM

# prepare fasta, we copy here because
# in galaxy we don't have fa ending which is required by the dinuc
fasta=$tmpdir/input.fa
cp -- "$name" "$fasta" || die "cannot copy $name"

# The sequence length is taken from the second line of the file, i.e. the
# first sequence line of a fasta file that keeps each sequence on one line.
seq=$(sed -n '2{p;q;}' "$fasta")
len=${#seq}
[ "$len" -gt 0 ] \
  || die "cannot determine the sequence length: line 2 of $name is empty"

# for each dinucleotide compute the forward
# and complementary profile and save
# in separate columns that will be merged in the end.
# The column files are collected in the positional parameters, which keeps
# their order and needs no bookkeeping file. They are named by a counter so
# that the dinucleotide text never ends up in a path.
set -f # $diset is split on whitespace on purpose; never glob-expand it
set --
n=0
# shellcheck disable=SC2086 # intentional word splitting of the list
for di in $diset; do
  n=$((n + 1))
  fwd=$tmpdir/$n.f
  rev=$tmpdir/$n.r

  {
    printf '%s\n' "${di}.f"
    "$call" "$fasta" -di "$di" -sl "$len"
  } > "$fwd" || die "$call failed for $di"

  {
    printf '%s\n' "${di}.r"
    "$call" "$fasta" -di "$di" -sl "$len" -c
  } > "$rev" || die "$call -c failed for $di"

  set -- "$@" "$fwd" "$rev"
done
set +f

[ "$#" -gt 0 ] || die "the dinucleotide list is empty"

paste "$@" > "$out" || die "cannot write $out"
exit 0