Mercurial > repos > erinija > dnp_correlation_between_profiles
comparison dnp-subset-dinuc-profile.sh @ 0:b45de206654d draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
author | erinija |
---|---|
date | Fri, 01 May 2020 12:08:23 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b45de206654d |
---|---|
#!/bin/sh
#
# Compute dinucleotide frequency profiles for a chosen subset of
# dinucleotides and write them as columns of one table.
# Exactly three arguments are expected: input.fasta, dinucleotides, output.

# Print the help text on stdout.
usage() {
  printf '%s\n' \
    " CALL " \
    " sh subset_dinuc_profile.sh input.fasta dinucleotides output" \
    "" \
    " INPUT" \
    " input.fasta - a batch of nucleosome (or any DNA) DNA sequences " \
    " dinucleotides - any subset of dinucleotides enclosed by quotes as 'AA AC AG AT CA CC' " \
    "" \
    " OUTPUT" \
    " output - file name to write the output in tabular format, columns have names as AA.f AA.r ..." \
    "" \
    " DESCRIPTION" \
    " Compute dinucleotide frequency profiles on forward and its complementary " \
    " sequences from a batch of fasta sequences. Output columns are labelled by AA.f, AA.r ... " \
    "" \
    " Example of input fasta lines" \
    " >chr9:42475963-42476182" \
    " CCAGGCAGACCCCATATTCAAGCTGCTGCCCCAGGGTGGTGTACAGATCTGGGGAGAAGAAGGATGA" \
    " >chr9:42476175-42476394" \
    " TCTGCACTCCAGCATGCCTGAGGAGAGGAGGGAATGCAGGATCCTAGTGGAAAGAGTACCAAGCTGG" \
    "" \
    " Example of output table" \
    " AA.f AA.r AC.f AC.r ..." \
    " 0.076000 0.059000 0.065000 0.078000 ..." \
    " 0.082000 0.060000 0.057000 0.076000 ..." \
    " 0.067000 0.075000 0.049000 0.071000 ..." \
    "" \
    "" \
    " REQUIREMENT" \
    " dnp-diprofile installed" \
    " conda install -c bioconda dnp-diprofile"
}

# Any argument count other than three shows the help text and stops.
# The exit status stays 0, as this script has always returned here.
[ "$#" -eq 3 ] || {
  usage
  exit 0
}
37 | |
call=dnp-diprofile

## The dinucleotide profiles are computed for the subset of dinucleotides
## given as the second argument; every profile becomes one column of the
## output table.

# Private scratch directory holding every intermediate file. Nothing is
# written to (or globbed from) the caller's working directory.
workdir=

# Remove the scratch directory if one was created.
cleanup() {
  if [ -n "${workdir}" ]; then
    rm -rf -- "${workdir}"
    workdir=
  fi
}

# Report an error on stderr, tidy up and stop with a failure status.
die() {
  printf 'dnp-subset-dinuc-profile: %s\n' "$*" >&2
  cleanup
  exit 1
}

trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Build the table of forward (.f) and complementary (.r) profiles.
#   $1 - input fasta file
#   $2 - whitespace separated dinucleotides, e.g. 'AA AC AG'
#   $3 - output file (tab separated, one header line: AA.f AA.r AC.f ...)
# Returns 0 on success; on any failure prints a message on stderr and
# exits with status 1 without writing the output file.
subset_dinuc_profile() {
  name=$1
  diset=$2
  out=$3

  [ -r "${name}" ] || die "cannot read input fasta: ${name}"
  command -v "${call}" >/dev/null 2>&1 || die "${call} not found in PATH"

  workdir=$(mktemp -d) || die "cannot create a temporary directory"

  # Galaxy datasets have no .fa ending, which the profile tool requires,
  # so it is run on a copy that has one.
  cp -- "${name}" "${workdir}/input.fa" || die "cannot copy ${name}"

  # Sequence length is taken from the second line of the file, i.e. the
  # first record. NOTE(review): this assumes single-line records of equal
  # length, as the script always has - confirm for multi-line fasta.
  seq=$(sed -n '2{p;q;}' < "${name}")
  len=${#seq}
  [ "${len}" -gt 0 ] || die "no sequence found on line 2 of ${name}"

  # For each dinucleotide write a header line plus the profile into a
  # numbered column file. Numbering (instead of naming files after the
  # dinucleotide) keeps odd or repeated tokens from touching other paths.
  # The column files are collected in the positional parameters.
  set --
  col=0
  set -f # split the list on whitespace only, no pathname expansion
  for di in ${diset}; do
    col=$((col + 1))
    {
      printf '%s\n' "${di}.f"
      "${call}" "${workdir}/input.fa" -di "${di}" -sl "${len}"
    } > "${workdir}/${col}.f" || die "${call} failed for ${di} (forward)"
    {
      printf '%s\n' "${di}.r"
      "${call}" "${workdir}/input.fa" -di "${di}" -sl "${len}" -c
    } > "${workdir}/${col}.r" || die "${call} failed for ${di} (complementary)"
    set -- "$@" "${workdir}/${col}.f" "${workdir}/${col}.r"
  done
  set +f

  # With no column files paste would wait on stdin forever.
  [ "$#" -gt 0 ] || die "no dinucleotides given"

  paste "$@" > "${out}" || die "cannot write ${out}"

  cleanup
  return 0
}

# Any other argument count is dealt with by the usage check at the top of
# the script, so the work is started only for the three documented ones.
if [ "$#" -eq 3 ]; then
  subset_dinuc_profile "$1" "$2" "$3"
  exit "$?"
fi