Mercurial > repos > erinija > dnp_correlation_between_profiles
diff shuffle_sequences.sh @ 0:b45de206654d draft default tip
"planemo upload commit 1a32efb8343938e8d49190003f251c78b5a58225-dirty"
| author | erinija |
|---|---|
| date | Fri, 01 May 2020 12:08:23 +0000 |
| parents | |
| children |
line wrap: on
line diff
#!/bin/sh
#
# shuffle_sequences.sh - shuffle every sequence of a FASTA file with ushuffle,
# preserving k-mer (k-let) frequencies.
#
# Usage:
#   sh shuffle_sequences.sh input.fasta k output.fasta
#
# Arguments:
#   $1  input FASTA file (sequences may span several lines)
#   $2  k, passed to ushuffle as -k (non-negative integer)
#   $3  output file
#
# Output format (two lines per input record):
#   >id original_sequence
#   shuffled_sequence
#
# Exit status: 0 on success, 1 on usage error or any processing failure.
#
# Requires the ushuffle executable on PATH (conda install -c bioconda ushuffle).

if test "$#" -ne 3; then

  echo ""
  echo ""
  echo " CALL "
  echo " sh shuffle_sequences.sh input.fasta k output.fasta"
  echo ""
  echo " INPUT"
  echo " input.fasta - input fasta file "
  echo " k - 'preserve frequency of k letters (dinucleotide k=2)'"
  echo ""
  echo " OUTPUT"
  echo " output.fasta - shuffle fasta file "
  echo ""
  echo " DESCRIPTION"
  echo " Wrapper for ushuffle program Created by M Jiang. Reference PMID: 18405375 ."
  echo " The ushuffle shuffles nucleotides in fasta sequence preserving frequency"
  echo " of k-mers, a positional order is changed. By default k=2."
  echo " A sequence in fasta file is shuffled only once. The output fasta"
  echo " in the identifier >line contains the original sequence; a shuffled sequence "
  echo " is output on the second line."
  echo ""
  echo " REQUIREMENT"
  echo " ushuffle installed"
  echo " conda install -c bioconda ushuffle"
  echo ""

  exit 1
fi

name=$1
diset=$2
out=$3

# Name of the shuffling executable, kept in one place so that a wrapper
# (presumably the Galaxy tool) can adjust it.
call=ushuffle

# Print an error message to stderr and abort with status 1.
die() {
  printf 'shuffle_sequences.sh: %s\n' "$*" >&2
  exit 1
}

[ -r "$name" ] || die "cannot read input file: $name"

case $diset in
  '' | *[!0-9]*) die "k must be a non-negative integer, got: $diset" ;;
esac

command -v "$call" >/dev/null 2>&1 ||
  die "'$call' not found on PATH (conda install -c bioconda ushuffle)"

# The intermediate table goes to a private temp file rather than a fixed name
# in the current directory, so concurrent runs cannot clobber each other.
# A temp file (instead of a pipe) is kept on purpose: the input is read
# completely before the output is opened, so input and output may be the
# same path.
tmp=$(mktemp "${TMPDIR:-/tmp}/shuffle_sequences.XXXXXX") ||
  die "cannot create temporary file"
trap 'rm -f -- "$tmp"' EXIT
# Turn signals into a normal exit so the EXIT trap still removes the file.
trap 'exit 1' HUP INT TERM

# Convert FASTA into "<header>TAB<sequence>" rows, one per record.
# Records are split on ">" and fields on newlines, so:
#   - the id is the first whitespace-delimited word of the header line
#     (any description after it is dropped, never mistaken for the sequence);
#   - all sequence lines of a record are joined into one string;
#   - blanks and carriage returns (CRLF files) are stripped;
#   - records without any sequence are skipped, there is nothing to shuffle.
awk '
  BEGIN { RS = ">"; FS = "\n"; OFS = "\t" }
  NR > 1 {
    header = $1
    sub(/\r$/, "", header)
    split(header, words, " ")
    seq = ""
    for (i = 2; i <= NF; i++) {
      part = $i
      gsub(/[ \t\r]/, "", part)
      seq = seq part
    }
    if (seq != "")
      print ">" words[1], seq
  }
' < "$name" > "$tmp" || die "failed to parse input file: $name"

tab=$(printf '\t')

# Emit each record as the header line (id and original sequence separated by
# a single space) followed by the shuffled sequence.
# NOTE: the sequence is handed to ushuffle as a command-line argument, so
# extremely long sequences may hit the operating system argument size limit.
while IFS=$tab read -r header seq; do
  useq=$("$call" -s "$seq" -k "$diset" -n 1) ||
    die "$call failed for record: $header"
  printf '%s %s\n' "$header" "$seq"
  printf '%s\n' "$useq"
done < "$tmp" > "$out" || exit 1
