# HG changeset patch
# User davidvanzessen
# Date 1404826734 14400
# Node ID 35b55f1c0c598c1e5068afa365b38d2c750b7427
# Parent 8e3d95d7f342a4c429f8cea630b8b361c0a964d2
Uploaded
diff -r 8e3d95d7f342 -r 35b55f1c0c59 demultiplex.xml
--- a/demultiplex.xml Mon Jul 07 05:49:20 2014 -0400
+++ b/demultiplex.xml Tue Jul 08 09:38:54 2014 -0400
@@ -4,10 +4,12 @@
fastx_toolkit
- r_wrapper.sh $input $out_file $out_file.files_path $where $mismatches $partial $input.name $trim_start $trim_end
+ wrapper.sh $input $out_file $out_file.files_path $where $mismatches $partial "$input.name"
#for $i, $b in enumerate($barcodes)
"$b.id"
"$b.mid"
+ "$b.trim_start"
+ "$b.trim_end"
#end for
@@ -318,6 +320,10 @@
+
+
+
+
@@ -325,21 +331,19 @@
-
-
-
-
-
-
- Splitting FASTA or FASTQ files, this tool uses sff2fastq (https://github.com/indraniel/sff2fastq) to extract a fastq file and fastx_barcode_splitter.pl (http://hannonlab.cshl.edu/fastx_toolkit/commandline.html) to demultiplex.
+- Splitting sff files into FASTQ, FASTA and (optional) trimmed FASTA files with a FASTQC report on the FASTQ file, this tool uses:
+- sff2fastq (https://github.com/indraniel/sff2fastq) to extract a fastq file.
+- fastx_barcode_splitter.pl (http://hannonlab.cshl.edu/fastx_toolkit/commandline.html) to demultiplex.
+- fastqc (http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) to provide analysis of the fastq files.
+
diff -r 8e3d95d7f342 -r 35b55f1c0c59 fastqc_v0.11.2.zip
Binary file fastqc_v0.11.2.zip has changed
diff -r 8e3d95d7f342 -r 35b55f1c0c59 r_wrapper.sh
--- a/r_wrapper.sh Mon Jul 07 05:49:20 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,41 +0,0 @@
-#!/bin/bash
-input=$1
-output=$2
-outDir=$3
-mkdir $outDir
-EOL=$4
-mismatches=$5
-partial=$6
-name=$(basename "$7")
-ext="${name##*.}"
-name="${name%.*}"
-prefix=$name"_"
-trim_start=$8
-trim_end=$9
-dir="$(cd "$(dirname "$0")" && pwd)"
-
-for ((i=10;i<=$#;i=i+2))
-do
- j=$((i+1))
- echo -e "${!i}\t${!j}" >> $outDir/barcodes.txt
-done
-
-cd $outDir
-echo "$3"
-result=`$dir/sff2fastq $input | $dir/fastx_barcode_splitter.pl --bcfile $outDir/barcodes.txt --prefix "$prefix" --suffix ".fastq" --$EOL --mismatches $mismatches --partial $partial`
-echo "$result" | tail -n +2 | sed 's/\t/,/g' > output.txt
-echo "
$name demultiplexID | Count | FASTQ | FASTA | Trimmed FASTA |
" >> $output
-ls
-while IFS=, read barcode count location
- do
- if [ "total" == "$barcode" ]
- then
- echo "$barcode | $count | | |
" >> $output
- break
- fi
- file=$name"_"$barcode
- cat $file.fastq | awk 'NR%4==1{printf ">%s\n", substr($0,2)}NR%4==2{print}' > $file.fasta
- python $dir/trim.py --input $file.fasta --output ${file}_trimmed.fasta --start $trim_start --end $trim_end
- echo "$barcode | $count | $file.fastq | $file.fasta | ${file}_trimmed.fasta |
" >> $output
-done < output.txt
-echo "" >> $output
diff -r 8e3d95d7f342 -r 35b55f1c0c59 trim.py
--- a/trim.py Mon Jul 07 05:49:20 2014 -0400
+++ b/trim.py Tue Jul 08 09:38:54 2014 -0400
@@ -11,6 +11,11 @@
start = int(args.start)
end = int(args.end)
+print args.input
+print args.output
+print start
+print end
+
if end <= 0 and start <= 0:
import shutil
shutil.copy(args.input, args.output)
diff -r 8e3d95d7f342 -r 35b55f1c0c59 wrapper.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/wrapper.sh Tue Jul 08 09:38:54 2014 -0400
@@ -0,0 +1,83 @@
+#!/bin/bash
+# Demultiplex an SFF file per barcode and append a summary table to OUTPUT.
+#
+# Usage: wrapper.sh INPUT OUTPUT OUT_DIR EOL MISMATCHES PARTIAL NAME \
+#          [ID BARCODE TRIM_START TRIM_END]...
+#   INPUT       file handed to sff2fastq
+#   OUTPUT      file the summary rows are appended to
+#   OUT_DIR     directory for barcodes.txt and the per-barcode files
+#   EOL         passed to fastx_barcode_splitter.pl as "--$EOL"
+#   MISMATCHES  value for the splitter's --mismatches
+#   PARTIAL     value for the splitter's --partial
+#   NAME        its basename, minus the last extension, prefixes every
+#               per-barcode file name
+#   ID BARCODE TRIM_START TRIM_END
+#               one group per barcode; the trim values are given to trim.py
+input=$1
+output=$2
+outDir=$3
+# -p: an already existing directory is not an error.
+mkdir -p "$outDir"
+EOL=$4
+mismatches=$5
+partial=$6
+name=$(basename "$7")
+name="${name%.*}"
+prefix="${name}_"
+# Absolute directory of this script; the helper tools are run from here.
+dir="$(cd "$(dirname "$0")" && pwd)"
+workdir=$PWD
+
+# Unpack the bundled FastQC into the starting directory. -o overwrites an
+# earlier copy instead of stopping to ask.
+unzip -o "$dir/fastqc_v0.11.2.zip" -d "$workdir/" > "$workdir/unziplog.log"
+chmod 755 "$workdir/FastQC/fastqc"
+
+# Trim settings keyed by barcode id.
+declare -A trim_start
+declare -A trim_end
+# From the 8th argument on: groups of id, barcode, trim start, trim end.
+for ((i = 8; i <= $#; i += 4)); do
+  j=$((i + 1))
+  start_int=$((i + 2))
+  end_int=$((i + 3))
+  id="${!i}"
+  echo "$id"
+  trim_start["$id"]=${!start_int}
+  trim_end["$id"]=${!end_int}
+  # printf writes the fields literally; echo -e would expand backslashes.
+  printf '%s\t%s\n' "$id" "${!j}" >> "$outDir/barcodes.txt"
+done
+# The "unmatched" output gets zero trim on both ends.
+trim_start["unmatched"]=0
+trim_end["unmatched"]=0
+
+echo "trim_start = ${trim_start[*]}"
+echo "trim_end = ${trim_end[*]}"
+
+# The rest uses file names relative to $outDir, so a failed cd must stop here.
+cd "$outDir" || exit 1
+echo "$outDir"
+result=$("$dir/sff2fastq" "$input" | "$dir/fastx_barcode_splitter.pl" --bcfile "$outDir/barcodes.txt" --prefix "$prefix" --suffix ".fastq" "--$EOL" --mismatches "$mismatches" --partial "$partial")
+# Drop the first line of the splitter's report and turn tabs into commas so
+# the loop below can split each row on ",".
+echo "$result" | tail -n +2 | sed 's/\t/,/g' > output.txt
+echo "$name demultiplexID | Count | FASTQ | FASTA | Trimmed FASTA | FASTQC |
" >> "$output"
+# Third field is read only so that it does not end up in $count.
+while IFS=, read -r barcode count location; do
+  # The "total" row ends the table; later rows are not processed.
+  if [[ "$barcode" == "total" ]]; then
+    echo "$barcode | $count | | | | | | |
" >> "$output"
+    break
+  fi
+  file="${name}_${barcode}"
+  mkdir -p "$outDir/fastqc_$barcode"
+  # FastQC's stderr is discarded.
+  "$workdir/FastQC/fastqc" "$file.fastq" -o "$outDir" 2> /dev/null
+  # FASTQ to FASTA: line 1 of each 4-line record becomes the ">" header, line 2 is kept.
+  awk 'NR%4==1{printf ">%s\n", substr($0,2)}NR%4==2{print}' "$file.fastq" > "$file.fasta"
+  # Fall back to 0 when no trim value is known for this barcode.
+  python "$dir/trim.py" --input "$file.fasta" --output "${file}_trimmed.fasta" --start "${trim_start[$barcode]:-0}" --end "${trim_end[$barcode]:-0}"
+  echo "$barcode | $count | $file.fastq | $file.fasta | ${file}_trimmed.fasta | Report |
" >> "$output"
+done < output.txt
+echo "" >> "$output"