Mercurial > repos > rnateam > bctools
comparison merge_pcr_duplicates.py @ 58:bbbae1ee87e0 draft default tip
fix for flexbar with small data issue
| author | rnateam |
|---|---|
| date | Tue, 16 Feb 2016 10:08:58 -0500 |
| parents | 4bedd35bcdff |
| children |
comparison
equal
deleted
inserted
replaced
| 57:537657678b2b | 58:bbbae1ee87e0 |
|---|---|
| 106 # prepare alignments | 106 # prepare alignments |
| 107 syscall2 = "cat " + args.alignments + " | awk -F \"\\t\" 'BEGIN{OFS=\"\\t\"}{split($4, a, \" \"); $4 = a[1]; print}'| sort --compress-program=gzip -k4,4 > " + tmpdir + "/alns.csv" | 107 syscall2 = "cat " + args.alignments + " | awk -F \"\\t\" 'BEGIN{OFS=\"\\t\"}{split($4, a, \" \"); $4 = a[1]; print}'| sort --compress-program=gzip -k4,4 > " + tmpdir + "/alns.csv" |
| 108 check_call(syscall2, shell=True) | 108 check_call(syscall2, shell=True) |
| 109 | 109 |
| 110 # join barcode library and alignments | 110 # join barcode library and alignments |
| 111 syscall3 = "cat " + args.bclib + " | awk 'BEGIN{OFS=\"\\t\"}NR%4==1{gsub(/^@/,\"\"); id=$1}NR%4==2{bc=$1}NR%4==3{print id,bc}' | sort --compress-program=gzip -k1,1 | join -1 1 -2 4 - " + tmpdir + "/alns.csv " + " | awk 'BEGIN{OFS=\"\\t\"}$4!~/N/{print $3,$4,$5,$2,$6,$7}' | datamash --sort -g 1,2,3,4,6 count 4 | awk 'BEGIN{OFS=\"\\t\"}$4!~/N/{print $1,$2,$3,$4,$6,$5}' > " + args.outfile | 111 # after join: id, bc, chr, start, stop, mapscore, strand |
| 112 # 'chrom', 'start', 'stop', 'bc', 'ndupes', 'strand' | 112 # after datamash: bc, chr, start, stop, strand, ndupes, idrepresentative |
| 113 syscall3 = "cat " + \ | |
| 114 args.bclib + \ | |
| 115 " | awk 'BEGIN{OFS=\"\\t\"}NR%4==1{gsub(/^@/,\"\"); id=$1}NR%4==2{bc=$1}NR%4==3{print id,bc}' " + \ | |
| 116 " | sort --compress-program=gzip -k1,1 | join -1 1 -2 4 - " + tmpdir + "/alns.csv " + \ | |
| 117 " | awk 'BEGIN{OFS=\"\\t\"}$2!~/N/{print $1,$2,$3,$4,$5,$6,$7}' " + \ | |
| 118 " | datamash --sort -g 2,3,4,5,7 count 2 first 1 " + \ | |
| 119 " | awk 'BEGIN{OFS=\"\\t\"}{print $2,$3,$4,$7,$6,$5}' > " + args.outfile | |
| 113 check_call(syscall3, shell=True) | 120 check_call(syscall3, shell=True) |
| 114 finally: | 121 finally: |
| 115 logging.debug("removed tmpdir: " + tmpdir) | 122 logging.debug("removed tmpdir: " + tmpdir) |
| 116 rmtree(tmpdir) | 123 rmtree(tmpdir) |
