annotate k2n.R @ 4:d7af39b1fb6c draft

Uploaded
author nikos
date Tue, 04 Nov 2014 14:58:34 -0500
parents 83dfe38f6a09
children 2ae336f19de0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
1 suppressMessages(library('getopt'))
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
2
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
3 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
4 args <- commandArgs(trailingOnly = TRUE)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
5
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
6 #get options, using the spec as defined by the enclosed list.
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
7 #we read the options from the default: commandArgs(TRUE).
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
8 spec = matrix( c(
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
9 'verbose', 'v', 2, "integer",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
10 'help' , 'h', 0, "logical",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
11 'merged', 'm', 1, "character",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
12 'read_counts', 'c', 1, "character",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
13 'max_observed', 'b', 1, "integer",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
14 'barcode_length', 'l', 1, "integer",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
15 'output', 'o', 1, "character"
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
16 ), byrow = TRUE, ncol = 4 );
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
17
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
18 opt = getopt( spec );
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
19
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
20 # if help was asked for print a friendly message
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
21 # and exit with a non-zero error code
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
22 if ( !is.null( opt$help ) ) {
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
23 cat( getopt( spec, usage=TRUE ) ) ;
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
24 q( status = 1 );
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
25 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
26
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
27 # Read inputs
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
28 merged <- read.table( opt$merged )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
29 read_counts <- read.table( opt$read_counts )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
30
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
31 barcodes_nt <- merged[ do.call( paste, as.list( merged[ ,1:3 ] ) ) %in% do.call( paste, as.list( read_counts[ ( read_counts[ , 4 ] ) <= summary( read_counts[ , 4 ] )[ 5 ], 1:3 ] ) ) , 4 ]
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
32
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
33 ##make the matrix with the nucleotide freqs per position:
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
34 nt_counts <- matrix( nrow = 4, ncol = opt$barcode_length )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
35 for( h in 1:ncol( nt_counts ) ){
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
36 j <- 1
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
37 for( nt_local in c( "A","C","G","T" ) ) {
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
38 nt_counts[ j, h ] <- sum( substr( as.character( barcodes_nt ), h, h) == nt_local )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
39 j <- j + 1
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
40 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
41 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
42
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
43 # Calculate frequencies
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
44 nt_freqs <- nt_counts / colSums( nt_counts )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
45
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
46 nt_values <- list()
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
47 for( i in 1:ncol( nt_freqs ) ) {
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
48 nt_values[[ i ]] <- nt_freqs[ , i ]
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
49 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
50
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
51
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
52 all_posible_comb <- expand.grid( nt_values )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
53
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
54 probs <- apply( all_posible_comb, 1, prod )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
55
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
56 ###Create Mf_to_Sf:
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
57 results <- c()
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
58
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
59 i <- 1
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
60 results[ i ] <- sum( 1 - ( 1 - probs )**i )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
61
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
62 while( results[ i ] <= opt$max_observed ) {
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
63 i <- i + 1
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
64 results[ i ] <- sum( 1 - ( 1 - probs )**i ) #Mf to Sf
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
65 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
66
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
67 #assign molecules number to unique barcode number:
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
68 Uf_to_Mf <- c()
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
69 for( i in 1:floor( max( results ) ) ) {
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
70 abs( results - i ) -> difference
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
71 Uf_to_Mf[ i ] <- which( difference == min( difference ) ) #if you want to know how many molecules underlie n unique barcodes ask Uf_to_Mf[n]
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
72 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
73
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
74 write( Uf_to_Mf, file = opt$output )