annotate k2n.R @ 11:f6265e05c55c draft

Uploaded
author nikos
date Wed, 05 Nov 2014 10:00:47 -0500
parents 33e625bef2b9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
1 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
2 args <- commandArgs(trailingOnly = TRUE)
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
3
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
4 # Read inputs
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
5 merged <- read.table( args[1] )
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
6 read_counts <- read.table( args[2] )
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
7
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
8 barcodes_nt <- merged[ do.call( paste, as.list( merged[ ,1:3 ] ) ) %in% do.call( paste, as.list( read_counts[ ( read_counts[ , 4 ] ) <= summary( read_counts[ , 4 ] )[ 5 ], 1:3 ] ) ) , 4 ]
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
9
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
10 ##make the matrix with the nucleotide freqs per position:
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
11 nt_counts <- matrix( nrow = 4, ncol = as.numeric(args[4]) )
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
12 for( h in 1:ncol( nt_counts ) ){
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
13 j <- 1
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
14 for( nt_local in c( "A","C","G","T" ) ) {
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
15 nt_counts[ j, h ] <- sum( substr( as.character( barcodes_nt ), h, h) == nt_local )
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
16 j <- j + 1
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
17 }
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
18 }
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
19
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
20 # Calculate frequencies
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
21 nt_freqs <- nt_counts / colSums( nt_counts )
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
22
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
23 nt_values <- list()
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
24 for( i in 1:ncol( nt_freqs ) ) {
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
25 nt_values[[ i ]] <- nt_freqs[ , i ]
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
26 }
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
27
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
28
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
29 all_posible_comb <- expand.grid( nt_values )
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
30
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
31 probs <- apply( all_posible_comb, 1, prod )
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
32
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
33 ###Create Mf_to_Sf:
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
34 results <- c()
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
35
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
36 i <- 1
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
37 results[ i ] <- sum( 1 - ( 1 - probs )**i )
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
38
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
39 while( results[ i ] <= as.numeric(args[3]) ) {
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
40 i <- i + 1
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
41 results[ i ] <- sum( 1 - ( 1 - probs )**i ) #Mf to Sf
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
42 }
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
43
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
44 #assign molecules number to unique barcode number:
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
45 Uf_to_Mf <- c()
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
46 for( i in 1:floor( max( results ) ) ) {
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
47 abs( results - i ) -> difference
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
48 Uf_to_Mf[ i ] <- which( difference == min( difference ) ) #if you want to know how many molecules underlie n unique barcodes ask Uf_to_Mf[n]
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
49 }
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
50
33e625bef2b9 Uploaded
nikos
parents:
diff changeset
51 write( Uf_to_Mf, file = args[5] )