| 0 | 1 suppressMessages(library('getopt')) | 
|  | 2 | 
|  | 3 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) | 
|  | 4 args <- commandArgs(trailingOnly = TRUE) | 
|  | 5 | 
|  | 6 #get options, using the spec as defined by the enclosed list. | 
|  | 7 #we read the options from the default: commandArgs(TRUE). | 
|  | 8 spec = matrix( c( | 
|  | 9   'verbose', 'v', 2, "integer", | 
|  | 10   'help' , 'h', 0, "logical", | 
|  | 11   'merged', 'm', 1, "character", | 
|  | 12   'read_counts', 'c', 1, "character", | 
|  | 13   'max_observed', 'b', 1, "integer", | 
|  | 14   'barcode_length', 'l', 1, "integer", | 
|  | 15   'output', 'o', 1, "character" | 
|  | 16   ), byrow = TRUE, ncol = 4 ); | 
|  | 17 | 
|  | 18 opt = getopt( spec ); | 
|  | 19 | 
|  | 20 # if help was asked for print a friendly message | 
|  | 21 # and exit with a non-zero error code | 
|  | 22 if ( !is.null( opt$help ) ) { | 
|  | 23     cat( getopt( spec, usage=TRUE ) ) ; | 
|  | 24     q( status = 1 ); | 
|  | 25 } | 
|  | 26 | 
|  | 27 # Read inputs | 
|  | 28 merged <- read.table( opt$merged ) | 
|  | 29 read_counts <- read.table( opt$read_counts ) | 
|  | 30 | 
|  | 31 barcodes_nt <- merged[ do.call( paste, as.list( merged[ ,1:3 ] ) ) %in% do.call( paste, as.list( read_counts[ ( read_counts[ , 4 ] ) <= summary( read_counts[ , 4 ] )[ 5 ], 1:3 ] ) ) , 4 ] | 
|  | 32 | 
|  | 33 ##make the matrix with the nucleotide freqs per position: | 
|  | 34 nt_counts <- matrix( nrow = 4, ncol = opt$barcode_length ) | 
|  | 35 for( h in 1:ncol( nt_counts ) ){ | 
|  | 36     j <- 1 | 
|  | 37     for( nt_local in c( "A","C","G","T" ) ) { | 
|  | 38         nt_counts[ j, h ] <- sum( substr( as.character( barcodes_nt ), h, h) == nt_local ) | 
|  | 39         j <- j + 1 | 
|  | 40     } | 
|  | 41 } | 
|  | 42 | 
|  | 43 # Calculate frequencies | 
|  | 44 nt_freqs <- nt_counts / colSums( nt_counts ) | 
|  | 45 | 
|  | 46 nt_values <- list() | 
|  | 47 for( i in 1:ncol( nt_freqs ) ) { | 
|  | 48     nt_values[[ i ]] <- nt_freqs[ , i ] | 
|  | 49 } | 
|  | 50 | 
|  | 51 | 
|  | 52 all_posible_comb <- expand.grid( nt_values ) | 
|  | 53 | 
|  | 54 probs <- apply( all_posible_comb, 1, prod ) | 
|  | 55 | 
|  | 56 ###Create Mf_to_Sf: | 
|  | 57 results <- c() | 
|  | 58 | 
|  | 59 i <- 1 | 
|  | 60 results[ i ] <- sum( 1 - ( 1 - probs )**i ) | 
|  | 61 | 
|  | 62 while( results[ i ] <= opt$max_observed ) { | 
|  | 63     i <- i + 1 | 
|  | 64     results[ i ] <- sum( 1 - ( 1 - probs )**i )   #Mf to Sf | 
|  | 65 } | 
|  | 66 | 
|  | 67 #assign molecules number to unique barcode number: | 
|  | 68 Uf_to_Mf <- c() | 
|  | 69 for( i in 1:floor( max( results ) ) ) { | 
|  | 70 abs( results - i ) -> difference | 
|  | 71 Uf_to_Mf[ i ] <- which( difference == min( difference ) ) #if you want to know how many molecules underlie n unique barcodes ask Uf_to_Mf[n] | 
|  | 72 } | 
|  | 73 | 
|  | 74 write( Uf_to_Mf, file = opt$output ) |