|
0
|
1 suppressMessages(library('getopt'))
|
|
|
2
|
|
|
3 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
|
|
|
4 args <- commandArgs(trailingOnly = TRUE)
|
|
|
5
|
|
|
6 #get options, using the spec as defined by the enclosed list.
|
|
|
7 #we read the options from the default: commandArgs(TRUE).
|
|
|
8 spec = matrix( c(
|
|
|
9 'verbose', 'v', 2, "integer",
|
|
|
10 'help' , 'h', 0, "logical",
|
|
|
11 'merged', 'm', 1, "character",
|
|
|
12 'read_counts', 'c', 1, "character",
|
|
|
13 'max_observed', 'b', 1, "integer",
|
|
|
14 'barcode_length', 'l', 1, "integer",
|
|
|
15 'output', 'o', 1, "character"
|
|
|
16 ), byrow = TRUE, ncol = 4 );
|
|
|
17
|
|
|
18 opt = getopt( spec );
|
|
|
19
|
|
|
20 # if help was asked for print a friendly message
|
|
|
21 # and exit with a non-zero error code
|
|
|
22 if ( !is.null( opt$help ) ) {
|
|
|
23 cat( getopt( spec, usage=TRUE ) ) ;
|
|
|
24 q( status = 1 );
|
|
|
25 }
|
|
|
26
|
|
|
27 # Read inputs
|
|
|
28 merged <- read.table( opt$merged )
|
|
|
29 read_counts <- read.table( opt$read_counts )
|
|
|
30
|
|
|
31 barcodes_nt <- merged[ do.call( paste, as.list( merged[ ,1:3 ] ) ) %in% do.call( paste, as.list( read_counts[ ( read_counts[ , 4 ] ) <= summary( read_counts[ , 4 ] )[ 5 ], 1:3 ] ) ) , 4 ]
|
|
|
32
|
|
|
33 ##make the matrix with the nucleotide freqs per position:
|
|
|
34 nt_counts <- matrix( nrow = 4, ncol = opt$barcode_length )
|
|
|
35 for( h in 1:ncol( nt_counts ) ){
|
|
|
36 j <- 1
|
|
|
37 for( nt_local in c( "A","C","G","T" ) ) {
|
|
|
38 nt_counts[ j, h ] <- sum( substr( as.character( barcodes_nt ), h, h) == nt_local )
|
|
|
39 j <- j + 1
|
|
|
40 }
|
|
|
41 }
|
|
|
42
|
|
|
43 # Calculate frequencies
|
|
|
44 nt_freqs <- nt_counts / colSums( nt_counts )
|
|
|
45
|
|
|
46 nt_values <- list()
|
|
|
47 for( i in 1:ncol( nt_freqs ) ) {
|
|
|
48 nt_values[[ i ]] <- nt_freqs[ , i ]
|
|
|
49 }
|
|
|
50
|
|
|
51
|
|
|
52 all_posible_comb <- expand.grid( nt_values )
|
|
|
53
|
|
|
54 probs <- apply( all_posible_comb, 1, prod )
|
|
|
55
|
|
|
56 ###Create Mf_to_Sf:
|
|
|
57 results <- c()
|
|
|
58
|
|
|
59 i <- 1
|
|
|
60 results[ i ] <- sum( 1 - ( 1 - probs )**i )
|
|
|
61
|
|
|
62 while( results[ i ] <= opt$max_observed ) {
|
|
|
63 i <- i + 1
|
|
|
64 results[ i ] <- sum( 1 - ( 1 - probs )**i ) #Mf to Sf
|
|
|
65 }
|
|
|
66
|
|
|
67 #assign molecules number to unique barcode number:
|
|
|
68 Uf_to_Mf <- c()
|
|
|
69 for( i in 1:floor( max( results ) ) ) {
|
|
|
70 abs( results - i ) -> difference
|
|
|
71 Uf_to_Mf[ i ] <- which( difference == min( difference ) ) #if you want to know how many molecules underlie n unique barcodes ask Uf_to_Mf[n]
|
|
|
72 }
|
|
|
73
|
|
|
74 write( Uf_to_Mf, file = opt$output )
|