view dartseq_seeduk_1.R @ 28:27777dd17bfe draft default tip

Uploaded
author cropgeeks
date Sun, 22 Apr 2018 15:23:50 -0400
parents ae6b94d10bff
children
line wrap: on
line source

# yum install gdal-devel libcurl-devel libpng-devel mesa-libGL-devel mesa-libGLU-devel proj-nad proj-epsg
#
# source("https://bioconductor.org/biocLite.R")
# biocLite("SNPRelate")
# biocLite("qvalue")
# 
# install.packages("dartR")
# install.packages("amap")

args = commandArgs(trailingOnly=TRUE)


library("dartR")

#Read DarT data 
gl <- gl.read.dart(filename=args[1],  nas = "-", topskip = 5, lastmetric = "TotalPicRepSnpTest", probar = TRUE)
gl <- gl.filter.monomorphs(gl)

#Trying to determine if one row or two row format...
#Found  2  row(s) format. Proceed...
#Added the following covmetrics:
#AlleleID CloneID ClusterTempIndex AlleleSequence ClusterConsensusSequence ClusterSize AlleleSeqDist SNP SnpPosition CallRate OneRatioRef OneRatioSnp FreqHomRef FreqHomSnp FreqHets PICRef PICSnp AvgPIC AvgCountRef AvgCountSnp RatioAvgCountRefAvgCountSnp FreqHetsMinusFreqMinHom AlleleCountsCorrelation aggregateTagsTotal DerivedCorrMinusSeedCorr RepRef RepSNP RepAvg PicRepRef PicRepSNP TotalPicRepRefTest TotalPicRepSnpTest .
#Number of rows per Clone. Should be only  2 s: 2 
# Recognised: 376 individuals and 113138  SNPs in a 2 row format using /data/projects/seed/dart_calls/BBSRC-Panel-DArTSEQ-SNPs.csv 
#Start conversion....
#Format is 2 rows.
#Please note conversion of bigger data sets will take some time!
#Once finished, we recommend to save the object using save(object, file="object.rdata")
#  |======================================================================| 100%
#>
gl.report.callrate(gl)
#Reporting for a genlight object
#Note: Missing values most commonly arise from restriction site mutation.

#  Loci with no missing values = 499 [0.4%]
#    < 5% missing values = 23669 [20.9%]
#    < 10% missing values = 45298 [40%]
#    < 15% missing values = 60678 [53.6%]
#    < 20% missing values = 72478 [64.1%]
#    < 25% missing values = 81629 [72.1%]
#    < 30% missing values = 89227 [78.9%]
#    < 35% missing values = 95969 [84.8%]
#    < 40% missing values = 101973 [90.1%]
#    < 45% missing values = 107590 [95.1%]
#    < 50% missing values = 113138 [100%]
#[1] "Completed"
gl.report.callrate(gl,method='ind' )
#Reporting for a genlight object
#Note: Missing values most commonly arise from restriction site mutation.

#Individuals no missing values = 0 [0%] across loci
#Individuals with less than 5% missing values = 1 [0.3%]
#Individuals with less than 10% missing values = 73 [19.4%]
#Individuals with less than 15% missing values = 194 [51.6%]
#Individuals with less than 20% missing values = 268 [71.3%]
#Individuals with less than 25% missing values = 320 [85.1%]
#Individuals with less than 30% missing values = 341 [90.7%]
#Individuals with less than 35% missing values = 352 [93.6%]
#Individuals with less than 40% missing values = 358 [95.2%]
#Individuals with less than 45% missing values = 366 [97.3%]
#Individuals with less than 50% missing values = 371 [98.7%]
#Individuals with less than 55% missing values = 372 [98.9%]
#Individuals with less than 60% missing values = 374 [99.5%]
#Individuals with less than 65% missing values = 375 [99.7%]
#[1] "Completed"
#
#