annotate dartseq_seeduk_1.R @ 26:d1d232d4cb2f draft

Uploaded
author cropgeeks
date Fri, 20 Apr 2018 14:44:25 -0400
parents ae6b94d10bff
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
1 # yum install gdal-devel libcurl-devel libpng-devel mesa-libGL-devel mesa-libGLU-devel proj-nad proj-epsg
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
2 #
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
3 # source("https://bioconductor.org/biocLite.R")
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
4 # biocLite("SNPRelate")
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
5 # biocLite("qvalue")
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
6 #
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
7 # install.packages("dartR")
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
8 # install.packages("amap")
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
9
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
10 args = commandArgs(trailingOnly=TRUE)
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
11
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
12
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
13 library("dartR")
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
14
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
15 # Read DArT data
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
16 gl <- gl.read.dart(filename=args[1], nas = "-", topskip = 5, lastmetric = "TotalPicRepSnpTest", probar = TRUE)
6
2a49f0396e8b Uploaded
cropgeeks
parents: 3
diff changeset
17 gl <- gl.filter.monomorphs(gl)
1
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
18
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
19 #Trying to determine if one row or two row format...
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
20 #Found 2 row(s) format. Proceed...
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
21 #Added the following covmetrics:
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
22 #AlleleID CloneID ClusterTempIndex AlleleSequence ClusterConsensusSequence ClusterSize AlleleSeqDist SNP SnpPosition CallRate OneRatioRef OneRatioSnp FreqHomRef FreqHomSnp FreqHets PICRef PICSnp AvgPIC AvgCountRef AvgCountSnp RatioAvgCountRefAvgCountSnp FreqHetsMinusFreqMinHom AlleleCountsCorrelation aggregateTagsTotal DerivedCorrMinusSeedCorr RepRef RepSNP RepAvg PicRepRef PicRepSNP TotalPicRepRefTest TotalPicRepSnpTest .
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
23 #Number of rows per Clone. Should be only 2 s: 2
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
24 # Recognised: 376 individuals and 113138 SNPs in a 2 row format using /data/projects/seed/dart_calls/BBSRC-Panel-DArTSEQ-SNPs.csv
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
25 #Start conversion....
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
26 #Format is 2 rows.
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
27 #Please note conversion of bigger data sets will take some time!
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
28 #Once finished, we recommend to save the object using save(object, file="object.rdata")
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
29 # |======================================================================| 100%
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
30 #>
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
31 gl.report.callrate(gl)
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
32 #Reporting for a genlight object
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
33 #Note: Missing values most commonly arise from restriction site mutation.
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
34
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
35 # Loci with no missing values = 499 [0.4%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
36 # < 5% missing values = 23669 [20.9%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
37 # < 10% missing values = 45298 [40%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
38 # < 15% missing values = 60678 [53.6%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
39 # < 20% missing values = 72478 [64.1%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
40 # < 25% missing values = 81629 [72.1%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
41 # < 30% missing values = 89227 [78.9%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
42 # < 35% missing values = 95969 [84.8%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
43 # < 40% missing values = 101973 [90.1%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
44 # < 45% missing values = 107590 [95.1%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
45 # < 50% missing values = 113138 [100%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
46 #[1] "Completed"
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
47 gl.report.callrate(gl,method='ind' )
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
48 #Reporting for a genlight object
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
49 #Note: Missing values most commonly arise from restriction site mutation.
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
50
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
51 #Individuals no missing values = 0 [0%] across loci
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
52 #Individuals with less than 5% missing values = 1 [0.3%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
53 #Individuals with less than 10% missing values = 73 [19.4%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
54 #Individuals with less than 15% missing values = 194 [51.6%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
55 #Individuals with less than 20% missing values = 268 [71.3%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
56 #Individuals with less than 25% missing values = 320 [85.1%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
57 #Individuals with less than 30% missing values = 341 [90.7%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
58 #Individuals with less than 35% missing values = 352 [93.6%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
59 #Individuals with less than 40% missing values = 358 [95.2%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
60 #Individuals with less than 45% missing values = 366 [97.3%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
61 #Individuals with less than 50% missing values = 371 [98.7%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
62 #Individuals with less than 55% missing values = 372 [98.9%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
63 #Individuals with less than 60% missing values = 374 [99.5%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
64 #Individuals with less than 65% missing values = 375 [99.7%]
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
65 #[1] "Completed"
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
66 #
a8f30e5859c9 Uploaded
cropgeeks
parents:
diff changeset
67 #