annotate dartseq_seeduk_1b.R @ 17:86b97a659d72 draft

Uploaded
author cropgeeks
date Fri, 20 Apr 2018 05:10:48 -0400
parents 8c273315322b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
13
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
1 # yum install gdal-devel libcurl-devel libpng-devel mesa-libGL-devel mesa-libGLU-devel proj-nad proj-epsg
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
2 #
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
3 # source("https://bioconductor.org/biocLite.R")
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
4 # biocLite("SNPRelate")
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
5 # biocLite("qvalue")
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
6 #
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
7 # install.packages("dartR")
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
8 # install.packages("amap")
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
9
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
10 args = commandArgs(trailingOnly=TRUE)
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
11
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
12
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
13 library("dartR")
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
14
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
15 # Read DArT data (report file path is the first command-line argument)
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
16 gl <- gl.read.dart(filename=args[1], nas = "-", topskip = 5, lastmetric = "TotalPicRepSnpTest", probar = TRUE)
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
17 gl <- gl.filter.monomorphs(gl)
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
18
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
19 # Console output from a previous run: Trying to determine if one row or two row format...
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
20 #Found 2 row(s) format. Proceed...
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
21 #Added the following covmetrics:
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
22 #AlleleID CloneID ClusterTempIndex AlleleSequence ClusterConsensusSequence ClusterSize AlleleSeqDist SNP SnpPosition CallRate OneRatioRef OneRatioSnp FreqHomRef FreqHomSnp FreqHets PICRef PICSnp AvgPIC AvgCountRef AvgCountSnp RatioAvgCountRefAvgCountSnp FreqHetsMinusFreqMinHom AlleleCountsCorrelation aggregateTagsTotal DerivedCorrMinusSeedCorr RepRef RepSNP RepAvg PicRepRef PicRepSNP TotalPicRepRefTest TotalPicRepSnpTest .
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
23 #Number of rows per Clone. Should be only 2 s: 2
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
24 # Recognised: 376 individuals and 113138 SNPs in a 2 row format using /data/projects/seed/dart_calls/BBSRC-Panel-DArTSEQ-SNPs.csv
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
25 #Start conversion....
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
26 #Format is 2 rows.
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
27 #Please note conversion of bigger data sets will take some time!
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
28 #Once finished, we recommend to save the object using save(object, file="object.rdata")
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
29 # |======================================================================| 100%
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
30 #>
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
31 gl.report.callrate(gl)
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
32 #Reporting for a genlight object
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
33 #Note: Missing values most commonly arise from restriction site mutation.
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
34
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
35 # Loci with no missing values = 499 [0.4%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
36 # < 5% missing values = 23669 [20.9%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
37 # < 10% missing values = 45298 [40%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
38 # < 15% missing values = 60678 [53.6%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
39 # < 20% missing values = 72478 [64.1%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
40 # < 25% missing values = 81629 [72.1%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
41 # < 30% missing values = 89227 [78.9%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
42 # < 35% missing values = 95969 [84.8%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
43 # < 40% missing values = 101973 [90.1%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
44 # < 45% missing values = 107590 [95.1%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
45 # < 50% missing values = 113138 [100%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
46 #[1] "Completed"
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
47 gl.report.callrate(gl,method='ind' )
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
48 #Reporting for a genlight object
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
49 #Note: Missing values most commonly arise from restriction site mutation.
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
50
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
51 #Individuals no missing values = 0 [0%] across loci
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
52 #Individuals with less than 5% missing values = 1 [0.3%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
53 #Individuals with less than 10% missing values = 73 [19.4%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
54 #Individuals with less than 15% missing values = 194 [51.6%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
55 #Individuals with less than 20% missing values = 268 [71.3%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
56 #Individuals with less than 25% missing values = 320 [85.1%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
57 #Individuals with less than 30% missing values = 341 [90.7%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
58 #Individuals with less than 35% missing values = 352 [93.6%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
59 #Individuals with less than 40% missing values = 358 [95.2%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
60 #Individuals with less than 45% missing values = 366 [97.3%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
61 #Individuals with less than 50% missing values = 371 [98.7%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
62 #Individuals with less than 55% missing values = 372 [98.9%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
63 #Individuals with less than 60% missing values = 374 [99.5%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
64 #Individuals with less than 65% missing values = 375 [99.7%]
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
65 #[1] "Completed"
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
66 #
8c273315322b Uploaded
cropgeeks
parents:
diff changeset
67 #