comparison dartseq_seeduk_1.R @ 25:ae6b94d10bff draft

Uploaded
author cropgeeks
date Fri, 20 Apr 2018 12:42:06 -0400
parents 2a49f0396e8b
children
comparison
equal deleted inserted replaced
24:27b626c1c120 25:ae6b94d10bff
63 #Individuals with less than 60% missing values = 374 [99.5%] 63 #Individuals with less than 60% missing values = 374 [99.5%]
64 #Individuals with less than 65% missing values = 375 [99.7%] 64 #Individuals with less than 65% missing values = 375 [99.7%]
65 #[1] "Completed" 65 #[1] "Completed"
66 # 66 #
67 # 67 #
68 gl_call_rate <- gl.filter.callrate(gl,method = 'loc', t=0.75)
69 #Reporting for a genlight object
70 #Note: Missing values most commonly arise from restriction site mutation.
71 #
72 #Initial no. of loci = 113138
73 # No. of loci deleted = 31509
74 #Summary of filtered dataset
75 # Call Rate > 0.75
76 # No. of loci: 81629
77 # No. of individuals: 376
78 # No. of populations: 0
79 #
80 gl_rep <- gl.filter.repavg(gl_call_rate,t=0.98)
81 #Reporting for a genlight object
82 #Note: RepAvg is a DArT statistic reporting reproducibility averaged across alleles for each locus.
83
84 #Initial no. of loci = 81629
85 #No. of loci deleted = 6446
86 #Summary of filtered dataset
87 # Reproducibility >= 0.98
88 # No. of loci: 75183
89 # No. of individuals: 376
90 # No. of populations: 0
91
92 gl.report.callrate(gl_rep,method='ind' )
93 #Reporting for a genlight object
94 #Note: Missing values most commonly arise from restriction site mutation.
95
96 #Individuals no missing values = 0 [0%] across loci
97 #Individuals with less than 5% missing values = 161 [42.8%]
98 #Individuals with less than 10% missing values = 245 [65.2%]
99 #Individuals with less than 15% missing values = 301 [80.1%]
100 #Individuals with less than 20% missing values = 337 [89.6%]
101 #Individuals with less than 25% missing values = 347 [92.3%]
102 #Individuals with less than 30% missing values = 358 [95.2%]
103 #Individuals with less than 35% missing values = 359 [95.5%]
104 #Individuals with less than 40% missing values = 364 [96.8%]
105 #Individuals with less than 45% missing values = 372 [98.9%]
106 #Individuals with less than 50% missing values = 373 [99.2%]
107 #Individuals with less than 55% missing values = 374 [99.5%]
108 #Individuals with less than 60% missing values = 375 [99.7%]
109 #[1] "Completed"
110
111 gl_final <- gl.filter.callrate(gl_rep,method = 'ind', t=0.8)
112 #Reporting for a genlight object
113 #Note: Missing values most commonly arise from restriction site mutation.
114
115 #Initial no. of individuals = 376
116 #Filtering a genlight object
117 # no. of individuals deleted = 39
118 #Individuals retained = 337
119 #List of individuals deleted because of low call rate
120 # 908017247001_E_5 908017247001_F_4 908017247002_A_10 908017247002_B_4 908017247002_B_5 908017247002_C_3 908017247002_D_12 908017247002_D_2 908017247002_D_6 908017247002_D_9 908017247002_E_6 908017247002_E_7 908017247002_E_9 908017247002_F_2 908017247002_F_6 908017247002_G_8 908017247002_H_10 908017247002_H_7 908017247002_H_8 908017247003_B_8 908017247003_C_8 908017247003_D_8 908017247003_E_8 908017247003_F_8 908017247003_G_6 908017247003_G_8 908017247003_H_7 908017247004_C_11 908017247004_D_11 908017247004_D_8 908017247004_D_9 908017247004_E_10 908017247004_E_11 908017247004_E_9 908017247004_F_11 908017247004_F_12 908017247004_F_6 908017247004_G_11 908017247004_H_11
121 # from populations
122
123 #Summary of filtered dataset
124 # Call Rate > 0.8
125 # No. of loci: 75183
126 # No. of individuals: 337
127 # No. of populations: 0
128 #
129 gl2gds(gl_final,outfile="gl2gds.gds")
130 #Converting gl object to gds formatted file gl2gds.gds
131
132 #Structure of gds file
133
134 #The file name: /data/projects/seed/dart_calls/gl2gds.gds
135 #The total number of samples: 268
136 #The total number of SNPs: 113138
137 #SNP genotypes are stored in SNP-major mode (Sample X SNP).
138 #The SNP positions are not in ascending order on chromosome 1.
139 #File: /data/projects/seed/dart_calls/gl2gds.gds (32.8M)
140 #+ [ ] *
141 #|--+ sample.id { Str8 268 ZIP_ra(13.7%), 641B }
142 #|--+ snp.id { Str8 113138 ZIP_ra(37.9%), 637.3K }
143 #|--+ snp.rs.id { Int32 113138 ZIP_ra(78.4%), 346.6K }
144 #|--+ snp.position { Float64 113138 ZIP_ra(14.9%), 131.5K }
145 #|--+ snp.chromosome { Int32 113138 ZIP_ra(0.10%), 481B }
146 #|--+ snp.allele { Str8 113138 ZIP_ra(14.4%), 63.6K }
147 #|--+ genotype { Bit2 268x113138, 7.2M } *
148 #\--+ loc.metrics [ data.frame ] *
149 # |--+ AlleleID { Int32,factor 113138 ZIP_ra(68.9%), 304.3K } *
150 # |--+ CloneID { Int32 113138 ZIP_ra(78.4%), 346.6K }
151 # |--+ ClusterTempIndex { Int32 113138 ZIP_ra(63.6%), 281.1K }
152 # |--+ AlleleSequence { Int32,factor 113138 ZIP_ra(68.9%), 304.4K } *
153 # |--+ ClusterConsensusSequence { Int32,factor 113138 ZIP_ra(66.2%), 292.5K } *
154 # |--+ ClusterSize { Int32 113138 ZIP_ra(7.27%), 32.1K }
155 # |--+ AlleleSeqDist { Int32 113138 ZIP_ra(8.49%), 37.5K }
156 # |--+ SNP { Int32,factor 113138 ZIP_ra(38.3%), 169.2K } *
157 # |--+ SnpPosition { Int32 113138 ZIP_ra(26.0%), 115.1K }
158 # |--+ CallRate { Float64 113138 ZIP_ra(2.84%), 25.1K }
159 # |--+ OneRatioRef { Float64 113138 ZIP_ra(32.7%), 289.2K }
160 # |--+ OneRatioSnp { Float64 113138 ZIP_ra(36.1%), 318.8K }
161 # |--+ FreqHomRef { Float64 113138 ZIP_ra(36.6%), 323.6K }
162 # |--+ FreqHomSnp { Float64 113138 ZIP_ra(32.6%), 288.4K }
163 # |--+ FreqHets { Float64 113138 ZIP_ra(20.0%), 177.2K }
164 # |--+ PICRef { Float64 113138 ZIP_ra(29.9%), 264.1K }
165 # |--+ PICSnp { Float64 113138 ZIP_ra(33.7%), 297.7K }
166 # |--+ AvgPIC { Float64 113138 ZIP_ra(44.0%), 388.6K }
167 # |--+ AvgCountRef { Float64 113138 ZIP_ra(55.3%), 489.1K }
168 # |--+ AvgCountSnp { Float64 113138 ZIP_ra(36.6%), 323.8K }
169 # |--+ RatioAvgCountRefAvgCountSnp { Float64 113138 ZIP_ra(57.6%), 509.2K }
170 # |--+ FreqHetsMinusFreqMinHom { Float64 113138 ZIP_ra(31.6%), 279.2K }
171 # |--+ AlleleCountsCorrelation { Float64 113138 ZIP_ra(48.2%), 425.8K }
172 # |--+ aggregateTagsTotal { Int32 113138 ZIP_ra(0.10%), 481B }
173 # |--+ DerivedCorrMinusSeedCorr { Int32 113138 ZIP_ra(0.10%), 478B }
174 # |--+ RepRef { Float64 113138 ZIP_ra(2.50%), 22.1K }
175 # |--+ RepSNP { Float64 113138 ZIP_ra(2.56%), 22.7K }
176 # |--+ RepAvg { Float64 113138 ZIP_ra(0.38%), 3.4K }
177 # |--+ PicRepRef { Float64 113138 ZIP_ra(3.02%), 26.7K }
178 # |--+ PicRepSNP { Float64 113138 ZIP_ra(3.59%), 31.7K }
179 # |--+ TotalPicRepRefTest { Int32 113138 ZIP_ra(9.95%), 44.0K }
180 # |--+ TotalPicRepSnpTest { Int32 113138 ZIP_ra(10.2%), 45.2K }
181 # |--+ clone { Int32,factor 113138 ZIP_ra(67.8%), 299.5K } *
182 # \--+ uid { Int32,factor 113138 ZIP_ra(68.9%), 304.3K } *
183 #NULL
184
185 #Workaround to convert Dart format to 0-1-2 format
186 library("SNPRelate")
187 genofile <- snpgdsOpen("./gl2gds.gds")
188 #snpgdsGDS2BED(genofile, bed.fn="test", snp.id=snpset)
189 #Error in .InitFile(gdsobj, sample.id = sample.id, snp.id = snp.id) :
190 # object 'snpset' not found
191 snpgdsGDS2BED(genofile, bed.fn="test")
192
193