Mercurial > repos > cropgeeks > ukseed
comparison dartseq_seeduk_1.R @ 25:ae6b94d10bff draft
Uploaded
author | cropgeeks |
---|---|
date | Fri, 20 Apr 2018 12:42:06 -0400 |
parents | 2a49f0396e8b |
children |
comparison
equal
deleted
inserted
replaced
24:27b626c1c120 | 25:ae6b94d10bff |
---|---|
63 #Individuals with less than 60% missing values = 374 [99.5%] | 63 #Individuals with less than 60% missing values = 374 [99.5%] |
64 #Individuals with less than 65% missing values = 375 [99.7%] | 64 #Individuals with less than 65% missing values = 375 [99.7%] |
65 #[1] "Completed" | 65 #[1] "Completed" |
66 # | 66 # |
67 # | 67 # |
68 gl_call_rate <- gl.filter.callrate(gl,method = 'loc', t=0.75) | |
69 #Reporting for a genlight object | |
70 #Note: Missing values most commonly arise from restriction site mutation. | |
71 # | |
72 #Initial no. of loci = 113138 | |
73 # No. of loci deleted = 31509 | |
74 #Summary of filtered dataset | |
75 # Call Rate > 0.75 | |
76 # No. of loci: 81629 | |
77 # No. of individuals: 376 | |
78 # No. of populations: 0 | |
79 # | |
80 gl_rep <- gl.filter.repavg(gl_call_rate,t=0.98) | |
81 #Reporting for a genlight object | |
82 #Note: RepAvg is a DArT statistic reporting reproducibility averaged across alleles for each locus. | |
83 | |
84 #Initial no. of loci = 81629 | |
85 #No. of loci deleted = 6446 | |
86 #Summary of filtered dataset | |
87 # Reproducibility >= 0.98 | |
88 # No. of loci: 75183 | |
89 # No. of individuals: 376 | |
90 # No. of populations: 0 | |
91 | |
92 gl.report.callrate(gl_rep,method='ind' ) | |
93 #Reporting for a genlight object | |
94 #Note: Missing values most commonly arise from restriction site mutation. | |
95 | |
96 #Individuals no missing values = 0 [0%] across loci | |
97 #Individuals with less than 5% missing values = 161 [42.8%] | |
98 #Individuals with less than 10% missing values = 245 [65.2%] | |
99 #Individuals with less than 15% missing values = 301 [80.1%] | |
100 #Individuals with less than 20% missing values = 337 [89.6%] | |
101 #Individuals with less than 25% missing values = 347 [92.3%] | |
102 #Individuals with less than 30% missing values = 358 [95.2%] | |
103 #Individuals with less than 35% missing values = 359 [95.5%] | |
104 #Individuals with less than 40% missing values = 364 [96.8%] | |
105 #Individuals with less than 45% missing values = 372 [98.9%] | |
106 #Individuals with less than 50% missing values = 373 [99.2%] | |
107 #Individuals with less than 55% missing values = 374 [99.5%] | |
108 #Individuals with less than 60% missing values = 375 [99.7%] | |
109 #[1] "Completed" | |
110 | |
111 gl_final <- gl.filter.callrate(gl_rep,method = 'ind', t=0.8) | |
112 #Reporting for a genlight object | |
113 #Note: Missing values most commonly arise from restriction site mutation. | |
114 | |
115 #Initial no. of individuals = 376 | |
116 #Filtering a genlight object | |
117 # no. of individuals deleted = 39 | |
118 #Individuals retained = 337 | |
119 #List of individuals deleted because of low call rate | |
120 # 908017247001_E_5 908017247001_F_4 908017247002_A_10 908017247002_B_4 908017247002_B_5 908017247002_C_3 908017247002_D_12 908017247002_D_2 908017247002_D_6 908017247002_D_9 908017247002_E_6 908017247002_E_7 908017247002_E_9 908017247002_F_2 908017247002_F_6 908017247002_G_8 908017247002_H_10 908017247002_H_7 908017247002_H_8 908017247003_B_8 908017247003_C_8 908017247003_D_8 908017247003_E_8 908017247003_F_8 908017247003_G_6 908017247003_G_8 908017247003_H_7 908017247004_C_11 908017247004_D_11 908017247004_D_8 908017247004_D_9 908017247004_E_10 908017247004_E_11 908017247004_E_9 908017247004_F_11 908017247004_F_12 908017247004_F_6 908017247004_G_11 908017247004_H_11 | |
121 # from populations | |
122 | |
123 #Summary of filtered dataset | |
124 # Call Rate > 0.8 | |
125 # No. of loci: 75183 | |
126 # No. of individuals: 337 | |
127 # No. of populations: 0 | |
128 # | |
129 gl2gds(gl_final,outfile="gl2gds.gds") | |
130 #Converting gl object to gds formatted file gl2gds.gds | |
131 | |
132 #Structure of gds file | |
133 | |
134 #The file name: /data/projects/seed/dart_calls/gl2gds.gds | |
135 #The total number of samples: 268 | |
136 #The total number of SNPs: 113138 | |
137 #SNP genotypes are stored in SNP-major mode (Sample X SNP). | |
138 #The SNP positions are not in ascending order on chromosome 1. | |
139 #File: /data/projects/seed/dart_calls/gl2gds.gds (32.8M) | |
140 #+ [ ] * | |
141 #|--+ sample.id { Str8 268 ZIP_ra(13.7%), 641B } | |
142 #|--+ snp.id { Str8 113138 ZIP_ra(37.9%), 637.3K } | |
143 #|--+ snp.rs.id { Int32 113138 ZIP_ra(78.4%), 346.6K } | |
144 #|--+ snp.position { Float64 113138 ZIP_ra(14.9%), 131.5K } | |
145 #|--+ snp.chromosome { Int32 113138 ZIP_ra(0.10%), 481B } | |
146 #|--+ snp.allele { Str8 113138 ZIP_ra(14.4%), 63.6K } | |
147 #|--+ genotype { Bit2 268x113138, 7.2M } * | |
148 #\--+ loc.metrics [ data.frame ] * | |
149 # |--+ AlleleID { Int32,factor 113138 ZIP_ra(68.9%), 304.3K } * | |
150 # |--+ CloneID { Int32 113138 ZIP_ra(78.4%), 346.6K } | |
151 # |--+ ClusterTempIndex { Int32 113138 ZIP_ra(63.6%), 281.1K } | |
152 # |--+ AlleleSequence { Int32,factor 113138 ZIP_ra(68.9%), 304.4K } * | |
153 # |--+ ClusterConsensusSequence { Int32,factor 113138 ZIP_ra(66.2%), 292.5K } * | |
154 # |--+ ClusterSize { Int32 113138 ZIP_ra(7.27%), 32.1K } | |
155 # |--+ AlleleSeqDist { Int32 113138 ZIP_ra(8.49%), 37.5K } | |
156 # |--+ SNP { Int32,factor 113138 ZIP_ra(38.3%), 169.2K } * | |
157 # |--+ SnpPosition { Int32 113138 ZIP_ra(26.0%), 115.1K } | |
158 # |--+ CallRate { Float64 113138 ZIP_ra(2.84%), 25.1K } | |
159 # |--+ OneRatioRef { Float64 113138 ZIP_ra(32.7%), 289.2K } | |
160 # |--+ OneRatioSnp { Float64 113138 ZIP_ra(36.1%), 318.8K } | |
161 # |--+ FreqHomRef { Float64 113138 ZIP_ra(36.6%), 323.6K } | |
162 # |--+ FreqHomSnp { Float64 113138 ZIP_ra(32.6%), 288.4K } | |
163 # |--+ FreqHets { Float64 113138 ZIP_ra(20.0%), 177.2K } | |
164 # |--+ PICRef { Float64 113138 ZIP_ra(29.9%), 264.1K } | |
165 # |--+ PICSnp { Float64 113138 ZIP_ra(33.7%), 297.7K } | |
166 # |--+ AvgPIC { Float64 113138 ZIP_ra(44.0%), 388.6K } | |
167 # |--+ AvgCountRef { Float64 113138 ZIP_ra(55.3%), 489.1K } | |
168 # |--+ AvgCountSnp { Float64 113138 ZIP_ra(36.6%), 323.8K } | |
169 # |--+ RatioAvgCountRefAvgCountSnp { Float64 113138 ZIP_ra(57.6%), 509.2K } | |
170 # |--+ FreqHetsMinusFreqMinHom { Float64 113138 ZIP_ra(31.6%), 279.2K } | |
171 # |--+ AlleleCountsCorrelation { Float64 113138 ZIP_ra(48.2%), 425.8K } | |
172 # |--+ aggregateTagsTotal { Int32 113138 ZIP_ra(0.10%), 481B } | |
173 # |--+ DerivedCorrMinusSeedCorr { Int32 113138 ZIP_ra(0.10%), 478B } | |
174 # |--+ RepRef { Float64 113138 ZIP_ra(2.50%), 22.1K } | |
175 # |--+ RepSNP { Float64 113138 ZIP_ra(2.56%), 22.7K } | |
176 # |--+ RepAvg { Float64 113138 ZIP_ra(0.38%), 3.4K } | |
177 # |--+ PicRepRef { Float64 113138 ZIP_ra(3.02%), 26.7K } | |
178 # |--+ PicRepSNP { Float64 113138 ZIP_ra(3.59%), 31.7K } | |
179 # |--+ TotalPicRepRefTest { Int32 113138 ZIP_ra(9.95%), 44.0K } | |
180 # |--+ TotalPicRepSnpTest { Int32 113138 ZIP_ra(10.2%), 45.2K } | |
181 # |--+ clone { Int32,factor 113138 ZIP_ra(67.8%), 299.5K } * | |
182 # \--+ uid { Int32,factor 113138 ZIP_ra(68.9%), 304.3K } * | |
183 #NULL | |
184 | |
185 #Workaround to convert Dart format to 0-1-2 format | |
186 library("SNPRelate") | |
187 genofile <- snpgdsOpen("./gl2gds.gds") | |
188 #snpgdsGDS2BED(genofile, bed.fn="test", snp.id=snpset) | |
189 #Error in .InitFile(gdsobj, sample.id = sample.id, snp.id = snp.id) : | |
190 # object 'snpset' not found | |
191 snpgdsGDS2BED(genofile, bed.fn="test") | |
192 | |
193 |