Mercurial > repos > cropgeeks > ukseed
diff dartseq_seeduk_1.R @ 25:ae6b94d10bff draft
Uploaded
author | cropgeeks |
---|---|
date | Fri, 20 Apr 2018 12:42:06 -0400 |
parents | 2a49f0396e8b |
children |
line wrap: on
line diff
--- a/dartseq_seeduk_1.R Fri Apr 20 11:15:45 2018 -0400 +++ b/dartseq_seeduk_1.R Fri Apr 20 12:42:06 2018 -0400 @@ -65,129 +65,3 @@ #[1] "Completed" # # -gl_call_rate <- gl.filter.callrate(gl,method = 'loc', t=0.75) -#Reporting for a genlight object -#Note: Missing values most commonly arise from restriction site mutation. -# -#Initial no. of loci = 113138 -# No. of loci deleted = 31509 -#Summary of filtered dataset -# Call Rate > 0.75 -# No. of loci: 81629 -# No. of individuals: 376 -# No. of populations: 0 -# -gl_rep <- gl.filter.repavg(gl_call_rate,t=0.98) -#Reporting for a genlight object -#Note: RepAvg is a DArT statistic reporting reproducibility averaged across alleles for each locus. - -#Initial no. of loci = 81629 -#No. of loci deleted = 6446 -#Summary of filtered dataset -# Reproducibility >= 0.98 -# No. of loci: 75183 -# No. of individuals: 376 -# No. of populations: 0 - -gl.report.callrate(gl_rep,method='ind' ) -#Reporting for a genlight object -#Note: Missing values most commonly arise from restriction site mutation. 
- -#Individuals no missing values = 0 [0%] across loci -#Individuals with less than 5% missing values = 161 [42.8%] -#Individuals with less than 10% missing values = 245 [65.2%] -#Individuals with less than 15% missing values = 301 [80.1%] -#Individuals with less than 20% missing values = 337 [89.6%] -#Individuals with less than 25% missing values = 347 [92.3%] -#Individuals with less than 30% missing values = 358 [95.2%] -#Individuals with less than 35% missing values = 359 [95.5%] -#Individuals with less than 40% missing values = 364 [96.8%] -#Individuals with less than 45% missing values = 372 [98.9%] -#Individuals with less than 50% missing values = 373 [99.2%] -#Individuals with less than 55% missing values = 374 [99.5%] -#Individuals with less than 60% missing values = 375 [99.7%] -#[1] "Completed" - -gl_final <- gl.filter.callrate(gl_rep,method = 'ind', t=0.8) -#Reporting for a genlight object -#Note: Missing values most commonly arise from restriction site mutation. - -#Initial no. of individuals = 376 -#Filtering a genlight object -# no. of individuals deleted = 39 -#Individuals retained = 337 -#List of individuals deleted because of low call rate -# 908017247001_E_5 908017247001_F_4 908017247002_A_10 908017247002_B_4 908017247002_B_5 908017247002_C_3 908017247002_D_12 908017247002_D_2 908017247002_D_6 908017247002_D_9 908017247002_E_6 908017247002_E_7 908017247002_E_9 908017247002_F_2 908017247002_F_6 908017247002_G_8 908017247002_H_10 908017247002_H_7 908017247002_H_8 908017247003_B_8 908017247003_C_8 908017247003_D_8 908017247003_E_8 908017247003_F_8 908017247003_G_6 908017247003_G_8 908017247003_H_7 908017247004_C_11 908017247004_D_11 908017247004_D_8 908017247004_D_9 908017247004_E_10 908017247004_E_11 908017247004_E_9 908017247004_F_11 908017247004_F_12 908017247004_F_6 908017247004_G_11 908017247004_H_11 -# from populations - -#Summary of filtered dataset -# Call Rate > 0.8 -# No. of loci: 75183 - # No. of individuals: 337 - # No. 
of populations: 0 -# -gl2gds(gl_final,outfile="gl2gds.gds") -#Converting gl object to gds formatted file gl2gds.gds - -#Structure of gds file - -#The file name: /data/projects/seed/dart_calls/gl2gds.gds -#The total number of samples: 268 -#The total number of SNPs: 113138 -#SNP genotypes are stored in SNP-major mode (Sample X SNP). -#The SNP positions are not in ascending order on chromosome 1. -#File: /data/projects/seed/dart_calls/gl2gds.gds (32.8M) -#+ [ ] * -#|--+ sample.id { Str8 268 ZIP_ra(13.7%), 641B } -#|--+ snp.id { Str8 113138 ZIP_ra(37.9%), 637.3K } -#|--+ snp.rs.id { Int32 113138 ZIP_ra(78.4%), 346.6K } -#|--+ snp.position { Float64 113138 ZIP_ra(14.9%), 131.5K } -#|--+ snp.chromosome { Int32 113138 ZIP_ra(0.10%), 481B } -#|--+ snp.allele { Str8 113138 ZIP_ra(14.4%), 63.6K } -#|--+ genotype { Bit2 268x113138, 7.2M } * -#\--+ loc.metrics [ data.frame ] * -# |--+ AlleleID { Int32,factor 113138 ZIP_ra(68.9%), 304.3K } * -# |--+ CloneID { Int32 113138 ZIP_ra(78.4%), 346.6K } -# |--+ ClusterTempIndex { Int32 113138 ZIP_ra(63.6%), 281.1K } -# |--+ AlleleSequence { Int32,factor 113138 ZIP_ra(68.9%), 304.4K } * -# |--+ ClusterConsensusSequence { Int32,factor 113138 ZIP_ra(66.2%), 292.5K } * -# |--+ ClusterSize { Int32 113138 ZIP_ra(7.27%), 32.1K } -# |--+ AlleleSeqDist { Int32 113138 ZIP_ra(8.49%), 37.5K } -# |--+ SNP { Int32,factor 113138 ZIP_ra(38.3%), 169.2K } * -# |--+ SnpPosition { Int32 113138 ZIP_ra(26.0%), 115.1K } -# |--+ CallRate { Float64 113138 ZIP_ra(2.84%), 25.1K } -# |--+ OneRatioRef { Float64 113138 ZIP_ra(32.7%), 289.2K } -# |--+ OneRatioSnp { Float64 113138 ZIP_ra(36.1%), 318.8K } -# |--+ FreqHomRef { Float64 113138 ZIP_ra(36.6%), 323.6K } -# |--+ FreqHomSnp { Float64 113138 ZIP_ra(32.6%), 288.4K } -# |--+ FreqHets { Float64 113138 ZIP_ra(20.0%), 177.2K } -# 
|--+ PICRef { Float64 113138 ZIP_ra(29.9%), 264.1K } -# |--+ PICSnp { Float64 113138 ZIP_ra(33.7%), 297.7K } -# |--+ AvgPIC { Float64 113138 ZIP_ra(44.0%), 388.6K } -# |--+ AvgCountRef { Float64 113138 ZIP_ra(55.3%), 489.1K } -# |--+ AvgCountSnp { Float64 113138 ZIP_ra(36.6%), 323.8K } -# |--+ RatioAvgCountRefAvgCountSnp { Float64 113138 ZIP_ra(57.6%), 509.2K } -# |--+ FreqHetsMinusFreqMinHom { Float64 113138 ZIP_ra(31.6%), 279.2K } -# |--+ AlleleCountsCorrelation { Float64 113138 ZIP_ra(48.2%), 425.8K } -# |--+ aggregateTagsTotal { Int32 113138 ZIP_ra(0.10%), 481B } -# |--+ DerivedCorrMinusSeedCorr { Int32 113138 ZIP_ra(0.10%), 478B } -# |--+ RepRef { Float64 113138 ZIP_ra(2.50%), 22.1K } -# |--+ RepSNP { Float64 113138 ZIP_ra(2.56%), 22.7K } -# |--+ RepAvg { Float64 113138 ZIP_ra(0.38%), 3.4K } -# |--+ PicRepRef { Float64 113138 ZIP_ra(3.02%), 26.7K } -# |--+ PicRepSNP { Float64 113138 ZIP_ra(3.59%), 31.7K } -# |--+ TotalPicRepRefTest { Int32 113138 ZIP_ra(9.95%), 44.0K } -# |--+ TotalPicRepSnpTest { Int32 113138 ZIP_ra(10.2%), 45.2K } -# |--+ clone { Int32,factor 113138 ZIP_ra(67.8%), 299.5K } * -# \--+ uid { Int32,factor 113138 ZIP_ra(68.9%), 304.3K } * -#NULL - -#Workaround to convert Dart format to 0-1-2 format -library("SNPRelate") -genofile <- snpgdsOpen("./gl2gds.gds") -#snpgdsGDS2BED(genofile, bed.fn="test", snp.id=snpset) -#Error in .InitFile(gdsobj, sample.id = sample.id, snp.id = snp.id) : -# object 'snpset' not found -snpgdsGDS2BED(genofile, bed.fn="test") - -