diff dartseq_seeduk_1.R @ 25:ae6b94d10bff draft

Uploaded
author cropgeeks
date Fri, 20 Apr 2018 12:42:06 -0400
parents 2a49f0396e8b
children
line wrap: on
line diff
--- a/dartseq_seeduk_1.R	Fri Apr 20 11:15:45 2018 -0400
+++ b/dartseq_seeduk_1.R	Fri Apr 20 12:42:06 2018 -0400
@@ -65,129 +65,3 @@
 #[1] "Completed"
 #
 #
-gl_call_rate <-  gl.filter.callrate(gl,method = 'loc', t=0.75)
-#Reporting for a genlight object
-#Note: Missing values most commonly arise from restriction site mutation.
-#
-#Initial no. of loci = 113138 
-#  No. of loci deleted = 31509 
-#Summary of filtered dataset
-#  Call Rate > 0.75 
-#  No. of loci: 81629 
-#  No. of individuals: 376 
-#  No. of populations:  0 
-#
-gl_rep <- gl.filter.repavg(gl_call_rate,t=0.98)
-#Reporting for a genlight object
-#Note: RepAvg is a DArT statistic reporting reproducibility averaged across alleles for each locus. 
-
-#Initial no. of loci = 81629 
-#No. of loci deleted = 6446 
-#Summary of filtered dataset
-#  Reproducibility >= 0.98 
-#  No. of loci: 75183 
-#  No. of individuals: 376 
-#  No. of populations:  0 
-
-gl.report.callrate(gl_rep,method='ind' )
-#Reporting for a genlight object
-#Note: Missing values most commonly arise from restriction site mutation.
-
-#Individuals no missing values = 0 [0%] across loci
-#Individuals with less than 5% missing values = 161 [42.8%]
-#Individuals with less than 10% missing values = 245 [65.2%]
-#Individuals with less than 15% missing values = 301 [80.1%]
-#Individuals with less than 20% missing values = 337 [89.6%]
-#Individuals with less than 25% missing values = 347 [92.3%]
-#Individuals with less than 30% missing values = 358 [95.2%]
-#Individuals with less than 35% missing values = 359 [95.5%]
-#Individuals with less than 40% missing values = 364 [96.8%]
-#Individuals with less than 45% missing values = 372 [98.9%]
-#Individuals with less than 50% missing values = 373 [99.2%]
-#Individuals with less than 55% missing values = 374 [99.5%]
-#Individuals with less than 60% missing values = 375 [99.7%]
-#[1] "Completed"
-
-gl_final <-  gl.filter.callrate(gl_rep,method = 'ind', t=0.8)
-#Reporting for a genlight object
-#Note: Missing values most commonly arise from restriction site mutation.
-
-#Initial no. of individuals = 376 
-#Filtering a genlight object
-#  no. of individuals deleted = 39 
-#Individuals retained = 337 
-#List of individuals deleted because of low call rate
-# 908017247001_E_5 908017247001_F_4 908017247002_A_10 908017247002_B_4 908017247002_B_5 908017247002_C_3 908017247002_D_12 908017247002_D_2 908017247002_D_6 908017247002_D_9 908017247002_E_6 908017247002_E_7 908017247002_E_9 908017247002_F_2 908017247002_F_6 908017247002_G_8 908017247002_H_10 908017247002_H_7 908017247002_H_8 908017247003_B_8 908017247003_C_8 908017247003_D_8 908017247003_E_8 908017247003_F_8 908017247003_G_6 908017247003_G_8 908017247003_H_7 908017247004_C_11 908017247004_D_11 908017247004_D_8 908017247004_D_9 908017247004_E_10 908017247004_E_11 908017247004_E_9 908017247004_F_11 908017247004_F_12 908017247004_F_6 908017247004_G_11 908017247004_H_11 
-#   from populations
-  
-#Summary of filtered dataset
-#  Call Rate > 0.8 
-#  No. of loci: 75183 
- # No. of individuals: 337 
- # No. of populations:  0 
-#
-gl2gds(gl_final,outfile="gl2gds.gds")
-#Converting gl object to gds formatted file gl2gds.gds 
-
-#Structure of gds file
-
-#The file name: /data/projects/seed/dart_calls/gl2gds.gds 
-#The total number of samples: 268 
-#The total number of SNPs: 113138 
-#SNP genotypes are stored in SNP-major mode (Sample X SNP).
-#The SNP positions are not in ascending order on chromosome 1.
-#File: /data/projects/seed/dart_calls/gl2gds.gds (32.8M)
-#+    [  ] *
-#|--+ https://protect-eu.mimecast.com/s/cfduCj27LTYnmOHWrcoC?domain=sample.id   { Str8 268 ZIP_ra(13.7%), 641B }
-#|--+ https://protect-eu.mimecast.com/s/byfzCk59DIkOBwfVgChE?domain=snp.id   { Str8 113138 ZIP_ra(37.9%), 637.3K }
-#|--+ https://protect-eu.mimecast.com/s/0diWClOjDH12EMtyg-Gp?domain=snp.rs.id   { Int32 113138 ZIP_ra(78.4%), 346.6K }
-#|--+ snp.position   { Float64 113138 ZIP_ra(14.9%), 131.5K }
-#|--+ snp.chromosome   { Int32 113138 ZIP_ra(0.10%), 481B }
-#|--+ snp.allele   { Str8 113138 ZIP_ra(14.4%), 63.6K }
-#|--+ genotype   { Bit2 268x113138, 7.2M } *
-#\--+ loc.metrics   [ data.frame ] *
-#   |--+ AlleleID   { Int32,factor 113138 ZIP_ra(68.9%), 304.3K } *
-#   |--+ CloneID   { Int32 113138 ZIP_ra(78.4%), 346.6K }
-#   |--+ ClusterTempIndex   { Int32 113138 ZIP_ra(63.6%), 281.1K }
-#   |--+ AlleleSequence   { Int32,factor 113138 ZIP_ra(68.9%), 304.4K } *
-#   |--+ ClusterConsensusSequence   { Int32,factor 113138 ZIP_ra(66.2%), 292.5K } *
-#   |--+ ClusterSize   { Int32 113138 ZIP_ra(7.27%), 32.1K }
-#   |--+ AlleleSeqDist   { Int32 113138 ZIP_ra(8.49%), 37.5K }
-#   |--+ SNP   { Int32,factor 113138 ZIP_ra(38.3%), 169.2K } *
-#   |--+ SnpPosition   { Int32 113138 ZIP_ra(26.0%), 115.1K }
-#   |--+ CallRate   { Float64 113138 ZIP_ra(2.84%), 25.1K }
-#   |--+ OneRatioRef   { Float64 113138 ZIP_ra(32.7%), 289.2K }
-#   |--+ OneRatioSnp   { Float64 113138 ZIP_ra(36.1%), 318.8K }
-#   |--+ FreqHomRef   { Float64 113138 ZIP_ra(36.6%), 323.6K }
-#   |--+ FreqHomSnp   { Float64 113138 ZIP_ra(32.6%), 288.4K }
-#   |--+ FreqHets   { Float64 113138 ZIP_ra(20.0%), 177.2K }
-#   |--+ PICRef   { Float64 113138 ZIP_ra(29.9%), 264.1K }
-#   |--+ PICSnp   { Float64 113138 ZIP_ra(33.7%), 297.7K }
-#   |--+ AvgPIC   { Float64 113138 ZIP_ra(44.0%), 388.6K }
-#   |--+ AvgCountRef   { Float64 113138 ZIP_ra(55.3%), 489.1K }
-#   |--+ AvgCountSnp   { Float64 113138 ZIP_ra(36.6%), 323.8K }
-#   |--+ RatioAvgCountRefAvgCountSnp   { Float64 113138 ZIP_ra(57.6%), 509.2K }
-#   |--+ FreqHetsMinusFreqMinHom   { Float64 113138 ZIP_ra(31.6%), 279.2K }
-#   |--+ AlleleCountsCorrelation   { Float64 113138 ZIP_ra(48.2%), 425.8K }
-#   |--+ aggregateTagsTotal   { Int32 113138 ZIP_ra(0.10%), 481B }
-#   |--+ DerivedCorrMinusSeedCorr   { Int32 113138 ZIP_ra(0.10%), 478B }
-#   |--+ RepRef   { Float64 113138 ZIP_ra(2.50%), 22.1K }
-#   |--+ RepSNP   { Float64 113138 ZIP_ra(2.56%), 22.7K }
-#   |--+ RepAvg   { Float64 113138 ZIP_ra(0.38%), 3.4K }
-#   |--+ PicRepRef   { Float64 113138 ZIP_ra(3.02%), 26.7K }
-#   |--+ PicRepSNP   { Float64 113138 ZIP_ra(3.59%), 31.7K }
-#   |--+ TotalPicRepRefTest   { Int32 113138 ZIP_ra(9.95%), 44.0K }
-#   |--+ TotalPicRepSnpTest   { Int32 113138 ZIP_ra(10.2%), 45.2K }
-#   |--+ clone   { Int32,factor 113138 ZIP_ra(67.8%), 299.5K } *
-#   \--+ uid   { Int32,factor 113138 ZIP_ra(68.9%), 304.3K } *
-#NULL
-
-#Workaround to convert Dart format to 0-1-2 format
-library("SNPRelate")
-genofile <- snpgdsOpen("./gl2gds.gds")
-#snpgdsGDS2BED(genofile, bed.fn="test", snp.id=snpset)
-#Error in .InitFile(gdsobj, https://protect-eu.mimecast.com/s/cfduCj27LTYnmOHWrcoC?domain=sample.id = https://protect-eu.mimecast.com/s/cfduCj27LTYnmOHWrcoC?domain=sample.id, https://protect-eu.mimecast.com/s/byfzCk59DIkOBwfVgChE?domain=snp.id = https://protect-eu.mimecast.com/s/byfzCk59DIkOBwfVgChE?domain=snp.id) : 
-#  object 'snpset' not found
-snpgdsGDS2BED(genofile, bed.fn="test")
-
-