Download rice data from http://ricediversity.org/data/sets/44kgwas/
library(BGLR)
out<- read_ped("RiceDiversity_44K_Genotypes_PLINK/sativas413.ped")
p=out$p
n=out$n
out=out$x
#Recode snp to 0,1,2 format using allele 1
# 0 --> 0
# 1 --> 1
# 2 --> NA
# 3 --> 2
out[out==2]=NA
out[out==3]=2
X=matrix(out,nrow=p,ncol=n,byrow=TRUE)
X=t(X)
# genotype imputation
for (j in 1:ncol(X)){
X[,j] <- ifelse(is.na(X[,j]), mean(X[,j], na.rm=TRUE), X[,j])
}
# accession ID
fam <-read.table("RiceDiversity_44K_Genotypes_PLINK/sativas413.fam", header = FALSE, stringsAsFactors = FALSE)
head(fam)
## V1 V2 V3 V4 V5 V6
## 1 081215-A05 1 0 0 -9 -9
## 2 081215-A06 3 0 0 -9 -9
## 3 081215-A07 4 0 0 -9 -9
## 4 081215-A08 5 0 0 -9 -9
## 5 090414-A09 6 0 0 -9 -9
## 6 090414-A10 7 0 0 -9 -9
rownames(X) <- fam$V2 # 413 x 36901
# MAF=0.05
p <- colSums(X) / (2*nrow(X))
maf <- ifelse(p > 0.5, 1-p, p)
maf.index <- which(maf < 0.05)
X <- X[, -maf.index] # 413 x 33701
# phenotypes
rice.pheno <- read.table("http://www.ricediversity.org/data/sets/44kgwas/RiceDiversity_44K_Phenotypes_34traits_PLINK.txt", header=TRUE, stringsAsFactors = FALSE, sep = "\t")
table(rownames(X) == rice.pheno$NSFTVID)
##
## TRUE
## 413
y <- rice.pheno$Flowering.time.at.Arkansas # # use the first trait
index <- !is.na(y)
y <- y[index] # 374
X <- X[index,] # 374 33701
# training - testing sets partition
set.seed(0728)
tst <- sample(1:nrow(X), size=80)
y.na <- y
y.na[tst] <- NA
# user specified priors
ETA <- list(MRK=list(X = X, df0 = 5, S0 = 1, model="BRR"))
fit.BRR <- BGLR(y = y.na, ETA = ETA, nIter = 3000, burnIn = 1000, verbose = FALSE)
cor(y[tst], fit.BRR$yHat[tst])
## [1] 0.728925
# rule-based priors
ETA <- list(MRK=list(X = X, model="BRR"))
fit.BRR <- BGLR(y = y.na, ETA = ETA, nIter = 3000, burnIn = 1000, verbose = FALSE)
cor(y[tst], fit.BRR$yHat[tst])
## [1] 0.7223458
# training - testing sets partition
set.seed(0728)
tst <- sample(1:nrow(X), size=80)
y.na <- y
y.na[tst] <- NA
# user specified priors
ETA <- list(MRK=list(X = X, type="gamma", lambda=10, shape=1.1, rate=0.5, model="BL"))
fit.BL <- BGLR(y = y.na, ETA = ETA, nIter = 3000, burnIn = 1000, verbose = FALSE)
cor(y[tst], fit.BL$yHat[tst])
## [1] 0.6936454
# rule-based priors
ETA <- list(MRK=list(X = X, model="BL"))
fit.BL <- BGLR(y = y.na, ETA = ETA, nIter = 3000, burnIn = 1000, verbose = FALSE)
cor(y[tst], fit.BL$yHat[tst])
## [1] 0.7155831