ASCI 896 Statistical Genomics
GBLUP and RR-BLUP
Rice data
Download rice data from here.
Read genomic data
library(BGLR)
out <- read_ped("../../../2016/ASCI896/day16/RiceDiversity_44K_Genotypes_PLINK/sativas413.ped")
p = out$p
n = out$n
out = out$x
# Recode snp to 0,1,2 format using allele 1 0 --> 0 1 --> 1 2 --> NA 3 --> 2
out[out == 2] = NA
out[out == 3] = 2
W = matrix(out, nrow = p, ncol = n, byrow = TRUE)
W = t(W)
Genotype imputation
for (j in 1:ncol(W)) {
W[, j] <- ifelse(is.na(W[, j]), mean(W[, j], na.rm = TRUE), W[, j])
}
Read .fam file (accession IDs)
fam <- read.table("../../../2016/ASCI896/day16/RiceDiversity_44K_Genotypes_PLINK/sativas413.fam",
header = FALSE, stringsAsFactors = FALSE)
head(fam)
rownames(W) <- fam$V2 # 413 x 36901
Read phenotype file
# phenotypes
rice.pheno <- read.table("https://ricediversity.org/data/sets/44kgwas/RiceDiversity_44K_Phenotypes_34traits_PLINK.txt",
header = TRUE, stringsAsFactors = FALSE, sep = "\t")
table(rownames(W) == rice.pheno$NSFTVID)
y <- rice.pheno$Flowering.time.at.Arkansas # # use the first trait
index <- !is.na(y)
y <- y[index] # 374
W <- W[index, ] # 374x36901
GBLUP
regress package
library(regress)
p <- colSums(W)/(2 * nrow(W))
maf <- ifelse(p > 0.5, 1 - p, p) # or pmin(p, 1-p)
maf.index <- which(maf < 0.05)
W2 <- W[, -maf.index]
dim(W2)
Wcs <- scale(W2, center = TRUE, scale = TRUE)
G <- tcrossprod(Wcs)/ncol(Wcs)
varcompG <- regress(y ~ 1, ~G)
names(varcompG)
# intercept, additive genomic variance, residual variance
varcompG
# genetic values (breeding values)
GBLUP <- BLUP(varcompG)
head(GBLUP$Mean)
tail(GBLUP$Mean)
rrBLUP package
library(rrBLUP)
GBLUP2 <- mixed.solve(y = y, K = G)
names(GBLUP2)
# intercept
GBLUP2$beta
# additive genomic variance
GBLUP2$Vu
# residual variance
GBLUP2$Ve
# genetic values (breeding values)
head(GBLUP2$u)
tail(GBLUP2$u)
RR-BLUP
regress package
Wc <- scale(W2, center = TRUE, scale = FALSE)
varcompW <- regress(y ~ 1, ~Wc)
WWt <- tcrossprod(Wc)
varcompW <- regress(y ~ 1, ~WWt)
names(varcompW)
# intercept, additive marker variance, residual variance
varcompW
# genetic values (breeding values)
RRBLUP <- BLUP(varcompW)
head(RRBLUP$Mean)
tail(RRBLUP$Mean)
rrBLUP package
RRBLUP2 <- mixed.solve(y = y, Z = Wc)
names(RRBLUP2)
# intercept
RRBLUP2$beta
# additive marker variance
RRBLUP2$Vu
# residual variance
RRBLUP2$Ve
# marker effects
head(RRBLUP2$u)
tail(RRBLUP2$u)