ASCI 896 Statistical Genomics

Rice data

Download rice data from here.

Read genomic data

library(BGLR)
out <- read_ped("../../../2016/ASCI896/day16/RiceDiversity_44K_Genotypes_PLINK/sativas413.ped")
p = out$p
n = out$n
out = out$x
# Recode snp to 0,1,2 format using allele 1 0 --> 0 1 --> 1 2 --> NA 3 --> 2
out[out == 2] = NA
out[out == 3] = 2
W = matrix(out, nrow = p, ncol = n, byrow = TRUE)
W = t(W)

Genotype imputation

for (j in 1:ncol(W)) {
    W[, j] <- ifelse(is.na(W[, j]), mean(W[, j], na.rm = TRUE), W[, j])
}

Read .fam file (accession IDs)

fam <- read.table("../../../2016/ASCI896/day16/RiceDiversity_44K_Genotypes_PLINK/sativas413.fam", 
    header = FALSE, stringsAsFactors = FALSE)
head(fam)
rownames(W) <- fam$V2  # 413 x 36901

Read phenotype file

# phenotypes
rice.pheno <- read.table("https://ricediversity.org/data/sets/44kgwas/RiceDiversity_44K_Phenotypes_34traits_PLINK.txt", 
    header = TRUE, stringsAsFactors = FALSE, sep = "\t")
table(rownames(W) == rice.pheno$NSFTVID)
y <- rice.pheno$Flowering.time.at.Arkansas  # # use the first trait 
index <- !is.na(y)
y <- y[index]  # 374
W <- W[index, ]  # 374x36901

GBLUP

regress package

library(regress)
p <- colSums(W)/(2 * nrow(W))
maf <- ifelse(p > 0.5, 1 - p, p)  # or pmin(p, 1-p)
maf.index <- which(maf < 0.05)
W2 <- W[, -maf.index]
dim(W2)
Wcs <- scale(W2, center = TRUE, scale = TRUE)
G <- tcrossprod(Wcs)/ncol(Wcs)

varcompG <- regress(y ~ 1, ~G)
names(varcompG)

# intercept, additive genomic variance, residual variance
varcompG

# genetic values (breeding values)
GBLUP <- BLUP(varcompG)
head(GBLUP$Mean)
tail(GBLUP$Mean)

rrBLUP package

library(rrBLUP)
GBLUP2 <- mixed.solve(y = y, K = G)
names(GBLUP2)

# intercept
GBLUP2$beta
# additive genomic variance
GBLUP2$Vu
# residual variance
GBLUP2$Ve

# genetic values (breeding values)
head(GBLUP2$u)
tail(GBLUP2$u)

RR-BLUP

regress package

Wc <- scale(W2, center = TRUE, scale = FALSE)
varcompW <- regress(y ~ 1, ~Wc)


WWt <- tcrossprod(Wc)
varcompW <- regress(y ~ 1, ~WWt)

names(varcompW)

# intercept, additive marker variance, residual variance
varcompW

# genetic values (breeding values)
RRBLUP <- BLUP(varcompW)
head(RRBLUP$Mean)
tail(RRBLUP$Mean)

rrBLUP package

RRBLUP2 <- mixed.solve(y = y, Z = Wc)
names(RRBLUP2)

# intercept
RRBLUP2$beta
# additive marker variance
RRBLUP2$Vu
# residual variance
RRBLUP2$Ve
# marker effects
head(RRBLUP2$u)
tail(RRBLUP2$u)

Gota Morota

March 28, 2017