Contents

1 Bayesian penalized regression

1.1 Rice data

Download rice data from http://ricediversity.org/data/sets/44kgwas/

1.2 Data cleaning

library(BGLR)
out <- read_ped("../../../2016/ASCI896/day16/RiceDiversity_44K_Genotypes_PLINK/sativas413.ped")
p=out$p
n=out$n
out=out$x
#Recode snp to 0,1,2 format using allele 1
# 0 --> 0
# 1 --> 1
# 2 --> NA
# 3 --> 2
out[out==2]=NA
out[out==3]=2
X=matrix(out,nrow=p,ncol=n,byrow=TRUE)
X=t(X)

# genotype imputation
for (j in 1:ncol(X)){
  X[,j] <- ifelse(is.na(X[,j]), mean(X[,j], na.rm=TRUE), X[,j])
}

# accession ID
fam <- read.table("../../../2016/ASCI896/day16/RiceDiversity_44K_Genotypes_PLINK/sativas413.fam", header = FALSE, stringsAsFactors = FALSE)
head(fam)
rownames(X) <- fam$V2 # 413 x 36901

# MAF=0.05
p <- colSums(X) / (2*nrow(X))
maf <- ifelse(p > 0.5, 1-p, p)
maf.index <- which(maf < 0.05)
X <- X[, -maf.index]  # 413 x 33701

# phenotypes
rice.pheno <- read.table("https://ricediversity.org/data/sets/44kgwas/RiceDiversity_44K_Phenotypes_34traits_PLINK.txt", header = TRUE, stringsAsFactors = FALSE, sep = "\t")
table(rownames(X) == rice.pheno$NSFTVID)
y <- rice.pheno$Flowering.time.at.Arkansas # # use the first trait 
index <- !is.na(y)
y <- y[index] # 374
X <- X[index,] # 374 33701

1.3 Bayesian ridge regression

# training - testing sets partition
set.seed(0728)
tst <- sample(1:nrow(X), size=80)
y.na <- y
y.na[tst] <- NA

# user specified priors
ETA <- list(MRK=list(X = X, df0 = 5, S0 = 1, model="BRR"))
fit.BRR <- BGLR(y = y.na, ETA = ETA, nIter = 1000, burnIn = 500, verbose = TRUE)
cor(y[tst], fit.BRR$yHat[tst])

# rule-based priors
ETA <- list(MRK=list(X = X, model="BRR"))
fit.BRR <- BGLR(y = y.na, ETA = ETA, nIter = 1000, burnIn = 500, verbose = FALSE)
cor(y[tst], fit.BRR$yHat[tst])

1.4 Bayesian LASSO

# training - testing sets partition
set.seed(0728)
tst <- sample(1:nrow(X), size=80)
y.na <- y
y.na[tst] <- NA

# user specified priors
ETA <- list(MRK=list(X = X, type="gamma", lambda=10, shape=1.1, rate=0.5, model="BL"))
fit.BL <- BGLR(y = y.na, ETA = ETA, nIter = 1000, burnIn = 500, verbose = FALSE)
cor(y[tst], fit.BL$yHat[tst])

# rule-based priors 
ETA <- list(MRK=list(X = X, model="BL"))
fit.BL <- BGLR(y = y.na, ETA = ETA, nIter = 1000, burnIn = 500, verbose = FALSE)
cor(y[tst], fit.BL$yHat[tst])