Rice data
Download rice data (RiceDiversity.44K.MSU6.Genotypes_PLINK.zip) from here.
Read genomic data
library(BGLR)
out <- read_ped("../../../../../teaching/2016/ASCI896/day16/RiceDiversity_44K_Genotypes_PLINK/sativas413.ped")
p = out$p
n = out$n
out = out$x
# Recode snp to 0,1,2 format using allele 1 0 --> 0 1 --> 1 2 --> NA 3 --> 2
out[out == 2] = NA
out[out == 3] = 2
W = matrix(out, nrow = p, ncol = n, byrow = TRUE)
W = t(W)
Genotype imputation
Read .fam file (accession IDs)
Read phenotype file
# phenotypes
rice.pheno <- read.table("http://ricediversity.org/data/sets/44kgwas/RiceDiversity_44K_Phenotypes_34traits_PLINK.txt",
header = TRUE, stringsAsFactors = FALSE, sep = "\t")
table(rownames(W) == rice.pheno$NSFTVID)
y <- rice.pheno$Flowering.time.at.Arkansas # # use the first trait
index <- !is.na(y)
y <- y[index] # 374
W <- W[index, ] # 374x36901