Load R packages.

library(bnlearn)
library(ggplot2)
library(gaston)
library(Rgraphviz)
library(BGLR)
library(MTM)

Data Import

# Set working directory to Data
rm(list = ls())
setwd('Data/')

# Import Phenotype
pheno <- read.csv("salt_phenotypes.csv", row.names = 1)
dim(pheno) # 385   7
## [1] 385   7
pheno <- na.omit(pheno) # remove NAs
dim(pheno) # 366   7
## [1] 366   7
# Import genotype
geno <- read.bed.matrix("sativas413")
## Reading sativas413.fam 
## Reading sativas413.bim 
## Reading sativas413.bed 
## ped stats and snps stats have been set. 
## 'p' has been set. 
## 'mu' and 'sigma' have been set.
dim(geno) # 413 36901
## [1]   413 36901
# str(geno)
## Subset accessions with both genotype and phenotype records.
geno@ped$id <- paste0("NSFTV_", geno@ped$id)
geno <- geno[geno@ped$id %in% row.names(pheno), ]
dim(geno) # 363 36901
## [1]   363 36901
pheno <- pheno[match(geno@ped$id, row.names(pheno)), ]
dim(pheno) # 363   7
## [1] 363   7
## Check that IDs in genotype and phenotype are identical
table(geno@ped$id == rownames(pheno))
## 
## TRUE 
##  363
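
A quick, optional look at the pairwise phenotypic correlations gives a first impression of how the seven traits are related before any modelling.

## Optional: exploratory check of trait relationships
round(cor(pheno), 2)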

Construct GRM

# Quality control of genotype file with MAF & Callrate
geno <- select.snps(geno, maf > 0.05 & callrate > 0.9)
dim(geno) # 363 29499
## [1]   363 29499
# GRM matrix
grm <- GRM(geno)
dim(grm) # 363 363
## [1] 363 363
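
As an optional sanity check on the GRM, its leading eigenvectors can be plotted to visualise population structure; the sketch below is just one way to do this with base eigen() and ggplot2.

## Optional: visualise population structure captured by the GRM
eig <- eigen(grm)
pcs <- data.frame(PC1 = eig$vectors[, 1], PC2 = eig$vectors[, 2])
ggplot(pcs, aes(x = PC1, y = PC2)) +
  geom_point() +
  labs(title = "Leading eigenvectors of the GRM")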

Fit MTM Model

# Define a wrapper function, MTM_func, around the MTM package
MTM_func <- function(Y, G, nTrait, nIter, burnIn, thin, prefix) {
  library(MTM)
  set.seed(007) # reproducible results
  MTM(Y = Y,
      K = list(list(K = G, COV = list(type = 'UN', df0 = nTrait, S0 = diag(nTrait)))),
      resCov = list(type = 'UN', S0 = diag(nTrait), df0 = nTrait),
      nIter = nIter, burnIn = burnIn, thin = thin, saveAt = prefix)
}

Y <- scale(pheno, center = TRUE, scale = TRUE)
G <- grm
nTrait <- ncol(Y)

MTM_fit <- MTM_func( Y = Y, G = G, nTrait = nTrait, nIter = 1200, burnIn = 200,
                     thin = 5, prefix  = 'MTM_fit')

## Check saved Gibbs samples
list.files(pattern = 'MTM_fit')
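
If you want to eyeball convergence, the files listed above contain the saved Gibbs samples. The sketch below assumes they are plain whitespace-delimited tables (inspect one file first and adapt to the actual layout written by MTM) and trace-plots the first saved quantity.

## Optional: rough trace plot of the saved Gibbs samples
## (assumes whitespace-delimited files; check the actual layout first)
chain_files <- list.files(pattern = 'MTM_fit')
samples <- read.table(chain_files[1])
plot(samples[, 1], type = 'l', xlab = 'saved sample', ylab = 'value')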

## Retrieve estimates
str(MTM_fit)
BV <- MTM_fit$K[[1]]$U # estimated breeding values
dim(BV) # 363   7
colnames(BV) <- colnames(pheno)
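
Besides the breeding values, the fitted object should also carry the estimated genetic and residual covariance matrices. The sketch below assumes they are stored as MTM_fit$K[[1]]$G and MTM_fit$resCov$R (confirm the slot names with str(MTM_fit) above) and uses their diagonals to get rough per-trait genomic heritabilities.

## Optional: (co)variance components and rough heritabilities
## (slot names assumed -- confirm with str(MTM_fit))
G_cov <- MTM_fit$K[[1]]$G  # genetic covariance among traits
R_cov <- MTM_fit$resCov$R  # residual covariance among traits
h2 <- diag(G_cov) / (diag(G_cov) + diag(R_cov))
round(h2, 2)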

Remove the dependence among EBVs with Cholesky Decomposition

We can decompose \(\mathbf{G}\) into its Cholesky factors, \(\mathbf{G} = \mathbf{L} \mathbf{L'}\), where \(\mathbf{L}\) is an \(n \times n\) lower triangular matrix. For a single trait we can remove the dependency from the breeding values \(\mathbf{u}\) by computing adjusted breeding values \(\mathbf{u^*} = \mathbf{L}^{-1} \mathbf{u}\). With multiple traits, however, \(\mathbf{u}\) is the \((n \times t) \times 1\) vector obtained by stacking the breeding values trait by trait, where \(t\) is the number of traits. We therefore construct the \((n \times t) \times (n \times t)\) matrix \(\mathbf{M}^{-1} = \mathbf{I}_{t \times t} \otimes \mathbf{L}^{-1}\), which applies \(\mathbf{L}^{-1}\) within each trait, and obtain \(\mathbf{u^*} = \mathbf{M}^{-1} \mathbf{u}\).

Linv <- solve(t(chol(grm)))              # L^-1, inverse of the lower Cholesky factor of G
Minv <- kronecker(diag(ncol(BV)), Linv)  # M^-1 = I_t (x) L^-1
BV_adj <- matrix(Minv %*% c(BV), nrow = nrow(BV), ncol = ncol(BV))  # reshape back to n x t
colnames(BV_adj) <- colnames(pheno)
BV_adj <- as.data.frame(BV_adj)
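
A quick optional check that the whitening behaves as intended: applying \(\mathbf{L}^{-1}\) to both sides of \(\mathbf{G}\) should give (numerically) the identity matrix.

## Optional: L^-1 G L^-T should be numerically close to the identity
all.equal(Linv %*% grm %*% t(Linv), diag(nrow(grm)), check.attributes = FALSE)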

Structure Learning (Gaussian-BN)

#-------------------------
# Predefined function
#-------------------------
## Histogram with density curve for normality check
hist_graph <- function(df_all, trait, labels) {
  library(ggplot2)
  ggplot(df_all, aes(x = trait)) +
    geom_histogram(aes(y = after_stat(density)), colour = "black", fill = "white", bins = 10) +
    geom_density(alpha = .2, fill = "#FF6666") +
    labs(x = labels)
}

## Check strength and direction in BN
check_boot <- function(boot, Strength_Thres, Direction_Thres) {
  boot[(boot$strength >= Strength_Thres) & (boot$direction > Direction_Thres), ]
}

# Normality check
hist_graph(BV_adj, BV_adj[, 1], colnames(BV_adj)[1])
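
The histogram above only inspects the first trait; as a complementary, optional screen, a Shapiro-Wilk test can be applied to every adjusted trait at once (treat the p-values as a rough guide rather than a hard criterion).

## Optional: Shapiro-Wilk normality screen across all adjusted traits
round(sapply(BV_adj, function(x) shapiro.test(x)$p.value), 3)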

#------------------------------------------
# Structure learning: Method 1 and Method 2
#------------------------------------------
# Method 1 : Subjective prior + Data
## Score-based algorithm: Tabu with Data
tabu_simple <- tabu(BV_adj) 
graphviz.plot(tabu_simple, main = "Tabu", shape = "ellipse", layout = "dot")

## Incorporate subjective prior
blklist <- data.frame(from = c("K.Shoot.Salt", "K.Shoot.Control" ), 
                      to = c("K.Shoot.Control", "K.Shoot.Salt"))
## Subjective prior + Data
tabu_blk <- tabu(BV_adj, blacklist = blklist) 
graphviz.plot(tabu_blk, main = "Tabu_blk", shape = "ellipse", layout = "dot")
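
To see what the blacklist actually changed, the two structures can be compared arc by arc and scored on the same data; compare() and score() with the Gaussian BIC ("bic-g") are standard bnlearn functions. This comparison is optional.

## Optional: compare the data-only and blacklist-constrained structures
compare(tabu_simple, tabu_blk)              # matching / extra / missing arcs
score(tabu_simple, BV_adj, type = "bic-g")  # Gaussian BIC, data only
score(tabu_blk, BV_adj, type = "bic-g")     # Gaussian BIC, with blacklist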

#----------------------------------------------------------------------------
# Method 2 : Only with Data 
## Score-based algorithm: Tabu
tabu_simple <- tabu(BV_adj) 
graphviz.plot(tabu_simple, main = "Tabu", shape = "ellipse", layout = "dot")

## Model Averaging with bootstrap resampling to account for uncertainty
set.seed(007)
boot_tabu <- boot.strength (BV_adj, algorithm = "tabu", R = 500)
## Check strength and direction
check_boot(boot_tabu, 0.8, 0.5)
##            from              to strength direction
## 1    Na.K.Shoot        Na.Shoot    1.000 0.7110000
## 2    Na.K.Shoot    K.Shoot.Salt    1.000 0.6570000
## 6    Na.K.Shoot     K.Root.Salt    0.904 0.5077434
## 14 K.Shoot.Salt        Na.Shoot    0.932 0.6062232
## 15 K.Shoot.Salt K.Shoot.Control    1.000 0.8690000
## 35      Na.Root       Na.K.Root    1.000 0.5380000
## 40  K.Root.Salt K.Shoot.Control    0.924 0.9166667
## 41  K.Root.Salt       Na.K.Root    1.000 0.5640000
## 42  K.Root.Salt         Na.Root    1.000 0.6140000
## Model averaging over the bootstrap samples
ave_model_tabu <- averaged.network(boot_tabu, threshold = 0.8)
graphviz.plot(ave_model_tabu, shape = 'ellipse')
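
Once the averaged structure looks reasonable, the bootstrap arc strengths can be overlaid on the plot and the node-wise Gaussian parameters estimated. In the sketch below, cextend() is applied first in case the averaged network still contains undirected arcs (it will fail if no consistent extension exists), and dag_tabu / fit_tabu are just illustrative object names.

## Optional: arc-strength plot and parameter estimation for the averaged network
strength.plot(ave_model_tabu, boot_tabu, shape = "ellipse")
dag_tabu <- cextend(ave_model_tabu)   # extend to a full DAG if needed
fit_tabu <- bn.fit(dag_tabu, BV_adj)  # node-wise Gaussian regressions
fit_tabu$Na.Shoot                     # e.g., local distribution of Na.Shoot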