################################################################### ### ### ### Multiple Interval Genetic Trait Loci Mapping: source files ### ### ### ################################################################### ################### # Basic functions # ################### # canonical.para: Compute the canonical parameter in the exponential # family distribution as a fucntion of the linear predictor. # eta: the linear predictor. canonical.para <- function(eta, distribution="Poisson", link = "log" ) { if ( (distribution=="Poisson" | distribution=="weighted.Poisson") & link == "log" ) { return( eta ) } else if ( distribution=="binomial" & link == "logistic" ) { return( eta ) } else if ( distribution=="normal" & link == "identity" ) { return( eta ) } } # b.func: Compute the b function in the exponential family distribution # as a function of the canonical parameter. # theta: the canonica parameter. b.func <- function(theta, distribution = "Poisson") { if (distribution == "Poisson") { return(exp(theta)) } else if (distribution == "weighted.Poisson") { return( exp(theta) + log(1-exp(-exp(theta))) ) } else if (distribution == "binomial") { return( log(1+exp(theta)) ) } else if (distribution == "normal") { return( theta ) ) } } # mu.func: Compute the mean as a function of the canonical parameter. # theta: the canonical parameter mu.func <- function(theta, distribution = "Poisson" ) { if ( distribution == "Poisson" ) { return(exp(theta)) } else if ( distribution == "weighted.Poisson" ) { return( exp(theta)/(1-exp(-exp(theta))) ) } else if ( distribution == "binomial" ) { return( exp(theta)/(1+exp(theta)) ) } } # g.first.deri: Compute the first derivative of the link function as a composite # function of the linear pridictor. # eta: the linear predictor g.first.deri <- function(eta, distribution = "Poisson", link = "log") { if (link == "log" & distribution == "Poisson"){ return( exp(-eta) ) } else if ( link == "log" & distribution == "weighted.Poisson" ) { lambda <- exp(eta) return( (1-exp(-lambda))^2/lambda/( 1-exp(-lambda)*(1+lambda) ) ) } else if ( link == "logistic" & distribution == "binomial" ) { mu <- exp(eta)/(1+exp(eta)) return( 1/mu/(1-mu) ) } } # b.second.deri: Compute the second derivative of the b function in the exponential # family distribution as a function of the canonical parameter. # theta: the canonical parameter. b.second.deri <- function(theta, distribution = "Poisson") { if (distribution == "Poisson") { return(exp(theta)) } else if (distribution == "weighted.Poisson") { tmp1 <- exp(theta) tmp2 <- tmp1/(1-exp(-tmp1)) return( tmp2*(1-tmp2*exp(-tmp1)) ) } else if (distribution == "binomial") { tmp <- exp(theta) return( tmp /(1+tmp)^2 ) } } # model.null.fit: Carry out the estimation of the null model. # y: vector of trait values. # weight: the weights on individual observations. In the case of binomial # distribution, they are the numbers of trials of the binomial # distributions. # Output: # beta0: the estimate of the intercept in the linear predictor; # MLL: the maximum likelihood of the null model. model.null.fit <- function(y, weight,distribution="Poisson",link="canonical") { n <- length(y) if (distribution == "Poisson") { mu <- mean(y) MLL <- n*mu*(log(mu)-1) if (link == "canonical" ) { beta0 <- log (mu) } } else if (distribution == "weighted.Poisson") { ybar <- mean(y) lambda0 <- ybar k <- 1 repeat { lambda1 <- ybar*(1-exp(-lambda0) ) if (abs(lambda1-lambda0) < 10^(-5) | k >30 ) { break } else { lambda0 <- lambda1 k <- k+1 } } MLL <- n*(ybar * log(lambda1) - lambda1 - log(1-exp( -lambda1 )) ) if (link == "canonical" ) { beta0 <- log (lambda1) } } else if (distribution == "binomial") { ybar <- mean(y) n <- sum(weight) MLL <- n*( ybar*log(ybar) + (1-ybar)*log(1-ybar) ) if (link == "canonical" ) { beta0 <- log(ybar/(1-ybar)) } } else if (distribution == "normal") { MLL <- -n/2*(log(var(y))+1) if (link == "canonical" ) { beta0 <- mean(y) } } return(list(beta0=beta0, MLL=MLL)) } # DesignMatrix(m): Generate the design matrix with only the # QTL main effects for the backcross model. # Input: # m --- Number of QTLs # Output: # A matrix of rows { (k1,k2,...,km): kj=0 or 1 } # arranged in lexicographical order. DesignMatrix<-function(m){ v2 <- c(1,1) e2 <- c(0,1) DM <- cbind(v2,e2) if (m>1) { i<-2 while (i <= m) { v <- rep(1,2^(i-1)) DM<-cbind(kronecker(DM,v2),kronecker(v,e2)) i<-i+1 } } return(DM) } # DesignMatrixUpdate: Update the design matrix DM (output of DesignMatrix) # with columns representing interactions as specified by INT. # Input: # DM --- desing matrix with only main effects (output of DesignMatrix). # INT --- r by 2 matrix specifying r interactions: the interger in each row # specifies the two QTLs which have an interation. # Output: # A design matrix consists of both main effects and specified interactions. DesignMatrixUpdate<-function(DM,INT) { n.i <- dim(INT)[1] for (i in 1:n.i) { if (INT[i,1]*INT[i,1] == 0) next DM<-cbind(DM, DM[,INT[i,1]+1]*DM[,INT[i,2]+1]) } return(DM) } # mix.weights: Compute the weights of the components in the mixture distribution # for an individual given the individual's marker classes, the positions of QTL # and interval lengths. The weights are products of QTL genotype probabilities. # Input: # x: marker classes of the intervals for the individual # interval.length: the lengths of intervals in cM. # r: recombination fractions between the left marker and the putative QTL. mix.weights<-function(x,r,interval.length) { m <- length(interval.length) rr <- (1-exp(-2*interval.length/100))/2 s <- (rr-r)/(1-2*r) qtl.p <- NULL for (j in 1:m) { if (x[j]==1) { qtl.p[j]<-(1-r[j])*(1-s[j])/(1-rr[j]) } else if ( x[j]==2) { qtl.p[j]<-(1-r[j])*s[j]/rr[j] } else if ( x[j]==3) { qtl.p[j]<-(1-s[j])*r[j]/rr[j] } else { qtl.p[j]<-r[j]*s[j]/(1-rr[j]) } } pp<-rbind(1-qtl.p, qtl.p) weights<-pp[,1] if (m>1) { for ( j in 2:m ) { weights<-kronecker(weights,pp[,j]) } } return(weights) } #################################### # Major functions: E-step, M-step, # # EM.procedure, Fisher.info, # # mimglm, migtlm. # #################################### ##################################################### # E.step: Carry out the E-step of the EM-algorithm. # ##################################################### # # Input: # y --- a vector containing the trait values of n individuals. # weight --- vector of weights # x --- a n by m matrix containing the marker classes of concerned intervals of n # individuals. # inerval.length --- a vector containing the lengthes in cM # of the concerned intervals. # Z --- the design matrix associated with the concerned intervals augamented # by interaction effects. # old.parameter.value --- list of old values of parameters: # old.parameter.value[[1]] --- old values of main effect parameters. # old.parameter.value[[2]] --- old values of intereaction effect parameters. # old.parameter.value[[3]] --- old values of the position parameters. # old.parameter.value[[4]] --- old value of the dispersion parameter. # distribution --- exponential family distribution. # link --- link function. # position.fix --- logic, whether position parameter is fixed or not. # Output: # A list containing: # Delta --- n by 2^m matrix of Delta_{ik1...km} values. # ddelta --- n by m matrix of delta_{ij} values. # N.matrix --- matrix needed to estimate position parameters. # W.matrix --- matrix needed to estimate position parameters. E.step <- function(y, weight, x, interval.length, Z, old.parameter.value, distribution, link, position.fix) { m <- length(interval.length) n<-length(y) effect.para <- c(old.parameter.value[[1]], old.parameter.value[[2]]) r <- old.parameter.value[[3]] dispersion <- old.parameter.value[[4]] DM <- Z[,2:(m+1)] predictor<-Z%*%effect.para theta <- canonical.para(predictor, distribution, link) b.theta<-b.func(theta, distribution) Delta <- NULL ddelta <- matrix(0,n,m) for ( i in 1:n ) { mix.weight.i <- mix.weights(x[i,],r,interval.length) Delta.i <- mix.weight.i* as.vector(exp(weight[i]*(y[i]*theta-b.theta)/dispersion ) ) Delta.i <- Delta.i/sum(Delta.i) for (j in 1:m) { ddelta[i,j] <- sum(Delta.i[DM[,j]==1]) } Delta <- rbind(Delta, Delta.i) } if (position.fix == T) { return(list(Delta=Delta, ddelta=ddelta)) } else { N.matrix <- matrix(0,4,m) W.matrix <- matrix(0,4,m) count <- rep(1,n) for (j in 1:m) { N.matrix[1,j] <- sum(count[x[,j]==1]) N.matrix[2,j] <- sum(count[x[,j]==2]) N.matrix[3,j] <- sum(count[x[,j]==3]) N.matrix[4,j] <- sum(count[x[,j]==4]) W.matrix[1,j] <- sum(ddelta[x[,j]==1,j]) W.matrix[2,j] <- sum(ddelta[x[,j]==2,j]) W.matrix[3,j] <- sum(ddelta[x[,j]==3,j]) W.matrix[4,j] <- sum(ddelta[x[,j]==4,j]) } return(list(Delta=Delta, ddelta = ddelta, N.matrix=N.matrix, W.matrix=W.matrix)) } } ###################################################### # M.step: Carry out the M-step of the EM-algorithm. # ###################################################### # # Input: # E.step.object --- output from E-step of the EM algorithm. # y --- a vector containing the trait values of n individuals. # weight --- vector of weights # inerval.length --- a vector containing the lengthes in cM # of the concerned intervals. # Z --- the design matrix associated with the concerned intervals augamented # by interaction effects. # old.parameter.value --- list of old values of parameters: # old.parameter.value[[1]] --- old values of main effect parameters. # old.parameter.value[[2]] --- old values of intereaction effect parameters. # old.parameter.value[[3]] --- old values of the position parameters. # old.parameter.value[[4]] --- old value of the dispersion parameter. # distribution --- exponential family distribution. # link --- link function. # position.fix --- logic, whether position parameter is fixed or not. # dispersion.fix --- logic, whether dispersion parameter is fixed or not. # Output: # A list of same structure of old.parameter.value containing updated parameter values. M.step <- function(E.step.object, y, weight, Z, interval.length, old.parameter.value, distribution, link, position.fix, dispersion.fix, precision, max.iteration) { Delta<-E.step.object$Delta if (position.fix==F) { N.matrix <- E.step.object$N.matrix W.matrix <- E.step.object$W.matrix } n <- length(y) m <- lenght(interval.length) beta.old <- c(old.parameter.value[[1]], old.parameter.value[[2]]) n.main <- length(old.parameter.value[[1]]) rr <- (1-exp(-2*interval.length/100))/2 M.y <- diag(y*weight) Delta.y <- M.y%*%Delta Delta.y <- apply(Delta.y, 2, sum) M.n <- diag(weight) Delta.n <- M.n%*%Delta Delta.n <- apply(Delta.n, 2, sum) if (distribution == "binomial") { predictor <- as.vector(Z%*%beta.old) theta <- as.vector(canonical.para(predictor, distribution, link)) mu <- as.vector(mu.func(theta, distribution)) W <- diag(Delta.n*mu*(1-mu)) } convergence<-0 iteration<-0 while ( convergence==0 ) { predictor <- as.vector(Z%*%beta.old) theta <- as.vector(canonical.para(predictor, distribution, link)) mu <- as.vector(mu.func(theta, distribution)) gprime <- as.vector(g.first.deri(predictor, distribution,link)) b2prime<- as.vector(b.second.deri(theta,distribution)) if ( distribution == "binomial" ) { omega <- t(Z)%*%W%*%Z omega <- solve(omega) h <- diag( Z%*%omega%*%t(Z) ) * diag(W) Delta.nh <- Delta.n + 2^(m-1)*h Delta.yh <- Delta.y + 2^(m-2)*h W <- diag( Delta.nh*b2prime ) Delta.yh1 <- Delta.yh Delta.yh1[Delta.nh==0] <- 0 Delta.yh1[Delta.nh!=0] <- Delta.yh[Delta.nh!=0]/Delta.nh[Delta.nh!=0] z.beta <- predictor+gprime*(Delta.yh1-mu) } else { W<-diag( Delta.n/as.vector(gprime^2*b2prime) ) Delta.y1 <- Delta.y Delta.y1[Delta.n==0] <- 0 Delta.y1[Delta.n!=0] <- Delta.y[Delta.n!=0]/Delta.n[Delta.n!=0] z.beta <- predictor+gprime*(Delta.y1-mu) } A <- t(Z)%*%W%*%Z b <- t(Z)%*%W%*%z.beta beta.new <- solve(A,b) test <- max(abs(beta.old-beta.new)) if (test < precision | iteration > max.iteration) { convergence<-1 } else { beta.old <- beta.new iteration <- iteration+1 } } old.parameter.value[[1]] <- beta.new[1:n.main] old.parameter.value[[2]] <- beta.new[-c(1:n.main)] if (position.fix == F) { for (j in 1:m) { old.parameter.value[[3]][j] <- argmax.ll1(N.matrix[,j],W.matrix[,j], rr[j]) } } if (dispersion.fix == F) { predictor <- Z%*%beta.new theta <- canonical.para(predictor, distribution, link) mu <- mu.func(theta,distribution) b2prime <- b.second.deri(theta,distribution) Delta.y2 <- apply( diag(y^2*weight)%*%Delta, 2, sum) dispersion <- sum( (Delta.y2-2*mu*Delta.y +mu^2*Delta0)/b2prime )/(n-length(beta.new)) old.parameter.value[[4]] <- dispersion } return(old.parameter.value) } # ll1, argmax.ll1: Two functions needed to update position parameters: # ll1 <- function(nn,ww,rr, r) { c1 <- nn[4]-ww[4]+ww[1] c2 <- nn[1]-ww[1]+ww[4] c3 <- nn[3]-ww[3]+ww[2] c4 <- nn[2]-ww[2]+ww[3] s <- (rr-r)/(1-2*r) p1 <- (1-r)*(1-s)/(1-rr) p2 <- (1-r)*s/rr ll1 <- c1*log(p1) + c2*log(1-p1) +c3*log(p2) + c4*log(1-p2) return(ll1) } # argmax.ll1 <- function(nn,ww,rr) { grid <- seq(0.005, rr-0.005, by=0.005) n.grid <- length(grid) mm <- -1.0e20 position <- 0.001 for ( k in 1:n.grid ) { ll1.value <-ll1(nn,ww,rr,grid[k]) if (ll1.value > mm ) { mm <- ll1.value position <- grid[k] } } return(position) } ############################################### # EM.procedure: Compute the MLE of a multiple # # interval mapping model. # ############################################### EM.procedure<-function(y,x,weight,interval.length,Z, initial.value, distribution,link,position.fix, dispersion.fix, precision, max.iteration) { old.parameter.value <- initial.value convergence <- 0 iteration <- 0 n <- length(y) while ( convergence==0 ) { E.step.object <- E.step(y,weight,x, interval.length, Z, old.parameter.value,distribution, link,TRUE) new.parameter.value <- M.step(E.step.object, y, weight, Z, interval.length, old.parameter.value, distribution, link, TRUE, dispersion.fix,precision, max.iteration) effect.para.0 <- c(old.parameter.value[[1]], old.parameter.value[[2]], old.parameter.value[[4]]) effect.para.1 <- c(new.parameter.value[[1]], new.parameter.value[[2]], new.parameter.value[[4]]) test1 <- max( abs( effect.para.1 - effect.para.0 ) ) test2 <- max(abs(old.parameter.value[[3]] - new.parameter.value[[3]])) if ((test1 < precision & test2 < 0.01) | iteration > max.iteration) { convergence <- 1 } else { old.parameter.value <- new.parameter.value iteration <- iteration+1 } } if (position.fix == F) { old.parameter.value <- new.parameter.value convergence <- 0 iteration <- 0 while ( convergence==0 ) { E.step.object <- E.step(y,weight,x, interval.length, Z, old.parameter.value,distribution, link,position.fix) new.parameter.value <- M.step(E.step.object, y, weight, Z, interval.length, old.parameter.value, distribution, link, position.fix, dispersion.fix,precision, max.iteration) effect.para.0 <- c(old.parameter.value[[1]], old.parameter.value[[2]], old.parameter.value[[4]]) effect.para.1 <- c(new.parameter.value[[1]], new.parameter.value[[2]], new.parameter.value[[4]]) test1 <- max( abs( effect.para.1 - effect.para.0 ) ) test2 <- max(abs(old.parameter.value[[3]] - new.parameter.value[[3]])) if ((test1 < precision & test2 < 0.01) | iteration > max.iteration) { convergence <- 1 } else { old.parameter.value <- new.parameter.value iteration <- iteration+1 } } } return(new.parameter.value) } # LL.eval: Evaluate the likelihood of the mixture model with output of the EM.procedure. LL.eval <- function(y,x,weight,interval.length,Z,para,distribution) { n <- length(y) effect.para <- c(para[[1]],para[[2]]) r <- para[[3]] dispersion <- para[[4]] predictor <- Z%*%effect.para theta <- canonical.para(predictor,distribution,link) b.theta <- b.func(theta,distribution) max.ll <- 0 for (i in 1:n) { mix.weight.i <- mix.weights(x[i,],r,interval.length) ll.i <- log ( sum( mix.weight.i*as.vector(exp(weight[i]*(y[i]*theta-b.theta)/dispersion))) ) max.ll <- max.ll + ll.i } if (distribution == "normal") { max.ll <- max.ll -log(dispersion)/2 - sum(y^2)/2/dispersion } return( max.ll ) } ####################################### # Fisher.informaton: Compute observed # # Fisher information matrix of the # # multiple interval mapping model. # ####################################### Fisher.information<-function(y, x, interval.length, Z, parameter.value, distribution, link) { m <- length(interval.length) n<-length(y) effect.para <- c(parameter.value[[1]], parameter.value[[2]]) q <- length(effect.para) r <- parameter.value[[3]] rr <- (1-exp(-2*interval.length/100))/2 dispersion <- parameter.value[[4]] DM <- Z[,2:(m+1)] ### Compute basic quantities predictor<-Z%*%effect.para theta <- canonical.para(predictor, distribution, link) b.theta<-b.func(theta, distribution) mu <- mu.func(theta, distribution) gprime <- g.first.deri(predictor, distribution,link) b2prime <-b.second.deri(theta,distribution) Omega <- diag(as.vector(1/gprime/b2prime),ncol=2^m) OmegaZ <- Omega%*%Z MOmegaZ <- diag(as.vector(mu),ncol=2^m)%*%OmegaZ Delta <- vector("numeric",2^m) Delta1 <- vector("numeric",2^m) Delta2 <- vector("numeric",2^m) b1 <- vector("numeric",m) Dd <- vector("numeric",m) DED <- matrix(0,m,m) DddD <- matrix(0,m,m) B01 <- matrix(0,m,q) B02 <- B01 B11 <- B01 B12 <- B01 C1 <- matrix(0,q,q) C2 <- C1 C3 <- C1 gradient <- vector("numeric",m+q) # i<-1 sum(weight.i) for ( i in 1:n ) { weight.i <- mix.weights(x[i,],r,interval.length) pdf.ik <- weight.i* as.vector( exp((y[i]*theta-b.theta)/dispersion) ) pdf.i <- sum(pdf.ik) Delta.i <- pdf.ik/pdf.i Delta1.i <- y[i]*Delta.i Delta2.i <- y[i]*Delta1.i Delta <- Delta + Delta.i Delta1 <- Delta1 + Delta1.i Delta2 <- Delta2 + Delta2.i ddelta1 <- vector("numeric",m) for (j in 1:m) { ddelta1[j] <- sum(Delta.i[DM[,j]==1]) } tmp1 <- dlq1(r,x[i,],rr) tmp2 <- dlp1(r,x[i,],rr) D.i <- matrix(0,m,2*m) for (j in 1:m) { D.i[j,2*j-1] <- tmp1[j] D.i[j,2*j] <- tmp2[j] } dr.ik <- (1-t(DM))*tmp1 + t(DM)*tmp2 dr.i <- apply (dr.ik%*%diag(pdf.ik),1,sum) dr.i <- dr.i / pdf.i dbeta.ik <- t(Z)%*%diag(pdf.ik* as.vector((y[i]-mu)/gprime/b2prime)) dbeta.i <- apply (dbeta.ik, 1, sum) dbeta.i <- dbeta.i / pdf.i gradient <- gradient + c(dr.i,dbeta.i) b.i <- (1-ddelta1)*dlq2(r,x[i,],rr) + ddelta1*dlp2(r,x[i,],rr) b1 <- b1 + b.i ddelta2 <- vector("numeric",2*m) ddelta2[ seq(1,2*m-1,by=2) ] <- 1-ddelta1 ddelta2[ seq(2,2*m,by=2) ] <- ddelta1 Dd.i <- D.i%*%ddelta2 DMOmegaZ <- t(Delta.i)%*%MOmegaZ D1OmegaZ <- t(Delta1.i)%*%OmegaZ # Compute components of DE11D' E.i <- matrix( c((1-ddelta1[1])^2,(1-ddelta1[1])*ddelta1[1], (1-ddelta1[1])*ddelta1[1], ddelta1[1]^2), ncol=2,byrow=T) for (s in 2:m) { e22 <- sum(Delta.i[DM[,1]==1 & DM[,s]==1]) e11 <- 1 - ddelta1[1] - ddelta1[s] + e22 e12 <- ddelta1[s] - e22 e21 <- ddelta1[1] - e22 E.i <- cbind(E.i,matrix(c(e11,e12,e21,e22),ncol=2,byrow=T)) } for (l in 2:(m-1)) { tmp <- matrix(0,2,2*l-2) tmp <- cbind( tmp, matrix( c((1-ddelta1[l])^2,(1-ddelta1[l])*ddelta1[l], (1-ddelta1[l])*ddelta1[l], ddelta1[l]^2), ncol=2,byrow=T) ) for (s in (l+1):m) { e22 <- sum(Delta.i[DM[,l]==1 & DM[,s]==1]) e11 <- 1 - ddelta1[l] - ddelta1[s] + e22 e12 <- ddelta1[s] - e22 e21 <- ddelta1[l] - e22 tmp <- cbind( tmp, matrix(c(e11,e12,e21,e22),ncol=2,byrow=T) ) } E.i <- rbind(E.i, tmp) } tmp <- matrix(0,2,2*m-2) tmp <- cbind(tmp, matrix( c((1-ddelta1[m])^2,(1-ddelta1[m])*ddelta1[m], (1-ddelta1[m])*ddelta1[m],ddelta1[m]^2), ncol=2,byrow=T) ) E.i <- rbind(E.i, tmp) for (l in 2:(2*m) ) { for (s in 1:(l-1)) { E.i[l,s] <- E.i[s,l] } } Dd <- Dd + Dd.i DED <- DED + D.i%*%E.i%*%t(D.i) DddD <- DddD + Dd.i%*%t(Dd.i) # Compute components of DE12OmegaZ B01.i <- NULL B11.i <- NULL for (l in 1:m) { tmp <- Delta.i tmp[DM[,l]==1] <- 0 B01.i <- rbind(B01.i, t(tmp)%*%MOmegaZ) B11.i <- rbind(B11.i, y[i]*(t(tmp)%*%OmegaZ) ) tmp <- Delta.i tmp[DM[,l]==0] <- 0 B01.i <- rbind(B01.i, t(tmp)%*%MOmegaZ) B11.i <- rbind(B11.i, y[i]*(t(tmp)%*%OmegaZ) ) } B01 <- B01 + D.i%*%B01.i B11 <- B11 + D.i%*%B11.i B02 <- B02 + Dd.i %*% DMOmegaZ B12 <- B12 + Dd.i%*% D1OmegaZ # Compute components of ZOmegaE22OmegaZ C1 <- C1 + t(D1OmegaZ)%*%D1OmegaZ C2 <- C2 + t(DMOmegaZ)%*%D1OmegaZ C3 <- C3 + t(DMOmegaZ)%*%DMOmegaZ } # A11 A11 <- Dd %*% t(Dd) + DED - DddD A12 <- Dd %*% ( t(Delta1)%*%OmegaZ - t(Delta)%*%MOmegaZ ) - B01 + B11 + B02 - B12 a1 <- t(Delta1)%*%OmegaZ a2 <- t(Delta)%*%MOmegaZ a11 <- t(a1)%*%a1 + t(OmegaZ)%*%diag(Delta2)%*%OmegaZ - C1 a12 <- t(a2)%*%a1 + t(MOmegaZ)%*%diag(Delta1)%*%OmegaZ - C2 a22 <- t(a2)%*%a2 + t(MOmegaZ)%*%diag(Delta)%*%MOmegaZ - C3 A22 <- a11 - a12 - t(a12) + a22 A <- rbind(cbind(A11, A12),cbind(t(A12),A22)) B1 <- - diag(b1) W <- diag(Omega)*Delta/as.vector(gprime) W <- diag(W) B2 <- t(Z)%*%W%*%Z B <- rbind( cbind(B1, matrix(0,m,q)), cbind( matrix(0,q,m), B2) ) Fisher.info <- B-A + gradient %*% t(gradient) return(Fisher.info) } ######################################## # Fuctions used by Fisher.information # ######################################## p.Q <- function(r,x,rr) { m <- length(r) s <- (rr-r)/(1-2*r) qtl.p <- NULL for (j in 1:m) { if (x[j]==1) { qtl.p[j]<-(1-r[j])*(1-s[j])/(1-rr[j]) } else if ( x[j]==2) { qtl.p[j]<-(1-r[j])*s[j]/rr[j] } else if ( x[j]==3) { qtl.p[j]<-(1-s[j])*r[j]/rr[j] } else { qtl.p[j]<-r[j]*s[j]/(1-rr[j]) } } return(qtl.p) } dp1 <- function(r,x,rr) { m <- length(r) s <- (rr-r)/(1-2*r) ds1 <- (2*rr-1)/(1-2*r)^2 dp1 <- NULL for (j in 1:m) { if (x[j]==1) { dp1[j] <- - ( (1-s[j]) + (1-r[j])*ds1[j] ) / (1-rr[j]) } else if (x[j]==2) { dp1[j] <- ( (1-r[j])*ds1[j] - s[j] ) / rr[j] } else if ( x[j]==3) { dp1[j] <- - ( (1-r[j])*ds1[j] - s[j] ) / rr[j] } else { dp1[j] <- ( (1-s[j]) + (1-r[j])*ds1[j] ) / (1-rr[j]) } } return(dp1) } dp2 <- function(r,x,rr) { m <- length(r) s <- (rr-r)/(1-2*r) ds1 <- (2*rr-1)/(1-2*r)^2 ds2 <- 4*ds1/(1-2*r) dp2 <- NULL for (j in 1:m) { if (x[j]==1) { dp2[j] <- ( 2*ds1[j] - (1-r[j])*ds2[j] ) / (1-rr[j]) } else if (x[j]==2) { dp2[j] <- ( (1-r[j])*ds2[j] - 2*ds1[j] ) / rr[j] } else if ( x[j]==3) { dp2[j] <- - ( (1-r[j])*ds2[j] - 2*ds1[j] ) / rr[j] } else { dp2[j] <- - ( 2*ds1[j] - (1-r[j])*ds2[j] ) / (1-rr[j]) } } return(dp2) } dlp1 <- function(r,x,rr) { dlp1 <- dp1(r,x,rr)/p.Q(r,x,rr) return(dlp1) } dlp2 <- function(r,x,rr) { tmp <- p.Q(r,x,rr) dlp2 <- tmp*dp2(r,x,rr) - (dp1(r,x,rr))^2 dlp2 < dlp2/tmp^2 return(dlp2) } dlq1 <- function(r,x,rr) { dlq1 <- - dp1(r,x,rr)/(1-p.Q(r,x,rr)) return(dlq1) } dlq2 <- function(r,x,rr) { tmp <- 1-p.Q(r,x,rr) dlq2 <- tmp*dp2(r,x,rr) + (dp1(r,x,rr))^2 dlq2 <- - dlq2/tmp^2 return(dlq2) } ######################################################## # mimglm: Form a multiple interval mapping model with # # with given intervals and interactions, and # # fit the model. # ######################################################## # genetic.data --- a list containing observations for multiple interval mapping: # genetic.data[[1]], vector, observed trait values of n individuals. # genetic.data[[2]], n by M matrix of interval marker classes for n individuals # on M intervals, the interval marker classes take values 1,2,3,4. # genetic.data[[3]], the lengthes of the M intervals in units of cM. # weight --- weights for the individuals. # MODEL --- a list specifying the model: # MODEL[[1]], vector, specifying the intervals for the model (the column indices # of genetic.data[[2]]). # MODEL[[2]], a matrix of two columns, each row specifying the indices of two # intervals in an interaction term. # initial.value --- list: # initial.value[[1]]: vector of length 1+length(MODEL[[1]]), initial values for # the intercept and main effect parameters. # initial.value[[2]]: vector of length dim(MODEL[[s]])[1], initial values for # the interaction effect parameters. # Fisher: logic, TRUE --- Fisher information matrix is computed, FALSE --- Fisher # information matrix is not computed. mimglm <- function(genetic.data,weight,MODEL,initial.value,distribution,link, position.fix, dispersion.fix, Fisher, precision, max.iteration) { intervals <- MODEL[[1]] y <- genetic.data[[1]] x <- genetic.data[[2]][,intervals] x <- as.matrix(x) interval.length <- genetic.data[[3]][intervals] m <- length(intervals) Z <- DesignMatrix(m) is.interaction <- MODEL[[3]] if (is.interaction == T) { INT <- as.matrix(MODEL[[2]]) if (dim(INT)[2] == 1) { INT <- t(INT) } ID <- 1:m for (j in 1:2) { for (i in 1:dim(INT)[1]) { if (INT[i,j]==0) next INT[i,j] <- ID[intervals==INT[i,j]] } } Z <- DesignMatrixUpdate(Z,INT) } para.mle <- EM.procedure(y,x,weight,interval.length,Z,initial.value, distribution,link,position.fix, dispersion.fix, precision, max.iteration) max.ll <- LL.eval(y,x,weight,interval.length,Z,para.mle,distribution) if (Fisher == T) { fisher.info <- Fisher.information(y, x, interval.length, Z, para.mle, distribution, link) return(list(MLE=para.mle, Max.likelihood=max.ll, Info.matrix=fisher.info)) } else { return(list(MLE=para.mle, Max.likelihood=max.ll)) } } ######################################################### # migtlm: Multiple Interval Genetic Trait Loci Mapping # ######################################################### # # Input # genetic.data: list. # genetic.data[[1]], vector, observed trait values of n individuals. # genetic.data[[2]], n by M matrix of interval marker classes for n individuals # on M intervals, the interval marker classes take values 1,2,3,4. # genetic.data[[3]], the lengthes of the M intervals in units of cM. # weight: weights for the individuals. # distribution: character, exponential family distribution in the multiple # interval mapping model. # line: link function. # dispersion.fix: logic, indicates whether dispersion parameter is to be estimated. # max.intervals: maximal number of intervals allowed to be included in a model. # migtlm <- function(genetic.data,weight=1,distribution,link,dispersion.fix, precision=0.0001,max.iteration=50,max.intervals) { n <- length(genetic.data[[1]]) M <- length(genetic.data[[3]]) middle.position <- (1-exp(-genetic.data[[3]]/100))/2 if (length(weight) == 1) { weight <- rep(1,n) } MODEL <- list(main=NULL,interaction=matrix(numeric(),ncol=2,byrow=T),is.interaction=F) MODEL.tmp <- MODEL model.fitted <- list(effects=list(main=0, interaction=NULL, QTL.posi=NULL,dispersion=1), MLL=NULL) initial.value <- list(main=0,interaction=NULL, QTL.posi=NULL,dispersion=1) ITV.remain <- 1:M ITV.select <- NULL n.select <- 0 n.interaction <- 0 c.alpha <- qchisq(1-0.05/M,1) ### Fit the null model with no QTL fitted.tmp <- model.null.fit(genetic.data[[1]],weight,distribution, link) model.fitted[[1]][[1]] <- fitted.tmp[[1]] LL.null <- fitted.tmp[[2]] ### Begin mapping loop k <- 1 map.stop <- 0 while (map.stop == 0 & k <= max.intervals) # while (1). { mLL <- LL.null effects <- model.fitted[[1]] initial.value[[1]] <- c( effects[[1]],0) initial.value[[2]] <- c( effects[[2]],rep(0,n.select)) initial.value[[4]] <- effects[[4]] for (j in ITV.remain) { MODEL.tmp[[1]] <- c( MODEL[[1]],j ) inter.terms <- cbind( ITV.select, rep(j,n.select) ) if (is.matrix(inter.terms) == F) { inter.terms <- matrix(inter.terms, ncol=2, byrow=T) } MODEL.tmp[[2]] <- rbind(MODEL[[2]], inter.terms) if (dim(MODEL.tmp[[2]])[1] == 0) { MODEL.tmp[[3]] <- F } else { MODEL.tmp[[3]] <- T } initial.value[[3]] <- c(effects[[3]] , middle.position[j]) fit.object <- mimglm(genetic.data,weight,MODEL.tmp,initial.value, distribution,link, TRUE, dispersion.fix, FALSE, precision,max.iteration) LL <- fit.object[[2]] if ( LL > mLL ) { mLL <- LL fitted.tmp <- fit.object interval.add <- j interaction.add <- inter.terms } } LL.full <- mLL effects <- fitted.tmp[[1]] n.interaction.new <- n.select MODEL.tmp[[1]] <- c(MODEL[[1]],interval.add) delete <- 1 while ( delete == 1 & n.interaction.new > 0 ) # while (2) { # test whether interactions can be deleted. mLL <- LL.null initial.value <- effects for ( j in 1:n.interaction.new) { MODEL.tmp[[2]] <- rbind(MODEL[[2]] ,interaction.add[-j,] ) if (dim(MODEL.tmp[[2]])[1] == 0) { MODEL.tmp[[3]] <- F } else { MODEL.tmp[[3]] <- T } initial.value[[2]] <- effects[[2]][-(n.interaction+j)] fit.object <- mimglm(genetic.data,weight,MODEL.tmp,initial.value, distribution,link, TRUE, dispersion.fix,FALSE, precision,max.iteration) LL <- fit.object[[2]] if ( LL > mLL ) { mLL <- LL fitted.tmp <- fit.object j.max <- j } } LRT <- 2*(LL.full - mLL) if ( LRT < c.alpha ) { LL.full <- mLL interaction.add <- interaction.add[-j.max,] interaction.add <- matrix(interaction.add,ncol=2) n.interaction.new <- n.interaction.new -1 effects <- fitted.tmp[[1]] } else { delete <- 0 } } # end of while (2) if ( n.interaction.new > 0 ) { MODEL[[1]] <- c(MODEL[[1]],interval.add) MODEL[[2]] <- rbind(MODEL[[2]],interaction.add) model.fitted <- list(effects, LL.full) LL.null <- LL.full n.interaction <- n.interaction + n.interaction.new n.select <- n.select + 1 ITV.remain <- ITV.remain[ITV.remain != interval.add] ITV.select <- c(ITV.select, interval.add) k <- k+1 } else { LRT <- 2*( LL.full - LL.null) if (LRT < c.alpha) { map.stop <- 1 } else { MODEL[[1]] <- c(MODEL[[1]],interval.add ) model.fitted <- list(effects, LL.full) LL.null <- LL.full n.select <- n.select + 1 ITV.remain <- ITV.remain[ITV.remain != interval.add] ITV.select <- c(ITV.select, interval.add) k <- k+1 } } } # end of while (1). return( list( Selected.Intervals = MODEL[[1]], Interactions = MODEL[[2]], Fitted.parameters = model.fitted[[1]], Log.likelihood=model.fitted[[2]] ) ) }