# # Extract IBD matrix for given pedigree # Note that MERLIN merlin.ibd and merlin.ped order of IDs # may not agree # merlin.ibd <- function(idx, ped, ibd) { ids <- ped[ped[,1] %in% idx,2] if (length(ids)==0 || any(is.na(ids))) { stop(paste("ERROR: Pedigree ID ",idx, " in ibd file not found in pedigree file", sep="")) } curped <- which(ibd[,1] %in% idx) pihat <- 0.5*ibd[curped,6]+ibd[curped,7] id1 <- match(ibd[curped,2], ids) id2 <- match(ibd[curped,3], ids) ibdmat <- diag(nr=length(ids)) colnames(ibdmat) <- rownames(ibdmat) <- ids address <- matrix(c(id1, id2), nc=2) swp <- (address[,1] < address[,2]) tmp <- address[swp,1] address[swp,1] <- address[swp,2] address[swp,2] <- tmp ibdmat[address] <- pihat ibdmat[upper.tri(ibdmat)] <- t(ibdmat)[upper.tri(ibdmat)] ibdmat[lower.tri(ibdmat, diag=TRUE)] } # # Read Merlin files into R objects # merlin2kinship <- function(phenotypes=NULL, mappos=NULL, merlinped="merlin.ped", merlindat="merlin.dat", merlinibd="merlin.ibd", merlinmap="merlin.map") { require(kinship) locfil <- read.table(merlindat, head=FALSE, as.is=TRUE) loci <- locfil[,2] mapfil <- read.table(merlinmap, head=, as.is=TRUE) names(mapfil) <- c("chr", "marker", "pos") locpos <- loctyp <- locfil[,1] locpos[loctyp %in% c("A","C","S","T","Z")]<-1 locpos[loctyp %in% c("S2","M")]<-2 locpos <- c(6,6+cumsum(locpos)[-length(locpos)]) if (is.null(phenotypes)) { phenotypes <- loci[loctyp %in% c("A","C","M","T","Z")] } if (all(is.numeric(phenotypes))) { if (any(is.na(trait.list <- match(phenotypes, seq(1, length(loci)))))) { warning(paste("Some requested phenotypes not in locus file:", paste(phenotypes[is.na(trait.list)], collapse=" "))) trait.list <- trait.list[!is.na(trait.list)] phenotypes <- phenotypes[!is.na(trait.list)] } trait.pos <- locpos[trait.list] trait.type <- loctyp[trait.list] }else{ if (any(is.na(trait.list <- match(phenotypes, loci)))) { warning(paste("Some requested phenotypes not in locus file:", paste(phenotypes[is.na(trait.list)], collapse=" "))) phenotypes <- phenotypes[!is.na(trait.list)] trait.list <- trait.list[!is.na(trait.list)] } trait.pos <- locpos[trait.list] trait.type <- loctyp[trait.list] } if (length(trait.pos)==0 || !(all(trait.pos>0))) { stop("Phenotypes not correctly specified!") } # # Produce phenotype file # ped <- read.table(merlinped, head=FALSE, na.string=c("x","-"), as.is=TRUE) ped[ped[,3]==0,3] <- NA ped[ped[,4]==0,4] <- NA pheno.pedigree <- factor(ped[,1]) pheno.id <- paste(ped[,1], ped[,2]) pheno.fa <- paste(ped[,1], ped[,3]) pheno.mo <- paste(ped[,1], ped[,4]) pheno.fa[is.na(ped[,3])] <- NA pheno.mo[is.na(ped[,4])] <- NA pheno.sex=factor(ped[,5], levels=c(1,2), labels=c("m","f")) pheno <- as.data.frame(ped[, trait.pos]) # Deal with marker loci markers <- which(trait.type=="M") for (i in markers) { a1 <- trait.pos[i] a2 <- a1+1 ped[ped[,a1]==0,c(a1,a2)] <- NA pheno[,i] <- factor(paste(ped[,a1], ped[,a2], sep="/")) pheno[is.na(ped[,a1]),i] <- NA pheno[,i] <- factor(pheno[,i]) } names(pheno) <- phenotypes use <- complete.cases(pheno) if (sum(use)==0) { warning("No individuals with complete phenotype data") } all.id <- pheno.id new.id <- 1:length(pheno.id) # # newid new.faid new.moid # lmeped <- data.frame(pedigree=pheno.pedigree, id=new.id, fa=match(pheno.fa, all.id), mo=match(pheno.mo, all.id)) lmeped[is.na(lmeped)] <- 0 # # newid trait1..traitN # lmeped <- data.frame(lmeped, sex=pheno.sex, pheno) # # Generate ibdmatrix for map position # ibd <- read.table(merlinibd, h=T, colClasses=c(rep("character",3),rep("numeric",4))) ibd.pos <- unique(ibd$MARKER) names(ibd.pos) <- mapfil$marker[match(mapfil$pos, ibd.pos)] cat("\"", merlinibd, "\" contains IBD information for ", length(ibd.pos), " positions:\n",sep="") print(ibd.pos) cat("\n") if (!is.null(mappos)) { if (!any(mappos %in% ibd.pos)) { stop(paste("Specified map position not matched in", merlinibd)) }else{ ibd.pos <- mappos[mappos %in% ibd.pos] } }else if (length(ibd.pos)==0) { stop(paste("No eligible map positions in", merlinibd)) } ped.list <- unique(ibd[,1]) # # household and kinship matrices # blocksize <- rle(ped[,1])$lengths blockn <- blocksize*(blocksize+1)/2 famblocks <- integer(sum(blockn)) fin <- 0 pos <- 0 for (p in seq(along=ped.list)) { pos <- pos + 1 sta <- fin + 1 fin <- sta + blockn[p] - 1 famblocks[sta:fin] <- 1 } fammatrices <- bdsmatrix(blocksize, famblocks, dimnames = list(new.id, new.id)) kin <- makekinship(as.numeric(lmeped$pedigree), lmeped$id, lmeped$fa, lmeped$mo) # # IBDs # ibdmatrices <- list() for(i in seq(along=ibd.pos)) { blocksize <- rle(ped[,1])$lengths blockn <- blocksize*(blocksize+1)/2 blocks <- integer(sum(blockn)) ibd.at.pos <- ibd[ibd$MARKER==ibd.pos[i],] cat("reading ibds for map position #", i, "\n", sep="") fin <- 0 pos <- 0 for (p in seq(along=ped.list)) { pos <- pos + 1 sta <- fin + 1 fin <- sta + blockn[p] - 1 blocks[sta:fin] <- merlin.ibd(ped.list[p], ped, ibd.at.pos) } ibdmatrices[[i]] <- bdsmatrix(blocksize, blocks, dimnames = list(new.id, new.id)) } list(ped=lmeped, kin=kin, fammatrices=fammatrices, map.positions=ibd.pos, ibdmatrices=ibdmatrices) } ## debug(merlin2kinship) runtests <- function() { x <- merlin2kinship(phenotypes=c("V1.NF","V1.AGE","sx","V1.BS","V1.SC", "light","red","brown", "V1.FF", "V1.YR","y2")) m0 <- lmekin(V1.NF ~ V1.AGE + sx + V1.BS + V1.SC + light + red + brown + V1.FF + V1.YR + y2, data=x$ped, subset=complete.cases(x$ped[,6:16]), random=~1|id, varlist=list(2*x$kin)) m1 <- lme(V1.NF ~ V1.AGE + sx + V1.BS + V1.SC + light + red + brown + V1.FF + V1.YR + y2, data=x$ped, subset=complete.cases(x$ped[,6:16]), random=~1|pedigree) m1b <- lmekin(V1.NF ~ V1.AGE + sx + V1.BS + V1.SC + light + red + brown + V1.FF + V1.YR + y2, data=x$ped, subset=complete.cases(x$ped[,6:16]), random=~1|id, varlist=list(x$fammatrices)) m2 <- lmekin(V1.NF ~ V1.AGE + sx + V1.BS + V1.SC + light + red + brown + V1.FF + V1.YR + y2, data=x$ped, subset=complete.cases(x$ped[,6:16]), random=~1|id, varlist=list(qtl=x$ibdmatrices[[1]])) m3 <- lmekin(V1.NF ~ V1.AGE + sx + V1.BS + V1.SC + light + red + brown + V1.FF + V1.YR + y2, data=x$ped, subset=complete.cases(x$ped[,6:16]), random=~1|id, varlist=list(id=2*x$kin, qtl=x$ibdmatrices[[1]])) m4 <- lmekin(V1.NF ~ V1.AGE + sx + V1.BS + V1.SC + light + red + brown + V1.FF + V1.YR + y2, data=x$ped, subset=complete.cases(x$ped[,6:16]), random=~1|id, varlist=list(id=x$fammatrices, qtl=x$ibdmatrices[[1]])) }