# # Reimplementation of Sib-pair in R # # Author: David L Duffy 2002-2005 # # Data structures: # # nspObject: # list of loci as data frame # pedigree data as data frame: # pedigree fa.pointer mo.pointer generation pedigree id fa mo ... # nsp <- function(nsp.data=NULL, text=NULL) { start.time<-proc.time()[3] pro<-">> " datadir<-"" keyword<-"" burnin<-100 datadir<-"." echo<-0 ilevel<-1 imp<-1 iter<-200 mapf<-"Haldane" mincnt<-20 plevel<-0 showorig<-2 wrkdir<-"" pedfil<-"" outfil<-"" batch.lines<-0 ngeno<-0 nloci<-0 numloc<-0 wrknum<-1 chek<-1 droperr<-0 fndr<-0 linkage.format<-0 use2<-2 version<-"R 0.65 18-Aug-2005" # # Existing or new dataset # if (is.null(nsp.data)) { locus.data<-NULL ped<-NULL red<-0 }else{ locus.data<-nsp.data$locus.data ped<-nsp.data$ped red<-1 } if (!is.null(text)) { batch.lines<-1 batch<-text } cat( "|||| SIB-PAIR: A program for simple genetic analysis\n", "|\\/| Version : ",version,"\n", "|/\\| Author : David L Duffy (c) 1995-2005\n", "|||| Job run : ",date(),"\n\n",sep="") while(keyword!="qui" && keyword!="exi" && keyword!="bye" || batch.lines>0) { if (batch.lines==0) { lin<-readline(prompt=pro) }else{ lin<-batch[1] batch<-batch[2:batch.lines] batch.lines<-batch.lines-1 cat("batch> ",lin,"\n") } words<-unlist(strsplit(lin,split="[ ]+")) narg<-length(words) keyword<-"" key2<-"" if (narg>0) keyword<-substr(words[1],1,3) if (narg>1) key2<-substr(words[2],1,3) if (keyword=="set") { if (key2=="loc" && narg>=3) { if (narg>=4) loctyp<-substr(words[4],1,1) if (loctyp=="n") loctyp<-"m" if (narg<4 || !(loctyp %in% c("a","q","m","x"))) { cat("NOTE: Locus type not recognised. Setting ", words[3], " to quantitative.\n") loctyp<-"q" } mappos <- NA comments <- "" if (narg>=5) { mappos<-as.double(words[5]) comments <- paste(words[6:narg], collapse=" ") } locus.data <- set.locus(words[3], loctyp, mappos, comments, locus.data=locus.data) if (red) { ped<-data.frame(ped,NA) colnames(ped)[ncol(ped)]<-locus.data$locnam[nrow(locus.data)] } }else if (key2=="dat") { datadir<-gsub("\"","",words[3]) }else if (key2=="map") { if (substr(words[3],1,3)=="fun") { if (substr(words[4],1,3)=="kos") { mapf<-"Kosambi" }else{ mapf<-"Haldane" } } }else if (key2=="dis") { mappos[loctyp %in% c("m","x")]<-as.double(words[3:narg]) }else if (key2=="hap") { if (substr(words[3],1,3)=="ver") { showorig<-2 }else if (words[3]=="on") { showorig<-1 }else if (substr(words[3],1,3)=="off") { showorig<-0 }else{ showorig<-as.integer(words[3]) } cat("NOTE: Haplotype print level ", showorig,"\n") }else if (key2=="wei") { cat("NOTE: Unimplemented command\n") }else if (key2=="wor") { cat("NOTE: Superceded command\n") }else if (key2=="ech") { cat("NOTE: Unimplemented command\n") }else if (key2=="bur") { burnin<-as.integer(words[3]) cat("NOTE: Number of MC burn-in iterations ",burnin,"\n") }else if (key2=="ite") { iter<-as.integer(words[3]) cat("NOTE: Number of MC iterations ",iter,"\n") }else if (key2=="min") { mincnt<-as.integer(words[3]) cat("NOTE: Minimum numerator for MC P-values ",mincnt,"\n") }else if (key2=="see") { cat("NOTE: Unimplemented command\n") }else if (key2=="pro") { pro<-paste(paste(words[-c(1:2)],collapse="")," ",sep="") }else if (key2=="che") { chek<-as.integer(words[3]=="off") }else if (key2=="err") { droperr<-as.integer(words[3]=="off") }else if (key2=="tdt") { use2<-substr(words[3],1,3) if (use2=="bot") { cat("NOTE: Index may contribute to TDT only where", "both parents typed.\n") }else if (use2=="fir") { cat("NOTE: Only one index case per pedigree used -- ", "both parents must be typed.\n") }else if (use2=="one") { cat("NOTE: Index may contribute to TDT if one or ", "both parents typed.\n") } }else if (key2=="imp") { imp<-substr(words[3],1,3) }else{ cat("Command not recognised:",lin,"\n") } }else if (keyword=="sho" && key2=="map") { showMap(locus.data) }else if (keyword=="dro") { loci<-listloci(words[2:narg], locus.data) locus.data[locus.data$locnam %in% loci,"active"]<-FALSE }else if (keyword=="kee") { loci<-listloci(words[2:narg], locus.data) locus.data[,"active"]<-FALSE locus.data$active[locus.data$locnam %in% loci]<-TRUE }else if (keyword=="und") { if (narg==1) { locus.data[,"active"]<-TRUE }else{ loci<-listloci(words[2:narg], locus.data) locus.data[locus.data$locnam %in% loci,"active"]<-TRUE } }else if (keyword=="nuc" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="sub" && red) { ped$pedigree<-paste(ped$pedigree,ped$pedigree,sep="_") }else if (keyword=="pru" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="edi" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="del" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="sta" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="kap" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="adj") { cat("NOTE: Unimplemented command\n") }else if ((keyword=="tra" || keyword=="box") && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="rec" && words[narg-1]=="to" && red) { cat("NOTE: Unimplemented command\n") }else if ((keyword=="fac" || keyword=="ran") && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="sel" && red) { ped<-select(words[2:narg], locus.data, ped) }else if ((keyword=="cou" || keyword=="pri") && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="hap" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="gen" && red) { print(pedframe) }else if (keyword=="dra" && red) { draw.ped(words[-1],locus.data, ped) }else if (keyword=="hwe" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="dis" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="ass" && red) { assoc(words[2:narg], locus.data, ped, mincnt=mincnt, iter=iter) }else if (keyword=="hom" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="sch" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="tdt" && red) { cat("NOTE: Unimplemented command\n") }else if ((keyword=="fre" || keyword=="des") && red) { if (narg==1) { for(marker in locus.data$locnam[locus.data$loctyp=="m" & locus.data$active]) { print.allele.freqs(describe.marker(marker, locus.data, ped)) } } }else if (keyword=="dav" && red) { davie(words[2],words[3], ped) }else if (keyword=="var" && red) { if (narg==2) words[3]<- "L-BFGS-B" do_varcomp(words[2],ped,method.opt=words[3]) }else if (keyword=="sha" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="cki" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="mix" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="his" && red) { prhisto(listloci(words[2:narg], locus.data), locus.data, ped) }else if (keyword=="plo" && red) { prplot(listloci(words[2:narg], locus.data), locus.data, ped) }else if ((keyword=="mea" || keyword=="cor") && red) { prcor(listloci(words[2:narg], locus.data), locus.data, ped) }else if (keyword=="reg" && red) { prreg(words[2:narg], locus.data, ped) }else if (keyword=="tab" && red) { prtable(listloci(words[2:narg], locus.data),locus.data,ped) }else if (keyword=="kru" && red) { prkru(words, locus.data, ped) }else if (keyword=="apm" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="asp" && red) { cat("NOTE: Unimplemented command\n") }else if ((keyword=="sib" || keyword=="he1" || keyword=="he2" || keyword=="vis") && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="qtl" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="lin" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="two" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="anc" && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="kin" && red) { kinship(ped,key2) }else if ((keyword=="ibd" || keyword=="ibs") && red) { cat("NOTE: Unimplemented command\n") }else if (keyword=="wri" && red) { if (narg==1) { print(ped[,c("pedigree","id","fa","mo","sex", locus.data$locnam[locus.data$active])], quote=FALSE, na.print="x", right=TRUE) }else if (key2=="ped") { pedout(words[3], ped, locus.data) }else if (key2=="lin") { wrlink(words[3], ped, locus.data) }else if (key2=="men") { wrfish(words[3], locus.data, ped, typ=key2) }else{ cat("NOTE: Unimplemented command\n") } }else if (keyword=="grr") { grrpen(model=substr(words[5],1,3), genefreq=as.double(words[3]), grr=as.double(words[4]), prev=as.double(words[2])) }else if (keyword=="sml") { recrisk(as.double(words[2]), as.double(words[3]), as.double(words[4]), as.double(words[5])) }else if (keyword=="pro") { cat("Prop=", as.double(words[2])/as.double(words[3]), "95%CI=",sep=" ") cat(propci(as.double(words[2]), as.double(words[3])),sep="--") cat("\n",sep="") }else if (keyword=="pch") { pchisq(as.double(words[2]), max(1,as.double(words[3]),na.rm=T), lower=F) }else if (keyword=="lis") { print(locus.data) }else if (keyword=="ls") { ls.loci(locus.data) }else if (keyword=="inf") { cat("Program version = Version ",version,"\n", "Imputation level = ",imp,"\n", "Simple Mendel checks = ",chek,"\n", "Drop incon. genotypes= ",droperr,"\n\n", "Maximum MC iterations= ",iter,"\n", "Min numerator P-value= ",mincnt,"\n", "Burn-in MC iterations= ",burnin,"\n\n", "Output detail level = ",plevel,"\n\n", "Data file directory = ",datadir,"\n\n", "Map function = ",mapf,"\n",sep="") }else if (keyword=="tim") { cat("Time used: ",proc.time()[3]-start.time," s.\n") }else if (substr(keyword,1,1)=="!" || substr(keyword,1,1)=="#") { cat(lin,"\n") }else if (substr(keyword,1,1)=="%" || substr(keyword,1,1)=="$") { system(substr(lin,2,nchar(lin))) }else if (keyword=="inc" || keyword=="loc") { cat("Reading commands from `",words[2],"'\n",sep="") if (!file.exists(words[2])) { cat("ERROR: No such file!\n") }else{ cmds<-file(words[2],"r") batch.tmp<-readLines(cmds) close(cmds) if (keyword=="loc") batch.tmp<-batch.tmp[1:grep("^run", batch.tmp)] tmp.lines<-length(batch.tmp) if (batch.lines==0) { batch<-batch.tmp batch.lines<-tmp.lines }else{ batch<-rbind(batch.tmp.batch) batch.lines<-batch.lines+tmp.lines } rm(tmp.lines, batch.tmp) } }else if (keyword=="cle") { locus.data<-NULL ped<-NULL red<-0 }else if (keyword=="dir") { pattern<-NULL if (narg>1) pattern<-words[2] print(dir(pattern=pattern)) }else if (keyword=="rea") { file.type<-key2 fil<-words[3] if (fil=="inline") { fil <- "inline.ped" if (file.exists(fil)) file.remove(fil) repeat { if (batch.lines==0) { lin<-readline(prompt=pro) }else{ lin<-batch[1] batch<-batch[2:batch.lines] batch.lines<-batch.lines-1 } if (lin == ";;;;") break cat(lin, file=fil, sep="\n", append=TRUE) } } }else if (keyword=="run") { ped<-read.pedigree(datadir, fil, locus.data) pedtable<-table(ped$pedigree) subpedtable<-apply(table(ped$pedigree, ped$pedigree), 1, function(x) sum(x>0)) gentable<-sapply(split(ped$gener, ped$pedigree), max) fndtable<-sapply(split(is.na(ped$fa), ped$pedigree), sum) pedframe<-cbind(as.data.frame.table(pedtable), fndtable, gentable, subpedtable) rm(subpedtable,gentable,fndtable) colnames(pedframe)<-c("Pedigree","No. Members", "No. Founders","No. Gens", "No. Subpeds") print(pedframe) deepest<-max(ped$generation, na.rm=T) cat("Total number of pedigrees = ", length(pedtable),"\n") cat("Number with only 1 member = ", table(pedtable)[1], "\n") cat("Total number of sibships = ", length(unique(paste(ped$pedigree,ped$fa, ped$mo))), "\n") cat("Total number of subjects = ", sum(pedtable), "\n") cat("Total subjects genotyped = ", 0 , " (", 0 ,"%)\n") cat("Total number of genotypes = ", 0 , "\n") cat("Largest pedigree (members) = ", max(pedtable) , "(Pedigree ", ped$pedigree[pedtable==max(pedtable)][1], ")\n") cat("Deepest pedigree (genrtns) = ", max(ped$generation) , "(Pedigree ", ped$pedigree[ped$generation==deepest][1], ")\n") cat("Mean size of pedigrees = ", mean(pedtable) , "\n") cat("Mean pedigree depth = ", mean(ped$generation) , "\n",sep="") red<-1 }else if (keyword=="hel") { nsp.help(words[2]) }else if (keyword=="qui" || keyword=="exi" || keyword=="bye") { if (batch.lines==0) { cat("Time used: ",proc.time()[3]-start.time," s.\n") }else{ rm(batch) batch.lines<-0 keyword<-"" } }else if (narg==0) { cat("\n") }else{ # # Pass to R parser # attach(ped) try(print(eval(parse(text=lin)))) detach(ped) } } res <- list(locus.data=locus.data, ped=ped) class(res) <- c("nspObject", class(res)) attr(res,"created") <- date() res } head.nspObject <- function(x) { head(x$ped) } # # Add a locus to locus.data # set.locus <- function(locnam, loctyp, mappos, comments, locus.data=NULL) { if (regexpr("[()_]", locnam)>0) { badnam<-locnam locnam<-gsub("[()_]",".",locnam) locnam<-gsub("[\.]+",".",locnam) locnam<-gsub("\.$","",locnam) cat("NOTE: The locus name ",badnam," contains illegal characters.\n", " Renaming to ", locnam, "\n", sep="") } if (!is.null(locus.data) && !is.na(match(locnam,locus.data$locnam))) { cat("NOTE: The locus name ",locnam," is already in use.\n", sep="") locnam<-paste(locnam,".2",sep="") cat(" Renaming to ", locnam, "\n", sep="") } new<-data.frame(locnam=I(locnam), loctyp=I(loctyp), active=TRUE, mappos=mappos, comments=comments) if (!is.null(locus.data)) { locus.data<-rbind(locus.data, new) }else{ locus.data<-new } rownames(locus.data)<-1:nrow(locus.data) locus.data } # # Print and plot marker map # show.map <- function(locus.data, graphic=FALSE) { if ("nspObject" %in% class(locus.data)) locus.data <- x$locus.data idx<-locus.data$loctyp %in% c("m","x") & locus.data$active & !is.na(locus.data$mappos) pos<-locus.data$mappos[idx] marker<-locus.data$locnam[idx] if (graphic) { n<-length(pos) x<-c(min(pos, na.rm=T), 10+max(pos, na.rm=T)) plot(x,c(0,100),type="n", axes=F, ylab="", xlab="Map position (cM)") axis(1) lines(pos,rep(50,n)) segments(pos,rep(50,n),pos,rep(53,n)) text(pos,rep(55,n),marker) text(pos,rep(45,n),pos) } data.frame(Marker=marker, Map.position=pos) } # # Calculate marker allele frequencies # describe.marker <- function(marker, locus.data, ped) { if (length(marker)<1) { return(NA) } loc.pos<- match(marker, locus.data$locnam) if (locus.data$loctyp[loc.pos]=="m" && locus.data$active[loc.pos]) { loc<-ped[,loc.pos+7] n.tot<-length(loc) loc<-loc[!is.na(loc)] n.typed<-length(loc) alleles<- table(unlist(strsplit(loc, "/"))) alleles<-alleles/sum(alleles) n.alleles<-length(alleles) list(name=marker, n.tot=n.tot, n.typed=n.typed, n.alleles=n.alleles, freqs=alleles) } } # # Print marker allele frequencies, het, PIC value # print.allele.freqs <- function(x) { correction<-x$n.typed/max(1,x$n.typed-1) ehet<-(1-sum(x$freqs*x$freqs)) matings<- (x$freqs %*% t(x$freqs))^2 uninf.mating.freq <- sum(matings)-sum(diag(matings)) pic<- ehet - uninf.mating.freq barplot(x$freqs, xlab=paste("Allele frequencies:",x$name)) cat("Allele frequencies for locus `", x$name, "'\n\n",sep="") print(data.frame("Allele"=rownames(x$freqs), "Frequency"=as.vector(x$freqs), "Counts"=as.vector(round(x$freqs*x$n.typed)))) cat("\n") cat("Number of alleles = ", x$n.alleles, "\n", sep="") cat("Heterozygosity (Hu) = ", correction * ehet, "\n", sep="") cat("Poly. Inf. Content = ", pic, "\n", sep="") cat("Number persons typed = ", x$n.typed, " ( ", round(100*x$n.typed/x$n.tot,1), "%)\n", sep="") } # # Segregation ratios etc # describe.binary.trait <- function(trait, locus.data, ped) { if (length(trait)<1) { return(NA) } loc.pos<- match(trait, locus.data$locnam) if (locus.data$loctyp[loc.pos]=="a" && locus.data$active[loc.pos]) { naff<-sum(ped$trait=="y") ntot<-sum(!is.na(ped$trait)) ped.list<-split(ped[,c(1:7,locpos)], ped$pedigree) for(i in seq(along=ped.list)) { na<-sum(ped.list[[i]]$trait=="y") nt<-sum(!is.na(ped.list[[i]]$trait)) cat(na,"/", nt, " ", na/nt,"\n",sep="") } } } # # Call glm() # prreg <- function(words, locus.data, ped) { fam<-expression("gaussian()") if (regexpr("[~+]", words)>0) { form<-paste(words,collapse="") }else{ if (is.type(words[1], locus.data,"a")) { fam<-expression("binomial()") } m<-is.type(words, locus.data,"m") words[m]<-paste("factor(",words[m],")",sep="") a<-is.type(words, locus.data,"a") words[a]<-paste("factor(",words[a],")",sep="") form<-paste(words[1],paste(words[2:length(words)],collapse="+"),sep="~") } print(form) print(fam) print(summary(m1<-glm(formula=formula(form), family=eval(parse(text=fam)), data=ped))) print(anova(m1, test="Chisq")) } # # Call kruskal.test() # pr.mean.table <- function(yvar,xvar,ped) { y<-ped[,yvar] x<-as.factor(ped[,xvar]) tab<-by(y, x, function(x) cbind(N=length(x[!is.na(x)]), Mean=mean(x, na.rm=T), SD=sd(x, na.rm=T))) tab<-matrix(unlist(tab), byr=T, nr=length(levels(x))) colnames(tab)<-c("N", "Mean", "SD") rownames(tab)<-levels(x) print(tab) } prkru <- function(words, locus.data, ped) { if (!is.type(words[2], locus.data,"q")) { cat("ERROR:", words[3], "is not a quantitative variable.\n") return(NA) }else{ opar<-square.of.plots(length(words)-2) for(i in 3:length(words)) { pr.mean.table(words[2],words[i],ped) x<-paste("factor(",words[i],")",sep="") form<-paste(words[2],x,sep="~") print(res<-kruskal.test(formula=formula(form),data=ped)) boxplot(formula=formula(form), data=ped, ylab=words[2], xlab=words[i], main=paste("Kruskal-Wallis Test P=",round(res$p.value,dig=3))) } par(opar) } } # # Multiple plots in square array # square.of.plots<-function(nplots) { if (nplots<=1) { par() }else{ rplot<-ceiling(sqrt(nplots)) cplot<-nplots %/% rplot par(mfcol=c(rplot,cplot)) } } # # Bivariate plots # prplot <- function(loci,locus.data,ped) { nd<-length(loci) if (nd==2) { attach(ped) y<-get(loci[2]) ; if (any(is.character(y))) y<-factor(y) x<-get(loci[1]) ; if (any(is.character(x))) x<-factor(x) plot(y ~ x, ylab=loci[2], xlab=loci[1]) detach(ped) } } # # Histogram # prhisto <- function(loci,locus.data,ped) { nd<-length(loci) if (nd==0) return(NA) rplot<-ceiling(sqrt(nd)) cplot<-nd %/% rplot opar<-square.of.plots(nd) attach(ped) for(loc in loci) { hist(get(loc), xlab=loc, main="") } detach(ped) par(opar) } # # Correlation matrix # prcor <- function(loci,locus.data,ped) { nd<-length(loci) if (nd==0) return(NA) args<-paste(loci,collapse=",") attach(ped) if (nd==1) { mea<-eval(parse(text=paste("mean(",args,",na.rm=TRUE)"))) cov<-eval(parse(text=paste("var(",args,",na.rm=TRUE)"))) }else{ mea<-eval(parse(text=paste("apply(cbind(",args,"),2,mean,na.rm=TRUE)"))) cov<-eval(parse(text=paste("cov(cbind(",args,"),use=\"complete\")"))) } detach(ped) print(data.frame(Mean=mea,Var=cov)) } # # Tables # prtable <- function(loci,locus.data,ped) { cat("Table:",loci,"\n") nd<-length(loci) if (nd==0) return(NA) args<-paste(loci,collapse=",") attach(ped) if (nd==1) { tab<-eval(parse(text=paste("table(",args,")"))) }else{ tab<-eval(parse(text=paste("ftable(",args,")"))) } detach(ped) print(tab) } # # read in pedigree file, sorting by pedigree, generation number, sibship # read.pedigree <- function(datadir, fil, locus.data) { mkdummy <- function(ped,sex,absent) { ncols=ncol(ped)-5 ninsert<-length(absent[absent]) if (sex=="m") { insert<-data.frame(I(ped$pedigree[absent]), I(ped$fa[absent]), I(rep(NA,ninsert)), I(rep(NA,ninsert)), I(rep(sex,ninsert)), array(NA, dim=c(ninsert,ncols))) }else{ insert<-data.frame(I(ped$pedigree[absent]), I(ped$mo[absent]), I(rep(NA,ninsert)), I(rep(NA,ninsert)), I(rep(sex,ninsert)), array(NA, dim=c(ninsert,ncols))) } colnames(insert)<-colnames(ped) rbind(ped,insert) } cat("read.pedigree:",proc.time(),"\n") fil<-paste(datadir,fil,sep="/") # # strip comments and reread because "!" is difficult comment marker # wrkfil<-paste("sp-",substr(as.character(runif(1)),3,8),".wrk",sep="") raw<-readLines(fil) writeLines(raw[!(substr(raw,1,1) %in% c("!","#"))],wrkfil) raw<-read.table(wrkfil, as.is=T, na.string=c("x","."), comment.char="") unlink(wrkfil) ped<-data.frame(I(as.character(raw[,1])),I(as.character(raw[,2])), I(as.character(raw[,3])),I(as.character(raw[,4])), I(as.character(raw[,5])), check.names=FALSE) if (any(ped[!is.na(ped[,3]),3]=="0")) { ped[!is.na(ped[,3]) & ped[,3]=="0",3] <- NA } if (any(ped[!is.na(ped[,4]),4]=="0")) { ped[!is.na(ped[,4]) & ped[,4]=="0",4] <- NA } j<-5 nloci<-length(locus.data$locnam) if (nloci>0) { for(i in 1:length(locus.data$locnam)) { j<-j+1 if (locus.data$loctyp[i] %in% c("m","x")) { ped<-data.frame(ped, I(paste(raw[,j],raw[,j+1],sep="/"))) ped[is.na(raw[,j]),5+i]<-NA j<-j+1 }else if (locus.data$loctyp[i]=="a") { ped<-data.frame(ped, raw[,j]) }else{ ped<-data.frame(ped, I(raw[,j])) } } } rm(raw) colnames(ped)<-c("pedigree","id","fa","mo","sex", locus.data$locnam) # # Individuals where only one parent specified # unspec.fa <- is.na(ped$fa) & !is.na(ped$mo) unspec.mo <- !is.na(ped$fa) & is.na(ped$mo) nunspec.fa <- sum(unspec.fa) nunspec.mo <- sum(unspec.mo) if ((nunspec.fa+nunspec.mo)>0) { cat("NOTE: There are ", nunspec.fa," unidentified fathers and ", nunspec.mo, " unidentified mothers.\n") ped$fa[unspec.fa] <- paste("ZZ", seq(1, nunspec.fa), sep="") ped$mo[unspec.mo] <- paste("ZZ", seq(nunspec.fa+1, nunspec.mo), sep="") } # # Parents without individual record # key.id <-paste(ped$pedigree, ped$id) key.fa <-paste(ped$pedigree, ped$fa) absent.fa <- !is.na(ped$fa) & !(key.fa %in% key.id) ped<-mkdummy(ped,"m",absent.fa) key.id <-paste(ped$pedigree, ped$id) key.mo <-paste(ped$pedigree, ped$mo) absent.mo <- !is.na(ped$mo) & !(key.mo %in% key.id) ped<-mkdummy(ped,"f",absent.mo) ped<-cbind(calcgen(ped), ped) ped <- ped[order(ped$pedigree, !is.na(ped$fa), ped$generation, ped$fa,ped$mo,ped$id,na.last=FALSE),] ped } # # Calculate generations for family # calcgen <- function(ped) { num<-nrow(ped) gen<-rep(NA,num) fa.p<-match(paste(ped$pedigree,ped$fa), paste(ped$pedigree,ped$id)) mo.p<-match(paste(ped$pedigree,ped$mo), paste(ped$pedigree,ped$id)) con<-fam.connect(ped, fa.p=fa.p, mo.p=mo.p) # # For each subpedigree # for(subped in unique(con)) { thisfam<-(con==subped) # cat("Pedigree",subped,"\n") # print(data.frame(con, thisfam)) if (all(is.na(ped$fa))) { gen[thisfam]<-1 # print(data.frame(con,ped$pedigree,ped$id,ped$fa,ped$mo,gen)) }else{ nonfounder <- thisfam & !is.na(fa.p) & !is.na(mo.p) gen[first.na(gen, thisfam)]<-0 # # Find generation number for founders # last.n <- 0 while(sum(as.integer(is.na(gen[thisfam])))>0) { if (last.n==sum(as.integer(is.na(gen[thisfam])))) { print(data.frame(ped[thisfam & is.na(gen),1:5])) } last.n<-sum(as.integer(is.na(gen[thisfam]))) # child gen = father gen + 1 idx<-(nonfounder & is.na(gen) & !is.na(gen[fa.p])) gen[idx]<-gen[fa.p[idx]]+1 # child gen = mother gen + 1 idx<-(nonfounder & is.na(gen) & !is.na(gen[mo.p])) gen[idx]<-gen[mo.p[idx]]+1 # father gen = child gen - 1 idx<-(nonfounder & !is.na(gen) & is.na(gen[fa.p])) gen[fa.p[idx]]<-gen[idx]-1 # mother gen = child gen - 1 idx<-(nonfounder & !is.na(gen) & is.na(gen[mo.p])) gen[mo.p[idx]]<-gen[idx]-1 # revise child gen = max(father, mother) + 1 idx<-(nonfounder & !is.na(gen[fa.p]) & !is.na(gen[mo.p])) gen[idx]<-apply(cbind(gen[fa.p[idx]]+1,gen[mo.p[idx]]+1), 1, function(x) max(x, na.rm=TRUE)) } gen[thisfam]<-gen[thisfam]+1-min(gen[thisfam], na.rm=TRUE) # # Find generation number for nonfounders # gen[nonfounder]<-NA while(sum(as.integer(is.na(gen[thisfam])))>0) { idx<-(nonfounder & is.na(gen) & !is.na(gen[fa.p]) & !is.na(gen[mo.p])) gen[idx]<-max(gen[fa.p[idx]],gen[mo.p[idx]], na.rm=TRUE)+1 } } } cbind(pedigree=con,generation=gen) } # # Indicate connectedness of pedigrees # fam.connect <- function(ped, fa.p=NULL, mo.p=NULL) { if (is.null(fa.p) || is.null(mo.p)) { fa.p<-match(paste(ped$pedigree,ped$fa), paste(ped$pedigree,ped$id)) mo.p<-match(paste(ped$pedigree,ped$mo), paste(ped$pedigree,ped$id)) } num<-nrow(ped) con<-rep(NA,num) ncon<-0 subped<-1 while(sum(as.integer(!is.na(con)))ncon) { ncon<-sum(as.integer(!is.na(con))) idx<-is.na(con) & !is.na(con[fa.p]) con[idx]<-con[fa.p[idx]] idx<-is.na(con) & !is.na(con[mo.p]) con[idx]<-con[mo.p[idx]] idx<-!is.na(con) & !is.na(fa.p) & is.na(con[fa.p]) con[fa.p[idx]]<-con[idx] idx<-!is.na(con) & !is.na(mo.p) & is.na(con[mo.p]) con[mo.p[idx]]<-con[idx] } subped<-subped+1 } con } fam.connect2 <- function(ped, fa.p=NULL, mo.p=NULL) { if (is.null(fa.p) || is.null(mo.p)) { fa.p<-match(paste(ped$pedigree,ped$fa), paste(ped$pedigree,ped$id)) mo.p<-match(paste(ped$pedigree,ped$mo), paste(ped$pedigree,ped$id)) } nonfounders<-!is.na(ped$fa) num<-nrow(ped) nfound<-num-sum(nonfounders) con<-rep(0,num) idx<-logical(num) con[!nonfounders]<-1:nfound ncon<-num while((thisncon<-length(unique(con)))con[fa.p[nonfounders]] con[fa.p[idx]]<-con[idx] idx[nonfounders]<-con[nonfounders]>con[mo.p[nonfounders]] con[mo.p[idx]]<-con[idx] } as.integer(factor(con)) } # # Find first missing value in vector # first.na<-function(x,also=TRUE) { missing<-which(is.na(x) & also) if (length(missing)>0) { missing[1] } else { NA } } # # parse a list of loci # listloci <- function(words, locus.data) { get.list<-function(x, sta, fin) { if (sta<=fin) x[sta:fin] } arglist<-words nloc<-length(locus.data) if (is.na(arglist) || length(arglist)==0) { arglist<-locus.data$locnam }else{ while(!is.na(span<-match("--",arglist))) { sta<- max(1,match(arglist[span-1],locus.data$locnam), na.rm=T) fin<- min(nloc, match(arglist[span+1],locus.data$locnam),na.rm=T) arglist<-c(get.list(arglist,1,span-1), locus.data$locnam[sta:fin], get.list(arglist,span+1,length(arglist))) } while(length(typ<-grep("\\$",arglist))>0) { idx<-substr(arglist[typ[1]],2,2) if (idx=="t") idx<-c("a","q") arglist<-c(get.list(arglist,1,typ[1]-1), locus.data$locnam[locus.data$loctyp %in% idx], get.list(arglist,typ[1]+1,length(arglist))) } } argmatch<-match(arglist,locus.data$locnam) unmatched<-arglist[is.na(argmatch)] if (length(unmatched)>0) { cat(paste("ERROR: Could not find locus ", unmatched,".\n", sep=""), sep="") } locus.data$locnam[argmatch] } # # assess type of each locus in list # is.type <- function(loci, locus.data, typ) { argmatch<-match(loci,locus.data$locnam) locus.data$loctyp[argmatch] %in% typ & locus.data$active[argmatch] } # # Subsets of locus type # getloci <- function(locus.data, typ) { if (is.null(locus.data)) { NA }else{ locus.data$locnam[locus.data$active & (locus.data$loctyp %in% typ)] } } # # Listing of loci # ls.loci <- function(locus.data) { loci<-locus.data$locnam if (length(loci)>0) { idx<-!locus.data$active loci[idx]<-paste("(",loci,")",sep="")[idx] idx<-locus.data$active & locus.data$loctyp %in% c("a","q") loci[idx]<-paste(loci,"*",sep="")[idx] cat(loci, sep=" ",fill=72) } } # # Write a title of an analysis # banner <- function(..., width=50, ch="-") { cat("\n",rep(ch,width),"\n",sep="") cat(...,"\n",sep="") cat(rep(ch,width),"\n\n",sep="") } # # Help for sib-pair commands # nsp.help <- function(query=NA) { if (is.na(query)) { cat("Keywords can be shortened to the first 3 letters.","\n", "Help prints a brief summary of all commands:","\n", " help [all | globals | data | analysis | ]","\n") }else if (query=="all") { cat(nsp.help.strings,sep="\n") }else if (substr(query,1,3)=="glo") { cat(nsp.help.strings[1:26],sep="\n") }else if (substr(query,1,3)=="dat") { cat(nsp.help.strings[27:60],sep="\n") }else if (substr(query,1,3)=="ana") { cat(nsp.help.strings[62:93],sep="\n") }else{ cat(nsp.help.strings[grep(query, nsp.help.strings)],sep="\n") } } nsp.help.strings <- c( "!|# {comment}", "%|$ {shell command}", "hel [all|glo|dat|ana|] {help}", "qui|bye {exit}", "cle {reset}", "dir [] {directory listing}", "inc {include commands}", "lis [mar|tra] {list loci}", "sho map {show current marker map}", "inf {program info}","tim {total elapsed time}", "set pro [on|off] {display prompt}", "set ech [on|off] {echo}", "set out|ple -1|0|1|2|ver|on|off {output verbosity}", "set hap 0|1|2|ver|on|off {haplotype detail}", "set nde [] {output pedigree decimal digits}", "set wei fou {weight allele frequencies}", "set ite|bur {maximum iterations | burn-in iters}", "set min {required numerator approximate P-values}", "set see {RNG seeds}", "set err [on|off] {remove nuclear family mendelian errors}", "set tdt both|one|fir {parents typed for TDT}", "set map fun kos|hal {set mapping function}", "pch {Chi-square P-value}", "pro {CI for proportion}", "sml {recurrence risks}", "grr [] {recurrence risks}", # data "set dat|wor {data | work directory}", "set imp 0|1|2|3|off|on {impute unmeasured genotypes}", "set loc mar|nam|qua|aff {declare locus position/type}", "rea ped |inline {declare pedigree file}", "rea lin |inline {declare pedigree file Linkage type}" , "set map ... {set marker map positions}", "set dis ... {set map distances (cM)}", "set sex on {add dummy variable}", "run {read in pedigree data}", "kee .[to].. [$m|$q|$a] {retain loci in analysis}", "dro .[to].. [$m|$q|$a] {drop loci from analysis}", "und [.[to].. [$m|$q|$a]] {return loci to analysis}", "sel [con whe] {select pedigrees for analysis}", "sel ped [not [in]] ... {select on pedigree name}", "rec ... to {recode old values to new}", "tra
{power transform}", "kap [res] {survivor function estimate}", "ran {rank trait values}", "adj on [to |m|f] {linear regress adjust}" , "res on ... [com] {linear regress resid}", "pre on ... [com] {linear regress predicted}", "imp on ... [com] {linear regress imputation}", "edi |all [to] [] {edit data}", "del |all {set all data missing for person}", "sta [fam] {standardize trait value}", "nuc [] [gra] {convert to (trimmed) nuclear families}", "sub {divide into subpedigrees (if compound)}", "pru {prune unaffecteds}", "pri ped ... [id ...] {print data}", "wri [arl|asp|cri|dot|fis|gas|gda|gh [dum]|lin [dum]|men|mim|ped|phe|sag|tcl] {write ped file}", "wri pap {write PAP trip.dat and phen.dat}", "wri loc asp|fis|gas|gh [dum]|lin [dum]|lok|men|mer|sag|sib|tcl {write locus file}", "wri loc pap {write PAP header.dat and popln.dat}" , "wri map men|mer {write MENDEL or MERLIN map file}", # analysis "dra [] [] {draw pedigree(s)}", "gen [] {summarize pedigree(s)}", "cou|pri [whe] {count or print where expression true}", "hap [] {show sibship haplotypes}", "anc [ove|und ] {common ancestor of most probands}", "fre|des [] {descriptive statistics}", "plo {scatter or box plot}", "his {histogram and normality test}", "mea|cor [..] {phenotypic means and correlations}", "tab [...] {contingency table}", "kru {Kruskall-Wallis test}", "reg on .[to].. {linear regression}", "mix [[] [nor|poo|exp|poi]] {test admixture}", "hwe [fou] {test HWE}", "dis {intragametic association}", "hom [ [ove|und ]] {marker homozygosity}" , "dav {segregation ratios under ascertainment}", "kin [pai|inb] {kinship/inbreeding coeffs}", "ib[s|d] [pai] {relative pair ibs/ibd sharing at marker}", "cki {sib pair ibs sharing at multiple markers}", "sha {rel pair ibs sharing at multiple markers}", "ass [ove|und ] [fou] {allelic association}" , "tdt [ove|und ] [pat|mat] {several TDTs}", "sch [] {Schaid & Sommer HWE test}", "asp [ove|und ] {affected sib-pair IBS}", "apm [ove|und ] [ibd|ibs] {IBS or IBD APM}", "sib [] [sim] [cor [mea ] [sd|var ]] {S&P QTL linkage}", "vis [] [sim] {V&H H-E regression}", "he1 [] [sim] {Trad H-E regression}", "he2 [] [sim] {Cross-product H-E regression}" , "two {two-point Haseman-Elston}", "qtl {Variance components sib-pair linkage}", "var {Variance components trait analysis }" ) # # Calculate recurrence risks and risk ratios for given SML model # recrisk <- function (q,f1,f2,f3) { # MZ Sib P-O 2nd ci <- c(1.0, 0.25, 0.0, 0.0) ct <- c(0.0, 0.5 , 1.0, 0.5) co <- c(0.0, 0.25, 0.0, 0.5) p<-1.0-q kp<-q*q*f1+2.0*p*q*f2+p*p*f3 kq<-1.0-kp qa<-(q*q*f1+q*(1-q)*f2)/kp qu<-(q*q*(1.0-f1)+q*p*(1.0-f2))/kq a<-c(q*q*f1/kp, 2*p*q*f2/kp, p*p*f3/kp) b<-c(q*q*(1.0-f1)/kq, 2*p*q*(1.0-f2)/kq, p*p*(1.0-f3)/kq) oddsr<-rr<-risku<-riska<-double(4) mating<-double(3) for (rel in 1:4) { i<-ci[rel] * diag(1.0,3,3) t<-ct[rel] * matrix(c(q , p ,0, 0.5*q, 0.5 ,0.5*p, 0 , q ,p ), byrow=TRUE, nr=3,nc=3) o<-co[rel] * matrix(c( q*q, 2.0*q*p, p*p, q*q, 2.0*q*p, p*p, q*q, 2.0*q*p, p*p) , byrow=TRUE, nr=3, nc=3) r<-i+t+o ff<-r %*% matrix(c(f1,f2,f3),nc=1) riska[rel]<- a %*% ff risku[rel]<-b %*% ff if (risku[rel]<=0.0) { rr[rel]<-9999.9 oddsr[rel]<-9999.9 }else{ if (riska[rel]/risku[rel]>10000.0) { rr[rel]<-9999.9 oddsr[rel]<-9999.9 }else{ rr[rel]<-riska[rel]/risku[rel] oddsr[rel]<-riska[rel]/(1.0-riska[rel])* (1.0-risku[rel])/risku[rel] } } } va<-2*q*p*(q*(f1-f2)+p*(f2-f3))^2 vd<-q*q*p*p*(f1-2.0*f2+f3)^2 mating[1]<-qu*qu*f1+2.0*(1.0-qu)*qu*f2+(1.0-qu)*(1.0-qu)*f3 mating[2]<-qa*qu*f1+((1.0-qu)*qa+ (1.0-qa)*qu)*f2+(1.0-qa)*(1.0-qu)*f3 mating[3]<-qa*qa*f1+2.0*(1.0-qa)*qa*f2+(1.0-qa)*(1.0-qa)*f3 pap<-100.*(1.0-min(f1,f2,f3)/kp) cat("Frequency(A): ",round(q,3),"; Pen(AA): ",round(f1,3), "; Pen(AB): ",round(f2,3), "; Pen(BB): ",round(f3,3),"\n",sep="") cat("Trait Prev : ",round(kp,3),"; Pop AR: ",round(pap,1), "%; Var(Add): ",round(va,6), "; Var(Dom): ",round(vd,6),"\n",sep="") cat("\nMeasure MZ Twin Sib-Sib Par-Off Second\n", "---------- ------- ------- ------- ------\n",sep="") cat(sprintf( "Rec risk %11.3f %11.3f %11.3f %11.3f\n",riska[1],riska[2],riska[3],riska[4])) cat(sprintf( "Rel risk %11.3f %11.3f %11.3f %11.3f\n",rr[1],rr[2],rr[3],rr[4])) cat(sprintf( "Odds rat %11.3f %11.3f %11.3f %11.3f\n",oddsr[1],oddsr[2],oddsr[3],oddsr[4])) cat(sprintf( "ibd|A-A %11.3f %11.3f %11.3f %11.3f\n", 1.0,0.25*(riska[1]+riska[3])/riska[2], 0.5,0.5*riska[3]/(riska[3]+kp))) cat(sprintf( "ibd|A-U %11.3f %11.3f %11.3f %11.3f\n", 1.0,(0.5-0.25*(riska[1]+riska[3]))/(1.0-riska[2]), 0.5,(0.25-0.25*riska[3])/(1.0-riska[4]))) cat(sprintf("\nFreq of A if Affected: %8.6f (%5.3f, %5.3f, %5.3f)\n", qa, q*q*f1/kp, 2.0*q*(1.0-q)*f2/kp, (1.0-q)*(1.0-q)*f3/kp)) cat(sprintf("Freq of A if Unaffctd: %8.6f (%5.3f, %5.3f, %5.3f)\n", qu, q*q*(1.0-f1)/kq, 2.0*q*(1.0-q)*(1.0-f2)/kq, (1.0-q)*(1.0-q)*(1.0-f3)/kq)) cat("\nMating Proportion Risk to offspring\n", "------ ----------- ------------------\n",sep="") cat(sprintf("UnA x UnA %11.3f %11.3f\n",(1.0-kp)^2,mating[1])) cat(sprintf("Aff x UnA %11.3f %11.3f\n",2.0*kp*(1.0-kp), mating[2])) cat(sprintf("Aff x Aff %11.3f %11.3f\n",kp*kp,mating[3])) } # # Expectations for SML # grrpen <- function (model="dom", genefreq, grr, prev=genefreq) { a<-prev p<-genefreq q<-1-p r<-1.0/grr pen<-double(3) names(pen)<-c("AA","AB","BB") if ( model == "mul" ) { pen[1] <- a/((p*(p+(2.0*q*r)))+((q*r)^2)) pen[2] <- r*pen[1] pen[3] <- r*pen[2] }else if (model == "rec") { pen[1] <- a/((p*(p+(2.0*q*r)))+((q^2)*r)) pen[3] <- pen[2] <- r*pen[1] }else { model <- "dom" pen[2] <- pen[1] <- a/((p*(p+(2*(1-p))))+(((1-p)^2)*r)) pen[3] <- r * pen[1] } pen[pen>1]<-1 pen[pen<0]<-0 recrisk(p,pen[1],pen[2],pen[3]) } # # Exact test of HWE: 2 alleles # # See eg Emigh TH. Comparison of tests for Hardy-Weinberg Equilibrium. # Biometrics 1980; 36: 627-642 # dhwe2 <- function(n11, n12, n22) { f <- function(x) lgamma(x+1) n <- n11+n12+n22 n1 <- 2*n11+n12 n2 <- 2*n22+n12 exp(log(2)*(n12) + f(n) - f(n11) - f(n12) - f(n22) - f(2*n) + f(n1) + f(n2)) } hwe2.test <- function(n11, n12=NULL, n22=NULL) { if (length(n11)==3 && is.null(n12) && is.null(n22)) { n12<-n11[2] n22<-n11[3] n11<-n11[1] } n1 <- 2*n11+n12 n2 <- 2*n22+n12 x12 <- seq(n1 %% 2,min(n1,n2),2) x11 <- (n1-x12)/2 x22 <- (n2-x12)/2 dist <- data.frame(n11=x11,n12=x12,n22=x22,density=dhwe2(x11,x12,x22)) dist <- dist[order(dist$density),] data.frame(n11,n12,n22, freq=n1/(n1+n2), prob=cumsum(dist$density)[dist$n11==n11 & dist$n12==n12 & dist$n22==n22]) } # # Routines for handling genotypes # # Convert "1/2" to 1,2 # geno.as.array <- function(genotypes,renumber=FALSE,miss=NULL,gtp.sep="/") { mknum<-function(genotypes, renumber=FALSE, gtp.sep="/") { alleles<- strsplit(as.character(genotypes), gtp.sep) gtp<-cbind(sapply(alleles, function(x) x[1], simplify=TRUE), sapply(alleles, function(x) x[2], simplify=TRUE)) if (renumber) { alleles<-unique(unlist(alleles)) gtp[,1]<-as.numeric(factor(gtp[,1],levels=alleles)) gtp[,2]<-as.numeric(factor(gtp[,2],levels=alleles)) } if (is.null(miss)) { gtp[is.na(genotypes),]<-NA }else{ gtp[is.na(genotypes),]<-miss } gtp } if (is.null(ncol(genotypes)) || ncol(genotypes)==1) { res<-mknum(genotypes, renumber=renumber) }else{ res<-data.frame(mknum(genotypes[,1], renumber=renumber)) for(i in 2:ncol(genotypes)) { res<-cbind(res,mknum(genotypes[,i], renumber=renumber)) } colnames(res)<-c(t(outer(names(genotypes),1:2,paste,sep="."))) } res } # # 1 2 3...nall # Convert "1/2" to 1 1 0 0 # geno.as.alleles <- function(genotypes) { num<-length(genotypes) alleles<-factor(geno.as.array(genotypes)) nall<-length(levels(alleles)) allele.names<-levels(alleles) alleles<-array(as.numeric(alleles), dim=c(num,2)) alleles[is.na(alleles)]<-0 res<-t(apply(alleles,1,tabulate, nbin=nall)) colnames(res)<-allele.names res } # # Convert 1,2 to "1/2" # geno.as.char <- function(gtp,sep="/") { genotypes<-paste(gtp[,1], gtp[,2], sep=sep) genotypes[is.na(gtp[,1])]<-NA genotypes } # # Convert "101/103" to "1,2" # geno.as.linkage.array <- function(genotypes) { } # # Convert "101/103" to "1 2" # geno.as.linkage <- function(genotypes) { gtp<-geno.as.array(genotypes, renumber=TRUE) paste(gtp[,1], gtp[,2], sep=" ") } # # Is homozygote? is.hom <- function(genotypes) { eligible <- !is.na(genotypes) alleles<- strsplit(genotypes[eligible], "/") res <- rep(NA, length=length(genotypes)) res[eligible] <- sapply(alleles, function(x) !is.na(x[1]) && x[1]==x[2]) res } # # Convert y/n to 2/1 # aff.as.num <- function(trait) { ifelse(is.na(trait), NA, ifelse(trait=="y", 2, 1)) } # # Empty array of genotypes # init.gtp.array <- function(num) { array(NA, dim=c(num,2)) } # # Order alleles in array of genotypes # ordered.gtp <- function(gtp) { swp <- gtp[,1]>gtp[,2] tmp<-gtp[swp,1] gtp[swp,1]<-gtp[swp,2] gtp[swp,2]<-tmp gtp } # # Calculate allele frequencies # allele.freq <- function(genotypes) { prop.table(table(unlist(strsplit(as.character(genotypes[!is.na(genotypes)]), "/")))) } # # Simulate founder genotypes -- drawing from pool of alleles at # the observed frequencies (therefore assumes HWE) # founder.gtp <- function (num, allelefreq) { gtp <- cbind(sample(names(allelefreq),num,replace=TRUE,prob=allelefreq), sample(names(allelefreq),num,replace=TRUE,prob=allelefreq)) ordered.gtp(gtp) } # # Simulate child genotypes -- drawing from pool of alleles (gametes ;)) # present in parents # child.gtp <- function (father.gtp,mother.gtp) { n<-nrow(father.gtp) fsamp<-cbind(1:n,sample(2,n,replace=TRUE)) msamp<-cbind(1:n,sample(2,n,replace=TRUE)) gtp <- cbind(father.gtp[fsamp],mother.gtp[msamp]) ordered.gtp(gtp) } # # Calculate a measure of genotypic association with a trait # assoc.drop <- function(trait, marker, ped, target.numerator=20, B=1000, dnn=c("Trait","Marker"), stat=assoc.binreg.all, quant.trait=FALSE, test.dir=1) { # # Which test if (!is.function(stat)) { stat<-switch(stat, fisher=assoc.fisher.gtp, anova=assoc.anova.all, kruskal=assoc.kruskal.gtp, logistic=assoc.binreg.all, binomial=assoc.binreg.all, assoc.chisq.gtp) } print(stat) # # Which tail of distribution if (test.dir==1) { crit<-function(sim.stat, observed.stat) { sim.stat>observed.stat || (sim.stat==observed.stat & runif(1)>0.5) } }else{ crit<-function(sim.stat, observed.stat) { sim.stat0.5) } } num<-nrow(ped) founder<-is.na(ped$fa) nfound<-sum(as.integer(founder)) fa.p<-match(paste(ped$pedigree,ped$fa), paste(ped$pedigree,ped$id)) mo.p<-match(paste(ped$pedigree,ped$mo), paste(ped$pedigree,ped$id)) untyped<-is.na(marker) allelefreq<-allele.freq(marker) observed.stat<-stat(trait, marker) if (is.na(observed.stat)) { cat("\nERROR: Association statistic not able to be calculated for data.\n") list(pvalue=NA, denom=NA) } # # Sequential simulation of P-value # exceeded<-0 denom<-0 null.stat<-double(0) while(exceeded0) { idx<- !(is.na(gtp[fa.p,1]) | is.na(gtp[mo.p,1])) gtp[idx,]<-child.gtp(gtp[fa.p[idx],],gtp[mo.p[idx],]) } gtp[untyped,]<-NA geno.as.char(gtp) } assoc <- function(traits, locus.data, ped, mincnt=mincnt, iter=200) { markers<-getloci(locus.data, "m") nmar<-length(markers) opar<-square.of.plots(nmar) attach(ped) for(trait in traits) { banner("Allelic association testing for trait \"", trait,"\"") y<-get(trait,2) if (is.type(trait, locus.data, "a")) { for(mar in markers) { cat(" ---- Allelic Association Analysis v. \"", mar, "\" ----\n",sep="") x<-as.factor(get(mar,2)) tab<-table(y, x, dnn=c(trait, mar)) print(tab) assoc.drop(y, x, ped, target.numerator=mincnt, B=iter, dnn=c(trait,mar)) } }else if (is.type(trait, locus.data, "q")) { for(mar in markers) { cat(" ---- Allelic Association Analysis v. \"", mar, "\" ----\n",sep="") x<-get(mar) assoc.drop(y, x, ped, target.numerator=mincnt, B=iter, dnn=c(trait,mar), stat=assoc.anova.all , quant.trait=TRUE) } } } detach(ped) par(opar) } # # Ann. Hum. Genet., Lond. (1979), 42, 507 # Printed in Great Britain # # # The 'singles' method for segregation analysis under # incomplete ascertainment # # by A. M. Davie # # ^ R - J # Our estimate of p is p = ----- where R = total number of affected sibs, # T - J # T = total number of siblings, J = number of sibships with only one # proband ('singles'). For large samples the variance of our estimate can # be estimated by # # ^ (R - J)(T - R) 2Q(T - R)**2 # var (p) ~ -------------- + ------------ # (T - J)**3 (T - J)**4 # # where Q is the number of sibships containing exactly two probands. # davie <- function(trait, proband, ped) { if (is.null(proband) || is.na(proband) || proband=="") proband<-trait nonfounders<-!is.na(ped$fa) sibships<-factor(paste(ped$pedigree,ped$fa, ped$mo)[nonfounders]) list.sibships<-levels(sibships) attach(ped) y<-(get(trait)=="y")[nonfounders] prob<-(get(proband)=="y")[nonfounders] detach(ped) tval<-sum(!is.na(y)) rval<-sum(y) n.aff<-sapply(split(y,sibships), sum) n.prob<-sapply(split(prob,sibships), sum) jval<-length(n.prob[n.prob==1]) qval<-length(n.prob[n.prob==2]) res<-davie.concord(jval, qval, rval, tval) cat("Segregation ratio = ",round(res$p,3)," (SE=",round(res$se,3),")\n",sep="") } davie.concord <- function(j,q,r,t) { p <- (r-j)/(t-j) se <- sqrt ( (r-j)*(t-r)/(t-j)^3 + 2*q*(t-r)^2/(t-j)^4 ) list(p=p,se=se) } kinship <- function(ped,inb="kin") { res<-by(ped,ped$pedigree,kinship.rel) if (inb=="inb") { f.val<-c(sapply(res,function(x) diag(x$kinship),simplify=TRUE)-1.0) print(f.val) res<-data.frame(Pedigree=ped$pedigree,Person=ped$id, Father=ped$fa,Mother=ped$mo,F=f.val) banner("Individuals with non-zero inbreeding coefficient") print(res[res$F>0.0,]) cat("\nMean inbreeding coefficient = ", mean(res$F)," (based on ",nrow(ped), " individuals)\n\n",sep="") }else{ banner("Coefficient of relationship for all relative pairs") print(res) } } segvar <- function(ped, fa.p, mo.p) { kin <- by(ped,ped$pedigree,kinship.rel) f.val <- c(sapply(kin,function(x) diag(x$kinship),simplify=TRUE)) ifelse(is.na(ped$fa), 1.0, sqrt(1-0.25*(f.val[fa.p]+f.val[mo.p]))) } # # Coefficient of relationship # kinship.rel <- function(ped) { kinval<-function(i,j, kin) { ifelse(i>j, kin[i,j], kin[j,i]) } num<-nrow(ped) nfound<-sum(is.na(ped$fa)) kin<-diag(num) ; rownames(kin)<-colnames(kin)<-ped$id frat<-kin if (num>nfound) { fa.p<-match(paste(ped$pedigree,ped$fa), paste(ped$pedigree,ped$id)) mo.p<-match(paste(ped$pedigree,ped$mo), paste(ped$pedigree,ped$id)) for(i in (nfound+1):num) { for(j in 1:(i-1)) { kin[j,i]<-kin[i,j]<-0.5*(kinval(fa.p[i],j,kin)+ kinval(mo.p[i],j,kin)) frat[j,i]<-frat[i,j]<-0.25*(kinval(fa.p[i], fa.p[j],kin)+ kinval(mo.p[i], mo.p[j],kin)+ kinval(fa.p[i], mo.p[j],kin)+ kinval(mo.p[i], fa.p[j],kin)) } kin[i,i]<-1.0+0.5*kinval(fa.p[i], mo.p[i],kin) } } list(pedigree=ped$pedigree[1],kinship=kin, fraternity=frat) } # # Empirical kinship coeffcients: read IBD matrices from MERLIN # kinship.emp <- function(ped, ibds) { num<-nrow(ped) if (is.null(ibds)) { kin=NULL }else{ kin<-as.matrix(ped[,ibds[1:num]]) rownames(kin)<-colnames(kin)<-ped$id } list(pedigree=ped$pedigree[1],kinship=kin) } # # Variance components analysis: MVN or MFT # do_varcomp <- function(trait,ped,covariates=NULL,ibds=NULL, method.opt="Nelder-Mead") { if (is.factor(ped[,trait]) & nlevels(ped[,trait])<5) { varcomp.mft(trait, ped, covariates=covariates, ibds=ibds, method.opt=method.opt) }else{ varcomp(trait, ped, covariates=covariates, ibds=ibds, method.opt=method.opt) } } # # Variance components MVN maximizer # varcomp <- function(trait,ped,covariates=NULL,ibds=NULL, method.opt="Nelder-Mead") { banner("Variance components analysis of \"", trait,"\"") ntrait<-length(trait) if (ntrait>1) { warning("Univariate VC analysis only. Analysing first trait.") } vc.data<-create.vc.data(trait,covariates,ibds,ped) ymean<-mean(ped[,trait], na.rm=T) yvar<-var(ped[,trait], na.rm=T) fixed<-c(ymean, rep(0,length(covariates))) nfixed<-ntrait*length(covariates)+1 nobs<-sum(complete.cases(ped[,c(trait,covariates)])) opt.control <- list(fnscale=-2.0, reltol=1e-12) if (method.opt %in% c("BFGS", "CG", "L-BFGS-B")) { opt.control = list(fnscale=-2.0, pgtol=1e-12) } # # Base AE model # mod="ae" maxlik<-optim(c(fixed,0.9*yvar,0.1*yvar), optlik, gr = NULL, method = method.opt, lower = -Inf, upper = Inf, control = opt.control, hessian = FALSE, vc.data, nfixed, mod) va<-maxlik$par[nfixed+1] vq<-0.0 ve<-maxlik$par[nfixed+2] base.ll <- maxlik$value # # AQE # if (!is.null(ibds)) { mod="aqe" maxlik<-optim(c(fixed, 0.1*yvar, 0.0, 0.9*yvar), optlik, gr = NULL, method = method.opt, lower = -Inf, upper = Inf, control = opt.control, hessian = FALSE, vc.data, nfixed, mod) va<-maxlik$par[nfixed+1] vq<-maxlik$par[nfixed+2] ve<-maxlik$par[nfixed+3] } test.ae.e <-nobs*(log(yvar)+1)+2*maxlik$value fixed<-maxlik$par[1:nfixed] vt<-va+vq+ve cat("Number of families = ", length(unique(ped$pedigree[complete.cases(ped[,trait])])),"\n", "Number of observations = ", nobs,"\n", "Trait mean (intercept) = ", fixed[1],"\n",sep="") if (nfixed>1) { for(i in 2:nfixed) { cat(formatC(covariates[i-1],width=27,flag="-"),"= ",fixed[i],"\n",sep="") } } cat("\n") cat("Additive genetic variance = ", va ," (",round(100*va/vt,1),"%)\n",sep="") if (mod=="aqe") { cat("QTL genetic variance = ", vq ," (",round(100*vq/vt,1),"%)\n",sep="") } cat("Environmental variance = ", ve ," (",round(100*ve/vt,1),"%)\n", "Model loglikelihood = ", maxlik$value,"\n\n",sep="") if (mod=="aqe") { lod=(maxlik$value-base.ll)/log(10) lrts=2*(maxlik$value-base.ll) cat("Lod score testing VQ=0 = ", lod, " (df=1, P=",0.5*pchisq(lrts,1,lower=FALSE),")\n\n",sep="") } cat("Total function evaluations = ", maxlik$counts[1],"\n\n",sep="") } # # function to evaluate the MVN likelihood # lik <- function(m, va, vq, ve, vc.data) { res<-sum(unlist(lapply(vc.data, lik.fam, m, va, vq, ve))) res } lik.fam <- function(vc.data, m, va, vq, ve) { ln<-function(x) ifelse(x>0, log(x), 0) yhat<-vc.data$covariates %*% matrix(m, nc=1) S<-va*vc.data$rel + vq*vc.data$ibd + diag(ve,nrow(vc.data$rel)) res <- -0.5* (ln(det(S)) + t(vc.data$y-yhat) %*% solve(S) %*% (vc.data$y-yhat)) res } # # and the version to allow calling of optim() function maximizer # optlik <- function(par, vc.data, nfixed, mod) { if (mod=="ae") { lik(par[1:nfixed],par[nfixed+1], 0.0, par[nfixed+2],vc.data) }else if (mod=="aqe") { lik(par[1:nfixed],par[nfixed+1], par[nfixed+2], par[nfixed+3],vc.data) } } # # MFT # # Variance components MFT maximizer # varcomp.mft <- function(trait, ped, covariates=NULL, ibds=NULL, method.opt="L-BFGS-B") { require(mvtnorm) banner("MFT Variance components analysis of \"", trait,"\"") if (is.factor(ped[,trait])) { ped[,trait] <- as.numeric(ped[, trait])-1 } nthresh <- sum(!is.na(unique(ped[, trait]))) - 1 vc.data<-create.vc.data(trait,covariates,ibds,ped) ymean<-cumsum(prop.table(table(ped[,trait]))) ymean <- ymean[-length(ymean)] vt <- 1.0 fixed<-c(qnorm(ymean,lower=FALSE), rep(0,length(covariates))) nfixed<-nthresh + length(covariates) nobs<-sum(complete.cases(ped[,c(trait,covariates)])) opt.control <- list(fnscale=-2.0, reltol=1e-12) if (method.opt %in% c("BFGS", "CG", "L-BFGS-B")) { opt.control = list(fnscale=-2.0, pgtol=1e-12) } # # Base AE model # mod="ae" maxlik<-optim(c(fixed, 0.1*vt), optlik.mft, gr = NULL, method = method.opt, lower = c(rep(-Inf,length(fixed)),0.0, 0.0), upper = c(rep(Inf,length(fixed)),1.0, 1.0), control = opt.control, hessian = FALSE, vc.data, nthresh, nfixed, mod) va<-maxlik$par[nfixed+1] vq<-0.0 ve<-maxlik$par[nfixed+2] base.ll <- maxlik$value # # AQE # if (!is.null(ibds)) { mod="aqe" maxlik<-optim(c(fixed, 0.1*yt, 0.0), optlik.mft, gr = NULL, method = method.opt, lower = c(rep(-Inf,length(fixed)),0.0, 0.0, 0.0), upper = c(rep(Inf,length(fixed)),1.0, 1.0, 1.0), control = opt.control, hessian = FALSE, vc.data, nfixed, mod) va<-maxlik$par[nfixed+1] vq<-maxlik$par[nfixed+2] ve<-maxlik$par[nfixed+3] } fixed<-maxlik$par[1:nfixed] ve <- vt-va-vq cat("Number of families = ", length(unique(ped$pedigree[complete.cases(ped[,trait])])),"\n", "Number of observations = ", nobs,"\n", "Baseline Threshold = ", fixed[1], " (Prev=",round(100*pnorm(fixed[1],lower=FALSE),1),"%)\n",sep="") if (nfixed>1) { for(i in 2:nfixed) { cat(formatC(covariates[i-1],width=27,flag="-"),"= ",fixed[i],"\n",sep="") } } cat("\n") cat("Additive genetic variance = ", va ," (",round(100*va/vt,1),"%)\n",sep="") if (mod=="aqe") { cat("QTL genetic variance = ", vq ," (",round(100*vq/vt,1),"%)\n",sep="") } cat("Environmental variance = ", ve ," (",round(100*ve/vt,1),"%)\n", "Model loglikelihood = ", maxlik$value,"\n\n",sep="") if (mod=="aqe") { lod=(maxlik$value-base.ll)/log(10) lrts=2*(maxlik$value-base.ll) cat("Lod score testing VQ=0 = ", lod, " (df=1, P=",0.5*pchisq(lrts,1,lower=FALSE),")\n\n",sep="") } cat("Total function evaluations = ", maxlik$counts[1],"\n\n",sep="") } create.vc.data <- function(trait,covariates,ibds,ped) { make.fixed<-function(x, covariates) { if (is.null(covariates)) { res=matrix(rep(1,nrow(x)), nc=1) colnames(res)="Intercept" }else{ res=as.matrix(cbind(rep(1,nrow(x)), x[,covariates,drop=FALSE])) colnames(res)=c("Intercept", covariates) } res } use.vc<-function(x) { use<-complete.cases(data.frame(x$y,x$covariates)) x$id <- x$id[use] x$y<-as.matrix(x$y)[use,,drop=FALSE] x$covariates<-x$covariates[use,,drop=FALSE] x$rel<-x$rel[use,use,drop=FALSE] x$ibd<-x$ibd[use,use,drop=FALSE] x } if (is.null(ibds)) { vc.data<-by(ped,ped$pedigree, function(x) { list(id=x[,"id"], y=x[,trait], covariates=make.fixed(x,covariates), rel=kinship.rel(x)$kinship, ibd=matrix(0, nr=nrow(x), nc=nrow(x))) } ) }else{ vc.data<-by(ped,ped$pedigree, function(x) list(id=x[,"id"], y=x[,trait], covariates=make.fixed(x,covariates), rel=kinship.rel(x)$kinship, ibd=kinship.emp(x,ibds)$kinship)) } lapply(vc.data,use.vc) } # # function to evaluate the MVN likelihood # lik.mft <- function(m, va, vq, nthresh, vc.data) { res<-sum(unlist(lapply(vc.data, lik.mft.fam, m, va, vq, nthresh))) res } lik.mft.fam <- function(vc.data, m, va, vq, nthresh) { ln <- function(x) ifelse(x>0, log(x), 0) ndata <- length(vc.data$y) thresh <- matrix(NA, nr=ndata, nc=(nthresh+2)) thresh[,1] <- -Inf thresh[,ncol(thresh)] <- Inf thresh[, -c(1, ncol(thresh))] <- vc.data$covariates %*% matrix(m, nr=1) S <- va*vc.data$rel + vq*vc.data$ibd + diag(1-va-vq,nrow(vc.data$rel)) res <- ln(pmvnorm(lower=thresh[cbind(1:ndata,vc.data$y+1)], upper=thresh[cbind(1:ndata,vc.data$y+2)], corr=S)) res } # # and the version to allow calling of optim() function maximizer # optlik.mft <- function(par, vc.data, nthresh, nfixed, mod) { if (mod=="ae") { lik.mft(par[1:nfixed],par[nfixed+1], 0.0, nthresh, vc.data) }else if (mod=="aqe") { lik.mft(par[1:nfixed],par[nfixed+1], par[nfixed+2], nthresh, vc.data) } } # # Select pedigrees based on logical expression # select <- function(expression, locus.data, ped) { nprobands<-1 comparison<-function(use,nprobands) use>=nprobands if (substr(expression[1],1,3) %in% c("con","exa")) { nprobands<-as.integer(expression[2]) if(substr(expression[1],1,3)=="exa" || nprobands==0) { comparison<-function(use,nprobands) use==nprobands } expression<-expression[-c(1:2)] } if (substr(expression[1],1,3)=="whe") { expression<-expression[-1] } if (substr(expression[1],1,3)=="ped" && expression[2]=="in") { expression<-paste("pedigree %in% c(", paste(expression[-c(1:2)], collapse=","), ")", sep="") }else{ expression<-paste(expression, collapse=" ") } cat("\nSelecting pedigrees using \"", expression,"\"\n\n",sep="") attach(ped) cond<-eval(parse(text=expression)) detach(ped) proband.tab<-tapply(cond,ped$pedigree,function(x) sum(x, na.rm=T)) ped.size<-table(ped$pedigree) print(cbind(proband.tab, ped.size)[comparison(proband.tab,nprobands),]) use<-names(proband.tab)[comparison(proband.tab,nprobands)] selected<-ped[ped$pedigree %in% use,] cat("Number of pedigrees selected=",length(use), " (",nrow(selected)," individuals)\n",seq="") selected } # # Writers # pedout <- function(fil, ped, locus.data) { if (is.na(fil)) fil<-"" ped<-ped[,c("pedigree","id","fa","mo","sex", locus.data$locnam[locus.data$active])] marker<-c(rep(FALSE,5), (locus.data$loctyp %in% c("m","x"))[locus.data$active]) if (sum(marker)>0) { ped[,marker]<-apply(ped[,marker],2,function(x) gsub("/"," ",x)) } # ped[is.na(ped)]<-"x" # ped[is.na(ped[,marker])]<-"x x" write.table(ped, file=fil, row.names=FALSE, col.names=FALSE, quote=FALSE, na="x") } # # Linkage "pre" format # wrlink <- function (fil, ped, locus.data) { if (is.na(fil)) fil<-"" linkped<-ped if (sum(locus.data$active)>0) { idx<-locus.data$locnam[(locus.data$loctyp %in% c("m","x")) & locus.data$active] if (length(idx)>0) { linkped[,idx]<-apply(as.matrix(linkped[,idx]),2,geno.as.linkage) } idx<-locus.data$locnam[(locus.data$loctyp %in% c("a")) & locus.data$active] if (length(idx)>0) { linkped[,idx]<-apply(as.matrix(linkped[,idx]),2,aff.as.num) } } write.table(linkped[,c("pedigree","id","fa","mo","sex", locus.data$locnam[locus.data$active])], file=fil, row.names=FALSE, col.names=FALSE, quote=FALSE, na="0") } # # Mendel and Fisher pedigree files # wrfish <- function(fil, locus.data, ped, typ="men") { if (is.na(fil)) fil<-"" one.word<-function(x,fmt="%8s",na=" ") { if (is.na(x)) { na }else{ sprintf(fmt,substr(as.character(x),1,nchar(na))) } } nlp<-sum(active<-locus.data$active) active<-7+seq(1,length(active))[active] if (typ=="men") { cat("(2(i3,1x),a8)\n", "(a8,2(1x,a8),2(1x,a1),",nlp,"(1x,a8))\n",sep="",file=fil) pedhead<-function(num, pedigree,file="") { cat(sprintf(" 0 %3d %8s\n",num,pedigree),file=file) } }else{ cat("((i3,1x,a8)\n", "(a8,2(1x,a8,a1),",nlp,"(1x,a8))\n",sep="",file=fil) pedhead<-function(num, pedigree,file="") { cat(sprintf("%3d %8s\n",num,pedigree),file=file) } } ped.list<-split(ped,ped$pedigree) for(i in seq(along=ped.list)) { pedhead(nrow(ped.list[[i]]), ped.list[[i]]$pedigree[1],file=fil) sid<-substr(ped.list[[i]]$id,1,8); sid[is.na(ped.list[[i]]$id)]<-NA sfa<-substr(ped.list[[i]]$fa,1,8); sfa[is.na(ped.list[[i]]$fa)]<-NA smo<-substr(ped.list[[i]]$mo,1,8); smo[is.na(ped.list[[i]]$mo)]<-NA ssx<-toupper(substr(ped.list[[i]]$sex,1,1)); ssx[is.na(ped.list[[i]]$sex)]<-NA for(j in 1:length(sid)) { cat(one.word(sid[j]), one.word(sfa[j]), one.word(smo[j]), one.word(ssx[j], fmt="%1s", na=" "), " ",file=fil) if (nlp>0) for(k in 1:nlp) { cat(" ",one.word(ped.list[[i]][j,active[k]]),sep="",file=fil) } cat("\n",file=fil) } } } # # Simulate nuclear families under MFT model # mk.test.data <- function(n=200, prev=0.1, r=0.2) { require(mvtnorm) th <- qnorm(1-prev) AA <- round(n*pmvnorm(lower=rep(th,2),upper=rep(Inf,2),cor=matrix(c(1, r, r, 1), nr=2))) UU <- round(n*pmvnorm(lower=rep(-Inf,2),upper=rep(th,2),cor=matrix(c(1, r, r, 1), nr=2))) AU <- n - AA - UU test.ped <- data.frame(pedigree=gl(100,4), id=gl(4,1,400), fa=rep(c(NA,NA,1,1),100), mo= rep(c(NA,NA,2,2),100), sex=rep(c("m","f","m","m"),100)) test.ped$trait <- c(rep(c(NA,NA,"y","y"), AA), rep(c(NA,NA,"y","n"), AU), rep(c(NA,NA,"n","n"), UU)) test.ped$trait <- factor(test.ped$trait) test.locus <- set.locus("trait","a",NA) list(locus.data=test.locus, ped=test.ped) } # # Draw a pedigree # # # Shapes # draw.male<-function(x,y,text="",size=0.2,fill="white") { grid.rect(x,y,width=size, height=size,gp=gpar(fill=fill)) grid.text(text,x,y, check.overlap=TRUE) } draw.female<-function(x,y,text="",size=0.2,fill="white") { grid.circle(x,y,r=size/2,gp=gpar(fill=fill)) grid.text(text,x,y, check.overlap=TRUE) } draw.unknown<-function(x,y,text="",size=0.2, fill="white") { diamond<-function(x,y,r) { grid.polygon(c(x-r, x, x+r, x), c(y, y+r, y, y-r), gp=gpar(fill=fill)) } r<-size/2 for(i in 1:length(x)) diamond(x[i],y[i],r) grid.text(text,x,y, check.overlap=TRUE) } draw.person<-function(x,y,text="", size=0.2, sex=NA, fill="white") { fill<-as.character(fill) idx<-is.na(sex) if (sum(idx)>0) { draw.unknown(x[idx],y[idx],text=text[idx],size=size,fill=fill) } if (sum(!idx)>0) { idx<-sex=="m" if (sum(idx)>0) { draw.male(x[idx],y[idx],text=text[idx],size=size,fill=fill) } idx<-sex=="f" if (sum(idx)>0) { draw.female(x[idx],y[idx],text=text[idx],size=size,fill=fill) } } } # # Lines # draw.riser<-function(y0,y1,x) { if (y1>y0) grid.lines(rep(x, 2), c(y1, y0)) } # # Drawing # draw.ped <- function(words, locus.data, ped) { palette<-c("white","lightgrey") require(grid) grid.newpage() ped<-ped[ped$pedigree %in% words,] max.gen<-max(ped$gener) wide.gen<-apply(table(ped$pedigree, ped$gener),1,max) trait<-getloci(locus.data,"a") cat("Binary trait : ", trait,"\n") cat("No. of generation: ", max.gen,"\n") cat("Widest generation: ", max(wide.gen),"\n") symbol.size<-(1/(2*max(wide.gen))) generation.height<-1/(1+2*max.gen) cat("Symbol size : ", symbol.size,"\n") cat("Generation height: ", generation.height,"\n") num<-nrow(ped) founders<-is.na(ped$fa) first<-(ped$gener==1) nfound<-sum(founders) if (num>nfound) { fa.p<-match(paste(ped$pedigree,ped$fa), paste(ped$pedigree,ped$id)) mo.p<-match(paste(ped$pedigree,ped$mo), paste(ped$pedigree,ped$id)) if (is.na(trait)) { aff<-rep(palette[1],num) }else{ aff<-factor(ped[,trait]=="y",labels=palette) } position.in.drawing<-data.frame(id=ped$id, x=rep(NA,num),y=rep(NA,num)) position.in.drawing[first,] <- draw.founders( ped$id[first], ped$sex[first], fill=aff[first], gen=generation.height, size=symbol.size, spacing=1.5) bysibships<-split(data.frame(ped[!founders,c(4,5,6,7,2)], idx=(1:nrow(ped))[!founders]), factor(paste(ped$pedigree,ped$gener,ped$fa,ped$mo))[!founders]) for(i in seq(along=bysibships)) { nkid<-nrow(bysibships[[i]]) currs<-bysibships[[i]]$idx currf<-fa.p[currs[1]] currm<-mo.p[currs[1]] currs<-c(currf,currm,currs) position.in.drawing[currs,] <- draw.fam.plus.spouses(fam.obj, gen=generation.height, size=symbol.size) # draw.sibship( # fa.xy=unlist(position.in.drawing[currf,2:3]), # mo.xy=unlist(position.in.drawing[currm,2:3]), # nkid=nkid, id=ped$id[currs], sex=ped$sex[currs], # fill=aff[currs], # gen=generation.height, size=symbol.size) } grid.text(paste("Pedigree",ped$pedigree[1]), x=0.5, y=0.04) } } # # Create list of marry-in spouses for each person make.spouse.list <- function(ped) { persons<-split(ped[,c(3,4)], 1:nrow(ped)) sapply(persons, function(x, ped) get.marryin(x[1],x[2],ped),ped) } # # get specified relative of person # ret.na <-function(x) { if (length(x)==0) { NA }else{ x } } get.children <- function(thisped,thisid,ped) { kids<-ped$id[ped$pedigree %in% thisped & !is.na(ped$fa) & (ped$mo %in% thisid | ped$fa %in% thisid)] ret.na(kids) } get.sibs <- function(thisped,thisid,ped) { idx<-ped$pedigree %in% thisped & ped$id %in% thisid sibs<-ped$id[!is.na(ped$fa) & ped$fa==ped$fa[idx] & ped$mo==ped$mo[idx]] ret.na(sibs[sibs!=thisid]) } get.halfsibs <- function(thisped,thisid,ped) { idx<-ped$pedigree %in% thisped & ped$id %in% thisid halfsibs<-ped$id[!is.na(ped$fa) & (as.integer(ped$fa==ped$fa[idx])+ as.integer(ped$mo==ped$mo[idx]))==1] ret.na(halfsibs[halfsibs!=thisid]) } get.father <- function(thisped,thisid,ped) { father<-ped$fa[ped$pedigree %in% thisped & ped$id %in% thisid] ret.na(father) } get.mother <- function(thisped,thisid,ped) { mother<-ped$mo[ped$pedigree %in% thisped & ped$id %in% thisid] ret.na(mother) } get.spouse <- function(thisped,thisid,ped) { idx<-which(ped$pedigree %in% thisped & ped$id %in% thisid) get.spouse.by.pos(idx,ped) } get.marryin <- function(thisped,thisid,ped) { idx<-which(ped$pedigree %in% thisped & ped$id %in% thisid) marryin<-ped$id[ped$pedigree %in% thisped & is.na(ped$fa) & ped$id %in% get.spouse.by.pos(idx,ped)] ret.na(marryin) } get.spouse.by.pos <- function(idx,ped) { target<-ped$id[idx] ped<-ped[ped$pedigree==ped$pedigree[idx] & !is.na(ped$fa),c(5,6)] spouses<-unique(c(ped$fa[ped$mo %in% target], ped$mo[ped$fa %in% target])) ret.na(spouses) } draw.founders <- function(id, sex, fill="white", gen=0.20, size=0.10, spacing=1.5) { nfound<-length(id) gap<-size*spacing found.x<-0.5*(1-nfound*gap)+(0:(nfound-1))*gap found.y<-rep(1-1.1*size, nfound) draw.person(found.x, found.y, text=id,size=size, sex=sex, fill=fill) res<-data.frame(id,x=found.x, y=found.y) res } draw.sibship <- function(fa.xy=c(0.4, 0.7), mo.xy=fa.xy+c(0.2,0), nkid=2, id=as.character(c(101,102,200+(1:nkid))), sex=c("m","f", rep(NA, nkid)), fill=rep("white", nkid+2), gen=0.20, size=0.10) { # # Parents # if (any(is.na(mo.xy))) { mo.xy<-fa.xy+c(0.2,0) } if (any(is.na(fa.xy))) { fa.xy<-mo.xy+c(0.2,0) } mating.y<-min(c(fa.xy[2], mo.xy[2])) mating.x<-mean(c(fa.xy[1], mo.xy[1])) cen<-c(mating.x, mating.y-(1+0.1*runif(1))*gen) grid.lines(c(fa.xy[1], mo.xy[1]), rep(mating.y, 2)) draw.riser(mating.y, fa.xy[2], fa.xy[1]) draw.riser(mating.y, mo.xy[2], mo.xy[1]) grid.lines(rep(cen[1], 2), c(mating.y, cen[2])) draw.person(fa.xy[1], fa.xy[2], id[1], size=size, sex=sex[1],fill=fill[1]) draw.person(mo.xy[1], mo.xy[2], id[2], size=size, sex=sex[2],fill=fill[2]) # # Children # hbar<-min(1-size, nkid*size) hbar.cen<-cen[1] if ((hbar.cen+hbar/2+size/2)>1) { hbar.cen<-1-hbar/2-size/2 }else if ((hbar.cen-hbar/2-size/2)<0) { hbar.cen<-hbar/2+size/2 } if (nkid>1) { grid.lines(c(hbar.cen-hbar/2, hbar.cen+hbar/2), rep(cen[2], 2)) kid.x<-hbar.cen-hbar/2+(0:(nkid-1))*hbar/(nkid-1) }else{ kid.x<-cen[1] } kid.y<-rep(cen[2]-gen, nkid) grid.segments(x0=kid.x, y0=kid.y+gen, x1=kid.x, y1=kid.y+size/2) draw.person(kid.x, kid.y, text=id[c(-1,-2)], size=size, sex=sex[c(-1,-2)], fill=fill[c(-1,-2)]) res<-data.frame(id=id,x=c(fa.xy[1], mo.xy[1], kid.x), y=c(fa.xy[2], mo.xy[2], kid.y)) # print(res) res } # # fam.obj father id x y fill.colour # mother id x y fill.colour # # children id ischild sex x y fill.colour # id ischild sex x y fill.colour # draw.fam.plus.spouses <- function(fam.obj, gen=0.20, size=0.10) { # # Parents # attach(fam.obj) if (is.na(mother$x)) { mother$x<-father$x+0.2 mother$y<-father$y } if (any(is.na(father$x))) { father$x<-mother$x+0.2 father$y<-mother$y } mating.y<-min(c(father$y, mother$y)) mating.x<-mean(c(father$x, mother$x)) cen<-c(mating.x, mating.y-(1+0.1*runif(1))*gen) grid.lines(c(father$x, mother$x, rep(mating.y, 2))) draw.riser(mating.y, father$y, father$x) draw.riser(mating.y, mother$y, mother$x) grid.lines(rep(cen[1], 2), c(mating.y, cen[2])) draw.person(father$x, father$y, id[1], size=size, sex=sex[1],fill=fill[1]) draw.person(mother$x, mother$y, id[2], size=size, sex=sex[2],fill=fill[2]) # # Children # nkid<-nrow(children[is.na(children$x)]) hbar<-min(1-size, nkid*size) hbar.cen<-cen[1] if ((hbar.cen+hbar/2+size/2)>1) { hbar.cen<-1-hbar/2-size/2 }else if ((hbar.cen-hbar/2-size/2)<0) { hbar.cen<-hbar/2+size/2 } if (nkid>1) { grid.lines(c(hbar.cen-hbar/2, hbar.cen+hbar/2), rep(cen[2], 2)) kid.x<-hbar.cen-hbar/2+(0:(nkid-1))*hbar/(nkid-1) }else{ kid.x<-cen[1] } isk<-children$ischild kid.y<-rep(cen[2]-gen, nkid) grid.segments(x0=kid.x[isk], y0=kid.y[isk]+gen, x1=kid.x[isk], y1=kid.y[isk]+size/2) draw.person(kid.x, kid.y, text=id[c(-1,-2)], size=size, sex=sex[c(-1,-2)], fill=fill[c(-1,-2)]) res<-data.frame(id=c(father$id, mother$id, children$id), x=c(father$x, mother$x, kid.x), y=c(father$y, mother$y, kid.y)) detach(fam.obj) # print(res) res } draw.sibs.plus.spouses <- function(fam.obj, gen=0.20, size=0.10) { # # Children # nkid<-nrow(children[is.na(children$x)]) hbar<-min(1-size, nkid*size) hbar.cen<-cen[1] if ((hbar.cen+hbar/2+size/2)>1) { hbar.cen<-1-hbar/2-size/2 }else if ((hbar.cen-hbar/2-size/2)<0) { hbar.cen<-hbar/2+size/2 } if (nkid>1) { grid.lines(c(hbar.cen-hbar/2, hbar.cen+hbar/2), rep(cen[2], 2)) kid.x<-hbar.cen-hbar/2+(0:(nkid-1))*hbar/(nkid-1) }else{ kid.x<-cen[1] } isk<-children$ischild kid.y<-rep(cen[2]-gen, nkid) grid.segments(x0=kid.x[isk], y0=kid.y[isk]+gen, x1=kid.x[isk], y1=kid.y[isk]+size/2) draw.person(kid.x, kid.y, text=id[c(-1,-2)], size=size, sex=sex[c(-1,-2)], fill=fill[c(-1,-2)]) res<-data.frame(id=c(father$id, mother$id, children$id), x=c(father$x, mother$x, kid.x), y=c(father$y, mother$y, kid.y)) detach(fam.obj) # print(res) res } # # Automatic layout of pedigree drawing # # 101 102 ped.layout # +--+--+ id, position - marriage nodes # | - parental node # +---+----+-----------+ between generation: # | | | widest generation first # 201 202 203 204 205 within generation: # | | | | marry-in # +-+-+ +-+-+ sib # | | parents # +-+-+---+---+ +-+-+ spouse # | | | | | | # 301 302 303 304 305 306 # # # sig.others <- list of c(me, fa, mo, spouses) # position <- vector of x.positions of each pedigree member # drawing.size <- function(sig.others, x.pos, midpoint=20, par.weight=4, centre.weight=0.5) { dist.to.pars<- function(x,x.pos) { max(0,sum(abs(x.pos[x[1]]-x.pos[x[c(2,3)]]), na.rm=T), na.rm=T) } dist.to.spouses<-function(x,x.pos) { max(0,sum(abs(x.pos[x[1]]-x.pos[x[-c(1,2,3)]]), na.rm=T), na.rm=T) } par.weight*sum(unlist(lapply(sig.others, dist.to.pars, x.pos))) + centre.weight*sum(abs(x.pos-midpoint)) + sum(unlist(lapply(sig.others, dist.to.spouses, x.pos))) } # # Mendelian checking # # Tests if child genotype consistent with parental genotypes: # parcon=4*Pr(Child_genotype|Father_genotype,Mother_genotype) # if xmale TRUE then X-linked locus *and* male child # parcon <-function(c1,c2,p11,p12,p21,p22,xmale=FALSE) { res<-rep(0, length(c1)) if (xmale) { 2*sum(c1==p21,c1==p22) }else{ res<-apply(cbind((c1==p11 & c2==p21) | (c1==p21 & c2==p11), (c1==p11 & c2==p22) | (c1==p22 & c2==p11), (c1==p12 & c2==p21) | (c1==p21 & c2==p12), (c1==p12 & c2==p22) | (c1==p22 & c2==p12)), 1, sum) print(res) res } } # # test if child genotype consistent with one parental genotype # opcon <-function(c1,c2,p1,p2) { (c1==p1 | c1==p2 | c2==p1 | c2==p2) } # # check for heterozygote males among X-linked loci # xlinked.hets <- function(marker, ped, err.drop=TRUE) { idx <- !is.hom(ped[,marker]) idx <- !is.na(idx) & idx & ped$sex %in% "m" if (any(idx)) { cat("\n") cat(paste("ERROR: Heterozygous male ",paste(ped$pedigree, ped$id, sep="-")[idx], " at X-linked locus " , marker,"\n", sep=""),sep="") cat("\n") } if (err.drop) ped[idx,marker]<-NA ped } # # check nuclear families # all.possible.genos <- function(alleles) { alleles<-sort(alleles[!is.na(alleles)]) cbind(rep(alleles,seq(length(alleles),1,-1)), rev(alleles)[sequence(seq(length(alleles),1,-1))]) } check.nuclear <- function(marker, ped, err.drop=TRUE) { gtp<-geno.as.array(ped[,marker]) nonfounders<-!is.na(ped$fa) bysibships<-rep(0,nrow(ped)) bysibships[nonfounders]<-as.integer(factor(paste(ped$pedigree,ped$gener,ped$fa,ped$mo)[nonfounders])) nships<-max(bysibships) for(i in 1:nships) { idx<-(bysibships==i) currf<-ped$id==(ped$fa[idx][1]) currm<-ped$id==(ped$mo[idx][1]) incon<-rep(FALSE, sum(idx)) if (!is.na(gtp[currf, 1]) && !is.na(gtp[currm, 1])) { incon<-(parcon(gtp[idx,1],gtp[idx,2], gtp[currf,1],gtp[currf,2], gtp[currm,1],gtp[currm,2])==0) cat("incon=",incon,"\n") cat("incon=",any(incon),"\n") if (any(incon)) { cat(paste("ERROR: Mendelian inconsistency due to child ", paste(ped$pedigree, ped$id, sep="-")[idx][incon], " at locus " , marker,"\n",sep="")) if (err.drop) { gtp[idx,][incon,]<-NA ped[idx,marker][incon]<-NA } } }else{ alleles<-unique(c(gtp[c(currf | currm | idx),])) if (is.na(gtp[currf, 1])) { fa.gtp<-all.possible.genos(alleles) }else { fa.gtp<-matrix(gtp[currf,], nc=2) } incon <- t(apply(fa.gtp, 1, function(x) !opcon(gtp[idx,1],gtp[idx,2],x[1], x[2]))) cat("FA:\n") print(cbind(fa.gtp, incon)) if (is.na(gtp[currm, 1])) { mo.gtp<-all.possible.genos(alleles) }else { mo.gtp<-matrix(gtp[currm,], nc=2) } cat("MO:\n") incon <- t(apply(mo.gtp, 1, function(x) !opcon(gtp[idx,1],gtp[idx,2],x[1], x[2]))) print(cbind(mo.gtp, incon)) if (any(incon)) { cat(paste("ERROR: Parent-offspring inconsistency due to child ", paste(ped$pedigree, ped$id, sep="-")[idx][incon], " at locus " , marker,"\n",sep="")) if (err.drop) { gtp[idx,][incon,]<-NA ped[idx,marker][incon]<-NA } } cat("FA:\n") print(fa.gtp) cat("MO:\n") print(mo.gtp) incon<-parcon(gtp[idx,1],gtp[idx,2], fa.gtp[1],fa.gtp[2], mo.gtp[1],mo.gtp[2])==0 print(incon) } } ped } # # General pedigree Mendel checker following Lange and Goradia 1987 # Known to be incomplete if loops # check.ped <- function(marker, ped, err.drop=TRUE) { gtp<-geno.as.array(ped[,marker]) alleles<-unique(gtp) nonfounders<-!is.na(ped$fa) bysibships<-rep(0,nrow(ped)) bysibships[nonfounders]<-as.integer(factor(paste(ped$pedigree,ped$gener,ped$fa,ped$mo)[nonfounders])) nships<-max(bysibships) for(i in 1:nships) { idx<-(bysibships==i) currf<-ped$id==(ped$fa[idx][1]) currm<-ped$id==(ped$mo[idx][1]) incon<-rep(FALSE, sum(idx)) if (is.na(gtp[currf, 1])) { fa.gtp<-all.possible.genos(alleles) }else { fa.gtp<-as.matrix(gtp[currf,]) cat("FA: ") incon<-!(opcon(gtp[idx,1],gtp[idx,2],fa.gtp[1], fa.gtp[2])) } if (is.na(gtp[currm, 1])) { mo.gtp<-all.possible.genos(alleles) }else { mo.gtp<-as.matrix(gtp[currm,]) cat("MO: ") incon<-!(opcon(gtp[idx,1],gtp[idx,2],mo.gtp[1], mo.gtp[2])) } if (any(incon)) { cat(paste("ERROR: Parent-offspring inconsistency due to child ", paste(ped$pedigree, ped$id, sep="-")[idx][incon], " at locus " , marker,"\n",sep="")) if (err.drop) { gtp[idx,][incon,]<-NA ped[idx,marker][incon]<-NA } } print(fa.gtp) print(mo.gtp) incon<-parcon(gtp[idx,1],gtp[idx,2], fa.gtp[1],fa.gtp[2], mo.gtp[1],mo.gtp[2])==0 print(incon) } ped } # # Metropolis sampler # test.varcomp.sampler <- function(trait, ped, model="mu ve va", it=10000, mu=0.4299, va=0.2084, ve=0.4816, burnin=(it %/% 10), samples=1000, batches=100) { banner("MCMC Variance components analysis of \"", trait,"\"") if ("nspObject" %in% class(ped)) ped <- ped$ped if (batches>it) batches <- it if (samples>it) samples <- it # # set up pedigree # num <- nrow(ped) samp <- seq(1,num) founders <- is.na(ped$fa) fa.p<-match(paste(ped$pedigree,ped$fa), paste(ped$pedigree,ped$id)) mo.p<-match(paste(ped$pedigree,ped$mo), paste(ped$pedigree,ped$id)) rsd <- segvar(ped, fa.p, mo.p) # # set up model # typed <- !is.na(ped[,trait]) if (sum(typed)==0) { error("No phenotyped individuals") } y <- ped[typed, trait] nfam <- length(unique(ped$pedigree[typed])) nobs <- sum(typed) ymean <- mean(y) yvar <- var(y) parnames <- unlist(strsplit(model,split="[ ]+")) npar <- length(parnames) bpar <- par <- double(npar) names(par) <- parnames pos.mu <- grep("mu", parnames) pos.ve <- grep("ve", parnames) pos.va <- grep("va", parnames) par[pos.mu] <- mu par[pos.ve] <- ve par[pos.va] <- va batchest <- matrix(nr=batches, nc=npar) colnames(batchest) <- parnames parest <- matrix(nr=samples+1, nc=npar+4) colnames(parest) <- c("it", "prop", "accept", "lik", parnames) res <- matrix(nr=samples+1, nc=num) colnames(res) <- ped$id res[1,] <- bval <- rep(0.0, num) blik <- breedlik(par, bval, founders, fa.p, mo.p, rsd, pos.va) totlik <- phenlik(par, y, res[1,], typed, pos.mu, pos.va, pos.ve) + blik parest[1, ] <- c(1,0,0,totlik,par) accept.raw <- matrix(nr=it, nc=6) colnames(accept.raw) <- c("prop", "oldlik", "newlik", "accept", "oldval","newval") # # Preliminary output # cat("Number of families = ", nfam, "\n") cat("Number of observations = ", nobs, "\n") cat("Burn-in MCMC iterations = ", burnin, "\n") cat("Evaluated MCMC iterations = ", it, "\n") cat("Model parameters = ", model, "\n\n") cat("Global trait mean = ", ymean, "\n") cat("Global trait variance = ", yvar, "\n") # # Iterate MCMC chain # props <- seq(0, npar) prop <- 0 accept <- 0 i <- 0 # # burnin iterations # while(i < burnin) { i <- i+1 # breeding value proposal if (prop==0) { prop.res <- proposal.breeding(blik, totlik, par, y, bval, num, typed, founders, fa.p, mo.p, rsd, samp, pos.mu, pos.va, pos.ve) if (prop.res$accept) { bval <- prop.res$bval blik <- prop.res$blik totlik <- prop.res$totlik } # global means/variances proposal }else{ prop.res <- proposal.globals(prop, blik, totlik, par, y, bval, num, typed, founders, ymean, yvar, pos.mu, pos.va, pos.ve) if (prop.res$accept) { par <- prop.res$par totlik <- prop.res$totlik } } # prop <- sample(props,1) } # # Main run # # Count proposals aprop <- nprop <- integer(npar+1) # sample realizations isamp <- 0 nsamp <- 1 tsamp <- it %/% (samples-1) # summarize batches ibatch <- 0 nbatch <- 0 tbatch <- it %/% batches i <- 0 while(i < it) { i <- i+1 isamp <- isamp + 1 # breeding value proposal if (prop==0) { for (j in samp) { nprop[prop+1] <- nprop[prop+1]+1 prop.res <- proposal.breeding(blik, totlik, par, y, bval, num, typed, founders, fa.p, mo.p, rsd, samp, pos.mu, pos.va, pos.ve) if (prop.res$accept) { bval <- prop.res$bval blik <- prop.res$blik totlik <- prop.res$totlik aprop[1] <- aprop[1]+1 } } # global means/variances proposal }else{ nprop[prop+1] <- nprop[prop+1]+1 prop.res <- proposal.globals(prop, blik, totlik, par, y, bval, num, typed, founders, ymean, yvar, pos.mu, pos.va, pos.ve) if (prop.res$accept) { par <- prop.res$par totlik <- prop.res$totlik aprop[prop+1] <- aprop[prop+1]+1 } accept.raw[i,] <- c(prop, prop.res$oldlik, prop.res$newlik, prop.res$accept, prop.res$oldpar, prop.res$newpar) } # accumulate batch ibatch <- ibatch+1 dev <- par-bpar bpar <- bpar + dev/ibatch if (ibatch==tbatch) { nbatch <- nbatch + 1 ibatch <- 0 batchest[nbatch,] <- bpar bpar <- double(npar) } # accumulate samples if (isamp==tsamp) { nsamp <- nsamp + 1 isamp <- 0 parest[nsamp,] <- c(i, prop,accept,totlik,par) res[nsamp,] <- bval } # prop <- sample(props,1) } if (nsamprunif(1)) { accept <- 1 blik <- blik2 totlik <- totlik2 bval <- p }else{ accept <- 0 } list(blik=blik, totlik=totlik, bval=bval, accept=accept) } # # Proposal and test for global parameters: total mean, variances # proposal.globals <- function(prop, blik, totlik, par, y, bval, num, typed, founders, ymean, yvar, pos.mu, pos.va, pos.ve) { tuner <- 0.4 oldlik <- totlik oldpar <- par[prop] newpar <- par prop.ratio <- 0 if (prop == pos.mu) { newpar[prop] <- newpar[prop] + rnorm(1) * tuner * sqrt(yvar) }else { newpar[prop] <- newpar[prop] + rnorm(1) * tuner * yvar } totlik2 <- phenlik(newpar, y, bval, typed, pos.mu, pos.va, pos.ve) + blik qa <- min(1.0, exp(totlik2-totlik + prop.ratio)) if (qa>runif(1)) { accept <- 1 totlik <- totlik2 par <- newpar }else{ accept <- 0 } list(totlik=totlik, par=par, accept=accept, oldlik=oldlik, newlik=totlik2, oldpar=oldpar, newpar=newpar[prop]) } # # triangular proposal distribution # rtri <- function(n, range=1) 0.5*range*(runif(n)+runif(n)-1) # # plot diagnostics for varcomp.sampler output # plot.varcomp.sampler <- function(x, one.page=TRUE) { y <- x$ped[, x$trait] ymean <- mean(y, na.rm=TRUE) yvar <- var(y, na.rm=TRUE) vrange <- c(0, max(x$batches$va+x$batches$ve)) if (one.page) { op <- par(mfcol=c(4,1), mar=c(0.1, 4, 0, 1)) with(x$batches, plot(mu, t="l", axes=FALSE, ylab="Grand Mean")) abline(h=ymean, col="red", lty=3) axis(2); box() with(x$batches, plot(va+ve, t="l", axes=FALSE, ylab="VT", ylim=vrange)) abline(h=yvar, col="red", lty=3) axis(2); box() with(x$batches, plot(va, t="l", axes=FALSE, ylab="VA", ylim=vrange)) axis(2); box() par(mar=c(2.1, 4, 0, 1)) with(x$batches, plot(ve, t="l", ylab="VE", ylim=vrange)) par(op) }else{ with(x$batches, plot(mu, t="l")) abline(h=ymean, col="red", lty=3) with(x$batches, plot(va+ve, t="l", axes=FALSE, ylab="VT", ylim=vrange)) abline(h=yvar, col="red", lty=3) with(x$batches, plot(va, t="l", ylab="VA", ylim=vrange)) with(x$batches, plot(ve, t="l", ylab="VE", ylim=vrange)) } } summary.varcomp.sampler <- function(x, one.page=TRUE) { cat("\nBased on ", x$job.pars$batches, " batches of size ", x$job.pars$batchsize, ":\n") cat("\nParameters\n\n") ptab <- apply(x$batches, 2, function(x) c(mean(x), median(x), sd(x))) ar.sd <- sqrt(diag(ar(x$batches)$var.pred)) ptab <- data.frame(t(ptab), ar.sd) colnames(ptab) <- c("Mean", "Median", "SD", "AR.SD") print(ptab) cat("\nProposal Acceptance Rates\n\n") print(x$acceptance) } # # Simulate MVN quantitative trait # simqtrait <- function(trait, ped, h2=0.5) { if (h2>1.0) h2 <- 1.0 if (h2<0.0) h2 <- 0.0 e2 <- 1-h2 sdA <- sqrt(h2) sdE <- sqrt(e2) # # set up pedigree # num <- nrow(ped) qtrait <- rep(NA,num) founders <- is.na(ped$fa) fa.p<-match(paste(ped$pedigree,ped$fa), paste(ped$pedigree,ped$id)) mo.p<-match(paste(ped$pedigree,ped$mo), paste(ped$pedigree,ped$id)) rsd <- segvar(ped, fa.p, mo.p) qtrait[founders] <- rnorm(sum(founders)) for(i in 1:num) { if (is.na(qtrait[i])) { midpar <- 0.5*(qtrait[fa.p[i]] + qtrait[mo.p[i]]) qtrait[i] <- rnorm(1, mean=midpar, sd=rsd[i]) } } qtrait <- qtrait*sdA + rnorm(num, mean=0, sd=sdE) qtrait } # # Proposal and test for standardized breeding value # proposal.breeding.test <- function(idx, blik, totlik, par, y, bval, num, typed, founders, fa.p, mo.p, rsd, samp, pos.mu, pos.va, pos.ve) { p <- bval p[idx] <- p[idx] + 0.5*rnorm(1) blik2 <- breedlik(par, p, founders, fa.p, mo.p, rsd, pos.va) totlik2 <- phenlik(par, y, p, typed, pos.mu, pos.va, pos.ve) + blik2 qa <- min(1.0, exp(totlik2-totlik)) if (qa>runif(1)) { accept <- 1 blik <- blik2 totlik <- totlik2 bval <- p }else{ accept <- 0 } list(blik=blik, totlik=totlik, bval=bval, accept=accept) } animal.model <- function(trait, ped, h2) { banner("BLUPS for \"", trait,"\"") ntrait<-length(trait) if (ntrait>1) { warning("Univariate analysis only. Analysing first trait.") } vc.data<-create.vc.data(trait,covariates=NULL,ibds=NULL,ped) lapply(vc.data, blup.fam, h2) } blup.fam <- function(vc.data, h2) { n <- length(vc.data$y) lam <- (1-h2)/h2 Ainv <- solve(vc.data$rel) M <- rbind(matrix(c(n, rep(1, n)), nr=1), cbind(matrix(1, nc=1, nr=n), diag(rep(1,n))+lam*Ainv)) blups <- solve(M, c(sum(vc.data$y), vc.data$y)) variances <- diag(solve(M)) names(variances) <- names(blups) <- c("mean", vc.data$id) list(blups=blups, variances=variances) } grid.sampler <- function(trait, ped, model="mu ve va", it=1000, mu=0.4299, va=seq(0.1,1.5,0.1), ve=seq(0.4, 0.8, 0.1)) { g <- expand.grid(va, ve) g <- data.frame(g, rep(NA, nrow(g))) names(g) <- c("va", "ve", "lik") for(i in 1:nrow(g)) { res <- test.varcomp.sampler(trait, ped, it=it, va=g[i,1], ve=g[i,2]) g$lik[i] <- mean(res$samples$lik) cat(i,":") print(g[i,]) } g }