##################################### # BUM S-plus function library # Author: Stan Pounds, Ph.D. # Created: January 7, 2003 # Last Modified: January 10, 2003 # About this library: This library implements the BUM procedures for estimating # the occurrence of errors in microarray analysis. For more # information see Pounds and Morris (2003) "Estimating the # occurrence of false positives and false negatives in # microarray studies by approximating and partitioning the # empirical distribution of p-values." Bioinformatics, 19, 368-375. # Disclaimer: Any damages arising from use of this software are NOT # the responsibility of its developer, Stan Pounds, or # his employer, St. Jude Children's Research Hospital. ############################################################################## ############################################################################## # Functions in this library - Functions most useful for analysis are marked with an asterisk (*). # Functions are listed alphabetically here. More detailed # documentation can be found in the code where the respective # functions are declared and defined. Use the "find" feature # of your text editor to find the function declaration and # respective documentation on how to use each function. Enter # "Function: function.name" to find the function and its documentation. # For example, to find documentation on "bum.EBP", find the string # "Function: bum.EBP". # # *bum.CI - compute confidence intervals for quantities based on the estimation of a bum model # # *bum.EBP - compute the empirical Bayes' posterior for a given BUM model # # *bum.error.diagram - produce a diagram illustrating the occurrence of errors in the analysis # # *bum.FDR - compute the false discovery rate for a given BUM model # # *bum.false.negative - estimate the proportion of false negatives committed # # *bum.false.positive - estimate the proportion of false positives committed # # *bum.histogram - produce a histogram of p-values and compare to BUM MLE curve # # bum.logL - calculate the log-likelihood for the BUM model for a set of p-values # # *bum.logL.contour - find contours of the log-likelihood in terms of a and lambda # # *bum.mle - find the MLE for the BUM parameters # # *bum.true.negative - estimate the proportion of true negatives when a particular p-value threshold is used # # *bum.true.positive - estimate the proportion of true positives when a particular p-value threshold is used # # *bum.weighted.error - estimate a weighted sum of the proportion of false negatives and false positives # # dalt - calculate the density of the alternative component of a BUM # # dbum - calculate the density of the BUM distribution # # *ext.pi - extract the uniform component from a BUM distribution # # *find.EBP.threshold - find the p-value threshold corresponding to a desired empirical Bayes' posterior # # *find.FDR.threshold - find the p-value threshold corresponding to a desired false discovery rate # # *find.WE.threshold - find the p-value threshold that minimizes the weighted # sum of the estimated proportion of false positives # and estimated proportion of false negatives # # inv.dbum - the mathematical inverse of the BUM density # # inv.logit - compute the inverse of the logit # # logit - compute the logit # # neg.bum.logL - negative log-likelihood of the BUM model for a set of p-values # # palt - calculate the cdf of the alternative component of the BUM # # pbum - calculate the cdf of the BUM distribution # # qalt - calculate the quantile of the alternative component of the BUM # # qbum - calculate the quantile of the BUM distribution # # *qqbum - produce a BUM quantile-quantile plot # # rbum - generate random BUM observations # # special.case.CI - find CI's for error control quantities that # are not linear in a and lambda # # special.case.function - function used by special.case.CI # # unpack.contour.object - repackage the output of the S-plus function contour ############################################################################## ######################################################################### # Function: bum.CI # Purpose: Produce a confidence interval for various BUM related quantities # based on the confidence contours computed by bum.logL.contour # Arguments: bc.object - object returned by bum.logL.contour # quantity - string specifying the quantity of interest (default = "pi") # possible values: "pi", "a", "lambda", "FDR", "EBP" # "FDR.threshold", "EBP.threshold", # "false.positve", "false.negative", # "true.positive", "true.negative" # arg - additional argument for some quantities to be produced, # for example, if the quantity is "FDR", a value of a # threshold must be specified so that a CI for the FDR # can be computed. If the quantity is "FDR.threshold", # the desired level of FDR must be specified, and a CI will # be computed for the corresponding threshold. (default = 0.05) # MLE - object returned by bum.mle, if specified the function will return # a point estimate based on the value of a and lambda in the object # Returns: a list with the following components # quantity - a string specifying the quantity being estimated # point.estimate - the point estimate (if the argument MLE is specified) # CIs - a data frame giving the confidence level and corresponding intervals # Notes: For quantities monotone in both a and lambda, the interval corresponds to # extrema of the quantity among the (a,lambda) pairs in bc.object. For # other quantities, the interval corresponds to the extrema within the # minimal bounding box for the confidence contours. # May Call: ext.pi, bum.EBP, bum.FDR, find.EBP.threshold, find.FDR.threshold, special.case.CI # bum.false.positive, bum.false.negative, bum.true.positive, bum.true.negative ########################################################################## bum.CI<-function(bc.object,quantity="pi",arg=0.05,MLE=NULL) { conflevels<-as.numeric(levels(as.category(bc.object$conf))) if(any(quantity==c("EBP","FDR.threshold","EBP.threshold"))) { result<-special.case.CI(bc.object,quantity,arg) if(is.null(MLE)) return(list(quantity=quantity,CIs=cbind(conf.level=conflevels,CILB=result$CImin,CIUB=result$CImax))) if (quantity=="EBP") point.estimate<-bum.EBP(arg,MLE$a,MLE$lambda) if (quantity=="FDR.threshold") point.estimate<-find.FDR.threshold(arg,MLE$a,MLE$lambda) if (quantity=="EBP.threshold") point.estimate<-find.EBP.threshold(arg,MLE$a,MLE$lambda) return(list(quantity=quantity,point.estimate=point.estimate,CIs=cbind(conf.level=conflevels,CILB=result$CImin,CIUB=result$CImax))) } nCIs<-length(conflevels) CILB<-rep(0,nCIs) CIUB<-rep(0,nCIs) x<-NULL if (quantity=="pi") x<-ext.pi(bc.object$a,bc.object$lambda) if (quantity=="a") x<-bc.object$a if (quantity=="lambda") x<-bc.object$lambda if (quantity=="FDR") x<-bum.FDR(arg,bc.object$a,bc.object$lambda) if (quantity=="false.positive") x<-bum.false.positive(arg,bc.object$a,bc.object$lambda) if (quantity=="false.negative") x<-bum.false.negative(arg,bc.object$a,bc.object$lambda) if (quantity=="true.positive") x<-bum.true.positive(arg,bc.object$a,bc.object$lambda) if (quantity=="true.negative") x<-bum.true.negative(arg,bc.object$a,bc.object$lambda) n<-length(x) if(length(x)==0) return("Unknown Quanitity Specified.") for (i in 1:nCIs) { select<-(1:n)[bc.object$conf==conflevels[i]] CILB[i]<-min(x[select],na.rm=T) CIUB[i]<-max(x[select],na.rm=T) } if(is.null(MLE)) return(list(quantity=quantity,CIs=cbind(conf.level=conflevels,CILB=CILB,CIUB=CIUB))) if (quantity=="pi") point.estimate<-ext.pi(MLE$a,MLE$lambda) if (quantity=="a") point.estimate<-MLE$a if (quantity=="lambda") point.estimate<-MLE$lambda if (quantity=="FDR") point.estimate<-bum.FDR(arg,MLE$a,MLE$lambda) if (quantity=="false.positive") point.estimate<-bum.false.positive(arg,MLE$a,MLE$lambda) if (quantity=="false.negative") point.estimate<-bum.false.negative(arg,MLE$a,MLE$lambda) if (quantity=="true.positive") point.estimate<-bum.true.positive(arg,MLE$a,MLE$lambda) if (quantity=="true.negative") point.estimate<-bum.true.negative(arg,MLE$a,MLE$lambda) return(list(quantity=quantity,point.estimate=point.estimate,CIs=cbind(conf.level=conflevels,CILB=CILB,CIUB=CIUB))) } ######################################################################### # Function: bum.EBP # Purpose: Compute the lower bound of BUM based empirical Bayes posterior # probability of the alternative hypothesis # Arguments: x - the point or vector of points at which to compute EB post # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: the lower bound of the BUM based empirical Bayes posterior at x # Notes: Computes an upper bound because maximal extraction of uniform density # is performed. Requires 0 0. # default = .1*tau # title - string giving main title of graph # showkey - boolean, indicates whether to include key. # Returns: Nothing is returned by this function. A plot is produced. # Notes: Requires 0tau],1,tau),c(dbum(tau,a,lambda),dbum(x[x>tau],a,lambda),pi,pi),col=4) text(-.02,pi,"p",font=8) text(tau,-.01*ymax,"t",font=8) lines(x,dbum(x,a,lambda)) lines(c(0,1),c(pi,pi)) lines(c(tau,tau),c(0,dbum(tau,a,lambda))) lines(c(0,1),c(0,0)) lines(c(0,0),c(0,dbum(x[1],a,lambda))) lines(c(1,1),c(0,pi)) if(showkey) { text(.8,ymax,"p",font=8) text(.87,ymax,paste(" = ",round(pi,4))) text(.8,.95*ymax,"t",font=8) text(.87,.95*ymax,paste(" = ",round(tau,4))) text(.85,.9*ymax,paste("FP <=",round(bum.false.positive(tau,a,lambda),4)),col=2) text(.85,.85*ymax,paste("FN >=",round(bum.false.negative(tau,a,lambda),4)),col=4) text(.85,.80*ymax,paste("TP >=",round(bum.true.positive(tau,a,lambda),4)),col=3) text(.85,.75*ymax,paste("TN <=",round(bum.true.negative(tau,a,lambda),4)),col=5) } } ######################################################################### # Function: bum.FDR # Purpose: Compute an estimated upper bound for the false discovery rate # when significance is determined by comparing a p-value to # a threshold tau # Arguments: tau - the threshold of comparison (point or vector) # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: Estimated upper bound of FDR # Notes: FDR is the expected proportion of rejections that are false # positives, or Type I errors. Requires 00) { starta<-logit(runif(nstartpts)) startlambda<-logit(runif(nstartpts)) } adjusted<-min(pvals)<=0 if (adjusted) pvals<-(nadj*pvals+adjeps)/(nadj+2*adjeps) bestlogL<- -Inf nstartpts<-length(starta) for (i in 1:nstartpts) { results<-nlminb(c(starta[i],startlambda[i]),neg.bum.logL,pvals=pvals) if (-results$objective>bestlogL) { a<-inv.logit(results$parameters[1]) lambda<-inv.logit(results$parameters[2]) logL<- -results$objective nits<- results$iterations termination <- results$message bestlogL<-logL } } return(list(a=a,lambda=lambda,logL=logL,pvals.adjusted=adjusted,nstartpts=nstartpts,nits=nits,termination=termination)) } ######################################################################### # Function: bum.true.negative # Purpose: Compute an estimated lower bound for the proportion of all # tests resulting in true negatives when significance is # determined by comparing the p-value to a threshold tau # Arguments: tau - threshold # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: Estimated lower bound for proportion of all tests resulting # true negatives # Calls: ext.pi ######################################################################### bum.true.negative<-function(tau,a,lambda) { pi<-ext.pi(a,lambda) return(pi*(1-tau)) } ######################################################################### # Function: bum.true.positive # Purpose: Compute an estimated lower bound for the proportion of all # tests resulting in true positives when significance is # determined by comparing the p-value to a threshold tau # Arguments: tau - threshold # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: Estimated lower bound for proportion of all tests resulting # true positives # Calls: ext.pi ######################################################################### bum.true.positive<-function(tau,a,lambda) { pi<-ext.pi(a,lambda) return(pbum(tau,a,lambda)-pi*tau) } ######################################################################### # Function: bum.weighted.error # Purpose: Compute a linear combination of the upper bound for the # proportion of false positives and the lower bound for the # proportion of false negatives resulting when significance # is determined by comparing p-values to a threshold # Arguments: tau - threshold # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # wfp - weight given to the false positives # wfn - weight given to the false negatives # Returns: the weighted combination of error rates # Calls: bum.false.positive, bum.false.negative ######################################################################### bum.weighted.error<-function(tau,a,lambda,wfp=1,wfn=1) { return(wfp*bum.false.positive(tau,a,lambda)+wfn*bum.false.negative(tau,a,lambda)) } ######################################################################### # Function: dalt # Purpose: compute the density of the alternative distribution # Arguments: x - p-value of interest # a - shape parameter of beta component # lambda - mixing parameter, proportion of uniform component # Returns: vector of density at x # Calls: ext.pi, dbum ######################################################################## dalt<-function(x,a,lambda) { pi<-ext.pi(a,lambda) return((dbum(x,a,lambda)-pi)/(1-pi)) } ########################################################################## # Function: dbum # Purpose: Compute the pdf of the bum distribution # Arguments: x - point or vector of points at which to compute pdf # a - shape parameter of beta component # lambda - mixture parameter, proportion of uniform component # Returns: value of the pdf of the bum distribution for x # Notes: While dbum does not require 01)+(lambda<0)>0]<-NA a[(a>1)+(a<0)]<-NA return((lambda+(1-lambda)*a)) } ######################################################################### # Function: find.EBP.threshold # Purpose: Find a p-value threshold so that all p-values less than the # threshold have an empirical Bayes (EB) posterior probability # greater than a desired level # Arguments: EBP - desired empirical Bayes' posterior # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: p-value threshold that maintains the desired EB posterior # Calls: ext.pi ######################################################################### find.EBP.threshold<-function(EBP,a,lambda) { pi<-ext.pi(a,lambda) return(((EBP*lambda+a*(1-lambda))/(a*(1-EBP)*(1-lambda)))^(1/(a-1))) } ######################################################################### # Function: find.FDR.threshold # Purpose: Find a p-value threshold so that all p-values less than the # threshold have an FDR lower than a desired level # Arguments: fdr - desired fdr # a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # Returns: p-value threshold that maintains the desired fdr ######################################################################### find.FDR.threshold<-function(fdr,a,lambda) { pi<-ext.pi(a,lambda) return(((pi-fdr*lambda)/(fdr*(1-lambda)))^(1/(a-1))) } ######################################################################### # Function: find.WE.threshold # Purpose: Compute the significance threshold for p-values so that # to obtain an optimal weighted error comparison # Arguments: a - shape parameter of beta component of bum distribution # lambda - mixing parameter, proportion uniform in bum dist # wfp - weight of false positives # wfn - weight of false negatives # nbisect - number of bisections for inv.dbum # Returns: a p-value threshold that optimizes the weighted error function # Notes: inv.dbum will use the bisection method, hence results will # be accurate to within .5^(nbisect). # Calls: ext.pi, inv.dbum ######################################################################### find.WE.threshold<-function(a,lambda,wfp=1,wfn=1,nbisect=20) { pi<-ext.pi(a,lambda) return(inv.dbum((wfp+wfn)*pi/wfn,a,lambda,nbisect)) } ########################################################################## # Function: inv.dbum # Purpose: Compute the inverse of the pdf of the bum distribution # Arguments: y - value of the pdf of the bum of interest # a - shape parameter of the beta component # lambda - mixing parameter, weight of uniform component # nbisect - the number of bisections to perform # Returns: x so that pbum(x,a,lambda) = y # Notes: Uses the bisection method. The results will # be accurate to within 2^(-nbisect). Used by find.WE.threshold. ########################################################################## inv.dbum<-function(y,a,lambda,nbisect=20) { n<-length(y) mid<-rep(0,n) for (i in 1:n) { top<-1 bot<-0 for (j in 1:nbisect) { mid[i]<-mean(c(top,bot)) if(dbum(mid[i],a,lambda)>y[i]) bot<-mid[i] else top<-mid[i] } } return(mid) } ######################################################################### # Function: inv.logit # Purpose: Compute the inverse logit of x # Arguments: x - point or vector of points # Returns: inverse logit of x ######################################################################### inv.logit<-function(x) { return(exp(x)/(1+exp(x))) } ######################################################################### # Function: logit # Purpose: Compute the logit of an x in (0,1) # Arguments: x - point or vector of points in (0,1) # Returns: logit of x # Notes: requires 0