#####################################
# BUM S-plus function library
# Author: Stan Pounds, Ph.D.
# Created: January 7, 2003
# Last Modified: January 10, 2003
# About this library: This library implements the BUM procedures for estimating
#                     the occurrence of errors in microarray analysis.  For more
#                     information see Pounds and Morris (2003) "Estimating the 
#                     occurrence of false positives and false negatives in 
#                     microarray studies by approximating and partitioning the 
#                     empirical distribution of p-values." Bioinformatics, 19, 368-375.
# Disclaimer:  Any damages arising from use of this software are NOT
#              the responsibility of its developer, Stan Pounds, or
#              his employer, St. Jude Children's Research Hospital.
##############################################################################

##############################################################################
# Functions in this library - Functions most useful for analysis are marked with an asterisk (*).
#                             Functions are listed alphabetically here.  More detailed
#                             documentation can be found in the code where the respective
#                             functions are declared and defined.  Use the "find" feature
#                             of your text editor to find the function declaration and
#                             respective documentation on how to use each function.  Enter
#                             "Function: function.name" to find the function and its documentation.
#                             For example, to find documentation on "bum.EBP", find the string 
#                             "Function: bum.EBP".
#
# *bum.CI - compute confidence intervals for quantities based on the estimation of a bum model
#
# *bum.EBP - compute the empirical Bayes' posterior for a given BUM model
#
# *bum.error.diagram - produce a diagram illustrating the occurrence of errors in the analysis
#
# *bum.FDR - compute the false discovery rate for a given BUM model
#
# *bum.false.negative - estimate the proportion of false negatives committed
#
# *bum.false.positive - estimate the proportion of false positives committed
#
# *bum.histogram - produce a histogram of p-values and compare to BUM MLE curve
#
#  bum.logL - calculate the log-likelihood for the BUM model for a set of p-values
#
# *bum.logL.contour - find contours of the log-likelihood in terms of a and lambda
#
# *bum.mle - find the MLE for the BUM parameters
#
# *bum.true.negative - estimate the proportion of true negatives when a particular p-value threshold is used
#
# *bum.true.positive - estimate the proportion of true positives when a particular p-value threshold is used
#
# *bum.weighted.error - estimate a weighted sum of the proportion of false negatives and false positives
#
#  dalt - calculate the density of the alternative component of a BUM
#
#  dbum - calculate the density of the BUM distribution
#
# *ext.pi - extract the uniform component from a BUM distribution
#
# *find.EBP.threshold - find the p-value threshold corresponding to a desired empirical Bayes' posterior
#
# *find.FDR.threshold - find the p-value threshold corresponding to a desired false discovery rate
#
# *find.WE.threshold - find the p-value threshold that minimizes the weighted
#                      sum of the estimated proportion of false positives
#                      and estimated proportion of false negatives
#
#  inv.dbum - the mathematical inverse of the BUM density
#
#  inv.logit - compute the inverse of the logit
#
#  logit - compute the logit
#
#  neg.bum.logL - negative log-likelihood of the BUM model for a set of p-values
#
#  palt - calculate the cdf of the alternative component of the BUM
#
#  pbum - calculate the cdf of the BUM distribution
#
#  qalt - calculate the quantile of the alternative component of the BUM
#
#  qbum - calculate the quantile of the BUM distribution
#
# *qqbum - produce a BUM quantile-quantile plot
#
#  rbum - generate random BUM observations
#
#  special.case.CI - find CI's for error control quantities that
#                    are not linear in a and lambda
#
#  special.case.function - function used by special.case.CI
#
#  unpack.contour.object - repackage the output of the S-plus function contour
##############################################################################

#########################################################################
# Function: bum.CI
# Purpose: Produce a confidence interval for various BUM related quantities
#          based on the confidence contours computed by bum.logL.contour
# Arguments: bc.object - object returned by bum.logL.contour
#            quantity - string specifying the quantity of interest (default = "pi")
#                       possible values: "pi", "a", "lambda", "FDR", "EBP"
#                                        "FDR.threshold", "EBP.threshold",
#                                        "false.positive", "false.negative",
#                                        "true.positive", "true.negative"
#            arg - additional argument for some quantities to be produced,
#                  for example, if the quantity is "FDR", a value of a
#                  threshold must be specified so that a CI for the FDR
#                  can be computed.  If the quantity is "FDR.threshold", 
#                  the desired level of FDR must be specified, and a CI will
#                  be computed for the corresponding threshold. (default = 0.05)
#            MLE - object returned by bum.mle, if specified the function will return
#                  a point estimate based on the value of a and lambda in the object
# Returns: a list with the following components
#          quantity - a string specifying the quantity being estimated
#          point.estimate - the point estimate (if the argument MLE is specified)
#          CIs - a data frame giving the confidence level and corresponding intervals
# Notes:  For quantities monotone in both a and lambda, the interval corresponds to
#         extrema of the quantity among the (a,lambda) pairs in bc.object.  For
#         other quantities, the interval corresponds to the extrema within the
#         minimal bounding box for the confidence contours.
# May Call: ext.pi, bum.EBP, bum.FDR, find.EBP.threshold, find.FDR.threshold, special.case.CI
#           bum.false.positive, bum.false.negative, bum.true.positive, bum.true.negative
##########################################################################                 

bum.CI<-function(bc.object,quantity="pi",arg=0.05,MLE=NULL)

{
	# Purpose: confidence interval(s), and optionally a point estimate, for a
	#          BUM-derived quantity, using the contour points in bc.object.
	# Arguments: bc.object - list with components a, lambda and conf
	#                        (one entry per contour point)
	#            quantity  - name of the quantity of interest
	#            arg       - extra argument (threshold or target level) handed
	#                        to the function that computes the quantity
	#            MLE       - optional list with components a and lambda; when
	#                        given, a point estimate is added to the result
	# Returns: list(quantity, [point.estimate], CIs), where CIs is a matrix
	#          with columns conf.level, CILB and CIUB; or the string
	#          "Unknown Quanitity Specified." when the quantity is not
	#          recognised (kept verbatim for backward compatibility).

	# Single dispatch table, used both for the contour points and for the
	# point estimate.  Everything it needs is passed in explicitly so it does
	# not rely on lexical scoping.  Returns NULL for an unrecognised name.
	eval.quantity<-function(quantity,arg,a,lambda)
	{
		if (quantity=="pi") return(ext.pi(a,lambda))
		if (quantity=="a") return(a)
		if (quantity=="lambda") return(lambda)
		if (quantity=="FDR") return(bum.FDR(arg,a,lambda))
		if (quantity=="false.positive") return(bum.false.positive(arg,a,lambda))
		if (quantity=="false.negative") return(bum.false.negative(arg,a,lambda))
		if (quantity=="true.positive") return(bum.true.positive(arg,a,lambda))
		if (quantity=="true.negative") return(bum.true.negative(arg,a,lambda))
		if (quantity=="EBP") return(bum.EBP(arg,a,lambda))
		if (quantity=="FDR.threshold") return(find.FDR.threshold(arg,a,lambda))
		if (quantity=="EBP.threshold") return(find.EBP.threshold(arg,a,lambda))
		NULL
	}

	# Distinct confidence levels in increasing order.  sort(unique()) gives
	# the same values as levels(as.category()) without the round trip
	# through character strings, and does not need the S-PLUS-only
	# as.category.
	conflevels<-sort(unique(bc.object$conf))

	if (any(quantity==c("EBP","FDR.threshold","EBP.threshold")))
	{
		# These quantities are delegated to special.case.CI.
		result<-special.case.CI(bc.object,quantity,arg)
		CILB<-result$CImin
		CIUB<-result$CImax
	}
	else
	{
		x<-eval.quantity(quantity,arg,bc.object$a,bc.object$lambda)
		if (length(x)==0) return("Unknown Quanitity Specified.")
		nCIs<-length(conflevels)
		CILB<-rep(0,nCIs)
		CIUB<-rep(0,nCIs)
		for (i in seq(along=conflevels))
		{
			# extrema of the quantity over the points of the i-th contour
			on.contour<-x[bc.object$conf==conflevels[i]]
			CILB[i]<-min(on.contour,na.rm=TRUE)
			CIUB[i]<-max(on.contour,na.rm=TRUE)
		}
	}

	CIs<-cbind(conf.level=conflevels,CILB=CILB,CIUB=CIUB)
	if (is.null(MLE)) return(list(quantity=quantity,CIs=CIs))
	point.estimate<-eval.quantity(quantity,arg,MLE$a,MLE$lambda)
	return(list(quantity=quantity,point.estimate=point.estimate,CIs=CIs))
}

#########################################################################
# Function: bum.EBP
# Purpose: Compute the lower bound of BUM based empirical Bayes posterior
#          probability of the alternative hypothesis
# Arguments: x - the point or vector of points at which to compute EB post
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: the lower bound of the BUM based empirical Bayes posterior at x
# Notes: Computes a lower bound because maximal extraction of uniform density
#        is performed.  Requires 0<x<1, 0<a<1, and 0<lambda<1.
# Calls: ext.pi, dbum
#########################################################################

bum.EBP<-function(x,a,lambda)
{
	# Share of the BUM density at x that lies above the uniform level
	# returned by ext.pi(a,lambda).
	uniform.level<-ext.pi(a,lambda)
	density.at.x<-dbum(x,a,lambda)
	excess<-density.at.x-uniform.level
	excess/density.at.x
}

#########################################################################
# Function: bum.error.diagram
# Purpose: Produce a bum.error.diagram
# Arguments: tau - significance threshold for p-values
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
#            xmin - the lower bound of x for the diagram.  Must be > 0.
#                   default = .1*tau
#            title - string giving main title of graph
#            showkey - boolean, indicates whether to include key.
# Returns: Nothing is returned by this function.  A plot is produced.
# Notes: Requires 0<tau<1, 0<a<1, 0<lambda<1.
# Calls: dbum, ext.pi, bum.false.positive, bum.false.negative,
#        bum.true.positive, bum.true.negative
#########################################################################

bum.error.diagram<-function(tau,a,lambda,xmin=NULL,title=NULL,showkey=F)
{
	# Draws the BUM density on [xmin,1] and shades four regions split by
	# the threshold tau (vertical) and the uniform level from ext.pi
	# (horizontal).  The colours match the key: 2 = FP, 5 = TN, 3 = TP,
	# 4 = FN.  Nothing useful is returned; the function is called for its plot.
	uniform.level<-ext.pi(a,lambda)
	if (is.null(xmin)) xmin<-.1*tau
	if (is.null(title)) title<-"Error Regions"

	# evaluation grid and the density values reused below
	x<-seq(from=xmin,to=1,length=1000)
	dens<-dbum(x,a,lambda)
	dens.first<-dbum(x[1],a,lambda)   # density at the left edge of the grid
	dens.tau<-dbum(tau,a,lambda)      # density at the threshold
	ymax<-max(dens)
	below<-x<=tau
	above<-x>tau

	# empty frame, then the four shaded regions
	plot(c(0,1),c(0,dens.first),main=title,xlab="p-value",ylab="Density",type="n")
	polygon(c(0,tau,tau,0),c(uniform.level,uniform.level,0,0),col=2)
	polygon(c(tau,1,1,tau),c(uniform.level,uniform.level,0,0),col=5)
	polygon(c(0,x[below],tau,tau,0),
		c(dens.first,dens[below],dens.tau,uniform.level,uniform.level),col=3)
	polygon(c(tau,x[above],1,tau),
		c(dens.tau,dens[above],uniform.level,uniform.level),col=4)

	# axis annotations ("p" and "t" are drawn in font 8)
	text(-.02,uniform.level,"p",font=8)
	text(tau,-.01*ymax,"t",font=8)

	# outlines: density curve, uniform level, threshold, and the frame
	lines(x,dens)
	lines(c(0,1),c(uniform.level,uniform.level))
	lines(c(tau,tau),c(0,dens.tau))
	lines(c(0,1),c(0,0))
	lines(c(0,0),c(0,dens.first))
	lines(c(1,1),c(0,uniform.level))

	if (showkey)
	{
		text(.8,ymax,"p",font=8)
		text(.87,ymax,paste(" = ",round(uniform.level,4)))
		text(.8,.95*ymax,"t",font=8)
		text(.87,.95*ymax,paste(" = ",round(tau,4)))

		# one key line per region, in the original drawing order
		key.labels<-c(
			paste("FP <=",round(bum.false.positive(tau,a,lambda),4)),
			paste("FN >=",round(bum.false.negative(tau,a,lambda),4)),
			paste("TP >=",round(bum.true.positive(tau,a,lambda),4)),
			paste("TN <=",round(bum.true.negative(tau,a,lambda),4)))
		key.heights<-c(.9,.85,.80,.75)*ymax
		key.colours<-c(2,4,3,5)
		for (k in 1:4) text(.85,key.heights[k],key.labels[k],col=key.colours[k])
	}
}

#########################################################################
# Function: bum.FDR
# Purpose: Compute an estimated upper bound for the false discovery rate
#          when significance is determined by comparing a p-value to
#          a threshold tau
# Arguments: tau - the threshold of comparison (point or vector)
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated upper bound of FDR
# Notes: FDR is the expected proportion of rejections that are false
#        positives, or Type I errors.  Requires 0<tau<1, 0<a<1, and 0<lambda<1.
# Calls: ext.pi, pbum
#########################################################################

bum.FDR<-function(tau,a,lambda)
{
	# Ratio of the uniform mass below tau (tau times the ext.pi level) to
	# the total BUM mass below tau (pbum).
	uniform.mass<-tau*ext.pi(a,lambda)
	uniform.mass/pbum(tau,a,lambda)
}

#########################################################################
# Function: bum.false.negative
# Purpose: Compute an estimated lower bound for the proportion of all
#          tests resulting in false negatives when significance is
#          determined by comparing the p-value to a threshold tau
# Arguments: tau - threshold of comparison
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated lower bound for proportion of all tests resulting
#          false negatives (Type II errors).
# Notes: Requires 0<tau<1, 0<a<1, and 0<lambda<1.
# Calls: ext.pi, pbum
#########################################################################

bum.false.negative<-function(tau,a,lambda)
{
	# Mass above tau (1 - pbum) minus the uniform part of that mass.
	uniform.level<-ext.pi(a,lambda)
	mass.below<-pbum(tau,a,lambda)
	uniform.above<-uniform.level*(1-tau)
	1-mass.below-uniform.above
}

#########################################################################
# Function: bum.false.positive
# Purpose: Compute an estimated upper bound for the proportion of all tests
#          that will be false positives when significance is determined
#          by comparing the p-value to a threshold tau
# Arguments: tau - the threshold of comparison (point or vector)
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated upper bound of proportion of all tests that are
#          false positives (Type I errors).
# Notes: Requires 0<tau<1, 0<a<1, and 0<lambda<1.
# Calls: ext.pi
#########################################################################

bum.false.positive<-function(tau,a,lambda)
{
	# Uniform level from ext.pi scaled by the threshold tau.
	uniform.level<-ext.pi(a,lambda)
	tau*uniform.level
}

#########################################################################
# Function: bum.histogram
# Purpose: Compare the fitted bum curve to a histogram of the p-values.
# Arguments: pvalues - vector of p-values
#            a - a for bum curve, if unspecified, it will estimate the MLE
#            lambda - lambda for bum curve, will estimate if unspecified
#            main - primary title of plot, default = "Histogram"
#            xlab - label of x-axis, default = "p-value"
#            ylab - label of y-axis, default = "Density"
#            nstartpts - number of startpoints for bum.mle, if required
#            starta - starting values for a for bum.mle, if required
#            startlambda - starting values for bum.mle, if required
# Returns: Doesn't return anything.  Produces a plot.
# Notes: Requires 0<a<1 and 0<lambda<1.
# Calls: dbum
# Possibly Calls: bum.mle
#########################################################################

bum.histogram<-function(pvalues,a=NA,lambda=NA,main="Histogram",xlab="p-value",ylab="Density",nstartpts=0,starta=0,startlambda=0)
{
	# Density-scaled histogram of the p-values with the BUM density curve
	# drawn on top.  When either a or lambda is missing, both are taken
	# from bum.mle fitted to pvalues.
	need.fit<-is.na(a)||is.na(lambda)
	if (need.fit)
	{
		fit<-bum.mle(pvalues,nstartpts=nstartpts,starta=starta,startlambda=startlambda)
		a<-fit$a
		lambda<-fit$lambda
	}
	hist(pvalues,probability=TRUE,main=main,xlab=xlab,ylab=ylab)
	# overlay the curve on the grid 0.01, 0.02, ..., 1
	grid<-(1:100)/100
	lines(grid,dbum(grid,a,lambda),lwd=3)
}

#########################################################################
# Function: bum.logL
# Purpose: for a set of p-values x, compute the log-likelihood of 
#          parameters a and lambda
# Arguments: a - the shape parameter of the beta component
#            lambda - mixing parameter, proportion of uniform component
#            x - vector of p-values
# Returns: the log-likelihood of the parameters
# Notes: Permutation techniques can result in p-values of zero.  This
#        routine will provide non-numeric results if a p-value of zero or one
#        is included.  A recommendation is to adjust permutation p-values
#        before calling the function by letting the new p-values
#        equal (nperms*old.pvalues + .5)/(nperms+1).
# Calls: dbum
#########################################################################

bum.logL<-function(a,lambda,x)
{
	# Sum of the log BUM densities of the p-values in x.
	densities<-dbum(x,a,lambda)
	sum(log(densities))
}

##########################################################################
# Function: bum.logL.contour
# Purpose: Produce contours of the log-likelihood function corresponding to
#          specified confidence region boundaries.
# Arguments: pvals - vector of p-values
#            nticks - number of tick marks for each axis on the grid, used only if aticks and lticks are not specified (default = 20)
#            conflevel - vector of desired confidence levels (default=c(0.999,0.99,0.95,0.90))
#            aticks - the vector of grid lines for the parameter a for finding contour lines (default = (1:(nticks-1))/nticks)
#            lticks - the vector of grid lines of the parameter lambda for finding contour lines (default = (1:(nticks-1))/nticks)
#            tri - equivalent to the triangle argument of the S-plus function contour (default = T)
#            plotit - boolean, if T, produce a plot of the requested contours
#            refine - number of times to refine the plot by "zooming" in on the region of interest.
# Returns: a list with the following components
#            lambda - vector of lambda points for the contours
#            a - vector of a points for the contours
#            logL - vector of the log-likelihood of the (lambda,a) points
#            conf - vector of the confidence level of the (lambda,a) points of the contour
# Notes: Accuracy will depend heavily on the number of ticks and the number of refinements.  In terms of accuracy, the more the better.  Accuracy must
#        be traded off with computing time.  The contours produced by the function form the basis for the confidence intervals
#        computed by bum.CI.
# Calls: bum.logL, bum.mle, unpack.contour.object
###########################################################################

bum.logL.contour<-function(pvals,nticks=20,conflevel=c(0.999,0.99,0.95,0.9),aticks=(1:(nticks-1))/nticks,lticks=(1:(nticks-1))/nticks,tri=T,plotit=T,refine=10)

{
	# Purpose: trace contours of the BUM log-likelihood at the levels
	#          maxlogL - qchisq(conflevel,2)/2, first on a coarse grid
	#          (doubling the resolution until a contour is found) and then on
	#          `refine` successively tighter grids around the contour.
	# Returns: list(lambda, a, logL, conf) with one entry per contour point.

	# Log-likelihood on the grid, laid out the way contour(lambda,a,z)
	# expects: rows follow lambda (x axis), columns follow a (y axis).
	# All inputs are passed explicitly so the helper does not depend on
	# lexical scoping.
	logL.grid<-function(lambda,a,pvals)
	{
		n.lambda<-length(lambda)
		n.a<-length(a)
		z<-matrix(0,n.lambda,n.a)
		for (row in 1:n.lambda)
			for (col in 1:n.a)
				z[row,col]<-bum.logL(a[col],lambda[row],pvals)
		z
	}

	a <- aticks
	lambda <- lticks
	nticks2<-nticks
	res2<-NULL
	maxlogL <- bum.mle(pvals)
	# highest confidence first, so the contour levels come out increasing
	conflevels<-conflevel[rev(order(conflevel))]
	# the contour levels depend only on the MLE, so compute them once
	levs <- maxlogL$logL - qchisq(conflevels, 2)/2

	while(length(res2)==0)  # Obtain enough tickmarks to get a contour
	{
		z<-logL.grid(lambda,a,pvals)
		result <- contour(lambda,a,z,levels=levs,labex=0,save=TRUE,plotit=FALSE,triangles=tri)
		if (length(unlist(result))!=0) res2<-unpack.contour.object(result)
		nticks2<-2*nticks2
		a<-(1:(nticks2-1)/nticks2)
		lambda<-a
	}
	nticks2<-nticks2/2   # undo the doubling done after the successful pass

	minx1<-0
	miny1<-0
	maxx1<-1
	maxy1<-1
	# Zoom in on the relevant portion of the contour.  The pass counter has
	# its own name: sharing an index with the grid loops overwrote it in R,
	# so the "plot on the last pass only" test compared the wrong value.
	for (pass in 1:refine)
	{
		# move each side of the window one third of the way toward the
		# current contour's extent
		maxy2<-(2*maxy1+max(res2$y,na.rm=TRUE))/3
		maxx2<-(2*maxx1+max(res2$x,na.rm=TRUE))/3
		miny2<-(2*miny1+min(res2$y,na.rm=TRUE))/3
		minx2<-(2*minx1+min(res2$x,na.rm=TRUE))/3
		a<-seq(from=miny2,to=maxy2,length=nticks2)
		lambda<-seq(from=minx2,to=maxx2,length=nticks2)
		z<-logL.grid(lambda,a,pvals)
		result <- contour(lambda,a,z,levels=levs,labex=0,save=TRUE,plotit=(pass==refine)&&(plotit),triangles=tri,ylab="a",xlab="")
		res2<-unpack.contour.object(result)
		minx1<-minx2
		maxx1<-maxx2
		miny1<-miny2
		maxy1<-maxy2
	}

	# Label every contour point with its confidence level: the i-th smallest
	# contour level is paired with the i-th entry of conflevels.
	# NOTE(review): this pairing assumes every requested level produced a
	# contour; if one is missing the labels shift -- confirm against
	# unpack.contour.object.
	conf<-rep(0,length(res2$level))
	lvls<-sort(unique(res2$level))
	for (k in seq(along=lvls))
		conf[res2$level==lvls[k]]<-conflevels[k]

	if(plotit) 
	{
		# mark the MLE and write the x-axis label ("l" in font 8)
		points(maxlogL$lambda,maxlogL$a)
		yrange<-maxy1-miny1
		text((maxx1+minx1)/2,miny1-.15*yrange,"l",cex=1.5,font=8)
	}
	return(list(lambda=res2$x,a=res2$y,logL=res2$level,conf=conf))
}	

#########################################################################
# Function: bum.mle
# Purpose: For a set of p-values, compute MLE's for a and lambda
# Arguments: pvals - the set of p-values
#            nstartpts - number of starting points to randomly generate
#            starta - vector of the LOGIT OF starting a's for optimization routine, default = 0
#            startlambda - vector of LOGIT OF starting lambdas for optimization, default = 0
#            nadj - number for adjusting p-values if necessary, default = 100000
#            adjeps - epsilon for adjusting pvalues, if necessary, default = 0.5
# Returns: A list with the following components
#          a - the estimate for a with the given or generated starting points
#          lambda - the estimate for lambda with the given or generated starting points
#          logL - log likelihood of the (a,lambda) estimate
#          pvals.adjusted - indicates whether or not the p-values were adjusted to avoid
#                           the problems that occur when some pvalues equal zero.
#          nstartpts - the number of start points used for optimization 
#          nits - the number of times the algorithm iterated on the search that yielded the optimum
#          termination - a note about the nature of the termination.  Indicates whether or
#                        not the algorithm converged.
# Notes: May not provide global mle.  For each starting point, the algorithm will
#        converge to a local optimum.  Repeating with multiple starting points
#        is more likely to produce the global mle.  However, in "well-behaved"
#        data, the contours of the log-likelihood are not likely to lead to
#        a local optimal "trap".  The arguments nadj and adjeps are used to
#        slightly modify p-values so that no p-value equals zero when
#        bum.logL is called.  
# Calls: dbum, bum.logL, neg.bum.logL, logit, inv.logit
#########################################################################

bum.mle<-function(pvals,nstartpts=0,starta=0,startlambda=0,nadj=100000,adjeps=0.5)

{
	# Purpose: maximise the BUM log-likelihood over (a,lambda) by running
	#          nlminb on neg.bum.logL from each starting point (given on the
	#          logit scale) and keeping the best result.
	# Returns: list(a, lambda, logL, pvals.adjusted, nstartpts, nits,
	#          termination) for the best starting point.
	# Raises:  an error when no starting point yields a usable log-likelihood.

	if (nstartpts>0)
	{
		# random starts replace any supplied starta/startlambda
		starta<-logit(runif(nstartpts))
		startlambda<-logit(runif(nstartpts))
	}

	# p-values of zero make the log-likelihood non-numeric; pull all
	# p-values strictly inside (0,1) when any are present
	adjusted<-min(pvals)<=0
	if (adjusted) pvals<-(nadj*pvals+adjeps)/(nadj+2*adjeps)

	bestlogL<- -Inf
	best<-NULL
	nstartpts<-length(starta)	
	for (i in 1:nstartpts)
	{
		results<-nlminb(c(starta[i],startlambda[i]),neg.bum.logL,pvals=pvals)
		logL<- -results$objective
		# a non-numeric objective cannot be compared; skip this start
		if (is.na(logL)) next
		if (logL>bestlogL)
		{
			# S-PLUS nlminb returns the optimum as $parameters, R's
			# nlminb as $par; accept either.
			estimate<-results$parameters
			if (is.null(estimate)) estimate<-results$par
			best<-list(a=inv.logit(estimate[1]),
				lambda=inv.logit(estimate[2]),
				logL=logL,
				nits=results$iterations,
				termination=results$message)
			bestlogL<-logL
		}
	}
	if (is.null(best)) stop("bum.mle: no starting point produced a finite log-likelihood")
	return(list(a=best$a,lambda=best$lambda,logL=best$logL,pvals.adjusted=adjusted,nstartpts=nstartpts,nits=best$nits,termination=best$termination))
}

#########################################################################
# Function: bum.true.negative
# Purpose: Compute an estimated upper bound for the proportion of all
#          tests resulting in true negatives when significance is
#          determined by comparing the p-value to a threshold tau
# Arguments: tau - threshold
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated upper bound for proportion of all tests resulting
#          true negatives
# Calls: ext.pi
#########################################################################

bum.true.negative<-function(tau,a,lambda)
{
	# Uniform level from ext.pi times the fraction of [0,1] above tau.
	uniform.level<-ext.pi(a,lambda)
	uniform.level*(1-tau)
}

#########################################################################
# Function: bum.true.positive
# Purpose: Compute an estimated lower bound for the proportion of all
#          tests resulting in true positives when significance is
#          determined by comparing the p-value to a threshold tau
# Arguments: tau - threshold
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: Estimated lower bound for proportion of all tests resulting
#          true positives
# Calls: ext.pi, pbum
#########################################################################

bum.true.positive<-function(tau,a,lambda)
{
	# BUM mass below tau (pbum) minus the uniform part of that mass.
	uniform.level<-ext.pi(a,lambda)
	mass.below<-pbum(tau,a,lambda)
	mass.below-uniform.level*tau
}

#########################################################################
# Function: bum.weighted.error
# Purpose: Compute a linear combination of the upper bound for the 
#          proportion of false positives and the lower bound for the
#          proportion of false negatives resulting when significance
#          is determined by comparing p-values to a threshold 
# Arguments: tau - threshold
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
#            wfp - weight given to the false positives
#            wfn - weight given to the false negatives
# Returns: the weighted combination of error rates
# Calls: bum.false.positive, bum.false.negative
#########################################################################

bum.weighted.error<-function(tau,a,lambda,wfp=1,wfn=1)
{
	# Weighted sum of the false positive and false negative proportions
	# at threshold tau.
	false.pos<-bum.false.positive(tau,a,lambda)
	false.neg<-bum.false.negative(tau,a,lambda)
	wfp*false.pos+wfn*false.neg
}

#########################################################################
# Function: dalt
# Purpose: compute the density of the alternative distribution
# Arguments: x - p-value of interest
#            a - shape parameter of beta component
#            lambda - mixing parameter, proportion of uniform component
# Returns: vector of density at x
# Calls: ext.pi, dbum
########################################################################

dalt<-function(x,a,lambda)
{
	# BUM density with the uniform level (ext.pi) removed, rescaled by
	# the remaining mass 1 - pi.
	uniform.level<-ext.pi(a,lambda)
	excess<-dbum(x,a,lambda)-uniform.level
	excess/(1-uniform.level)
}

##########################################################################
# Function: dbum
# Purpose: Compute the pdf of the bum distribution
# Arguments: x - point or vector of points at which to compute pdf
#            a - shape parameter of beta component
#            lambda - mixture parameter, proportion of uniform component
# Returns: value of the pdf of the bum distribution for x
# Notes: While dbum does not require 0<x<=1, 0<a<1, and 0<lambda<1, many
#        other routines that call dbum do.
##########################################################################

dbum<-function(x,a,lambda)
{
	# Mixture density: weight lambda on the constant 1 plus weight
	# (1-lambda) on a*x^(a-1).
	beta.weight<-(1-lambda)*a
	lambda+beta.weight*x^(a-1)
}

#########################################################################
# Function: ext.pi
# Purpose: Extract the maximal uniform component from a bum density
# Arguments: a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, component of bum that is uniform
# Returns: the proportion of the density that can be extracted as a uniform
# Notes: Will return NA if a or lambda not in [0,1] to protect other functions
#        that require a and lambda to be in [0,1].
#########################################################################

ext.pi<-function(a,lambda)

{
	# Purpose: uniform level lambda + (1-lambda)*a of the BUM density.
	# Arguments: a, lambda - scalars or vectors of BUM parameters
	# Returns: the uniform level, NA wherever a or lambda lies outside [0,1]

	# Logical masks are required here: a numeric 0/1 index (as produced by
	# adding two comparisons) selects element 1 rather than the offending
	# positions.  Values that are already NA are left alone.
	bad.lambda<-!is.na(lambda)&((lambda>1)|(lambda<0))
	bad.a<-!is.na(a)&((a>1)|(a<0))
	lambda[bad.lambda]<-NA
	a[bad.a]<-NA
	return((lambda+(1-lambda)*a))
}

#########################################################################
# Function: find.EBP.threshold
# Purpose: Find a p-value threshold so that all p-values less than the
#          threshold have an empirical Bayes (EB) posterior probability
#          greater than a desired level
# Arguments: EBP - desired empirical Bayes' posterior
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: p-value threshold that maintains the desired EB posterior
# Calls: ext.pi
#########################################################################

find.EBP.threshold<-function(EBP,a,lambda)

{
	# Purpose: p-value threshold at which the empirical Bayes posterior
	#          (dbum - pi)/dbum equals EBP, in closed form.
	# Arguments: EBP - desired posterior level
	#            a, lambda - BUM parameters
	# Returns: the threshold x solving
	#          x^(a-1) = (EBP*lambda + a*(1-lambda)) / (a*(1-EBP)*(1-lambda))
	# The closed form needs only EBP, a and lambda, so the previously
	# computed (and never used) uniform level has been dropped.
	ratio<-(EBP*lambda+a*(1-lambda))/(a*(1-EBP)*(1-lambda))
	return(ratio^(1/(a-1)))
}

#########################################################################
# Function: find.FDR.threshold
# Purpose: Find a p-value threshold so that all p-values less than the
#          threshold have an FDR lower than a desired level
# Arguments: fdr - desired fdr
#            a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
# Returns: p-value threshold that maintains the desired fdr
#########################################################################

find.FDR.threshold<-function(fdr,a,lambda)
{
	# Closed-form threshold: solves
	# x^(a-1) = (pi - fdr*lambda) / (fdr*(1-lambda)), pi = ext.pi(a,lambda).
	uniform.level<-ext.pi(a,lambda)
	numerator<-uniform.level-fdr*lambda
	denominator<-fdr*(1-lambda)
	(numerator/denominator)^(1/(a-1))
}

#########################################################################
# Function: find.WE.threshold
# Purpose: Compute the significance threshold for p-values that
#          optimizes the weighted error function
# Arguments: a - shape parameter of beta component of bum distribution
#            lambda - mixing parameter, proportion uniform in bum dist
#            wfp - weight of false positives
#            wfn - weight of false negatives
#            nbisect - number of bisections for inv.dbum
# Returns: a p-value threshold that optimizes the weighted error function
# Notes: inv.dbum will use the bisection method, hence results will
#        be accurate to within .5^(nbisect).
# Calls: ext.pi, inv.dbum
#########################################################################

find.WE.threshold<-function(a,lambda,wfp=1,wfn=1,nbisect=20)
{
	# Threshold at which the BUM density equals (wfp+wfn)*pi/wfn, located
	# by inv.dbum with nbisect bisections.
	uniform.level<-ext.pi(a,lambda)
	target.density<-(wfp+wfn)*uniform.level/wfn
	inv.dbum(target.density,a,lambda,nbisect)
}

##########################################################################
# Function: inv.dbum
# Purpose: Compute the inverse of the pdf  of the bum distribution
# Arguments: y - value of the pdf of the bum of interest
#            a - shape parameter of the beta component
#            lambda - mixing parameter, weight of uniform component
#            nbisect - the number of bisections to perform
# Returns: x so that dbum(x,a,lambda) = y
# Notes: Uses the bisection method.  The results will
#        be accurate to within 2^(-nbisect).  Used by find.WE.threshold.
##########################################################################

inv.dbum<-function(y,a,lambda,nbisect=20)

{
	# Purpose: invert dbum on (0,1) by bisection, for every element of y
	#          at once.
	# Arguments: y - density value(s) to invert
	#            a, lambda - BUM parameters
	#            nbisect - number of bisection steps
	# Returns: vector the same length as y holding the last midpoint
	#          examined for each element; numeric(0) for empty y.

	n<-length(y)
	if (n==0) return(numeric(0))   # nothing to invert
	top<-rep(1,n)
	bot<-rep(0,n)
	mid<-rep(0,n)
	for (j in 1:nbisect)
	{
		mid<-(top+bot)/2
		# where the density at the midpoint is still above the target the
		# search moves right (raise bot), otherwise left (lower top)
		too.high<-dbum(mid,a,lambda)>y
		bot[too.high]<-mid[too.high]
		top[!too.high]<-mid[!too.high]
	}
	return(mid)
}

#########################################################################
# Function: inv.logit
# Purpose: Compute the inverse logit of x
# Arguments: x - point or vector of points
# Returns: inverse logit of x, a value in [0,1] for each element of x
# Notes: Evaluated as 1/(1+exp(-x)).  The algebraically equal form
#        exp(x)/(1+exp(x)) becomes Inf/Inf = NaN once exp(x) overflows
#        (x above roughly 709); this form returns 1 there and still
#        returns 0 for very negative x.
#########################################################################

inv.logit<-function(x)

{
	return(1/(1+exp(-x)))
}

#########################################################################
# Function: logit
# Purpose: Compute the logit (log-odds) of an x in (0,1)
# Arguments: x - point or vector of points in (0,1)
# Returns: logit of x, computed as log(x)-log(1-x)
# Notes: requires 0<x<1
#########################################################################

logit<-function(x)

{
	log.x<-log(x)
	log.complement<-log(1-x)
	log.x-log.complement
}

#########################################################################
# Function: neg.bum.logL
# Purpose: Compute the negative log-likelihood for use by nlminb in
#          bum.mle.
# Arguments: x - a vector with two components
#                x[1] corresponds to logit of a, beta shape parameter
#                x[2] corresponds to logit of lambda, the proportion uniform
#            pvals - a vector of non-zero p-values
# Returns: the negative log-likelihood, i.e. minus the sum of log(dbum)
#          over pvals
# Notes: used in bum.mle via the S-plus function nlminb.  Both parameters
#        are passed on the logit scale and mapped back with inv.logit
#        before dbum is evaluated.
# Calls: dbum, inv.logit
#########################################################################

neg.bum.logL<-function(x,pvals)

{
	a<-inv.logit(x[1])
	lambda<-inv.logit(x[2])
	log.lik<-sum(log(dbum(pvals,a,lambda)))
	-1*log.lik
}

#########################################################################
# Function: palt
# Purpose: compute the cdf of the extracted alternative component of
#          the BUM distribution
# Arguments: x - p-value (or vector of p-values) of interest
#            a - shape parameter of beta component
#            lambda - mixing parameter, proportion of uniform component
# Returns: vector of cdf at x, computed as
#          (pbum(x,a,lambda)-pi*x)/(1-pi) with pi = ext.pi(a,lambda)
# Notes: requires 0<x<=1, 0<a<=1, 0<=lambda<=1.
# Calls: ext.pi, pbum
#########################################################################

palt<-function(x,a,lambda)

{
	pi.null<-ext.pi(a,lambda)
	# part of the bum cdf left after removing the pi.null*x term
	remainder<-pbum(x,a,lambda)-pi.null*x
	remainder/(1-pi.null)
}

##########################################################################
# Function: pbum
# Purpose: Compute the cdf of the bum distribution
# Arguments: x - point or vector of points at which to compute the cdf
#            a - shape parameter of the beta component
#            lambda - mixing parameter, weight of uniform component
# Returns: value of the cdf of the bum distribution for x, i.e.
#          lambda*x+(1-lambda)*x^a
# Notes: While pbum does not require 0<x<=1, 0<a<=1, and 0<=lambda<=1, many
#        other routines that call pbum do.
##########################################################################

pbum<-function(x,a,lambda)
{
	uniform.part<-lambda*x
	beta.part<-(1-lambda)*x^a
	uniform.part+beta.part
}

##########################################################################
# Function: qalt
# Purpose: compute the quantile of the alternative component of BUM
# Arguments: p - the percentile (or vector of percentiles)
#            a - shape parameter of beta component
#            lambda - mixing parameter, proportion of uniform component
#            nbisect - number of times to perform bisection (default=20)
# Returns: a vector of the p quantiles, one value per element of p
# Notes: Uses the bisection method on [0,1], moving to the upper half
#        whenever palt at the midpoint is below p.  Results will be
#        accurate to within .5^(nbisect).  All elements of p are bisected
#        together, so palt is called once per bisection.  A zero-length p
#        gives a zero-length result and a missing p gives NA.  With
#        nbisect=0 the midpoint of [0,1] is returned.
# Calls: palt
###########################################################################

qalt<-function(p,a,lambda,nbisect=20)

{
	n<-length(p)
	bot<-rep(0,n)
	top<-rep(1,n)
	mid<-(top+bot)/2
	for (j in seq_len(nbisect))
	{
		mid<-(top+bot)/2
		# as.vector drops names so the result stays a plain vector
		gohigher<-as.vector(palt(mid,a,lambda)<p)
		# ifelse carries an NA comparison into the bounds instead of
		# silently leaving them untouched
		bot<-ifelse(gohigher,mid,bot)
		top<-ifelse(gohigher,top,mid)
	}
	# report NA wherever the comparison could not be decided
	mid[is.na(p)|is.na(top+bot)]<-NA
	return(mid)
}

##########################################################################
# Function: qbum
# Purpose: Compute the quantile of the bum distribution
# Arguments: p - percentile or vector of percentiles
#            a - shape parameter of the beta component
#            lambda - mixing parameter, weight of uniform component
#            nbisect - the number of bisections to perform (default = 20)
# Returns: the values x such that the cdf at x (pbum) equals the percentiles
# Notes: Uses the bisection method to find quantiles, a larger value of
#        nbisect will result in more accurate results.  The results will
#        be accurate to within 2^(-nbisect).  Requires 0<p<=1.
#        A missing p gives NA for that element (previously a single NA
#        silently returned 0.5 and a longer vector containing NA failed).
#        With nbisect=0 the midpoint of [0,1] is returned.
# Calls: pbum
##########################################################################

qbum<-function(p,a,lambda,nbisect=20)

{
	n<-length(p)
	top<-rep(1,n)
	bot<-rep(0,n)
	mid<-(top+bot)/2
	for (j in seq_len(nbisect))
	{
		mid<-(top+bot)/2
		# as.vector drops names/dims so the result stays a plain vector
		gohigher<-as.vector(pbum(mid,a,lambda)<p)
		# ifelse carries an NA comparison into the bounds instead of
		# silently leaving them untouched
		bot<-ifelse(gohigher,mid,bot)
		top<-ifelse(gohigher,top,mid)
	}
	# report NA wherever the comparison could not be decided
	mid[is.na(p)|is.na(top+bot)]<-NA
	return(mid)
}

#########################################################################
# Function: qqbum
# Purpose: Produce a quantile-quantile plot for a set of p-values
# Arguments: pvals - a vector of p-values
#            a - shape parameter for beta component of bum distribution, will estimate if unspecified
#            lambda - mixing parameter for BUM distribution, will estimate if unspecified
#            main - primary plot title, default = "BUM QQ Plot"
#            xlab - label of x-axis, default = "BUM Expected p-value"
#            ylab - label of y-axis, default = "Observed p-value"
#            nstartpts - value of nstartpts to provide to bum.mle if a or lambda not provided
#            starta - value of starta to provide to bum.mle if a or lambda not provided
#            startlambda - value of startlambda to provide to bum.mle if a or lambda not provided
# Returns: a quantile-quantile plot
# Notes: If either a or lambda is not provided, bum.mle is used to
#        estimate both of them.  May take a few minutes.
#        Missing p-values are dropped by sort(); the plotting positions
#        (rank-.5)/n use the number of p-values that remain.
# Calls: qbum
# May Call: bum.mle
#########################################################################


qqbum<-function(pvals,a=NULL,lambda=NULL,main="BUM QQ Plot",xlab="BUM Expected p-value",ylab="Observed p-value",nstartpts=0,starta=0,startlambda=0)

{
	# count after sorting: sort() removes NAs, and n must match the
	# vector that rank() sees
	pvals<-sort(pvals)
	n<-length(pvals)
	if(is.null(a)||is.null(lambda))
	{
		# estimate both parameters when either one is missing
		al<-bum.mle(pvals,nstartpts=nstartpts,starta=starta,startlambda=startlambda)
		a<-al$a
		lambda<-al$lambda
	}
	plot(c(0,1),c(0,1),main=main,xlab=xlab,ylab=ylab,type="n")
	# dashed line: observed p-values against their BUM expected quantiles
	lines(qbum((rank(pvals)-.5)/n,a,lambda),pvals,lty=2)
	# solid reference line y = x
	lines(c(0,1),c(0,1))
}

#########################################################################
# Function: rbum
# Purpose: generate random bum observations
# Arguments: n - number of observations
#            a - parameter a
#            lambda - parameter lambda
# Returns: a vector of random bum observations
# Notes: Draws n uniform(0,1) values and maps them through qbum, which is
#        called with its default number of bisections.
# Calls: qbum
##########################################################################

rbum<-function(n,a,lambda)

{
	qbum(runif(n),a,lambda)
}

###########################################################################
# Function: special.case.CI
# Purpose: Find conservative CI's for error control quantities that are not monotone
#          in a and lambda.
# Arguments: bc.object - the object returned by bum.logL.contour; its
#                        components conf, a and lambda are used here
#            quantity - string specifying the quantity of interest for CI
#            arg - argument necessary to compute quantity
# Returns: a list with components
#          quantity - the quantity string that was supplied
#          CImin - lower limit, one per distinct value of bc.object$conf
#          CImax - upper limit, one per distinct value of bc.object$conf
# Notes: Finds range of the quantity within the smallest box that bounds the
#        confidence region for a and lambda, yielding a conservative interval.
#        Both searches start at the mean of the a and lambda values for the
#        level.  Confidence levels are taken in increasing order as
#        sort(unique(bc.object$conf)), so they are matched against the stored
#        values exactly rather than through formatted level labels.
#        Assumes bc.object$conf is numeric - TODO confirm against
#        bum.logL.contour.
#        Intended primarily as a function to be called by bum.CI.
# Calls: special.case.function, nlminb
###########################################################################

special.case.CI<-function(bc.object,quantity="EBP",arg=0.95)

{
	conflevels<-sort(unique(bc.object$conf))
	nconflvls<-length(conflevels)
	CImin<-rep(NA,nconflvls)
	CImax<-rep(NA,nconflvls)
	for (i in seq_len(nconflvls))
	{
		select<-bc.object$conf==conflevels[i]
		a<-bc.object$a[select]
		lam<-bc.object$lambda[select]
		# bounding box of the points belonging to this confidence level
		box.lower<-c(min(a,na.rm=TRUE),min(lam,na.rm=TRUE))
		box.upper<-c(max(a,na.rm=TRUE),max(lam,na.rm=TRUE))
		box.start<-c(mean(a,na.rm=TRUE),mean(lam,na.rm=TRUE))
		# pos=TRUE minimizes the quantity; pos=FALSE minimizes its negative,
		# i.e. maximizes it
		minres<-nlminb(box.start,special.case.function,lower=box.lower,upper=box.upper,arg=arg,quantity=quantity,pos=TRUE)
		maxres<-nlminb(box.start,special.case.function,lower=box.lower,upper=box.upper,arg=arg,quantity=quantity,pos=FALSE)
		CImin[i]<-minres$objective
		CImax[i]<- -1*maxres$objective
	}
	return(list(quantity=quantity,CImin=CImin,CImax=CImax))
}

############################################################################
# Function: special.case.function
# Purpose: Compute the special case function for special.case.CI
# Arguments: params - vector, params[1] = a, params[2] = lambda
#            arg - supplemental argument to compute quantity
#            quantity - string specifying quantity of interest; one of
#                       "EBP", "EBP.threshold" or "FDR.threshold"
#            pos - if TRUE, return function, if FALSE, return -1*function
# Returns: the requested quantity evaluated at params (negated if pos is FALSE)
# Notes: Intended for use by special.case.CI.  Stops with an error naming
#        the offending value if quantity is not one of the three recognized
#        strings.
# Calls: bum.EBP, find.FDR.threshold, find.EBP.threshold
############################################################################

special.case.function<-function(params,arg,quantity,pos=TRUE)

{
	if (quantity=="EBP") ans<-bum.EBP(arg,params[1],params[2])
	else if (quantity=="EBP.threshold") ans<-find.EBP.threshold(arg,params[1],params[2])
	else if (quantity=="FDR.threshold") ans<-find.FDR.threshold(arg,params[1],params[2])
	else stop(paste("unrecognized quantity:",quantity))
	if(pos) return(ans)
	return(-1*ans)
}

###########################################################################
# Function: unpack.contour.object
# Purpose: Make the output from the S-plus function contour more easily used
#          by other routines in this library
# Arguments: contour.object - an object returned by the S-plus function contour
# Returns: a list with the following components
#          x - vector of x-coordinates of contour lines
#          y - vector of y-coordinates of contour lines
#          level - vector of corresponding level of the contours
# Notes: The object is flattened with unlist and split into segments that
#        each end at an NA.  Odd-numbered segments are taken as
#        x-coordinates and even-numbered segments as y-coordinates; the k-th
#        x/y pair is labelled with the k-th name of contour.object converted
#        to a number.  The terminating NA of each segment is kept in x and y.
#        Values after the last NA are ignored.  If there are no NA-terminated
#        segments (for example an empty object), all three components are NULL.
#        Intended primarily for use by bum.CI.
###########################################################################

unpack.contour.object<-function(contour.object)

{
	levs<-names(contour.object)
	res<-unname(unlist(contour.object))
	# positions of the NA terminators, with 0 as the start sentinel
	breaks<-c(0,which(is.na(res)))
	nseg<-length(breaks)-1
	if (nseg<1) return(list(x=NULL,y=NULL,level=NULL))
	# collect the pieces in lists and flatten once at the end rather than
	# growing the vectors with c() on every pass
	x.parts<-vector("list",nseg)
	y.parts<-vector("list",nseg)
	z.parts<-vector("list",nseg)
	for (i in seq_len(nseg))
	{
		seg<-res[(breaks[i]+1):breaks[i+1]]
		if ((i%%2)==1) x.parts[[i]]<-seg
		else
		{
			y.parts[[i]]<-seg
			# segments 2k-1 and 2k both belong to level k
			z.parts[[i]]<-rep(as.numeric(levs[((i-1)%/%2)+1]),length(seg))
		}
	}
	return(list(x=unlist(x.parts),y=unlist(y.parts),level=unlist(z.parts)))
}