# MultTests, Copyright (c) 2001-2004 by David R. Bickel.

# 1/31/04. min.null.auc changed to max.null.auc. Data frame output to effect.size.estimation.plot added. In rejections(), exceeding tolerance gives a warning instead of an error. monotone = TRUE is a new default argument for effect.size.estimation.plot.
# 6/20/03. wilcox.test.stat can now handle arguments that entire consist of NA.
# 11/8/02. rejections can now accept other column names for FDR.
# 8/30/02. diff.scale.estimator now uses NULL to disable rescaling.
# 8/20/02. unpaired.diff.tests changed to boot.diff.tests throughout.
# 8/8/02.  test.stat argument replaces three arguments in diff.tests and diffTests.
# 8/7/02.  paired tests now allow one-sided alternatives.
# 7/29/02. multtests changed to err.rates throughout.
# DifferenceTests divided into DiffTests, MultTests, and TestStats on 7/29/02.

# DifferenceTests, Copyright (c) 2001-2002 by David R. Bickel.
# Created by David R. Bickel on 10/1/01.
# 7/12/02. multcomp changed to multtests throughout to avoid conflict with the multcomp package.
# 6/17/02. progess.file.base replace with file.base throughout.
# 6/14/02. paired.diff.tests modified to depend on gen.paired.diff.tests.
# 6/6/02. multcomp added and four old functions (gen.t.tests, location.tests, diff.tests, paired.diff.tests) now call it; tests of these functions for the single-comparison case are recorded in the R consoles R multcomp01 and R multcomp02.
# 5/13/02. default NULL added to y of t.test.plus.
# 5/10/02. paired.diff.tests added.
# 5/2/02. gen.t.tests has file option.
# 5/1/02. diff.tests modified. 
# 4/29/02. diff.scale.estimator added to diff.tests.
# 4/16/02. This error in diff.tests corrected: desum was fixed with location.estimator = mean.
# 4/13/02. desum added.
# 4/6/02. diff.tests added.
# 4/1/02. replace added to gen.t.test.
# 2/25/02. t.test.plus added.


source("TestStats.r")

# multTests: superclass for genPairedDiffTests/pairedDiffTests, diffTests/bootDiffTests.

multTests <- function(getTestName, getComparison, getNumVars, getTestStats, getNullStats, getAlternative, getUsingPValueStats) {list(getTestName = getTestName, getComparison = getComparison, getNumVars = getNumVars, getTestStats = getTestStats, getNullStats = getNullStats, getAlternative = getAlternative, getUsingPValueStats = getUsingPValueStats)}




se.diff <- function(x, y) {sqrt(var(x)/length(x) + var(y)/length(y))}

pooled.scale <- function(x, y = NULL, scale.estimator = sd)
{
  if(is.null(y))
  {
    stopifnot(is.list(x))
    sqrt(sum(sapply(x, function(vec) {scale.estimator(vec)^2 * (length(vec) - 1)})) / (sum(sapply(x, length)) - length(x)))
  }
  else
    sqrt((scale.estimator(x)^2 * (length(x) - 1) + scale.estimator(y)^2 * (length(y) - 1)) / (length(x) + length(y) - 2))
}



desum <- function(d, location.estimator = mean, regress.file = '')
# d is a data frame, only the last column of which is a factor; that factor has two levels
{
  sum.diff <- function(d, location.estimator = mean, file = '')
  {
    fac0 <- d[,ncol(d)]
    d.loc <- abs(apply(d[, 1:(length(d)-1)], MARGIN = 2, FUN = function(column) tapply(column, INDEX = fac0, FUN = location.estimator)))
    d.loc.sum <- apply(d.loc, 2, function(column) {column[1]+column[2]})
    d.loc.diff <- apply(d.loc, 2, function(column) abs(column[1]-column[2]))
    dat <- data.frame(sum = d.loc.sum, diff = d.loc.diff)
    obj <- lm(diff ~ sum, dat)
    dat$diff.prediction <- predict(obj, dat)
    if(file != '') save.spreadsheet(dat, file = file)
    dat
  }

  predicted.diffs <- sum.diff(d, location.estimator = location.estimator, file = regress.file)$diff.prediction
  ifelse(predicted.diffs < 0, stop('Linear fit inappropriate for this data.'), 0)
  fac0 <- d[, ncol(d)]
  levs <- unique(fac0)
  mat <- t(apply(d[, 1:(ncol(d)-1)], 1, function(r) {r/predicted.diffs}))
  dat <- data.frame(mat, fac0)
  names(dat) <- names(d)
  dat
}

boot.diff.se <- function(x, y, nboot, location.estimator = mean)
{
  diffs <- numeric(nboot)
  for(i in 1:nboot)
    diffs[i] <- location.estimator(sample(y, replace = TRUE)) - location.estimator(sample(y, replace = TRUE))
  sd(diffs)
}

split.into.pair <- function(x, f)
{
  if(length(unique(f)) != 2) {print('The factor f must have two unique elements.'); browser()}
  pair <- split(x, f)
  if(length(pair) != 2 || !is.list(pair)) {print('x not split into two list elements.'); browser()}
  pair
}

rescale.diff <- function(d, diff.scale.estimator = function(x, y) {boot.diff.se(x, y, nboot = 20, location.estimator = mean)}, scale.file = '')
{
# d is a data frame, only the last column of which is a factor; that factor has two levels. diff.scale.estimator is a function of two vectors.
  fac0 <- d[, ncol(d)]
  diff.scale <- function(variable.vector)
  {
    vector.pair <- split.into.pair(variable.vector, fac0) # depends on split.into.pair as of 9/5/02
    diff.scale.estimator(vector.pair[[1]], vector.pair[[2]])
  }
  diff.scales <- apply(d[, 1:(ncol(d)-1)], 2, function(vec) diff.scale(vec))
  mat <- t(apply(d[, 1:(ncol(d)-1)], 1, function(r) {r/diff.scales}))
  dat <- data.frame(mat, fac0)
  names(dat) <- names(d)
  if(scale.file != '') save.spreadsheet(data.frame(diff.scales), file = scale.file) # dput(diff.scales, file = scale.file)
  dat
}

wilcox.diff.tests <- function(two.samples, alternative = c('two.sided', 'less', 'greater'), replace = FALSE, balance = FALSE, desum = FALSE, regress.file = '', nboot.rescale = 0, diff.scale.estimator = NULL, scale.file = '', test.name = 'wilcox.diff.tests', file.base = test.name, max.null.auc = 0.5, show.progress = TRUE, fdr = 0.2, ...)
{
  alternative <- match.arg(alternative)
  stopifnot(alternative == 'two.sided')
  effect.size <- null.zeroed.wilcox.statistic(max.null.auc = max.null.auc, two.samples = two.samples)

  h0DistErrRates(null.cdfs = list(function(stat){wilcox.null.cdf(stat, two.samples)}), effect.sizes = effect.size, file.base = file.base, show.progress = show.progress, null.quant.funs = list(function(p){wilcox.null.quant.fun(p, two.samples = two.samples)}), fdrs = fdr, multTestsObjs = list(diffTests(two.samples = two.samples, alternative = alternative, test.stat = wilcox.test.stat, replace = replace, balance = balance, desum = desum, regress.file = regress.file, nboot.rescale = nboot.rescale, diff.scale.estimator = diff.scale.estimator, scale.file = scale.file, test.name = test.name, show.progress = show.progress, ...)))
}

t.diff.tests <- function(two.samples, alternative = c('two.sided', 'less', 'greater'), replace = FALSE, balance = FALSE, desum = FALSE, regress.file = '', nboot.rescale = 0, diff.scale.estimator = NULL, scale.file = '', test.name = 't.diff.tests', file.base = test.name, show.progress = TRUE, fdr = 0.2, ...)
{
print('This function was created on 5/20/03, but never tested. Cf. bio.t.diff.tests() and wilcox.diff.tests().')
  alternative <- match.arg(alternative)
  stopifnot(alternative == 'two.sided')

  h0DistErrRates(null.cdfs = list(function(stat){t.null.cdf(stat, two.samples)}), file.base = file.base, show.progress = show.progress, null.quant.funs = list(function(p){t.null.quant.fun(p, two.samples = two.samples)}), fdrs = fdr, multTestsObjs = list(diffTests(two.samples = two.samples, alternative = alternative, test.stat = t.test.stat, replace = replace, balance = balance, desum = desum, regress.file = regress.file, nboot.rescale = nboot.rescale, diff.scale.estimator = diff.scale.estimator, scale.file = scale.file, test.name = test.name, show.progress = show.progress, ...)))
}

h0dist.diff.tests <- function(two.samples, null.cdf, alternative = c('two.sided', 'less', 'greater'), test.stat = gen.t.stat, replace = FALSE, balance = FALSE, desum = FALSE, regress.file = '', nboot.rescale = 0, diff.scale.estimator = NULL, scale.file = '', test.name = 'h0dist.diff.tests', file.base = test.name, show.progress = TRUE, null.quant.fun = NULL, fdr = if(is.null(quant.null.fun)) NULL else 0.2, ...)
{
  h0DistErrRates(null.cdfs = list(null.cdf), file.base = file.base, null.quant.funs = if(is.null(null.quant.fun)) NULL else list(null.quant.fun), fdrs = fdrs, multTestsObjs = list(diffTests(two.samples = two.samples, alternative = alternative, test.stat = test.stat, replace = replace, balance = balance, desum = desum, regress.file = regress.file, nboot.rescale = nboot.rescale, diff.scale.estimator = diff.scale.estimator, scale.file = scale.file, test.name = test.name, show.progress = show.progress, ...)))
}

diff.tests <- function(two.samples, nresample, alternative = c('two.sided', 'less', 'greater'), test.stat = gen.t.stat, replace = FALSE, balance = FALSE, desum = FALSE, regress.file = '', nboot.rescale = 0, diff.scale.estimator = NULL, scale.file = '', test.name = 'diff.tests', file.base = test.name, input.file.base = '', ascii = FALSE, input.ascii = FALSE, show.progress = TRUE, ...)
# two.samples is either a list of two matrices (variables by realizations), or is a data frame (realizations by variables), all columns of which are numeric, except the last column, which is a two-level factor.
{
  errRates(nresample = nresample, file.base = file.base, input.file.base = input.file.base, ascii = ascii, input.ascii = input.ascii, multTestsObjs = list(diffTests(two.samples = two.samples, alternative = alternative, test.stat = test.stat, replace = replace, balance = balance, desum = desum, regress.file = regress.file, nboot.rescale = nboot.rescale, diff.scale.estimator = diff.scale.estimator, scale.file = scale.file, test.name = test.name, show.progress = show.progress, ...)))
}

two.sample.sizes <- function(two.samples)
{
  d1 <- two.samples.as.data.frame(two.samples)
  last.column <- d1[, ncol(d1)]
  unique.last.column <- sort(unique(last.column))
  n <- sapply(unique.last.column, function(lev){sum(last.column == lev)})
  stopifnot(all(sapply(n, length) > 0))
  stopifnot(n[1] + n[2] == nrow(d1))
  n
}

resample.two.matrices <- function(two.matrices, subsample.sizes = c(NA, NA), replace = FALSE, report.subsampling = TRUE)
# The value returned is a data frame, only the last column of which is a factor; that factor has two unique elements
{
  if(!replace && all(is.na(subsample.sizes)))
    two.matrices
  else
  {
    old.sample.sizes <- sapply(two.matrices, ncol)
    if(all(is.na(subsample.sizes))) names(subsample.sizes) <- names(two.matrices)
    names.subsample.sizes <- names(subsample.sizes)
    subsample.sizes <- sapply(c(1, 2), function(i){if(is.na(subsample.sizes[i])) ncol(two.matrices[[i]]) else subsample.sizes[i]})
    names(subsample.sizes) <- names.subsample.sizes
    if(is.null(names(two.matrices))) stop('two.matrices has no names')
    sum1 <- sum(names(subsample.sizes) == names(two.matrices))
    sum2 <- sum(names(subsample.sizes) == rev(names(two.matrices)))
    if(sum1 + sum2 != 2 || sum1 == 1 || sum2 == 1) {print(paste('names(subsample.sizes) incompatible with names(two.matrices); c(sum1, sum2):', c(sum1, sum2))); browser()}
    names.two.matrices <- names(two.matrices)
    two.matrices <- lapply(c(1, 2), function(i){sample.columns(mat = two.matrices[[i]], size = subsample.sizes[names(two.matrices)[i]], replace = replace)})
    names(two.matrices) <- names.two.matrices
    if(is.null(names(two.matrices))) stop('lapply removed names')
    new.sample.sizes <- sapply(two.matrices, ncol)
    if(report.subsampling && any(old.sample.sizes != new.sample.sizes))
    {
      cool.cat('Old sample sizes: ')#, old.sample.sizes[1], ' and ', old.sample.sizes[2])
      print(old.sample.sizes)
      cool.cat('New sample sizes: ')#, new.sample.sizes[1], ' and ', new.sample.sizes[2])
      print(new.sample.sizes)
    }
    two.matrices
  }
}

two.matrices.as.data.frame <- function(two.matrices, subsample.sizes = c(NA, NA), bootstrap.sample = FALSE, report.subsampling = TRUE)
# The value returned is a data frame, only the last column of which is a factor; that factor has two unique elements
{
  if(any(is.finite(subsample.sizes)) && bootstrap.sample) warning('Permutation and bootstrap combined in the same realization.')
  two.submatrices <- resample.two.matrices(two.matrices, subsample.sizes = subsample.sizes, report.subsampling = report.subsampling)
  two.samples.as.data.frame(two.samples = if(bootstrap.sample) resample.two.matrices(two.submatrices, replace = TRUE) else two.submatrices)
}

two.samples.as.data.frame <- function(two.samples)
# The value returned is a data frame, only the last column of which is a factor; that factor has two unique elements
{
  if(is.list(two.samples) && is.matrix(two.samples[[1]]) && is.matrix(two.samples[[2]]))
    d1 <- matrices.as.data.frame(two.samples)
  else if(is.data.frame(two.samples))
    d1 <- two.samples
  else
    stop('two.samples is neither a data frame nor a list of matrices.')

  last.column <- d1[, ncol(d1)]
  unique.last.column <- sort(unique(last.column))
  stopifnot(length(unique.last.column) == 2)
  num.levels <- length(levels(unique.last.column))
  if(num.levels != 2) warning(paste('two.samples has', num.levels, 'levels'))
  if(!all(levels(last.column) == unique.last.column)) warning('order of levels might be transposed')

  d1  
}

t.null.cdf <- function(stat, two.samples)
{
  m.n <- two.sample.sizes(two.samples)
  pt(stat, df = m.n[1] + m.n[2] - 2)
}

t.null.quant.fun <- function(p, two.samples)
{
  m.n <- two.sample.sizes(two.samples)
  qt(p, df = m.n[1] + m.n[2] - 2)
}

t.test.stat <- function(x, y) {gen.t.stat(x, y, var.equal = TRUE)}

wilcox.null.cdf <- function(stat, two.samples, m.n = NULL)
{
  stopifnot(xor(is.null(two.samples), is.null(m.n)))
  if(is.null(m.n)) m.n <- two.sample.sizes(two.samples)
  pwilcox(unzeroed.wilcox.statistic(zeroed.w = stat, m = m.n[1], n = m.n[2]), m = m.n[1], n = m.n[2])
}

null.zeroed.wilcox.statistic <- function(max.null.auc, two.samples, m.n = NULL, round.off = FALSE)
{
  stopifnot(xor(is.null(two.samples), is.null(m.n)))
  if(is.null(m.n)) m.n <- two.sample.sizes(two.samples)
  m <- m.n[1]; n <- m.n[2]
  zeroed.w <- auc.zeroed.wilcox.statistic(area.under.curve = max.null.auc, m = m, n = n, round.off = round.off)
  stopifnot(zeroed.w >= 0)
  zeroed.w
}

wilcox.null.quant.fun <- function(p, two.samples, m.n = NULL)
{
  stopifnot(xor(is.null(two.samples), is.null(m.n)))
  if(is.null(m.n)) m.n <- two.sample.sizes(two.samples)
  zeroed.wilcox.statistic(w = qwilcox(p, m = m.n[1], n = m.n[2]), m = m.n[1], n = m.n[2])
}

wilcox.test.stat <- function(x, y, ...)
{
  if(sum(is.finite(x)) > 0 && sum(is.finite(y)) > 0)
    zeroed.wilcox.statistic(w = wilcox.test(x, y, ...)$statistic, m = length(x), n = length(y))
  else
    0
}

auc.test.stat <- function(x, y, ...)
{  1 * (wilcox.area.under.curve(zeroed.w = wilcox.test.stat(x, y, ...), m = length(x), n = length(y), max.auc = 1.1) - 0.5)}

new.effect.size.estimation.table <- function(stat, bias, correct.stat, ran1.stats, ran2.stats, effect.size, monotone.effect.size = NULL)
{
  datf <- data.frame(stat = stat, bias = bias, correct.stat = correct.stat, ran1.stats = ran1.stats, ran2.stats = ran2.stats, effect.size = effect.size)
  if(!is.null(monotone.effect.size)) datf$monotone.effect.size <- monotone.effect.size
  datf
}

adjusted.effect.size.estimation.table <- function(effect.size.estimation.table)
{  !is.null(effect.size.estimation.table$monotone.effect.size)}

read.effect.size.estimation.table <- function(file)
{
  datf <- read.spreadsheet(file)
  datf
#  attach(datf)
#  new.effect.size.estimation.table(stat = stat, bias = bias, correct.stat = correct.stat, ran1.stats = ran1.stats, ran2.stats = ran2.stats, effect.size = effect.size) # row.names not supported
}

adjust.effect.size.estimation.table <- function(effect.size.estimation.table, transform.stat = function(zeroed.auc){abs(zeroed.auc) + 0.5}, take.min.stat = TRUE, alternative = c('two.sided', 'less', 'greater'), monotone = TRUE, jitter = NULL, bioinfo.algor = FALSE)
{
  alternative <- match.arg(alternative)
  stopifnot(alternative == 'two.sided')
  if(adjusted.effect.size.estimation.table(effect.size.estimation.table)) {stop('effect.size.estimation.table is already adjusted')}
  datf <- effect.size.estimation.table
  attach(datf)
  if(!is.null(transform.stat))
  {
    stat <- transform.stat(stat)
    correct.stat <- transform.stat(correct.stat)
    ran1.stats <- transform.stat(ran1.stats)
    ran2.stats <- transform.stat(ran2.stats)
    effect.size <- transform.stat(effect.size)
  }
  if(take.min.stat)
  {
    stopifnot(all(stat >= 0))
    correct.stat <- pmin(correct.stat, stat)
    effect.size <- pmin(effect.size, stat)
  }
  stopifnot(length(stat) == length(effect.size))
  get.monotone.effect.size <- function()
  { 
    if(monotone)
    {
      if(bioinfo.algor) print('Using the Bioinformatics algorithm')
      stat.ranks <- integer.rank(abs(stat), jitter = jitter)
      stopifnot(length(stat.ranks) == length(stat))
      names(effect.size) <- make.names(1:length(effect.size))
      if(length(effect.size) != length(stat.ranks)) {print('monotone problem'); browser()}
      monotone.effect.size <- if(bioinfo.algor)
      {
        l <- sapply(1:length(stat.ranks), function(r){which(stat.ranks == r)})
        stopifnot(length(unique(l)) == length(stat.ranks))
        t.tilde <- rep(NA, length(stat.ranks))
        t.tilde[l[length(l)]] <- effect.size[l[length(l)]]
        for(r in ((length(l) - 1):1))
          t.tilde[l[r]] <- min(effect.size[l[r]], t.tilde[l[r + 1]])
        t.tilde
      }
      else
      {
        sorted.effect.size <- sort.by.integer.rank(effect.size, ranks = stat.ranks)
        stopifnot(all(sort(sorted.effect.size) == sort(effect.size)))
        sorted.monotone.effect.size <- sorted.effect.size
        for(i in (length(sorted.monotone.effect.size) - 1):1)
        {  sorted.monotone.effect.size[i] <- min(sorted.monotone.effect.size[i], sorted.monotone.effect.size[i+1])}
        sorted.monotone.effect.size[names(effect.size)]
      }
      stopifnot(all(names(monotone.effect.size) == names(effect.size)))
      if(!bioinfo.algor) stopifnot(all(sort(monotone.effect.size) == sort(sorted.monotone.effect.size)))
      monotone.effect.size
    }
    else
      rep(NA, length(stat))
  }
  monotone.effect.size <- get.monotone.effect.size()
  if(is.null(monotone.effect.size) || length(monotone.effect.size) != length(stat))
  { print('monotone.effect.size error'); browser()}
  new.effect.size.estimation.table(stat = stat, bias = bias, correct.stat = correct.stat, ran1.stats = ran1.stats, ran2.stats = ran2.stats, effect.size = effect.size, monotone.effect.size = monotone.effect.size)
}

#sort.by.stats <- function(stats.to.sort, stats.for.ranks)
#{
#  stopifnot(any(stats.to.sort < 0 | stats.for.ranks < 0))
#  
#}

effect.size.estimation.plot <- function(effect.size.estimation.table, x = NULL, plot.bias = TRUE, xlab = if(plot.bias) 'AUC' else 'uncorrected AUC estimate', ylab = if(plot.bias) 'bias' else 'corrected AUC estimate', transform.stat = function(zeroed.auc){abs(zeroed.auc) + 0.5}, ylim = NULL, sort.by.rank = TRUE, simple.sort = FALSE, take.min.stat = TRUE, use.points = TRUE, alternative = c('two.sided', 'less', 'greater'), jitter = NULL, monotone = TRUE)
# x is the vector of true parameter values, generally only known in theoretical models, as for simulations.
{
  alternative <- match.arg(alternative)
  stopifnot(alternative == 'two.sided')
  if(!is.null(x) && !plot.bias) stop('x must be NULL if plot.bias is FALSE')
  if(sort.by.rank && simple.sort) {stop('sort.by.rank is incompatible with simple.sort')}
  datf <- if(adjusted.effect.size.estimation.table(effect.size.estimation.table))
    effect.size.estimation.table
  else
    adjust.effect.size.estimation.table(effect.size.estimation.table, transform.stat = transform.stat, take.min.stat = take.min.stat, alternative = alternative, monotone = monotone)
  attach(datf)
  if(monotone)
  {
    if(is.null(datf$monotone.effect.size)) stop('monotone.effect.size is missing')
    if(length(datf$monotone.effect.size) != length(stat)) {print('monotone.effect.size length problem'); browser()}
    effect.size <- datf$monotone.effect.size
  }
  if(is.null(x))
  {
    if(plot.bias)
      plot(stat, stat - effect.size, xlab = xlab, ylab = ylab, ylim = ylim)
    else
      plot(stat, effect.size, xlab = xlab, ylab = ylab, ylim = ylim)
  }
  else
  {
    stopifnot(plot.bias)
    if(sort.by.rank)
    {
      stat.ranks <- integer.rank(abs(stat), jitter = jitter)
      stopifnot(length(stat.ranks) == length(effect.size))
    }
    ordered.estimates <- function(unordered.estimates)
    {
      if(sort.by.rank) sort.by.integer.rank(unordered.estimates, ranks = stat.ranks)
      else
      {
        if(simple.sort) sort(unordered.estimates) else unordered.estimates
      }
    }
    y1 <- ordered.estimates(stat) - x
    if(is.null(ylim)) ylim <- range((if(is.null(transform.stat)) effect.size.estimation.table$effect.size else transform.stat(effect.size.estimation.table$effect.size)) - x)
    plot(x, y1, type = 'l', lty = 1, xlab = xlab, ylab = ylab, ylim = ylim)  # sort.by.integer.rank <- function(vec, ranks = integer.rank(vec))
    y2 <- ordered.estimates(effect.size) - x
    if(use.points) points(x, y2) else lines(x, y2, lty = 3)
  }
  cool.cat(paste('effect.size.estimation.plot', xlab, ylab, date(), sep = ', '))
  data.frame(stat = stat, effect.size = effect.size)
}

effect.size.estimation <- function(two.samples, nresample, alternative = c('two.sided', 'less', 'greater'), test.stat = auc.test.stat, subsample = TRUE, file = 'effect.size.xls', using.p.value.stats = FALSE, browse = FALSE, jitter.sd = NULL, bioinfo.algor = FALSE)
{
  alternative <- match.arg(alternative)
  two.matrices <- if(is.data.frame(two.samples)) data.frame.as.matrices(two.samples) else two.samples
  nvar <- nrow(two.matrices[[1]])
  stopifnot(nvar == nrow(two.matrices[[2]]))
  if(bioinfo.algor) print('Using the algorithm in Bioinformatics')

  resigned.stats <- function(stats) {as.vector(weighted.stat.fcns(weights = 1, alternatives = alternative, using.p.value.stats = using.p.value.stats)$resigned.stat(matrix(stats, ncol = 1)))}
  stats.from.two.mats <- function(two.mats) {vec <- sapply(1:nvar, function(i){test.stat(two.mats[[1]][i, ], two.mats[[2]][i, ])}); stopifnot(is.vector(vec)); stopifnot(length(vec) == nvar) ; vec}
  resigned.stats.from.two.mats <- function(two.mats) {resigned.stats(stats.from.two.mats(two.mats))}
  stats <- stats.from.two.mats(two.matrices)
  rs.stats <- resigned.stats(stats)
  rs.ranks <- integer.rank(rs.stats, jitter.sd = jitter.sd)
#  biased.rs.stats <- rep(NA, length(rs.stats))
#  sorted.biased.rs.stats <- rep(NA, length(rs.stats))
#  unbiased.rs.stats <- rep(NA, length(rs.stats))
  
  perm.get.diffs <- function()
  {
    stop('perm.get.diffs needs to have its return value made consistent with that of subsample.get.diffs')
    random.matrices <- sample.matrices(two.matrices, replace = FALSE)
    data.for.biased.estimates <- random.matrices
    data.for.unbiased.estimates <- random.matrices
    sorted.biased.rs.stats <- sort(resigned.stats.from.two.mats(data.for.biased.estimates))
    unbiased.rs.stats <- resigned.stats.from.two.mats(data.for.unbiased.estimates)
    diffs <- sorted.biased.rs.stats[rs.ranks] - unbiased.rs.stats
    stopifnot(is.vector(diffs))
    diffs
  }
  
  subsample.get.diffs <- function()
  {
    submatrix.ncol <- function(mat){trunc(ncol(mat)/2)} #sapply(two.matrices, function(mat){trunc(ncol(mat)/2)})
    get.columns <- function(mat)
    {
      submat.ncol <- submatrix.ncol(mat)
      cols <- sample(1:(2 * submat.ncol), replace = FALSE)
      list(columns.for.biased.estimates = cols[1:submat.ncol], columns.for.unbiased.estimates = cols[(submat.ncol + 1):(2 * submat.ncol)])
    }
    get.submatrices <- function(mat)
    {
      column.list <- get.columns(mat)
      list(mat.for.biased.estimates = mat[, column.list$columns.for.biased.estimates], mat.for.unbiased.estimates = mat[, column.list$columns.for.unbiased.estimates])
    }
    submats.list <- lapply(two.matrices, get.submatrices)
    data.for.biased.estimates <- list(submats.list[[1]]$mat.for.biased.estimates, submats.list[[2]]$mat.for.biased.estimates)
    data.for.unbiased.estimates <- list(submats.list[[1]]$mat.for.unbiased.estimates, submats.list[[2]]$mat.for.unbiased.estimates)

    biased.rs.stats <- resigned.stats.from.two.mats(data.for.biased.estimates)
    sorted.biased.rs.stats <- sort(biased.rs.stats)
    unbiased.rs.stats <- resigned.stats.from.two.mats(data.for.unbiased.estimates)
    diffs <- sorted.biased.rs.stats[rs.ranks] - unbiased.rs.stats
    stopifnot(is.vector(diffs))
#    ranks.and.effect.sizes <- cbind(rank(biased.rs.stats), unbiased.rs.stats)
    ranks <- integer.rank(biased.rs.stats, jitter.sd = jitter.sd)
    effect.sizes <- unbiased.rs.stats
    rs.effect.sizes <- if(bioinfo.algor)
    {
      stopifnot(length(effect.sizes) == length(rs.ranks))
      t.threestars.sorted <- sapply(1:length(effect.sizes), function(i) {mean(effect.sizes[which(ranks == i)])})
      t.threestars <- sapply(1:length(effect.sizes), function(i) {t.threestars.sorted[rs.ranks[i]]})
      t.threestars
    }
    else
      sapply(rs.ranks, function(rs.rank){mean(effect.sizes[which(ranks == rs.rank)])})
    if(!all(is.finite(rs.effect.sizes))) {print('rs.effect.sizes problem'); browser()}
    data.frame(diffs = diffs, biased.rs.stats = biased.rs.stats, unbiased.rs.stats = unbiased.rs.stats, rs.effect.sizes = rs.effect.sizes)
  }
  
  get.diffs <- function() {if(subsample) subsample.get.diffs() else perm.get.diffs()}

  stopifnot(alternative == 'two.sided')
  
  diffs.datfs <- lapply(1:nresample, function(i){get.diffs()})
#  diffs <- sapply(1:nresample, function(i) {get.diffs()$diffs})
  diffs <- sapply(diffs.datfs, function(dd){dd$diffs})
  rs.effect.sizes.mat <- sapply(diffs.datfs, function(dd){dd$rs.effect.sizes})
  diffs.datf <- diffs.datfs[[1]]
  stopifnot(nrow(diffs) == nvar && ncol(diffs) == nresample)
  biases <- apply(diffs, 1, mean)
  mean.rs.effect.sizes <- apply(rs.effect.sizes.mat, 1, mean)
  stopifnot(length(biases) == nvar)
  
  corrected.rs.stats <- pmin(rs.stats, pmax(0, rs.stats - biases))
  boo <- corrected.rs.stats < 0 | corrected.rs.stats > abs(stats)
  if(any(boo)) {cool.cat('which(boo): ', which(boo)); warning('corrected.rs.stats out of range')}
  stopifnot(length(corrected.rs.stats) == length(stats))
  
  datf <- new.effect.size.estimation.table(stat = stats, bias = biases, correct.stat = sign(stats) * corrected.rs.stats, ran1.stats = sign(stats) * diffs.datf$biased.rs.stats, ran2.stats = sign(stats) * diffs.datf$unbiased.rs.stats, effect.size = sign(stats) * mean.rs.effect.sizes)
  if(saving.results(file)) save.spreadsheet(datf, file = file)
  if(browse) {print('Browsing: the main information is in diffs.datfs'); browser()}
  datf
}

auc.estimation <- function(two.samples, nresample, alternative = c('two.sided', 'less', 'greater'), file = 'auc.xls', using.p.value.stats = FALSE)
{
  alternative <- match.arg(alternative)
  stopifnot(alternative == 'two.sided')
  datf <- effect.size.estimation(two.samples, nresample, alternative = c('two.sided', 'less', 'greater'), test.stat = wilcox.test.stat, file = '', using.p.value.stats = FALSE)
  if(saving.results(file)) save.spreadsheet(datf, file = file)
  m.n <- two.sample.sizes(two.samples)
#  w <- sapply(datf$stat, function(zeroed.w) {unzeroed.wilcox.statistic(zeroed.w, m = m.n[1], n = m.n[2])}
  auc.boo <- zeroed.wilcox.statistic.in.range(datf$stat, m = m.n[1], n = m.n[2])
  if(!all(auc.boo)) {cool.cat('which(auc.boo): ', which(auc.boo)); browser('stat out of range')}
  datf$auc <- sapply(abs(datf$stat), function(zeroed.w){wilcox.area.under.curve(zeroed.w = zeroed.w, m = m.n[1], n = m.n[2], max.auc = 1.1)})
  if(saving.results(file)) save.spreadsheet(datf, file = file)
  cool.cat('Finishing auc.estimation on ', date())
  auc.boo <- zeroed.wilcox.statistic.in.range(datf$correct.stat, m = m.n[1], n = m.n[2])
  if(!all(auc.boo)) {cool.cat('which(auc.boo): ', which(auc.boo)); browser('stat out of range')}
#  if(!all(zeroed.wilcox.statistic.in.range(datf$correct.stat, m = m.n[1], n = m.n[2]))) {print('correct.stat out of range'); browser()}
  datf$correct.auc <- sapply(abs(datf$correct.stat), function(zeroed.w){wilcox.area.under.curve(zeroed.w = zeroed.w, m = m.n[1], n = m.n[2], max.auc = 1.1)})
  if(saving.results(file)) save.spreadsheet(datf, file = file)
  datf
}

diffTests <- function(two.samples, subsample.sizes = c(NA, NA), bootstrap.sample = FALSE, alternative = c('two.sided', 'less', 'greater'), test.stat = gen.t.stat, using.p.value.stats = identical(test.stat, gen.t.pstat), replace = FALSE, balance = FALSE, desum = FALSE, regress.file = '', nboot.rescale = 0, diff.scale.estimator = NULL, scale.file = '', test.name = 'diffTests', show.progress = TRUE)
# 'levels(' changed to 'unique(' in many places on 9/5/02 and on 9/9/02.
# Created 4/6/02 by David R. Bickel.
# d1 is a data frame, only the last column of which is a factor; that factor has two unique elements
{
  if(show.progress) cool.cat('Starting diffTests() on ', date())  
  two.matrices <- if(is.data.frame(two.samples)) data.frame.as.matrices(two.samples) else two.samples
  if(!(is.list(two.matrices) && length(two.matrices) == 2)) {stop('two.samples is not in the correct format.')}
  stopifnot(length(subsample.sizes) == 2)
  if(is.null(names(subsample.sizes)))
  {
    if(!all(is.na(subsample.sizes))) stop('names of subsample.sizes must be specified')
    names(subsample.sizes) <- names(two.matrices)
  }
  d1 <- two.matrices.as.data.frame(two.matrices = two.matrices, subsample.sizes = subsample.sizes, bootstrap.sample = bootstrap.sample, report.subsampling = show.progress)
  if(show.progress) print(dim(d1))
    
  nvar <- ncol(d1) - 1
  var.names <- names(d1)[1:nvar]
  if(desum) d1 <- desum(d1, location.estimator = mean, regress.file = regress.file)
  if(nboot.rescale > 0 || is.function(diff.scale.estimator))
  {
    if(nboot.rescale > 0 && is.function(diff.scale.estimator)) stop('diff.tests cannot use bootstrap SE and specified diff.scale.estimator')
    if(nboot.rescale > 0) se.fcn <- function(x, y) {boot.diff.se(x, y, nboot = nboot.rescale, location.estimator = mean)}
    if(is.function(diff.scale.estimator)) se.fcn <- diff.scale.estimator
    d1 <- rescale.diff(d1, diff.scale.estimator = se.fcn, scale.file = scale.file)
  }
  d1.factor <- d1[,ncol(d1)]
  if(!(is.factor(d1.factor)&&length(unique(d1.factor))==2)) stop('The last column of d1 must be a factor of two unique elements.')
  gen.t.teststat <- function(x.vec, y.vec)
  { ts <- test.stat(x.vec, y.vec); if(is.null(ts)) stop('test.stat yields NULL'); ts}
    
  teststat <- function(variable.vector, some.factor) # change variable.index to variable.vector to enable bootstrap
  {
    vector.pair <- split.into.pair(variable.vector, some.factor) # d1[, variable.index]
    tau <- gen.t.teststat(vector.pair[[1]], vector.pair[[2]])
    if(!is.finite(tau)) {print('tau is not finite'); browser()}
    tau
  }
  teststats <- function(some.factor)
  {
    apply(d1[, 1:nvar, drop=FALSE], 2, function(vec) teststat(vec,some.factor))
  }
  arrange.levels <- function(sorted.factor)
  {
    if(!identical(sort(unique(sorted.factor)), sort(unique(d1.factor)))) {print('Inconsistent elements in arrange.levels.'); browser()}
    levs <- sorted.factor
    levs[order(d1.factor)] <- sorted.factor
    levs
  }
  factor.pair <- split.into.pair(d1.factor, d1.factor)
  test.arrange.levels <- function()
  {
    if(!identical(TRUE, all(sort(c(as.vector(unique(factor.pair[[1]])), as.vector(unique(factor.pair[[2]])))) == sort(as.vector(unique(d1.factor))))))
      {print('Inconsistent unique elements in test.arrange.levels.'); browser()}
    sorted.d1.factor <- combine.factors(factor.pair[[1]], factor.pair[[2]])
    if(d1.factor != arrange.levels(sorted.d1.factor)) {cat('Please enter Q and report this bug in arrange.levels to bickel@prueba.info: ', d1.factor, '\n'); browser(); if(balance) stop('Bug in internal function arrange.levels of diff.tests.')}
  }
  test.arrange.levels()
  resample <- function()
  {
    if(replace)
      stop('replace=TRUE not yet implemented. Use boot.diff.tests instead.')
    else
    {
      if(balance)
      {
        resampled.pair <- balance.sample(factor.pair[[1]], factor.pair[[2]], replace=FALSE)
        arrange.levels(combine.factors(resampled.pair[[1]], resampled.pair[[2]]))
      }
      else
        sample(d1.factor)
    }
  }
  
  ts.vec <- teststats(d1.factor)
  if(length(ts.vec) != length(var.names))
  { print(date()); print('Inconsistency between names(ts.vec) and var.names'); browser()}
  else
    names(ts.vec) <- var.names
  if(!is.vector(ts.vec) || any(!is.finite(ts.vec))) {print('ts.vec is not a vector of numbers'); browser()}

  alternative <- match.arg(alternative)
  if(show.progress) cool.cat('Finishing diffTests() on ', date())  
  multTests(getTestName = function(){test.name}, getComparison = function(){as.vector(unique(d1[,ncol(d1)])[1:2])}, getNumVars = function(){nvar}, getTestStats = function(){ts.vec}, getNullStats = function(){teststats(resample())}, getAlternative = function(){alternative}, getUsingPValueStats = function() {using.p.value.stats})
}



    
sorted.matrices <- function(d2) # a better implementation might make use of split()
# this function splits sorted.matrix(d2) into two matrices.
# Created 5/9/02 by David R. Bickel.
# 8/2/02. Error checking added.
# d2 is a data frame, only the last two columns of which are factors; the first factor has two levels and the second factor specifies which rows are paired.
{
  half.nrow <- (nrow(d2) / 2)

    sorted.matrix <- function(d2)
    # this function returns a matrix with rows of the first group and then rows of the second group, ordered consistently from group to group.
    # Created 5/9/02 by David R. Bickel.
    # d2 is a data frame, only the last two columns of which are factors; the first factor has two levels and the second factor specifies which rows are paired.
    {
        nvar <- ncol(d2) - 2
        fac1 <- d2[, nvar + 1]
        fac2 <- d2[, nvar + 2]
        if(!is.factor(fac1) || length(unique(fac1)) != 2) stop('the group column of d2 must be a factor of two unique elements')
        if(!is.factor(fac2) || length(unique(fac2)) != nrow(d2) / 2)
        {print('the pair column of d2 must be a factor of nrow(d2) / 2 unique elements'); browser()}
        datf <- d2[order(fac1, fac2), ]
        fac1b <- datf[, nvar + 1]
        fac2b <- datf[, nvar + 2]
        if(!identical(TRUE, all(fac1b[1:half.nrow] == fac1b[1]))) stop('Factor error #1: the first factor must have equal numbers of each of two levels.')
        if(!identical(TRUE, all(fac1b[(half.nrow+1):(2*half.nrow)] == fac1b[2*half.nrow]))) stop('Factor error #2: the first factor must have equal numbers of each of two levels.')
        if(!identical(TRUE, all(fac2b[1:half.nrow] == fac2b[(half.nrow+1):(2*half.nrow)]))) stop('Factor error #3: the second factor must be consistent with the first factor, such that the two factors together uniquely specify each row, with the first factor specifying the group, and the second factor specifying the pair.')
        
        as.matrix(datf[, 1:nvar])
    }
    
  sorted.mat <- sorted.matrix(d2)
  list(sorted.mat[1 : half.nrow, ], sorted.mat[(half.nrow + 1) : nrow(sorted.mat), ])
}

null.diff.matrix <- function(d2)
# Created 5/9/02 by David R. Bickel.
# d2 is a data frame, only the last two columns of which are factors; the first factor has two levels and the second factor specifies which rows are paired.
{
  sorted.mats <- sorted.matrices(d2)
  rows.per.matrix <- nrow(sorted.mats[[1]])
  null.diff.vec <- function(sorted.mat)
  {
    sub <- numeric()
    nrows <- nrow(sorted.mat)
    for(i in 1:(nrows-1))
      for(j in (i+1):nrows)
      {
        sub <- c(sub, sorted.mat[i, ] - sorted.mat[j, ])
      }
    sub
  }
  subvecs <- lapply(sorted.mats, null.diff.vec)
  mat <- matrix(c(subvecs[[1]], subvecs[[2]]), byrow = TRUE, ncol = ncol(sorted.mats[[2]]))
  ifelse(rbinom(nrow(mat), 1, .5), 1, -1) * mat  # Random vector added 8/26/02 to enable one-sided tests.
}

diff.matrix <- function(d2)
# Created 5/9/02 by David R. Bickel.
# d2 is a data frame, only the last two columns of which are factors; the first factor has two levels and the second factor specifies which rows are paired.
{
  sorted.mats <- sorted.matrices(d2)
  sorted.mats[[1]] - sorted.mats[[2]]
}


paired.diff.tests <- function(d2, nresample, randomize.signs = sign.permute.columns, alternative = c('two.sided', 'less', 'greater'), test.stat = gen.paired.t.stat, monitor.test.stat = NULL, test.name = 'paired.diff.tests', file.base = test.name, input.file.base = '', ascii = FALSE, input.ascii = FALSE, ...)
# d2 is a data frame, only the last two columns of which are factors; the first factor has two levels and the second factor specifies which rows are paired.
{
  alternative <- match.arg(alternative)

  errRates(nresample = nresample, file.base = file.base, input.file.base = input.file.base, ascii = ascii, input.ascii = input.ascii, multTestsObjs = list(pairedDiffTests(d2 = d2, randomize.signs = randomize.signs, alternative = alternative, test.stat = test.stat, monitor.test.stat = monitor.test.stat, test.name = test.name, ...)))
}

pairedDiffTests <- function(d2, randomize.signs = sign.permute.columns, alternative = c('two.sided', 'less', 'greater'), test.stat = gen.paired.t.stat, using.p.value.stats = identical(test.stat, gen.paired.t.pstat), monitor.test.stat = NULL, test.name = 'pairedDiffTests')
# 8/2/02. test.stat option replaces location.estimator and scale.estimator.
# Created 5/10/02 by David R. Bickel.
# d2 is a data frame, only the last two columns of which are factors; the first factor has two levels and the second factor specifies which rows are paired.
{
  diffs <- t(diff.matrix(d2))
  null.diffs <- t(null.diff.matrix(d2))
  nvar <- ncol(d2) - 2

  rownames(diffs) <- names(d2)[1:nvar]
  rownames(null.diffs) <- rownames(diffs)

  alternative <- match.arg(alternative)

  genPairedDiffTests(diffs = diffs, null.diffs = null.diffs, levels.compared = unique(d2[,ncol(d2)-1])[1:2], sample.within.levels = FALSE, randomize.signs = randomize.signs, alternative = alternative, test.stat = test.stat, using.p.value.stats = using.p.value.stats, monitor.test.stat = monitor.test.stat, test.name = test.name)
}

sample.matrices <- function(matrices, size = sum(sapply(matrices, ncol)), replace = FALSE, ...)
{
  stopifnot(all(nrow(matrices[[1]]) == sapply(matrices, nrow)))
  if(is.null(names(matrices))) names(matrices) <- sapply(1:length(matrices), function(i){paste('mat', toString(i), sep = '')})
  mat <- do.call('cbind', matrices)
  names.col <- do.call('c', lapply(1:length(matrices), function(i){rep(names(matrices)[i], ncol(matrices[[i]]))}))
  stopifnot(length(names.col) == ncol(mat))
  mat <- sample.columns(mat, size = size, replace = replace, sample.within.colnames = FALSE, ...)
  colnames(mat) <- names.col
  mats <- lapply(unique(names.col), function(nombre) {mat[, colnames(mat) == nombre]})
  names(mats) <- names(matrices)
  mats
}

sample.columns <- function(mat, size = ncol(mat), sample.within.colnames = FALSE, replace = FALSE, ...) # samples columns of matrix mat
{
  if(is.na(size)) return(mat)
  stopifnot(size <= ncol(mat))
  if(sample.within.colnames) # recursive
  {
    if(is.null(colnames(mat))) stop('Names of columns of mat not specified.')
    sorted.mat <- mat[, order(colnames(mat))]
    nsubmats <- size
    ncol.submat <- ncol(sorted.mat) / nsubmats
    submats <- list()
    for(i in 1:nsubmats)
    {
      submats[[i]] <- sorted.mat[, ((i-1) * ncol.submat + 1) : (i * ncol.submat)]
      if(i > 1 && colnames(submats[[i]]) == colnames(submats[[i-1]]))
        stop(paste(i, 'Conflict between colnames(mat) and size.'))
      stopifnot(identical(do.call('all.equal', as.list(colnames(submats[[i]]))), TRUE))
    }
    rmats <- lapply(submats, function(m) {sample.columns(m, size = 1, sample.within.colnames = FALSE, replace = replace, ...)})
    matrix(do.call('c', rmats), nrow = nrow(mat), dimnames = list(rownames(mat), do.call('c', lapply(rmats, colnames))))
  }
  else # stopping condition
  {
    col.indices <- sample(seq(from = 1, to = ncol(mat)), size = size, replace = replace, ...)
    rmat <- matrix(ncol = length(col.indices), nrow = nrow(mat))
    for(i in 1:length(col.indices))
      rmat[, i] <- mat[, col.indices[i]]
    rmat
  }
}

sign.permute.columns <- function(diffs) # a randomize.signs option of gen.paired.diff.tests
# This is the method of Efron et al. (2001).
{
  sign.matrix <- sign(diffs)
  abs(diffs) * sample.columns(sign.matrix, size = ncol(diffs), replace = FALSE)
}

sign.permute.column.elements <- function(diffs) # a randomize.signs option of gen.paired.diff.tests
# This is the method originally used in paired.diff.tests.
{
  mat <- apply(diffs, MARGIN = 2, FUN = function(column) {abs(column) * sign(sample(column))})
  if(dim(mat) != dim(diffs)) stop(paste('sign.permute.column.elements wrong dimensions:', dim(diffs), dim(mat)))
  mat
}

sign.permute.pairwise <- function(diffs) # a randomize.signs option of gen.paired.diff.tests
{
  diffs * matrix(ifelse(rbinom(length(diffs), 1, .5), 1, -1), nrow=nrow(diffs), ncol=ncol(diffs))
}



gen.paired.diff.tests <- function(diffs, null.diffs = diffs, nresample, levels.compared = c('A', 'B'), sample.within.levels = (ncol(diffs) < ncol(null.diffs)), randomize.signs = sign.permute.columns, alternative = c('two.sided', 'less', 'greater'), test.stat = gen.paired.t.stat, monitor.test.stat = NULL, test.name = 'gen.paired.diff.tests', file.base = test.name, input.file.base = '', ascii = FALSE, input.ascii = FALSE, ...)
# 8/2/02. Default randomize.signs changed to sign.permute.columns.
{
  alternative <- match.arg(alternative)

  errRates(nresample = nresample, file.base = file.base, input.file.base = input.file.base, ascii = ascii, input.ascii = input.ascii, multTestsObjs = list(genPairedDiffTests(diffs = diffs, null.diffs = null.diffs, levels.compared = levels.compared, sample.within.levels = sample.within.levels, randomize.signs = randomize.signs, test.stat = test.stat, monitor.test.stat = monitor.test.stat, alternative = alternative, test.name = test.name, ...)))
}

genPairedDiffTests <- function(diffs, null.diffs = diffs, levels.compared = c('A', 'B'), sample.within.levels = (ncol(diffs) < ncol(null.diffs)), randomize.signs = sign.permute.columns, alternative = c('two.sided', 'less', 'greater'), test.stat = gen.paired.t.stat, using.p.value.stats = identical(test.stat, gen.paired.t.pstat), monitor.test.stat = NULL, test.name = 'genPairedDiffTests')
# 8/2/02. genPairedDiffTests created from old gen.paired.diff.tests. 
# Created 6/14/02. The rows of diffs and null.diffs are variables and the columns are realizations.
# Before 6/21/02, this effectively always used sample.within.levels = FALSE.
# 7/12/02. Example of new option: monitor.test.stat = function(teststat) {abs(teststat) > 100}. Default test.stat modified to allow sd(diff.vector) == 0.
{
  alternative <- match.arg(alternative)
  if(nrow(diffs) != nrow(null.diffs)) stop(paste('Inconsistent numbers of variables:', nrow(diffs), nrow(null.diffs)))
  stopifnot(ncol(diffs) <= ncol(null.diffs))
  if(sample.within.levels)
  {
    if(ncol(diffs) == ncol(null.diffs)) warning('Columns of null.diffs cannot be resampled.')
    stopifnot(ncol(null.diffs) %% ncol(diffs) == 0)
  }

  test.stats <- function(diff.mat)
  {
    if(ncol(diff.mat) != ncol(diffs)) {print(ncol(diff.mat)); stop('comparing unequal sample sizes')}
    teststats <- apply(diff.mat, MARGIN = 1, FUN = test.stat)
    if(!is.null(monitor.test.stat))
    {
      monitor <- monitor.test.stat(teststats)
      if(!is.logical(any(monitor)) || is.na(any(monitor)) || any(monitor))
      {
        if(is.logical(any(monitor)) && !is.na(any(monitor)))
        {
          monitored.ind <- (1:length(teststats))[monitor]
          cat('Monitoring ', length(monitored.ind), ' test statistics.\n')
        }
        else
          print('any(monitor) is either NA or is not logical.')
        browser()
      }
    }
    teststats
  }

  sample.null.diffs <- function()
  {
    randomize.signs(sample.columns(null.diffs, ncol(diffs), replace = FALSE, sample.within.colnames = sample.within.levels))
  }
  
  ts.vec <- test.stats(diffs)
  names(ts.vec) <- rownames(diffs)

  multTests(getTestName = function() {test.name}, getComparison = function() {as.vector(levels.compared)}, getNumVars = function() {nrow(diffs)}, getTestStats = function() {ts.vec}, getNullStats = function() {test.stats(sample.null.diffs())}, getAlternative = function() {alternative}, getUsingPValueStats = function() {using.p.value.stats})
}


boot.diff.tests <- function(two.samples, nresample, location.estimator = mean, diff.scale.estimator = function(x, y) {pooled.scale(x, y, scale.estimator = meanad)}, alternative = c('two.sided', 'less', 'greater'), test.name = 'boot.diff.tests', file.base = test.name, input.file.base = '', ascii = FALSE, input.ascii = FALSE, ...)
{
  alternative <- match.arg(alternative)

  errRates(nresample = nresample, file.base = file.base, input.file.base = input.file.base, ascii = ascii, input.ascii = input.ascii, multTestsObjs = list(bootDiffTests(two.samples = two.samples, location.estimator = location.estimator, diff.scale.estimator = diff.scale.estimator, alternative = alternative, test.name = test.name, ...)))
}

bootDiffTests <- function(two.samples, location.estimator = mean, diff.scale.estimator = function(x, y) {pooled.scale(x, y, scale.estimator = meanad)}, alternative = c('two.sided', 'less', 'greater'), test.name = 'bootDiffTests')
# 8/2/02. two.samples is either a list of two matrices (variables by realizations), or is a data frame (realizations by variables), all columns of which are numeric, except the last column, which is a two-level factor.
{
  replace <- TRUE
  
  if(is.data.frame(two.samples)) two.samples <- data.frame.as.matrices(two.samples)
  if(!(is.list(two.samples) && length(two.samples) == 2)) {print('two.samples is not in the correct format.'); browser()}
  sample1 <- two.samples[[1]]; sample2 <- two.samples[[2]]
  sample.labels <- names(two.samples)
  if(is.null(sample.labels)) sample.labels <- c('sample1', 'sample2')

  if(nrow(sample1) != nrow(sample2)) stop('Unequal numbers of variables')
  scales <- numeric(nrow(sample1))
  for(i in 1:length(scales))
    scales[i] <- (if(is.function(diff.scale.estimator)) diff.scale.estimator(sample1[i, ], sample2[i, ]) else 1)
  
  zero.scale.ind <- (1:length(scales))[scales==0]
  if(length(zero.scale.ind) > 0)
  {
    cat(length(zero.scale.ind), ' scales are equal to zero, which will cause a division by 0 error if not corrected. Type zero.scale.ind to see their indices. To disable rescaling, either enter Q to quit and then call the boot.diff.tests function with the argument diff.scale.estimator=NULL, or enter the following two lines to do so without quitting:\n> scales <- rep(1, length(scales))\n> c\n')
    browser()
  }
  
  rescale.sample <- function(sample) {apply(sample, 2, FUN = function(column) {column / scales})}
  mat1 <- rescale.sample(sample1)
  mat2 <- rescale.sample(sample2)
  if(any(is.nan(mat1)) || any(is.nan(mat2))) {print('NaNs present in mat1 and/or mat2'); browser()}
  
  resample <- function(mat) {sample.columns(mat, sample.within.colnames = FALSE, replace = replace)}
  
  if(replace)
  {
    null.sample <- function(sample) {t(apply(sample, 1, FUN = function(vec) {vec - location.estimator(vec)}))}
    null1 <- null.sample(mat1)
    null2 <- null.sample(mat2)
    stopifnot(identical(all.equal(dim(mat1), dim(null1), dim(sample1)), TRUE))
    stopifnot(identical(all.equal(dim(mat2), dim(null2), dim(sample2)), TRUE))
  
    test.stat <- function(x, y) {location.estimator(x) - location.estimator(y)}
    test.stats <- function(samp1, samp2)
    {
      ts <- numeric(nrow(samp1))
      for(i in 1:length(ts))
        ts[i] <- test.stat(samp1[i, ], samp2[i, ])
      ts
    }
    null.stats <- function() {test.stats(resample(null1), resample(null2))}
  }
  else
  {
    stop('Bootstrapping is resampling with replacement. Use diff.tests for permutation tests (without replacement).')
  }
    
  ts.vec <- test.stats(mat1, mat2)
  if(identical(rownames(sample1), rownames(sample2)))
    names(ts.vec) <- rownames(sample1)
  else
    warning('Difference rownames for sample1 and sample2.')

  alternative <- match.arg(alternative)

  multTests(getTestName = function(){test.name}, getComparison = function(){sample.labels}, getNumVars = function(){nrow(sample1)}, getTestStats = function(){ts.vec}, getNullStats = null.stats, getAlternative = function(){alternative}, getUsingPValueStats = function() {FALSE})
}

statFcns <- function(multTestsObjs, weights)
# Called by empiricalBayes, errRates, and h0DistErrRates.
{
  stopifnot(length(weights) == length(multTestsObjs))
  nrows <- multTestsObjs[[1]]$getNumVars()
  stopifnot(identical(TRUE, all(sapply(multTestsObjs, function(obj) {obj$getNumVars()}) == nrows)))
  
  stat.fcn <- function(stat.fcns)
  {
    stats <- sapply(stat.fcns, function(fcn) {fcn()}, USE.NAMES = TRUE)
    if(!is.matrix(stats)) {print('ERROR: stats is not a matrix.'); browser()}
    if(ncol(stats) != length(multTestsObjs)) {print('ERROR: ncol(stats) != length(multTestsObjs).'); browser()}
    stats
  }
  teststat.fcn <- function() {stat.fcn(lapply(multTestsObjs, function(obj){obj$getTestStats}))}
  nullstat.fcn <- function() {stat.fcn(lapply(multTestsObjs, function(obj){obj$getNullStats}))}
  using.p.value.stats.fcn <- function()
  {
    upvs <- sapply(multTestsObjs, function(obj) {obj$getUsingPValueStats()})
    if(any(upvs != upvs[1])) stop('Incompatible test statistics: some were generated by the function p.value.stat, but others were not.')
    upvs[1]
  }

  list(teststat.fcn = teststat.fcn, nullstat.fcn = nullstat.fcn, using.p.value.stats.fcn = using.p.value.stats.fcn)
}

weighted.stat.fcns <- function(weights, effect.sizes = rep(0, length(weights)), alternatives, using.p.value.stats)
# Called by empiricalBayes and err.rates.
{
  stopifnot(length(weights) == length(effect.sizes))
  stopifnot(all(effect.sizes >= 0))
  if(using.p.value.stats)
  {
    stopifnot(is.numeric(weights))
    p.value.stat.weight.fcns <- function(test.emphases)
    # test.emphases gives the relative importance of each test, e.g., for three tests, test.emphases could be c(.01, .01, .03), c(.2, .3, .5), or c(1, 2, 5).
    {
      stopifnot(length(alternatives) == length(test.emphases))
      stopifnot(all(test.emphases >= 0))
      p.factors <- function() {1 / test.emphases}
      weighted.p.value <- function(p.factor, p.value) {pmin(1, p.factor * p.value)}
      p.facs <- p.factors()
      fcns <- as.list(p.facs)
      for(i in 1:length(p.facs))
      {
        cmd <- paste('function(pstat) {p.value.to.p.value.stat(weighted.p.value(p.factor = p.facs[', i, '], p.value = p.value.stat.to.p.value(pstat = pstat, alternative = alternatives[', i, '])), alternative = alternatives[', i, '], test.stat.is.high = (pstat > 0))}', sep='')
        fcns[[i]] <- eval(parse(text = cmd))
      }
      fcns
    }
    weights <- p.value.stat.weight.fcns(test.emphases = weights)
  }
  
  weighted.stat.vec <- function(weight, stat.vec)
  {
    stopifnot(length(weight) == 1)
    if(is.numeric(weight))
      weight * stat.vec
    else
      weight(stat.vec)
  }
  weighted.stat <- function(stat)
  {
    weights <- as.list(weights)
    sign.stat <- sign(stat)
    effect.mat <- sapply(effect.sizes, function(effect.size){rep(effect.size, nrow(stat))})
    stopifnot(all(dim(stat) == dim(effect.mat)))
    stopifnot(all(dim(sign.stat) == dim(stat)))
    centered.stat <- sign(stat) * pmax(0, abs(stat) - effect.mat)
    stopifnot(all(dim(stat) == dim(centered.stat)))
    if(all(effect.sizes == 0)) stopifnot(all(stat == centered.stat))
    ws <- sapply(1:length(weights), function(i) {weighted.stat.vec(weights[[i]], centered.stat[, i])}) # ws <- t(weights*t(stat))
    if(length(weights) != ncol(ws)) {print(length(weights)); print(ncol(ws)); stop('length(weights) != ncol(ws)')}
    stopifnot(all(dim(stat) == dim(ws)))
    ws
  }
  resigned.stat <- function(stat)
  {
    if(!is.matrix(stat)) {print('stat must be a matrix'); browser()}
    ws <- weighted.stat(stat)
    if(ncol(ws) != length(alternatives)) {print('ncol(ws) != length(alternatives)'); browser()}
    mat <- matrix(numeric(length(ws)), ncol = ncol(ws))
    for(i in 1:ncol(mat))
      mat[, i] <- switch(alternatives[i], 'two.sided' = abs(ws[, i]), 'less' = -ws[, i], 'greater' = ws[, i], stop('alternative must be two.sided, less, or greater'))
    mat
  }
  
  list(weighted.stat = weighted.stat, resigned.stat = resigned.stat)
}

posterior.data.frame <- function(variable, posterior.prob, stat, weighted.stat, test, comparison, alternative)
{
  data.frame(variable = variable, posterior.prob = posterior.prob, stat = stat, weighted.stat = weighted.stat, test = test, comparison = comparison, alternative = alternative)
}

empBayes.file.base <- function(file.base, weighted)
{
  separator <- if(weighted) '-wEB' else '-EB'
  paste(file.base, sep = separator, '')
}

save.empBayes <- function(empirical.bayes.output, weighted = FALSE, datf = NULL, file.base)
{
  save.results <- saving.results(file.base)

  if(save.results)
  {
    save.object(object = empirical.bayes.output, file.base = empBayes.file.base(file.base, weighted = weighted), object.name = paste(file.base, '.eb', sep = ''))
    if(!is.null(datf)) save.err.rates.table(err.rates.table = datf, file.base)
  }
}

load.empBayes <- function(file.base, weighted = FALSE, ...)
{
  load.object(file.base = empBayes.file.base(file.base, weighted = weighted), ...)
}

empiricalBayes <- function(nresample, multTestsObjs, weights = rep(1, length(multTestsObjs)), file.base = 'empiricalBayes', ratio.rule = 2, df = 5, portion.prob.from.nulldistr = 1 - 2 * pnorm(-.5), other.prob.from.nulldistr = NULL, logistic = TRUE, regress = logistic, ...)
{
  stat.fcns <- statFcns(multTestsObjs = multTestsObjs, weights = weights)
  alternatives <- sapply(multTestsObjs, function(obj) {obj$getAlternative()})
  wt.stat.fcns <- weighted.stat.fcns(weights = weights, alternatives = alternatives, using.p.value.stats = stat.fcns$using.p.value.stats.fcn())
  
  teststat.mat <- stat.fcns$teststat.fcn()
  nvar <- nrow(teststat.mat)
  stats.vec <- as.vector(wt.stat.fcns$resigned.stat(teststat.mat))
  stats.null.vec <- as.vector(sapply(1:nresample, function(i) {wt.stat.fcns$resigned.stat(stat.fcns$nullstat.fcn())}))
  
  empirical.bayes.output <- empirical.bayes(x = stats.vec, xnull = stats.null.vec, ratio.rule = ratio.rule, df = df, portion.prob.from.nulldistr = portion.prob.from.nulldistr, other.prob.from.nulldistr = other.prob.from.nulldistr, logistic = logistic, regress = regress, ...)

  post.probs <- sapply(stats.vec, empirical.bayes.output$prob.from.altdistr.fcn)
  test.names <- sapply(multTestsObjs, function(obj) {obj$getTestName()})
  comparisons <- matrix(sapply(multTestsObjs, function(obj) {obj$getComparison()}), ncol = length(multTestsObjs))
  
  datf <- posterior.data.frame(variable = as.vector(apply(teststat.mat, 2, function(dummy) {rownames(teststat.mat)})), posterior.prob = post.probs, stat = as.vector(teststat.mat), weighted.stat = as.vector(wt.stat.fcns$weighted.stat(teststat.mat)), test = as.vector(sapply(test.names, function(tn) rep(tn, nvar))), comparison = as.vector(apply(comparisons, MARGIN=2, FUN=function(comp){rep(paste(comp, collapse=' v. '), nvar)})), alternative = as.vector(sapply(alternatives, function(alts) rep(alts, nvar)))) # 'data.frame' changed to 'posterior.data.frame' on 9/23/02.
  
  save.empBayes(empirical.bayes.output = empirical.bayes.output, datf = datf, file.base = file.base)
  
  c(list(posterior.table = datf), empirical.bayes.output)
}

new.err.rates.table <- function(variable, p.value, fdr.value, stat, weighted.stat, null.prop.ge, test, comparison, alternative)
{
  data.frame(variable = variable, p.value = p.value, fdr.value = fdr.value, stat = stat, weighted.stat = weighted.stat, null.prop.ge = null.prop.ge, test = test, comparison = comparison, alternative = alternative)
}

create.err.rates.table <- function(variable, p.value, fdr.value, stat, weighted.stat, null.prop.ge, nvar, test.names, comparisons, alternatives)
# All arguments are vector columns of the value returned, except for nvar, test.names, comparisons, and alternatives.
{
  new.err.rates.table(variable = variable, p.value = p.value, fdr.value = fdr.value, stat = stat, weighted.stat = weighted.stat, null.prop.ge = null.prop.ge, test = as.vector(sapply(test.names, function(tn) rep(tn, nvar))), comparison = as.vector(apply(comparisons, MARGIN=2, FUN=function(comp){rep(paste(comp, collapse=' v. '), nvar)})), alternative = as.vector(sapply(alternatives, function(tn) rep(tn, nvar))))
}

adjust.wilcox.err.rates <- function(wilcox.err.rates.out, old.m, old.n, new.m, new.n, file.base = 'adjust.wilcox.err.rates', using.p.value.stats = FALSE, show.progress = TRUE, fdr = 0.2)
{
  attach(recover.err.rates.table(wilcox.err.rates.out))
  stopifnot(all(test == test[1]))
  stopifnot(all(comparison == comparison[1]))
  stopifnot(all(stat == weighted.stat))
  new.stat <- sapply(stat, function(st) {new.zeroed.wilcox.statistic(old.zeroed.w = st, old.m = old.m, old.n = old.n, new.m = new.m, new.n = new.n)})
  #new.err.rates.table(variable = variable, p.value = , fdr.value = , stat = new.stat, weighted.stat = new.stat, null.prop.ge = , test = test, comparison = comparison, alternative = alternative)
  ncomp <- 1
  
  h0dist.err.rates(null.cdfs = list(function(stat){wilcox.null.cdf(stat, two.samples = NULL, m.n = c(new.m, new.n))}), test.names = matrix(test[1], ncol=ncomp), comparisons = matrix(comparison[1], ncol=ncomp), weights = rep(1, ncomp), teststat.mat = matrix(new.stat, ncol = ncomp), alternatives = matrix(alternative[1], ncol=ncomp), file.base = file.base, using.p.value.stats = using.p.value.stats, show.progress = show.progress, null.quant.funs = list(function(p){wilcox.null.quant.fun(p, two.samples = NULL, m.n = c(new.m, new.n))}), fdrs = fdr)
}

save.err.rates.table <- function(err.rates.table, file.base)
{
  if(saving.results(file.base))
  {
    file.base.plus <- paste(file.base, '.xls', sep = '')
    if(is.null(err.rates.table))
      warning(paste('err.rates.table is NULL, so', file.base.plus,'cannot be saved.'))
    else
      save.spreadsheet(err.rates.table, file = file.base.plus)
  }
  else
    warning('Cannot save err.rates.table without a valid file.base.')
}

read.err.rates.table <- function(file = 'errRates.xls')
{
  read.spreadsheet(file, as.is = TRUE, comment.char = '')
}

recover.err.rates.table <- function(err.rates.out)
# err.rates.out may be a file name such as 'err.rates.xls' or the data frame output of err.rates
{
  datf <- if(is.character(err.rates.out)) read.err.rates.table(err.rates.out) else err.rates.out
  if(!rejections.table.is.empty(datf) && !is.data.frame(datf)) {print('bad err.rates.out'); browser()}
  datf
}

recover.err.rates.tables <- function(err.rates.out)
{
  if(is.data.frame(err.rates.out) || is.character(err.rates.out))
    recover.err.rates.table(err.rates.out)
  else
  {
    stopifnot(is.list(err.rates.out) && is.data.frame(err.rates.out[[1]]))
    err.rates.out
  }
}

default.summarize.err.rates <- function(tests.fun.out){c(min.p.value = min(tests.fun.out$p.value), num.genes.fdr05 = sum(reject(err.rates.out = tests.fun.out, rejections.out = rejections(tests.fun.out, fdr = .05, file = '', tolerance = .05), simplify = TRUE)))}

resample.tests <- function(nresample, tests.fun, summarize.err.rates = default.summarize.err.rates)
# tests.fun must be a function of a single argument (the iteration number) that takes a single sample.
# summarize.err.rates is a function of an err.rates.out object (the value returned by tests.fun) that returns a vector of named summary statistics.
# resample.tests returns a (summary statistic) by (sample) matrix of summary statistics for all samples.
# For example, see bio.boot.wilcox.diff.tests in BiobaseSupp.r.
{
  mat <- sapply(1:nresample, function(b){summarize.err.rates(tests.fun(b))}, simplify = TRUE)
  if(!is.matrix(mat)) warning('The value returned by boot.tests is invalid.')
  mat
}

ncomparisons <- function(test.names, comparisons, weights, teststat.mat, alternatives)
{
  ncomp <- ncol(comparisons)
  if(ncomp != ncol(teststat.mat)) {print(ncomp); print(ncol(teststat.mat)); stop('Columns of comparisons must correspond to those of teststat.mat.')}
  if(ncomp != length(weights)) {print(ncomp); print(length(weights)); stop('Columns of comparisons must correspond to elements of weights.')}
  if(!identical(TRUE, all.equal(length(test.names), ncomp, length(weights), length(alternatives)))) browser()

  stopifnot(identical(TRUE, all.equal(length(test.names), ncol(comparisons), length(weights), ncol(teststat.mat), length(alternatives)))) # redundant
  
  ncomp
}

h0DistErrRates <- function(null.cdfs, multTestsObjs, weights = rep(1, length(multTestsObjs)), effect.sizes = rep(0, length(weights)), file.base = 'h0DistErrRates', show.progress = TRUE, null.quant.funs = NULL, fdrs = if(is.null(null.quant.funs)) NULL else rep(0.2, ncol(comparisons)))
# The distributions of the null hypotheses are considered known and given by null.cdfs, a list of single-argument functions.
{
  stat.fcns <- statFcns(multTestsObjs = multTestsObjs, weights = weights)
  teststat.mat = stat.fcns$teststat.fcn()
  if(!all(is.finite(teststat.mat))) {print('teststat.mat is not a matrix of numbers'); browser()}
  stopifnot(length(null.cdfs) == length(multTestsObjs))
  
  h0dist.err.rates(null.cdfs, test.names = sapply(multTestsObjs, function(obj) {obj$getTestName()}), comparisons = matrix(sapply(multTestsObjs, function(obj) {obj$getComparison()}), ncol = length(multTestsObjs)), weights = weights, effect.sizes = effect.sizes, teststat.mat = teststat.mat, alternatives = sapply(multTestsObjs, function(obj) {obj$getAlternative()}), file.base = file.base, using.p.value.stats = stat.fcns$using.p.value.stats.fcn(), show.progress = show.progress, null.quant.funs = null.quant.funs, fdrs = fdrs)
}


h0dist.err.rates <- function(null.cdfs, test.names, comparisons = matrix('A', ncol=1), weights = rep(1, ncol(comparisons)), effect.sizes = rep(0, length(weights)), teststat.mat, alternatives, file.base = 'h0dist.err.rates', using.p.value.stats = FALSE, show.progress = TRUE, null.quant.funs = NULL, fdrs = if(is.null(null.quant.funs)) NULL else rep(0.2, ncol(comparisons)))
{
  if(show.progress) cool.cat('Starting h0dist.err.rates() on ', date())  
  nvar <- nrow(teststat.mat)
  ncomp <- ncomparisons(test.names = test.names, comparisons = comparisons, weights = weights, teststat.mat = teststat.mat, alternatives = alternatives)
  stopifnot(length(null.cdfs) == ncomp)
  stopifnot(is.null(null.quant.funs) || length(null.quant.funs) == ncomp)
  stopifnot(is.null(fdrs) || length(fdrs) == ncomp)
  stopifnot(!xor(is.null(fdrs), is.null(null.quant.funs)))

  if(show.progress) cool.cat('  Calling weighted.stat.fcns() on ', date())
  wt.stat.fcns <- weighted.stat.fcns(weights = weights, effect.sizes = effect.sizes, alternatives = alternatives, using.p.value.stats = using.p.value.stats)
  attach(wt.stat.fcns)
  stats.raw <- weighted.stat(teststat.mat)

#browser()
  if(show.progress) cool.cat('  Computing cdf.v on ', date())
  get.fast.stat.range <- function(alternative = c('two.sided', 'less', 'greater'), null.quant.fun, fdr)
  {
    alternative <- match.arg(alternative)
    low.stat.thres <- if(alternative == 'two.sided')
      null.quant.fun(fdr / 2)
    else
    {
      if(alternative == 'less')
        null.quant.fun(fdr)
      else
        -Inf
    }
    high.stat.thres <- if(alternative == 'two.sided')
      null.quant.fun(1 - fdr / 2)
    else
    {
      if(alternative == 'less')
        Inf
      else
        null.quant.fun(1 - fdr)
    }
    c(low.stat.thres, high.stat.thres)
  }
  fast.stat.ranges <- if(is.null(fdrs))
    lapply(1:ncomp, function(i){c(1, 0)})
  else
  {
    close.to.half <- null.cdfs[[1]](null.quant.funs[[1]](0.5))
    small.fraction <- 0.01
    if(abs(close.to.half - 0.5) > small.fraction) {print('0.5 != null.cdfs[[1]](null.quant.funs[[1]](0.5))'); browser()}
    lapply(1:ncomp, function(i){get.fast.stat.range(alternatives[i], null.quant.funs[[i]], fdrs[i])})
  }
  conservative.cdf <- function(alternative = c('two.sided', 'less', 'greater'), null.cdf, fast.stat.range)
  {
    stopifnot(is.vector(fast.stat.range) && length(fast.stat.range) == 2)
    function(stat.raw)
    {
      if(stat.raw > fast.stat.range[1] && stat.raw < fast.stat.range[2])
      {
        if(alternative == 'two.sided')
          0.5
        else
        {
          if(alternative == 'less')
            1
          else
            0
        }
      }
      else # not in fast.stat.range
        null.cdf(stat.raw)
    }
  }

  cdf.v <- sapply(1:ncomp, function(i) {sapply(stats.raw[, i], conservative.cdf(alternatives[i], null.cdfs[[i]], fast.stat.ranges[[i]]))})
  if(show.progress) cool.cat('  Computing p.v on ', date())
  p.v <- sapply(1:ncomp, function(i) {p.value(test.stat = stats.raw[, i], cdf = cdf.v[, i], alternative = alternatives[i])})
  if(show.progress) cool.cat('  Computing fdr.v on ', date())
  fdr.v <- sapply(p.v, function(threshold) { threshold / (sum(p.v <= threshold) / length(p.v)) })
  #sapply(1:ncomp, function(i) {stats.raw.vec <- stats.raw[, i]; sapply(stats.raw.vec, function(threshold) {(sum(stats.raw.vec >= threshold) / length(stats.raw.vec) / (1 - null.cdfs[[i]](threshold)))})})
  if(show.progress) cool.cat('  Computing null.prop.ge.v on ', date())
  null.prop.ge.v <- 1 - cdf.v  # sapply(1:ncomp, function(i) {1 - sapply(stats.raw[, i], null.cdfs[[i]])})

  if(is.null(rownames(teststat.mat))) rownames(teststat.mat) <- (1:nrow(teststat.mat))
  if(show.progress) cool.cat('  Calling create.err.rates.table() on ', date())  
  datf <- create.err.rates.table(variable = as.vector(apply(teststat.mat, 2, function(dummy) {rownames(teststat.mat)})), p.value = as.vector(p.v), fdr.value = as.vector(fdr.v), stat = as.vector(teststat.mat), weighted.stat = as.vector(stats.raw), null.prop.ge = as.vector(null.prop.ge.v), nvar = nvar, test.names = test.names, comparisons = comparisons, alternatives = alternatives)

  if(show.progress) cool.cat('Finishing h0dist.err.rates() on ', date())  
  if(saving.results(file.base)) save.err.rates.table(datf, file.base = file.base)
  datf
}

errRates <- function(nresample, multTestsObjs, weights = rep(1, length(multTestsObjs)), file.base = 'errRates', input.file.base = '', ascii = FALSE, input.ascii = FALSE)
# errRates: function of one or more multTests objects; calls err.rates
# multTestsObjs is a list, each element of which is a list returned by the multTests function
{
  stat.fcns <- statFcns(multTestsObjs = multTestsObjs, weights = weights)
    
  err.rates(nresample, test.names = sapply(multTestsObjs, function(obj) {obj$getTestName()}), comparisons = matrix(sapply(multTestsObjs, function(obj) {obj$getComparison()}), ncol = length(multTestsObjs)), weights = weights, teststat.mat = stat.fcns$teststat.fcn(), nullstat.fcn = stat.fcns$nullstat.fcn, alternatives = sapply(multTestsObjs, function(obj) {obj$getAlternative()}), file.base = file.base, input.file.base = input.file.base, ascii = ascii, input.ascii = input.ascii, using.p.value.stats = stat.fcns$using.p.value.stats.fcn())
}


err.rates <- function(nresample, test.names, comparisons = matrix('A', ncol=1), weights = rep(1, ncol(comparisons)), teststat.mat, nullstat.fcn, alternatives, file.base = 'err.rates', using.p.value.stats = FALSE, input.file.base = '', ascii = FALSE, input.ascii = FALSE)
# 8/20/02. using.p.value.stats is TRUE if all test statistics were generated by the function p.value.stat.
# 8/9/02. weights can now be a list of single-argument functions of test statistics.
# 6/5/02. comparisons is a matrix of column vectors, where each column vector specifies a comparison between two groups.
# weights is a vector of relative weights given to the comparisons.
# teststat.mat is a matrix of which each column corresponds to a column of comparisons and is a vector of the test stats of the data.
# nullstat.fcn is a function of no arguments that returns a matrix of random stats based on the null hypothesis, where each column vector specifies a comparison between two groups.
{
  save.results <- saving.results(file.base) # (! (is.null(file.base) || !is.character(file.base) || file.base==''))
  progress.end <- if(ascii) 'prgr.txt' else 'prgr'
  input.progress.end <- if(input.ascii) 'prgr.txt' else 'prgr'
  index.end <- 'indx.xls'
  if(save.results && identical(TRUE, file.base == input.file.base)) stop('file.base and input.file.base must be different.')
  nvar <- nrow(teststat.mat)
  ncomp <- ncomparisons(test.names = test.names, comparisons = comparisons, weights = weights, teststat.mat = teststat.mat, alternatives = alternatives)
#  if(!identical(test.names, unique(test.names)))
#    test.names <- sapply(1:length(test.names), function(i) {paste(test.names[i], i)})
  
  wt.stat.fcns <- weighted.stat.fcns(weights = weights, alternatives = alternatives, using.p.value.stats = using.p.value.stats)
  attach(wt.stat.fcns)
  stats.raw <- weighted.stat(teststat.mat)
  stats0 <- resigned.stat(teststat.mat)
  if(is.null(input.file.base) || !is.character(input.file.base) || input.file.base=='')
  {
    stats <- stats0
    if(any(is.nan(stats))) {cat('NaN present in ', length(stats[is.nan(stats)]),' stats.\n'); browser()} # added 7/12/02
    stats.null.maxs <- numeric(nresample)
    dns <- sapply(stats, function(s){length(stats[stats>=s])}) # matrix to vector
    fdns <- numeric(nvar * ncomp)
    null.nge <- numeric(nvar * ncomp)
    i0 <- 1
  }
  else
  {
    load(paste(input.file.base, input.progress.end))
    if(!identical(TRUE, all.equal(stats, stats0))) {print('Specified tests incompatible with input file.'); browser()}
    null.nge <- if(is.numeric(try(null.nge))) null.nge else rep(x = NA, times = nvar * ncomp)
      # err.rates prior to 9/24/02 did not use null.nge.
    i0 <- i # dget(paste(input.file.base, 'indx.xls'))
    cat('Recovering ', i0 - 1, ' iterations and adding ', nresample - i0 + 1, ' more iterations.\n')
  }
  stopifnot(i0 - 1 <= nresample && i0 >= 1 && nresample >= 1)
  save.progress <- function(i)
  {
      dput(i, file = paste(file.base,'indx.xls'))
      save(i, stats, stats.null.maxs, dns, fdns, null.nge, file = paste(file.base, progress.end), ascii = ascii)
      if(i > 0) # Carbon R 1.5.1- fails this consistency check for ascii = TRUE.
      {
        if(!identical(TRUE, all.equal(stats, stats0))) {print('stats consistency error.'); browser()}
        load(paste(file.base, progress.end))
        if(!identical(TRUE, all.equal(stats, stats0))) {print('I/O error.'); browser()}
      }
  }

  if(i0 <= nresample) for(i in i0:nresample)
  {
    i1 <- i - 1 # Before 9/19/02, i1 == i.
    if((i1<=5 || (i1<=150 && i1%%10 == 0) || (i1<=500 && i1%%25 == 0) || i1%%50 == 0) && save.results) save.progress(i)
    stats.null.unweighted <- nullstat.fcn()
    stats.null.raw <- weighted.stat(stats.null.unweighted)
    stats.null <- resigned.stat(stats.null.unweighted)
    if(any(is.nan(stats.null))) {cat('NaN present in ', length(stats.null[is.nan(stats.null)]),' stats.null.\n'); browser()}
    null.nge <- null.nge + sapply(stats.raw, function(s){sum(stats.null.raw >= s)})
    fdns <- fdns + sapply(stats, function(s){sum(stats.null >= s)}) # matrix to vector
    stats.null.maxs[i] <- max(stats.null)
  }
  save.progress(nresample + 1)
  
  p.v <- (sapply(stats, function(s){sum(stats.null.maxs>=s)})) / length(stats.null.maxs)
  fdr.v <- (fdns/nresample)/dns
  null.prop.ge.v <- (null.nge/nresample)/(nvar * ncomp)
  
  if(is.null(rownames(teststat.mat))) rownames(teststat.mat) <- (1:nrow(teststat.mat))
  datf <- create.err.rates.table(variable = as.vector(apply(teststat.mat, 2, function(dummy) {rownames(teststat.mat)})), p.value = p.v, fdr.value = as.vector(fdr.v), stat = as.vector(teststat.mat), weighted.stat = as.vector(stats.raw), null.prop.ge = as.vector(null.prop.ge.v), nvar = nvar, test.names = test.names, comparisons = comparisons, alternatives = alternatives)

  if(save.results) save.err.rates.table(datf, file.base = file.base) # save.spreadsheet(datf, file = paste(file.base, '.xls', sep = ''))
  datf
}

empty.rejections.table <- function(){NA}

rejections.table.is.empty <- function(rejections.table){is.na(rejections.table)}

rejections <- function(err.rates.out = 'errRates.xls', fdr = NULL, fwer = NULL, posterior = (if(is.null(fdr) && is.null(fwer)) .9 else NULL), file = 'rejections.xls', fdr.name = 'fdr.value', tolerance = .01) # alternative argument removed on 8/23/02, but not tested.
# not compatible with files created before 8/1/02 due to using fields (column headings) 'fdr.value' instead of 'fdr.vector' and 'p.value' instead of 'p.vector'
# err.rates.out may be a file name such as 'err.rates.xls' or the data frame output of err.rates
{
  err.rates.output <- recover.err.rates.tables(err.rates.out)
  if(is.null(err.rates.output$alternative))
  {
    warning('alternative is not specified in err.rates.out, so it is assumed to be "two.sided"')
    err.rates.output$alternative <- rep('two.sided', nrow(err.rates.output))
  }
  stopifnot((!is.null(posterior) && is.null(fdr) && is.null(fwer)) || xor(is.null(fdr), is.null(fwer)))
  safe.length <- function(x = numeric(0)){if(is.null(x)) 0 else length(x)}
  stopifnot(is.data.frame(err.rates.output) || (is.list(err.rates.output) && length(err.rates.output) == max(safe.length(fdr), safe.length(fwer), safe.length(posterior))))
  if(!is.null(fdr) && length(fdr) > 1) return(lapply(1:length(fdr), function(i) {rejections(err.rates.out = err.rates.output[[i]], fdr = fdr[i], file = if(i==1) file else NULL, fdr.name = fdr.name)}))
  if(!is.null(fwer) && length(fwer) > 1) return(lapply(1:length(fwer), function(i) {rejections(err.rates.out = err.rates.output[[i]], fwer = fwer[i], file = if(i==1) file else NULL, fdr.name = fdr.name)}))
  if(!is.null(posterior) && length(posterior) > 1) return(lapply(1:length(posterior), function(i) {rejections(err.rates.out = err.rates.output[[i]], posterior = posterior[i], file = if(i==1) file else NULL, fdr.name = fdr.name)}))

  sort.err.rates.output <- function(datf) # datf$key changed to key on 7/25/02
  {
    key <- ifelse(datf$alternative == 'two.sided', abs(datf$weighted.stat), ifelse(datf$alternative == 'less', -datf$weighted.stat, ifelse(datf$alternative == 'greater', datf$weighted.stat, NaN)))
    if(any(is.nan(key))) stop('Bad alternative.')
    sero <- datf[rev(order(key)), ]
    if(!identical(TRUE, ae <- all.equal(sero$p.value, sort(datf$p.value), tolerance = tolerance)))
    {
      warning(paste('p.value is incompatible with alternative and weighted.stat since the following mean relative difference (between p-values sorted by p-value and p-values sorted by test statistic) is high:', ae, sep = '\n'))
#      browser()
    }
    sero
  }

  if(is.null(posterior))
  {  
    sorted <- sort.err.rates.output(err.rates.output)

    if(!is.null(fdr))
    {
      error.rate <- fdr
      error.col <- fdr.name
    }
    else
    {
      error.rate <- fwer
      error.col <- 'p.value'
    }

    stopifnot(identical(TRUE, length(sorted[, error.col]) >= 1))
  
    indices <- seq(along = sorted[, 1])[sorted[, error.col] <= error.rate]
    num.rejected <- ifelse(length(indices) > 0, max(indices), 0)
    rej <- if(num.rejected > 0)
    {
      sorted[1:num.rejected, ]
    }
    else
    {
      warning('No null hypotheses satisfy the error control criterion.')
      NULL
    }
  }
  else # posterior probability threshold; modified 2/18/03
  {
    sorted <- if(is.null(err.rates.output$posterior.table))
      sort.err.rates.output(err.rates.output)
    else
      sort.err.rates.output(err.rates.output$posterior.table)
    if(is.null(sorted$posterior.prob)) stop('posterior.prob column is missing from err.rates.out.')
    pp.boo <- sorted$posterior.prob >= posterior
    rej <- if(sum(pp.boo) > 0)
    {
      pp.thresh <- min(sorted$posterior.prob[pp.boo])
      sorted[1:max(which(sorted$posterior.prob == pp.thresh)), ]
    }
    else
    {
      warning('No null hypotheses are improbable enough to be rejected.')
      NULL
    }
  }
  
  if(!is.null(rej) && nrow(rej) >= 1)
    {if(!is.null(file) && file != '') save.spreadsheet(rej, file = file)}
  else
  {
    warning('No null hypotheses were rejected.')
    rej <- empty.rejections.table()
  }
  return(rej)
}

summarize.reject <- function(err.rates.out, rejections.out = NULL, des.object = NULL, simplify = TRUE)
{
  if(is.null(err.rates.out)) stop('not yet implemented for err.rates.out = NULL')
  if(!simplify) stop('not yet implemented for simplify = FALSE')
  err.rates.out <- recover.err.rates.table(err.rates.out)
  stopifnot(is.data.frame(err.rates.out))
  if(!is.null(des.object)) des.object <- recover.des.object(des.object)
  reject.out <- reject(err.rates.out = err.rates.out, rejections.out = rejections.out, des.object = des.object, simplify = simplify)
  stopifnot(!is.matrix(reject.out))
  stopifnot(length(reject.out) == nrow(err.rates.out))
  attach(err.rates.out)
  summary.rej <- c(ntest = length(reject.out), nreject = sum(reject.out), min.abs.wt.stat = min(abs(weighted.stat)), max.abs.wt.stat = max(abs(weighted.stat)), thres.abs.wt.stat = min(abs(weighted.stat[reject.out])))
  stopifnot(is.vector(summary.rej))
#  if(!is.null(des.object))
#    summary.rej <- c(summary.rej, sapply(des.object[sapply(des.object, is.numeric)], function(a){a}))
  if(!is.null(rejections.out) && !is.null(rejections.out$correct.des.value))
    summary.rej <- c(summary.rej, c(correct.des.value = rejections.out$correct.des.value[nrow(rejections.out)]), correct.fdr.value = rejections.out$correct.fdr.value[nrow(rejections.out)])
  if(!is.null(des.object))
    summary.rej <- c(summary.rej, des.object[sapply(des.object, is.numeric)])
  as.data.frame(as.list(summary.rej))
}

reject <- function(err.rates.out = NULL, rejections.out = NULL, des.object = NULL, simplify)
# Example: reject(err.rates.out = 'errRates.xls', rejections.out = 'rejections.xls')
# Returns a logical vector or matrix corresponding to the rows of err.rates.out
# rejections.out or des.object can be either a file name of an object or an actual object
{
  param.in.use <- function(param) {!is.null(param) && (is.na(param) || param != '')}
#  if(!is.null(des.object)) des.object <- recover.des.object(des.object)
  if(!param.in.use(err.rates.out))
  {
    if(!param.in.use(des.object) || is.null(des.object$posterior.table)) stop('des.object must have posterior.table if err.rates.out is not specified')
    reject(err.rates.out = des.object$posterior.table, rejections.out = rejections.out, des.object = des.object, simplify = simplify)
  }
  else
  {
    err.rates.output <- recover.err.rates.tables(err.rates.out)
    object.to.return <- function(vec) {stopifnot(is.vector(vec)); if(simplify) vec else matrix(vec, ncol = 1)}
    reject.none <- function(){rep(FALSE, nrow(err.rates.output))}
    if(param.in.use(rejections.out))
    {
      if(param.in.use(des.object))
        stop('rejections.out and des.object cannot both be specified; you can set one argument to NULL.')
      mult.rejections.out <- length(rejections.out) > 1 && !is.data.frame(rejections.out)
      if(is.data.frame(err.rates.output))
      {  if(mult.rejections.out) stop('Incompatible arguments: err.rates.out and rejections.out')}
      else
      {
        if(!is.list(err.rates.output) || (length(err.rates.output) != length(rejections.out)))
        {  print('err.rates.out and rejections.out arguments are incompatible'); browser()}
      }
      if(mult.rejections.out)
      {
        stopifnot(is.list(err.rates.output))
        return(sapply(1:length(rejections.out), function(i) {ero <- err.rates.output[[i]]; rej <- rejections.out[[i]]; if(!(is.data.frame(ero) && (rejections.table.is.empty(rej) || is.data.frame(rej)))){print('Problem with err.rates.out or rejections.out argument'); browser()}; reject(err.rates.out = ero, rejections.out = rej, des.object = des.object, simplify = TRUE)})) # added 3/3/03
      }
      rejections.output <- recover.err.rates.table(rejections.out)
      if(rejections.table.is.empty(rejections.output)) return(object.to.return(reject.none()))
      reject.null <- function(variable.name)
      {
        nreject <- sum(variable.name == rejections.output$variable)
        stopifnot(nreject <= 1)
        nreject == 1
      }
      boo <- sapply(err.rates.output$variable, reject.null)
      stopifnot(sum(boo) == nrow(rejections.output))
    }
    else # rejections.out is not in use
    {
      if(!param.in.use(des.object)) stop("Either rejections.out or des.object must be non-NULL and non-''.")
      des <- recover.des.object(des.object)
      if(length(des$best.row.num) > 1)
      {
        if(is.data.frame(err.rates.output)) {err.rates.output <- list(err.rates.output)}
        if(length(err.rates.output) != length(des$best.row.num))
        {
          if(length(err.rates.output) == 1)
            err.rates.output <- rep(err.rates.output, length(des$best.row.num))
          else
            stop('bad length(err.rates.output)')
        }
        mult.thres.rej <- sapply(1:min(length(err.rates.output), length(des$best.row.num)), function(i){reject(err.rates.out = err.rates.output[[i]], rejections.out = rejections.out, des.object = create.des.object(correct.prob.from.nulldistr = des$correct.prob.from.nulldistr, best.threshold = des$best.threshold[i], max.des = des$max.des[i], best.correct.fdr = des$best.correct.fdr[i], best.row.num = des$best.row.num[i], threshold.is.p.value = des$threshold.is.p.value), simplify = TRUE)}) # added 3/3/03
        if(is.null(mult.thres.rej) || !is.matrix(mult.thres.rej)) {print('bad mult.thres.rej'); browser()}
        return(mult.thres.rej)
      }
      if(!is.data.frame(err.rates.output)) {print('Incompatible arguments: des.object and err.rates.out'); browser()}
      if(des$max.des <= 0) return(object.to.return(reject.none()))
      tipv <- des$threshold.is.p.value
      if(is.null(tipv))
      {
        tipv <- FALSE
        warning('des.object does not include the logical threshold.is.p.value (automatic after 2/19/03), so FALSE is assumed')
      }
      if(tipv)
      {
        p.value.threshold <- err.rates.output$p.value[des$best.row.num]
        if(abs(p.value.threshold - des$best.threshold) > .001)
          stop(paste('p.value.threshold==', p.value.threshold, '!=', des$best.threshold, '==des$best.threshold', sep = ''))
        boo <- err.rates.output$p.value <= p.value.threshold
      }
      else
      {
        stopifnot(abs(err.rates.output$weighted.stat[des$best.row.num]) == abs(des$best.threshold))
        alt <- err.rates.output$alternative[1]
        stopifnot(all(err.rates.output$alternative == alt))
        positive.stats <- switch(alt, 'two.sided' = abs(err.rates.output$weighted.stat), 'less' = -err.rates.output$weighted.stat, 'greater' = err.rates.output$weighted.stat, stop('alternative must be two.sided, less, or greater'))
        boo <- positive.stats >= des$best.threshold
      }
    }
    stopifnot(length(boo) == nrow(err.rates.output))
    object.to.return(boo) # if(simplify) boo else matrix(boo, ncol = 1)
  } # end else (recursion stopping condition)
}

h0dist.emp.bayes <- function(err.rates.out = 'errRates.xls', file.base = 'h0dist.emp-bayes', portion.prob.from.nulldistr = 1 - 2 * pnorm(-.5), other.prob.from.nulldistr = NULL, cost = 19, benefit = 1, print.alternative = TRUE, ...)
# It is assumed that the p-values of err.rates.out have not been adjusted for multiplicity, i.e., that they are uniform (0,1) under the null hypothesis.
# Estimate the data distribution using a Kernel estimator, as per Genovese & Wasserman.
{
  stopifnot(length(cost) == length(benefit))

  err.rates.output <- recover.err.rates.table(err.rates.out)
  p.values <- err.rates.output$p.value
  
  h0dist.prob.from.nulldistr <- function()
  {
    target.p.value <- 1 - portion.prob.from.nulldistr
    p.value.errors <- abs(target.p.value - p.values)
    lambda <- p.values[which(p.value.errors == min(p.value.errors))[1]]
    p <- min(1, (sum(p.values >= lambda) / length(p.values)) / portion.prob.from.nulldistr)
    stopifnot(p >= 0 && length(p) == 1)
    p
  }
  p0 <- probability.from.nulldistr(portion.prob.from.nulldistr = portion.prob.from.nulldistr, other.prob.from.nulldistr = other.prob.from.nulldistr, stable.fcn = h0dist.prob.from.nulldistr) # h0dist.prob.from.nulldistr()
  p.densplus <- density.plus(p.values, ...)
  null.density <- dunif
  
#  density..ratio <- function(numerator.density.fcn, denominator.density)
#  # cf. density.ratio and logistic.density.ratio
#  {
#    
#  }
  uniform.to.pdistr <- density.ratio(numerator.density.fcn = null.density, denominator.density = p.densplus$density, regress = FALSE)
  
  f1 <- function(z) {altdistr.density(prob.from.nulldistr = p0, x.density.value = p.densplus$density.fcn(z), xnull.density.value = null.density(z))}
  
  empirical.bayes.object <- c(list(prob.from.altdistr.fcn = function(z) {prob.from.altdistr(z, uniform.to.pdistr, prob.from.nulldistr = p0)}, altdistr.density.fcn = f1, prob.from.nulldistr = p0, x.density.plus = p.densplus, xnull.density.plus = list(density.fcn = null.density, density.mode = NULL, density = NULL)), uniform.to.pdistr) # uniform.to.pdistr is a list
  
  post.probs <- sapply(p.values, empirical.bayes.object$prob.from.altdistr.fcn)
  correct.fdrs <- p0 * err.rates.output$fdr.value
  desirabilities <- desirabilities(same.sign.stats = -p.values, fdr.values = correct.fdrs, cost = cost, benefit = benefit, print.alternative, simplify = FALSE)
  columns.are.unique <- function(column.names){length(column.names) == length(unique(column.names))}
  datfs <- lapply(1:ncol(desirabilities), function(i) {data.frame(variable = err.rates.output$variable, stat = err.rates.output$stat, weighted.stat = err.rates.output$weighted.stat, correct.des.value = desirabilities[, i], posterior.prob = post.probs, correct.fdr.value = correct.fdrs, err.rates.output[, names(err.rates.output)!='variable' & names(err.rates.output)!='stat' & names(err.rates.output)!='weighted.stat' & names(err.rates.output)!='correct.des.value' & names(err.rates.output)!='posterior.prob' & names(err.rates.output)!='correct.fdr.value'])})
  if(!columns.are.unique(names(datfs[[1]])))
  {
    if(columns.are.unique(names(err.rates.output))) {print('h0dist.emp.bayes caused a redundancy in column names'); browser()}
    else warning('Column names of posterior.table are not unique')
  }
  
  op.stat.and.booleans <- optimal.stat.and.booleans(desirabilities = desirabilities, same.sign.stats = -p.values, print.alternative)
  attach(op.stat.and.booleans) # optimal.stat and is.optimal
  if(length(is.optimal) != nrow(datfs[[1]]) && nrow(is.optimal) != nrow(datfs[[1]])) {print('Problem with is.optimal.'); browser()}
#  apply(is.optimal, 2, function(is.optimal.vec){if(sum(is.optimal.vec) != 1) {print('Incorrect number of optimal values.'); browser()}})
  cfdr.values <- fields(datfs, 'correct.fdr.value') # modified 3/25/03, but not tested directly
  if(is.matrix(cfdr.values) && (!is.matrix(is.optimal) || dim(cfdr.values) != dim(is.optimal)))
    {print('cfdr.values and is.optimal are incompatible'); browser()}
  bayes.des <- create.des.object(correct.prob.from.nulldistr = p0, best.threshold = -optimal.stat, max.des = apply(desirabilities, 2, max), best.correct.fdr = best.values(cfdr.values, is.optimal = is.optimal), best.row.num = best.values(1:nrow(datfs[[1]]), is.optimal = is.optimal), threshold.is.p.value = TRUE) # apply(desirabilities, 2, max) added 3/3/03
  
  save.empBayes(empirical.bayes.output = empirical.bayes.object, file.base = file.base)
  save.err.rates.table(err.rates.table = datfs[[1]], file.base = file.base)
  save.des.object(bayes.des.object = bayes.des, file.base = file.base)

  c(list(posterior.table = if(length(datfs) == 1) datfs[[1]] else datfs), empirical.bayes.object, bayes.des)
}

emp.bayes <- function(err.rates.out = 'errRates.xls', file.base = 'emp-bayes', weights = NULL, use.raw.stats = TRUE, rule = 0, max.cdf.error = NULL, ratio.rule = 2, df = 5, portion.prob.from.nulldistr = 1 - 2 * pnorm(-.5), logistic = TRUE, regress = logistic, direct.xnull.density = FALSE, ...) # direct.xnull.density = FALSE is faster and lower-biased since it uses an average of densities estimated from vectors of the same length as the test statistic vector.
{
  err.rates.output <- recover.err.rates.table(err.rates.out)
  recovered.stats <- recover.stats(err.rates.output = err.rates.output, use.raw.stats = use.raw.stats, rule = rule, max.cdf.error = max.cdf.error)
  attach(recovered.stats) # recovered.stats is a list of 3 names: 'stat.vec', 'nullstat.vec', and 'null.probs.ge'
  
  if(is.null(weights))
  {
    stat.vec.arg <- stat.vec
    nullstat.vec.arg <- nullstat.vec
  }
  else
  {
    warning('The same null distribution is assumed for all tests.')
      # This follows from reliance on recover.stats() since the fdr.value and p.value columns of err.rates.out() were computed assuming the same null distribution for all tests.
    stat.vec.arg <- split(stat.vec, err.rates.output$test)
    nullstat.vec.arg <- if(direct.xnull.density)
      list(nullstat.vec)
    else
      split(sample(nullstat.vec), err.rates.output$test)
        # The elements of nullstat.vec must be permuted to avoid the bias that would result from associating higher values of nullstat.vec with higher values of stat.vec. Having nullstat.vec.arg() and its elements the same lengths as those of stat.vec.arg may help reduce estimation bias.
    stopifnot(length(weights) == length(stat.vec.arg))
    stopifnot((direct.xnull.density && length(nullstat.vec.arg)==1) || (length(weights) == length(nullstat.vec.arg)))
  }
  
  empirical.bayes.output <- empirical.bayes(x = stat.vec.arg, xnull = nullstat.vec.arg, weights = weights, ratio.rule = ratio.rule, df = df, portion.prob.from.nulldistr = portion.prob.from.nulldistr, logistic = logistic, regress = regress, fast = FALSE, direct.xnull.density = direct.xnull.density, ...)
  
  post.probs <- sapply(stat.vec, empirical.bayes.output$prob.from.altdistr.fcn)
  datf <- data.frame(variable = variable, posterior.prob = post.probs, err.rates.output[, -(names(err.rates.output)=='variable')])

  save.empBayes(empirical.bayes.output = empirical.bayes.output, datf = datf, file.base = file.base)
  c(list(posterior.table = datf), empirical.bayes.output)
}

recover.stats <- function(err.rates.output, use.raw.stats = TRUE, rule = 0, max.cdf.error = NULL, ...)
{
  max.tolerance <- 1 / nrow(err.rates.output)
  if(is.null(max.cdf.error)) max.cdf.error <- max.tolerance

  if(use.raw.stats && (is.null(err.rates.output$null.prop.ge) || all(is.na(err.rates.output$null.prop.ge))))
  {
    warning('Cannot use raw statistics because null.prop.ge is not a non-NA column of the data frame. Re-signed statistics will be used instead.')
    use.raw.stats <- FALSE
  }
  attach(err.rates.output) # do not attach before the statement that reassigns use.raw.stats

  stat.vec <- if(use.raw.stats)
    weighted.stat
  else
    as.vector(ifelse(alternative=='two.sided', abs(weighted.stat),
      ifelse(alternative=='less', -weighted.stat, ifelse(alternative=='greater', weighted.stat, NA))))
  if(any(is.na(stat.vec))) {print('Bad alternative.'); browser()}

  if(any(is.na(stat.vec))) stop('stat.vec cannot have missing values.')
  stat.probs.ge <- pdata(data.vec = stat.vec, q = stat.vec, a = NULL, lower.tail = FALSE)
  q.vec <- function(probs.ge) {qdata(stat.vec, p = probs.ge, a = NULL, lower.tail = FALSE)}
  stopifnot(identical(TRUE, all.equal(stat.vec, q.vec(stat.probs.ge))))
  if(use.raw.stats)
    null.probs.ge <- null.prop.ge
  else
  {
    fdr.vec <- fdr.value
    stopifnot(length(stat.vec) == length(fdr.vec))
    null.probs.ge <- fdr.vec * stat.probs.ge
  }
  stat.vs.null.probs.ge <- function(stat.prob.ge)
  {
    if(rule != 1 && rule != 2)
    {
      stop('rule should be 1 or 2')
      stat.num.ge <- length(stat.vec) * stat.prob.ge
      if(stat.num.ge > length(stat.vec) || stat.num.ge < 1) stop('bad stat.prob.ge')
      
    }
    else
    {
      af <- approxfun(x = null.probs.ge, y = stat.vec, rule = rule, ...)
      if(is.na(af(stat.prob.ge)))
      {
        if(stat.prob.ge > median(null.probs.ge)) min(stat.vec) else max(stat.vec)
      }
      else
        af(stat.prob.ge)
    }
  }
  nullstats0 <- function()
  {
    stat.nums.ge <- round(stat.probs.ge * length(stat.vec)) # sum(stat.vec >= stat.sca)
    if(!identical(TRUE, all.equal(stat.nums.ge, unique(stat.nums.ge))))
    {
      warning('stat.nums.ge is replaced by integer ranks (ties broken arbitrarily) to force uniqeness')
      stat.nums.ge <- rev(integer.rank(stat.probs.ge))
    }
    null.nums.ge <- round(null.probs.ge * length(stat.vec))
    if(max(stat.nums.ge) != length(stat.nums.ge) || min(stat.nums.ge) != 1)
      {print(range(stat.nums.ge)); stop('bad stat.nums.ge')}
    if(any(null.nums.ge > length(stat.vec)) || any(null.nums.ge < 0))
      {print(range(null.nums.ge)); stop('bad null.nums.ge')}

    sorted.stat.vec <- rev(sort(stat.vec)) # rev(order(stat.vec))
    sorted.null.nums.ge <- sort(null.nums.ge)
    sorted.stat.nums.ge <- sort(stat.nums.ge)
    sorted.row.numbers <- rev(order(stat.vec))
    stopifnot(identical(TRUE, all.equal(sorted.stat.vec, stat.vec[sorted.row.numbers])))
    null.stats0 <- rep(NA, length(stat.vec))
    if(!identical(TRUE, all.equal(1:length(stat.vec), sorted.stat.nums.ge)))
    {
      major.vers <- 1
      minor.vers <- 6.2
      if(as.numeric(R.Version()$minor) < minor.vers && as.numeric(R.Version()$major) <= major.vers)
        cool.cat('Upgrading to R ', major.vers, '.', minor.vers, ' or later enables checking for a potential sorting error.')
      else
      {
        cool.cat('Sorting error.')
        browser()
      }
    }
    times.to.skip <- 0
    null.index <- 1
    for(i in sorted.stat.nums.ge)
    {
      if(TRUE) # if(times.to.skip == 0)
      {
        n.old.values <- sum(!is.na(null.stats0))
        n.new.values <- if(null.index <= length(sorted.null.nums.ge)) sorted.null.nums.ge[i] - n.old.values else 0
        if(is.character(try(stopifnot(n.new.values >= 0 && n.new.values + n.old.values <= length(stat.vec)))))
          {print('Inconsistency found.'); print(list(n.new.values, n.old.values, length(stat.vec))); browser()}
        if(n.new.values > 0)
        {
          times.to.skip <- n.new.values - 1
          indices.to.fill <- sapply(seq(from = null.index, to = null.index + times.to.skip), function(j) {sorted.row.numbers[j]})
          if(!all(is.na(null.stats0[indices.to.fill]))) {print('attempt to assign two values to the same position'); browser()}
          null.stats0[indices.to.fill] <- sorted.stat.vec[i]
          null.index <- null.index + n.new.values
        }
        else
          times.to.skip <- 0
      }
      else
        times.to.skip <- times.to.skip - 1
    }
#browser()
    if(all(is.na(null.stats0))) {print('No null.stats0 were generated'); browser()}
    if(any(is.na(null.stats0)))
    {
      min.null.stat <- min(null.stats0, na.rm = TRUE)
      sorted.uniq <- rev(sort(unique(null.stats0), na.last = NA))
      nextlowest.null.stat <- sorted.uniq[length(sorted.uniq) - 1]
      stopifnot(min.null.stat < nextlowest.null.stat)
      min.null.stat0 <- min.null.stat - (nextlowest.null.stat - min.null.stat)
      if(!use.raw.stats) min.null.stat0 <- max(0, min.null.stat0)
      null.stats0[is.na(null.stats0)] <- min.null.stat0
      stopifnot(min(null.stats0) < min.null.stat)
    }
    stopifnot(length(null.stats0) == length(stat.vec))
    if(any(is.na(null.stats0))) stop('null.stats0 has at least 1 NA')

    if(!is.vector(null.stats0)) stop('bad null.stats0')
    null.nums.ge0 <- sapply(stat.vec, function(stat.sca){sum(null.stats0 >= stat.sca)})
    ae <- all.equal(null.nums.ge0, null.nums.ge)
    if(!identical(TRUE, ae)) {print('Inexact CDF'); print(ae); cat('bad indices: ', (badi <- which(null.nums.ge0 != null.nums.ge)), '; ', null.nums.ge0[badi], ' not equal to ', null.nums.ge[badi], '\n', sep = ''); if(length(stat.vec) <= -1) print(rbind(stat.vec, null.stats0, stat.nums.ge, null.nums.ge, null.nums.ge0)); browser()} # This problem occurs even with use.raw.stats=FALSE since fdr.value was computed based on a number of random samples, so that null.nums.ge was not necessarily close to an integer before rounding.
    null.stats0
  }
  nullstat.vec <- if(rule == 0)
  {
#debug(nullstats0)
    nullstats0()
  }
  else
    sapply(stat.probs.ge, stat.vs.null.probs.ge)
  if(any(is.na(nullstat.vec))) {print(rbind(nullstat.vec, stat.probs.ge, stat.vec)); if(rule == 1) print('rule == 1 may have caused a missing value error.'); stop('nullstat.vec cannot have missing values.')}
  null.probs.ge2 <- pdata(data.vec = nullstat.vec, q = stat.vec, a = NULL, lower.tail = FALSE)
  ae <- all.equal(null.probs.ge, null.probs.ge2)
  if(!identical(TRUE, ae))
  {
    diffs <- null.probs.ge2 - null.probs.ge
    print(paste('Recovered CDF is off by as much as', max(abs(diffs)), 'in abs units, by an average of', mean(abs(diffs)), 'in abs units, and by an average of', mean(diffs)))
      # Some CDF error is unavoidable if length(nullstat.vec) is less than the total number of null scores used to compute err.rates.output
    if(max(abs(diffs)) > max.tolerance) {print(ae); warning('max.tolerance of ', max.tolerance, ' exceeded.')}
    if(max(abs(diffs)) > max.cdf.error) {print(ae); if(length(null.probs.ge) <= 100) print(rbind(null.probs.ge, null.probs.ge2)); stop(paste('max.cdf.error of ', max.cdf.error, 'exceeded.'))}
  }
  if(!use.raw.stats && !(all(stat.vec >= 0) && all(nullstat.vec >= 0))) {print('Some recovered stats are negative.'); browser()}

  list(stat.vec = stat.vec, nullstat.vec = nullstat.vec, null.probs.ge = null.probs.ge)
}

stats.have.same.sign <- function(same.sign.stats, print.alternative = TRUE)
{
  all.pos <- all(same.sign.stats >= 0)
  all.neg <- all(same.sign.stats <= 0)
  if(print.alternative)
  {
    if(all.pos) print('Hypotheses are rejected for high stats.')
    if(all.neg) print('Hypotheses are rejected for low stats (e.g., p-values).')
  }
  
  all.pos || all.neg
}

desirabilities <- function(same.sign.stats, fdr.values, cost, benefit, print.alternative = TRUE, simplify = TRUE)
# As of 3/1/03, this returns a matrix of length(fdr.values) rows and length(benefit) columns. It previously returned a vector of length(fdr.values) elements.
{
  if(!stats.have.same.sign(same.sign.stats, print.alternative)) stop('same.sign.stats are not all of the same sign')
  cost.to.benefit <- cost/benefit
  stopifnot(length(benefit) == length(cost.to.benefit))
  mat <- sapply(1:length(cost.to.benefit), function(i) {benefit[i] * (1 - (1 + cost.to.benefit[i]) * fdr.values) * sapply(same.sign.stats, function(stat) {sum(same.sign.stats >= stat)})})
  if(simplify && is.matrix(mat) && ncol(mat) == 1)
  {
    stopifnot(length(benefit) == 1)
    as.vector(mat)
  }
  else
  {
    if(!is.matrix(mat))
      matrix(mat, ncol = 1)
    else
      mat
  }
}

optimal.stat.and.booleans <- function(desirabilities, same.sign.stats, print.alternative = TRUE)
# As of 3/1/03, desirabilities can be a matrix with a number of rows equal to length(same.sign.stats). It previously required desirabilities to be a vector oflength(same.sign.stats) elements.
{
  if(!stats.have.same.sign(same.sign.stats, print.alternative)) stop('same.sign.stats are not all of the same sign')
  osab.object <- function(optimal.stat = optimal.stat, is.optimal){list(optimal.stat = optimal.stat, is.optimal = is.optimal)}
  call.recursively <- function(desirability.object){is.matrix(desirability.object) && ncol(desirability.object) > 1}
  if(call.recursively(desirabilities))
  {
    stopifnot(nrow(desirabilities) == length(same.sign.stats))
    list.of.osab <- apply(desirabilities, 2, function(des.vector){if(call.recursively(des.vector)) browser(); optimal.stat.and.booleans(desirabilities = des.vector, same.sign.stats = same.sign.stats, print.alternative = print.alternative)})
    osab.object(optimal.stat = sapply(list.of.osab, function(osab){stopifnot(length(osab$optimal.stat) == 1); osab$optimal.stat}), is.optimal = sapply(list.of.osab, function(osab){osab$is.optimal}))
  }
  else
  {
    stopifnot(length(desirabilities) == length(same.sign.stats))
    is.optimal <- desirabilities == max(desirabilities)
    stopifnot(sum(is.optimal) > 0)
    if(!is.finite(same.sign.stats[is.optimal])) browser()
    optimal.stats <- same.sign.stats[is.optimal]
    optimal.stat <- if(optimal.stats[1] >= 0) min(optimal.stats) else max(optimal.stats)
    is.optimal <- same.sign.stats == optimal.stat
  
    osab.object(optimal.stat = optimal.stat, is.optimal = is.optimal)
  }
}

create.des.object <- function(correct.prob.from.nulldistr, best.threshold, max.des, best.correct.fdr, best.row.num, threshold.is.p.value)
{
  list(correct.prob.from.nulldistr = correct.prob.from.nulldistr, best.threshold = best.threshold, max.des = max.des, best.correct.fdr = best.correct.fdr, best.row.num = best.row.num, threshold.is.p.value = threshold.is.p.value)
}

save.des.object <- function(bayes.des.object, file.base)
{
  save.results <- saving.results(file.base)
  separator <- '-des'
  if(save.results)
  {
    dput(bayes.des.object, file = paste(file.base, sep = separator, '.txt'))
#    save(bayes.des.object, file = paste(file.base, sep = separator, '.RData')) # This is not needed, but is here for redundancy and in case a function is added to a bayes.des.object. It was discontinued on 8/4/03 due to confusion.
  }
}

read.des.object <- function(file) {dget(file)}

recover.des.object <- function(des.object)
{
  if(is.character(des.object)) read.des.object(des.object) else des.object
}

best.values <- function(values, is.optimal)
# removed from optimize.desirability on 3/3/03
# If is.optimal is a vector, then this returns values[is.optimal]
{
  best.value <- function(value.vec, is.optimal.vec)
  {
    stopifnot(is.vector(value.vec) && is.vector(is.optimal.vec))
    stopifnot(length(value.vec) == length(is.optimal.vec))
    bv <- value.vec[is.optimal.vec]
    if(any(bv[1] != bv))
    {
      best.values.indices <- (1:length(is.optimal.vec))[is.optimal.vec]
      mult.ind.base <- paste('best.values.ind', sum(is.optimal.vec), sep = '-')
      mult.val.base <- paste('best.values.val', sum(is.optimal.vec), sep = '-')
      save.object(file.base = mult.ind.base, object.name = 'best.values.indices')
      save.object(file.base = mult.val.base, object.name = 'bv')
      #print('push c to continue'); browser()
      warning(paste('length(bv):', length(bv), '  bv[1]:', bv[1], '  Best values range from ', min(bv), ' to ', max(bv), 'so they were written to ', mult.val.base, '.RData, and their multiple indices were written to ', mult.ind.base, '.RData', sep = ''))
    }
    bv[1]
  }
  if(is.matrix(values))
  {
    if(!is.matrix(is.optimal) || dim(values) != dim(is.optimal))
      {print('Incompatible arguments: value.vec and is.optimal.vec'); browser()}
    sapply(1:ncol(values), function(i){best.value(values[, i], is.optimal[, i])})
  }
  else
  {
    if(is.matrix(is.optimal))
      apply(is.optimal, 2, function(io.vec){best.value(values, io.vec)})
    else
      best.value(values, is.optimal)
  }
} 


optimize.desirability <- function(err.rates.out = 'errRates.xls', file.base = 'op-des', cost, benefit, portion.prob.from.nulldistr = 1 - 2 * pnorm(-.5), other.prob.from.nulldistr = NULL, use.raw.stats = TRUE, rule = 0, max.cdf.error = NULL, print.alternative = TRUE)
# created from emp.bayes on 10/28/02.  default rule changed to 0 on 11/7/02.
{
  stopifnot(length(cost) == length(benefit))
  if(is.character(err.rates.out) && substr(err.rates.out, 1, nchar(err.rates.out) - 4) == file.base)
    stop('File bases must be different.')
  
  err.rates.output <- recover.err.rates.table(err.rates.out)
  if(!all(err.rates.output$alternative == 'two.sided')) stop('Alternatives other than "two.sided" are not yet implemented.')
  recovered.stats <- recover.stats(err.rates.output = err.rates.output, use.raw.stats = use.raw.stats, rule = rule, max.cdf.error = max.cdf.error)
  attach(recovered.stats)  # recovered.stats is a list of 3 names: 'stat.vec', 'nullstat.vec', and 'null.probs.ge'
#  if(!(all(stat.vec >= 0) && all(nullstat.vec >= 0))) {print('Some stats are negative.'); browser()}
  
  bayes.des <- bayes.desirability(scores = abs(stat.vec), nullscores = abs(nullstat.vec), cost = cost, benefit = benefit, portion.prob.from.nulldistr = portion.prob.from.nulldistr, other.prob.from.nulldistr = other.prob.from.nulldistr)

  stable.pr.from.nulldistr <- function() #**
  {
    if(use.raw.stats) print('stable.pr.from.nulldistr does not need raw stats')
    null.probs.ge <- fdr.value * sapply(abs(stat.vec), function(stat) { sum(abs(stat.vec) >= stat) / length(stat.vec)})
    a.upper.bound <- max(abs(stat.vec[1 - null.probs.ge <= portion.prob.from.nulldistr])) # sort(abs(stat.vec))[floor(portion.prob.from.nulldistr * length(stat.vec))]
#    sort.abs.stat.vec <- sort(abs(stat.vec))
#    a.upper.bound <- sort.abs.stat.vec[which(sort.abs.stat.vec == a.upper.bound)[1] + 1]
    stopifnot(is.finite(a.upper.bound) && length(a.upper.bound) == 1)
    prop.stat.in.a <- (1 + sum(abs(stat.vec) <= a.upper.bound)) / length(stat.vec) # max(sort(abs(stat.vec))) # mean(max(null.probs.ge[abs(stat.vec) == a.upper.bound]))
    stopifnot(length(prop.stat.in.a) == 1)
    min(1, prop.stat.in.a / portion.prob.from.nulldistr)
  }

  #correct.prob.from.nulldistr <- max(abs(stat.vec)[null.prop.ge <= portion.prob.from.nulldistr])
  correct.prob.from.nulldistr <- probability.from.nulldistr(portion.prob.from.nulldistr = portion.prob.from.nulldistr, other.prob.from.nulldistr = other.prob.from.nulldistr, stable.fcn = stable.pr.from.nulldistr)

  bayes.desirabilities <- sapply(abs(stat.vec), bayes.des$expected.des.fcn) # bayes.desirabilities may be a vector or rows of vectors.
  bayes.fdrs <- sapply(abs(stat.vec), bayes.des$bayes.fdr.fcn)
  correct.fdrs <- correct.prob.from.nulldistr * err.rates.output$fdr.value
  if(!identical(TRUE, all.equal(abs(stat.vec), abs(weighted.stat)))) {print('stat.vec deviates'); browser()}
  desirabilities <- desirabilities(same.sign.stats = abs(stat.vec), fdr.values = correct.fdrs, cost = cost, benefit = benefit, print.alternative, simplify = FALSE) # benefit * (1 - (1 + cost/benefit) * correct.fdrs) * sapply(stat.vec, function(stat) {sum(abs(stat.vec) >= abs(stat))})
  if(length(cost) == 1 && length(benefit) == 1)
  {
    desirabilities0 <- benefit * (1 - (1 + cost/benefit) * correct.fdrs) * sapply(abs(stat.vec), function(stat) {sum(abs(stat.vec) >= stat)})
    if(!identical(TRUE, all.equal(as.vector(desirabilities), desirabilities0))) {print('Abnormality'); browser()}
  }

  ith.vector <- function(obj, i){if(is.matrix(obj)) obj[i, ] else {stopifnot(is.vector(obj)); obj}}
  datfs <- lapply(1:ncol(desirabilities), function(i) {data.frame(variable = variable, stat = stat, weighted.stat = weighted.stat, bayes.des.value = ith.vector(bayes.desirabilities, i), correct.des.value = desirabilities[, i], bayes.fdr.value = bayes.fdrs, correct.fdr.value = correct.fdrs, err.rates.output[, -(names(err.rates.output)=='variable' | names(err.rates.output)=='stat' | names(err.rates.output)=='weighted.stat')])})
  
  for(datf in datfs)
  {
    ae <- all.equal(datf$bayes.fdr.value, datf$correct.fdr.value)
    if(!identical(TRUE, ae))
    {
      max.fdr.err <- max(abs(datf$bayes.fdr.value - datf$correct.fdr.value))
      cat('Error in bayes.fdr.value is as high as', max.fdr.err, '\n')
      print(ae)
    }
  }
  rm(datf)

  op.stat.and.booleans <- optimal.stat.and.booleans(desirabilities = desirabilities, same.sign.stats = abs(stat.vec), print.alternative) # changed 2/14/03, but not tested directly
  attach(op.stat.and.booleans)
  
  bfdr.values <- fields(datfs, 'bayes.fdr.value')
  cfdr.values <- fields(datfs, 'correct.fdr.value')
  bayes.des <- c(bayes.des, list(correct.prob.from.nulldistr = correct.prob.from.nulldistr, best.threshold = optimal.stat, max.des = apply(desirabilities, 2, max), best.bayes.fdr = best.values(bfdr.values, is.optimal = is.optimal), best.correct.fdr = best.values(cfdr.values, is.optimal = is.optimal), best.row.num = best.values(1:nrow(datfs[[1]]), is.optimal = is.optimal)), threshold.is.p.value = FALSE)
  
  save.des.object(bayes.des.object = bayes.des, file.base = file.base) # changed 2/14/03, but not tested directly
  save.err.rates.table(err.rates.table = datfs[[1]], file.base = file.base) # changed 2/14/03, but not tested directly
  
  c(list(posterior.table = if(length(datfs) == 1) datfs[[1]] else datfs), bayes.des)
}

prob.vector <- function(n.probs, n.pos.probs, min.pos.prob = 0.8)
{ 
  stopifnot(n.pos.probs <= n.probs)
  stopifnot(0 <= min.pos.prob <= 1)
  sapply(1:n.probs, function(i){1 - (if(i <= n.pos.probs) (1-min.pos.prob)*i/n.pos.probs else 1)})
}

maximize.desirability <- function(err.rates.out = 'errRates.xls', file.base = 'mx-des', cost, benefit, portion.prob.from.nulldistr = 1 - 2 * pnorm(-.5), other.prob.from.nulldistr = NULL)
# Created from optimize.desirability on 11/15/02. It does not use exactly the same output structure (e.g., column names), so that its output can be distinguished.
{
  
}

cost.to.benefit <- function(posterior)
# posterior is the rejection threshold of the posterior probability of the alternative hypothesis
{
  p <- 1 - posterior
  1/p - 1
}

posterior.from.cost.benefit.ratio <- function(cost.benefit.ratio)
# Returns the rejection threshold of the posterior probability of the alternative hypothesis
{
  p <- 1 / (cost.benefit.ratio + 1)
  1 - p
}



mean.bayes.desirability <- function(null.is.false, reject.null, cost.benefit.ratio)
{
  stopifnot(length(null.is.false) == length(reject.null))
  p <- 1 - posterior.from.cost.benefit.ratio(cost.benefit.ratio)
  (p * sum(reject.null & null.is.false) - (1 - p) * sum(reject.null & !null.is.false)) / length(reject.null)
}

mean.bayes.error <- function(null.is.false, reject.null, cost.benefit.ratio)
{
  stopifnot(length(null.is.false) == length(reject.null))
  p <- 1 - posterior.from.cost.benefit.ratio(cost.benefit.ratio)
  ((1 - p) * sum(reject.null & !null.is.false) + p * sum(!reject.null & null.is.false)) / length(reject.null)
}

assess.posterior.probs <- function(nsims = 1, probs1.alt, n1, n2 = n1, posterior = 0.95, true.cost.benefit.ratio = cost.to.benefit(posterior), random.null = random.null.default, random.alt = random.alt.default, exact = FALSE, bw.fun = function(x){bw.nrd0.mad(x, beta = 1/4)}, file.base = 'reject.table', print.alternative = TRUE)
{
  create.app.object <- function(loss.gain.table, loss.gain.se.table, reject.table)
  {
    if(!is.null(file.base) && file.base != '')
    {
      save.spreadsheet(loss.gain.table, file = paste(file.base, 'lg.xls', sep = '-'))
      if(!is.null(loss.gain.se.table)) save.spreadsheet(loss.gain.se.table, file = paste(file.base, 'lgse.xls', sep = '-'))
      if(!is.null(reject.table)) save.spreadsheet(reject.table, file = paste(file.base, 'xls', sep = '.'))
    }
    list(loss.gain.table = loss.gain.table, loss.gain.se.table = loss.gain.se.table, reject.table = reject.table)
  }
  
  if(nsims > 1) 
  {
    loss.gain.tables <- lapply(1:nsims, function(i){assess.posterior.probs(nsims = 1, probs1.alt = probs1.alt, n1 = n1, n2 = n2, posterior = posterior, true.cost.benefit.ratio = true.cost.benefit.ratio, random.null = random.null, random.alt = random.alt, exact = exact, bw.fun = bw.fun, file.base = if(i==1) file.base else '', print.alternative = if(i==1) print.alternative else FALSE)$loss.gain.table})
    loss.gain.tables <- lapply(loss.gain.tables, function(datf){datf[, names(datf) != 'posterior' & names(datf) != 'true.cost.benefit.ratio']})
    loss.gain.array <- data.frames.as.array(loss.gain.tables) # lapply(loss.gain.tables, function(datf){datf[, -c('posterior', 'true.cost.benefit.ratio')]}))
    mean.mat <- apply(loss.gain.array, c(1, 2), mean)
#    if(sum(is.finite(loss.gain.array)) != length(loss.gain.array))
#      {print('elements missing from loss.gain.array'); browser()} 
    options(show.error.messages = FALSE)
    se.mat <- try(apply(loss.gain.array, c(1, 2), function(vec){if(all(is.finite(vec))) se.mean(vec) else NaN}))
    if(!is.matrix(se.mat)) {print('se.mat is not a matrix'); browser()}
    options(show.error.messages = TRUE)
    colnames(mean.mat) <- colnames(se.mat) <- names(loss.gain.tables[[1]])
    loss.gain.datf <- function(mat){data.frame(posterior, true.cost.benefit.ratio, mat)}
    return(create.app.object(loss.gain.table = loss.gain.datf(mean.mat), loss.gain.se.table = loss.gain.datf(se.mat), reject.table = NULL))
  }

  stopifnot(length(posterior) == length(true.cost.benefit.ratio))
  probs2.alt <- rep(0, length(probs1.alt))
  stopifnot(is.vector(probs1.alt) && is.vector(is.vector(probs2.alt)))
  stopifnot(length(probs1.alt) == length(probs2.alt))
  is.sample.size <- function(n){is.finite(n) && length(n) == 1}
  stopifnot(is.sample.size(n1) && is.sample.size(n2))
  
  sample1 <- data.from.mixture(probs1.alt, n1, random.null = random.null, random.alt = random.alt)
  sample2 <- data.from.mixture(probs2.alt, n2, random.null = random.null, random.alt = random.alt)
  err.rates.table <- h0dist.diff.tests(list(sample1 = sample1, sample2 = sample2), null.cdf = function(q){pt(q, df=n1+n2-2)}, file.base = '', test.stat = function(x, y){gen.t.stat(x, y, var.equal = TRUE)})
  cost.benefit.ratio <- cost.to.benefit(posterior)
  emp.bayes.output <- h0dist.emp.bayes(err.rates.table, file.base = file.base, exact = exact, bw.fun = bw.fun, cost = cost.benefit.ratio, benefit = rep(1, length(cost.benefit.ratio)), print.alternative = print.alternative)

  des.reject <- reject(des.object = emp.bayes.output, simplify = FALSE)
  density.reject <- reject(err.rates.out = emp.bayes.output$posterior.table, rejections.out = rejections(err.rates.out = emp.bayes.output$posterior.table, posterior = posterior, file = ''), simplify = FALSE)
  cfdr.reject <- reject(err.rates.out = emp.bayes.output$posterior.table, rejections.out = rejections(err.rates.out = emp.bayes.output$posterior.table, fdr = 1 - posterior, fdr.name = 'correct.fdr.value', file = ''), simplify = FALSE)
  if(!is.matrix(des.reject) || !is.matrix(density.reject) || !is.matrix(cfdr.reject)) {print('At least one rejection object is not a matrix.'); browser()}
  if(length(des.reject) != length(density.reject) || length(des.reject) != length(cfdr.reject)) {print('rejection objects of different lengths'); browser()}
  
  alts.probable <- probs1.alt >= posterior
  
  reject.table <- data.frame(emp.bayes.output$posterior.table, alts.probable = alts.probable, des.reject = des.reject[, 1], density.reject = density.reject[, 1], cfdr.reject = cfdr.reject[, 1])
  save.err.rates.table(err.rates.table = reject.table, file.base)

  apply.bayes <- function(bayes.fun, reject.obj, cost.benefit.ratio)
  {
    if(is.matrix(reject.obj))
    {
      stopifnot(ncol(reject.obj) == length(cost.benefit.ratio))
      sapply(1:length(cost.benefit.ratio), function(i){apply.bayes(bayes.fun = bayes.fun, reject.obj = reject.obj[, i], cost.benefit.ratio = cost.benefit.ratio[i])})
    }
    else
      bayes.fun(null.is.false = alts.probable, reject.null = reject.obj, cost.benefit.ratio = cost.benefit.ratio)
  }
  rejection.error <- function(reject.obj)
  {
    if(is.matrix(reject.obj))
      apply(reject.obj, 2, function(reject.vec){rejection.error(reject.vec)})
    else
      classification.error(alts.probable, reject.obj)
  }

  mean.bayes.des <- data.frame(sapply(list(des.mbd = des.reject, density.mbd = density.reject, cfdr.mbd = cfdr.reject), function(reject.obj){apply.bayes(bayes.fun = mean.bayes.desirability, reject.obj = reject.obj, cost.benefit.ratio = cost.benefit.ratio)}))
  mean.bayes.err <- data.frame(sapply(list(des.mbe = des.reject, density.mbe = density.reject, cfdr.mbe = cfdr.reject), function(reject.obj){apply.bayes(bayes.fun = mean.bayes.error, reject.obj = reject.obj, cost.benefit.ratio = cost.benefit.ratio)}))
  errors <- data.frame(sapply(list(des.error = des.reject, density.error = density.reject, cfdr.error = cfdr.reject), rejection.error))
  
#  if(!is.null(file.base) && file.base != '') save(paste(file.base, sep = '.', 'img'))

  create.app.object(loss.gain.table = data.frame(posterior, true.cost.benefit.ratio, mean.bayes.des, mean.bayes.err, log.density.to.des.mbe = log(mean.bayes.err$density.mbe / mean.bayes.err$des.mbe), errors), loss.gain.se.table = NULL, reject.table = reject.table)
}

wdfdr <- function(nums.reject, nums.false.reject = NULL, rates.false.reject = if(is.null(nums.false.reject)) rep(.05, length(nums.reject)) else NULL, weights = rep(1, length(nums.reject)))
{
  stopifnot(xor(is.null(nums.false.reject), is.null(rates.false.reject)))
  if(is.null(nums.false.reject)) nums.false.reject <- round(rates.false.reject * nums.reject)
  stopifnot(length(nums.reject) == length(nums.false.reject) && length(nums.reject) == length(weights))
  sum(weights * nums.false.reject) / sum(weights * nums.reject)
}

dFDR <- function(p.values, test.wise.sig.level = c(.001, .01, .05))
{
  alphas <- test.wise.sig.level
  ntests <- length(p.values)
  nfalse <- ntests * alphas
  nreject <- sapply(alphas, function(alpha){sum(p.values <= alpha)})
  stopifnot(length(nfalse) == length(nreject))
  stopifnot(all(!is.na(nreject)) && all(!is.na(nfalse)))
  max.dFDR <- ifelse(nreject == 0, 0, nfalse / nreject)
  data.frame(alpha = alphas, nfalse = nfalse, nreject = nreject, max.dFDR = max.dFDR)
}

quantile.reject <- function(err.rates.out, cdf.value, file = '', tolerance = .05) {reject(err.rates.out = err.rates.out, rejections.out = rejections(err.rates.out = err.rates.out, fdr = cdf.value, file = file, tolerance = tolerance), simplify = TRUE)}

get.err.rates.out.files.default <- function(quantiles, id = 's0-wdt') {sapply(quantiles, function(q) {paste('a', toString(q), '-', id, '.xls', sep = '')})}

effect.size.quantiles <- function(effect.sizes, quantile.rejects.matrix, p, crude = FALSE){apply(quantile.rejects.matrix, 2, function(column){stopifnot(length(column) == length(effect.sizes)); data.vec = effect.sizes[column]; if(p == 0.5) median(data.vec) else qdata(data.vec = data.vec, p = p, crude = crude)})}

auc.quantile.table <- function(auc.quantile.goals, m, n, p = 0.5, auc.effect.sizes = NULL, id = 's0-wdt', file = paste('auc.quant-', id, '-', toString(p), '.xls', sep = ''), get.err.rates.out.files = get.err.rates.out.files.default, crude = FALSE)
{
  err.rates.out.files <- get.err.rates.out.files(quantiles = auc.quantile.goals, id = id)
  err.rates.out.datfs <- lapply(err.rates.out.files, recover.err.rates.table)
  stopifnot(length(auc.quantile.goals) == length(err.rates.out.datfs))
  quantile.rejects.matrix <- sapply(err.rates.out.datfs, function(err.rates.out){quantile.reject(err.rates.out = err.rates.out, cdf.value = p)})
  if(!is.null(auc.effect.sizes)) stopifnot(nrow(quantile.rejects.matrix) == length(auc.effect.sizes))
  stopifnot(ncol(quantile.rejects.matrix) == length(auc.quantile.goals))
  sample.auc.quantiles = sapply(1:length(err.rates.out.datfs), function(i){qdata(data.vec = sapply(abs(err.rates.out.datfs[[i]][quantile.rejects.matrix[, i], 'stat']), function(zeroed.w){wilcox.area.under.curve(zeroed.w = zeroed.w, m, n)}), p = p, crude = crude)})
  
  datf <- data.frame(auc.quantile.goals = auc.quantile.goals, auc.quantiles = if(is.null(auc.effect.sizes)) rep(NA, length(auc.quantile.goals)) else effect.size.quantiles(effect.sizes = auc.effect.sizes, quantile.rejects.matrix = quantile.rejects.matrix, p = p, crude = crude), sample.auc.quantiles = sample.auc.quantiles, ndiscoveries = apply(quantile.rejects.matrix, 2, sum))
  row.names(datf) <- err.rates.out.files
  if(saving.results(file)) save.spreadsheet(datf, file = file)
  datf
}