# Estimation, Copyright (c) 2001-2004 by David R. Bickel.
# Last modified by David R. Bickel on 3/27/04.
# DifferenceTests, Copyright (c) 2001-2003 by David R. Bickel.
# DifferenceTests divided into DiffTests, MultTests, and TestStats on 7/29/02.
# Created by David R. Bickel on 11/30/01.

# 11/9/02. bayes.desirability() corrected. See 'Notes on R FDR.doc'.
# 8/7/02. 'clip' changed to 'crop' throughout.
# 4/26/02. grenander.mode added.
# 4/25/02. winsorize option added to clipped.mean; clipped.range modified to ensure winsorize=FALSE; clipped.sd added.
# 4/24/02. winsorized.sd and winsorized.mean added.
# 4/8/02. trimmed.sd and trimmed.mean added.
# 4/1/02. Functions depending on clip.vector added.
# 2/28/02. trimmed.vector.skews and trimmed.vector.skews.matrix added.

print('loading Estimation.r')
source("General.r")
library(modreg)  # for smooth.spline
#library(stepfun)  # for ecdf

cv <- function(x){m <- mean(x); s <- sd(x); c(n = length(x), m = m, s = s, cv = s/m)}

se.mean <- function(x, ...){sd(x, ...)/sqrt(length(x))}

med<-function(x){if(length(x)<1) NaN else median(x)}

meanad<-function(x,constant=1.25331){constant*mean(abs(x-mean(x)))}

sd.mle<-function(x){n<-length(x);sd(x)*sqrt((n-1)/n)}

skew<-function(x,scale.fcn=sd.mle)
{
 if(length(x)<1)
   NaN
 else
 {
  #(n/((n-1)*(n-2)))*sum((x-mean(x))^3)/sd(x)^3 #p. 115 of Biometry by Sokal & Rohlf.
  mean((x-mean(x))^3)/scale.fcn(x)^3 #compatible with Mathematica
 }
}

hsm<-function(x) # half-sample mode of a vector
{
  y<-sort(x);
  while (length(y)>=4)
  {
#print(y);
    m<-ceiling(length(y)/2);
#cat('m==',m,'\n');
    w.min<-y[length(y)]-y[1];
    for(i in 1:(length(y)-m+1))
    {
      w<-y[i+m-1]-y[i];
#print(c(w,w.min));
      if(w<=w.min)
      {
        w.min<-w;
        j<-i
      }
    }
#cat('j==',j,'\n');
    if(w==0)
      y<-y[j]
    else
      y<-y[j:(j+m-1)]
  }
#print(y);
  if (length(y)==3)
  {
    z<-2*y[2]-y[1]-y[3];
    if(!is.finite(z))
    {
      print('ERROR: z is not finite; x, y, and z follow:');
      print(x);
      print(y);
      print(z);
    }
    if(z<0)
      mean(y[1:2])
    else
    {
      if(z>0)
        mean(y[2:3])
      else
        y[2]
    }
  }  
  else
    mean(y)
}

grenander.mode <- function(x, p, k)
{
  y <- sort(x)
  if(!(0 < p && p < k)) {print(p); print(k); stop('p must satisfy 0 < p < k for consistency.')}
  if(!(k < length(y))) stop('k must be less than length(x)')
  if(!(k > 2 * p)) warning('Asymptotic normality not proved for those parameters p and k.')
  v1 <- y[(1 + k) : length(y)]
  v2 <- y[1 : (length(y) - k)]
  if(length(v1)!=length(v2)) stop('grenander.mode error')
  diff <- v1 - v2
  tot <- v1 + v2
  if(any(diff==0))
  {
    warning('Limiting value of Grenander mode used.')
    mean(ifelse(diff==0, tot, NA), na.rm = TRUE)/2
  }
  else
  {
    b <- sum( tot / diff^p )/2
    a <- sum(1 / diff^p)
    if(is.finite(b/a)) b/a else {print(x); cat('b==',b,' a==',a,'\n'); stop('grenander.mode failed')}
  }
}

modal.skew<-function(x,mode.fcn=hsm)
{
 if(length(x)<1)
    NaN
 else
 {
  m<-mode.fcn(x);
  cdf.value<-(length(x[x<m])+length(x[x==m])/2)/length(x);
  1-2*cdf.value
 }
}

mean.based.skew<-function(x,mean.fcn=mean)
{
 if(length(x)<1)
    NaN
 else
 {
  m<-mean.fcn(x);
  cdf.value<-(length(x[x<m])+length(x[x==m])/2)/length(x);
  2*cdf.value-1
 }
}

trimmed.vector.skews <- function(orig.vector, trims=c(0)) {to.return <- numeric(length(trims)); for(i in seq(along=trims)) to.return[i] <- mean.based.skew(trim.vector(orig.vector,trim=trims[i])); to.return}

trimmed.vector.skews.matrix <- function(n,nsamples,trims=c(0),random.gen=function(n2){rnorm(n2)}) {skew.matrix <- matrix(nrow=length(trims),ncol=nsamples); for(i in 1:nsamples) {rdata <- random.gen(n); 
# cat('rdata: ',rdata,'\n');
skew.matrix[,i] <- trimmed.vector.skews(rdata,trims); 
# cat('ith col: ',trimmed.vector.skews(rdata,trims),'\n')
}; skew.matrix}

trimmed.mean <- function(x, trim = 0, ...) {mean(trim.vector(x, trim = trim), ...)}

trimmed.sd <- function(x, trim = 0, ...) {sd(trim.vector(x, trim = trim), ...)}

winsorized.mean <- function(x, win = 0, ...) {mean(win.vector(x, win = win), ...)}

winsorized.sd <- function(x, win = 0, ...) {sd(win.vector(x, win = win), ...)}

cropped.mean <- function(x, crop = 0, winsorize = FALSE, ...) {mean(crop.vector(x, crop = crop, winsorize = winsorize,...))}

cropped.sd <- function(x, crop = 0, winsorize = FALSE, ...) {sd(crop.vector(x, crop = crop, winsorize = winsorize, ...))}

cropped.range <- function(x, crop = 0, ...)
{
  cropped.x <- crop.vector(x, crop = crop, winsorize = FALSE, ...);
  cropped.x[length(cropped.x)]-cropped.x[1]
}


shorth.n <- function(orig.length) {floor(orig.length/2)+1}

shorth <- function(x) {cropped.mean(x, new.length = shorth.n(length(x)))}

shorth.range <- function(x, constant=1.4826/2) {constant*cropped.range(x, new.length = shorth.n(length(x)))}

lms.location <- function(x)
  {lms.x <- crop.vector(x, new.length = shorth.n(length(x)));(lms.x[length(lms.x)]+lms.x[1])/2}
  
bw.nrd0.general <- function(x, beta = 1/5, scale.estimator = function(x){min(sd(x), IQR(x)/(2*qnorm(3/4)))})
# the default parameters make this function like bw.nrd0, except with less rounding error since 2*qnorm(3/4) is used instead of 1.34
{.9 * scale.estimator(x) / length(x)^beta}

bw.nrd0.mad <- function(x, beta = 1/5) {bw.nrd0.general(x = x, beta = beta, scale.estimator = function(x){min(sd(x), mad(x))})}

normal.kernel <- function(t) {(1/sqrt(2*pi)) * exp(-t^2/2)}

pdf.estimate <- function(x, bw = bw.nrd0.mad(x), adjust = 1)
{
  bw2 <- adjust * bw
  function(xval) {sum(sapply(x, function(xi) {normal.kernel((xval - xi) / bw2)})) / (length(x) * bw2)}
}

density.plus <- function(x, rule = 1, bw.fun = bw.nrd0.mad, bw = bw.fun(x), exact = TRUE, fast = FALSE, adjust = 1, compressed.size = NULL, ...)
{
  if(!is.null(compressed.size)) x <- compress(x, n = compressed.size)
  if(exact)
  {
    density.fcn <- pdf.estimate(x = x, bw = bw, adjust = adjust)
    dens <- if(fast) list(x = NA, y = NA) else list(x = x, y = sapply(x, density.fcn))
    stopifnot(length(dens$x) == length(dens$y))
  }
  else
  {
    dens <- density(x, bw = bw, adjust = adjust, ...)
    density.fcn <- approxfun(dens$x, dens$y, rule = rule)
  }
  list(density.fcn = density.fcn, density.mode = dens$x[dens$y==max(dens$y)], density = dens)
}

prob.from.nulldistr <- function(max.nulldistr.over.datadistr, other.prob.from.nulldistr = NULL)
# max.nulldistr.over.datadistr can be found using density.ratio
# prob.from.null returns an upper bound
{
  if(is.null(other.prob.from.nulldistr))
    min(1, max(0, 1/max.nulldistr.over.datadistr))
  else
    other.prob.from.nulldistr
}

stable.prob.from.nulldistr <- function(x, xnull, portion.prob.from.nulldistr) # removed from empirical.bayes on 9/30/02 #**
{
  interval.a <- if(any(xnull < 0))
  {
    print('At least one element of xnull is negative.')
    stopifnot(any(x < 0))
    range(trim.vector(xnull, trim = (1 - portion.prob.from.nulldistr) / 2))
  }
  else
  {
    print('No element of xnull is negative.')
    if(any(x < 0)) stop('x has at least one negative element.')
    low.xnull <- sort(xnull)[1:round(portion.prob.from.nulldistr * length(xnull))]
    c(0, low.xnull[length(low.xnull)])
  }
  portion.in.a <- function(vec) {sum(vec >= interval.a[1] & vec <= interval.a[2]) / length(vec)}
  p <- min(1, portion.in.a(x) / portion.in.a(xnull))
  stopifnot(p >= 0 && length(p) == 1)
  p
}
  


prob.from.altdistr <- function(x, density.ratio.output, prob.from.nulldistr)
# density.ratio.output is the list returned by density.ratio when numerator.density.fcn represents the null distribution and denominator.density represents the data distribution. To be conservative, set other.prob.from.nulldistr = 1.
{
  p0 <- prob.from.nulldistr
  pmin(1, pmax(0, 1 - p0 * density.ratio.output$ratio.fcn(x)))
}

density.ratio <- function(numerator.density.fcn, denominator.density, rule = 1, df = 5, regress = TRUE)
# Useful when numerator.density.fcn represents the null distribution and denominator.density represents the data distribution (Efron et al., JASA, 2001).
# As opposed to the logistic regression method of logistic.density.ratio, which minimizes the SSE of a function of the ratio, density.ratio minimizes the SSE of the ratio itself.
{
  stopifnot(is.function(numerator.density.fcn) && !is.function(denominator.density))
  ratios <- sapply(denominator.density$x, numerator.density.fcn) / denominator.density$y
  stopifnot(length(ratios) == length(denominator.density$y))
  
  mat <- cbind(denominator.density$x, ratios)
  mat <- mat[is.finite(mat[, 2]), ]

  if(regress)
  {
    ss <- smooth.spline(mat, df = df)
    list(ratio.fcn = approxfun(ss$x, ss$y, rule = rule), ratio.max = max(ss$y), ratio.mat = cbind(ss$x, ss$y))
  }
  else
  {
    list(ratio.fcn = approxfun(mat[, 1], mat[, 2], rule = rule), ratio.max = max(mat[, 2]), ratio.mat = mat)
  }
}

probability.from.nulldistr <- function(portion.prob.from.nulldistr, other.prob.from.nulldistr, stable.fcn) #**
{
  if(is.numeric(portion.prob.from.nulldistr))
  {
    if(is.numeric(other.prob.from.nulldistr))
      stop('portion.prob.from.nulldistr or other.prob.from.nulldistr should be set to NULL.')
#debug(stable.fcn)
    stable.fcn()
  }
  else
  {
    stopifnot(is.numeric(other.prob.from.nulldistr))
    other.prob.from.nulldistr
  }
}


bayes.desirability <- function(scores, nullscores, cost, benefit, portion.prob.from.nulldistr = 1 - 2 * pnorm(-.5), other.prob.from.nulldistr = NULL)
{
  lapply(list(scores, nullscores, cost, benefit), function(vec) {stopifnot(is.vector(vec))})
  stopifnot(length(cost) == length(benefit))
  stopifnot(all(scores) >= 0 && all(nullscores) >= 0)


  p0 <- probability.from.nulldistr(portion.prob.from.nulldistr = portion.prob.from.nulldistr, other.prob.from.nulldistr = other.prob.from.nulldistr, stable.fcn = function() {stable.prob.from.nulldistr(x = scores, xnull = nullscores, portion.prob.from.nulldistr = portion.prob.from.nulldistr)}) #**
  stopifnot(length(p0) == 1)
  
  p.reject <- function(threshold, x.vec) {pdata(data.vec = x.vec, q = threshold, a = NULL, lower.tail = FALSE)}
  bayes.fdr.fcn <- function(threshold)
  {
    stopifnot(length(threshold) == 1) # added 3/1/03
    p0 * p.reject(threshold, nullscores) / p.reject(threshold, scores)
  }
  
  expected.des.fcn <- function(threshold)
  {
    stopifnot(length(threshold) == 1) # added 3/1/03
    benefit * (1 - (1 + cost/benefit) * bayes.fdr.fcn(threshold)) * sum(scores >= threshold)
  }
  
  list(expected.des.fcn = expected.des.fcn, bayes.fdr.fcn = bayes.fdr.fcn, prob.from.nulldistr = p0)
}

bayes.desirability.same.threshold <- function(x, xnull, costs, benefits, portion.prob.from.nulldistr = 1 - 2 * pnorm(-.5), other.prob.from.nulldistr = NULL)
{
  if(!is.list(x)) x <- list(x)
  if(!is.list(xnull)) xnull <- list(xnull)
  stopifnot(is.list(x) && is.list(xnull))
  stopifnot(is.vector(costs) && is.vector(benefits))

  stopifnot(all(length(x) == sapply(list(xnull, costs, benefits), length)))
  
  probs.from.nulldistr <- function()
  {
    if(is.numeric(portion.prob.from.nulldistr))
    {
      if(is.numeric(other.prob.from.nulldistr))
        stop('portion.prob.from.nulldistr or other.prob.from.nulldistr should be set to NULL.')
      sapply(1:length(x), function(i) {stable.prob.from.nulldistr(x = x[[i]], xnull = xnull[[i]], portion.prob.from.nulldistr = portion.prob.from.nulldistr)})
    }
    else
    {
      stopifnot(length(x) == length(other.prob.from.nulldistr))
      other.prob.from.nulldistr
    }
  }
  
  p0.vec <- probs.from.nulldistr()
  stopifnot(length(x) == length(p0.vec))
  
  p.reject <- function(threshold, x.vec) {pdata(data.vec = x.vec, q = threshold, a = NULL, lower.tail = FALSE)}
  q.fcn <- function(threshold, index)
  {
    warning('not tested; probably has typical i problem')
    p0.vec[index] * p.reject(threshold, xnull[[index]]) / p.reject(threshold, x[[index]])
  }
  
  expected.des.fcn <- function(threshold)
  {
#browser()
    warning('not tested; probably has typical i problem')
    sum(sapply((1:length(x)), function(i){benefits[i] * (1 - (1 + costs[i]/benefits[i]) * q.fcn(threshold, i))}))
  }
  
  list(expected.des.fcn = expected.des.fcn, q.fcn = q.fcn, prob.from.nulldistr = p0.vec)
}

empirical.bayes <- function(x, xnull, weights = NULL, ratio.rule = 2, df = 5, portion.prob.from.nulldistr = 1 - 2 * pnorm(-.5), other.prob.from.nulldistr = NULL, logistic = TRUE, regress = logistic, impute = FALSE, bw = NULL, bw.fcn = bw.nrd0.mad, fast = logistic, direct.xnull.density = FALSE, ...) # 9/16/02: default changed to logistic = TRUE.
{
  if(fast && !logistic) stop('ERROR: fast=TRUE is currently incompatible with logistic=FALSE')
  if(!is.list(x)) x <- list(x)
  if(!is.list(xnull)) xnull <- list(xnull)
  stopifnot(is.list(x) && is.list(xnull))
  if(is.null(weights)) weights <- rep(1, length(x))
  weights <- weights / sum(weights)
  stopifnot(length(x) == length(weights))
  x.all <- as.vector(do.call('c', x))
  if(is.null(bw)) bw <- bw.fcn(x.all)
  
  if(direct.xnull.density)
  {
    null.weights <- 1
    xnull <- list(do.call('c', xnull))
  }
  else
    null.weights <- weights
  stopifnot(length(xnull) == length(null.weights))
  
  eb.density.plus <- function(vec) {density.plus(vec, bw = bw, fast = fast, ...)}
  wt.density.plus <- function(densities.plus, wts)
  {
    stopifnot(length(densities.plus) == length(wts))
    x.vec1 <- densities.plus[[1]]$density$x
    if(!is.na(x.vec1) && identical(TRUE, all(sapply(densities.plus, function(dp) {identical(TRUE, all.equal(dp$density$x, x.vec1))}))))
    {
      x.vec <- x.vec1
      y.mat <- sapply(1:length(densities.plus), function(i) {wts[i] * densities.plus[[i]]$density$y})
      y.vec <- apply(y.mat, 1, sum)
      stopifnot(is.vector(y.vec) && length(y.vec) == length(x.vec))
    }
    else
    {
      x.vec <- NA
      y.vec <- NA
    }
    dens <- list(x = x.vec, y = y.vec)
    dens.fcn <- function(z) {sum(wts * sapply(densities.plus, function(dp) {dp$density.fcn(z)}))}
    list(density.fcn = dens.fcn, density.mode = dens$x[dens$y==max(dens$y)], density = dens)
  }
  x.densplus <- wt.density.plus(lapply(x, eb.density.plus), weights)
  xnull.densplus <- wt.density.plus(lapply(xnull, eb.density.plus), null.weights)
  
  logistic.density.ratio <- function()
  # based on Eqs. (6.3) and (6.4) of Efron et al. (JASA, 2001)
  {
    b <- sum(null.weights * sapply(xnull, length)) / sum(weights * sapply(x, length))
    prob.success <- function(z)
    {
      f.z <- x.densplus$density.fcn(z)
      f0.z <- xnull.densplus$density.fcn(z)
      if(is.na(f.z) || f.z < 0) f.z <- 0
      if(is.na(f0.z) || f0.z < 0) f0.z <- 0
      p.s <- if(f.z == 0 && f0.z == 0)
        {if(impute) 1/(1+b) else NA}
      else
        f.z / (f.z + b * f0.z)
      if(is.finite(p.s) && !(p.s >= 0 && p.s <= 1)) {print('ERROR: p.s is out of range'); print(date()); browser()}
      if(!is.finite(p.s) && impute) {print('ERROR: p.s is not finite'); print(date()); browser()}
      p.s
    }
cat('computing probs.success on ', date(), '\n')
dput(date(), file = paste('begin probs ', as.character(impute), '.txt', sep = ''))
    x.unique <- unique(x.all)
    probs.success <- sapply(x.unique, prob.success)
cat('finished computing probs.success on ', date(), '\n')
dput(date(), file = paste('end probs ', as.character(impute), '.txt', sep = ''))
    prob.success.mat <- cbind(x.unique, probs.success)
    prob.success.ss <- smooth.spline(prob.success.mat[is.finite(prob.success.mat[, 2]), ], df = df)
    prob.success.fcn <- approxfun(prob.success.ss$x, prob.success.ss$y, rule = ratio.rule)
    ratio.fcn <- function(z) {pi.z <- prob.success.fcn(z); max(0, (1 - pi.z) / (b * pi.z))} # max added 9/25/02
    ratio.mat <- cbind(prob.success.ss$x, sapply(prob.success.ss$x, ratio.fcn))
#browser()
    list(ratio.fcn = ratio.fcn, ratio.max = max(ratio.mat[, 2]), ratio.mat = ratio.mat)
  }
  
  xnull.to.x <- if(logistic)
  { 
    if(!regress) stop('logistic=TRUE is incompatible with regress=FALSE')
    logistic.density.ratio()
  }
  else
    density.ratio(numerator.density.fcn = xnull.densplus$density.fcn, denominator.density = x.densplus$density, rule = ratio.rule, df = df, regress = regress)

  p0 <- if(is.numeric(portion.prob.from.nulldistr))
  {
    if(is.numeric(other.prob.from.nulldistr))
      stop('portion.prob.from.nulldistr or other.prob.from.nulldistr should be set to NULL.')
    if(!direct.xnull.density)
      stopifnot(identical(TRUE, all.equal(weights, null.weights)))
    xnull.element <- function(i) {if(direct.xnull.density) xnull[[1]] else xnull[[i]]}
    sum(weights * sapply(1:length(x), function(i) {stable.prob.from.nulldistr(x = x[[i]], xnull = xnull.element(i), portion.prob.from.nulldistr = portion.prob.from.nulldistr)}))
  }
  else
    prob.from.nulldistr(xnull.to.x$ratio.max, other.prob.from.nulldistr = other.prob.from.nulldistr)

  f1 <- function(z) {altdistr.density(prob.from.nulldistr = p0, x.density.value = x.densplus$density.fcn(z), xnull.density.value = xnull.densplus$density.fcn(z))}
  
  c(list(prob.from.altdistr.fcn = function(z) {prob.from.altdistr(z, xnull.to.x, prob.from.nulldistr = p0)}, altdistr.density.fcn = f1, prob.from.nulldistr = p0, x.density.plus = x.densplus, xnull.density.plus = xnull.densplus), xnull.to.x)
}

altdistr.density <- function(prob.from.nulldistr, x.density.value, xnull.density.value)
{
  p0 <- prob.from.nulldistr
  p1 <- 1 - p0
  if(p1 == 0) warning('altdistr.density.fcn cannot be computed.')
  if(p1 > 0) pmax(0, (x.density.value - p0 * xnull.density.value) / p1) else NA
}

qdata <- function(data.vec, p, a = 1/2, lower.tail = TRUE, crude = FALSE, ...)
{
  if(length(data.vec) == 0) return(NA)
  data.vec <- sort(data.vec) # Added 3/31/03 to correct an error.
  if(crude)
  {
    index.vec <- round(p * (length(data.vec) - 0.5))
    if(index.vec < 1) index.vec <- 1
    if(!(index.vec >= 1 && index.vec <= length(data.vec))) {print('index.vec out of range'); browser()}
    data.vec[index.vec]
  }
  else
  {
    qfcn0 <- if(is.null(a))
      approxfun(x = pdata(data.vec = data.vec, q = data.vec, a = a, lower.tail = lower.tail, ...), y = data.vec, ...)
    else
      {stopifnot(lower.tail); approxfun(x = ppoints(data.vec, a = a), y = data.vec, ...)}
    qfcn <- function(p0)
    {
      q0 <- qfcn0(p0)
      if(is.na(q0))
        {if((p0 > .5 && lower.tail) || (p0 < .5 && !lower.tail)) max(data.vec) else min(data.vec)}
      else
        q0
    }
    sapply(p, qfcn)
  }
}

pdata <- function(data.vec, q, continuity.correct = FALSE, a = 1/2, lower.tail = TRUE, ...)
{
  data.vec <- sort(data.vec) # Added 3/31/03 to correct an error.
  pfcn0 <- if(is.null(a) || continuity.correct == TRUE)
    function(q0)
    {
      p <- sum(if(lower.tail) (data.vec <= q0) else (data.vec >= q0)) / length(data.vec)
      if(continuity.correct) p <- p - .5 * sum(data.vec == q0) / length(data.vec)
#      if(is.na(p)) {print('bad element p'); browser()}
      p
    }
  else
    {stopifnot(lower.tail); approxfun(x = data.vec, y = ppoints(data.vec, a = a), ...)}
  pfcn <- function(q0)
  {
    p0 <- pfcn0(q0)
    med <- median(data.vec)
    if(is.na(p0))
      {if(any(is.na(data.vec))) {print('Value missing from data.vec in pdata()'); browser()}; if(is.na(q0)) stop('q0 is NA'); if(((q0 > med) && lower.tail) || ((q0 < med) && !lower.tail)) 1 else 0}
    else
      p0
  }
  sapply(q, pfcn)
}

compress <- function(data.vec, n, a = 1/2, ...)
{
  stopifnot(n >= 1 && n <= length(data.vec))
  if(n == length(data.vec)) warning('n == length(data.vec)')
  qdata(data.vec, p = ppoints(n, a = a), a = a, ...)
}

moving.stat <- function(series, window.width, FUN = mean, smooth.weight = 1)
{
  stopifnot(smooth.weight >= 0 && smooth.weight <= 1)
  stopifnot(is.matrix(series) || is.data.frame(series) || ncol(series) != 2)
  fcn <- function(vec){FUN(vec, na.rm = TRUE)}
  w2 <- window.width / 2
  x <- series[, 1]
  y <- series[, 2]
  series[, 2] <- smooth.weight * sapply(x, function(xval){fcn(y[x >= xval - w2 & x <= xval + w2])}) + (1 - smooth.weight) * y
  series
}

time.series.cor <- function(x, y, transform.fun = log)
{
  stopifnot(length(x) == length(y))
  trans.x <- transform.if.possible(x, transform.fun = transform.fun)
  trans.y <- transform.if.possible(y, transform.fun = transform.fun)
  sum(first.differences(trans.x) * first.differences(trans.y)) / (length(x) - 1)
}

vectors.for.cor <- function(x, y, y.lag)
# For internal use only.
{
  stopifnot(y.lag >= 0)
  data.frame(x.part = x[1:(length(x) - y.lag)], y.part = y[(1 + y.lag):length(y)])
}

cross.cor.tests <- function(x, y, y.lag, x.lag)
# For internal use only.
{
  stopifnot(length(x) == length(y))
  stopifnot(xor(is.null(y.lag), is.null(x.lag)))
}

cross.cor <- function(x, y, y.lag = 0, x.lag = if(is.null(y.lag)) 0 else NULL, use = 'pairwise.complete.obs', cor.fun.name = 'cor', ...)
{
  cross.cor.tests(x = x, y = y, y.lag = y.lag, x.lag = x.lag)
  if(is.null(y.lag)) y.lag <- -x.lag
  if(y.lag >= 0)
  {
    attach(vectors.for.cor(x = x, y = y, y.lag = y.lag))
    if(cor.fun.name == 'cor')
      cor(x = x.part, y = y.part, use = use)
    else
      do.call(cor.fun.name, list(x.part, y.part, ...))
  }
  else
    cross.cor(x = y, y = x, y.lag = -y.lag, use = use, cor.fun.name = cor.fun.name, ...)
}

cross.cor.test <- function(x, y, y.lag = 0, x.lag = if(is.null(y.lag)) 0 else NULL, alternative = c("two.sided", "less", "greater"), method = c("pearson", "kendall", "spearman"), exact = NULL, conf.level = 0.95, ...)
{
  cross.cor.tests(x = x, y = y, y.lag = y.lag, x.lag = x.lag)
  if(is.null(y.lag)) y.lag <- -x.lag
  if(y.lag >= 0)
  {
    attach(vectors.for.cor(x = x, y = y, y.lag = y.lag))
    cor.test(x = x.part, y = y.part, alternative = alternative, method = method, exact = exact, conf.level = conf.level, ...)
  }
  else
    cross.cor.test(x = y, y = x, y.lag = -y.lag, alternative = alternative, method = method, exact = exact, conf.level = conf.level, ...)
}

default.max.abs.lag <- function(n.time.points, denominator = 4) {max(1, floor(n.time.points / denominator))}

optimal.y.lag <- function(x, y, max.abs.lag = default.max.abs.lag(length(x)), y.lags = (-max.abs.lag):(max.abs.lag), ...)
{
  if(identical(TRUE, all.equal(x, y))) return(0)
  cors <- sapply(y.lags, function(y.lag){cross.cor(x = x, y = y, y.lag = y.lag, ...)})
  if(all(is.na(cors))) return(0)
  max.cor.boo <- cors == max(cors, na.rm = TRUE)
  max.cor.index <- which(abs(y.lags) == min(abs(y.lags[max.cor.boo]), na.rm = TRUE) & max.cor.boo & !is.na(cors))
  if(length(max.cor.index) > 1) {warning('inverse lags are equally optimal'); return(0)}
  if(length(max.cor.index) != 1) {print('bad length of max.cor.index'); print(max.cor.index); browser()}
  y.lags[max.cor.index]
}

new.lag.table <- function(vectorRows, reference.vector = NULL, alternative = c("two.sided", "less", "greater"), max.abs.lag = default.max.abs.lag(ncol(vectorRows)), y.lags = (-max.abs.lag):(max.abs.lag), nresample = NULL, ...)
{
  stopifnot(is.matrix(vectorRows))
  ncase <- ncol(vectorRows)
  nvar <- nrow(vectorRows)
  var.names <- if(is.null(rownames(vectorRows))) 1:nvar else rownames(vectorRows)
  stopifnot(nvar == length(var.names))
  datf.ncol <- 3
  op.y.lag <- function(x, y){optimal.y.lag(x = x, y = y, y.lags = y.lags, ...)}
  op.y.lag.vs.reference <- function(y){op.y.lag(x = reference.vector, y = y)}
  if(is.null(reference.vector))
  {
    stop('A change made on 11/26/03 rendered reference.vector = NULL unusable. To restore functionality, either i and j need to be changed back to integers, or the use of a matrix needs to be eliminated.')
    mat <- matrix(numeric(datf.ncol), ncol = datf.ncol) # max.x.index * max.y.index * 
    for(i in var.names)
      for(j in (var.names[1]):i)
      {
        mat <- rbind(mat, c(x.name = i, y.name = j, y.lag = op.y.lag(x = vectorRows[i, ], y = vectorRows[j, ])))
      }
    datf <- data.frame(mat[-1, ])
  }
  else
  {
    stopifnot(length(reference.vector) == ncase)
    datf <- data.frame(x.name = rep(NA, nvar), y.name = I(var.names), y.lag = sapply(var.names, function(j){stopifnot(length(reference.vector) == ncol(vectorRows)); op.y.lag.vs.reference(y = vectorRows[j, ])}))
  }
  datf$p.value <- if(is.null(nresample))
    rep(NA, nrow(datf))
  else
    sapply(1:nrow(datf), function(k){
      k.x.name <- datf[k, 'x.name']
      local.x <- if(is.na(k.x.name)) reference.vector else vectorRows[k.x.name, ]
      local.y <- vectorRows[datf[k, 'y.name'], ]
      if(length(local.x) != length(local.y)) {print('bad vector lengths'); browser()}
      y.lag.p.value(x = local.x, y = local.y, x.is.reference = is.na(k.x.name), y.lag = datf[k, 'y.lag'], y.lags = y.lags, nresample = nresample, alternative = alternative, ...)
    })
  datf
}

y.lag.p.value <- function(x, y, y.lag, nresample, x.is.reference = FALSE, max.abs.lag = default.max.abs.lag(length(x)), y.lags = (-max.abs.lag):(max.abs.lag), alternative = c("two.sided", "less", "greater"), ...)
{
  alternative <- match.arg(alternative)
  if(length(x) != length(y)) {print('y.lag.p.value error: length(x) != length(y)'); browser()}
  stopifnot(y.lag >= min(y.lags) && y.lag <= max(y.lags))
  sd.ratio <- max(sd(x), sd(y)) / min(sd(x), sd(y))
  if(sd.ratio >= 2) warning(paste('equality of standard deviations violated: sd.ratio==', sd.ratio))
  null.series <- if(x.is.reference) x else (x + y) / 2
  residuals <- if(x.is.reference) y - null.series else c(x - null.series, y - null.series)
  random.series <- function()
  {
    vec <- null.series + sample(residuals, size = length(null.series), replace = TRUE)
    stopifnot(length(vec) == length(x))
    vec
  }
  null.stat <- function()
  {
    optimal.y.lag(x = if(x.is.reference) x else random.series(), y = random.series(), y.lags = y.lags, ...)
  }
  null.stats <- sapply(1:nresample, function(i){null.stat()})
  null.rejections <- if(alternative == 'two.sided')
    abs(null.stats) >= abs(y.lag)
  else if(alternative == 'greater')
    null.stats >= y.lag
  else if(alternative == 'less')
    null.stats <= y.lag
  else
    stop('error in alternative')
  sum(null.rejections) / length(null.stats)
}

time.series.y.lag.p.value <- function(x, y, y.lag, max.abs.lag = default.max.abs.lag(length(x)), y.lags = (-max.abs.lag):(max.abs.lag), nresample, transform.fun = log)
{
  stopifnot(length(x) == length(y))
  trans.x <- transform.if.possible(x, transform.fun = transform.fun)
  trans.y <- transform.if.possible(y, transform.fun = transform.fun)
  y.lag.p.value(x = trans.x, y = trans.y, y.lag = y.lag, max.abs.lag = max.abs.lag, y.lags = y.lags, nresample = nresample, cor.fun.name = 'time.series.cor', transform.fun = NULL) # transform.fun must be NULL here since x and y are already transformed
}

time.series.new.lag.table <- function(vectorRows, max.abs.lag = default.max.abs.lag(ncol(vectorRows)), y.lags = (-max.abs.lag):(max.abs.lag), nresample = NULL, transform.fun = log)
{
  stopifnot(is.matrix(vectorRows))
  trans.fun <- function(x) {transform.if.possible(x, transform.fun = transform.fun)}
  new.lag.table(vectorRows = apply(vectorRows, 1, trans.fun), max.abs.lag = max.abs.lag, y.lags = y.lags, nresample = nresample, cor.fun.name = 'time.series.cor', transform.fun = NULL) # transform.fun must be NULL here since x and y are already transformed
}

interesting.smoothness <- function(x, interesting.df = 5, boring.df = 3, smoothness.to.interest = 1)
{
  stopifnot(boring.df < interesting.df)
  ss <- function(df){smooth.spline(x, df = df)}
  interesting.ss <- ss(df = interesting.df)
  boring.ss <- ss(df = boring.df)
  mean(abs(boring.ss$y - x) - smoothness.to.interest * abs(interesting.ss$y - x))
}

summary.cor <- function(cors, FUN = mean){stopifnot(all(cors >= 0) || all(cors <= 0)); r <- sign(cors[1]) * sqrt(FUN(cors^2)); if(is.null(r) || !(r >= -1 && r <= 1)){print('bad r'); browser()}; r}

extreme.cor <- function(cors){summary.cor(cors = cors, FUN = max)}

lm.predictions <- function(x.vec, lm.output)
{
  stopifnot(is.vector(x.vec))
  sapply(x.vec, function(x){lm.output[[1]]['(Intercept)'] + lm.output[[1]][2] * x})
}

power.law.lm <- function(x, y)
{
  stopifnot(length(x) == length(y))
  datf <- data.frame(log.x = log(x), log.y = log(y))
  lm(log.y ~ log.x, data = datf)
}
