## Julia functions for MTH 229. These functions are used to illustrate some
## basic algorithms used for numerical approximations to calculus.
function bisection_method(f::Function, a::Number, b::Number)
## redefine to be self-contained
close_enough(x, y) = abs( x - y ) < sqrt(eps())
close_enough(x) = close_enough(x, 0)
## check endpoints
if f(a) * f(b) > 0
error("Function values at endpoints must have different signs")
end
## endpoints aren't already zeroes?
if close_enough(f(a))
return(a)
elseif close_enough(f(b))
return(b)
end
## begin
update_mid(a, b) = (a + b)/2
mid = update_mid(a, b)
ctr = 1
while ( !close_enough(f(mid)) )
## update a or b and then mid
f(a) * f(mid) < 0 ? (b = mid) : (a = mid)
mid = update_mid(a, b)
ctr += 1
end
println("Converged to $mid in $ctr steps")
return(mid)
end
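## For example (an added illustration): sin changes sign on [3, 4], so the
## returned value should approximate pi:
## julia> bisection_method(sin, 3, 4)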
## Code to take a derivative of a real-valued function using the 4th-order central difference
central_4th_difference(f, x, h) = (-f(x + 2h) + 8f(x+h) - 8f(x-h) + f(x-2h))/(12h)
Dc4(f, h) = begin g(x::Real) = central_4th_difference(f, x, h); g end
Dc4(f) = Dc4(f, 0.001)
## introduce f'(x) notation:
import Base.ctranspose
ctranspose(f::Function) = Dc4(f)
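## For example (an added illustration), f'(x) now gives a numeric derivative:
## julia> f(x) = sin(x)
## julia> f'(0)    ## should be roughly cos(0) = 1, up to the truncation error in h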
module ADiff
## Code to take automatic derivatives, modeled after
## http://www.davidson.edu/math/neidinger/SIAMRev74362.pdf
##
## Numeric derivatives via finite differences have errors due both to the
## mathematical approximation and to the floating point approximation; the
## latter can be large, as most computations end with a difference of
## similar-sized values. Automatic derivatives avoid this by adding
## information to each step of a function evaluation so that the
## derivative is evaluated as well. Errors are due only to the floating
## point approximation.
##
## This is a basic implementation of (forward) automatic differentiation using
## operator overloading. See https://groups.google.com/forum/#!topic/julia-dev/tXBR04t31vI for
## a better implementation.
##
## This illustrates how julia's type notation makes such things straightforward.
##
## The idea of automatic differentiation is to carry along, with each
## operation, the information needed to compute the derivative (using
## the chain rule), so that as a function is evaluated its derivative is
## evaluated as well. Compared to symbolic derivatives it is much faster
## (though this is not a comparison for julia, which has no built-in
## symbolic derivative). Compared to numeric approximations, there is
## no issue with round-off error from taking finite differences.
##
## The simplest usage is the D operator:
##
## julia> f(x) = x^2
## julia> D(f)(3)
## 6.0
## plot([f, D(f)], 0, 4) ## using Gadfly, say
## julia> Dc4(f)(3) ## less roundoff than numeric
## 5.9999999999994875
##
## More generally, the D operator can have a second argument indicating the order of the derivative:
## julia> D(sin, 2)(pi/2)
## -1.0
## For second derivatives, we also provide a D2 operator. Note that you
## cannot get this via D(D(f)); it won't work, as D only works for functions
## which are written as compositions of elementary functions, and D(f) is
## not one.
##
## This may not always work. For example, as `airy` is not overloaded
## we have an error:
##
## julia> f(x) = airy(x)
## julia> D(f)(2)
## no method airy(Int64,ADiff)
## julia> Dc4(f)(2) ## this works though
## -0.05309038443370347
##
## For lower-level use, one declares x to be of Ad type. Then when
## you create an expression with x, the derivative gets carried along:
##
## julia> x = Ad(1, 1) ## or ad(1)
## Ad(1,1)
## julia> sin(cos(x))
## Ad(0.5143952585235492,-0.7216061490634433)
##
## You can make functions too:
##
## f(x) = x^2 - sin(x)
## g(x) = (f*ad)(x)
## can plot with Gadfly, say
## plot([f,ADiff.val*f*ad], 0, 1) ## same plot
## but, it would be easiest to just do
## plot([f, D(f)], 0, 1)
export Ad, ad, D, D2, taylor, taylor_coefs
type Ad
val
der
end
## constructors
ad(a::Real) = Ad(a, 1)
ad(a::Ad) = Ad(a, 1)
## Need to extract the nesting depth (the number of derivatives carried) of an Ad value
function ad_order(a::Ad)
local b = a
ctr = 1
while isa(b.val, Ad)
ctr += 1
b = b.val
end
ctr
end
## accessors
val(a::Ad) = a.val
val(a::Real) = a
der(a::Ad) = a.der
der(a::Real) = 0
importall Base
## compare
isless(a::Ad, y::Real) = isless(a.val, y)
isless(x::Real, b::Ad) = isless(x, b.val)
isless(a::Ad, b::Ad) = isless(a.val, b.val)
## compose with *
*(f::Function, g::Function) = x -> f(g(x))
## compute f^(k)(a)
function diff(a::Ad, k::Integer)
if k == 0 return(a) end
n = ad_order(a)
if k > n
error("Can only take upto $n derivatives for this value of a")
end
if k == n
(der^k)(a)
else
(val^(n-k))((der^k)(a))
end
end
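## An added illustration of how nested Ad values carry higher derivatives:
## for f(x) = x^3, (ad^2)(2) builds Ad(Ad(2,1),1), and
## julia> diff((x -> x^3)((ad^2)(2)), 2)   ## should give f''(2) = 12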
## Operator for derivatives
##
## @param f some function of a real variable
## @param k degree of derivative, defaults to 1
function D(f::Function, k::Integer)
if k == 0
x -> f(x)
elseif k > 0
x -> diff(f((ad^k)(x)), k)
else
stop("Can't handle negative k")
end
end
D(f::Function) = D(f, 1)
D2(f::Function) = D(f, 2)
## Taylor series expansion
## returns a function that evaluates the degree-n Taylor polynomial
## @param f real-valued function
## @param n, degree
## @param c, expansion point. Defaults to 0
function taylor(f::Function, n::Integer, c::Real)
## sum f^k(c)(x-c)^k
coefs = taylor_coefs(f, n, c)
x -> sum(float( [ coefs[k + 1] * (x - c)^k for k in 0:n] ))
end
## default: Maclaurin series (c = 0)
taylor(f::Function, n::Integer) = taylor(f, n, 0)
## function to (more or less) efficiently compute the coefficients.
## The computations can be really costly when n gets bigger than 10
##
## @param f a function for which ADiff is valid (e.g., not an ADiff generated object!)
## @param n compute the coefficients f^(k)(c)/k! for k up to n
## @param c point to expand around
## @example
## Using Polynomial:
## a = reverse( taylor_coefs(sin, 10, 0) )
## p = Poly(a)
function taylor_coefs(f::Function, n::Integer, c::Real)
x = (ad^n)(c)
y = f(x)
float([(val^(n-k))((der^k)(y))/factorial(k) for k in 0:n])
end
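## An added usage sketch: the Maclaurin coefficients of exp are 1/k!, so
## taylor_coefs(exp, 4, 0) should be approximately [1, 1, 1/2, 1/6, 1/24],
## and taylor(exp, 4)(0.1) should be close to exp(0.1).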
## math operations
+(x::Ad, y::Ad) = Ad(x.val + y.val, x.der + y.der)
+(x::Ad, y::Real) = Ad(x.val + y, x.der)
+(x::Real, y::Ad) = y + x
+(x::Ad, y::Vector) = [x + i for i in y]
+(x::Vector, y::Ad) = y + x
-(x::Ad) = Ad(-x.val, -x.der)
-(x::Ad, y::Ad) = x + (-y)
-(x::Ad, y::Real) = Ad(x.val - y, x.der)
-(x::Real, y::Ad) = -y + x
-(x::Ad, y::Vector) = [ x - i for i in y]
-(x::Vector, y::Ad) = -(y-x)
*(x::Ad, y::Ad) = Ad(x.val * y.val, x.val * y.der + x.der * y.val)
*(x::Ad, y::Real) = Ad(y * x.val, y * x.der)
*(x::Real, y::Ad) = y * x
*(x::Ad, y::Vector) = [x*i for i in y]
*(x::Vector, y::Ad) = y*x
/(x::Ad, y::Ad) = Ad(x.val/y.val, (x.der * y.val - x.val * y.der)/(y.val^2))
/(x::Ad, y::Real) = Ad(x.val/y, x.der/y)
/(x::Real, y::Ad) = Ad(x/y.val, -x*y.der/(y.val)^2)
/(x::Ad, y::Vector) = [x/i for i in y]
/(x::Vector, y::Ad) = [i/y for i in x]
one(x::Ad) = Ad(1, 0) ## multiplicative identity
function ^(x::Ad, y::Integer)
if y == 0
one(x)
elseif y < 0
(1/x)^abs(y)
else
Ad(x.val ^ y, y * (x.val) ^ (y-1) * x.der)
end
end
^(x::Ad, y::Real) = Ad(x.val ^ y, y * (x.val) ^ (y-1) * x.der)
^(x::Ad, y::Ad) = exp(y * log(x))
^(x::Real, y::Ad) = exp(y * log(x))
^(x::Ad, y::Vector) = [x^i for i in y]
^(x::Vector, y::Ad) = [i^y for i in x]
for k in (:<, :<=, :>=, :>, :(==))
@eval begin
($k)(x::Ad, y::Ad) = ($k)(x.val, y.val)
end
end
for k in (:max, :min)
@eval begin
($k)(x::Ad, y::Ad) = ($k)(x.val, y.val)
end
end
## functions
for (k,v) in ((:exp, exp),
(:log, u -> 1/u),
(:log10, u-> 1/u/log(10)),
(:log2, u -> 1/u/log(2)),
(:sin, cos),
(:cos, u -> -sin(u)),
(:tan, u -> sec(u)^2),
(:csc, u -> -csc(u) * cot(u)),
(:sec, u -> sec(u) * tan(u)),
(:cot, u -> -csc(u)^2),
(:asin, u -> (1-u^2)^(-1/2)),
(:acos, u -> -(1-u^2)^(-1/2)),
(:atan, u -> 1/(1+u ^ 2)),
(:acsc, u -> -1/(abs(u) * sqrt(u^2 - 1))),
(:asec, u -> 1/(abs(u) * sqrt(u^2 - 1))),
(:acot, u -> -1/(1+u^2)),
(:sinh, cosh),
(:cosh, u -> sinh(u)),
(:tanh, u -> 1 - tanh(u)^2),
(:csch, u -> -coth(u) * csch(u)),
(:sech, u -> -tanh(u) * sech(u)),
(:coth, u -> 1 - coth(u)^2),
(:abs, u -> u > 0 ? 1 : (u < 0 ? -1 : NaN)),
(:sign, u -> u == 0 ? NaN : 0),
(:sqrt, u -> 1/2/sqrt(u)),
(:cbrt, u -> (1/3)/(cbrt(u)^2))
)
@eval begin
($k)(x::Ad) = Ad($(k)(x.val), (($v))(x.val) * x.der)
end
end
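## Each (name, derivative) pair above generates a chain-rule method for Ad;
## e.g. the (:sin, cos) entry expands (roughly) to:
## sin(x::Ad) = Ad(sin(x.val), cos(x.val) * x.der)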
end
using ADiff
## A naive implementation of Gauss quadrature to compute \int_{-1}^1 f(x) dx.
## Due to how the Legendre polynomials are recursively defined, this will
## be quite slow once n gets large. Of course, in practice one
## would compute the nodes and weights once and store them in a table.
using Polynomial
## Bonnet's recursion. Slow, and repeats work! Should memoize at least! http://en.wikipedia.org/wiki/Legendre_polynomials
function lgp(n::Integer)
if n == 0 return Poly([1]) end
if n == 1 return Poly([1, 0]) end
(2*(n-1) + 1) / n * lgp(1) * lgp(n-1) - (n-1)/n * lgp(n-2)
end
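## A quick check (illustration): lgp(2) should reproduce P_2(x) = (3x^2 - 1)/2.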
## nodes are roots of the legendre polynomials
gauss_nodes(n) = sort(roots(lgp(n)))
## Weights are computed from nodes, derivative of polynomials http://en.wikipedia.org/wiki/Gaussian_quadrature
function gauss_weights(n)
x = gauss_nodes(n)
w = similar(x)
p = lgp(n)
p = p/polyval(p, 1)
weights(x) = 2 / (1 - x^2) / (polyval(polydir(p), x))^2
map(weights, x) , x
end
## integrate f over [-1, 1] using an n-node rule
function gauss_quadrature(f::Function, n::Integer)
w, x = gauss_weights(n)
sum(w .* map(f, x))
end
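## An added illustration: an n-node rule is exact for polynomials of degree
## up to 2n-1, so gauss_quadrature(x -> x^2, 5) should recover
## \int_{-1}^1 x^2 dx = 2/3, up to floating point and root-finding error.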
function golden_section(f::Function, x1, x2, x3)
## we must have f(x2) >= max(f(x1), f(x3)) *and* f is unimodal
if f(x2) < max(f(x1), f(x3))
error("Need a triple of points")
end
tol = sqrt(eps())
a = 2 - (1 + sqrt(5))/2 ## 2 - golden ratio
## pick a point in the bigger of the two subintervals; return the point and indicate which side
pick_point(x1,x2,x3) = x2 - x1 > x3 - x2 ? (x2 - a*(x2-x1), true) : (x2 + a*(x3-x2), false)
## update triple if point is new max or not.
function update_triple(x1, x2, x3)
(x, left_side) = pick_point(x1, x2, x3)
if f(x) > f(x2)
left_side ? (x1, x, x2) : (x2, x, x3)
else
left_side ? (x,x2,x3) : (x1, x2, x)
end
end
## now loop until the gap between left and right is within tolerance.
## the interval shrinks by a factor of about .618 (the reciprocal of the golden ratio)
## each trip through, so this converges in about `ceil((8 + log10(x3 - x1))/log10(1/.618))` trips
while x3 - x1 > tol
(x1, x2, x3) = update_triple(x1, x2, x3)
end
## return argmax and max
(x2, f(x2))
end
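## For example (an added illustration): sin is unimodal on [1, 2] with its max
## at pi/2, and sin(1.5) >= max(sin(1), sin(2)), so
## julia> golden_section(sin, 1, 1.5, 2)   ## should return roughly (pi/2, 1.0)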
function integrate(f::Function, a::Real, b::Real, n::Integer, approx_area::Function)
x = linspace(float(a), b, n+1) ## n + 1 points for n subintervals
sum( [ approx_area(f, x[i], x[i+1]) for i in 1:n ] )
end
left_riemann(f::Function, a, b) = f(a)*(b-a)
right_riemann(f::Function, a::Real, b::Real) = f(b)*(b-a)
trapezoid(f::Function, a::Real, b::Real) = (f(a) + f(b))/2 * (b-a)
function simpsons(f::Function, a::Real, b::Real)
c = (a+b)/2
(f(a) + 4f(c) + f(b)) * (b-a)/6
end
function integrate(f::Function, a::Real, b::Real, n::Integer)
integrate(f, a, b, n, left_riemann)
end
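## An added comparison: the exact value of \int_0^pi sin(x) dx is 2.
## julia> integrate(sin, 0, pi, 1000)             ## left Riemann: roughly 2
## julia> integrate(sin, 0, pi, 1000, simpsons)   ## Simpson's rule: much closer to 2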
##
## from the `area` function in the MASS package of R
function adapt(f::Function, a::Real, b::Real, limit::Integer)
close_enough(x, y) = abs(x - y) < 1e-3
println("adapt called with a=$a, b=$b, limit=$limit")
h = b-a
c = (a + b)/2
a1 = (f(a) + f(b)) * h/2 ## trapezoid
a2 = (f(a) + 4f(c) + f(b)) * h/6 ## Simpson's parabola
if close_enough(a1, a2)
return(a2)
end
if limit == 0
println("limit reached for this interval [$a, $b]")21
return(a2)
end
adapt(f, a, c, limit - 1) + adapt(f, c, b, limit-1)
end
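## For example (an added illustration):
## julia> adapt(sin, 0, pi, 10)   ## should be near 2 (the println calls trace the recursion)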
function nm(f::Function, fp::Function, x0::Real)
## redefine to be self contained
close_enough(x) = abs(x) < sqrt(eps())
max_steps = 100
ctr = 0
update_guess(x) = x - f(x)/fp(x)
x = update_guess(x0)
while !close_enough(f(x)) && ctr < max_steps
ctr += 1
x = update_guess(x)
end
## all done
if ctr >= max_steps
error("Method did not converge")
else
return (x, ctr)
end
end
## using automatic derivative here
findzero(f::Function, x0::Real) = nm(f, D(f), x0)[1]
findcritical(f::Function, x0::Real) = nm(D(f), D(f,2), x0)[1]
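## For example (an added illustration):
## julia> findzero(x -> x^2 - 2, 1)         ## should be close to sqrt(2)
## julia> findcritical(x -> (x-1)^2, 0.0)   ## should be close to 1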
## generalized iteration method to find fixed point
## Iterate x = F(f,x) until we converge to a fixed point as determined by tol
function iterate(f::Function, x0::Number, F::Function, tol::Real)
x = F(f, x0)
ctr = 1
while abs(f(x)) > tol && ctr < 200
x = F(f, x)
ctr += 1
end
ctr >= 200 ? error("Method did not converge in 200 steps") : (x, ctr)
end
iterate(f::Function, x0::Number, F::Function) = iterate(f, x0, F, 1e-14)
## various update functions using automatic derivative:
newton_update(f::Function, x0::Number) = x0 - f(x0)/D(f)(x0)
function halley_update(f::Function, x0::Number)
a = f(x0); b = D(f)(x0); c = D(f,2)(x0)
x0 - 2*a*b/(2*b^2 - a*c)
end
householder_update(f::Function, x0, p::Integer) = x0 - (p+1)*D(u -> 1/f(u), p)(x0)/D(u -> 1/f(u), p+1)(x0)
householder_update0(f::Function, x0) = householder_update(f, x0, 0) ## Newton: quadratic
householder_update1(f::Function, x0) = householder_update(f, x0, 1) ## halley: cubic
householder_update2(f::Function, x0) = householder_update(f, x0, 2) ## higher order of degree d + 2 ...
householder_update3(f::Function, x0) = householder_update(f, x0, 3)
householder_update4(f::Function, x0) = householder_update(f, x0, 4)
## to use: iterate(f, x0, householder_update0)
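## e.g. (an added illustration): iterate(x -> x^2 - 2, 1.0, newton_update)
## should converge to sqrt(2) in a handful of steps.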
## http://www.hindawi.com/journals/ijmms/2012/493456/
## Rajinder Thukral
## very fast (8th order) root finder like newton
function thukral(f::Function, xo::Real)
function update(f::Function, xn::Real, tol::Real)
i, j, k, l = 1, 1, 1, 1 ## each in {1,2}
beta = 1/i
fxn = f(xn)
wn = xn + (1/beta)*fxn
fwn = f(wn)
if abs(fwn) < tol return(wn) end
yn = xn - fxn^2/(fwn - fxn)
fyn = f(yn)
if abs(fyn) < tol return(yn) end
phi = ((1 - fyn/fwn)^(-1), (1 + fyn/fwn))
zn = yn - phi[j]*( (xn-yn)/(fxn - fyn) ) * fyn
fzn = f(zn)
if abs(fzn) < tol return(zn) end
omega = ( (1- fzn/fwn)^(-1), 1 + fzn/fwn + (fzn/fwn)^2)
psi = (1 - 2*fyn^3/(fwn^2*fxn), (1 + 2*fyn^3/(fwn^2*fxn))^(-1) )
zn - omega[k]*psi[l]*((fzn - fyn)/(zn-yn) - (fyn - fxn)/(yn - xn) + (fzn - fxn)/(zn-xn))^(-1)*fzn
end
update(f::Function, x::Ad, tol::Real) = update(f, x.val, tol)
xn = xo
tol = 1e-14
ctr = 1
while (abs(f(xn)) > tol) & (ctr < 20)
xn = update(f, xn, tol)
ctr += 1
end
(xn, ctr)
end
## some tests
## julia> thukral(x -> (x-2)*(x^10 + x + 1)*exp(-x-1), 1.9)
## (2.0,4)
## julia> thukral(x -> x^11 + x + 1, -1)
## (-0.844397528792023,4)
## julia> thukral(u -> sin(u)^2 - u^2 + 1, 1)
## (1.4044916482153413,3)