2021-09-24 Transient problems¶
Last time¶
- p-Laplacian
- algorithmic differentiation for matrix assembly
- principled by-hand differentiation
Today¶
- recap/discuss methods for differentiating
- intro to ordinary differential equations
- accuracy and stability
- transient advection
using Plots
using LinearAlgebra
using SparseArrays
using Zygote
function vander(x, k=nothing)
if k === nothing
k = length(x)
end
V = ones(length(x), k)
for j = 2:k
V[:, j] = V[:, j-1] .* x
end
V
end
function fdstencil(source, target, k)
"kth derivative stencil from source to target"
x = source .- target
V = vander(x)
rhs = zero(x)'
rhs[k+1] = factorial(k)
rhs / V
end
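As a quick sanity check (a usage sketch, not from the lecture): the stencil for the second derivative on three unit-spaced points should be the classic [1, -2, 1].

# Second-derivative stencil on {-1, 0, 1} targeting 0 (sketch)
fdstencil([-1, 0, 1], 0, 2)  # ≈ [1.0 -2.0 1.0]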
function newton(residual, jacobian, u0; maxits=20)
u = u0
uhist = [copy(u)]
normhist = []
for k in 1:maxits
f = residual(u)
push!(normhist, norm(f))
J = jacobian(u)
delta_u = - J \ f
u += delta_u
push!(uhist, copy(u))
end
uhist, normhist
end
newton (generic function with 1 method)
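For a quick usage check (a sketch; the \(\sqrt 2\) example is not from the lecture), Newton on the scalar residual \(u^2 - 2\) converges in a handful of iterations. The Jacobian is returned as a 1×1 matrix so that J \ f works.

# Find sqrt(2) as the root of u^2 - 2 (sketch)
uhist, _ = newton(u -> u.^2 .- 2, u -> reshape(2 .* u, 1, 1), [1.])
uhist[end]  # ≈ [1.4142135623730951]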
p-Laplacian with Zygote¶
function plaplace_fpoint(u, h)
u_xstag = diff(u) / h
kappa_stag = abs.(u_xstag) .^ (plaplace_p - 2)
[1/h, -1/h]' * (kappa_stag .* u_xstag)
end
function plaplace_fzygote(u)
n = length(u)
h = 2 / (n - 1)
u = copy(u)
f = copy(u)
u[[1, n]] = [0, 1]
f[n] -= 1
for i in 2:n-1
f[i] = plaplace_fpoint(u[i-1:i+1], h) - plaplace_forcing
end
f
end
plaplace_fzygote (generic function with 1 method)
function plaplace_Jzygote(u)
n = length(u)
h = 2 / (n - 1)
u = copy(u)
u[[1, n]] = [0, 1]
rows = [1, n]
cols = [1, n]
vals = [1., 1.] # identity entries for the boundary rows (float)
for i in 2:n-1
js = i-1:i+1
fi_ujs = gradient(ujs -> plaplace_fpoint(ujs, h), u[js])[1]
mask = 1 .< js .< n
append!(rows, [i,i,i][mask])
append!(cols, js[mask])
append!(vals, fi_ujs[mask])
end
sparse(rows, cols, vals)
end
plaplace_Jzygote (generic function with 1 method)
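One way to gain confidence in the assembled Jacobian is a finite-difference check (a sketch; fd_jacobian is a hypothetical helper, not part of the lecture code, and u0, plaplace_p, plaplace_forcing come from the test cell below).

# Column-by-column forward-difference approximation of the Jacobian (sketch)
function fd_jacobian(residual, u; eps=1e-6)
    n = length(u)
    f0 = residual(u)
    J = zeros(n, n)
    for j in 1:n
        du = zeros(n); du[j] = eps
        J[:, j] = (residual(u + du) - f0) / eps
    end
    J
end
# e.g., norm(fd_jacobian(plaplace_fzygote, u0) - plaplace_Jzygote(u0)) should be small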
Test it out¶
plaplace_p = 1.3
plaplace_forcing = .1
x = LinRange(-1, 1, 20)
u0 = collect(1 .+ x)
uhist, normhist = newton(plaplace_fzygote, plaplace_Jzygote, u0; maxits=20);
plot(normhist, marker=:auto, yscale=:log10)
InterruptException: (stack trace from Plots/RecipesPipeline truncated)
plot(x, uhist, marker=:auto, legend=:topleft)
- Can you fix the plaplace_J to differentiate correctly (analytically)?
- What is causing Newton to diverge?
- How might we fix or work around it?
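One possible workaround (a sketch, not the lecture's answer): damp the Newton update so iterates can't overshoot as violently. The step fraction lambda here is a hypothetical tuning knob.

# Newton with a fixed damping factor on the update (sketch)
function newton_damped(residual, jacobian, u0; maxits=50, lambda=0.5)
    u = copy(u0)
    normhist = []
    for k in 1:maxits
        f = residual(u)
        push!(normhist, norm(f))
        u -= lambda * (jacobian(u) \ f)  # take only a fraction of the full step
    end
    u, normhist
end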
A model problem¶
k = 1.05
fk(x) = tanh.(k*x)
Jk(x) = reshape(k ./ cosh.(k*x).^2, 1, 1)  # 1×1 Jacobian so J \ f works
xhist, normhist = newton(fk, Jk, [1.], maxits=10)
plot(xhist, fk.(xhist), marker=:auto, legend=:bottomright)
plot!(fk, xlims=(-3, 3))
What about symbolic differentiation?¶
import Symbolics: Differential, expand_derivatives, @variables
@variables x
Dx = Differential(x)
y = tanh(k*x)
Dx(y)
expand_derivatives(Dx(y))
Cool, what about products?¶
y = x
for _ in 1:2
y = cos(y^pi) * log(y)
end
expand_derivatives(Dx(y))
The size of these expressions can grow exponentially.
Hand coding derivatives: it’s all chain rule and associativity¶
function f(x)
y = x
for _ in 1:2
a = y^pi
b = cos(a)
c = log(y)
y = b * c
end
y
end
f(1.9), gradient(f, 1.9)
(-1.5346823414986814, (-34.03241959914049,))
function df(x, dx)
y = x
dy = dx
for _ in 1:2
a = y^pi
da = pi * y^(pi-1) * dy
b = cos(a)
db = -sin(a) * da
c = log(y)
dc = 1/y * dy
y = b * c
dy = db * c + b * dc
end
dy
end
df(1.9, 1)
-34.03241959914048
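Dual-number forward mode automates exactly this propagation. For comparison (assuming the ForwardDiff package, which is not loaded above):

import ForwardDiff
ForwardDiff.derivative(f, 1.9)  # ≈ -34.0324…, matching df(1.9, 1)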
We can go the other way¶
We can differentiate a composition \(h(g(f(x)))\) as

\[\operatorname{d} h = h' \operatorname{d} g, \qquad \operatorname{d} g = g' \operatorname{d} f, \qquad \operatorname{d} f = f' \operatorname{d} x.\]

What we've done above is called "forward mode", and amounts to placing the parentheses in the chain rule like

\[\operatorname{d} h = \frac{\partial h}{\partial g} \left( \frac{\partial g}{\partial f} \left( \frac{\partial f}{\partial x} \operatorname{d} x \right) \right).\]

The expression means the same thing if we rearrange the parentheses,

\[\operatorname{d} h = \left( \left( \frac{\partial h}{\partial g} \frac{\partial g}{\partial f} \right) \frac{\partial f}{\partial x} \right) \operatorname{d} x,\]

which we can compute in reverse order via

\[\bar g = \bar h \frac{\partial h}{\partial g}, \qquad \bar f = \bar g \frac{\partial g}{\partial f}, \qquad \bar x = \bar f \frac{\partial f}{\partial x}, \qquad \text{starting from } \bar h = 1.\]
A reverse mode example¶
function g(x)
a = x^pi
b = cos(a)
c = log(x)
y = b * c
y
end
(g(1.4), gradient(g, 1.4))
(-0.32484122107701546, (-1.2559761698835525,))
function gback(x, y_)
a = x^pi
b = cos(a)
c = log(x)
y = b * c
# backward pass
c_ = y_ * b
b_ = c * y_
a_ = -sin(a) * b_
x_ = 1/x * c_ + pi * x^(pi-1) * a_
x_
end
gback(1.4, 1)
-1.2559761698835525
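Zygote exposes the same forward/backward split: pullback runs the forward pass and returns a closure for the backward pass (a usage sketch).

y, back = Zygote.pullback(g, 1.4)  # forward pass, recording what the backward pass needs
back(1.0)                          # backward pass: ≈ (-1.2559761698835525,)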
Kinds of algorithmic differentiation¶
- Source transformation: Fortran code in, Fortran code out
  - Duplicates compiler features, usually incomplete language coverage
  - Produces efficient code
- Operator overloading: C++ types
  - Hard to vectorize
  - Loops are effectively unrolled/inefficient
- Just-in-time compilation: tightly coupled with compiler
  - JIT lag
  - Needs dynamic language features (JAX) or tight integration with compiler (Zygote, Enzyme)
  - Some sharp bits
How does Zygote work?¶
h1(x) = x^3 + 3*x
h2(x) = ((x * x) + 3) * x
@code_llvm h2(4.)
; @ In[92]:2 within `h2'
define double @julia_h2_16295(double %0) {
top:
; ┌ @ float.jl:332 within `*'
%1 = fmul double %0, %0
; └
; ┌ @ promotion.jl:321 within `+' @ float.jl:326
%2 = fadd double %1, 3.000000e+00
; └
; ┌ @ float.jl:332 within `*'
%3 = fmul double %2, %0
; └
ret double %3
}
@code_llvm gradient(h2, 4.)
; @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:74 within `gradient'
define [1 x double] @julia_gradient_16320(double %0) {
top:
; @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:75 within `gradient'
; ┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:40 within `pullback'
; │┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:34 within `_pullback' @ In[92]:2 @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface2.jl:9
; ││┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface2.jl within `macro expansion'
; │││┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/chainrules.jl:152 within `chain_rrule'
; ││││┌ @ /home/jed/.julia/packages/ChainRulesCore/Voykb/src/rules.jl:134 within `rrule' @ /home/jed/.julia/packages/ChainRules/3Qznz/src/rulesets/Base/fastmath_able.jl:222
; │││││┌ @ float.jl:332 within `*'
%1 = fmul double %0, %0
; ││└└└└
; ││ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:34 within `_pullback' @ In[92]:2 @ /home/jed/.julia/packages/ZygoteRules/OjfTt/src/adjoint.jl:57
; ││┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/lib/number.jl:15 within `adjoint'
; │││┌ @ promotion.jl:321 within `+' @ float.jl:326
%2 = fadd double %1, 3.000000e+00
; └└└└
; @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:76 within `gradient'
; ┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:41 within `#50'
; │┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/lib/lib.jl:22 within `accum' @ /home/jed/.julia/packages/Zygote/nsu1Y/src/lib/lib.jl:17
; ││┌ @ float.jl:326 within `+'
%3 = fadd double %1, %2
%4 = fadd double %1, %3
; └└└
%.fca.0.insert = insertvalue [1 x double] undef, double %4, 0
ret [1 x double] %.fca.0.insert
}
Forward or reverse?¶
It all depends on the shape.
- One input, many outputs: use forward mode
  - "One input" can be looking in one direction
- Many inputs, one output: use reverse mode
  - Will need to traverse execution backwards ("tape")
  - Hierarchical checkpointing
- About square? Forward is usually a bit more efficient.
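A shape-guided sketch (assuming ForwardDiff as imported above; lossish and curve are made-up names):

lossish(u) = sum(abs2, u)            # many inputs -> one output: reverse mode
gradient(lossish, [1., 2, 3])[1]     # == [2.0, 4.0, 6.0]
curve(t) = [cos(t), sin(t), t]       # one input -> many outputs: forward mode
ForwardDiff.derivative(curve, 0.5)   # == [-sin(0.5), cos(0.5), 1.0]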
Ordinary Differential Equations¶
Given initial condition \(u_0 = u(t=0)\), find \(u(t)\) for \(t > 0\) that satisfies

\[\dot u \equiv \frac{\partial u}{\partial t} = f(t, u).\]
| Application | \(u\) | \(f\) |
|---|---|---|
| Orbital dynamics | position, momentum | conservation of momentum |
| Chemical reactions | concentration | conservation of atoms |
| Epidemiology | infected/recovered population | transmission and recovery |
| Heat transfer | temperature | conservation of energy |
| Seismology | displacement, momentum | conservation of momentum |
Solving differential equations¶
Linear equations¶

\[\dot u = A(t) u + \text{source}(t)\]

- Autonomous if \(A(t) \equiv A\) and source independent of \(t\)
- Suppose \(u\) and \(a = A\) are scalars: \(u(t) = e^{at} u_0\)
Can do the same for systems¶

\[u(t) = e^{A t} u_0\]
What does it mean to exponentiate a matrix?¶
Taylor series!

\[e^A = 1 + A + \frac{A^2}{2} + \frac{A^3}{3!} + \dotsb\]

and there are many practical ways to compute it.
Question¶
Suppose that the diagonalization \(A = X \Lambda X^{-1}\) exists and derive a finite expression for the matrix exponential using the scalar exp function.
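A numeric check of the resulting identity \(e^A = X e^\Lambda X^{-1}\) (a sketch, not the lecture's derivation; exp of a square matrix is LinearAlgebra's matrix exponential):

A = [0 1; -1 0]
lam, X = eigen(A)                 # A = X * Diagonal(lam) * inv(X)
X * Diagonal(exp.(lam)) * inv(X)  # ≈ exp(A) = [cos(1) sin(1); -sin(1) cos(1)], up to roundoff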
Forward Euler method¶
function ode_euler(f, u0; tfinal=10., h=0.1)
u = copy(u0)
t = 0.
thist = [t]
uhist = [u0]
while t < tfinal
tnext = min(t+h, tfinal)
h = tnext - t
u += h * f(t, u)
t = tnext
push!(thist, t)
push!(uhist, u)
end
thist, hcat(uhist...)
end
ode_euler (generic function with 1 method)
f1(t, u; k=5) = -k * (u .- cos(t))
thist, uhist = ode_euler(f1, [.5], tfinal=10, h=.4)
scatter(thist, uhist[1,:])
plot!(cos)
Forward Euler on a linear system¶
f2(t, u) = [0 1; -1 0] * u
thist, uhist = ode_euler(f2, [0., 1], h=.03, tfinal=20)
scatter(thist, uhist')
plot!([cos, sin])
eigen([0 1; -1 0])
Eigen{ComplexF64, ComplexF64, Matrix{ComplexF64}, Vector{ComplexF64}}
values:
2-element Vector{ComplexF64}:
0.0 - 1.0im
0.0 + 1.0im
vectors:
2×2 Matrix{ComplexF64}:
0.707107-0.0im 0.707107+0.0im
0.0-0.707107im 0.0+0.707107im
Linear Stability Analysis¶
Why did Euler converge on the first problem (with sufficiently small steps) while always diverging on the second problem?
We can understand the convergence of methods by analyzing the test problem

\[\dot u = \lambda u, \qquad u(0) = 1,\]

for \(\lambda \in \mathbb{C}\). The exact solution is \(u(t) = e^{\lambda t}\), while one forward Euler step computes the map

\[\tilde u(h) = (1 + h \lambda) u(0).\]

When does this map cause solutions to "blow up" and when is it stable?
Stability diagrams (a property of methods)¶
function plot_stability(Rz; xlims=(-2, 2), ylims=(-2, 2))
x = LinRange(xlims[1], xlims[2], 100)
y = LinRange(ylims[1], ylims[2], 100)
heatmap(x, y, (x, y) -> abs(Rz(x + 1im*y)), c=:bwr, clims=(0, 2), aspect_ratio=:equal)
end
plot_stability(z -> 1 + z)
Implicit methods¶
Recall that forward Euler is the step

\[\tilde u(h) = u(0) + h f(0, u(0)),\]

which for the test problem gives the stability function \(R(z) = 1 + z\) with \(z = h\lambda\). Backward Euler instead solves the (generally nonlinear) equation

\[\tilde u(h) = u(0) + h f(h, \tilde u(h)),\]

which for the test problem yields \(\tilde u(h) = u(0)/(1 - z)\), i.e., \(R(z) = 1/(1 - z)\).
plot_stability(z -> 1 / (1 - z))
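For a linear system \(\dot u = A u\), each backward Euler step is a single linear solve (a sketch, assuming linear f; a nonlinear f would need Newton inside each step):

function ode_beuler_linear(A, u0; tfinal=10., h=0.1)
    u = copy(u0); t = 0.
    thist = [t]; uhist = [u0]
    while t < tfinal
        tnext = min(t + h, tfinal)
        h = tnext - t
        u = (I - h * A) \ u   # implicit step: solve (I - h A) u_next = u
        t = tnext
        push!(thist, t); push!(uhist, u)
    end
    thist, hcat(uhist...)
end
# e.g., thist, uhist = ode_beuler_linear([0 1; -1 0], [0., 1], h=.1, tfinal=20)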
Advection with centered stencil¶
function advect_center(t, u)
n = length(u)
dx = 2 / n
uwrap = [u[end], u..., u[1]]
f = zero(u)
for i in 1:n
f[i] = -(uwrap[i+2] - uwrap[i]) / (2*dx)
end
f
end
x = LinRange(-1, 1, 51)[1:end-1]
u0 = exp.(-9 * x .^ 2)
thist, uhist = ode_euler(advect_center, u0, h=.002, tfinal=1);
nsteps = size(uhist, 2)
plot(x, uhist[:, 1:(nsteps÷8):end])
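The wiggles are a stability problem, not an accuracy problem. One way to see it (a sketch; A_center is a made-up name): assemble the centered operator column by column and inspect its eigenvalues, which are purely imaginary and so lie outside forward Euler's stability region \(|1 + h\lambda| \le 1\) for every \(h > 0\).

# Assemble the (linear) centered-difference operator from unit vectors (sketch)
A_center = hcat([advect_center(0, Float64.(1:50 .== j)) for j in 1:50]...)
eigvals(A_center)   # purely imaginary: the operator is skew-symmetric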
Advection with upwind stencil¶
function advect_upwind(t, u)
n = length(u)
dx = 2 / n
uwrap = [u[end], u..., u[1]]
f = zero(u)
for i in 1:n
f[i] = -(uwrap[i+1] - uwrap[i]) / dx
end
f
end
x = LinRange(-1, 1, 51)[1:end-1]
u0 = exp.(-9 * x .^ 2)
@time thist, uhist = ode_euler(advect_upwind, u0, h=.002, tfinal=1);
0.073689 seconds (119.23 k allocations: 7.136 MiB, 96.56% compilation time)
nsteps = size(uhist, 2)
plot(x, uhist[:, 1:(nsteps÷8):end])
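By contrast, the upwind operator's eigenvalues have negative real part, but forward Euler is still only conditionally stable: the classic constraint here is \(h \le \Delta x\) (a sketch using standard CFL reasoning, not from the lecture).

# With dx = 2/50 = 0.04, the step h = .08 amplifies the highest mode by
# |1 - 2(1 - e^{iπ})| = 3 per step, so the solution blows up despite a smooth start.
thist, uhist = ode_euler(advect_upwind, u0, h=.08, tfinal=2)
extrema(uhist[:, end])   # far outside [0, 1]: unstable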