2021-09-24 Transient problems¶
Last time¶
- p-Laplacian
- algorithmic differentiation for matrix assembly
- principled by-hand differentiation
Today¶
- recap/discuss methods for differentiating
- intro to ordinary differential equations
- accuracy and stability
- transient advection
using Plots
using LinearAlgebra
using SparseArrays
using Zygote
function vander(x, k=nothing)
if k === nothing
k = length(x)
end
V = ones(length(x), k)
for j = 2:k
V[:, j] = V[:, j-1] .* x
end
V
end
function fdstencil(source, target, k)
"kth derivative stencil from source to target"
x = source .- target
V = vander(x)
rhs = zero(x)'
rhs[k+1] = factorial(k)
rhs / V
end
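As a quick sanity check (a usage sketch, not from the lecture): the stencil for the second derivative on three unit-spaced points should be the classic [1, -2, 1].

# Second-derivative stencil on {-1, 0, 1} targeting 0 (sketch)
fdstencil([-1, 0, 1], 0, 2)  # ≈ [1.0 -2.0 1.0]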
function newton(residual, jacobian, u0; maxits=20)
u = u0
uhist = [copy(u)]
normhist = []
for k in 1:maxits
f = residual(u)
push!(normhist, norm(f))
J = jacobian(u)
delta_u = - J \ f
u += delta_u
push!(uhist, copy(u))
end
uhist, normhist
end
newton (generic function with 1 method)
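For a quick usage check (a sketch; the \(\sqrt 2\) example is not from the lecture), Newton on the scalar residual \(u^2 - 2\) converges in a handful of iterations. The Jacobian is returned as a 1×1 matrix so that J \ f works.

# Find sqrt(2) as the root of u^2 - 2 (sketch)
uhist, _ = newton(u -> u.^2 .- 2, u -> reshape(2 .* u, 1, 1), [1.])
uhist[end]  # ≈ [1.4142135623730951]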
p-Laplacian with Zygote¶
function plaplace_fpoint(u, h)
u_xstag = diff(u) / h
kappa_stag = abs.(u_xstag) .^ (plaplace_p - 2)
[1/h, -1/h]' * (kappa_stag .* u_xstag)
end
function plaplace_fzygote(u)
n = length(u)
h = 2 / (n - 1)
u = copy(u)
f = copy(u)
u[[1, n]] = [0, 1]
f[n] -= 1
for i in 2:n-1
f[i] = plaplace_fpoint(u[i-1:i+1], h) - plaplace_forcing
end
f
end
plaplace_fzygote (generic function with 1 method)
function plaplace_Jzygote(u)
n = length(u)
h = 2 / (n - 1)
u = copy(u)
u[[1, n]] = [0, 1]
rows = [1, n]
cols = [1, n]
vals = [1., 1.] # identity entries for the boundary rows (float)
for i in 2:n-1
js = i-1:i+1
fi_ujs = gradient(ujs -> plaplace_fpoint(ujs, h), u[js])[1]
mask = 1 .< js .< n
append!(rows, [i,i,i][mask])
append!(cols, js[mask])
append!(vals, fi_ujs[mask])
end
sparse(rows, cols, vals)
end
plaplace_Jzygote (generic function with 1 method)
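One way to gain confidence in the assembled Jacobian is a finite-difference check (a sketch; fd_jacobian is a hypothetical helper, not part of the lecture code, and u0, plaplace_p, plaplace_forcing come from the test cell below).

# Column-by-column forward-difference approximation of the Jacobian (sketch)
function fd_jacobian(residual, u; eps=1e-6)
    n = length(u)
    f0 = residual(u)
    J = zeros(n, n)
    for j in 1:n
        du = zeros(n); du[j] = eps
        J[:, j] = (residual(u + du) - f0) / eps
    end
    J
end
# e.g., norm(fd_jacobian(plaplace_fzygote, u0) - plaplace_Jzygote(u0)) should be small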
Test it out¶
plaplace_p = 1.3
plaplace_forcing = .1
x = LinRange(-1, 1, 20)
u0 = collect(1 .+ x)
uhist, normhist = newton(plaplace_fzygote, plaplace_Jzygote, u0; maxits=20);
plot(normhist, marker=:auto, yscale=:log10)
InterruptException: (stack trace from Plots/RecipesPipeline truncated)
plot(x, uhist, marker=:auto, legend=:topleft)
- Can you fix the plaplace_J to differentiate correctly (analytically)?
- What is causing Newton to diverge?
- How might we fix or work around it?
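One possible workaround (a sketch, not the lecture's answer): damp the Newton update so iterates can't overshoot as violently. The step fraction lambda here is a hypothetical tuning knob.

# Newton with a fixed damping factor on the update (sketch)
function newton_damped(residual, jacobian, u0; maxits=50, lambda=0.5)
    u = copy(u0)
    normhist = []
    for k in 1:maxits
        f = residual(u)
        push!(normhist, norm(f))
        u -= lambda * (jacobian(u) \ f)  # take only a fraction of the full step
    end
    u, normhist
end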
A model problem¶
k = 1.05
fk(x) = tanh.(k*x)
Jk(x) = reshape(k ./ cosh.(k*x).^2, 1, 1)  # 1×1 Jacobian so J \ f works
xhist, normhist = newton(fk, Jk, [1.], maxits=10)
plot(xhist, fk.(xhist), marker=:auto, legend=:bottomright)
plot!(fk, xlims=(-3, 3))
What about symbolic differentiation?¶
import Symbolics: Differential, expand_derivatives, @variables
@variables x
Dx = Differential(x)
y = tanh(k*x)
Dx(y)
expand_derivatives(Dx(y))
Cool, what about products?¶
y = x
for _ in 1:2
y = cos(y^pi) * log(y)
end
expand_derivatives(Dx(y))
The size of these expressions can grow exponentially.
Hand coding derivatives: it’s all chain rule and associativity¶
function f(x)
y = x
for _ in 1:2
a = y^pi
b = cos(a)
c = log(y)
y = b * c
end
y
end
f(1.9), gradient(f, 1.9)
(-1.5346823414986814, (-34.03241959914049,))
function df(x, dx)
y = x
dy = dx
for _ in 1:2
a = y^pi
da = pi * y^(pi-1) * dy
b = cos(a)
db = -sin(a) * da
c = log(y)
dc = 1/y * dy
y = b * c
dy = db * c + b * dc
end
dy
end
df(1.9, 1)
-34.03241959914048
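Dual-number forward mode automates exactly this propagation. For comparison (assuming the ForwardDiff package, which is not loaded above):

import ForwardDiff
ForwardDiff.derivative(f, 1.9)  # ≈ -34.0324…, matching df(1.9, 1)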
We can go the other way¶
We can differentiate a composition \(h(g(f(x)))\) as

\[\operatorname{d} h = h' \operatorname{d} g, \qquad \operatorname{d} g = g' \operatorname{d} f, \qquad \operatorname{d} f = f' \operatorname{d} x.\]

What we've done above is called "forward mode", and amounts to placing the parentheses in the chain rule like

\[\operatorname{d} h = \frac{\partial h}{\partial g} \left( \frac{\partial g}{\partial f} \left( \frac{\partial f}{\partial x} \operatorname{d} x \right) \right).\]

The expression means the same thing if we rearrange the parentheses,

\[\operatorname{d} h = \left( \left( \frac{\partial h}{\partial g} \frac{\partial g}{\partial f} \right) \frac{\partial f}{\partial x} \right) \operatorname{d} x,\]

which we can compute in reverse order via

\[\bar g = \bar h \frac{\partial h}{\partial g}, \qquad \bar f = \bar g \frac{\partial g}{\partial f}, \qquad \bar x = \bar f \frac{\partial f}{\partial x}, \qquad \text{starting from } \bar h = 1.\]
A reverse mode example¶
function g(x)
a = x^pi
b = cos(a)
c = log(x)
y = b * c
y
end
(g(1.4), gradient(g, 1.4))
(-0.32484122107701546, (-1.2559761698835525,))
function gback(x, y_)
a = x^pi
b = cos(a)
c = log(x)
y = b * c
# backward pass
c_ = y_ * b
b_ = c * y_
a_ = -sin(a) * b_
x_ = 1/x * c_ + pi * x^(pi-1) * a_
x_
end
gback(1.4, 1)
-1.2559761698835525
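Zygote exposes the same forward/backward split: pullback runs the forward pass and returns a closure for the backward pass (a usage sketch).

y, back = Zygote.pullback(g, 1.4)  # forward pass, recording what the backward pass needs
back(1.0)                          # backward pass: ≈ (-1.2559761698835525,)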
Kinds of algorithmic differentiation¶
- Source transformation: Fortran code in, Fortran code out
  - Duplicates compiler features, usually incomplete language coverage
  - Produces efficient code
- Operator overloading: C++ types
  - Hard to vectorize
  - Loops are effectively unrolled/inefficient
- Just-in-time compilation: tightly coupled with compiler
  - JIT lag
  - Needs dynamic language features (JAX) or tight integration with compiler (Zygote, Enzyme)
  - Some sharp bits
How does Zygote work?¶
h1(x) = x^3 + 3*x
h2(x) = ((x * x) + 3) * x
@code_llvm h2(4.)
; @ In[92]:2 within `h2'
define double @julia_h2_16295(double %0) {
top:
; ┌ @ float.jl:332 within `*'
%1 = fmul double %0, %0
; └
; ┌ @ promotion.jl:321 within `+' @ float.jl:326
%2 = fadd double %1, 3.000000e+00
; └
; ┌ @ float.jl:332 within `*'
%3 = fmul double %2, %0
; └
ret double %3
}
@code_llvm gradient(h2, 4.)
; @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:74 within `gradient'
define [1 x double] @julia_gradient_16320(double %0) {
top:
; @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:75 within `gradient'
; ┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:40 within `pullback'
; │┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:34 within `_pullback' @ In[92]:2 @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface2.jl:9
; ││┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface2.jl within `macro expansion'
; │││┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/chainrules.jl:152 within `chain_rrule'
; ││││┌ @ /home/jed/.julia/packages/ChainRulesCore/Voykb/src/rules.jl:134 within `rrule' @ /home/jed/.julia/packages/ChainRules/3Qznz/src/rulesets/Base/fastmath_able.jl:222
; │││││┌ @ float.jl:332 within `*'
%1 = fmul double %0, %0
; ││└└└└
; ││ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:34 within `_pullback' @ In[92]:2 @ /home/jed/.julia/packages/ZygoteRules/OjfTt/src/adjoint.jl:57
; ││┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/lib/number.jl:15 within `adjoint'
; │││┌ @ promotion.jl:321 within `+' @ float.jl:326
%2 = fadd double %1, 3.000000e+00
; └└└└
; @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:76 within `gradient'
; ┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/compiler/interface.jl:41 within `#50'
; │┌ @ /home/jed/.julia/packages/Zygote/nsu1Y/src/lib/lib.jl:22 within `accum' @ /home/jed/.julia/packages/Zygote/nsu1Y/src/lib/lib.jl:17
; ││┌ @ float.jl:326 within `+'
%3 = fadd double %1, %2
%4 = fadd double %1, %3
; └└└
%.fca.0.insert = insertvalue [1 x double] undef, double %4, 0
ret [1 x double] %.fca.0.insert
}
Forward or reverse?¶
It all depends on the shape.
- One input, many outputs: use forward mode
  - "One input" can be looking in one direction
- Many inputs, one output: use reverse mode
  - Will need to traverse execution backwards ("tape")
  - Hierarchical checkpointing
- About square? Forward is usually a bit more efficient.
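A shape-guided sketch (assuming ForwardDiff as imported above; lossish and curve are made-up names):

lossish(u) = sum(abs2, u)            # many inputs -> one output: reverse mode
gradient(lossish, [1., 2, 3])[1]     # == [2.0, 4.0, 6.0]
curve(t) = [cos(t), sin(t), t]       # one input -> many outputs: forward mode
ForwardDiff.derivative(curve, 0.5)   # == [-sin(0.5), cos(0.5), 1.0]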
Ordinary Differential Equations¶
Given initial condition \(u_0 = u(t=0)\), find \(u(t)\) for \(t > 0\) that satisfies

\[\dot u \equiv \frac{\partial u}{\partial t} = f(t, u).\]
| Application | \(u\) | \(f\) |
|---|---|---|
| Orbital dynamics | position, momentum | conservation of momentum |
| Chemical reactions | concentration | conservation of atoms |
| Epidemiology | infected/recovered population | transmission and recovery |
| Heat transfer | temperature | conservation of energy |
| Seismology | displacement, momentum | conservation of momentum |
Solving differential equations¶
Linear equations¶

\[\dot u = A(t) u + \text{source}(t)\]

- Autonomous if \(A(t) \equiv A\) and source independent of \(t\)
- Suppose \(u\) and \(a = A\) are scalars: \(u(t) = e^{at} u_0\)
Can do the same for systems¶

\[u(t) = e^{A t} u_0\]
What does it mean to exponentiate a matrix?¶
Taylor series!

\[e^A = 1 + A + \frac{A^2}{2} + \frac{A^3}{3!} + \dotsb\]

and there are many practical ways to compute it.
Question¶
Suppose that the diagonalization \(A = X \Lambda X^{-1}\) exists and derive a finite expression for the matrix exponential using the scalar exp function.
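A numeric check of the resulting identity \(e^A = X e^\Lambda X^{-1}\) (a sketch, not the lecture's derivation; exp of a square matrix is LinearAlgebra's matrix exponential):

A = [0 1; -1 0]
lam, X = eigen(A)                 # A = X * Diagonal(lam) * inv(X)
X * Diagonal(exp.(lam)) * inv(X)  # ≈ exp(A) = [cos(1) sin(1); -sin(1) cos(1)], up to roundoff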
Forward Euler method¶
function ode_euler(f, u0; tfinal=10., h=0.1)
u = copy(u0)
t = 0.
thist = [t]
uhist = [u0]
while t < tfinal
tnext = min(t+h, tfinal)
h = tnext - t
u += h * f(t, u)
t = tnext
push!(thist, t)
push!(uhist, u)
end
thist, hcat(uhist...)
end
ode_euler (generic function with 1 method)
f1(t, u; k=5) = -k * (u .- cos(t))
thist, uhist = ode_euler(f1, [.5], tfinal=10, h=.4)
scatter(thist, uhist[1,:])
plot!(cos)
Forward Euler on a linear system¶
f2(t, u) = [0 1; -1 0] * u
thist, uhist = ode_euler(f2, [0., 1], h=.03, tfinal=20)
scatter(thist, uhist')
plot!([cos, sin])
eigen([0 1; -1 0])
Eigen{ComplexF64, ComplexF64, Matrix{ComplexF64}, Vector{ComplexF64}}
values:
2-element Vector{ComplexF64}:
0.0 - 1.0im
0.0 + 1.0im
vectors:
2×2 Matrix{ComplexF64}:
0.707107-0.0im 0.707107+0.0im
0.0-0.707107im 0.0+0.707107im
Linear Stability Analysis¶
Why did Euler converge on the first problem (with sufficiently small steps) while always diverging on the second problem?
We can understand the convergence of methods by analyzing the test problem

\[\dot u = \lambda u, \qquad u(0) = 1,\]

for \(\lambda \in \mathbb{C}\). The exact solution is \(u(t) = e^{\lambda t}\), while one forward Euler step computes the map

\[\tilde u(h) = (1 + h \lambda) u(0).\]

When does this map cause solutions to "blow up" and when is it stable?
Stability diagrams (a property of methods)¶
function plot_stability(Rz; xlims=(-2, 2), ylims=(-2, 2))
x = LinRange(xlims[1], xlims[2], 100)
y = LinRange(ylims[1], ylims[2], 100)
heatmap(x, y, (x, y) -> abs(Rz(x + 1im*y)), c=:bwr, clims=(0, 2), aspect_ratio=:equal)
end
plot_stability(z -> 1 + z)
Implicit methods¶
Recall that forward Euler is the step

\[\tilde u(h) = u(0) + h f(0, u(0)),\]

which for the test problem gives the stability function \(R(z) = 1 + z\) with \(z = h\lambda\). Backward Euler instead solves the (generally nonlinear) equation

\[\tilde u(h) = u(0) + h f(h, \tilde u(h)),\]

which for the test problem yields \(\tilde u(h) = u(0)/(1 - z)\), i.e., \(R(z) = 1/(1 - z)\).
plot_stability(z -> 1 / (1 - z))
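For a linear system \(\dot u = A u\), each backward Euler step is a single linear solve (a sketch, assuming linear f; a nonlinear f would need Newton inside each step):

function ode_beuler_linear(A, u0; tfinal=10., h=0.1)
    u = copy(u0); t = 0.
    thist = [t]; uhist = [u0]
    while t < tfinal
        tnext = min(t + h, tfinal)
        h = tnext - t
        u = (I - h * A) \ u   # implicit step: solve (I - h A) u_next = u
        t = tnext
        push!(thist, t); push!(uhist, u)
    end
    thist, hcat(uhist...)
end
# e.g., thist, uhist = ode_beuler_linear([0 1; -1 0], [0., 1], h=.1, tfinal=20)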
Advection with centered stencil¶
function advect_center(t, u)
n = length(u)
dx = 2 / n
uwrap = [u[end], u..., u[1]]
f = zero(u)
for i in 1:n
f[i] = -(uwrap[i+2] - uwrap[i]) / (2*dx)
end
f
end
x = LinRange(-1, 1, 51)[1:end-1]
u0 = exp.(-9 * x .^ 2)
thist, uhist = ode_euler(advect_center, u0, h=.002, tfinal=1);
nsteps = size(uhist, 2)
plot(x, uhist[:, 1:(nsteps÷8):end])
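The wiggles are a stability problem, not an accuracy problem. One way to see it (a sketch; A_center is a made-up name): assemble the centered operator column by column and inspect its eigenvalues, which are purely imaginary and so lie outside forward Euler's stability region \(|1 + h\lambda| \le 1\) for every \(h > 0\).

# Assemble the (linear) centered-difference operator from unit vectors (sketch)
A_center = hcat([advect_center(0, Float64.(1:50 .== j)) for j in 1:50]...)
eigvals(A_center)   # purely imaginary: the operator is skew-symmetric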
Advection with upwind stencil¶
function advect_upwind(t, u)
n = length(u)
dx = 2 / n
uwrap = [u[end], u..., u[1]]
f = zero(u)
for i in 1:n
f[i] = -(uwrap[i+1] - uwrap[i]) / dx
end
f
end
x = LinRange(-1, 1, 51)[1:end-1]
u0 = exp.(-9 * x .^ 2)
@time thist, uhist = ode_euler(advect_upwind, u0, h=.002, tfinal=1);
0.073689 seconds (119.23 k allocations: 7.136 MiB, 96.56% compilation time)
nsteps = size(uhist, 2)
plot(x, uhist[:, 1:(nsteps÷8):end])
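By contrast, the upwind operator's eigenvalues have negative real part, but forward Euler is still only conditionally stable: the classic constraint here is \(h \le \Delta x\) (a sketch using standard CFL reasoning, not from the lecture).

# With dx = 2/50 = 0.04, the step h = .08 amplifies the highest mode by
# |1 - 2(1 - e^{iπ})| = 3 per step, so the solution blows up despite a smooth start.
thist, uhist = ode_euler(advect_upwind, u0, h=.08, tfinal=2)
extrema(uhist[:, end])   # far outside [0, 1]: unstable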