From 1e25a6885511c690db7482b3c430411f92e1a912 Mon Sep 17 00:00:00 2001
From: Zizhao Chen
Date: Wed, 15 Dec 2021 08:17:42 -0500
Subject: [PATCH] Demo: Gaussian process regression (#632)

---
 examples/kernelregression.dx | 42 ++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/examples/kernelregression.dx b/examples/kernelregression.dx
index 11fbb630a..ddbbe5081 100644
--- a/examples/kernelregression.dx
+++ b/examples/kernelregression.dx
@@ -1,7 +1,8 @@
+import linalg
 import plot
 
 -- Conjugate gradients solver
-def solve (mat:m=>m=>Float) (b:m=>Float) : m=>Float =
+def solve' (mat:m=>m=>Float) (b:m=>Float) : m=>Float =
   x0 = for i:m. 0.0
   ax = mat **. x0
   r0 = b - ax
@@ -16,6 +17,11 @@ def solve (mat:m=>m=>Float) (b:m=>Float) : m=>Float =
     (x', r', p')
   xOut
 
+def chol_solve (l:LowerTriMat m Float) (b:m=>Float) : m=>Float =
+  b' = forward_substitute l b
+  u = transposeLowerToUpper l
+  backward_substitute u b'
+
 ' # Kernel ridge regression
 
 ' To learn a function $f_{true}: \mathcal{X} \to \mathbb R$
@@ -40,7 +46,7 @@ ys : Nx=>Float = for i. trueFun xs.i + noise * randn (ixkey k2 i)
 -- Kernel ridge regression
 def regress (kernel: a -> a -> Float) (xs: Nx=>a) (ys: Nx=>Float) : a -> Float =
   gram = for i j. kernel xs.i xs.j + select (i==j) 0.0001 0.0
-  alpha = solve gram ys
+  alpha = solve' gram ys
   predict = \x. sum for i. alpha.i * kernel xs.i x
   predict
 
@@ -59,3 +65,35 @@ preds = map predict xtest
 
 :html showPlot $ xyPlot xtest preds
 >
+
+' # Gaussian process regression
+
+' GP regression (kriging) works in a similar way. Unlike kernel ridge regression, it places a Gaussian prior on the function; combined
+with Bayes' rule, this yields the variance of each prediction as well as its mean.
+
+' In this implementation, the conjugate gradient solver is replaced with the
+Cholesky solver from `lib/linalg.dx` for efficiency.
+
+def gp_regress (kernel: a -> a -> Float) (xs: n=>a) (ys: n=>Float)
+    : (a -> (Float&Float)) =
+  noise_var = 0.0001
+  gram = for i j. kernel xs.i xs.j
+  c = chol (gram + eye *. noise_var)
+  alpha = chol_solve c ys
+  predict = \x.
+    k' = for i. kernel xs.i x
+    mu = sum for i. alpha.i * k'.i
+    alpha' = chol_solve c k'
+    var = kernel x x + noise_var - sum for i. k'.i * alpha'.i
+    (mu, var)
+  predict
+
+gp_predict = gp_regress (rbf 0.2) xs ys
+
+(gp_preds, vars) = unzip (map gp_predict xtest)
+
+:html showPlot $ xycPlot xtest gp_preds (map sqrt vars)
+>
+
+:html showPlot $ xyPlot xtest vars
+>
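
Note (not part of the patch): the quantities `mu` and `var` computed in
`gp_regress` are the standard GP posterior predictive mean and variance.
Writing $K$ for the Gram matrix, $\sigma^2$ for `noise_var`, and
$k_* = (k(x_1, x), \ldots, k(x_n, x))$, the code above evaluates

$$\mu(x) = k_*^\top (K + \sigma^2 I)^{-1} y$$
$$\mathrm{var}(x) = k(x, x) + \sigma^2 - k_*^\top (K + \sigma^2 I)^{-1} k_*$$

The mean coincides with the kernel ridge predictor, since both use the same
ridge term $\sigma^2 = 10^{-4}$. The $O(n^3)$ Cholesky factorization is paid
once in `chol`, and each prediction then costs two $O(n^2)$ triangular solves
via `chol_solve`.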
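
A minimal consistency sketch (hypothetical, not part of the patch; it reuses
only names already defined in `examples/kernelregression.dx`): `solve'` and
`chol_solve` should agree on the same ridge system $(K + 10^{-4} I)\,\alpha = y$.

-- Hypothetical check, not in the patch: build the ridge-regularized Gram
-- matrix exactly as `regress` does.
gram_check = for i j. rbf 0.2 xs.i xs.j + select (i==j) 0.0001 0.0
-- Solve once by conjugate gradients, once via the Cholesky factor.
alpha_cg   = solve' gram_check ys
alpha_chol = chol_solve (chol gram_check) ys
-- This squared difference should be near zero.
err = sum for i. (alpha_cg.i - alpha_chol.i) * (alpha_cg.i - alpha_chol.i)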