From 1e25a6885511c690db7482b3c430411f92e1a912 Mon Sep 17 00:00:00 2001
From: Zizhao Chen
Date: Wed, 15 Dec 2021 08:17:42 -0500
Subject: [PATCH] Demo: Gaussian process regression (#632)

---
 examples/kernelregression.dx | 42 ++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/examples/kernelregression.dx b/examples/kernelregression.dx
index 11fbb630a..ddbbe5081 100644
--- a/examples/kernelregression.dx
+++ b/examples/kernelregression.dx
@@ -1,7 +1,8 @@
+import linalg
 import plot
 
 -- Conjugate gradients solver
-def solve (mat:m=>m=>Float) (b:m=>Float) : m=>Float =
+def solve' (mat:m=>m=>Float) (b:m=>Float) : m=>Float =
   x0 = for i:m. 0.0
   ax = mat **. x0
   r0 = b - ax
@@ -16,6 +17,11 @@ def solve (mat:m=>m=>Float) (b:m=>Float) : m=>Float =
     (x', r', p')
   xOut
 
+def chol_solve (l:LowerTriMat m Float) (b:m=>Float) : m=>Float =
+  b' = forward_substitute l b
+  u = transposeLowerToUpper l
+  backward_substitute u b'
+
 ' # Kernel ridge regression
 
 ' To learn a function $f_{true}: \mathcal{X} \to \mathbb R$
@@ -40,7 +46,7 @@ ys : Nx=>Float = for i. trueFun xs.i + noise * randn (ixkey k2 i)
 -- Kernel ridge regression
 def regress (kernel: a -> a -> Float) (xs: Nx=>a) (ys: Nx=>Float) : a -> Float =
   gram = for i j. kernel xs.i xs.j + select (i==j) 0.0001 0.0
-  alpha = solve gram ys
+  alpha = solve' gram ys
   predict = \x. sum for i. alpha.i * kernel xs.i x
   predict
 
@@ -59,3 +65,35 @@ preds = map predict xtest
 
 :html showPlot $ xyPlot xtest preds
 >
+
+' # Gaussian process regression
+
+' GP regression (kriging) works in a similar way. Unlike kernel ridge regression, it places a Gaussian prior on the function; combined
+with Bayes' rule, this yields the variance of each prediction as well as its mean.
+
+' In this implementation, the conjugate gradient solver is replaced with the
+Cholesky solver from `lib/linalg.dx` for efficiency.
+
+def gp_regress (kernel: a -> a -> Float) (xs: n=>a) (ys: n=>Float)
+    : (a -> (Float&Float)) =
+  noise_var = 0.0001
+  gram = for i j. kernel xs.i xs.j
+  c = chol (gram + eye *. noise_var)
+  alpha = chol_solve c ys
+  predict = \x.
+    k' = for i. kernel xs.i x
+    mu = sum for i. alpha.i * k'.i
+    alpha' = chol_solve c k'
+    var = kernel x x + noise_var - sum for i. k'.i * alpha'.i
+    (mu, var)
+  predict
+
+gp_predict = gp_regress (rbf 0.2) xs ys
+
+(gp_preds, vars) = unzip (map gp_predict xtest)
+
+:html showPlot $ xycPlot xtest gp_preds (map sqrt vars)
+>
+
+:html showPlot $ xyPlot xtest vars
+>
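
Note (not part of the patch): the quantities `mu` and `var` computed in
`gp_regress` are the standard GP posterior predictive mean and variance.
Writing $K$ for the Gram matrix, $\sigma^2$ for `noise_var`, and
$k_* = (k(x_1, x), \ldots, k(x_n, x))$, the code above evaluates

$$\mu(x) = k_*^\top (K + \sigma^2 I)^{-1} y$$
$$\mathrm{var}(x) = k(x, x) + \sigma^2 - k_*^\top (K + \sigma^2 I)^{-1} k_*$$

The mean coincides with the kernel ridge predictor, since both use the same
ridge term $\sigma^2 = 10^{-4}$. The $O(n^3)$ Cholesky factorization is paid
once in `chol`, and each prediction then costs two $O(n^2)$ triangular solves
via `chol_solve`.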
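
A minimal consistency sketch (hypothetical, not part of the patch; it reuses
only names already defined in `examples/kernelregression.dx`): `solve'` and
`chol_solve` should agree on the same ridge system $(K + 10^{-4} I)\,\alpha = y$.

-- Hypothetical check, not in the patch: build the ridge-regularized Gram
-- matrix exactly as `regress` does.
gram_check = for i j. rbf 0.2 xs.i xs.j + select (i==j) 0.0001 0.0
-- Solve once by conjugate gradients, once via the Cholesky factor.
alpha_cg   = solve' gram_check ys
alpha_chol = chol_solve (chol gram_check) ys
-- This squared difference should be near zero.
err = sum for i. (alpha_cg.i - alpha_chol.i) * (alpha_cg.i - alpha_chol.i)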