rl layers working (for td at least)
rcoreilly committed Oct 18, 2024
1 parent cf74bcf commit 82dff96
Showing 16 changed files with 117 additions and 91 deletions.
2 changes: 2 additions & 0 deletions examples/hip_bench/def_params.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

+//go:build not

package main

import "github.com/emer/emergent/v2/params"
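(The //go:build not constraint added to these hip_bench files references a tag that is never defined, so Go excludes them from every build -- presumably parking them until they are updated for the new API.)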
2 changes: 2 additions & 0 deletions examples/hip_bench/hip_bench.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

+//go:build not

// hip_bench runs a hippocampus model for testing parameters and new learning ideas
package main

2 changes: 2 additions & 0 deletions examples/hip_bench/orig_params.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

+//go:build not

package main

import "github.com/emer/emergent/v2/params"
2 changes: 2 additions & 0 deletions examples/hip_bench/params.go
@@ -1,5 +1,7 @@
// File generated by params.SaveGoCode

+//go:build not

package main

import "github.com/emer/emergent/v2/params"
12 changes: 6 additions & 6 deletions leabra/enumgen.go

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions leabra/layer.go
@@ -627,6 +627,23 @@ func (ly *Layer) InhibFromPool(ctx *Context) {
// ActFromG computes rate-code activation from Ge, Gi, Gl conductances
// and updates learning running-average activations from that Act
func (ly *Layer) ActFromG(ctx *Context) {
+switch ly.Type {
+case RWDaLayer:
+ly.ActFromGRWDa(ctx)
+return
+case RWPredLayer:
+ly.ActFromGRWPred(ctx)
+return
+case TDPredLayer:
+ly.ActFromGTDPred(ctx)
+return
+case TDIntegLayer:
+ly.ActFromGTDInteg(ctx)
+return
+case TDDaLayer:
+ly.ActFromGTDDa(ctx)
+return
+}
for ni := range ly.Neurons {
nrn := &ly.Neurons[ni]
if nrn.IsOff() {
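(Each specialized ActFromG* case added above replaces the default rate-code activation update for its layer type and returns early, so the generic neuron loop below only runs for standard layers.)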
2 changes: 1 addition & 1 deletion leabra/layerbase.go
@@ -168,7 +168,7 @@ func (ly *Layer) ShouldDisplay(field string) bool {
case "RW":
return ly.Type == RWPredLayer || ly.Type == RWDaLayer
case "TD":
-return ly.Type == TDRewIntegLayer || ly.Type == TDDaLayer
+return ly.Type == TDPredLayer || ly.Type == TDIntegLayer || ly.Type == TDDaLayer
case "PBWM":
return isPBWM
case "SendTo":
Expand Down
14 changes: 7 additions & 7 deletions leabra/layertypes.go
@@ -81,21 +81,21 @@ const (
// RWPred prediction is also accessed directly from Rew layer to avoid any issues.
RWDaLayer

-// TDRewPredLayer is the temporal differences reward prediction layer.
+// TDPredLayer is the temporal differences reward prediction layer.
// It represents estimated value V(t) in the minus phase, and computes
// estimated V(t+1) based on its learned weights in plus phase.
-// Use [TDRewPredPath] for DA modulated learning.
-TDRewPredLayer
+// Use [TDPredPath] for DA modulated learning.
+TDPredLayer

-// TDRewIntegLayer is the temporal differences reward integration layer.
+// TDIntegLayer is the temporal differences reward integration layer.
// It represents estimated value V(t) in the minus phase, and
// estimated V(t+1) + r(t) in the plus phase.
// It computes r(t) from (typically fixed) weights from a reward layer,
-// and directly accesses values from [TDRewPredLayer].
-TDRewIntegLayer
+// and directly accesses values from [TDPredLayer].
+TDIntegLayer

// TDDaLayer computes a dopamine (DA) signal as the temporal difference (TD)
-// between the [TDRewIntegLayer[] activations in the minus and plus phase.
+// between the [TDIntegLayer] activations in the minus and plus phase.
TDDaLayer

///////// BG Basal Ganglia
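Taken together, the comments above specify the standard TD error: the Pred layer supplies V(t) in the minus phase and V(t+1) in the plus phase, the Integ layer adds the actual reward, and the Da layer reports the plus-minus difference. A minimal standalone sketch of that arithmetic (not part of this commit; scalar stand-ins for layer activations, and assuming Discount scales the plus-phase prediction, per the TDParams comment in rl.go):

package main

import "fmt"

func main() {
	discount := float32(0.9) // TDParams.Discount
	vT := float32(0.5)       // TDPredLayer minus phase: estimated V(t)
	vT1 := float32(0.8)      // TDPredLayer plus phase: estimated V(t+1)
	r := float32(1.0)        // reward r(t) from the Rew input layer

	integMinus := vT              // TDIntegLayer minus phase: V(t)
	integPlus := r + discount*vT1 // TDIntegLayer plus phase: r(t) + discounted V(t+1)

	da := integPlus - integMinus // TDDaLayer: TD error = plus - minus
	fmt.Printf("TD dopamine signal: %g\n", da)
}

A positive da here signals a better-than-predicted outcome, which the TDPredPath learning rule then uses to strengthen the weights that produced the prediction.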
6 changes: 3 additions & 3 deletions leabra/path.go
@@ -278,8 +278,8 @@ func (pt *Path) DWt() {
pt.DWtMatrix()
case pt.Type == RWPath:
pt.DWtRW()
-case pt.Type == TDRewPredPath:
-pt.DWtTDRewPred()
+case pt.Type == TDPredPath:
+pt.DWtTDPred()
case pt.Type == DaHebbPath:
pt.DWtDaHebb()
default:
@@ -343,7 +343,7 @@ func (pt *Path) WtFromDWt() {
return
}
switch pt.Type {
-case RWPath, TDRewPredPath:
+case RWPath, TDPredPath:
pt.WtFromDWtLinear()
return
}
4 changes: 2 additions & 2 deletions leabra/pathbase.go
@@ -139,8 +139,8 @@ func (pt *Path) DefaultsForType() {
pt.CHLDefaults()
case EcCa1Path:
pt.EcCa1Defaults()
-case TDRewPredPath:
-pt.TDRewPredDefaults()
+case TDPredPath:
+pt.TDPredDefaults()
case RWPath:
pt.RWDefaults()
case MatrixPath:
6 changes: 3 additions & 3 deletions leabra/pathtypes.go
@@ -55,11 +55,11 @@ const (
// Has no weight bounds or limits on sign etc.
RWPath

-// TDRewPredPath does dopamine-modulated learning for reward prediction:
+// TDPredPath does dopamine-modulated learning for reward prediction:
// DWt = Da * Send.ActQ0 (activity on *previous* timestep)
-// Use in TDRewPredLayer typically to generate reward predictions.
+// Use in TDPredLayer typically to generate reward predictions.
// Has no weight bounds or limits on sign etc.
-TDRewPredPath
+TDPredPath

//////// PBWM

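The DWt = Da * Send.ActQ0 rule named above is a dopamine-gated delta rule: the TD error gates learning of whatever the sender was doing on the previous timestep. A single-synapse sketch (not part of this commit; the learning rate and values are illustrative):

package main

import "fmt"

func main() {
	lrate := float32(0.04)    // illustrative learning rate
	da := float32(0.5)        // dopamine (TD error) on the receiving layer
	sendActQ0 := float32(0.7) // sender activity on the *previous* timestep
	wt := float32(0.1)        // current weight

	dwt := da * sendActQ0 // DWt = Da * Send.ActQ0
	wt += lrate * dwt     // linear update (WtFromDWtLinear): no bounds or sign limits
	fmt.Printf("new weight: %g\n", wt)
}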
4 changes: 0 additions & 4 deletions leabra/pbwm_layers.go
@@ -109,12 +109,8 @@ func (ly *Layer) DaAChFromLay(ctx *Context) {
continue
}
da := ly.DA
-// ach := ly.ACh
if nrn.Shunt > 0 { // note: treating Shunt as binary variable -- could multiply
da *= ly.Matrix.PatchShunt
-// if ly.Matrix.ShuntACh {
-// ach *= ly.Matrix.PatchShunt
-// }
}
nrn.DALrn = ly.DALrnFromDA(da)
}
47 changes: 26 additions & 21 deletions leabra/rl.go
@@ -46,6 +46,7 @@ func (ly *Layer) ActFromGRWPred(ctx *Context) {
continue
}
nrn.Act = ly.RW.PredRange.ClipValue(nrn.Ge) // clipped linear
+ly.Learn.AvgsFromAct(nrn)
}
}

@@ -64,7 +65,7 @@ func (ly *Layer) RWLayers() (*Layer, *Layer, error) {
return tly, ply, nil
}

-func (ly *Layer) RWDaActFromG(ctx *Context) {
+func (ly *Layer) ActFromGRWDa(ctx *Context) {
rly, ply, _ := ly.RWLayers()
if rly == nil || ply == nil {
return
@@ -87,6 +88,7 @@ func (ly *Layer) RWDaActFromG(ctx *Context) {
} else {
nrn.Act = 0 // nothing
}
+ly.Learn.AvgsFromAct(nrn)
}
}

@@ -149,23 +151,24 @@ type TDParams struct {
// discount factor -- how much to discount the future prediction from RewPred.
Discount float32

-// name of [TDRewPredLayer] to get reward prediction from.
+// name of [TDPredLayer] to get reward prediction from.
PredLay string

-// name of [TDRewIntegLayer] from which this computes the temporal derivative.
+// name of [TDIntegLayer] from which this computes the temporal derivative.
IntegLay string
}

func (tp *TDParams) Defaults() {
tp.Discount = 0.9
tp.PredLay = "RewPred"
tp.PredLay = "Pred"
tp.IntegLay = "Integ"
}

func (tp *TDParams) Update() {
}

-// ActFromGTDRewPred computes linear activation for [TDRewPredLayer].
-func (ly *Layer) ActFromGTDRewPred(ctx *Context) {
+// ActFromGTDPred computes linear activation for [TDPredLayer].
+func (ly *Layer) ActFromGTDPred(ctx *Context) {
for ni := range ly.Neurons {
nrn := &ly.Neurons[ni]
if nrn.IsOff() {
@@ -176,20 +179,21 @@ func (ly *Layer) ActFromGTDRewPred(ctx *Context) {
} else {
nrn.Act = nrn.ActP // previous actP
}
+ly.Learn.AvgsFromAct(nrn)
}
}

-func (ly *Layer) TDRewPredLayer() (*Layer, error) {
+func (ly *Layer) TDPredLayer() (*Layer, error) {
tly := ly.Network.LayerByName(ly.TD.PredLay)
if tly == nil {
err := fmt.Errorf("TDRewIntegLayer %s RewPredLayer: %q not found", ly.Name, ly.TD.PredLay)
err := fmt.Errorf("TDIntegLayer %s RewPredLayer: %q not found", ly.Name, ly.TD.PredLay)
return nil, errors.Log(err)
}
return tly, nil
}

-func (ly *Layer) ActFromGTDRewInteg(ctx *Context) {
-rply, _ := ly.TDRewPredLayer()
+func (ly *Layer) ActFromGTDInteg(ctx *Context) {
+rply, _ := ly.TDPredLayer()
if rply == nil {
return
}
@@ -205,13 +209,14 @@ func (ly *Layer) ActFromGTDRewInteg(ctx *Context) {
} else {
nrn.Act = rpActP // previous actP
}
+ly.Learn.AvgsFromAct(nrn)
}
}

-func (ly *Layer) TDRewIntegLayer() (*Layer, error) {
+func (ly *Layer) TDIntegLayer() (*Layer, error) {
tly := ly.Network.LayerByName(ly.TD.IntegLay)
if tly == nil {
err := fmt.Errorf("TDRewIntegLayer %s RewIntegLayer: %q not found", ly.Name, ly.TD.IntegLay)
err := fmt.Errorf("TDIntegLayer %s RewIntegLayer: %q not found", ly.Name, ly.TD.IntegLay)
return nil, errors.Log(err)
}
return tly, nil
@@ -222,7 +227,7 @@ func (ly *Layer) TDDaDefaults() {
}

func (ly *Layer) ActFromGTDDa(ctx *Context) {
-rily, _ := ly.TDRewIntegLayer()
+rily, _ := ly.TDIntegLayer()
if rily == nil {
return
}
@@ -242,15 +247,15 @@ func (ly *Layer) ActFromGTDDa(ctx *Context) {
}
}

-func (pt *Path) TDRewPredDefaults() {
+func (pt *Path) TDPredDefaults() {
pt.Learn.WtSig.Gain = 1
pt.Learn.Norm.On = false
pt.Learn.Momentum.On = false
pt.Learn.WtBal.On = false
}

-// DWtTDRewPred computes the weight change (learning) for [TDRewPredPath].
-func (pt *Path) DWtTDRewPred() {
+// DWtTDPred computes the weight change (learning) for [TDPredPath].
+func (pt *Path) DWtTDPred() {
slay := pt.Send
rlay := pt.Recv
da := rlay.DA
@@ -271,12 +276,12 @@ func (pt *Path) DWtTDRewPred() {
}

// AddTDLayers adds the standard TD temporal differences layers, generating a DA signal.
-// Pathway from Rew to RewInteg is given class TDRewToInteg -- should
+// Pathway from Rew to Integ is given class TDToInteg -- should
// have no learning and 1 weight.
func (nt *Network) AddTDLayers(prefix string, space float32) (rew, rp, ri, td *Layer) {
rew = nt.AddLayer2D(prefix+"Rew", 1, 1, InputLayer)
rp = nt.AddLayer2D(prefix+"RewPred", 1, 1, SuperLayer)
ri = nt.AddLayer2D(prefix+"RewInteg", 1, 1, TDRewIntegLayer)
rp = nt.AddLayer2D(prefix+"Pred", 1, 1, TDPredLayer)
ri = nt.AddLayer2D(prefix+"Integ", 1, 1, TDIntegLayer)
td = nt.AddLayer2D(prefix+"TD", 1, 1, TDDaLayer)
ri.TD.PredLay = rp.Name
td.TD.IntegLay = ri.Name
Expand All @@ -285,12 +290,12 @@ func (nt *Network) AddTDLayers(prefix string, space float32) (rew, rp, ri, td *L
td.PlaceBehind(ri, space)

pt := nt.ConnectLayers(rew, ri, paths.NewFull(), ForwardPath)
pt.AddClass("TDRewToInteg")
pt.AddClass("TDToInteg")
pt.Learn.Learn = false
pt.WtInit.Mean = 1
pt.WtInit.Var = 0
pt.WtInit.Sym = false
// {Sel: ".TDRewToInteg", Desc: "rew to integ",
// {Sel: ".TDToInteg", Desc: "rew to integ",
// Params: params.Params{
// "Path.Learn.Learn": "false",
// "Path.WtInit.Mean": "1",
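For reference, a sketch of building a model with the renamed API (not from this commit; the NewNetwork constructor, module import paths, and the Input wiring are assumptions based on the calls visible in the diff):

package main

import (
	"github.com/emer/emergent/v2/paths"
	"github.com/emer/leabra/v2/leabra"
)

func main() {
	net := leabra.NewNetwork("TD") // assumed constructor
	// rew = reward input; rp = "Pred" (TDPredLayer); ri = "Integ" (TDIntegLayer); td = TDDaLayer
	rew, rp, ri, td := net.AddTDLayers("", 2)
	_, _, _ = rew, ri, td

	// sensory input drives the reward prediction via the learnable TDPredPath
	in := net.AddLayer2D("Input", 5, 5, leabra.InputLayer)
	net.ConnectLayers(in, rp, paths.NewFull(), leabra.TDPredPath)
}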
