rl layers working (for td at least)
rcoreilly committed Oct 18, 2024
1 parent cf74bcf commit 82dff96
Showing 16 changed files with 117 additions and 91 deletions.
2 changes: 2 additions & 0 deletions examples/hip_bench/def_params.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

+//go:build not

package main

import "github.com/emer/emergent/v2/params"
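(The //go:build not constraint added to these hip_bench files references a tag that is never defined, so Go excludes them from every build -- presumably parking them until they are updated for the new API.)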
2 changes: 2 additions & 0 deletions examples/hip_bench/hip_bench.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

+//go:build not

// hip_bench runs a hippocampus model for testing parameters and new learning ideas
package main

2 changes: 2 additions & 0 deletions examples/hip_bench/orig_params.go
@@ -2,6 +2,8 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

+//go:build not

package main

import "github.com/emer/emergent/v2/params"
2 changes: 2 additions & 0 deletions examples/hip_bench/params.go
@@ -1,5 +1,7 @@
// File generated by params.SaveGoCode

+//go:build not

package main

import "github.com/emer/emergent/v2/params"
12 changes: 6 additions & 6 deletions leabra/enumgen.go

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions leabra/layer.go
@@ -627,6 +627,23 @@ func (ly *Layer) InhibFromPool(ctx *Context) {
// ActFromG computes rate-code activation from Ge, Gi, Gl conductances
// and updates learning running-average activations from that Act
func (ly *Layer) ActFromG(ctx *Context) {
+switch ly.Type {
+case RWDaLayer:
+ly.ActFromGRWDa(ctx)
+return
+case RWPredLayer:
+ly.ActFromGRWPred(ctx)
+return
+case TDPredLayer:
+ly.ActFromGTDPred(ctx)
+return
+case TDIntegLayer:
+ly.ActFromGTDInteg(ctx)
+return
+case TDDaLayer:
+ly.ActFromGTDDa(ctx)
+return
+}
for ni := range ly.Neurons {
nrn := &ly.Neurons[ni]
if nrn.IsOff() {
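(Each specialized ActFromG* case added above replaces the default rate-code activation update for its layer type and returns early, so the generic neuron loop below only runs for standard layers.)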
2 changes: 1 addition & 1 deletion leabra/layerbase.go
@@ -168,7 +168,7 @@ func (ly *Layer) ShouldDisplay(field string) bool {
case "RW":
return ly.Type == RWPredLayer || ly.Type == RWDaLayer
case "TD":
-return ly.Type == TDRewIntegLayer || ly.Type == TDDaLayer
+return ly.Type == TDPredLayer || ly.Type == TDIntegLayer || ly.Type == TDDaLayer
case "PBWM":
return isPBWM
case "SendTo":
Expand Down
14 changes: 7 additions & 7 deletions leabra/layertypes.go
@@ -81,21 +81,21 @@ const (
// RWPred prediction is also accessed directly from Rew layer to avoid any issues.
RWDaLayer

-// TDRewPredLayer is the temporal differences reward prediction layer.
+// TDPredLayer is the temporal differences reward prediction layer.
// It represents estimated value V(t) in the minus phase, and computes
// estimated V(t+1) based on its learned weights in plus phase.
-// Use [TDRewPredPath] for DA modulated learning.
-TDRewPredLayer
+// Use [TDPredPath] for DA modulated learning.
+TDPredLayer

-// TDRewIntegLayer is the temporal differences reward integration layer.
+// TDIntegLayer is the temporal differences reward integration layer.
// It represents estimated value V(t) in the minus phase, and
// estimated V(t+1) + r(t) in the plus phase.
// It computes r(t) from (typically fixed) weights from a reward layer,
-// and directly accesses values from [TDRewPredLayer].
-TDRewIntegLayer
+// and directly accesses values from [TDPredLayer].
+TDIntegLayer

// TDDaLayer computes a dopamine (DA) signal as the temporal difference (TD)
-// between the [TDRewIntegLayer[] activations in the minus and plus phase.
+// between the [TDIntegLayer] activations in the minus and plus phase.
TDDaLayer

///////// BG Basal Ganglia
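Taken together, the comments above specify the standard TD error: the Pred layer supplies V(t) in the minus phase and V(t+1) in the plus phase, the Integ layer adds the actual reward, and the Da layer reports the plus-minus difference. A minimal standalone sketch of that arithmetic (not part of this commit; scalar stand-ins for layer activations, and assuming Discount scales the plus-phase prediction, per the TDParams comment in rl.go):

package main

import "fmt"

func main() {
	discount := float32(0.9) // TDParams.Discount
	vT := float32(0.5)       // TDPredLayer minus phase: estimated V(t)
	vT1 := float32(0.8)      // TDPredLayer plus phase: estimated V(t+1)
	r := float32(1.0)        // reward r(t) from the Rew input layer

	integMinus := vT              // TDIntegLayer minus phase: V(t)
	integPlus := r + discount*vT1 // TDIntegLayer plus phase: r(t) + discounted V(t+1)

	da := integPlus - integMinus // TDDaLayer: TD error = plus - minus
	fmt.Printf("TD dopamine signal: %g\n", da)
}

A positive da here signals a better-than-predicted outcome, which the TDPredPath learning rule then uses to strengthen the weights that produced the prediction.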
6 changes: 3 additions & 3 deletions leabra/path.go
@@ -278,8 +278,8 @@ func (pt *Path) DWt() {
pt.DWtMatrix()
case pt.Type == RWPath:
pt.DWtRW()
-case pt.Type == TDRewPredPath:
-pt.DWtTDRewPred()
+case pt.Type == TDPredPath:
+pt.DWtTDPred()
case pt.Type == DaHebbPath:
pt.DWtDaHebb()
default:
@@ -343,7 +343,7 @@ func (pt *Path) WtFromDWt() {
return
}
switch pt.Type {
-case RWPath, TDRewPredPath:
+case RWPath, TDPredPath:
pt.WtFromDWtLinear()
return
}
4 changes: 2 additions & 2 deletions leabra/pathbase.go
@@ -139,8 +139,8 @@ func (pt *Path) DefaultsForType() {
pt.CHLDefaults()
case EcCa1Path:
pt.EcCa1Defaults()
-case TDRewPredPath:
-pt.TDRewPredDefaults()
+case TDPredPath:
+pt.TDPredDefaults()
case RWPath:
pt.RWDefaults()
case MatrixPath:
6 changes: 3 additions & 3 deletions leabra/pathtypes.go
@@ -55,11 +55,11 @@ const (
// Has no weight bounds or limits on sign etc.
RWPath

-// TDRewPredPath does dopamine-modulated learning for reward prediction:
+// TDPredPath does dopamine-modulated learning for reward prediction:
// DWt = Da * Send.ActQ0 (activity on *previous* timestep)
-// Use in TDRewPredLayer typically to generate reward predictions.
+// Use in TDPredLayer typically to generate reward predictions.
// Has no weight bounds or limits on sign etc.
-TDRewPredPath
+TDPredPath

//////// PBWM

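The DWt = Da * Send.ActQ0 rule named above is a dopamine-gated delta rule: the TD error gates learning of whatever the sender was doing on the previous timestep. A single-synapse sketch (not part of this commit; the learning rate and values are illustrative):

package main

import "fmt"

func main() {
	lrate := float32(0.04)    // illustrative learning rate
	da := float32(0.5)        // dopamine (TD error) on the receiving layer
	sendActQ0 := float32(0.7) // sender activity on the *previous* timestep
	wt := float32(0.1)        // current weight

	dwt := da * sendActQ0 // DWt = Da * Send.ActQ0
	wt += lrate * dwt     // linear update (WtFromDWtLinear): no bounds or sign limits
	fmt.Printf("new weight: %g\n", wt)
}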
4 changes: 0 additions & 4 deletions leabra/pbwm_layers.go
@@ -109,12 +109,8 @@ func (ly *Layer) DaAChFromLay(ctx *Context) {
continue
}
da := ly.DA
-// ach := ly.ACh
if nrn.Shunt > 0 { // note: treating Shunt as binary variable -- could multiply
da *= ly.Matrix.PatchShunt
-// if ly.Matrix.ShuntACh {
-// ach *= ly.Matrix.PatchShunt
-// }
}
nrn.DALrn = ly.DALrnFromDA(da)
}
47 changes: 26 additions & 21 deletions leabra/rl.go
@@ -46,6 +46,7 @@ func (ly *Layer) ActFromGRWPred(ctx *Context) {
continue
}
nrn.Act = ly.RW.PredRange.ClipValue(nrn.Ge) // clipped linear
+ly.Learn.AvgsFromAct(nrn)
}
}

@@ -64,7 +65,7 @@ func (ly *Layer) RWLayers() (*Layer, *Layer, error) {
return tly, ply, nil
}

-func (ly *Layer) RWDaActFromG(ctx *Context) {
+func (ly *Layer) ActFromGRWDa(ctx *Context) {
rly, ply, _ := ly.RWLayers()
if rly == nil || ply == nil {
return
@@ -87,6 +88,7 @@ func (ly *Layer) RWDaActFromG(ctx *Context) {
} else {
nrn.Act = 0 // nothing
}
+ly.Learn.AvgsFromAct(nrn)
}
}

@@ -149,23 +151,24 @@ type TDParams struct {
// discount factor -- how much to discount the future prediction from RewPred.
Discount float32

-// name of [TDRewPredLayer] to get reward prediction from.
+// name of [TDPredLayer] to get reward prediction from.
PredLay string

-// name of [TDRewIntegLayer] from which this computes the temporal derivative.
+// name of [TDIntegLayer] from which this computes the temporal derivative.
IntegLay string
}

func (tp *TDParams) Defaults() {
tp.Discount = 0.9
tp.PredLay = "RewPred"
tp.PredLay = "Pred"
tp.IntegLay = "Integ"
}

func (tp *TDParams) Update() {
}

-// ActFromGTDRewPred computes linear activation for [TDRewPredLayer].
-func (ly *Layer) ActFromGTDRewPred(ctx *Context) {
+// ActFromGTDPred computes linear activation for [TDPredLayer].
+func (ly *Layer) ActFromGTDPred(ctx *Context) {
for ni := range ly.Neurons {
nrn := &ly.Neurons[ni]
if nrn.IsOff() {
@@ -176,20 +179,21 @@ func (ly *Layer) ActFromGTDRewPred(ctx *Context) {
} else {
nrn.Act = nrn.ActP // previous actP
}
+ly.Learn.AvgsFromAct(nrn)
}
}

-func (ly *Layer) TDRewPredLayer() (*Layer, error) {
+func (ly *Layer) TDPredLayer() (*Layer, error) {
tly := ly.Network.LayerByName(ly.TD.PredLay)
if tly == nil {
err := fmt.Errorf("TDRewIntegLayer %s RewPredLayer: %q not found", ly.Name, ly.TD.PredLay)
err := fmt.Errorf("TDIntegLayer %s RewPredLayer: %q not found", ly.Name, ly.TD.PredLay)
return nil, errors.Log(err)
}
return tly, nil
}

-func (ly *Layer) ActFromGTDRewInteg(ctx *Context) {
-rply, _ := ly.TDRewPredLayer()
+func (ly *Layer) ActFromGTDInteg(ctx *Context) {
+rply, _ := ly.TDPredLayer()
if rply == nil {
return
}
@@ -205,13 +209,14 @@ func (ly *Layer) ActFromGTDRewInteg(ctx *Context) {
} else {
nrn.Act = rpActP // previous actP
}
+ly.Learn.AvgsFromAct(nrn)
}
}

-func (ly *Layer) TDRewIntegLayer() (*Layer, error) {
+func (ly *Layer) TDIntegLayer() (*Layer, error) {
tly := ly.Network.LayerByName(ly.TD.IntegLay)
if tly == nil {
err := fmt.Errorf("TDRewIntegLayer %s RewIntegLayer: %q not found", ly.Name, ly.TD.IntegLay)
err := fmt.Errorf("TDIntegLayer %s RewIntegLayer: %q not found", ly.Name, ly.TD.IntegLay)
return nil, errors.Log(err)
}
return tly, nil
@@ -222,7 +227,7 @@ func (ly *Layer) TDDaDefaults() {
}

func (ly *Layer) ActFromGTDDa(ctx *Context) {
-rily, _ := ly.TDRewIntegLayer()
+rily, _ := ly.TDIntegLayer()
if rily == nil {
return
}
@@ -242,15 +247,15 @@ func (ly *Layer) ActFromGTDDa(ctx *Context) {
}
}

-func (pt *Path) TDRewPredDefaults() {
+func (pt *Path) TDPredDefaults() {
pt.Learn.WtSig.Gain = 1
pt.Learn.Norm.On = false
pt.Learn.Momentum.On = false
pt.Learn.WtBal.On = false
}

-// DWtTDRewPred computes the weight change (learning) for [TDRewPredPath].
-func (pt *Path) DWtTDRewPred() {
+// DWtTDPred computes the weight change (learning) for [TDPredPath].
+func (pt *Path) DWtTDPred() {
slay := pt.Send
rlay := pt.Recv
da := rlay.DA
@@ -271,12 +276,12 @@ func (pt *Path) DWtTDRewPred() {
}

// AddTDLayers adds the standard TD temporal differences layers, generating a DA signal.
-// Pathway from Rew to RewInteg is given class TDRewToInteg -- should
+// Pathway from Rew to Integ is given class TDToInteg -- should
// have no learning and 1 weight.
func (nt *Network) AddTDLayers(prefix string, space float32) (rew, rp, ri, td *Layer) {
rew = nt.AddLayer2D(prefix+"Rew", 1, 1, InputLayer)
rp = nt.AddLayer2D(prefix+"RewPred", 1, 1, SuperLayer)
ri = nt.AddLayer2D(prefix+"RewInteg", 1, 1, TDRewIntegLayer)
rp = nt.AddLayer2D(prefix+"Pred", 1, 1, TDPredLayer)
ri = nt.AddLayer2D(prefix+"Integ", 1, 1, TDIntegLayer)
td = nt.AddLayer2D(prefix+"TD", 1, 1, TDDaLayer)
ri.TD.PredLay = rp.Name
td.TD.IntegLay = ri.Name
Expand All @@ -285,12 +290,12 @@ func (nt *Network) AddTDLayers(prefix string, space float32) (rew, rp, ri, td *L
td.PlaceBehind(ri, space)

pt := nt.ConnectLayers(rew, ri, paths.NewFull(), ForwardPath)
pt.AddClass("TDRewToInteg")
pt.AddClass("TDToInteg")
pt.Learn.Learn = false
pt.WtInit.Mean = 1
pt.WtInit.Var = 0
pt.WtInit.Sym = false
// {Sel: ".TDRewToInteg", Desc: "rew to integ",
// {Sel: ".TDToInteg", Desc: "rew to integ",
// Params: params.Params{
// "Path.Learn.Learn": "false",
// "Path.WtInit.Mean": "1",
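For reference, a sketch of building a model with the renamed API (not from this commit; the NewNetwork constructor, module import paths, and the Input wiring are assumptions based on the calls visible in the diff):

package main

import (
	"github.com/emer/emergent/v2/paths"
	"github.com/emer/leabra/v2/leabra"
)

func main() {
	net := leabra.NewNetwork("TD") // assumed constructor
	// rew = reward input; rp = "Pred" (TDPredLayer); ri = "Integ" (TDIntegLayer); td = TDDaLayer
	rew, rp, ri, td := net.AddTDLayers("", 2)
	_, _, _ = rew, ri, td

	// sensory input drives the reward prediction via the learnable TDPredPath
	in := net.AddLayer2D("Input", 5, 5, leabra.InputLayer)
	net.ConnectLayers(in, rp, paths.NewFull(), leabra.TDPredPath)
}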
