Skip to content

Commit

Permalink
lvis slowinterval; simstats tracks LayerStates ActMAvg for longer interval; params updates

Browse files Browse the repository at this point in the history
  • Loading branch information
rcoreilly committed Jan 23, 2025
1 parent ef16079 commit 210b703
Show file tree
Hide file tree
Showing 12 changed files with 69 additions and 40 deletions.
1 change: 1 addition & 0 deletions axon/learn-layer.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions axon/learn-layer.goal
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ func (ly *LayerParams) AdaptInhib(ctx *Context) {
if ly.Inhib.ActAvg.AdaptGi.IsFalse() || ly.IsInput() {
return
}
// note: this is happening redundantly across all ndata based on shared LayerActMAvg values
for di := uint32(0); di < ctx.NData; di++ {
giMult := LayerStates[ly.Index, di, LayerGiMult]
avg := LayerStates[ly.Index, di, LayerActMAvg]
Expand Down
7 changes: 5 additions & 2 deletions axon/simstats.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,11 @@ func StatLayerActGe(statsDir *tensorfs.Node, net *Network, trainMode, trialLevel
}
default:
subd := modeDir.Dir(levels[levi-1].String())
stat := stats.StatMean.Call(subd.Value(name))
tsr.AppendRow(stat)
if levi == 1 && si == 0 { // use official longer timescale avg stat here
tsr.AppendRowFloat(float64(LayerStates.Value(int(ly.Index), 0, int(LayerActMAvg))))
} else {
tsr.AppendRow(stats.StatMean.Call(subd.Value(name)))
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion kinase/linear/linear_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func TestMain(m *testing.M) {
func TestLinear(t *testing.T) {
var ls Linear
ls.Defaults()
ls.SynCaBin.Envelope = kinase.Env30
ls.SynCaBin.Envelope = kinase.Env10
ls.Cycles = 200
ls.PlusCycles = 50
ls.CyclesPerBin = 10 // now always 10
Expand Down
2 changes: 1 addition & 1 deletion sims/bgdorsal/bg-dorsal.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
net.ConnectToPFC(nil, vl, m1, m1CT, m1PT, m1PTp, full, "VLM1") // m1 predicts vl

// these pathways are *essential* -- must get current state here
net.ConnectLayers(m1, vl, full, axon.ForwardPath).AddClass("ToVL ToMotor")
net.ConnectLayers(m1, vl, full, axon.ForwardPath).AddClass("VLM1")

net.ConnectLayers(gpi, motor, p1to1, axon.InhibPath).AddClass("FmGPI")
net.ConnectLayers(m1PT, motor, full, axon.ForwardPath).AddClass("M1ToMotorBS ToMotor")
Expand Down
51 changes: 33 additions & 18 deletions sims/bgdorsal/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
// license that can be found in the LICENSE file.
package main

import "github.com/emer/axon/v2/axon"
import (
"github.com/emer/axon/v2/axon"
"github.com/emer/axon/v2/kinase"
)

// LayerParams sets the minimal non-default params.
// Base is always applied, and others can be optionally selected to apply on top of that.
Expand Down Expand Up @@ -104,9 +107,15 @@ var PathParams = axon.PathSheets{
{Sel: "Path", Doc: "",
Set: func(pt *axon.PathParams) {
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.03
pt.Learn.DWt.CaPScale = 0.95 // 0.95 > 1 in cur
pt.Learn.DWt.Tau = 1 // 1 > 2
pt.Learn.DWt.CaPScale = 0.98 // 0.99 > 1 for 10
pt.Learn.SynCaBin.Envelope = kinase.Env20
}},
// {Sel: ".PFCPath", Doc: "",
// Set: func(pt *axon.PathParams) {
// pt.Learn.DWt.CaPScale = 1
// pt.Learn.SynCaBin.Envelope = kinase.Env30
// }},
{Sel: ".CTtoPred", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 2 // 1 def
Expand All @@ -119,7 +128,7 @@ var PathParams = axon.PathSheets{
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1.8 // 1.8 > others
pt.Learn.LRate.Base = 0.02 // rlr sig: .02 > .015 .025
pt.Learn.DWt.LearnThr = 0.1 // 0.1 > 0 > 0.2
pt.Learn.DWt.LearnThr = 0.1 // 0.1 > 0.2
pt.Matrix.Credit = 0.6 // key param, 0.6 > 0.5, 0.4, 0.7, 1 with pf modulation
pt.Matrix.BasePF = 0.005 // 0.005 > 0.01, 0.002 etc
pt.Matrix.Delta = 1 // should always be 1 except for testing; adjust lrate to compensate
Expand All @@ -146,29 +155,19 @@ var PathParams = axon.PathSheets{
pt.PathScale.Abs = 1.5 // now 1.5 > 2 > 1 ..
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02
}},
{Sel: ".ToMotor", Doc: "all paths to MotorBS and VL",
{Sel: ".ToMotor", Doc: "all excitatory paths to MotorBS; see #DGPiToMotorBS too",
Set: func(pt *axon.PathParams) {
pt.Learn.LRate.Base = 0.02 // 0.02 > 0.04 > 0.01 -- still key
// note: MotorBS is a target, key for learning; SWts not used.
// pt.Learn.SynCaBin.Envelope = kinase.Env10
// pt.Learn.DWt.CaPScale = 1 // tbd in Env
}},
{Sel: ".VLM1", Doc: "",
Set: func(pt *axon.PathParams) {
pt.Learn.LRate.Base = 0.02 // 0.02 > 0.04 > 0.01 -- still key
// note: VL is a target layer; SWts not used.
}},
{Sel: "#StateToM1", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1 // 1 > 1.5, 2, 0.5 etc
}},
{Sel: "#MotorBSToPF", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1 // 1 > 1.1 > 0.9 >> 0.5
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02
// fixed is not better:
// pt.Learn.Learn.SetBool(false)
// pt.SWts.Init.SPct = 0
// pt.SWts.Init.Mean = 0.8
// pt.SWts.Init.Var = 0.0
// pt.Learn.SynCaBin.Envelope = kinase.Env10
// pt.Learn.DWt.CaPScale = 1 // tbd in Env
}},
{Sel: "#DGPiToM1VM", Doc: "final inhibition",
Set: func(pt *axon.PathParams) {
Expand All @@ -179,12 +178,28 @@ var PathParams = axon.PathSheets{
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 3 // 3 > 2.5, 3.5
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02 > 0.0005 with STN 150
// pt.Learn.SynCaBin.Envelope = kinase.Env10
// pt.Learn.DWt.CaPScale = 1 // tbd in Env
}},
{Sel: "#DGPiToPF", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 0.4 // 0.4 >= 0.5, 0.3, 0.2 >> higher
pt.Learn.LRate.Base = 0.04 // 0.4 prev default
}},
{Sel: "#StateToM1", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1 // 1 > 1.5, 2, 0.5 etc
}},
{Sel: "#MotorBSToPF", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1 // 1 > 1.1 > 0.9 >> 0.5
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02
// fixed is not better:
// pt.Learn.Learn.SetBool(false)
// pt.SWts.Init.SPct = 0
// pt.SWts.Init.Mean = 0.8
// pt.SWts.Init.Var = 0.0
}},
{Sel: ".M1ToMotorBS", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 2 // 2 > 1.5, 2.5
Expand Down
14 changes: 8 additions & 6 deletions sims/deepfsa/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package main

import (
"github.com/emer/axon/v2/axon"
"github.com/emer/axon/v2/kinase"
)

// LayerParams sets the minimal non-default params.
Expand Down Expand Up @@ -93,12 +94,13 @@ var PathParams = axon.PathSheets{
"Base": {
{Sel: "Path", Doc: "std",
Set: func(pt *axon.PathParams) {
pt.Learn.DWt.SubMean = 0 // 0 > 1 -- even with CTCtxt = 0
pt.Learn.LRate.Base = 0.03 // .03 > others -- same as CtCtxt
pt.SWts.Adapt.LRate = 0.01 // 0.01 or 0.0001 music
pt.SWts.Init.SPct = 1.0 // 1 works fine here -- .5 also ok
pt.Learn.DWt.Tau = 1 // 1 >> 2 v0.0.9
pt.Learn.DWt.CaPScale = 0.95 // 0.95 def; 1 maybe slightly more stable
pt.Learn.DWt.SubMean = 0 // 0 > 1 -- even with CTCtxt = 0
pt.Learn.LRate.Base = 0.03 // .03 > others -- same as CtCtxt
pt.SWts.Adapt.LRate = 0.01 // 0.01 or 0.0001 music
pt.SWts.Init.SPct = 1.0 // 1 works fine here -- .5 also ok
pt.Learn.DWt.Tau = 1 // 1 >> 2 v0.0.9
pt.Learn.DWt.CaPScale = 1.0 // 0.95 def; 1 maybe slightly more stable
pt.Learn.SynCaBin.Envelope = kinase.Env25
}},
{Sel: ".BackPath", Doc: "top-down back-pathways MUST have lower relative weight scale, otherwise network hallucinates",
Set: func(pt *axon.PathParams) {
Expand Down
6 changes: 5 additions & 1 deletion sims/lvis/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ type RunConfig struct {
// NData is the number of data-parallel items to process in parallel per trial.
// Is significantly faster for both CPU and GPU. Results in an effective
// mini-batch of learning.
NData int `default:"4" min:"1"`
NData int `default:"8" min:"1"`

// SlowInterval is the interval between slow adaptive processes.
// This generally needs to be longer than the default of 100 in larger models.
SlowInterval int `default:"400"`

// NThreads is the number of parallel threads for CPU computation;
// 0 = use default.
Expand Down
3 changes: 2 additions & 1 deletion sims/lvis/lvis.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ func (ss *Sim) ConfigEnv() {
func (ss *Sim) ConfigNet(net *axon.Network) {
net.SetMaxData(ss.Config.Run.NData)
net.Context().SetThetaCycles(int32(ss.Config.Run.Cycles)).
SetPlusCycles(int32(ss.Config.Run.PlusCycles))
SetPlusCycles(int32(ss.Config.Run.PlusCycles)).
SetSlowInterval(int32(ss.Config.Run.SlowInterval))
net.SetRandSeed(ss.RandSeeds[0]) // init new separate random seed, using run = 0

trn := ss.Envs.ByMode(Train).(*ImagesEnv)
Expand Down
14 changes: 8 additions & 6 deletions sims/lvis/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package main

import (
"github.com/emer/axon/v2/axon"
"github.com/emer/axon/v2/kinase"
)

// LayerParams sets the minimal non-default params.
Expand Down Expand Up @@ -177,12 +178,13 @@ var PathParams = axon.PathSheets{
"Base": {
{Sel: "Path", Doc: "exploring",
Set: func(pt *axon.PathParams) {
pt.SWts.Adapt.On.SetBool(true) // true > false, esp in cosdiff
pt.SWts.Adapt.LRate = 0.0002 // .0002, .001 > .01 > .1 after 250epc in NStrong
pt.SWts.Adapt.SubMean = 1 // 1 > 0 -- definitely needed
pt.Learn.LRate.Base = 0.005 // 0.01 > 0.02 later (trace)
pt.Learn.DWt.SubMean = 1 // 1 > 0 for trgavg weaker
pt.Learn.DWt.CaPScale = 0.96 // 0.96 best for 25 bin; 0.95 unstable
pt.SWts.Adapt.On.SetBool(true) // true > false, esp in cosdiff
pt.SWts.Adapt.LRate = 0.0002 // .0002, .001 > .01 > .1 after 250epc in NStrong
pt.SWts.Adapt.SubMean = 1 // 1 > 0 -- definitely needed
pt.Learn.LRate.Base = 0.005 // 0.005 def; 0.01 > 0.02 later (trace)
pt.Learn.DWt.SubMean = 1 // 1 > 0 for trgavg weaker
pt.Learn.DWt.CaPScale = 1.03 // Env20: 1.02 > 1
pt.Learn.SynCaBin.Envelope = kinase.Env20 // Env20 > Env25
}},
{Sel: ".BackPath", Doc: "top-down back-projections MUST have lower relative weight scale, otherwise network hallucinates -- smaller as network gets bigger",
Set: func(pt *axon.PathParams) {
Expand Down
6 changes: 3 additions & 3 deletions sims/objrec/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ var PathParams = axon.PathSheets{
pt.Learn.DWt.SubMean = 1 // 1 -- faster if 0 until 20 epc -- prevents sig amount of late deterioration
pt.SWts.Adapt.LRate = 0.0001 // 0.005 == .1 == .01
pt.SWts.Init.SPct = 1 // 1 >= lower (trace-v11)
pt.Learn.DWt.CaPScale = 1 // 0.95 essential vs. 1.0
pt.Learn.DWt.Trace.SetBool(true) // no trace is faster but unstable
pt.Learn.SynCaBin.Envelope = kinase.Env25
pt.Learn.DWt.CaPScale = 1.0 // 0.95 essential vs. 1.0
pt.Learn.DWt.Trace.SetBool(true) // no trace starts faster but is unstable
pt.Learn.SynCaBin.Envelope = kinase.Env20
}},
{Sel: ".BackPath", Doc: "top-down back-pathways MUST have lower relative weight scale, otherwise network hallucinates -- smaller as network gets bigger",
Set: func(pt *axon.PathParams) {
Expand Down
2 changes: 1 addition & 1 deletion sims/ra25/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ var PathParams = axon.PathSheets{
pt.Learn.DWt.Trace.SetBool(true) // no trace is NOT faster. requires lrate = 0.02
pt.Learn.DWt.SubMean = 0 // 1 > 0 for long run stability
pt.Learn.DWt.CaPScale = 1 // 0.95 > 0.9 > 1
pt.Learn.SynCaBin.Envelope = kinase.Env25
pt.Learn.SynCaBin.Envelope = kinase.Env10
}},
{Sel: ".BackPath", Doc: "top-down back-pathways MUST have lower relative weight scale, otherwise network hallucinates",
Set: func(pt *axon.PathParams) {
Expand Down

0 comments on commit 210b703

Please sign in to comment.