diff --git a/axon/learn-layer.go b/axon/learn-layer.go index ad7917ec..34162c1a 100644 --- a/axon/learn-layer.go +++ b/axon/learn-layer.go @@ -125,6 +125,7 @@ func (ly *LayerParams) AdaptInhib(ctx *Context) { if ly.Inhib.ActAvg.AdaptGi.IsFalse() || ly.IsInput() { return } + // note: this is happening redundantly across all ndata based on shared LayerActMAvg values for di := uint32(0); di < ctx.NData; di++ { giMult := LayerStates.Value(int(ly.Index), int(di), int(LayerGiMult)) avg := LayerStates.Value(int(ly.Index), int(di), int(LayerActMAvg)) diff --git a/axon/learn-layer.goal b/axon/learn-layer.goal index 16e6be74..e18b9030 100644 --- a/axon/learn-layer.goal +++ b/axon/learn-layer.goal @@ -124,6 +124,7 @@ func (ly *LayerParams) AdaptInhib(ctx *Context) { if ly.Inhib.ActAvg.AdaptGi.IsFalse() || ly.IsInput() { return } + // note: this is happening redundantly across all ndata based on shared LayerActMAvg values for di := uint32(0); di < ctx.NData; di++ { giMult := LayerStates[ly.Index, di, LayerGiMult] avg := LayerStates[ly.Index, di, LayerActMAvg] diff --git a/axon/simstats.go b/axon/simstats.go index 99670747..d0cd90bd 100644 --- a/axon/simstats.go +++ b/axon/simstats.go @@ -330,8 +330,11 @@ func StatLayerActGe(statsDir *tensorfs.Node, net *Network, trainMode, trialLevel } default: subd := modeDir.Dir(levels[levi-1].String()) - stat := stats.StatMean.Call(subd.Value(name)) - tsr.AppendRow(stat) + if levi == 1 && si == 0 { // use official longer timescale avg stat here + tsr.AppendRowFloat(float64(LayerStates.Value(int(ly.Index), 0, int(LayerActMAvg)))) + } else { + tsr.AppendRow(stats.StatMean.Call(subd.Value(name))) + } } } } diff --git a/kinase/linear/linear_test.go b/kinase/linear/linear_test.go index 47d29dfb..9a75af48 100644 --- a/kinase/linear/linear_test.go +++ b/kinase/linear/linear_test.go @@ -20,7 +20,7 @@ func TestMain(m *testing.M) { func TestLinear(t *testing.T) { var ls Linear ls.Defaults() - ls.SynCaBin.Envelope = kinase.Env30 + ls.SynCaBin.Envelope = kinase.Env10 ls.Cycles = 200 ls.PlusCycles = 50 ls.CyclesPerBin = 10 // now always 10 diff --git a/sims/bgdorsal/bg-dorsal.go b/sims/bgdorsal/bg-dorsal.go index 4621b351..03e0de04 100644 --- a/sims/bgdorsal/bg-dorsal.go +++ b/sims/bgdorsal/bg-dorsal.go @@ -274,7 +274,7 @@ func (ss *Sim) ConfigNet(net *axon.Network) { net.ConnectToPFC(nil, vl, m1, m1CT, m1PT, m1PTp, full, "VLM1") // m1 predicts vl // these pathways are *essential* -- must get current state here - net.ConnectLayers(m1, vl, full, axon.ForwardPath).AddClass("ToVL ToMotor") + net.ConnectLayers(m1, vl, full, axon.ForwardPath).AddClass("VLM1") net.ConnectLayers(gpi, motor, p1to1, axon.InhibPath).AddClass("FmGPI") net.ConnectLayers(m1PT, motor, full, axon.ForwardPath).AddClass("M1ToMotorBS ToMotor") diff --git a/sims/bgdorsal/params.go b/sims/bgdorsal/params.go index e5ec2201..e5ea9a08 100644 --- a/sims/bgdorsal/params.go +++ b/sims/bgdorsal/params.go @@ -3,7 +3,10 @@ // license that can be found in the LICENSE file. package main -import "github.com/emer/axon/v2/axon" +import ( + "github.com/emer/axon/v2/axon" + "github.com/emer/axon/v2/kinase" +) // LayerParams sets the minimal non-default params. // Base is always applied, and others can be optionally selected to apply on top of that. @@ -104,9 +107,15 @@ var PathParams = axon.PathSheets{ {Sel: "Path", Doc: "", Set: func(pt *axon.PathParams) { pt.Learn.LRate.Base = 0.04 // 0.04 > 0.03 - pt.Learn.DWt.CaPScale = 0.95 // 0.95 > 1 in cur pt.Learn.DWt.Tau = 1 // 1 > 2 + pt.Learn.DWt.CaPScale = 0.98 // 0.99 > 1 for 10 + pt.Learn.SynCaBin.Envelope = kinase.Env20 }}, + // {Sel: ".PFCPath", Doc: "", + // Set: func(pt *axon.PathParams) { + // pt.Learn.DWt.CaPScale = 1 + // pt.Learn.SynCaBin.Envelope = kinase.Env30 + // }}, {Sel: ".CTtoPred", Doc: "", Set: func(pt *axon.PathParams) { pt.PathScale.Abs = 2 // 1 def @@ -119,7 +128,7 @@ var PathParams = axon.PathSheets{ Set: func(pt *axon.PathParams) { pt.PathScale.Abs = 1.8 // 1.8 > others pt.Learn.LRate.Base = 0.02 // rlr sig: .02 > .015 .025 - pt.Learn.DWt.LearnThr = 0.1 // 0.1 > 0 > 0.2 + pt.Learn.DWt.LearnThr = 0.1 // 0.1 > 0.2 pt.Matrix.Credit = 0.6 // key param, 0.6 > 0.5, 0.4, 0.7, 1 with pf modulation pt.Matrix.BasePF = 0.005 // 0.005 > 0.01, 0.002 etc pt.Matrix.Delta = 1 // should always be 1 except for testing; adjust lrate to compensate @@ -146,29 +155,19 @@ var PathParams = axon.PathSheets{ pt.PathScale.Abs = 1.5 // now 1.5 > 2 > 1 .. pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02 }}, - {Sel: ".ToMotor", Doc: "all paths to MotorBS and VL", + {Sel: ".ToMotor", Doc: "all excitatory paths to MotorBS; see #DGPiToMotorBS too", Set: func(pt *axon.PathParams) { pt.Learn.LRate.Base = 0.02 // 0.02 > 0.04 > 0.01 -- still key // note: MotorBS is a target, key for learning; SWts not used. + // pt.Learn.SynCaBin.Envelope = kinase.Env10 + // pt.Learn.DWt.CaPScale = 1 // tbd in Env }}, {Sel: ".VLM1", Doc: "", Set: func(pt *axon.PathParams) { pt.Learn.LRate.Base = 0.02 // 0.02 > 0.04 > 0.01 -- still key // note: VL is a target layer; SWts not used. - }}, - {Sel: "#StateToM1", Doc: "", - Set: func(pt *axon.PathParams) { - pt.PathScale.Abs = 1 // 1 > 1.5, 2, 0.5 etc - }}, - {Sel: "#MotorBSToPF", Doc: "", - Set: func(pt *axon.PathParams) { - pt.PathScale.Abs = 1 // 1 > 1.1 > 0.9 >> 0.5 - pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02 - // fixed is not better: - // pt.Learn.Learn.SetBool(false) - // pt.SWts.Init.SPct = 0 - // pt.SWts.Init.Mean = 0.8 - // pt.SWts.Init.Var = 0.0 + // pt.Learn.SynCaBin.Envelope = kinase.Env10 + // pt.Learn.DWt.CaPScale = 1 // tbd in Env }}, {Sel: "#DGPiToM1VM", Doc: "final inhibition", Set: func(pt *axon.PathParams) { @@ -179,12 +178,28 @@ var PathParams = axon.PathSheets{ Set: func(pt *axon.PathParams) { pt.PathScale.Abs = 3 // 3 > 2.5, 3.5 pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02 > 0.0005 with STN 150 + // pt.Learn.SynCaBin.Envelope = kinase.Env10 + // pt.Learn.DWt.CaPScale = 1 // tbd in Env }}, {Sel: "#DGPiToPF", Doc: "", Set: func(pt *axon.PathParams) { pt.PathScale.Abs = 0.4 // 0.4 >= 0.5, 0.3, 0.2 >> higher pt.Learn.LRate.Base = 0.04 // 0.4 prev default }}, + {Sel: "#StateToM1", Doc: "", + Set: func(pt *axon.PathParams) { + pt.PathScale.Abs = 1 // 1 > 1.5, 2, 0.5 etc + }}, + {Sel: "#MotorBSToPF", Doc: "", + Set: func(pt *axon.PathParams) { + pt.PathScale.Abs = 1 // 1 > 1.1 > 0.9 >> 0.5 + pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02 + // fixed is not better: + // pt.Learn.Learn.SetBool(false) + // pt.SWts.Init.SPct = 0 + // pt.SWts.Init.Mean = 0.8 + // pt.SWts.Init.Var = 0.0 + }}, {Sel: ".M1ToMotorBS", Doc: "", Set: func(pt *axon.PathParams) { pt.PathScale.Abs = 2 // 2 > 1.5, 2.5 diff --git a/sims/deepfsa/params.go b/sims/deepfsa/params.go index df5569ff..550cc778 100644 --- a/sims/deepfsa/params.go +++ b/sims/deepfsa/params.go @@ -6,6 +6,7 @@ package main import ( "github.com/emer/axon/v2/axon" + "github.com/emer/axon/v2/kinase" ) // LayerParams sets the minimal non-default params. @@ -93,12 +94,13 @@ var PathParams = axon.PathSheets{ "Base": { {Sel: "Path", Doc: "std", Set: func(pt *axon.PathParams) { - pt.Learn.DWt.SubMean = 0 // 0 > 1 -- even with CTCtxt = 0 - pt.Learn.LRate.Base = 0.03 // .03 > others -- same as CtCtxt - pt.SWts.Adapt.LRate = 0.01 // 0.01 or 0.0001 music - pt.SWts.Init.SPct = 1.0 // 1 works fine here -- .5 also ok - pt.Learn.DWt.Tau = 1 // 1 >> 2 v0.0.9 - pt.Learn.DWt.CaPScale = 0.95 // 0.95 def; 1 maybe slightly more stable + pt.Learn.DWt.SubMean = 0 // 0 > 1 -- even with CTCtxt = 0 + pt.Learn.LRate.Base = 0.03 // .03 > others -- same as CtCtxt + pt.SWts.Adapt.LRate = 0.01 // 0.01 or 0.0001 music + pt.SWts.Init.SPct = 1.0 // 1 works fine here -- .5 also ok + pt.Learn.DWt.Tau = 1 // 1 >> 2 v0.0.9 + pt.Learn.DWt.CaPScale = 1.0 // 0.95 def; 1 maybe slightly more stable + pt.Learn.SynCaBin.Envelope = kinase.Env25 }}, {Sel: ".BackPath", Doc: "top-down back-pathways MUST have lower relative weight scale, otherwise network hallucinates", Set: func(pt *axon.PathParams) { diff --git a/sims/lvis/config.go b/sims/lvis/config.go index 7e47b2a5..30bf7394 100644 --- a/sims/lvis/config.go +++ b/sims/lvis/config.go @@ -85,7 +85,11 @@ type RunConfig struct { // NData is the number of data-parallel items to process in parallel per trial. // Is significantly faster for both CPU and GPU. Results in an effective // mini-batch of learning. - NData int `default:"4" min:"1"` + NData int `default:"8" min:"1"` + + // SlowInterval is the interval between slow adaptive processes. + // This generally needs to be longer than the default of 100 in larger models. + SlowInterval int `default:"400"` // NThreads is the number of parallel threads for CPU computation; // 0 = use default. diff --git a/sims/lvis/lvis.go b/sims/lvis/lvis.go index f0692d03..c6453ac2 100644 --- a/sims/lvis/lvis.go +++ b/sims/lvis/lvis.go @@ -244,7 +244,8 @@ func (ss *Sim) ConfigEnv() { func (ss *Sim) ConfigNet(net *axon.Network) { net.SetMaxData(ss.Config.Run.NData) net.Context().SetThetaCycles(int32(ss.Config.Run.Cycles)). - SetPlusCycles(int32(ss.Config.Run.PlusCycles)) + SetPlusCycles(int32(ss.Config.Run.PlusCycles)). + SetSlowInterval(int32(ss.Config.Run.SlowInterval)) net.SetRandSeed(ss.RandSeeds[0]) // init new separate random seed, using run = 0 trn := ss.Envs.ByMode(Train).(*ImagesEnv) diff --git a/sims/lvis/params.go b/sims/lvis/params.go index 8cb6a3a8..c4b1d055 100644 --- a/sims/lvis/params.go +++ b/sims/lvis/params.go @@ -6,6 +6,7 @@ package main import ( "github.com/emer/axon/v2/axon" + "github.com/emer/axon/v2/kinase" ) // LayerParams sets the minimal non-default params. @@ -177,12 +178,13 @@ var PathParams = axon.PathSheets{ "Base": { {Sel: "Path", Doc: "exploring", Set: func(pt *axon.PathParams) { - pt.SWts.Adapt.On.SetBool(true) // true > false, esp in cosdiff - pt.SWts.Adapt.LRate = 0.0002 // .0002, .001 > .01 > .1 after 250epc in NStrong - pt.SWts.Adapt.SubMean = 1 // 1 > 0 -- definitely needed - pt.Learn.LRate.Base = 0.005 // 0.01 > 0.02 later (trace) - pt.Learn.DWt.SubMean = 1 // 1 > 0 for trgavg weaker - pt.Learn.DWt.CaPScale = 0.96 // 0.96 best for 25 bin; 0.95 unstable + pt.SWts.Adapt.On.SetBool(true) // true > false, esp in cosdiff + pt.SWts.Adapt.LRate = 0.0002 // .0002, .001 > .01 > .1 after 250epc in NStrong + pt.SWts.Adapt.SubMean = 1 // 1 > 0 -- definitely needed + pt.Learn.LRate.Base = 0.005 // 0.005 def; 0.01 > 0.02 later (trace) + pt.Learn.DWt.SubMean = 1 // 1 > 0 for trgavg weaker + pt.Learn.DWt.CaPScale = 1.03 // Env20: 1.02 > 1 + pt.Learn.SynCaBin.Envelope = kinase.Env20 // Env20 > Env25 }}, {Sel: ".BackPath", Doc: "top-down back-projections MUST have lower relative weight scale, otherwise network hallucinates -- smaller as network gets bigger", Set: func(pt *axon.PathParams) { diff --git a/sims/objrec/params.go b/sims/objrec/params.go index 0350eb83..7958ef52 100644 --- a/sims/objrec/params.go +++ b/sims/objrec/params.go @@ -91,9 +91,9 @@ var PathParams = axon.PathSheets{ pt.Learn.DWt.SubMean = 1 // 1 -- faster if 0 until 20 epc -- prevents sig amount of late deterioration pt.SWts.Adapt.LRate = 0.0001 // 0.005 == .1 == .01 pt.SWts.Init.SPct = 1 // 1 >= lower (trace-v11) - pt.Learn.DWt.CaPScale = 1 // 0.95 essential vs. 1.0 - pt.Learn.DWt.Trace.SetBool(true) // no trace is faster but unstable - pt.Learn.SynCaBin.Envelope = kinase.Env25 + pt.Learn.DWt.CaPScale = 1.0 // 0.95 essential vs. 1.0 + pt.Learn.DWt.Trace.SetBool(true) // no trace starts faster but is unstable + pt.Learn.SynCaBin.Envelope = kinase.Env20 }}, {Sel: ".BackPath", Doc: "top-down back-pathways MUST have lower relative weight scale, otherwise network hallucinates -- smaller as network gets bigger", Set: func(pt *axon.PathParams) { diff --git a/sims/ra25/params.go b/sims/ra25/params.go index 566337ba..cfaa7c53 100644 --- a/sims/ra25/params.go +++ b/sims/ra25/params.go @@ -51,7 +51,7 @@ var PathParams = axon.PathSheets{ pt.Learn.DWt.Trace.SetBool(true) // no trace is NOT faster. requires lrate = 0.02 pt.Learn.DWt.SubMean = 0 // 1 > 0 for long run stability pt.Learn.DWt.CaPScale = 1 // 0.95 > 0.9 > 1 - pt.Learn.SynCaBin.Envelope = kinase.Env25 + pt.Learn.SynCaBin.Envelope = kinase.Env10 }}, {Sel: ".BackPath", Doc: "top-down back-pathways MUST have lower relative weight scale, otherwise network hallucinates", Set: func(pt *axon.PathParams) {