Skip to content

Commit

Permalink
lvis slowinterval; simstats tracks LayerStates ActMAvg for longer interval; params updates

Browse files Browse the repository at this point in the history
  • Loading branch information
rcoreilly committed Jan 23, 2025
1 parent ef16079 commit 210b703
Show file tree
Hide file tree
Showing 12 changed files with 69 additions and 40 deletions.
1 change: 1 addition & 0 deletions axon/learn-layer.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions axon/learn-layer.goal
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ func (ly *LayerParams) AdaptInhib(ctx *Context) {
if ly.Inhib.ActAvg.AdaptGi.IsFalse() || ly.IsInput() {
return
}
// note: this is happening redundantly across all ndata based on shared LayerActMAvg values
for di := uint32(0); di < ctx.NData; di++ {
giMult := LayerStates[ly.Index, di, LayerGiMult]
avg := LayerStates[ly.Index, di, LayerActMAvg]
Expand Down
7 changes: 5 additions & 2 deletions axon/simstats.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,8 +330,11 @@ func StatLayerActGe(statsDir *tensorfs.Node, net *Network, trainMode, trialLevel
}
default:
subd := modeDir.Dir(levels[levi-1].String())
stat := stats.StatMean.Call(subd.Value(name))
tsr.AppendRow(stat)
if levi == 1 && si == 0 { // use official longer timescale avg stat here
tsr.AppendRowFloat(float64(LayerStates.Value(int(ly.Index), 0, int(LayerActMAvg))))
} else {
tsr.AppendRow(stats.StatMean.Call(subd.Value(name)))
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion kinase/linear/linear_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func TestMain(m *testing.M) {
func TestLinear(t *testing.T) {
var ls Linear
ls.Defaults()
ls.SynCaBin.Envelope = kinase.Env30
ls.SynCaBin.Envelope = kinase.Env10
ls.Cycles = 200
ls.PlusCycles = 50
ls.CyclesPerBin = 10 // now always 10
Expand Down
2 changes: 1 addition & 1 deletion sims/bgdorsal/bg-dorsal.go
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ func (ss *Sim) ConfigNet(net *axon.Network) {
net.ConnectToPFC(nil, vl, m1, m1CT, m1PT, m1PTp, full, "VLM1") // m1 predicts vl

// these pathways are *essential* -- must get current state here
net.ConnectLayers(m1, vl, full, axon.ForwardPath).AddClass("ToVL ToMotor")
net.ConnectLayers(m1, vl, full, axon.ForwardPath).AddClass("VLM1")

net.ConnectLayers(gpi, motor, p1to1, axon.InhibPath).AddClass("FmGPI")
net.ConnectLayers(m1PT, motor, full, axon.ForwardPath).AddClass("M1ToMotorBS ToMotor")
Expand Down
51 changes: 33 additions & 18 deletions sims/bgdorsal/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
// license that can be found in the LICENSE file.
package main

import "github.com/emer/axon/v2/axon"
import (
"github.com/emer/axon/v2/axon"
"github.com/emer/axon/v2/kinase"
)

// LayerParams sets the minimal non-default params.
// Base is always applied, and others can be optionally selected to apply on top of that.
Expand Down Expand Up @@ -104,9 +107,15 @@ var PathParams = axon.PathSheets{
{Sel: "Path", Doc: "",
Set: func(pt *axon.PathParams) {
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.03
pt.Learn.DWt.CaPScale = 0.95 // 0.95 > 1 in cur
pt.Learn.DWt.Tau = 1 // 1 > 2
pt.Learn.DWt.CaPScale = 0.98 // 0.99 > 1 for 10
pt.Learn.SynCaBin.Envelope = kinase.Env20
}},
// {Sel: ".PFCPath", Doc: "",
// Set: func(pt *axon.PathParams) {
// pt.Learn.DWt.CaPScale = 1
// pt.Learn.SynCaBin.Envelope = kinase.Env30
// }},
{Sel: ".CTtoPred", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 2 // 1 def
Expand All @@ -119,7 +128,7 @@ var PathParams = axon.PathSheets{
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1.8 // 1.8 > others
pt.Learn.LRate.Base = 0.02 // rlr sig: .02 > .015 .025
pt.Learn.DWt.LearnThr = 0.1 // 0.1 > 0 > 0.2
pt.Learn.DWt.LearnThr = 0.1 // 0.1 > 0.2
pt.Matrix.Credit = 0.6 // key param, 0.6 > 0.5, 0.4, 0.7, 1 with pf modulation
pt.Matrix.BasePF = 0.005 // 0.005 > 0.01, 0.002 etc
pt.Matrix.Delta = 1 // should always be 1 except for testing; adjust lrate to compensate
Expand All @@ -146,29 +155,19 @@ var PathParams = axon.PathSheets{
pt.PathScale.Abs = 1.5 // now 1.5 > 2 > 1 ..
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02
}},
{Sel: ".ToMotor", Doc: "all paths to MotorBS and VL",
{Sel: ".ToMotor", Doc: "all excitatory paths to MotorBS; see #DGPiToMotorBS too",
Set: func(pt *axon.PathParams) {
pt.Learn.LRate.Base = 0.02 // 0.02 > 0.04 > 0.01 -- still key
// note: MotorBS is a target, key for learning; SWts not used.
// pt.Learn.SynCaBin.Envelope = kinase.Env10
// pt.Learn.DWt.CaPScale = 1 // tbd in Env
}},
{Sel: ".VLM1", Doc: "",
Set: func(pt *axon.PathParams) {
pt.Learn.LRate.Base = 0.02 // 0.02 > 0.04 > 0.01 -- still key
// note: VL is a target layer; SWts not used.
}},
{Sel: "#StateToM1", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1 // 1 > 1.5, 2, 0.5 etc
}},
{Sel: "#MotorBSToPF", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1 // 1 > 1.1 > 0.9 >> 0.5
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02
// fixed is not better:
// pt.Learn.Learn.SetBool(false)
// pt.SWts.Init.SPct = 0
// pt.SWts.Init.Mean = 0.8
// pt.SWts.Init.Var = 0.0
// pt.Learn.SynCaBin.Envelope = kinase.Env10
// pt.Learn.DWt.CaPScale = 1 // tbd in Env
}},
{Sel: "#DGPiToM1VM", Doc: "final inhibition",
Set: func(pt *axon.PathParams) {
Expand All @@ -179,12 +178,28 @@ var PathParams = axon.PathSheets{
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 3 // 3 > 2.5, 3.5
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02 > 0.0005 with STN 150
// pt.Learn.SynCaBin.Envelope = kinase.Env10
// pt.Learn.DWt.CaPScale = 1 // tbd in Env
}},
{Sel: "#DGPiToPF", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 0.4 // 0.4 >= 0.5, 0.3, 0.2 >> higher
pt.Learn.LRate.Base = 0.04 // 0.4 prev default
}},
{Sel: "#StateToM1", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1 // 1 > 1.5, 2, 0.5 etc
}},
{Sel: "#MotorBSToPF", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 1 // 1 > 1.1 > 0.9 >> 0.5
pt.Learn.LRate.Base = 0.04 // 0.04 > 0.02
// fixed is not better:
// pt.Learn.Learn.SetBool(false)
// pt.SWts.Init.SPct = 0
// pt.SWts.Init.Mean = 0.8
// pt.SWts.Init.Var = 0.0
}},
{Sel: ".M1ToMotorBS", Doc: "",
Set: func(pt *axon.PathParams) {
pt.PathScale.Abs = 2 // 2 > 1.5, 2.5
Expand Down
14 changes: 8 additions & 6 deletions sims/deepfsa/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package main

import (
"github.com/emer/axon/v2/axon"
"github.com/emer/axon/v2/kinase"
)

// LayerParams sets the minimal non-default params.
Expand Down Expand Up @@ -93,12 +94,13 @@ var PathParams = axon.PathSheets{
"Base": {
{Sel: "Path", Doc: "std",
Set: func(pt *axon.PathParams) {
pt.Learn.DWt.SubMean = 0 // 0 > 1 -- even with CTCtxt = 0
pt.Learn.LRate.Base = 0.03 // .03 > others -- same as CtCtxt
pt.SWts.Adapt.LRate = 0.01 // 0.01 or 0.0001 music
pt.SWts.Init.SPct = 1.0 // 1 works fine here -- .5 also ok
pt.Learn.DWt.Tau = 1 // 1 >> 2 v0.0.9
pt.Learn.DWt.CaPScale = 0.95 // 0.95 def; 1 maybe slightly more stable
pt.Learn.DWt.SubMean = 0 // 0 > 1 -- even with CTCtxt = 0
pt.Learn.LRate.Base = 0.03 // .03 > others -- same as CtCtxt
pt.SWts.Adapt.LRate = 0.01 // 0.01 or 0.0001 music
pt.SWts.Init.SPct = 1.0 // 1 works fine here -- .5 also ok
pt.Learn.DWt.Tau = 1 // 1 >> 2 v0.0.9
pt.Learn.DWt.CaPScale = 1.0 // 0.95 def; 1 maybe slightly more stable
pt.Learn.SynCaBin.Envelope = kinase.Env25
}},
{Sel: ".BackPath", Doc: "top-down back-pathways MUST have lower relative weight scale, otherwise network hallucinates",
Set: func(pt *axon.PathParams) {
Expand Down
6 changes: 5 additions & 1 deletion sims/lvis/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ type RunConfig struct {
// NData is the number of data-parallel items to process in parallel per trial.
// Is significantly faster for both CPU and GPU. Results in an effective
// mini-batch of learning.
NData int `default:"4" min:"1"`
NData int `default:"8" min:"1"`

// SlowInterval is the interval between slow adaptive processes.
// This generally needs to be longer than the default of 100 in larger models.
SlowInterval int `default:"400"`

// NThreads is the number of parallel threads for CPU computation;
// 0 = use default.
Expand Down
3 changes: 2 additions & 1 deletion sims/lvis/lvis.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ func (ss *Sim) ConfigEnv() {
func (ss *Sim) ConfigNet(net *axon.Network) {
net.SetMaxData(ss.Config.Run.NData)
net.Context().SetThetaCycles(int32(ss.Config.Run.Cycles)).
SetPlusCycles(int32(ss.Config.Run.PlusCycles))
SetPlusCycles(int32(ss.Config.Run.PlusCycles)).
SetSlowInterval(int32(ss.Config.Run.SlowInterval))
net.SetRandSeed(ss.RandSeeds[0]) // init new separate random seed, using run = 0

trn := ss.Envs.ByMode(Train).(*ImagesEnv)
Expand Down
14 changes: 8 additions & 6 deletions sims/lvis/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package main

import (
"github.com/emer/axon/v2/axon"
"github.com/emer/axon/v2/kinase"
)

// LayerParams sets the minimal non-default params.
Expand Down Expand Up @@ -177,12 +178,13 @@ var PathParams = axon.PathSheets{
"Base": {
{Sel: "Path", Doc: "exploring",
Set: func(pt *axon.PathParams) {
pt.SWts.Adapt.On.SetBool(true) // true > false, esp in cosdiff
pt.SWts.Adapt.LRate = 0.0002 // .0002, .001 > .01 > .1 after 250epc in NStrong
pt.SWts.Adapt.SubMean = 1 // 1 > 0 -- definitely needed
pt.Learn.LRate.Base = 0.005 // 0.01 > 0.02 later (trace)
pt.Learn.DWt.SubMean = 1 // 1 > 0 for trgavg weaker
pt.Learn.DWt.CaPScale = 0.96 // 0.96 best for 25 bin; 0.95 unstable
pt.SWts.Adapt.On.SetBool(true) // true > false, esp in cosdiff
pt.SWts.Adapt.LRate = 0.0002 // .0002, .001 > .01 > .1 after 250epc in NStrong
pt.SWts.Adapt.SubMean = 1 // 1 > 0 -- definitely needed
pt.Learn.LRate.Base = 0.005 // 0.005 def; 0.01 > 0.02 later (trace)
pt.Learn.DWt.SubMean = 1 // 1 > 0 for trgavg weaker
pt.Learn.DWt.CaPScale = 1.03 // Env20: 1.02 > 1
pt.Learn.SynCaBin.Envelope = kinase.Env20 // Env20 > Env25
}},
{Sel: ".BackPath", Doc: "top-down back-projections MUST have lower relative weight scale, otherwise network hallucinates -- smaller as network gets bigger",
Set: func(pt *axon.PathParams) {
Expand Down
6 changes: 3 additions & 3 deletions sims/objrec/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,9 @@ var PathParams = axon.PathSheets{
pt.Learn.DWt.SubMean = 1 // 1 -- faster if 0 until 20 epc -- prevents sig amount of late deterioration
pt.SWts.Adapt.LRate = 0.0001 // 0.005 == .1 == .01
pt.SWts.Init.SPct = 1 // 1 >= lower (trace-v11)
pt.Learn.DWt.CaPScale = 1 // 0.95 essential vs. 1.0
pt.Learn.DWt.Trace.SetBool(true) // no trace is faster but unstable
pt.Learn.SynCaBin.Envelope = kinase.Env25
pt.Learn.DWt.CaPScale = 1.0 // 0.95 essential vs. 1.0
pt.Learn.DWt.Trace.SetBool(true) // no trace starts faster but is unstable
pt.Learn.SynCaBin.Envelope = kinase.Env20
}},
{Sel: ".BackPath", Doc: "top-down back-pathways MUST have lower relative weight scale, otherwise network hallucinates -- smaller as network gets bigger",
Set: func(pt *axon.PathParams) {
Expand Down
2 changes: 1 addition & 1 deletion sims/ra25/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ var PathParams = axon.PathSheets{
pt.Learn.DWt.Trace.SetBool(true) // no trace is NOT faster. requires lrate = 0.02
pt.Learn.DWt.SubMean = 0 // 1 > 0 for long run stability
pt.Learn.DWt.CaPScale = 1 // 0.95 > 0.9 > 1
pt.Learn.SynCaBin.Envelope = kinase.Env25
pt.Learn.SynCaBin.Envelope = kinase.Env10
}},
{Sel: ".BackPath", Doc: "top-down back-pathways MUST have lower relative weight scale, otherwise network hallucinates",
Set: func(pt *axon.PathParams) {
Expand Down

0 comments on commit 210b703

Please sign in to comment.