diff --git a/axon/act-layer.go b/axon/act-layer.go index 58aca1ebc..2a2474b68 100644 --- a/axon/act-layer.go +++ b/axon/act-layer.go @@ -291,8 +291,8 @@ func (ly *LayerParams) GInteg(ctx *Context, pi, ni, di uint32) { // conductance values prior to doing the standard updates in GFromRawSyn // drvAct is for Pulvinar layers, activation of driving neuron func (ly *LayerParams) SpecialPreGs(ctx *Context, pi, ni, di uint32, drvGe float32, nonDrivePct float32) float32 { - saveVal := float32(0) // sometimes we need to use a value computed here, for the post Gs step - pi0 := pi - 1 // 0-n pool index + saveVal := float32(0) // sometimes we need to use a value computed here, for the post Gs step + pi0 := pi - ly.PoolSt - 1 // 0-n pool index pnn := uint32(PoolNNeurons(pi)) pni := NeuronIxs.Value(int(NrnNeurIndex), int(ni)) - uint32(PoolsInt.Value(int(PoolNeurSt), int(pi), int(di))) nrnCtxtGe := Neurons.Value(int(CtxtGe), int(ni), int(di)) @@ -568,7 +568,7 @@ func (ly *LayerParams) SendSpike(ctx *Context, ni, di uint32) { func (ly *LayerParams) PostSpikeSpecial(ctx *Context, lpi, pi, ni, di uint32) { Neurons.Set(Neurons.Value(int(CaSpkP), int(ni), int(di)), int(Burst), int(ni), int(di)) li := ly.Index - pi0 := pi - 1 // 0-n pool index + pi0 := pi - ly.PoolSt - 1 // 0-n pool index pnn := uint32(PoolNNeurons(pi)) pni := NeuronIxs.Value(int(NrnNeurIndex), int(ni)) - uint32(PoolsInt.Value(int(PoolNeurSt), int(pi), int(di))) hasRew := GlobalScalars.Value(int(GvHasRew), int(di)) > 0 diff --git a/axon/act-layer.goal b/axon/act-layer.goal index 36176f6b5..02fce8f0e 100644 --- a/axon/act-layer.goal +++ b/axon/act-layer.goal @@ -290,7 +290,7 @@ func (ly *LayerParams) GInteg(ctx *Context, pi, ni, di uint32) { // drvAct is for Pulvinar layers, activation of driving neuron func (ly *LayerParams) SpecialPreGs(ctx *Context, pi, ni, di uint32, drvGe float32, nonDrivePct float32) float32 { saveVal := float32(0) // sometimes we need to use a value computed here, for the post Gs step - pi0 := pi - 1 // 0-n pool index + pi0 := pi - ly.PoolSt - 1 // 0-n pool index pnn := uint32(PoolNNeurons(pi)) pni := NeuronIxs[NrnNeurIndex, ni] - uint32(PoolsInt[PoolNeurSt, pi, di]) nrnCtxtGe := Neurons[CtxtGe, ni, di] @@ -462,7 +462,7 @@ func (ly *LayerParams) GFromRawSyn(ctx *Context, ni, di uint32) { extraSyn = md * nrnGModSyn * ly.Acts.Dend.ModGain default: if ly.Acts.Dend.HasMod.IsTrue() { - md := ly.Acts.Dend.ModBase + ly.Acts.Dend.ModGain*nrnGModSyn + md := ly.Acts.Dend.ModBase + ly.Acts.Dend.ModGain * nrnGModSyn if md > 1 { md = 1 } @@ -566,7 +566,7 @@ func (ly *LayerParams) SendSpike(ctx *Context, ni, di uint32) { func (ly *LayerParams) PostSpikeSpecial(ctx *Context, lpi, pi, ni, di uint32) { Neurons[Burst, ni, di] = Neurons[CaSpkP, ni, di] li := ly.Index - pi0 := pi - 1 // 0-n pool index + pi0 := pi - ly.PoolSt - 1 // 0-n pool index pnn := uint32(PoolNNeurons(pi)) pni := NeuronIxs[NrnNeurIndex, ni] - uint32(PoolsInt[PoolNeurSt, pi, di]) hasRew := GlobalScalars[GvHasRew, di] > 0 diff --git a/axon/act-net.go b/axon/act-net.go index 3b3b8fb3e..325b17f19 100644 --- a/axon/act-net.go +++ b/axon/act-net.go @@ -6,24 +6,36 @@ package axon -// Cycle runs one cycle of activation updating using threading methods. -func (nt *Network) Cycle() { - // todo: chunks of 10 cycles +// todo: don't even need layer-level ultimately. + +// Cycle runs n cycles of activation updating. +// If getNeurons is true, then neuron state is synced back +// from the GPU (for cycle-level display etc). Otherwise only +// layer-level state is synced. 
+func (nt *Network) Cycle(ncyc int, getNeurons bool) { nix := nt.NetIxs() ctx := nt.Context() nd := int(nix.NNeurons * ctx.NData) ld := int(nix.NLayers * ctx.NData) pd := int(nix.NPools * ctx.NData) - RunGatherSpikes(nd) - RunLayerGi(ld) - RunBetweenGi(ld) - RunPoolGi(pd) - RunCycleNeuron(nd) - RunSendSpike(nd) - RunCyclePost(ld) - RunCycleInc(1) - RunDoneLayers() + ToGPUCtxGlobal() + for range ncyc { + RunGatherSpikes(nd) + RunLayerGi(ld) + RunBetweenGi(ld) + RunPoolGi(pd) + RunCycleNeuron(nd) + RunSendSpike(nd) + RunCyclePost(ld) + RunCycleInc(1) + } + + if getNeurons { + RunDoneLayersNeurons() + } else { + RunDoneLayers() + } // todo: fix this: // var ldt, vta *Layer @@ -70,6 +82,7 @@ func (nt *Network) ApplyExts() { if !UseGPU { return } + ToGPU(ExtsVar) nix := nt.NetIxs() ctx := nt.Context() nd := int(nix.NNeurons * ctx.NData) @@ -85,6 +98,7 @@ func (nt *Network) MinusPhase() { RunMinusPhasePool(pd) RunMinusPhaseNeuron(nd) nt.MinusPhasePost() + ToGPULayersNeurons() // todo: // nt.GPU.SyncStateToGPU() } @@ -118,6 +132,7 @@ func (nt *Network) PlusPhase() { RunPlusPhasePool(pd) RunPlusPhaseNeuron(nd) nt.PlusPhasePost() + ToGPULayersNeurons() // todo: // nt.GPU.SyncStateToGPU() } diff --git a/axon/act-net.goal b/axon/act-net.goal index c0238d93c..cbe9f4910 100644 --- a/axon/act-net.goal +++ b/axon/act-net.goal @@ -4,24 +4,36 @@ package axon -// Cycle runs one cycle of activation updating using threading methods. -func (nt *Network) Cycle() { - // todo: chunks of 10 cycles +// todo: don't even need layer-level ultimately. + +// Cycle runs n cycles of activation updating. +// If getNeurons is true, then neuron state is synced back +// from the GPU (for cycle-level display etc). Otherwise only +// layer-level state is synced. +func (nt *Network) Cycle(ncyc int, getNeurons bool) { nix := nt.NetIxs() ctx := nt.Context() nd := int(nix.NNeurons * ctx.NData) ld := int(nix.NLayers * ctx.NData) pd := int(nix.NPools * ctx.NData) - RunGatherSpikes(nd) - RunLayerGi(ld) - RunBetweenGi(ld) - RunPoolGi(pd) - RunCycleNeuron(nd) - RunSendSpike(nd) - RunCyclePost(ld) - RunCycleInc(1) - RunDoneLayers() + ToGPUCtxGlobal() + for range ncyc { + RunGatherSpikes(nd) + RunLayerGi(ld) + RunBetweenGi(ld) + RunPoolGi(pd) + RunCycleNeuron(nd) + RunSendSpike(nd) + RunCyclePost(ld) + RunCycleInc(1) + } + + if getNeurons { + RunDoneLayersNeurons() + } else { + RunDoneLayers() + } // todo: fix this: // var ldt, vta *Layer @@ -64,6 +76,7 @@ func (nt *Network) ApplyExts() { if !UseGPU { return } + ToGPU(ExtsVar) nix := nt.NetIxs() ctx := nt.Context() nd := int(nix.NNeurons * ctx.NData) @@ -79,6 +92,7 @@ func (nt *Network) MinusPhase() { RunMinusPhasePool(pd) RunMinusPhaseNeuron(nd) nt.MinusPhasePost() + ToGPULayersNeurons() // todo: // nt.GPU.SyncStateToGPU() } @@ -112,6 +126,7 @@ func (nt *Network) PlusPhase() { RunPlusPhasePool(pd) RunPlusPhaseNeuron(nd) nt.PlusPhasePost() + ToGPULayersNeurons() // todo: // nt.GPU.SyncStateToGPU() } diff --git a/axon/basic_test.go b/axon/basic_test.go index 3fdb3a645..c2647fbbf 100644 --- a/axon/basic_test.go +++ b/axon/basic_test.go @@ -21,6 +21,7 @@ import ( "cogentcore.org/core/tensor" "github.com/emer/emergent/v2/etime" "github.com/emer/emergent/v2/paths" + "github.com/stretchr/testify/assert" "golang.org/x/exp/maps" ) @@ -228,7 +229,7 @@ func TestSpikeProp(t *testing.T) { inCyc := 0 hidCyc := 0 for cyc := range 100 { - net.Cycle() + net.Cycle(1, true) // fmt.Println(cyc, Neurons[Ge, hidLay.NeurStIndex, 0], Neurons[GeRaw, hidLay.NeurStIndex, 0]) if Neurons.Value(int(Spike), 
int(inLay.NeurStIndex), int(0)) > 0 { // fmt.Println("in spike:", cyc) @@ -327,7 +328,7 @@ func TestInitWeights(t *testing.T) { for qtr := range 4 { for range 50 { - testNet.Cycle() + testNet.Cycle(1, true) } if qtr == 2 { testNet.MinusPhase() @@ -343,6 +344,48 @@ func TestInitWeights(t *testing.T) { ReportValDiffs(t, Tol8, valMapA, valMapB, "init1", "init2") } +func TestGPUState(t *testing.T) { + if os.Getenv("TEST_GPU") != "true" { + t.Skip("Set TEST_GPU env var to run GPU tests") + } + + testNetA := newTestNet(1) + + GPUInit() + UseGPU = true + + testNetB := newTestNet(1) + + RunCycleInc(1) + // get everything back + RunDone(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar, NeuronsVar, NeuronAvgsVar, SynapsesVar, SynapseTracesVar, PathGBufVar, PathGSynsVar) + // note: the following requires turning off read-only in vars.go + // RunDone(LayersVar, PathsVar, NetworkIxsVar, NeuronIxsVar, SynapseIxsVar, PathSendConVar, RecvPathIxsVar, PathRecvConVar, RecvSynIxsVar, CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar, NeuronsVar, NeuronAvgsVar, SynapsesVar, SynapseTracesVar, PathGBufVar, PathGSynsVar) + // assert.Equal(t, testNetA.LayParams, testNetB.LayParams) + // assert.Equal(t, testNetA.LayParams, Layers) + // assert.Equal(t, testNetA.PathParams, testNetB.PathParams) + // assert.Equal(t, testNetA.NetworkIxs, testNetB.NetworkIxs) + // assert.Equal(t, testNetA.NeuronIxs.Values, testNetB.NeuronIxs.Values) + // assert.Equal(t, testNetA.SynapseIxs.Values, testNetB.SynapseIxs.Values) + // assert.Equal(t, testNetA.PathSendCon.Values, testNetB.PathSendCon.Values) + // assert.Equal(t, testNetA.RecvPathIxs.Values, testNetB.RecvPathIxs.Values) + // assert.Equal(t, testNetA.PathRecvCon.Values, testNetB.PathRecvCon.Values) + // assert.Equal(t, testNetA.RecvSynIxs.Values, testNetB.RecvSynIxs.Values) + assert.NotEqual(t, testNetA.Ctx, testNetB.Ctx) + assert.Equal(t, testNetA.Neurons.Values, testNetB.Neurons.Values) + assert.Equal(t, testNetA.NeuronAvgs.Values, testNetB.NeuronAvgs.Values) + assert.Equal(t, testNetA.LayerStates.Values, testNetB.LayerStates.Values) + assert.Equal(t, testNetA.GlobalScalars.Values, testNetB.GlobalScalars.Values) + assert.Equal(t, testNetA.GlobalVectors.Values, testNetB.GlobalVectors.Values) + assert.Equal(t, testNetA.Exts.Values, testNetB.Exts.Values) + assert.Equal(t, testNetA.Pools.Values, testNetB.Pools.Values) + assert.Equal(t, testNetA.PoolsInt.Values, testNetB.PoolsInt.Values) + assert.Equal(t, testNetA.PathGBuf.Values, testNetB.PathGBuf.Values) + assert.Equal(t, testNetA.PathGSyns.Values, testNetB.PathGSyns.Values) + assert.Equal(t, testNetA.Synapses.Values, testNetB.Synapses.Values) + assert.Equal(t, testNetA.SynapseTraces.Values, testNetB.SynapseTraces.Values) +} + func TestNetAct(t *testing.T) { NetActTest(t, Tol7, false) } @@ -359,6 +402,11 @@ func TestGPUAct(t *testing.T) { // Note: use NetDebugAct for printf debugging of all values -- // "this is only a test" func NetActTest(t *testing.T, tol float32, gpu bool) { + if gpu { + GPUInit() + UseGPU = true + } + testNet := newTestNet(1) ctx := testNet.Context() testNet.InitExt() @@ -368,14 +416,6 @@ func NetActTest(t *testing.T, tol float32, gpu bool) { hidLay := testNet.LayerByName("Hidden") outLay := testNet.LayerByName("Output") - if gpu { - GPUInit() - UseGPU = true - ToGPUAll() - // testNet.ConfigGPUnoGUI() - // testNet.GPU.CycleByCycle = true // alt modes - } - qtr0HidActs := []float32{0.6944439, 0, 0, 0} qtr0HidGes := []float32{0.35385746, 0, 0, 
0} qtr0HidGis := []float32{0.15478331, 0.15478331, 0.15478331, 0.15478331} @@ -426,10 +466,7 @@ func NetActTest(t *testing.T, tol float32, gpu bool) { for qtr := range 4 { for cyc := range cycPerQtr { _ = cyc - testNet.Cycle() - // if gpu { - // testNet.GPU.SyncNeuronsFromGPU() - // } + testNet.Cycle(1, true) } if qtr == 2 { testNet.MinusPhase() @@ -484,41 +521,6 @@ func NetActTest(t *testing.T, tol float32, gpu bool) { // testNet.GPU.Destroy() } -func TestGPUDiffs(t *testing.T) { - if os.Getenv("TEST_GPU") != "true" { - t.Skip("Set TEST_GPU env var to run GPU tests") - } - nonGPUValues := NetDebugAct(t, false, false, 1, false) - gpuValues := NetDebugAct(t, false, true, 1, false) - // note: this has bad tolerance due to NMDA -- can see that if you raise tol to Tol5 etc - ReportValDiffs(t, Tol4, nonGPUValues, gpuValues, "CPU", "GPU") -} - -func TestDebugAct(t *testing.T) { - t.Skip("skipped in regular testing") - NetDebugAct(t, true, false, 1, false) -} - -func TestDebugGPUAct(t *testing.T) { - t.Skip("skipped in regular testing") - NetDebugAct(t, true, true, 1, false) -} - -func TestNDataDiffs(t *testing.T) { - nd1Values := NetDebugAct(t, false, false, 1, true) - nd4Values := NetDebugAct(t, false, false, 4, true) - ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4") -} - -func TestGPUNDataDiffs(t *testing.T) { - if os.Getenv("TEST_GPU") != "true" { - t.Skip("Set TEST_GPU env var to run GPU tests") - } - nd1Values := NetDebugAct(t, false, true, 1, true) - nd4Values := NetDebugAct(t, false, true, 4, true) - ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4") -} - // ReportValDiffs -- reports diffs between a, b values at given tolerance func ReportValDiffs(t *testing.T, tolerance float32, va, vb map[string]float32, aLabel, bLabel string, exclude ...string) { keys := maps.Keys(va) @@ -556,6 +558,11 @@ func ReportValDiffs(t *testing.T, tolerance float32, va, vb map[string]float32, // and also returns a map of all values and variables that can be used for a more // fine-grained diff test, e.g., see the GPU version. func NetDebugAct(t *testing.T, printValues bool, gpu bool, nData int, initWts bool) map[string]float32 { + if gpu { + GPUInit() + UseGPU = true + } + testNet := newTestNet(nData) ApplyParamSheets(testNet, layerParams["FullDecay"], pathParams["FullDecay"]) @@ -577,20 +584,14 @@ func RunDebugAct(t *testing.T, testNet *Network, printValues bool, gpu bool, ini var vals []float32 - if gpu { - // testNet.ConfigGPUnoGUI() - // testNet.GPU.RecFunTimes = true - // testNet.GPU.CycleByCycle = true // key for recording results cycle-by-cycle - } - // these control what is printed. 
// the whole thing is run and returned in the valMap - valsPerRow := 8 + valsPerRow := 4 nQtrs := 1 // max 4 - cycPerQtr := 5 // max 50 - nPats := 2 // max 4 - stLayer := 1 // max 2 - edLayer := 2 // max 3 + cycPerQtr := 1 // max 50 + nPats := 1 // max 4 + stLayer := 0 // max 2 + edLayer := 1 // max 3 nNeurs := 1 // max 4 -- number of neuron values to print for pi := 0; pi < 4; pi++ { @@ -611,9 +612,9 @@ func RunDebugAct(t *testing.T, testNet *Network, printValues bool, gpu bool, ini testNet.ApplyExts() // key now for GPU - for qtr := 0; qtr < 4; qtr++ { - for cyc := 0; cyc < 50; cyc++ { - testNet.Cycle() + for qtr := 0; qtr < nQtrs; qtr++ { + for cyc := 0; cyc < cycPerQtr; cyc++ { + testNet.Cycle(1, true) // get neuron state for ni := 0; ni < 4; ni++ { for li := 0; li < 3; li++ { @@ -668,6 +669,41 @@ func RunDebugAct(t *testing.T, testNet *Network, printValues bool, gpu bool, ini return valMap } +func TestGPUDiffs(t *testing.T) { + if os.Getenv("TEST_GPU") != "true" { + t.Skip("Set TEST_GPU env var to run GPU tests") + } + nonGPUValues := NetDebugAct(t, false, false, 1, false) + gpuValues := NetDebugAct(t, false, true, 1, false) + // note: this has bad tolerance due to NMDA -- can see that if you raise tol to Tol5 etc + ReportValDiffs(t, Tol4, nonGPUValues, gpuValues, "CPU", "GPU") +} + +func TestDebugAct(t *testing.T) { + // t.Skip("skipped in regular testing") + NetDebugAct(t, true, false, 1, false) +} + +func TestDebugGPUAct(t *testing.T) { + // t.Skip("skipped in regular testing") + NetDebugAct(t, true, true, 1, false) +} + +func TestNDataDiffs(t *testing.T) { + nd1Values := NetDebugAct(t, false, false, 1, true) + nd4Values := NetDebugAct(t, false, false, 4, true) + ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4") +} + +func TestGPUNDataDiffs(t *testing.T) { + if os.Getenv("TEST_GPU") != "true" { + t.Skip("Set TEST_GPU env var to run GPU tests") + } + nd1Values := NetDebugAct(t, false, true, 1, true) + nd4Values := NetDebugAct(t, false, true, 4, true) + ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4") +} + func TestNetLearn(t *testing.T) { NetTestLearn(t, Tol7, false) } @@ -680,6 +716,11 @@ func TestGPULearn(t *testing.T) { } func NetTestLearn(t *testing.T, tol float32, gpu bool) { + if gpu { + GPUInit() + UseGPU = true + } + testNet := newTestNet(1) ctx := testNet.Context() @@ -735,12 +776,6 @@ func NetTestLearn(t *testing.T, tol float32, gpu bool) { testNet.InitWeights() testNet.InitExt() - if gpu { - // testNet.ConfigGPUnoGUI() - // testNet.GPU.RecFunTimes = true // alt forms - // testNet.GPU.CycleByCycle = true // - } - for pi := 0; pi < 4; pi++ { testNet.NewState(etime.Train, false) @@ -752,10 +787,7 @@ func NetTestLearn(t *testing.T, tol float32, gpu bool) { for qtr := 0; qtr < 4; qtr++ { for cyc := 0; cyc < cycPerQtr; cyc++ { - testNet.Cycle() - if gpu { - // testNet.GPU.SyncNeuronsFromGPU() - } + testNet.Cycle(1, true) hidLay.UnitValues(&hidAct, "Act", 0) hidLay.UnitValues(&hidGes, "Ge", 0) @@ -916,8 +948,7 @@ func NetTestRLRate(t *testing.T, tol float32, gpu bool) { testNet.NewState(etime.Train, false) for qtr := 0; qtr < 4; qtr++ { for cyc := 0; cyc < cycPerQtr; cyc++ { - testNet.Cycle() - // testNet.GPU.SyncNeuronsFromGPU() + testNet.Cycle(1, true) hidLay.UnitValues(&hidAct, "Act", 0) hidLay.UnitValues(&hidGes, "Ge", 0) @@ -1076,7 +1107,7 @@ func RunDebugLearn(t *testing.T, testNet *Network, printValues bool, gpu bool, i for qtr := 0; qtr < 4; qtr++ { for cyc := 0; cyc < 50; cyc++ { - testNet.Cycle() + testNet.Cycle(1, true) } 
if qtr == 2 { testNet.MinusPhase() @@ -1311,7 +1342,7 @@ func TestInhibAct(t *testing.T) { inhibNet.NewState(etime.Train, false) for qtr := 0; qtr < 4; qtr++ { for cyc := 0; cyc < cycPerQtr; cyc++ { - inhibNet.Cycle() + inhibNet.Cycle(1, true) if printCycs { inLay.UnitValues(&inActs, "Act", 0) diff --git a/axon/basic_test.goal b/axon/basic_test.goal index b7277c643..475c08ac8 100644 --- a/axon/basic_test.goal +++ b/axon/basic_test.goal @@ -19,6 +19,7 @@ import ( "cogentcore.org/core/tensor" "github.com/emer/emergent/v2/etime" "github.com/emer/emergent/v2/paths" + "github.com/stretchr/testify/assert" "golang.org/x/exp/maps" ) @@ -116,7 +117,7 @@ func newTestNet(nData int) *Network { testNet.Build() testNet.Defaults() ApplyParamSheets(testNet, layerParams["Base"], pathParams["Base"]) - testNet.InitWeights() // get GScale here + testNet.InitWeights() // get GScale here testNet.NewState(etime.Train, false) return testNet } @@ -228,7 +229,7 @@ func TestSpikeProp(t *testing.T) { inCyc := 0 hidCyc := 0 for cyc := range 100 { - net.Cycle() + net.Cycle(1, true) // fmt.Println(cyc, Neurons[Ge, hidLay.NeurStIndex, 0], Neurons[GeRaw, hidLay.NeurStIndex, 0]) if Neurons[Spike, inLay.NeurStIndex, 0] > 0 { // fmt.Println("in spike:", cyc) @@ -327,7 +328,7 @@ func TestInitWeights(t *testing.T) { for qtr := range 4 { for range 50 { - testNet.Cycle() + testNet.Cycle(1, true) } if qtr == 2 { testNet.MinusPhase() @@ -343,6 +344,48 @@ func TestInitWeights(t *testing.T) { ReportValDiffs(t, Tol8, valMapA, valMapB, "init1", "init2") } +func TestGPUState(t *testing.T) { + if os.Getenv("TEST_GPU") != "true" { + t.Skip("Set TEST_GPU env var to run GPU tests") + } + + testNetA := newTestNet(1) + + GPUInit() + UseGPU = true + + testNetB := newTestNet(1) + + RunCycleInc(1) + // get everything back + RunDone(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar, NeuronsVar, NeuronAvgsVar, SynapsesVar, SynapseTracesVar, PathGBufVar, PathGSynsVar) + // note: the following requires turning off read-only in vars.go + // RunDone(LayersVar, PathsVar, NetworkIxsVar, NeuronIxsVar, SynapseIxsVar, PathSendConVar, RecvPathIxsVar, PathRecvConVar, RecvSynIxsVar, CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar, NeuronsVar, NeuronAvgsVar, SynapsesVar, SynapseTracesVar, PathGBufVar, PathGSynsVar) + // assert.Equal(t, testNetA.LayParams, testNetB.LayParams) + // assert.Equal(t, testNetA.LayParams, Layers) + // assert.Equal(t, testNetA.PathParams, testNetB.PathParams) + // assert.Equal(t, testNetA.NetworkIxs, testNetB.NetworkIxs) + // assert.Equal(t, testNetA.NeuronIxs.Values, testNetB.NeuronIxs.Values) + // assert.Equal(t, testNetA.SynapseIxs.Values, testNetB.SynapseIxs.Values) + // assert.Equal(t, testNetA.PathSendCon.Values, testNetB.PathSendCon.Values) + // assert.Equal(t, testNetA.RecvPathIxs.Values, testNetB.RecvPathIxs.Values) + // assert.Equal(t, testNetA.PathRecvCon.Values, testNetB.PathRecvCon.Values) + // assert.Equal(t, testNetA.RecvSynIxs.Values, testNetB.RecvSynIxs.Values) + assert.NotEqual(t, testNetA.Ctx, testNetB.Ctx) + assert.Equal(t, testNetA.Neurons.Values, testNetB.Neurons.Values) + assert.Equal(t, testNetA.NeuronAvgs.Values, testNetB.NeuronAvgs.Values) + assert.Equal(t, testNetA.LayerStates.Values, testNetB.LayerStates.Values) + assert.Equal(t, testNetA.GlobalScalars.Values, testNetB.GlobalScalars.Values) + assert.Equal(t, testNetA.GlobalVectors.Values, testNetB.GlobalVectors.Values) + assert.Equal(t, testNetA.Exts.Values, testNetB.Exts.Values) + 
assert.Equal(t, testNetA.Pools.Values, testNetB.Pools.Values) + assert.Equal(t, testNetA.PoolsInt.Values, testNetB.PoolsInt.Values) + assert.Equal(t, testNetA.PathGBuf.Values, testNetB.PathGBuf.Values) + assert.Equal(t, testNetA.PathGSyns.Values, testNetB.PathGSyns.Values) + assert.Equal(t, testNetA.Synapses.Values, testNetB.Synapses.Values) + assert.Equal(t, testNetA.SynapseTraces.Values, testNetB.SynapseTraces.Values) +} + func TestNetAct(t *testing.T) { NetActTest(t, Tol7, false) } @@ -359,6 +402,11 @@ func TestGPUAct(t *testing.T) { // Note: use NetDebugAct for printf debugging of all values -- // "this is only a test" func NetActTest(t *testing.T, tol float32, gpu bool) { + if gpu { + GPUInit() + UseGPU = true + } + testNet := newTestNet(1) ctx := testNet.Context() testNet.InitExt() @@ -368,14 +416,6 @@ func NetActTest(t *testing.T, tol float32, gpu bool) { hidLay := testNet.LayerByName("Hidden") outLay := testNet.LayerByName("Output") - if gpu { - GPUInit() - UseGPU = true - ToGPUAll() - // testNet.ConfigGPUnoGUI() - // testNet.GPU.CycleByCycle = true // alt modes - } - qtr0HidActs := []float32{0.6944439, 0, 0, 0} qtr0HidGes := []float32{0.35385746, 0, 0, 0} qtr0HidGis := []float32{0.15478331, 0.15478331, 0.15478331, 0.15478331} @@ -426,10 +466,7 @@ func NetActTest(t *testing.T, tol float32, gpu bool) { for qtr := range 4 { for cyc := range cycPerQtr { _ = cyc - testNet.Cycle() - // if gpu { - // testNet.GPU.SyncNeuronsFromGPU() - // } + testNet.Cycle(1, true) } if qtr == 2 { testNet.MinusPhase() @@ -484,41 +521,6 @@ func NetActTest(t *testing.T, tol float32, gpu bool) { // testNet.GPU.Destroy() } -func TestGPUDiffs(t *testing.T) { - if os.Getenv("TEST_GPU") != "true" { - t.Skip("Set TEST_GPU env var to run GPU tests") - } - nonGPUValues := NetDebugAct(t, false, false, 1, false) - gpuValues := NetDebugAct(t, false, true, 1, false) - // note: this has bad tolerance due to NMDA -- can see that if you raise tol to Tol5 etc - ReportValDiffs(t, Tol4, nonGPUValues, gpuValues, "CPU", "GPU") -} - -func TestDebugAct(t *testing.T) { - t.Skip("skipped in regular testing") - NetDebugAct(t, true, false, 1, false) -} - -func TestDebugGPUAct(t *testing.T) { - t.Skip("skipped in regular testing") - NetDebugAct(t, true, true, 1, false) -} - -func TestNDataDiffs(t *testing.T) { - nd1Values := NetDebugAct(t, false, false, 1, true) - nd4Values := NetDebugAct(t, false, false, 4, true) - ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4") -} - -func TestGPUNDataDiffs(t *testing.T) { - if os.Getenv("TEST_GPU") != "true" { - t.Skip("Set TEST_GPU env var to run GPU tests") - } - nd1Values := NetDebugAct(t, false, true, 1, true) - nd4Values := NetDebugAct(t, false, true, 4, true) - ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4") -} - // ReportValDiffs -- reports diffs between a, b values at given tolerance func ReportValDiffs(t *testing.T, tolerance float32, va, vb map[string]float32, aLabel, bLabel string, exclude ...string) { keys := maps.Keys(va) @@ -556,6 +558,11 @@ func ReportValDiffs(t *testing.T, tolerance float32, va, vb map[string]float32, // and also returns a map of all values and variables that can be used for a more // fine-grained diff test, e.g., see the GPU version. 
func NetDebugAct(t *testing.T, printValues bool, gpu bool, nData int, initWts bool) map[string]float32 { + if gpu { + GPUInit() + UseGPU = true + } + testNet := newTestNet(nData) ApplyParamSheets(testNet, layerParams["FullDecay"], pathParams["FullDecay"]) @@ -577,20 +584,14 @@ func RunDebugAct(t *testing.T, testNet *Network, printValues bool, gpu bool, ini var vals []float32 - if gpu { - // testNet.ConfigGPUnoGUI() - // testNet.GPU.RecFunTimes = true - // testNet.GPU.CycleByCycle = true // key for recording results cycle-by-cycle - } - // these control what is printed. // the whole thing is run and returned in the valMap - valsPerRow := 8 + valsPerRow := 4 nQtrs := 1 // max 4 - cycPerQtr := 5 // max 50 - nPats := 2 // max 4 - stLayer := 1 // max 2 - edLayer := 2 // max 3 + cycPerQtr := 1 // max 50 + nPats := 1 // max 4 + stLayer := 0 // max 2 + edLayer := 1 // max 3 nNeurs := 1 // max 4 -- number of neuron values to print for pi := 0; pi < 4; pi++ { @@ -611,9 +612,9 @@ func RunDebugAct(t *testing.T, testNet *Network, printValues bool, gpu bool, ini testNet.ApplyExts() // key now for GPU - for qtr := 0; qtr < 4; qtr++ { - for cyc := 0; cyc < 50; cyc++ { - testNet.Cycle() + for qtr := 0; qtr < nQtrs; qtr++ { + for cyc := 0; cyc < cycPerQtr; cyc++ { + testNet.Cycle(1, true) // get neuron state for ni := 0; ni < 4; ni++ { for li := 0; li < 3; li++ { @@ -668,6 +669,41 @@ func RunDebugAct(t *testing.T, testNet *Network, printValues bool, gpu bool, ini return valMap } +func TestGPUDiffs(t *testing.T) { + if os.Getenv("TEST_GPU") != "true" { + t.Skip("Set TEST_GPU env var to run GPU tests") + } + nonGPUValues := NetDebugAct(t, false, false, 1, false) + gpuValues := NetDebugAct(t, false, true, 1, false) + // note: this has bad tolerance due to NMDA -- can see that if you raise tol to Tol5 etc + ReportValDiffs(t, Tol4, nonGPUValues, gpuValues, "CPU", "GPU") +} + +func TestDebugAct(t *testing.T) { + // t.Skip("skipped in regular testing") + NetDebugAct(t, true, false, 1, false) +} + +func TestDebugGPUAct(t *testing.T) { + // t.Skip("skipped in regular testing") + NetDebugAct(t, true, true, 1, false) +} + +func TestNDataDiffs(t *testing.T) { + nd1Values := NetDebugAct(t, false, false, 1, true) + nd4Values := NetDebugAct(t, false, false, 4, true) + ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4") +} + +func TestGPUNDataDiffs(t *testing.T) { + if os.Getenv("TEST_GPU") != "true" { + t.Skip("Set TEST_GPU env var to run GPU tests") + } + nd1Values := NetDebugAct(t, false, true, 1, true) + nd4Values := NetDebugAct(t, false, true, 4, true) + ReportValDiffs(t, Tol8, nd1Values, nd4Values, "nData = 1", "nData = 4") +} + func TestNetLearn(t *testing.T) { NetTestLearn(t, Tol7, false) } @@ -680,6 +716,11 @@ func TestGPULearn(t *testing.T) { } func NetTestLearn(t *testing.T, tol float32, gpu bool) { + if gpu { + GPUInit() + UseGPU = true + } + testNet := newTestNet(1) ctx := testNet.Context() @@ -735,12 +776,6 @@ func NetTestLearn(t *testing.T, tol float32, gpu bool) { testNet.InitWeights() testNet.InitExt() - if gpu { - // testNet.ConfigGPUnoGUI() - // testNet.GPU.RecFunTimes = true // alt forms - // testNet.GPU.CycleByCycle = true // - } - for pi := 0; pi < 4; pi++ { testNet.NewState(etime.Train, false) @@ -752,10 +787,7 @@ func NetTestLearn(t *testing.T, tol float32, gpu bool) { for qtr := 0; qtr < 4; qtr++ { for cyc := 0; cyc < cycPerQtr; cyc++ { - testNet.Cycle() - if gpu { - // testNet.GPU.SyncNeuronsFromGPU() - } + testNet.Cycle(1, true) hidLay.UnitValues(&hidAct, "Act", 0) 
hidLay.UnitValues(&hidGes, "Ge", 0) @@ -916,8 +948,7 @@ func NetTestRLRate(t *testing.T, tol float32, gpu bool) { testNet.NewState(etime.Train, false) for qtr := 0; qtr < 4; qtr++ { for cyc := 0; cyc < cycPerQtr; cyc++ { - testNet.Cycle() - // testNet.GPU.SyncNeuronsFromGPU() + testNet.Cycle(1, true) hidLay.UnitValues(&hidAct, "Act", 0) hidLay.UnitValues(&hidGes, "Ge", 0) @@ -1076,7 +1107,7 @@ func RunDebugLearn(t *testing.T, testNet *Network, printValues bool, gpu bool, i for qtr := 0; qtr < 4; qtr++ { for cyc := 0; cyc < 50; cyc++ { - testNet.Cycle() + testNet.Cycle(1, true) } if qtr == 2 { testNet.MinusPhase() @@ -1310,7 +1341,7 @@ func TestInhibAct(t *testing.T) { inhibNet.NewState(etime.Train, false) for qtr := 0; qtr < 4; qtr++ { for cyc := 0; cyc < cycPerQtr; cyc++ { - inhibNet.Cycle() + inhibNet.Cycle(1, true) if printCycs { inLay.UnitValues(&inActs, "Act", 0) diff --git a/axon/gosl.go b/axon/gosl.go index 5781c5362..73d319673 100644 --- a/axon/gosl.go +++ b/axon/gosl.go @@ -61,24 +61,24 @@ func GPUInit() { { sy := gpu.NewComputeSystem(gp, "Default") GPUSystem = sy - gpu.NewComputePipelineShaderFS(shaders, "shaders/DWtSubMeanPath.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/WtFromDWtSyn.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/DWtSyn.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/GatherSpikes.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/LayerGi.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/SendSpike.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/CyclePost.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/MinusPhaseNeuron.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/DWtSyn.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/PlusPhaseStartNeuron.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/ApplyExtsNeuron.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/CycleNeuron.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/CycleInc.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/PlusPhasePool.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/DWtFromDiSyn.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/BetweenGi.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/SendSpike.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/CycleInc.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/LayerGi.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/PoolGi.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/MinusPhasePool.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/CycleNeuron.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/ApplyExtsNeuron.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/PlusPhaseStartNeuron.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/PlusPhasePool.wgsl", sy) gpu.NewComputePipelineShaderFS(shaders, "shaders/PlusPhaseNeuron.wgsl", sy) - gpu.NewComputePipelineShaderFS(shaders, "shaders/DWtFromDiSyn.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/DWtSubMeanPath.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/WtFromDWtSyn.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/MinusPhasePool.wgsl", sy) + gpu.NewComputePipelineShaderFS(shaders, "shaders/MinusPhaseNeuron.wgsl", sy) vars := sy.Vars() { sgp := vars.AddGroup(gpu.Storage) @@ -146,760 +146,760 @@ func GPURelease() { ComputeGPU.Release() } -// RunDWtSyn runs the DWtSyn 
kernel with given number of elements, +// RunBetweenGi runs the BetweenGi kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneDWtSyn call does Run and Done for a +// Alternatively, a single-shot RunOneBetweenGi call does Run and Done for a // single run-and-sync case. -func RunDWtSyn(n int) { +func RunBetweenGi(n int) { if UseGPU { - RunDWtSynGPU(n) + RunBetweenGiGPU(n) } else { - RunDWtSynCPU(n) + RunBetweenGiCPU(n) } } -// RunDWtSynGPU runs the DWtSyn kernel on the GPU. See [RunDWtSyn] for more info. -func RunDWtSynGPU(n int) { +// RunBetweenGiGPU runs the BetweenGi kernel on the GPU. See [RunBetweenGi] for more info. +func RunBetweenGiGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["DWtSyn"] + pl := sy.ComputePipelines["BetweenGi"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunDWtSynCPU runs the DWtSyn kernel on the CPU. -func RunDWtSynCPU(n int) { - gpu.VectorizeFunc(0, n, DWtSyn) +// RunBetweenGiCPU runs the BetweenGi kernel on the CPU. +func RunBetweenGiCPU(n int) { + gpu.VectorizeFunc(0, n, BetweenGi) } -// RunOneDWtSyn runs the DWtSyn kernel with given number of elements, +// RunOneBetweenGi runs the BetweenGi kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneDWtSyn(n int, syncVars ...GPUVars) { +func RunOneBetweenGi(n int, syncVars ...GPUVars) { if UseGPU { - RunDWtSynGPU(n) + RunBetweenGiGPU(n) RunDone(syncVars...) } else { - RunDWtSynCPU(n) + RunBetweenGiCPU(n) } } -// RunDWtSubMeanPath runs the DWtSubMeanPath kernel with given number of elements, +// RunSendSpike runs the SendSpike kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneDWtSubMeanPath call does Run and Done for a +// Alternatively, a single-shot RunOneSendSpike call does Run and Done for a // single run-and-sync case. -func RunDWtSubMeanPath(n int) { +func RunSendSpike(n int) { if UseGPU { - RunDWtSubMeanPathGPU(n) + RunSendSpikeGPU(n) } else { - RunDWtSubMeanPathCPU(n) + RunSendSpikeCPU(n) } } -// RunDWtSubMeanPathGPU runs the DWtSubMeanPath kernel on the GPU. See [RunDWtSubMeanPath] for more info. -func RunDWtSubMeanPathGPU(n int) { +// RunSendSpikeGPU runs the SendSpike kernel on the GPU. See [RunSendSpike] for more info. +func RunSendSpikeGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["DWtSubMeanPath"] + pl := sy.ComputePipelines["SendSpike"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunDWtSubMeanPathCPU runs the DWtSubMeanPath kernel on the CPU. -func RunDWtSubMeanPathCPU(n int) { - gpu.VectorizeFunc(0, n, DWtSubMeanPath) +// RunSendSpikeCPU runs the SendSpike kernel on the CPU. 
+func RunSendSpikeCPU(n int) { + gpu.VectorizeFunc(0, n, SendSpike) } -// RunOneDWtSubMeanPath runs the DWtSubMeanPath kernel with given number of elements, +// RunOneSendSpike runs the SendSpike kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneDWtSubMeanPath(n int, syncVars ...GPUVars) { +func RunOneSendSpike(n int, syncVars ...GPUVars) { if UseGPU { - RunDWtSubMeanPathGPU(n) + RunSendSpikeGPU(n) RunDone(syncVars...) } else { - RunDWtSubMeanPathCPU(n) + RunSendSpikeCPU(n) } } -// RunWtFromDWtSyn runs the WtFromDWtSyn kernel with given number of elements, +// RunCycleInc runs the CycleInc kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneWtFromDWtSyn call does Run and Done for a +// Alternatively, a single-shot RunOneCycleInc call does Run and Done for a // single run-and-sync case. -func RunWtFromDWtSyn(n int) { +func RunCycleInc(n int) { if UseGPU { - RunWtFromDWtSynGPU(n) + RunCycleIncGPU(n) } else { - RunWtFromDWtSynCPU(n) + RunCycleIncCPU(n) } } -// RunWtFromDWtSynGPU runs the WtFromDWtSyn kernel on the GPU. See [RunWtFromDWtSyn] for more info. -func RunWtFromDWtSynGPU(n int) { +// RunCycleIncGPU runs the CycleInc kernel on the GPU. See [RunCycleInc] for more info. +func RunCycleIncGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["WtFromDWtSyn"] + pl := sy.ComputePipelines["CycleInc"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunWtFromDWtSynCPU runs the WtFromDWtSyn kernel on the CPU. -func RunWtFromDWtSynCPU(n int) { - gpu.VectorizeFunc(0, n, WtFromDWtSyn) +// RunCycleIncCPU runs the CycleInc kernel on the CPU. +func RunCycleIncCPU(n int) { + gpu.VectorizeFunc(0, n, CycleInc) } -// RunOneWtFromDWtSyn runs the WtFromDWtSyn kernel with given number of elements, +// RunOneCycleInc runs the CycleInc kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneWtFromDWtSyn(n int, syncVars ...GPUVars) { +func RunOneCycleInc(n int, syncVars ...GPUVars) { if UseGPU { - RunWtFromDWtSynGPU(n) + RunCycleIncGPU(n) RunDone(syncVars...) } else { - RunWtFromDWtSynCPU(n) + RunCycleIncCPU(n) } } -// RunGatherSpikes runs the GatherSpikes kernel with given number of elements, +// RunDWtFromDiSyn runs the DWtFromDiSyn kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. 
-// Alternatively, a single-shot RunOneGatherSpikes call does Run and Done for a +// Alternatively, a single-shot RunOneDWtFromDiSyn call does Run and Done for a // single run-and-sync case. -func RunGatherSpikes(n int) { +func RunDWtFromDiSyn(n int) { if UseGPU { - RunGatherSpikesGPU(n) + RunDWtFromDiSynGPU(n) } else { - RunGatherSpikesCPU(n) + RunDWtFromDiSynCPU(n) } } -// RunGatherSpikesGPU runs the GatherSpikes kernel on the GPU. See [RunGatherSpikes] for more info. -func RunGatherSpikesGPU(n int) { +// RunDWtFromDiSynGPU runs the DWtFromDiSyn kernel on the GPU. See [RunDWtFromDiSyn] for more info. +func RunDWtFromDiSynGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["GatherSpikes"] + pl := sy.ComputePipelines["DWtFromDiSyn"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunGatherSpikesCPU runs the GatherSpikes kernel on the CPU. -func RunGatherSpikesCPU(n int) { - gpu.VectorizeFunc(0, n, GatherSpikes) +// RunDWtFromDiSynCPU runs the DWtFromDiSyn kernel on the CPU. +func RunDWtFromDiSynCPU(n int) { + gpu.VectorizeFunc(0, n, DWtFromDiSyn) } -// RunOneGatherSpikes runs the GatherSpikes kernel with given number of elements, +// RunOneDWtFromDiSyn runs the DWtFromDiSyn kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneGatherSpikes(n int, syncVars ...GPUVars) { +func RunOneDWtFromDiSyn(n int, syncVars ...GPUVars) { if UseGPU { - RunGatherSpikesGPU(n) + RunDWtFromDiSynGPU(n) RunDone(syncVars...) } else { - RunGatherSpikesCPU(n) + RunDWtFromDiSynCPU(n) } } -// RunLayerGi runs the LayerGi kernel with given number of elements, +// RunPoolGi runs the PoolGi kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneLayerGi call does Run and Done for a +// Alternatively, a single-shot RunOnePoolGi call does Run and Done for a // single run-and-sync case. -func RunLayerGi(n int) { +func RunPoolGi(n int) { if UseGPU { - RunLayerGiGPU(n) + RunPoolGiGPU(n) } else { - RunLayerGiCPU(n) + RunPoolGiCPU(n) } } -// RunLayerGiGPU runs the LayerGi kernel on the GPU. See [RunLayerGi] for more info. -func RunLayerGiGPU(n int) { +// RunPoolGiGPU runs the PoolGi kernel on the GPU. See [RunPoolGi] for more info. +func RunPoolGiGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["LayerGi"] + pl := sy.ComputePipelines["PoolGi"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunLayerGiCPU runs the LayerGi kernel on the CPU. -func RunLayerGiCPU(n int) { - gpu.VectorizeFunc(0, n, LayerGi) +// RunPoolGiCPU runs the PoolGi kernel on the CPU. +func RunPoolGiCPU(n int) { + gpu.VectorizeFunc(0, n, PoolGi) } -// RunOneLayerGi runs the LayerGi kernel with given number of elements, +// RunOnePoolGi runs the PoolGi kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. 
If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneLayerGi(n int, syncVars ...GPUVars) { +func RunOnePoolGi(n int, syncVars ...GPUVars) { if UseGPU { - RunLayerGiGPU(n) + RunPoolGiGPU(n) RunDone(syncVars...) } else { - RunLayerGiCPU(n) + RunPoolGiCPU(n) } } -// RunSendSpike runs the SendSpike kernel with given number of elements, +// RunCycleNeuron runs the CycleNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneSendSpike call does Run and Done for a +// Alternatively, a single-shot RunOneCycleNeuron call does Run and Done for a // single run-and-sync case. -func RunSendSpike(n int) { +func RunCycleNeuron(n int) { if UseGPU { - RunSendSpikeGPU(n) + RunCycleNeuronGPU(n) } else { - RunSendSpikeCPU(n) + RunCycleNeuronCPU(n) } } -// RunSendSpikeGPU runs the SendSpike kernel on the GPU. See [RunSendSpike] for more info. -func RunSendSpikeGPU(n int) { +// RunCycleNeuronGPU runs the CycleNeuron kernel on the GPU. See [RunCycleNeuron] for more info. +func RunCycleNeuronGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["SendSpike"] + pl := sy.ComputePipelines["CycleNeuron"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunSendSpikeCPU runs the SendSpike kernel on the CPU. -func RunSendSpikeCPU(n int) { - gpu.VectorizeFunc(0, n, SendSpike) +// RunCycleNeuronCPU runs the CycleNeuron kernel on the CPU. +func RunCycleNeuronCPU(n int) { + gpu.VectorizeFunc(0, n, CycleNeuron) } -// RunOneSendSpike runs the SendSpike kernel with given number of elements, +// RunOneCycleNeuron runs the CycleNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneSendSpike(n int, syncVars ...GPUVars) { +func RunOneCycleNeuron(n int, syncVars ...GPUVars) { if UseGPU { - RunSendSpikeGPU(n) + RunCycleNeuronGPU(n) RunDone(syncVars...) } else { - RunSendSpikeCPU(n) + RunCycleNeuronCPU(n) } } -// RunCyclePost runs the CyclePost kernel with given number of elements, +// RunApplyExtsNeuron runs the ApplyExtsNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneCyclePost call does Run and Done for a +// Alternatively, a single-shot RunOneApplyExtsNeuron call does Run and Done for a // single run-and-sync case. -func RunCyclePost(n int) { +func RunApplyExtsNeuron(n int) { if UseGPU { - RunCyclePostGPU(n) + RunApplyExtsNeuronGPU(n) } else { - RunCyclePostCPU(n) + RunApplyExtsNeuronCPU(n) } } -// RunCyclePostGPU runs the CyclePost kernel on the GPU. See [RunCyclePost] for more info. -func RunCyclePostGPU(n int) { +// RunApplyExtsNeuronGPU runs the ApplyExtsNeuron kernel on the GPU. 
See [RunApplyExtsNeuron] for more info. +func RunApplyExtsNeuronGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["CyclePost"] + pl := sy.ComputePipelines["ApplyExtsNeuron"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunCyclePostCPU runs the CyclePost kernel on the CPU. -func RunCyclePostCPU(n int) { - gpu.VectorizeFunc(0, n, CyclePost) +// RunApplyExtsNeuronCPU runs the ApplyExtsNeuron kernel on the CPU. +func RunApplyExtsNeuronCPU(n int) { + gpu.VectorizeFunc(0, n, ApplyExtsNeuron) } -// RunOneCyclePost runs the CyclePost kernel with given number of elements, +// RunOneApplyExtsNeuron runs the ApplyExtsNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneCyclePost(n int, syncVars ...GPUVars) { +func RunOneApplyExtsNeuron(n int, syncVars ...GPUVars) { if UseGPU { - RunCyclePostGPU(n) + RunApplyExtsNeuronGPU(n) RunDone(syncVars...) } else { - RunCyclePostCPU(n) + RunApplyExtsNeuronCPU(n) } } -// RunMinusPhaseNeuron runs the MinusPhaseNeuron kernel with given number of elements, +// RunPlusPhaseStartNeuron runs the PlusPhaseStartNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneMinusPhaseNeuron call does Run and Done for a +// Alternatively, a single-shot RunOnePlusPhaseStartNeuron call does Run and Done for a // single run-and-sync case. -func RunMinusPhaseNeuron(n int) { +func RunPlusPhaseStartNeuron(n int) { if UseGPU { - RunMinusPhaseNeuronGPU(n) + RunPlusPhaseStartNeuronGPU(n) } else { - RunMinusPhaseNeuronCPU(n) + RunPlusPhaseStartNeuronCPU(n) } } -// RunMinusPhaseNeuronGPU runs the MinusPhaseNeuron kernel on the GPU. See [RunMinusPhaseNeuron] for more info. -func RunMinusPhaseNeuronGPU(n int) { +// RunPlusPhaseStartNeuronGPU runs the PlusPhaseStartNeuron kernel on the GPU. See [RunPlusPhaseStartNeuron] for more info. +func RunPlusPhaseStartNeuronGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["MinusPhaseNeuron"] + pl := sy.ComputePipelines["PlusPhaseStartNeuron"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunMinusPhaseNeuronCPU runs the MinusPhaseNeuron kernel on the CPU. -func RunMinusPhaseNeuronCPU(n int) { - gpu.VectorizeFunc(0, n, MinusPhaseNeuron) -} +// RunPlusPhaseStartNeuronCPU runs the PlusPhaseStartNeuron kernel on the CPU. +func RunPlusPhaseStartNeuronCPU(n int) { + gpu.VectorizeFunc(0, n, PlusPhaseStartNeuron) +} -// RunOneMinusPhaseNeuron runs the MinusPhaseNeuron kernel with given number of elements, +// RunOnePlusPhaseStartNeuron runs the PlusPhaseStartNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. 
-func RunOneMinusPhaseNeuron(n int, syncVars ...GPUVars) { +func RunOnePlusPhaseStartNeuron(n int, syncVars ...GPUVars) { if UseGPU { - RunMinusPhaseNeuronGPU(n) + RunPlusPhaseStartNeuronGPU(n) RunDone(syncVars...) } else { - RunMinusPhaseNeuronCPU(n) + RunPlusPhaseStartNeuronCPU(n) } } -// RunPlusPhaseStartNeuron runs the PlusPhaseStartNeuron kernel with given number of elements, +// RunPlusPhasePool runs the PlusPhasePool kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOnePlusPhaseStartNeuron call does Run and Done for a +// Alternatively, a single-shot RunOnePlusPhasePool call does Run and Done for a // single run-and-sync case. -func RunPlusPhaseStartNeuron(n int) { +func RunPlusPhasePool(n int) { if UseGPU { - RunPlusPhaseStartNeuronGPU(n) + RunPlusPhasePoolGPU(n) } else { - RunPlusPhaseStartNeuronCPU(n) + RunPlusPhasePoolCPU(n) } } -// RunPlusPhaseStartNeuronGPU runs the PlusPhaseStartNeuron kernel on the GPU. See [RunPlusPhaseStartNeuron] for more info. -func RunPlusPhaseStartNeuronGPU(n int) { +// RunPlusPhasePoolGPU runs the PlusPhasePool kernel on the GPU. See [RunPlusPhasePool] for more info. +func RunPlusPhasePoolGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["PlusPhaseStartNeuron"] + pl := sy.ComputePipelines["PlusPhasePool"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunPlusPhaseStartNeuronCPU runs the PlusPhaseStartNeuron kernel on the CPU. -func RunPlusPhaseStartNeuronCPU(n int) { - gpu.VectorizeFunc(0, n, PlusPhaseStartNeuron) +// RunPlusPhasePoolCPU runs the PlusPhasePool kernel on the CPU. +func RunPlusPhasePoolCPU(n int) { + gpu.VectorizeFunc(0, n, PlusPhasePool) } -// RunOnePlusPhaseStartNeuron runs the PlusPhaseStartNeuron kernel with given number of elements, +// RunOnePlusPhasePool runs the PlusPhasePool kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOnePlusPhaseStartNeuron(n int, syncVars ...GPUVars) { +func RunOnePlusPhasePool(n int, syncVars ...GPUVars) { if UseGPU { - RunPlusPhaseStartNeuronGPU(n) + RunPlusPhasePoolGPU(n) RunDone(syncVars...) } else { - RunPlusPhaseStartNeuronCPU(n) + RunPlusPhasePoolCPU(n) } } -// RunApplyExtsNeuron runs the ApplyExtsNeuron kernel with given number of elements, +// RunPlusPhaseNeuron runs the PlusPhaseNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneApplyExtsNeuron call does Run and Done for a +// Alternatively, a single-shot RunOnePlusPhaseNeuron call does Run and Done for a // single run-and-sync case. 
-func RunApplyExtsNeuron(n int) { +func RunPlusPhaseNeuron(n int) { if UseGPU { - RunApplyExtsNeuronGPU(n) + RunPlusPhaseNeuronGPU(n) } else { - RunApplyExtsNeuronCPU(n) + RunPlusPhaseNeuronCPU(n) } } -// RunApplyExtsNeuronGPU runs the ApplyExtsNeuron kernel on the GPU. See [RunApplyExtsNeuron] for more info. -func RunApplyExtsNeuronGPU(n int) { +// RunPlusPhaseNeuronGPU runs the PlusPhaseNeuron kernel on the GPU. See [RunPlusPhaseNeuron] for more info. +func RunPlusPhaseNeuronGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["ApplyExtsNeuron"] + pl := sy.ComputePipelines["PlusPhaseNeuron"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunApplyExtsNeuronCPU runs the ApplyExtsNeuron kernel on the CPU. -func RunApplyExtsNeuronCPU(n int) { - gpu.VectorizeFunc(0, n, ApplyExtsNeuron) +// RunPlusPhaseNeuronCPU runs the PlusPhaseNeuron kernel on the CPU. +func RunPlusPhaseNeuronCPU(n int) { + gpu.VectorizeFunc(0, n, PlusPhaseNeuron) } -// RunOneApplyExtsNeuron runs the ApplyExtsNeuron kernel with given number of elements, +// RunOnePlusPhaseNeuron runs the PlusPhaseNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneApplyExtsNeuron(n int, syncVars ...GPUVars) { +func RunOnePlusPhaseNeuron(n int, syncVars ...GPUVars) { if UseGPU { - RunApplyExtsNeuronGPU(n) + RunPlusPhaseNeuronGPU(n) RunDone(syncVars...) } else { - RunApplyExtsNeuronCPU(n) + RunPlusPhaseNeuronCPU(n) } } -// RunBetweenGi runs the BetweenGi kernel with given number of elements, +// RunLayerGi runs the LayerGi kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneBetweenGi call does Run and Done for a +// Alternatively, a single-shot RunOneLayerGi call does Run and Done for a // single run-and-sync case. -func RunBetweenGi(n int) { +func RunLayerGi(n int) { if UseGPU { - RunBetweenGiGPU(n) + RunLayerGiGPU(n) } else { - RunBetweenGiCPU(n) + RunLayerGiCPU(n) } } -// RunBetweenGiGPU runs the BetweenGi kernel on the GPU. See [RunBetweenGi] for more info. -func RunBetweenGiGPU(n int) { +// RunLayerGiGPU runs the LayerGi kernel on the GPU. See [RunLayerGi] for more info. +func RunLayerGiGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["BetweenGi"] + pl := sy.ComputePipelines["LayerGi"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunBetweenGiCPU runs the BetweenGi kernel on the CPU. -func RunBetweenGiCPU(n int) { - gpu.VectorizeFunc(0, n, BetweenGi) +// RunLayerGiCPU runs the LayerGi kernel on the CPU. +func RunLayerGiCPU(n int) { + gpu.VectorizeFunc(0, n, LayerGi) } -// RunOneBetweenGi runs the BetweenGi kernel with given number of elements, +// RunOneLayerGi runs the LayerGi kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. 
If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneBetweenGi(n int, syncVars ...GPUVars) { +func RunOneLayerGi(n int, syncVars ...GPUVars) { if UseGPU { - RunBetweenGiGPU(n) + RunLayerGiGPU(n) RunDone(syncVars...) } else { - RunBetweenGiCPU(n) + RunLayerGiCPU(n) } } -// RunCycleNeuron runs the CycleNeuron kernel with given number of elements, +// RunWtFromDWtSyn runs the WtFromDWtSyn kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneCycleNeuron call does Run and Done for a +// Alternatively, a single-shot RunOneWtFromDWtSyn call does Run and Done for a // single run-and-sync case. -func RunCycleNeuron(n int) { +func RunWtFromDWtSyn(n int) { if UseGPU { - RunCycleNeuronGPU(n) + RunWtFromDWtSynGPU(n) } else { - RunCycleNeuronCPU(n) + RunWtFromDWtSynCPU(n) } } -// RunCycleNeuronGPU runs the CycleNeuron kernel on the GPU. See [RunCycleNeuron] for more info. -func RunCycleNeuronGPU(n int) { +// RunWtFromDWtSynGPU runs the WtFromDWtSyn kernel on the GPU. See [RunWtFromDWtSyn] for more info. +func RunWtFromDWtSynGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["CycleNeuron"] + pl := sy.ComputePipelines["WtFromDWtSyn"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunCycleNeuronCPU runs the CycleNeuron kernel on the CPU. -func RunCycleNeuronCPU(n int) { - gpu.VectorizeFunc(0, n, CycleNeuron) +// RunWtFromDWtSynCPU runs the WtFromDWtSyn kernel on the CPU. +func RunWtFromDWtSynCPU(n int) { + gpu.VectorizeFunc(0, n, WtFromDWtSyn) } -// RunOneCycleNeuron runs the CycleNeuron kernel with given number of elements, +// RunOneWtFromDWtSyn runs the WtFromDWtSyn kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneCycleNeuron(n int, syncVars ...GPUVars) { +func RunOneWtFromDWtSyn(n int, syncVars ...GPUVars) { if UseGPU { - RunCycleNeuronGPU(n) + RunWtFromDWtSynGPU(n) RunDone(syncVars...) } else { - RunCycleNeuronCPU(n) + RunWtFromDWtSynCPU(n) } } -// RunCycleInc runs the CycleInc kernel with given number of elements, +// RunMinusPhasePool runs the MinusPhasePool kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneCycleInc call does Run and Done for a +// Alternatively, a single-shot RunOneMinusPhasePool call does Run and Done for a // single run-and-sync case. -func RunCycleInc(n int) { +func RunMinusPhasePool(n int) { if UseGPU { - RunCycleIncGPU(n) + RunMinusPhasePoolGPU(n) } else { - RunCycleIncCPU(n) + RunMinusPhasePoolCPU(n) } } -// RunCycleIncGPU runs the CycleInc kernel on the GPU. See [RunCycleInc] for more info. 
-func RunCycleIncGPU(n int) { +// RunMinusPhasePoolGPU runs the MinusPhasePool kernel on the GPU. See [RunMinusPhasePool] for more info. +func RunMinusPhasePoolGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["CycleInc"] + pl := sy.ComputePipelines["MinusPhasePool"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunCycleIncCPU runs the CycleInc kernel on the CPU. -func RunCycleIncCPU(n int) { - gpu.VectorizeFunc(0, n, CycleInc) +// RunMinusPhasePoolCPU runs the MinusPhasePool kernel on the CPU. +func RunMinusPhasePoolCPU(n int) { + gpu.VectorizeFunc(0, n, MinusPhasePool) } -// RunOneCycleInc runs the CycleInc kernel with given number of elements, +// RunOneMinusPhasePool runs the MinusPhasePool kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneCycleInc(n int, syncVars ...GPUVars) { +func RunOneMinusPhasePool(n int, syncVars ...GPUVars) { if UseGPU { - RunCycleIncGPU(n) + RunMinusPhasePoolGPU(n) RunDone(syncVars...) } else { - RunCycleIncCPU(n) + RunMinusPhasePoolCPU(n) } } -// RunPlusPhasePool runs the PlusPhasePool kernel with given number of elements, +// RunMinusPhaseNeuron runs the MinusPhaseNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOnePlusPhasePool call does Run and Done for a +// Alternatively, a single-shot RunOneMinusPhaseNeuron call does Run and Done for a // single run-and-sync case. -func RunPlusPhasePool(n int) { +func RunMinusPhaseNeuron(n int) { if UseGPU { - RunPlusPhasePoolGPU(n) + RunMinusPhaseNeuronGPU(n) } else { - RunPlusPhasePoolCPU(n) + RunMinusPhaseNeuronCPU(n) } } -// RunPlusPhasePoolGPU runs the PlusPhasePool kernel on the GPU. See [RunPlusPhasePool] for more info. -func RunPlusPhasePoolGPU(n int) { +// RunMinusPhaseNeuronGPU runs the MinusPhaseNeuron kernel on the GPU. See [RunMinusPhaseNeuron] for more info. +func RunMinusPhaseNeuronGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["PlusPhasePool"] + pl := sy.ComputePipelines["MinusPhaseNeuron"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunPlusPhasePoolCPU runs the PlusPhasePool kernel on the CPU. -func RunPlusPhasePoolCPU(n int) { - gpu.VectorizeFunc(0, n, PlusPhasePool) +// RunMinusPhaseNeuronCPU runs the MinusPhaseNeuron kernel on the CPU. +func RunMinusPhaseNeuronCPU(n int) { + gpu.VectorizeFunc(0, n, MinusPhaseNeuron) } -// RunOnePlusPhasePool runs the PlusPhasePool kernel with given number of elements, +// RunOneMinusPhaseNeuron runs the MinusPhaseNeuron kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. 
-func RunOnePlusPhasePool(n int, syncVars ...GPUVars) { +func RunOneMinusPhaseNeuron(n int, syncVars ...GPUVars) { if UseGPU { - RunPlusPhasePoolGPU(n) + RunMinusPhaseNeuronGPU(n) RunDone(syncVars...) } else { - RunPlusPhasePoolCPU(n) + RunMinusPhaseNeuronCPU(n) } } -// RunDWtFromDiSyn runs the DWtFromDiSyn kernel with given number of elements, +// RunDWtSubMeanPath runs the DWtSubMeanPath kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneDWtFromDiSyn call does Run and Done for a +// Alternatively, a single-shot RunOneDWtSubMeanPath call does Run and Done for a // single run-and-sync case. -func RunDWtFromDiSyn(n int) { +func RunDWtSubMeanPath(n int) { if UseGPU { - RunDWtFromDiSynGPU(n) + RunDWtSubMeanPathGPU(n) } else { - RunDWtFromDiSynCPU(n) + RunDWtSubMeanPathCPU(n) } } -// RunDWtFromDiSynGPU runs the DWtFromDiSyn kernel on the GPU. See [RunDWtFromDiSyn] for more info. -func RunDWtFromDiSynGPU(n int) { +// RunDWtSubMeanPathGPU runs the DWtSubMeanPath kernel on the GPU. See [RunDWtSubMeanPath] for more info. +func RunDWtSubMeanPathGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["DWtFromDiSyn"] + pl := sy.ComputePipelines["DWtSubMeanPath"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunDWtFromDiSynCPU runs the DWtFromDiSyn kernel on the CPU. -func RunDWtFromDiSynCPU(n int) { - gpu.VectorizeFunc(0, n, DWtFromDiSyn) +// RunDWtSubMeanPathCPU runs the DWtSubMeanPath kernel on the CPU. +func RunDWtSubMeanPathCPU(n int) { + gpu.VectorizeFunc(0, n, DWtSubMeanPath) } -// RunOneDWtFromDiSyn runs the DWtFromDiSyn kernel with given number of elements, +// RunOneDWtSubMeanPath runs the DWtSubMeanPath kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneDWtFromDiSyn(n int, syncVars ...GPUVars) { +func RunOneDWtSubMeanPath(n int, syncVars ...GPUVars) { if UseGPU { - RunDWtFromDiSynGPU(n) + RunDWtSubMeanPathGPU(n) RunDone(syncVars...) } else { - RunDWtFromDiSynCPU(n) + RunDWtSubMeanPathCPU(n) } } -// RunPoolGi runs the PoolGi kernel with given number of elements, +// RunGatherSpikes runs the GatherSpikes kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOnePoolGi call does Run and Done for a +// Alternatively, a single-shot RunOneGatherSpikes call does Run and Done for a // single run-and-sync case. -func RunPoolGi(n int) { +func RunGatherSpikes(n int) { if UseGPU { - RunPoolGiGPU(n) + RunGatherSpikesGPU(n) } else { - RunPoolGiCPU(n) + RunGatherSpikesCPU(n) } } -// RunPoolGiGPU runs the PoolGi kernel on the GPU. See [RunPoolGi] for more info. -func RunPoolGiGPU(n int) { +// RunGatherSpikesGPU runs the GatherSpikes kernel on the GPU. 
See [RunGatherSpikes] for more info. +func RunGatherSpikesGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["PoolGi"] + pl := sy.ComputePipelines["GatherSpikes"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunPoolGiCPU runs the PoolGi kernel on the CPU. -func RunPoolGiCPU(n int) { - gpu.VectorizeFunc(0, n, PoolGi) +// RunGatherSpikesCPU runs the GatherSpikes kernel on the CPU. +func RunGatherSpikesCPU(n int) { + gpu.VectorizeFunc(0, n, GatherSpikes) } -// RunOnePoolGi runs the PoolGi kernel with given number of elements, +// RunOneGatherSpikes runs the GatherSpikes kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOnePoolGi(n int, syncVars ...GPUVars) { +func RunOneGatherSpikes(n int, syncVars ...GPUVars) { if UseGPU { - RunPoolGiGPU(n) + RunGatherSpikesGPU(n) RunDone(syncVars...) } else { - RunPoolGiCPU(n) + RunGatherSpikesCPU(n) } } -// RunMinusPhasePool runs the MinusPhasePool kernel with given number of elements, +// RunCyclePost runs the CyclePost kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOneMinusPhasePool call does Run and Done for a +// Alternatively, a single-shot RunOneCyclePost call does Run and Done for a // single run-and-sync case. -func RunMinusPhasePool(n int) { +func RunCyclePost(n int) { if UseGPU { - RunMinusPhasePoolGPU(n) + RunCyclePostGPU(n) } else { - RunMinusPhasePoolCPU(n) + RunCyclePostCPU(n) } } -// RunMinusPhasePoolGPU runs the MinusPhasePool kernel on the GPU. See [RunMinusPhasePool] for more info. -func RunMinusPhasePoolGPU(n int) { +// RunCyclePostGPU runs the CyclePost kernel on the GPU. See [RunCyclePost] for more info. +func RunCyclePostGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["MinusPhasePool"] + pl := sy.ComputePipelines["CyclePost"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunMinusPhasePoolCPU runs the MinusPhasePool kernel on the CPU. -func RunMinusPhasePoolCPU(n int) { - gpu.VectorizeFunc(0, n, MinusPhasePool) +// RunCyclePostCPU runs the CyclePost kernel on the CPU. +func RunCyclePostCPU(n int) { + gpu.VectorizeFunc(0, n, CyclePost) } -// RunOneMinusPhasePool runs the MinusPhasePool kernel with given number of elements, +// RunOneCyclePost runs the CyclePost kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOneMinusPhasePool(n int, syncVars ...GPUVars) { +func RunOneCyclePost(n int, syncVars ...GPUVars) { if UseGPU { - RunMinusPhasePoolGPU(n) + RunCyclePostGPU(n) RunDone(syncVars...) 
} else { - RunMinusPhasePoolCPU(n) + RunCyclePostCPU(n) } } -// RunPlusPhaseNeuron runs the PlusPhaseNeuron kernel with given number of elements, +// RunDWtSyn runs the DWtSyn kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // Can call multiple Run* kernels in a row, which are then all launched // in the same command submission on the GPU, which is by far the most efficient. // MUST call RunDone (with optional vars to sync) after all Run calls. -// Alternatively, a single-shot RunOnePlusPhaseNeuron call does Run and Done for a +// Alternatively, a single-shot RunOneDWtSyn call does Run and Done for a // single run-and-sync case. -func RunPlusPhaseNeuron(n int) { +func RunDWtSyn(n int) { if UseGPU { - RunPlusPhaseNeuronGPU(n) + RunDWtSynGPU(n) } else { - RunPlusPhaseNeuronCPU(n) + RunDWtSynCPU(n) } } -// RunPlusPhaseNeuronGPU runs the PlusPhaseNeuron kernel on the GPU. See [RunPlusPhaseNeuron] for more info. -func RunPlusPhaseNeuronGPU(n int) { +// RunDWtSynGPU runs the DWtSyn kernel on the GPU. See [RunDWtSyn] for more info. +func RunDWtSynGPU(n int) { sy := GPUSystem - pl := sy.ComputePipelines["PlusPhaseNeuron"] + pl := sy.ComputePipelines["DWtSyn"] ce, _ := sy.BeginComputePass() pl.Dispatch1D(ce, n, 64) } -// RunPlusPhaseNeuronCPU runs the PlusPhaseNeuron kernel on the CPU. -func RunPlusPhaseNeuronCPU(n int) { - gpu.VectorizeFunc(0, n, PlusPhaseNeuron) +// RunDWtSynCPU runs the DWtSyn kernel on the CPU. +func RunDWtSynCPU(n int) { + gpu.VectorizeFunc(0, n, DWtSyn) } -// RunOnePlusPhaseNeuron runs the PlusPhaseNeuron kernel with given number of elements, +// RunOneDWtSyn runs the DWtSyn kernel with given number of elements, // on either the CPU or GPU depending on the UseGPU variable. // This version then calls RunDone with the given variables to sync // after the Run, for a single-shot Run-and-Done call. If multiple kernels // can be run in sequence, it is much more efficient to do multiple Run* // calls followed by a RunDone call. -func RunOnePlusPhaseNeuron(n int, syncVars ...GPUVars) { +func RunOneDWtSyn(n int, syncVars ...GPUVars) { if UseGPU { - RunPlusPhaseNeuronGPU(n) + RunDWtSynGPU(n) RunDone(syncVars...) } else { - RunPlusPhaseNeuronCPU(n) + RunDWtSynCPU(n) } } // RunDone must be called after Run* calls to start compute kernels. @@ -920,6 +920,9 @@ func RunDone(syncVars ...GPUVars) { // ToGPU copies given variables to the GPU for the system. 
func ToGPU(vars ...GPUVars) { + if !UseGPU { + return + } sy := GPUSystem syVars := sy.Vars() for _, vr := range vars { diff --git a/axon/init-net.go b/axon/init-net.go index 623e3aba6..bec2bc8e0 100644 --- a/axon/init-net.go +++ b/axon/init-net.go @@ -45,9 +45,7 @@ func (nt *Network) NewState(mode enums.Enum, testing bool) { } ly.NewState(ctx) } - // if nt.GPU.On { - // nt.GPU.SyncStateGBufToGPU() - // } + ToGPULayers() } // InitWeights initializes synaptic weights and all other associated long-term state variables @@ -75,8 +73,10 @@ func (nt *Network) InitWeights() { //types:add } // dur := time.Now().Sub(st) // fmt.Printf("sym: %v\n", dur) + ToGPUAll() + ToGPU(SynapseTracesVar) // only time we call this + ToGPU(PathGBufVar, PathGSynsVar) // and this // nt.GPU.SyncAllToGPU() - // nt.GPU.SyncSynCaToGPU() // only time we call this // nt.GPU.SyncGBufToGPU() } diff --git a/axon/init-net.goal b/axon/init-net.goal index bc7608801..15d2ae780 100644 --- a/axon/init-net.goal +++ b/axon/init-net.goal @@ -42,9 +42,7 @@ func (nt *Network) NewState(mode enums.Enum, testing bool) { } ly.NewState(ctx) } - // if nt.GPU.On { - // nt.GPU.SyncStateGBufToGPU() - // } + ToGPULayers() } // InitWeights initializes synaptic weights and all other associated long-term state variables @@ -72,8 +70,10 @@ func (nt *Network) InitWeights() { //types:add } // dur := time.Now().Sub(st) // fmt.Printf("sym: %v\n", dur) + ToGPUAll() + ToGPU(SynapseTracesVar) // only time we call this + ToGPU(PathGBufVar, PathGSynsVar) // and this // nt.GPU.SyncAllToGPU() - // nt.GPU.SyncSynCaToGPU() // only time we call this // nt.GPU.SyncGBufToGPU() } diff --git a/axon/layer_test.go b/axon/layer_test.go index d0f6f1613..28005823e 100644 --- a/axon/layer_test.go +++ b/axon/layer_test.go @@ -117,7 +117,7 @@ func TestLayerToJson(t *testing.T) { // from net B. TODO: Would be better if we ran a cycle first, to get more variance. net := createNetwork(ctx, shape, t) hiddenLayer := net.LayerByName("Hidden") - net.Cycle() // run one cycle to make the weights more different + net.Cycle(1, true) // run one cycle to make the weights more different netC := createNetwork(ctxC, shape, t) hiddenLayerC := netC.LayerByName("Hidden") diff --git a/axon/looper.go b/axon/looper.go index 6510b9258..41a61c7d2 100644 --- a/axon/looper.go +++ b/axon/looper.go @@ -13,13 +13,15 @@ import ( // LooperStandard adds all the standard Axon Trial and Cycle level processing calls // to the given Looper Stacks. cycle and trial are the enums for the looper levels, // trainMode is the training mode enum value. +// - fastNCycles is the number of cycles to run in one chunk, when single-cycle iteration +// is not otherwise required (based on step level, netview update level). // - minus and plus phases of the theta cycle (trial), at plusStart (150) and plusEnd (199) cycles. // - embedded beta phases within theta, that record St1 and St2 states. // - net.Cycle() at every cycle step. // - net.DWt() and net.WtFromDWt() learning calls in training mode, with netview update // between these two calls if it is visible and viewing synapse variables. 
// - netview update calls at appropriate levels (no-op if no GUI) -func LooperStandard(ls *looper.Stacks, net *Network, viewFunc func() *netview.NetView, plusStart, plusEnd int, cycle, trial, trainMode enums.Enum) { +func LooperStandard(ls *looper.Stacks, net *Network, viewFunc func(mode enums.Enum) *NetViewUpdate, fastNCycles, plusStart, plusEnd int, cycle, trial, trainMode enums.Enum) { ls.AddOnStartToAll("SetContextMode", func(md, tm enums.Enum) { ctx := net.Context() ctx.Mode = int32(md.Int64()) @@ -39,34 +41,38 @@ func LooperStandard(ls *looper.Stacks, net *Network, viewFunc func() *netview.Ne ctx.NewPhase(true) net.PlusPhaseStart() }) - for m, st := range ls.Stacks { + for mode, st := range ls.Stacks { cycLoop := st.Loops[cycle] cycLoop.OnStart.Add("Cycle", func() { - // TODO: - // if ls.ModeStack().StepLevel == cycle { - // net.GPU.CycleByCycle = true - // } else { - // if viewupdt.IsCycleUpdating() { - // net.GPU.CycleByCycle = true - // } else { - // net.GPU.CycleByCycle = false - // } - // } - net.Cycle() + nCycles := 10 + getNeurons := false + if ls.ModeStack().StepLevel.Int64() == cycle.Int64() { + nCycles = 1 + getNeurons = true + } else if view := viewFunc(mode); view != nil { + if view.IsCycleUpdating() { + nCycles = 1 + getNeurons = true + } + } + net.Cycle(nCycles, getNeurons) + if nCycles > 1 { + cycLoop.Counter.Cur += nCycles - 1 + } }) trlLoop := st.Loops[trial] trlLoop.OnStart.Add("NewState", func() { - testing := m.Int64() != trainMode.Int64() - net.NewState(m, testing) + testing := mode.Int64() != trainMode.Int64() + net.NewState(mode, testing) }) trlLoop.OnEnd.Add("PlusPhase:End", func() { net.PlusPhase() }) - if m.Int64() == trainMode.Int64() { + if mode.Int64() == trainMode.Int64() { trlLoop.OnEnd.Add("UpdateWeights", func() { - net.DWt() - if view := viewFunc(); view != nil && view.IsViewingSynapse() { + net.DWt() // todo: need to get synapses here, not after + if view := viewFunc(mode); view != nil && view.IsViewingSynapse() { //TODO: // net.GPU.SyncSynapsesFromGPU() // net.GPU.SyncSynCaFromGPU() // note: only time we call this @@ -79,23 +85,24 @@ func LooperStandard(ls *looper.Stacks, net *Network, viewFunc func() *netview.Ne } // LooperUpdateNetView adds netview update calls to the given -// trial and cycle levels for given NetViewUpdate associated with given mode. -// The netviewCountersFunc returns the counters and other stats -// to display at the bottom of the NetView, and is passed the CountersString() -// for the given mode's [looper.Stack]. -func LooperUpdateNetView(ls *looper.Stacks, mode, cycle, trial enums.Enum, viewupdt *NetViewUpdate, countersFunc func(md, tm enums.Enum) string) { - st := ls.Stacks[mode] - cycLoop := st.Loops[cycle] - cycLoop.OnEnd.Add("GUI:UpdateNetView", func() { - counters := countersFunc(mode, cycle) - viewupdt.UpdateCycle(cycLoop.Counter.Cur, counters) - }) - trlLoop := st.Loops[trial] - trlLoop.OnEnd.Add("GUI:UpdateNetView", func() { - counters := countersFunc(mode, trial) - viewupdt.GoUpdate(counters) - }) - +// trial and cycle levels for given NetViewUpdate associated with the mode, +// returned by the given viewFunc function. +// The countersFunc returns the counters and other stats to display at the +// bottom of the NetView, based on given mode and level. 
+func LooperUpdateNetView(ls *looper.Stacks, cycle, trial enums.Enum, viewFunc func(mode enums.Enum) *NetViewUpdate, countersFunc func(mode, level enums.Enum) string) { + for mode, st := range ls.Stacks { + viewUpdt := viewFunc(mode) + cycLoop := st.Loops[cycle] + cycLoop.OnEnd.Add("GUI:UpdateNetView", func() { + counters := countersFunc(mode, cycle) + viewUpdt.UpdateCycle(cycLoop.Counter.Cur, counters) + }) + trlLoop := st.Loops[trial] + trlLoop.OnEnd.Add("GUI:UpdateNetView", func() { + counters := countersFunc(mode, trial) + viewUpdt.GoUpdate(counters) + }) + } } //////// NetViewUpdate diff --git a/axon/network.go b/axon/network.go index df4613510..b935aa7ba 100644 --- a/axon/network.go +++ b/axon/network.go @@ -131,6 +131,8 @@ type Network struct { //////// Params + // todo: rename LayParams -> LayerParams + // LayParams are all the layer parameters. [NLayers] LayParams []LayerParams `display:"-"` @@ -921,30 +923,56 @@ func ToGPUIndexes() { ToGPU(NetworkIxsVar, NeuronIxsVar, SynapseIxsVar, PathSendConVar, RecvPathIxsVar, PathRecvConVar, RecvSynIxsVar) } -// ToGPULayers copies all the layer-level state to the GPU. +// ToGPUCtxGlobal copies Context and Global vars to the GPU. +// This is done at start of each Cycle update. +func ToGPUCtxGlobal() { + ToGPU(CtxVar, GlobalScalarsVar, GlobalVectorsVar) +} + +// todo: probably don't need PoolsIntVar beyond the first init + +// ToGPULayers copies all the layer-level state to the GPU, including context and globals. func ToGPULayers() { - ToGPU(CtxVar, NeuronsVar, NeuronAvgsVar, LayerStatesVar, PoolsVar, PoolsIntVar) + ToGPU(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar) } -// RunDoneLayers finishes running and copies all the layer-level state to the GPU. -func RunDoneLayers() { - RunDone(CtxVar, NeuronsVar, NeuronAvgsVar, LayerStatesVar, PoolsVar, PoolsIntVar) +// ToGPUNeurons copies Neurons, NeuronAvgs to the GPU. +func ToGPUNeurons() { + ToGPU(NeuronsVar, NeuronAvgsVar) +} + +// ToGPULayersNeurons copies all the layer-level and neuron state to the GPU. +func ToGPULayersNeurons() { + ToGPU(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar, NeuronsVar, NeuronAvgsVar) } // ToGPUSynapses copies the Synapse state to the GPU. func ToGPUSynapses() { - ToGPU(SynapsesVar, SynapseTracesVar) + ToGPU(SynapsesVar) } -// ToGPUAll copies all state up to the GPU. +// ToGPUAll copies all state up to the GPU. Only for InitWeights. func ToGPUAll() { ToGPUIndexes() - ToGPU(CtxVar) ToGPUParams() - ToGPULayers() + ToGPULayersNeurons() ToGPUSynapses() } +// note: RunDone can only be run once, so all vars need to be present in the one call. + +// RunDoneLayers finishes running and copies all the layer-level state from the GPU, +// (and Context, Globals) but NOT neurons. This is the minimal case for Cycle(). +func RunDoneLayers() { + RunDone(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar) +} + +// RunDoneLayersNeurons finishes running and copies all the layer-level +// and neuron state from the GPU, including context and globals. +func RunDoneLayersNeurons() { + RunDone(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar, NeuronsVar, NeuronAvgsVar) +} + // BuildPathGBuf builds the PathGBuf, PathGSyns, // based on the MaxDelay values in the PathParams, // which should have been configured by this point. 
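// Aside (illustration only, not part of the patch): how the new download helpers
// are meant to combine with the Run* wrappers. phaseStepSketch is hypothetical;
// nd and pd follow the usual NNeurons*NData and NPools*NData element counts.
// Because RunDone can only be issued once per batch, the single finishing call
// must name everything the CPU needs back.
func phaseStepSketch(nd, pd int, getNeurons bool) {
	RunMinusPhasePool(pd)   // queued into one GPU command submission
	RunMinusPhaseNeuron(nd) // (runs immediately on the CPU when UseGPU is false)
	if getNeurons {
		RunDoneLayersNeurons() // layer state plus Neurons/NeuronAvgs, e.g. for display
	} else {
		RunDoneLayers() // minimal layer-level state only
	}
}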
diff --git a/axon/network.goal b/axon/network.goal index b7f828087..6ad069362 100644 --- a/axon/network.goal +++ b/axon/network.goal @@ -129,6 +129,8 @@ type Network struct { //////// Params + // todo: rename LayParams -> LayerParams + // LayParams are all the layer parameters. [NLayers] LayParams []LayerParams `display:"-"` @@ -918,30 +920,56 @@ func ToGPUIndexes() { ToGPU(NetworkIxsVar, NeuronIxsVar, SynapseIxsVar, PathSendConVar, RecvPathIxsVar, PathRecvConVar, RecvSynIxsVar) } -// ToGPULayers copies all the layer-level state to the GPU. +// ToGPUCtxGlobal copies Context and Global vars to the GPU. +// This is done at start of each Cycle update. +func ToGPUCtxGlobal() { + ToGPU(CtxVar, GlobalScalarsVar, GlobalVectorsVar) +} + +// todo: probably don't need PoolsIntVar beyond the first init + +// ToGPULayers copies all the layer-level state to the GPU, including context and globals. func ToGPULayers() { - ToGPU(CtxVar, NeuronsVar, NeuronAvgsVar, LayerStatesVar, PoolsVar, PoolsIntVar) + ToGPU(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar) } -// RunDoneLayers finishes running and copies all the layer-level state to the GPU. -func RunDoneLayers() { - RunDone(CtxVar, NeuronsVar, NeuronAvgsVar, LayerStatesVar, PoolsVar, PoolsIntVar) +// ToGPUNeurons copies Neurons, NeuronAvgs to the GPU. +func ToGPUNeurons() { + ToGPU(NeuronsVar, NeuronAvgsVar) +} + +// ToGPULayersNeurons copies all the layer-level and neuron state to the GPU. +func ToGPULayersNeurons() { + ToGPU(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar, NeuronsVar, NeuronAvgsVar) } // ToGPUSynapses copies the Synapse state to the GPU. func ToGPUSynapses() { - ToGPU(SynapsesVar, SynapseTracesVar) + ToGPU(SynapsesVar) } -// ToGPUAll copies all state up to the GPU. +// ToGPUAll copies all state up to the GPU. Only for InitWeights. func ToGPUAll() { ToGPUIndexes() - ToGPU(CtxVar) ToGPUParams() - ToGPULayers() + ToGPULayersNeurons() ToGPUSynapses() } +// note: RunDone can only be run once, so all vars need to be present in the one call. + +// RunDoneLayers finishes running and copies all the layer-level state from the GPU, +// (and Context, Globals) but NOT neurons. This is the minimal case for Cycle(). +func RunDoneLayers() { + RunDone(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar) +} + +// RunDoneLayersNeurons finishes running and copies all the layer-level +// and neuron state from the GPU, including context and globals. +func RunDoneLayersNeurons() { + RunDone(CtxVar, GlobalScalarsVar, GlobalVectorsVar, LayerStatesVar, PoolsVar, PoolsIntVar, NeuronsVar, NeuronAvgsVar) +} + // BuildPathGBuf builds the PathGBuf, PathGSyns, // based on the MaxDelay values in the PathParams, // which should have been configured by this point. diff --git a/axon/params.go b/axon/params.go index 026431269..9a10a4998 100644 --- a/axon/params.go +++ b/axon/params.go @@ -23,10 +23,10 @@ type PathSheets = params.Sheets[*PathParams] type Params struct { // Layer has the parameters to apply to the [LayerParams] for layers. - Layer LayerSheets + Layer LayerSheets `display:"-"` // Path has the parameters to apply to the [PathParams] for paths. - Path PathSheets + Path PathSheets `display:"-"` // ExtraSheets has optional additional sheets of parameters // to apply after the default Base sheet. 
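// Aside (illustration only): the upload side now has matching granularity.
// initStateSketch is hypothetical; it just restates which helper this patch
// uses where: everything at weight init, layer-level state at a new trial.
func initStateSketch(fullWeightInit bool) {
	if fullWeightInit {
		ToGPUAll()                       // indexes, params, layers+neurons, synapses
		ToGPU(SynapseTracesVar)          // uploaded only at this point
		ToGPU(PathGBufVar, PathGSynsVar) // spike-conductance buffers, likewise
		return
	}
	ToGPULayers() // new state: Context, Globals, and layer-level vars only
}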
diff --git a/axon/shaders/CycleNeuron.wgsl b/axon/shaders/CycleNeuron.wgsl index 8486f4a08..080c8c861 100644 --- a/axon/shaders/CycleNeuron.wgsl +++ b/axon/shaders/CycleNeuron.wgsl @@ -97,10 +97,9 @@ fn SetNeuronExtPosNeg(ctx: ptr, ni: u32,di: u32, val: f32) { } } fn LayerParams_CycleNeuron(ly: ptr, ctx: ptr, ni: u32,di: u32) { - var pi = LayerParams_PoolIndex(ly, NeuronIxs[IndexU322D(NeuronIxs[0], NeuronIxs[1], u32(NrnSubPool),u32(ni))]); - var lpi = LayerParams_PoolIndex(ly, u32(u32(0))); + var pi = LayerParams_PoolIndex(ly, NeuronIxs[IndexU322D(NeuronIxs[0], NeuronIxs[1], + u32(NrnSubPool),u32(ni))]); LayerParams_GInteg(ly, ctx, pi, ni, di); - LayerParams_SpikeFromG(ly, ctx, lpi, ni, di); } fn LayerParams_PulvinarDriver(ly: ptr, ctx: ptr, lni: u32,di: u32, drvGe: ptr,nonDrivePct: ptr) { var dli = u32((*ly).Pulv.DriveLayIndex); @@ -125,8 +124,8 @@ fn LayerParams_GInteg(ly: ptr, ctx: ptr, LayerParams_SpecialPostGs(ly, ctx, ni, di, saveVal); } fn LayerParams_SpecialPreGs(ly: ptr, ctx: ptr, pi: u32,ni: u32,di: u32, drvGe: f32, nonDrivePct: f32) -> f32 { - var saveVal = f32(0); // sometimes we need to use a value computed here, for the post Gs step - var pi0 = pi - 1; // 0-n pool index + var saveVal = f32(0); // sometimes we need to use a value computed here, for the post Gs step + var pi0 = pi - (*ly).PoolSt - 1; // 0-n pool index var pnn = u32(PoolNNeurons(pi)); var pni = NeuronIxs[IndexU322D(NeuronIxs[0], NeuronIxs[1], u32(NrnNeurIndex),u32(ni))] - u32(PoolsInt[IndexI323D(PoolsInt[0], PoolsInt[1], PoolsInt[2], u32(PoolNeurSt),u32(pi),u32(di))]); var nrnCtxtGe = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CtxtGe),u32(ni),u32(di))]; @@ -360,30 +359,6 @@ fn LayerParams_GNeuroMod(ly: ptr, ctx: ptr, ctx: ptr, lpi: u32,ni: u32,di: u32) { - ActParams_VmFromG(&(*ly).Acts, ctx, ni, di); - ActParams_SpikeFromVm(&(*ly).Acts, ctx, ni, di); - LearnNeurParams_CaFromSpike(&(*ly).Learn, ctx, ni, di); - var lmax = PoolAvgMax(AMGeInt, AMCycle, Max, lpi, di); - if (lmax > 0) { - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(GeIntNorm),u32(ni),u32(di))] = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(GeInt),u32(ni),u32(di))] / lmax; - } else { - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(GeIntNorm),u32(ni),u32(di))] = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(GeInt),u32(ni),u32(di))]; - } - if ((*ctx).Cycle >= (*ly).Acts.Dt.MaxCycStart) { - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(SpkMaxCa),u32(ni),u32(di))] += (*ly).Learn.CaSpk.Dt.PDt * (Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaSpkM),u32(ni),u32(di))] - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(SpkMaxCa),u32(ni),u32(di))]); - var spkmax = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(SpkMaxCa),u32(ni),u32(di))]; - if (spkmax > Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(SpkMax),u32(ni),u32(di))]) { - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(SpkMax),u32(ni),u32(di))] = spkmax; - } - } - var spk = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Spike),u32(ni),u32(di))]; - if (spk > 0) { - var spksper = (*ctx).ThetaCycles / 8; - var bin = min((*ctx).Cycle/spksper, 7); - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(SpkBin0 + NeuronVars(bin)),u32(ni),u32(di))] += spk; - } -} ///////////// import: "act-net.go" fn CycleNeuron(i: u32) { //gosl:kernel @@ -436,23 +411,6 @@ struct SpikeParams { RDt: f32, pad: i32, } -fn SpikeParams_ActFromISI(sk: ptr, isi: f32,timeInc: 
f32,integ: f32) -> f32 { - if (isi <= 0) { - return f32(0); - } - var maxInt = 1.0 / (timeInc * integ * (*sk).MaxHz); // interval at max hz.. -return maxInt / isi; // normalized -} -fn SpikeParams_AvgFromISI(sk: ptr, avg: f32, isi: f32) -> f32 { - var av = avg; - if (av <= 0) { - av = isi; - } else if (isi < 0.8*av) { - av = isi; // if significantly less than we take that - } else { // integrate on slower - av += (*sk).ISIDt * (isi - av); // running avg updt - }return av; -} struct DendParams { GbarExp: f32, GbarR: f32, @@ -759,131 +717,6 @@ fn ActParams_GiFromSyn(ac: ptr, ctx: ptr, return f32(0); }return giSyn; } -fn ActParams_InetFromG(ac: ptr, vm: f32,ge: f32,gl: f32,gi: f32,gk: f32) -> f32 { - var inet = ge*((*ac).Erev.E-vm) + gl*(*ac).Gbar.L*((*ac).Erev.L-vm) + gi*((*ac).Erev.I-vm) + gk*((*ac).Erev.K-vm); - if (inet > (*ac).Dt.VmTau) { - inet = (*ac).Dt.VmTau; - } else if (inet < -(*ac).Dt.VmTau) { - inet = -(*ac).Dt.VmTau; - }return inet; -} -fn ActParams_VmFromInet(ac: ptr, vm: f32,dt: f32,inet: f32) -> f32 { - return F32_ClipValue(&(*ac).VmRange, vm + dt*inet); -} -fn ActParams_VmInteg(ac: ptr, vm: f32,dt: f32,ge: f32,gl: f32,gi: f32,gk: f32, nvm: ptr,inet: ptr) { - var dtEff = dt * (*ac).Dt.DtStep; - *nvm = vm; - for (var i = i32(0); i < (*ac).Dt.VmSteps; i++) { - *inet = ActParams_InetFromG(ac, *nvm, ge, gl, gi, gk); - *nvm = ActParams_VmFromInet(ac, *nvm, dtEff, *inet); - } -} -fn ActParams_VmFromG(ac: ptr, ctx: ptr, ni: u32,di: u32) { - var updtVm = true; - var isi = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(ISI),u32(ni),u32(di))]; - if ((*ac).Spikes.Tr > 0 && isi >= 0 && isi < f32((*ac).Spikes.Tr)) { - updtVm = false; // don't update the spiking vm during refract - } - var ge = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Ge),u32(ni),u32(di))] * (*ac).Gbar.E; - var gi = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Gi),u32(ni),u32(di))] * (*ac).Gbar.I; - var gk = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Gk),u32(ni),u32(di))] * (*ac).Gbar.K; - var nvm: f32; - var inet: f32; - var expi: f32; - if (updtVm) { - ActParams_VmInteg(ac, Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Vm),u32(ni),u32(di))], (*ac).Dt.VmDt, ge, f32(f32(1)), gi, gk, &nvm, &inet); - if (updtVm && (*ac).Spikes.Exp == 1) { // add spike current if relevant - var exVm: f32; - exVm = 0.5 * (nvm + Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[ // midpoint for this - 2], u32(Vm),u32(ni),u32(di))]); - expi = (*ac).Gbar.L * (*ac).Spikes.ExpSlope * - FastExp((exVm-(*ac).Spikes.Thr)/(*ac).Spikes.ExpSlope); - if (expi > (*ac).Dt.VmTau) { - expi = (*ac).Dt.VmTau; - } - inet += expi; - nvm = ActParams_VmFromInet(ac, nvm, (*ac).Dt.VmDt, expi); - } - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Vm),u32(ni),u32(di))] = nvm; - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Inet),u32(ni),u32(di))] = inet; - } else { // decay back to VmR - var dvm: f32; - if (i32(isi) == (*ac).Spikes.Tr-1) { - dvm = (*ac).Spikes.VmR - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Vm),u32(ni),u32(di))]; - } else { - dvm = (*ac).Spikes.RDt * ((*ac).Spikes.VmR - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Vm),u32(ni),u32(di))]); - } - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Vm),u32(ni),u32(di))] += dvm; - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Inet),u32(ni),u32(di))] = dvm * (*ac).Dt.VmTau; - } - var glEff = f32(1); - if (!updtVm) { - glEff += (*ac).Dend.GbarR; - } - var giEff: 
f32; - giEff = gi + (*ac).Gbar.I*Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(SSGiDend),u32(ni),u32(di))]; - ActParams_VmInteg(ac, Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(VmDend),u32(ni),u32(di))], (*ac).Dt.VmDendDt, ge, glEff, giEff, gk, &nvm, &inet); - if (updtVm) { - nvm = ActParams_VmFromInet(ac, nvm, (*ac).Dt.VmDendDt, (*ac).Dend.GbarExp*expi); - } - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], - u32(VmDend),u32(ni),u32(di))] = nvm; -} -fn ActParams_SpikeFromVmVars(ac: ptr, nrnISI: ptr,nrnISIAvg: ptr,nrnSpike: ptr,nrnSpiked: ptr,nrnAct: ptr, nrnVm: f32) { - var thr: f32; - if ((*ac).Spikes.Exp == 1) { - thr = (*ac).Spikes.ExpThr; - } else { - thr = (*ac).Spikes.Thr; - } - if (nrnVm >= thr) { - *nrnSpike = f32(1); - if (*nrnISIAvg == -1) { - *nrnISIAvg = f32(-2); - } else if (*nrnISI > 0) { // must have spiked to update - *nrnISIAvg = SpikeParams_AvgFromISI(&(*ac).Spikes, *nrnISIAvg, *nrnISI+1); - } - *nrnISI = f32(0); - } else { - *nrnSpike = f32(0); - if (*nrnISI >= 0) { - *nrnISI += f32(1); - if (*nrnISI < 10) { - *nrnSpiked = f32(1); - } else { - *nrnSpiked = f32(0); - } - if (*nrnISI > 200) { // keep from growing infinitely large - *nrnISI = f32(-1); - } - } else { - *nrnSpiked = f32(0); - } - if (*nrnISIAvg >= 0 && *nrnISI > 0 && *nrnISI > 1.2**nrnISIAvg) { - *nrnISIAvg = SpikeParams_AvgFromISI(&(*ac).Spikes, *nrnISIAvg, *nrnISI); - } - } - var nwAct = SpikeParams_ActFromISI(&(*ac).Spikes, *nrnISIAvg, f32(.001), (*ac).Dt.Integ); - if (nwAct > 1) { - nwAct = f32(1); - } - nwAct = *nrnAct + (*ac).Dt.VmDt*(nwAct-*nrnAct); - *nrnAct = nwAct; -} -fn ActParams_SpikeFromVm(ac: ptr, ctx: ptr, ni: u32,di: u32) { - var nrnISI = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(ISI),u32(ni),u32(di))]; - var nrnISIAvg = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(ISIAvg),u32(ni),u32(di))]; - var nrnSpike = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Spike),u32(ni),u32(di))]; - var nrnSpiked = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Spiked),u32(ni),u32(di))]; - var nrnAct = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Act),u32(ni),u32(di))]; - var nrnVm = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Vm),u32(ni),u32(di))]; - ActParams_SpikeFromVmVars(ac, &nrnISI, &nrnISIAvg, &nrnSpike, &nrnSpiked, &nrnAct, nrnVm); - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(ISI),u32(ni),u32(di))] = nrnISI; - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(ISIAvg),u32(ni),u32(di))] = nrnISIAvg; - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Spike),u32(ni),u32(di))] = nrnSpike; - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Spiked),u32(ni),u32(di))] = nrnSpiked; - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Act),u32(ni),u32(di))] = nrnAct; -} ///////////// import: "chans-ak.go" struct AKsParams { @@ -1436,11 +1269,6 @@ struct CaDtParams { //types:add pad: i32, pad1: i32, } -fn CaDtParams_FromCa(kp: ptr, ca: f32, caM: ptr,caP: ptr,caD: ptr) { - *caM += (*kp).MDt * (ca - *caM); - *caP += (*kp).PDt * (*caM - *caP); - *caD += (*kp).DDt * (*caP - *caD); -} struct NeurCaParams { SpikeG: f32, SynTau: f32, @@ -1448,11 +1276,6 @@ struct NeurCaParams { pad: i32, Dt: CaDtParams, } -fn NeurCaParams_CaFromSpike(np: ptr, spike: f32, caSyn: ptr,caM: ptr,caP: ptr,caD: ptr) { - var nsp = (*np).SpikeG * spike; - *caSyn += (*np).SynDt * (nsp - *caSyn); - CaDtParams_FromCa(&(*np).Dt, nsp, caM, caP, caD); -} struct SynCaParams { 
//types:add CaScale: f32, pad: i32, @@ -1589,20 +1412,6 @@ struct CaLrnParams { NormInv: f32, pad: i32, } -fn CaLrnParams_VgccCaFromSpike(np: ptr, ctx: ptr, ni: u32,di: u32) { - if ((*np).SpkVGCC == 1) { - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(VgccCa),u32(ni),u32(di))] = (*np).SpkVgccCa * Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Spike),u32(ni),u32(di))]; - } - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(VgccCaInt),u32(ni),u32(di))] += Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(VgccCa),u32(ni),u32(di))] - (*np).VgccDt*Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(VgccCaInt),u32(ni),u32(di))]; -} -fn CaLrnParams_CaLrns(np: ptr, ctx: ptr, ni: u32,di: u32) { - CaLrnParams_VgccCaFromSpike(np, ctx, ni, di); - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaLrn),u32(ni),u32(di))] = (*np).NormInv * (Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NmdaCa),u32(ni),u32(di))] + Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(VgccCaInt),u32(ni),u32(di))]); - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaM),u32(ni),u32(di))] += (*np).Dt.MDt * (Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaLrn),u32(ni),u32(di))] - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaM),u32(ni),u32(di))]); - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaP),u32(ni),u32(di))] += (*np).Dt.PDt * (Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaM),u32(ni),u32(di))] - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaP),u32(ni),u32(di))]); - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaD),u32(ni),u32(di))] += (*np).Dt.DDt * (Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaP),u32(ni),u32(di))] - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaD),u32(ni),u32(di))]); - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaDiff),u32(ni),u32(di))] = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaP),u32(ni),u32(di))] - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(NrnCaD),u32(ni),u32(di))]; -} struct TrgAvgActParams { GiBaseInit: f32, RescaleOn: i32, @@ -1639,17 +1448,6 @@ fn LearnNeurParams_LrnNMDAFromRaw(ln: ptr, ctx: ptr, ctx: ptr, ni: u32,di: u32) { - var caSyn: f32; - var caSpkM = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaSpkM),u32(ni),u32(di))]; - var caSpkP = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaSpkP),u32(ni),u32(di))]; - var caSpkD = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaSpkD),u32(ni),u32(di))]; - NeurCaParams_CaFromSpike(&(*ln).CaSpk, Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Spike),u32(ni),u32(di))], &caSyn, &caSpkM, &caSpkP, &caSpkD); - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaSpkM),u32(ni),u32(di))] = caSpkM; - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaSpkP),u32(ni),u32(di))] = caSpkP; - Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaSpkD),u32(ni),u32(di))] = caSpkD; - CaLrnParams_CaLrns(&(*ln).CaLearn, ctx, ni, di); -} struct SWtInitParams { SPct: f32, Mean: f32, @@ -1744,14 +1542,6 @@ struct F32 { pad: i32, pad1: i32, // for gpu use } -fn F32_ClipValue(mr: ptr, val: f32) -> f32 { - if (val < (*mr).Min) { - return (*mr).Min; - } - if (val > (*mr).Max) { - return (*mr).Max; - }return val; -} ///////////// import: "network.go" struct NetworkIndexes { diff --git a/axon/shaders/SendSpike.wgsl 
b/axon/shaders/SendSpike.wgsl index a6df3c3db..97cc3369e 100644 --- a/axon/shaders/SendSpike.wgsl +++ b/axon/shaders/SendSpike.wgsl @@ -94,7 +94,7 @@ fn LayerParams_SendSpike(ly: ptr, ctx: ptr, ctx: ptr, lpi: u32,pi: u32,ni: u32,di: u32) { Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(Burst),u32(ni),u32(di))] = Neurons[IndexF323D(Neurons[0], Neurons[1], Neurons[2], u32(CaSpkP),u32(ni),u32(di))]; var li = (*ly).Index; - var pi0 = pi - 1; // 0-n pool index + var pi0 = pi - (*ly).PoolSt - 1; // 0-n pool index var pnn = u32(PoolNNeurons(pi)); var pni = NeuronIxs[IndexU322D(NeuronIxs[0], NeuronIxs[1], u32(NrnNeurIndex),u32(ni))] - u32(PoolsInt[IndexI323D(PoolsInt[0], PoolsInt[1], PoolsInt[2], u32(PoolNeurSt),u32(pi),u32(di))]); var hasRew = GlobalScalars[IndexF322D(GlobalScalars[0], GlobalScalars[1], u32(GvHasRew),u32(di))] > 0; diff --git a/axon/vars.go b/axon/vars.go index 71ebfc038..4c3da5082 100644 --- a/axon/vars.go +++ b/axon/vars.go @@ -126,6 +126,8 @@ var ( //gosl:dims 3 Pools *tensor.Float32 + // todo: following should be read-only + // PoolsInt are the [PoolIntVars] int32 state values for layer and sub-pool // inhibition, AvgMax atomic integration, and other vars: use [AvgMaxIntVarIndex] // [PoolIntVars+AvgMax][Layer * Pools][Data] diff --git a/examples/ra25/ra25.go b/examples/ra25/ra25.go index cd99f6acc..f530e0b98 100644 --- a/examples/ra25/ra25.go +++ b/examples/ra25/ra25.go @@ -202,7 +202,7 @@ type Sim struct { Net *axon.Network `new-window:"+" display:"no-inline"` // network parameter management - Params axon.Params `display:"add-fields"` + Params axon.Params // contains looper control loops for running sim Loops *looper.Stacks `new-window:"+" display:"no-inline"` @@ -374,6 +374,14 @@ func (ss *Sim) CurrentMode() Modes { return md } +// NetViewUpdater returns the NetViewUpdate for given mode. +func (ss *Sim) NetViewUpdater(mode enums.Enum) *axon.NetViewUpdate { + if mode.Int64() == Train.Int64() { + return &ss.TrainUpdate + } + return &ss.TestUpdate +} + // ConfigLoops configures the control loops: Training, Testing func (ss *Sim) ConfigLoops() { ls := looper.NewStacks() @@ -391,7 +399,7 @@ func (ss *Sim) ConfigLoops() { AddLevelIncr(Trial, trls, ss.Config.Run.NData). 
AddLevel(Cycle, 200) - axon.LooperStandard(ls, ss.Net, ss.GUI.NetView, 150, 199, Cycle, Trial, Train) + axon.LooperStandard(ls, ss.Net, ss.NetViewUpdater, 10, 150, 199, Cycle, Trial, Train) ls.Stacks[Train].OnInit.Add("Init", func() { ss.Init() }) @@ -401,7 +409,8 @@ func (ss *Sim) ConfigLoops() { ls.Loop(Train, Run).OnStart.Add("NewRun", ss.NewRun) - ls.Loop(Train, Epoch).IsDone.AddBool("NZeroStop", func() bool { + trainEpoch := ls.Loop(Train, Epoch) + trainEpoch.IsDone.AddBool("NZeroStop", func() bool { stopNz := ss.Config.Run.NZero if stopNz <= 0 { return false @@ -413,8 +422,6 @@ func (ss *Sim) ConfigLoops() { return false }) - // Add Testing - trainEpoch := ls.Loop(Train, Epoch) trainEpoch.OnStart.Add("TestAtInterval", func() { if (ss.Config.Run.TestInterval > 0) && ((trainEpoch.Counter.Cur+1)%ss.Config.Run.TestInterval == 0) { ss.TestAll() @@ -426,10 +433,6 @@ func (ss *Sim) ConfigLoops() { ls.AddOnStartToAll("StatsStart", ss.StatsStart) ls.AddOnEndToAll("StatsStep", ss.StatsStep) - // ls.Loop(Test, Epoch).OnEnd.Add("LogTestErrors", func() { - // axon.LogTestErrors(&ss.Logs) - // }) - // Save weights to file, to look at later ls.Loop(Train, Run).OnEnd.Add("SaveWeights", func() { ctrString := fmt.Sprintf("%03d_%05d", ls.Loop(Train, Run).Counter.Cur, ls.Loop(Train, Epoch).Counter.Cur) @@ -439,8 +442,7 @@ func (ss *Sim) ConfigLoops() { //////// GUI if ss.Config.GUI { - axon.LooperUpdateNetView(ls, Train, Cycle, Trial, &ss.TrainUpdate, ss.StatCounters) - axon.LooperUpdateNetView(ls, Test, Cycle, Trial, &ss.TestUpdate, ss.StatCounters) + axon.LooperUpdateNetView(ls, Cycle, Trial, ss.NetViewUpdater, ss.StatCounters) ls.Stacks[Train].OnInit.Add("GUI-Init", func() { ss.GUI.UpdateWindow() }) ls.Stacks[Test].OnInit.Add("GUI-Init", func() { ss.GUI.UpdateWindow() })
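// Aside (illustration only): what the new fastNCycles argument amounts to when
// driving the network directly, outside the looper. runCyclesSketch is
// hypothetical; 200 matches the AddLevel(Cycle, 200) theta-cycle length above.
func runCyclesSketch(net *axon.Network, cycleView bool) {
	chunk := 10 // fastNCycles: sync only layer-level state after each chunk
	if cycleView {
		chunk = 1 // per-cycle display needs neuron state every cycle
	}
	for cyc := 0; cyc < 200; cyc += chunk {
		net.Cycle(chunk, cycleView)
	}
}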