diff --git a/PVLV.md b/PVLV.md index a2c210e95..df4483cbd 100644 --- a/PVLV.md +++ b/PVLV.md @@ -38,16 +38,16 @@ Note that we use anatomical labels for computationally-specified functions consi In contrast to the minus-plus phase-based timing of cortical learning, the RL-based learning in PVLV is generally organized on trial-wise boundaries, with some factors computed online within the trial. Here is a schematic, for an intermediate about of positive CS learning and VSPatch prediction of a positive US outcome, with an "Eat" action that drives the US: -| Trial Step: | 0 | 1 | 2 | 3 | -| ------------ | -------- | ---- | ---- | ----------- | -| Event / Act | CS | | Eat | US | -| SC -> ACh | ++ | | | | -| BLA | ++ | | Rp | R | -| BLA dw | tr=S*ACh | | | R(R-Rp)tr | -| OFC | BLA-> | PT | PT | reset PT | -| VSPatch = VP | | | ++ Rp | | -| VP dw | | | | Sp Rp DA | -| DA | ++ (BLA) | | | ++ (US-VPp) | +| Trial Step: | 0 | 1 | 2 | 3 | +| ------------ | --------- | ---- | ----- | ----------- | +| Event / Act | CS | | Eat | +++ US | +| SC -> ACh | +++ | | | | +| BLA | ++ | | Rp | R | +| BLA dw | tr=S ⋅ ACh | | | R(R-Rp)tr | +| OFC | BLA-> | PT | PT | reset PT | +| VSPatch = VP | | | ++ Rp | | +| VP dw | | | | Sp ⋅ Rp ⋅ DA | +| DA | ++ (BLA) | | | + (US-VPp) | * Rp = receiving activity on previous trial * DA at US is computed at start of trial in PVLV.NewState, based on VS D1 - D2 on prev trial. @@ -190,8 +190,6 @@ The learning rule here is a standard "3 factor" dopamine-modulated learning, ver where `DAlr` is the dopamine-signed learning rate factor for D1 vs. D2, which is a function of US for the current trial (applied at start of a trial) minus VSPatch _from the prior time step_. Thus the prediction error in VSPatch relative to US reward drives learning, such that it will always adjust to reduce error, consistent with standard Rescorla-Wagner / TD learning rules. 
-Also, the learning factor for the `Rp` receiving activity on the prior time step is the `GeIntNorm` Max-normalized value, not raw activity, because VSPatch neurons can be relatively inactive at the start (this is done by setting `SpkPrv` to `GeIntNorm` for this layer type only). - # Negative USs and Costs There are two qualitatively-different types of negative outcome values, which require distinct pathways within the model: diff --git a/axon/gtigen.go b/axon/gtigen.go index 319ea717a..f4b332a06 100644 --- a/axon/gtigen.go +++ b/axon/gtigen.go @@ -120,7 +120,7 @@ var _ = gti.AddType(&gti.Type{Name: "github.com/emer/axon/v2/axon.DAModTypes", I var _ = gti.AddType(&gti.Type{Name: "github.com/emer/axon/v2/axon.ValenceTypes", IDName: "valence-types", Doc: "ValenceTypes are types of valence coding: positive or negative."}) -var _ = gti.AddType(&gti.Type{Name: "github.com/emer/axon/v2/axon.NeuroModParams", IDName: "neuro-mod-params", Doc: "NeuroModParams specifies the effects of neuromodulators on neural\nactivity and learning rate. 
These can apply to any neuron type,\nand are applied in the core cycle update equations.", Fields: []gti.Field{{Name: "DAMod", Doc: "dopamine receptor-based effects of dopamine modulation on excitatory and inhibitory conductances: D1 is excitatory, D2 is inhibitory as a function of increasing dopamine"}, {Name: "Valence", Doc: "valence coding of this layer -- may affect specific layer types but does not directly affect neuromodulators currently"}, {Name: "DAModGain", Doc: "multiplicative factor on overall DA modulation specified by DAMod -- resulting overall gain factor is: 1 + DAModGain * DA, where DA is appropriate DA-driven factor"}, {Name: "DALRateSign", Doc: "modulate the sign of the learning rate factor according to the DA sign, taking into account the DAMod sign reversal for D2Mod, also using BurstGain and DipGain to modulate DA value -- otherwise, only the magnitude of the learning rate is modulated as a function of raw DA magnitude according to DALRateMod (without additional gain factors)"}, {Name: "DALRateMod", Doc: "if not using DALRateSign, this is the proportion of maximum learning rate that Abs(DA) magnitude can modulate -- e.g., if 0.2, then DA = 0 = 80% of std learning rate, 1 = 100%"}, {Name: "AChLRateMod", Doc: "proportion of maximum learning rate that ACh can modulate -- e.g., if 0.2, then ACh = 0 = 80% of std learning rate, 1 = 100%"}, {Name: "AChDisInhib", Doc: "amount of extra Gi inhibition added in proportion to 1 - ACh level -- makes ACh disinhibitory"}, {Name: "BurstGain", Doc: "multiplicative gain factor applied to positive dopamine signals -- this operates on the raw dopamine signal prior to any effect of D2 receptors in reversing its sign!"}, {Name: "DipGain", Doc: "multiplicative gain factor applied to negative dopamine signals -- this operates on the raw dopamine signal prior to any effect of D2 receptors in reversing its sign! 
should be small for acq, but roughly equal to burst for ext"}, {Name: "pad"}, {Name: "pad1"}, {Name: "pad2"}}}) +var _ = gti.AddType(&gti.Type{Name: "github.com/emer/axon/v2/axon.NeuroModParams", IDName: "neuro-mod-params", Doc: "NeuroModParams specifies the effects of neuromodulators on neural\nactivity and learning rate. These can apply to any neuron type,\nand are applied in the core cycle update equations.", Fields: []gti.Field{{Name: "DAMod", Doc: "dopamine receptor-based effects of dopamine modulation on excitatory and inhibitory conductances: D1 is excitatory, D2 is inhibitory as a function of increasing dopamine"}, {Name: "Valence", Doc: "valence coding of this layer -- may affect specific layer types but does not directly affect neuromodulators currently"}, {Name: "DAModGain", Doc: "dopamine modulation of excitatory and inhibitory conductances (i.e., \"performance dopamine\" effect -- this does NOT affect learning dopamine modulation in terms of RLrate): g *= 1 + (DAModGain * DA)"}, {Name: "DALRateSign", Doc: "modulate the sign of the learning rate factor according to the DA sign, taking into account the DAMod sign reversal for D2Mod, also using BurstGain and DipGain to modulate DA value -- otherwise, only the magnitude of the learning rate is modulated as a function of raw DA magnitude according to DALRateMod (without additional gain factors)"}, {Name: "DALRateMod", Doc: "if not using DALRateSign, this is the proportion of maximum learning rate that Abs(DA) magnitude can modulate -- e.g., if 0.2, then DA = 0 = 80% of std learning rate, 1 = 100%"}, {Name: "AChLRateMod", Doc: "proportion of maximum learning rate that ACh can modulate -- e.g., if 0.2, then ACh = 0 = 80% of std learning rate, 1 = 100%"}, {Name: "AChDisInhib", Doc: "amount of extra Gi inhibition added in proportion to 1 - ACh level -- makes ACh disinhibitory"}, {Name: "BurstGain", Doc: "multiplicative gain factor applied to positive dopamine signals -- this operates on the raw dopamine signal 
prior to any effect of D2 receptors in reversing its sign!"}, {Name: "DipGain", Doc: "multiplicative gain factor applied to negative dopamine signals -- this operates on the raw dopamine signal prior to any effect of D2 receptors in reversing its sign! should be small for acq, but roughly equal to burst for ext"}, {Name: "pad"}, {Name: "pad1"}, {Name: "pad2"}}}) var _ = gti.AddType(&gti.Type{Name: "github.com/emer/axon/v2/axon.NeuronFlags", IDName: "neuron-flags", Doc: "NeuronFlags are bit-flags encoding relevant binary state for neurons"}) diff --git a/axon/layerparams.go b/axon/layerparams.go index 4796ffeda..bd2113495 100644 --- a/axon/layerparams.go +++ b/axon/layerparams.go @@ -949,10 +949,6 @@ func (ly *LayerParams) NewStateNeuron(ctx *Context, ni, di uint32, vals *LayerVa SetNrnV(ctx, ni, di, SpkMax, 0) SetNrnV(ctx, ni, di, SpkMaxCa, 0) - if ly.LayType == VSPatchLayer { - SetNrnV(ctx, ni, di, SpkPrv, NrnV(ctx, ni, di, GeIntNorm)) - } - ly.Acts.DecayState(ctx, ni, di, ly.Acts.Decay.Act, ly.Acts.Decay.Glong, ly.Acts.Decay.AHP) // Note: synapse-level Ca decay happens in DWt ly.Acts.KNaNewState(ctx, ni, di) diff --git a/axon/neuromod.go b/axon/neuromod.go index 4c04433ff..e9085fbe0 100644 --- a/axon/neuromod.go +++ b/axon/neuromod.go @@ -60,7 +60,7 @@ type NeuroModParams struct { // valence coding of this layer -- may affect specific layer types but does not directly affect neuromodulators currently Valence ValenceTypes - // multiplicative factor on overall DA modulation specified by DAMod -- resulting overall gain factor is: 1 + DAModGain * DA, where DA is appropriate DA-driven factor + // dopamine modulation of excitatory and inhibitory conductances (i.e., "performance dopamine" effect -- this does NOT affect learning dopamine modulation in terms of RLrate): g *= 1 + (DAModGain * DA) DAModGain float32 // modulate the sign of the learning rate factor according to the DA sign, taking into account the DAMod sign reversal for D2Mod, also using BurstGain and DipGain 
to modulate DA value -- otherwise, only the magnitude of the learning rate is modulated as a function of raw DA magnitude according to DALRateMod (without additional gain factors) @@ -86,7 +86,7 @@ type NeuroModParams struct { func (nm *NeuroModParams) Defaults() { // nm.DAMod is typically set by BuildConfig -- don't reset here - nm.DAModGain = 0.5 + nm.DAModGain = 0 nm.DALRateMod = 0 nm.AChLRateMod = 0 nm.BurstGain = 1 diff --git a/axon/pvlv_prjns.go b/axon/pvlv_prjns.go index 5d2bfe36e..bf1efe803 100644 --- a/axon/pvlv_prjns.go +++ b/axon/pvlv_prjns.go @@ -56,8 +56,8 @@ func (pj *PrjnParams) VSPatchDefaults() { pj.SWts.Adapt.On.SetBool(false) pj.SWts.Adapt.SigGain = 1 pj.SWts.Init.SPct = 0 - pj.SWts.Init.Mean = 0.1 - pj.SWts.Init.Var = 0.05 + pj.SWts.Init.Mean = 0.5 + pj.SWts.Init.Var = 0.25 pj.SWts.Init.Sym.SetBool(false) pj.Learn.Trace.Tau = 1 pj.Learn.Trace.LearnThr = 0 // 0.3 diff --git a/axon/shaders/gpu_newstate_neuron.spv b/axon/shaders/gpu_newstate_neuron.spv index 13fc4c851..ff1f94efe 100644 Binary files a/axon/shaders/gpu_newstate_neuron.spv and b/axon/shaders/gpu_newstate_neuron.spv differ diff --git a/examples/pcore_ds/params.go b/examples/pcore_ds/params.go index 7b83e778d..b43ffed40 100644 --- a/examples/pcore_ds/params.go +++ b/examples/pcore_ds/params.go @@ -24,10 +24,11 @@ var ParamSets = netparams.Sets{ }}, {Sel: ".MatrixLayer", Desc: "all mtx", Params: params.Params{ - "Layer.Inhib.Pool.Gi": "0.5", // 0.5 > others - "Layer.Learn.NeuroMod.BurstGain": "0.1", // 0.1 == 0.2 > 0.05 > 0.5 -- key lrate modulator - "Layer.Learn.RLRate.On": "true", // note: applied for tr update trials - "Layer.Learn.TrgAvgAct.On": "true", // true > false + "Layer.Inhib.Pool.Gi": "0.5", // 0.5 > others + "Layer.Learn.NeuroMod.BurstGain": "0.1", // 0.1 == 0.2 > 0.05 > 0.5 -- key lrate modulator + "Layer.Learn.NeuroMod.DAModGain": "0.2", // was 0.5 + "Layer.Learn.RLRate.On": "true", // note: applied for tr update trials + "Layer.Learn.TrgAvgAct.RescaleOn": "true", // true 
> false }, Hypers: params.Hypers{ "Layer.Learn.NeuroMod.BurstGain": {"Tweak": "-"}, diff --git a/examples/pvlv/params.go b/examples/pvlv/params.go index 954788b21..cce3f4b40 100644 --- a/examples/pvlv/params.go +++ b/examples/pvlv/params.go @@ -118,9 +118,9 @@ var ParamSets = netparams.Sets{ }}, {Sel: ".VSPatchPrjn", Desc: "", Params: params.Params{ - "Prjn.PrjnScale.Abs": "6", - "Prjn.Learn.Trace.LearnThr": "0.1", - "Prjn.Learn.LRate.Base": "0.2", // 0.05 def -- todo: needs faster + "Prjn.PrjnScale.Abs": "3", + "Prjn.Learn.Trace.LearnThr": "0", + "Prjn.Learn.LRate.Base": "0.05", // 0.05 def -- todo: needs faster }}, {Sel: "#OFCposUSPTToOFCposUSPT", Desc: "", Params: params.Params{ diff --git a/examples/vspatch/config.go b/examples/vspatch/config.go index 558891968..be9e412c1 100644 --- a/examples/vspatch/config.go +++ b/examples/vspatch/config.go @@ -68,7 +68,7 @@ type RunConfig struct { NEpochs int `default:"30"` // total number of trials per epoch. Should be an even multiple of NData. - NTrials int `default:"32"` + NTrials int `default:"128"` } // LogConfig has config parameters related to logging data diff --git a/examples/vspatch/params.go b/examples/vspatch/params.go index b0737602f..ce1f01e10 100644 --- a/examples/vspatch/params.go +++ b/examples/vspatch/params.go @@ -17,18 +17,27 @@ var ParamSets = netparams.Sets{ Params: params.Params{ "Layer.Acts.Clamp.Ge": "1.0", // 1.5 is def, was 0.6 (too low) }}, + {Sel: "#State", Desc: "", + Params: params.Params{ + "Layer.Inhib.ActAvg.Nominal": "0.2", + }}, {Sel: ".VSPatchLayer", Desc: "", Params: params.Params{ + "Layer.Inhib.Pool.On": "false", + "Layer.Inhib.Pool.Gi": "0.2", "Layer.Learn.NeuroMod.DipGain": "1", // boa requires balanced.. 
"Layer.Learn.TrgAvgAct.GiBaseInit": "0", // 0.5 default; 0 better "Layer.Learn.RLRate.SigmoidMin": "0.05", // 0.05 def "Layer.Learn.NeuroMod.AChLRateMod": "0", + "Layer.Learn.NeuroMod.DAModGain": "0", // this is actual perf mod }}, {Sel: ".VSPatchPrjn", Desc: "", Params: params.Params{ - "Prjn.PrjnScale.Abs": "6", + "Prjn.PrjnScale.Abs": "2", "Prjn.Learn.Trace.LearnThr": "0", "Prjn.Learn.LRate.Base": "0.05", // 0.05 def + "Prjn.SWts.Init.Mean": "0.5", + "Prjn.SWts.Init.Var": "0.25", }}, }, } diff --git a/examples/vspatch/vspatch_env.go b/examples/vspatch/vspatch_env.go index 366de0367..a18b1bda6 100644 --- a/examples/vspatch/vspatch_env.go +++ b/examples/vspatch/vspatch_env.go @@ -30,6 +30,9 @@ type VSPatchEnv struct { // trial counter is for the step within condition Trial env.Ctr `view:"inline"` + // if true, reward value is a probability of getting a 1 reward + Probs bool + // number of conditions, each of which can have a different reward value NConds int @@ -82,6 +85,7 @@ func (ev *VSPatchEnv) Desc() string { } func (ev *VSPatchEnv) Defaults() { + ev.Probs = true ev.NConds = 4 ev.NTrials = 3 ev.NUnitsY = 5 @@ -176,7 +180,16 @@ func (ev *VSPatchEnv) Step() bool { ev.RenderState(ev.Sequence.Cur, ev.Trial.Cur) ev.Rew = 0 if ev.Trial.Cur == ev.NTrials-1 { - ev.Rew = ev.CondVals[ev.Sequence.Cur] + rv := ev.CondVals[ev.Sequence.Cur] + if ev.Probs { + if erand.BoolP32(rv, -1, &ev.Rand) { + ev.Rew = 1 + } else { + ev.Rew = 0.001 + } + } else { + ev.Rew = rv + } } ev.Sequence.Same() if ev.Trial.Incr() {