Commit

l1 l2 with lr-decay
andreasdominik committed Aug 7, 2023
1 parent 421a7ac commit ddd807d
Showing 2 changed files with 26 additions and 9 deletions.
docs/src/changelog.md — 2 changes: 2 additions & 0 deletions
@@ -1,10 +1,12 @@
# ChangeLog of NNHelferlein package

### wip
+ use CUDA.CuIterator instead
+ Padding added to embedding layer
+ GPU selection added (not yet exported)

### v1.3.1 - unreleased
+ l1 and l2 decay always parallel to learning rate decay
+ several bioinformatics tools (such as embedding, blosum, vhse8)
+ dataframe_minibatch default "y" changed to nothing.
+ Bioinformatics: amino acid tokenisation added
src/train.jl — 33 changes: 24 additions & 9 deletions
@@ -48,7 +48,7 @@ The model is updated (in-place) and the trained model is returned.
by a constant factor (e.g. 0.9) resulting in an exponential decay.
If `true`, lr is modified by the same step size, i.e. linearly.
+ `l1=nothing`: L1 regularisation; implemented as weight decay per
-   parameter
+   parameter. If learning-rate decay is used, L1 and L2 are also decayed.
+ `l2=nothing`: L2 regularisation; implemented as weight decay per
parameter
+ `opti_args...`: optional keyword arguments for the optimiser can be specified
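(Editorial sketch, not part of the commit: a minimal call of `tb_train!` using the keywords documented above. The optimiser `Adam` and the minibatch iterators `dtrn`/`dvld` are illustrative names; `lr_decay` is read here as the target learning rate, and `lrd_steps`/`lrd_linear` are assumed to be keyword arguments since they appear in the function body below.)

```julia
# Illustrative only: decay lr from 1e-3 towards 1e-4; with this commit the
# l1/l2 weight decay is scaled down in parallel with the learning rate.
tb_train!(mdl, Adam, dtrn, dvld;
          epochs=10,
          lr=1f-3, lr_decay=1f-4, lrd_steps=5, lrd_linear=false,
          l2=1f-5)
```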
@@ -154,7 +154,8 @@ function tb_train!(mdl, opti, trn, vld=nothing; epochs=1,
# do lr-decay only if lr is explicitly defined:
#
if !isnothing(lr_decay) && haskey(opti_args, :lr)
- lr_decay = calc_d_η(opti_args[:lr], lr_decay, lrd_linear, lrd_steps)
+ Δlr = calc_d_η(opti_args[:lr], lr_decay, lrd_linear, lrd_steps)
+ #println("Δlr: $Δlr")
else
lr_decay = nothing
end
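(Editorial note: `calc_d_η` itself is not shown in this diff. A plausible reading, consistent with how `Δlr` is applied further down (`lr + Δlr` when `lrd_linear`, `lr * Δlr` otherwise), is that it returns the per-interval step or factor needed to move from the initial value to the target over `lrd_steps` intervals. A worked sketch under that assumption, with illustrative numbers:)

```julia
# Assumption: calc_d_η(start, target, linear, steps) yields a per-interval
# increment (linear) or factor (exponential). Values are illustrative.
lr, lr_target, steps = 1f-3, 1f-4, 10

Δlr_linear = (lr_target - lr) / (steps - 1)      # ≈ -1.0f-4, added each interval
Δlr_expon  = (lr_target / lr)^(1/(steps - 1))    # ≈ 0.774, multiplied each interval
```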
@@ -163,10 +164,22 @@
# prepare l1/l2:
#
if !isnothing(l2)
- l2 = Float32(1 - l2/2)
+ l2_factor = Float32(1 - l2/2)
end
if !isnothing(l1)
- l1 = Float32(1 - l1)
+ l1_factor = Float32(1 - l1)
end
+ if !isnothing(lr_decay)
+     if !isnothing(l2)
+         l2_decay = lr_decay/opti_args[:lr] * l2 # final l2
+         Δl2 = calc_d_η(l2, l2_decay, lrd_linear, lrd_steps)
+         #println("l2_decay: $l2_decay, Δl2: $Δl2")
+     end
+     if !isnothing(l1)
+         l1_decay = lr_decay/opti_args[:lr] * l1 # final l1
+         Δl1 = calc_d_η(l1, l1_decay, lrd_linear, lrd_steps)
+         #println("l1_decay: $l1_decay, Δl1: $Δl1")
+     end
+ end

# mk log directory:
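(Editorial note on the new l1/l2 block above: the final regularisation strength is scaled by the same ratio as the learning rate, and the per-interval step `Δl1`/`Δl2` is then derived with the same `calc_d_η` helper. A small worked example with illustrative numbers:)

```julia
# Illustrative only: lr decays from 1f-3 to a target of 1f-4 (ratio 0.1),
# so the final l2 is set to the same fraction of its starting value.
lr, lr_target = 1f-3, 1f-4
l2 = 1f-5

l2_decay = lr_target / lr * l2     # final l2 = 1f-6, scaled like the lr
# Δl2 then follows exactly as Δlr: calc_d_η(l2, l2_decay, lrd_linear, lrd_steps)
```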
@@ -275,10 +288,10 @@ function tb_train!(mdl, opti, trn, vld=nothing; epochs=1,
Δw = grad(loss, p)

if !isnothing(l1)
- p.value .= p.value .* l1
+ p.value .= p.value .* l1_factor
end
if !isnothing(l2)
- p.value .= p.value .* l2
+ p.value .= p.value .* l2_factor
end

Knet.update!(value(p), Δw, p.opt)
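(Editorial note on this hunk: the l1/l2 regularisation is applied as a direct multiplicative decay of the parameter values immediately before the optimiser step, matching the docstring's "weight decay per parameter". A minimal sketch of the effective update for one parameter tensor, with plain SGD standing in for whatever optimiser is configured:)

```julia
# Sketch only; l1, l2, η and Δw (the gradient) are illustrative names here.
w .= w .* (1 - l1)        # factor precomputed as l1_factor above
w .= w .* (1 - l2/2)      # factor precomputed as l2_factor above
w .= w .- η .* Δw         # optimiser step (plain SGD for simplicity)
```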
@@ -336,16 +349,18 @@ function tb_train!(mdl, opti, trn, vld=nothing; epochs=1,
#
if (!isnothing(lr_decay)) && i > 1 && ((i-1) % lr_nth == 0)
lr = first(params(mdl)).opt.lr
- lr = lrd_linear ? lr + lr_decay : lr * lr_decay
+ lr = lrd_linear ? lr + Δlr : lr * Δlr

l1_report = ""
l2_report = ""
if !isnothing(l1)
- il1 = lrd_linear ? l1 + lr_decay : l1 * lr_decay
+ l1 = lrd_linear ? l1 + Δl1 : l1 * Δl1
+ l1_factor = Float32(1 - l1)
l1_report = @sprintf(", l1=%.2e", l1)
end
if !isnothing(l2)
- l2 = lrd_linear ? l2 + lr_decay : l2 * lr_decay
+ l2 = lrd_linear ? l2 + Δl2 : l2 * Δl2
+ l2_factor = Float32(1 - l2/2)
l2_report = @sprintf(", l2=%.2e", l2)
end
@printf("\nSetting learning rate to η=%.2e%s%s in epoch %.1f\n",
