diff --git a/notebooks/generative-vs-discriminative-circuit.ipynb b/notebooks/generative-vs-discriminative-circuit.ipynb
index b86d655e..5bb521a9 100644
--- a/notebooks/generative-vs-discriminative-circuit.ipynb
+++ b/notebooks/generative-vs-discriminative-circuit.ipynb
@@ -268,7 +268,7 @@
     "The discriminative training loss is the **cross-entropy** between the target distribution, $p^*$, and our predicted distribution, $p$:\n",
     "$$\n",
     "\\begin{align}\n",
-    "\\mathcal{L}_{dis}(\\theta) &= \\frac{1}{N} \\sum_{i=1}^N \\underbrace{\\sum_{y' \\in \\{0,\\ldots,9\\}} p^*(y' \\mid \\mathbf{x}^{(i)}) \\log p(y' \\mid \\mathbf{x}^{(i)})}_{\\text{cross-entropy}}\n",
+    "\\mathcal{L}_{dis}(\\theta) &= -\\frac{1}{N} \\sum_{i=1}^N \\underbrace{\\sum_{y' \\in \\{0,\\ldots,9\\}} p^*(y' \\mid \\mathbf{x}^{(i)}) \\log p(y' \\mid \\mathbf{x}^{(i)})}_{\\text{cross-entropy}}\n",
     "\\end{align}\n",
     "$$\n",
     "\n",
@@ -286,9 +286,9 @@
     "\n",
     "$$\n",
     "\\begin{align}\n",
-    "\\mathcal{L}_{dis}(\\theta) &= \\frac{1}{N} \\sum_{i=1}^N \\sum_{y' \\in \\{0,\\ldots,9\\}} p^*(y' \\mid \\mathbf{x}^{(i)}) \\log p(y' \\mid \\mathbf{x}^{(i)}) & \\\\\n",
-    "    &= \\frac{1}{N}\\sum_{i=1}^N \\left( 1 \\log p(y^{(i)} \\mid \\mathbf{x}^{(i)}) + \\sum_{y' \\neq y^{(i)}} 0 \\log p(y' \\mid \\mathbf{x}^{(i)}) \\right) & \\text{$p^*$ is one-hot} \\\\\n",
-    "    &= \\frac{1}{N} \\sum_{i=1}^N \\log p(y^{(i)} \\mid \\mathbf{x}^{(i)}) & \\text{negative log-likelihood}\n",
+    "\\mathcal{L}_{dis}(\\theta) &= -\\frac{1}{N} \\sum_{i=1}^N \\sum_{y' \\in \\{0,\\ldots,9\\}} p^*(y' \\mid \\mathbf{x}^{(i)}) \\log p(y' \\mid \\mathbf{x}^{(i)}) & \\\\\n",
+    "    &= -\\frac{1}{N}\\sum_{i=1}^N \\left( 1 \\log p(y^{(i)} \\mid \\mathbf{x}^{(i)}) + \\sum_{y' \\neq y^{(i)}} 0 \\log p(y' \\mid \\mathbf{x}^{(i)}) \\right) & \\text{$p^*$ is one-hot} \\\\\n",
+    "    &= -\\frac{1}{N} \\sum_{i=1}^N \\log p(y^{(i)} \\mid \\mathbf{x}^{(i)}) & \\text{negative log-likelihood}\n",
     "\\end{align}\n",
     "$$\n",
     "\n",
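
Not part of the patch itself: a minimal numpy sketch of the derivation the patch corrects, checking numerically that with one-hot targets the cross-entropy (with the restored leading minus sign) equals the negative log-likelihood. The names (`logits`, `p`, `y`) are illustrative stand-ins, not taken from the notebook.

```python
import numpy as np

rng = np.random.default_rng(0)
N, C = 4, 10  # batch size, number of classes (digits 0..9)

# Illustrative predicted distribution p(y' | x^(i)): softmax of random logits.
logits = rng.normal(size=(N, C))
p = np.exp(logits) / np.exp(logits).sum(axis=1, keepdims=True)

# One-hot target distribution p*(y' | x^(i)) built from true labels y^(i).
y = rng.integers(0, C, size=N)
p_star = np.eye(C)[y]

# Corrected loss: L_dis = -(1/N) sum_i sum_{y'} p*(y'|x^(i)) log p(y'|x^(i))
cross_entropy = -(p_star * np.log(p)).sum(axis=1).mean()

# Negative log-likelihood: -(1/N) sum_i log p(y^(i) | x^(i))
nll = -np.log(p[np.arange(N), y]).mean()

assert np.isclose(cross_entropy, nll)  # the two forms of the loss agree
```

Note the check only passes with the sign fix applied consistently to every line of the derivation; without it, both quantities would be negative and minimizing them would maximize the loss.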