diff --git a/coq/QLearn/jaakkola_vector.v b/coq/QLearn/jaakkola_vector.v
index d5c76b47..61cf3d1a 100644
--- a/coq/QLearn/jaakkola_vector.v
+++ b/coq/QLearn/jaakkola_vector.v
@@ -7981,6 +7981,9 @@ Section qlearn.
     reflexivity.
   Qed.
 
+  Existing Instance isl2_qlearn_Q.
+
+
   Theorem qlearn_jaakkola
       (x' : Rfct (sigT M.(act)))
       (adapt_alpha : forall sa, IsAdapted borel_sa (fun t ω => α t ω sa) F)
@@ -8625,7 +8628,7 @@ Section qlearn.
               now apply identically_distributed_rvs_proper.
       } 
 
-      assert (exists (A : R),
+               assert (exists (A : R),
                 0 < A /\
                forall k sa,
                  almostR2 prts Rbar_le
@@ -8738,14 +8741,21 @@ Section qlearn.
         apply rvminus_rv; try typeclasses eauto.
         apply FiniteCondexp_rv'.
       }
+      assert (isl2_X :forall k sa,
+                 IsLp prts 2 (fun ω => X k ω sa)).
+      {
+        apply isl2_qlearn_Q; try typeclasses eauto.
+        apply alpha_bound.
+        apply filt_sub.
+      }
       assert (forall k sa,
-                  almostR2 prts Rle 
-                           (FiniteConditionalExpectation 
-                              prts (filt_sub k)
-                              (rvsqr (rvminus 
-                                        (Xmin k sa)
-                                        (FiniteConditionalExpectation prts (filt_sub k) (Xmin k sa)))))
-                           (fun ω => Rmax_all (fun sa => Rsqr (X k ω sa)))).
+                 almostR2 prts Rle 
+                   (FiniteConditionalExpectation 
+                      prts (filt_sub k)
+                      (rvsqr (rvminus 
+                                (Xmin k sa)
+                                (FiniteConditionalExpectation prts (filt_sub k) (Xmin k sa)))))
+                   (fun ω => Rmax_all (fun sa => Rsqr (X k ω sa)))).
       {
         intros.
         assert (RandomVariable dom borel_sa (Xmin k sa)) by typeclasses eauto.
@@ -8757,7 +8767,7 @@ Section qlearn.
           apply isfe_Rmax_all; intros.
           - apply rvsqr_rv.
             apply (RandomVariable_sa_sub (filt_sub k)); try typeclasses eauto.
-          - admit.
+          -  admit.
         }
         generalize (conditional_variance_bound_L2_fun (dom2 := F k)
                       (Xmin k sa)
@@ -8844,9 +8854,175 @@ Section qlearn.
             generalize (Rabs_pos x); intros.
             lra.
      }          
-                 
-      eexists; split.
+     assert (exists (A B : R),
+                0 < A /\ 0 < B /\
+                    almost prts
+                      (fun ω : Ts =>
+                         forall (k : nat) (sa : {x : state M & act M x}),
+                           Rbar_le
+                             (ConditionalVariance prts (filt_sub k) (fun ω0 : Ts => FT k sa ω0) ω)
+                             (A + B *
+                                   (Rmax_norm {x : state M & act M x}
+                                      (fun sa0 => X k ω sa0)) ^ 2))).
+      {
+        destruct H5 as [A [??]].
+        exists (2*A).
+        exists 2.
+        split; try lra.
+        split; try lra.
+        apply almost_forall; intros.
+        apply almost_countable_forall; try typeclasses eauto; intros.        
+        specialize (H13 n a).
+        revert H13; apply almost_impl.
+        specialize (H10 n a).
+        revert H10; apply almost_impl.
+        assert (rvf : RandomVariable dom borel_sa
+                        (rvminus (cost n a) 
+                           (FiniteConditionalExpectation prts (filt_sub n) (cost n a)))) by admit.
+        assert (rvg : RandomVariable dom borel_sa 
+                        (rvminus (rvscale γ (Xmin n a))
+                           (FiniteConditionalExpectation prts (filt_sub n)
+                              (rvscale γ (Xmin n a))))) by admit.
+        generalize (nncondexp_sqr_sum_bound_nneg
+                      (rvminus
+                         (cost n a)
+                         (FiniteConditionalExpectation prts (filt_sub n) (cost n a)))
+                      (rvminus
+                         (rvscale γ (Xmin n a))
+                         (FiniteConditionalExpectation 
+                            prts (filt_sub n)
+                            (rvscale γ (Xmin n a))))
+                      (filt_sub n)
+                   ); apply almost_impl.
+        generalize (ConditionalVariance_minus_const  (fun ω0 : Ts =>
+        cost n a ω0 + γ * qlearn_Qmin (X n ω0) (next_state n a ω0)) (x' a) (filt_sub n)); intros.
+        apply almost_prob_space_sa_sub_lift in H10.
+        revert H10; apply almost_impl.
+        apply all_almost; intros ?????.
+        unfold FT.
+        assert ( Rbar_le
+                   (ConditionalVariance prts (filt_sub n)
+                      (fun ω0 : Ts =>
+                         cost n a ω0 + γ * qlearn_Qmin (X n ω0) (next_state n a ω0)) x)
+                   (2 * A +
+                      2 *
+                        Rmax_norm {x0 : state M & act M x0}
+                          (fun sa0 : {x0 : state M & act M x0} => X n x sa0) ^ 2)).
+        {
+          
+          
+          replace (
+              (ConditionalVariance prts (filt_sub n)
+                 (fun ω0 : Ts => cost n a ω0 + γ * qlearn_Qmin (X n ω0) (next_state n a ω0))
+                 x)) with
+             (NonNegCondexp prts (filt_sub n)
+             (rvsqr
+                (rvplus (rvminus (cost n a) (FiniteConditionalExpectation prts (filt_sub n) (cost n a)))
+                   (rvminus (rvscale γ (Xmin n a)) (FiniteConditionalExpectation prts (filt_sub n) (rvscale γ (Xmin n a)))))) x).
+          - eapply Rbar_le_trans.
+            apply H13.
+            rewrite <- Rmult_plus_distr_l.
+            unfold Rbar_rvmult, Rbar_rvplus, const.
+            replace  (Finite (2 * (A + Rmax_norm {x0 : state M & act M x0} (fun sa0 : {x0 : state M & act M x0} => X n x sa0) ^ 2)))
+              with
+              (Rbar_mult 2 (Rbar_plus A (Rmax_norm {x0 : state M & act M x0} (fun sa0 : {x0 : state M & act M x0} => X n x sa0) ^ 2))) by now simpl.
+            apply Rbar_mult_le_compat_l.
+            + simpl; lra.
+            + apply Rbar_plus_le_compat.
+              * unfold ConditionalVariance, const in H15.
+                erewrite Condexp_nneg_simpl in H15.
+                eapply Rbar_le_trans; cycle 1.
+                apply H15.
+                apply slln.eq_Rbar_le.
+                apply NonNegCondexp_ext.
+                reflexivity.
+              * admit.
+          - unfold ConditionalVariance.
+            erewrite Condexp_nneg_simpl.
+            apply NonNegCondexp_ext.
+            intros ?.
+            unfold rvsqr, rvplus.
+            do 3 rewrite rvminus_unfold.
+            f_equal.
+            unfold Rminus.
+            repeat rewrite Rplus_assoc.
+            f_equal.
+            unfold rvscale.
+            replace (FiniteConditionalExpectation prts (filt_sub n) (fun ω0 : Ts => cost n a ω0 + γ * qlearn_Qmin (X n ω0) (next_state n a ω0)) a0)
+              with (FiniteConditionalExpectation prts (filt_sub n) (cost n a) a0 +
+                       FiniteConditionalExpectation prts (filt_sub n) (fun omega : Ts => γ * Xmin n a omega) a0).
+            + unfold Xmin.
+              ring.
+            + admit.
+        }
+        rewrite <- H10 in H16.
+        eapply Rbar_le_trans; cycle 1.
+        apply H16.
+        apply slln.eq_Rbar_le.
+        apply ConditionalVariance_ext.
+        reflexivity.
+      }
+      destruct H13 as [A [B [? [??]]]].
+      assert (exists (c1 c2 : R),
+                 0 <= c1 /\ 0 <= c2 /\
+                 forall k ω,
+                   (Rmax_norm {x : state M & act M x} (fun sa0 : {x : state M & act M x} => X k ω sa0) ^ 2) <=
+                     c1 + c2 * (Rmax_norm {x : state M & act M x} (fun sa0 : {x : state M & act M x} => X k ω sa0 - x' sa0)) ^ 2).
+      {
+        exists (2 * (Rmax_norm {x : state M & act M x} x')^2).
+        exists 2.
+        intros.
+        admit.
+      }
+      destruct H16 as [c1 [c2 [? [??]]]].
+      exists ((A + B*c1) + B*c2).
+      assert (0 < A + B*c1 + B*c2).
+      {
+        eapply Rlt_le_trans.
+        apply H13.
+        assert (0 <= B * c1 + B * c2).
+        {
+          rewrite <- Rmult_plus_distr_l.
+          apply Rmult_le_pos; try lra.
+        }
+        lra.
+      }
+      split; trivial.
+      revert H15.
+      apply almost_impl.
+      apply all_almost; intros ??.
+      intros.
+      specialize (H15 k sa).
+      eapply Rbar_le_trans.
+      apply H15.
+      rewrite Rbar_le_Rle.
+      specialize (H18 k x).
+      apply Rmult_le_compat_l with (r := B) in H18; try lra.
+      apply Rplus_le_compat_l with (r := A) in H18.
+      eapply Rle_trans.
+      apply H18.
+      rewrite Rmult_plus_distr_l.
+      rewrite <- Rplus_assoc.
+      rewrite <- Rmult_assoc.
 
+      assert (0 <= A + B * c1).
+      {
+        apply Rplus_le_le_0_compat; try lra.
+        apply Rmult_le_pos; lra.
+      }
+      assert (0 <= B * c2).
+      {
+        apply Rmult_le_pos; lra.
+      }
+      do 2 rewrite <- Rsqr_pow2.
+      assert (0 <= Rmax_norm {x0 : state M & act M x0} (fun sa0 : {x0 : state M & act M x0} => X k x sa0 - x' sa0)).
+      {
+        admit.
+      }
+      apply (jaakkola_tsitsilis_coefs2 (mknonnegreal _ H22) (mknonnegreal _ H20) (mknonnegreal _ H21)).
+      
+(*
+        Search Rmax_norm.
        + admit.
        + apply almost_forall; intros.
          apply almost_countable_forall; try typeclasses eauto; intros.
@@ -8911,10 +9087,13 @@ Section qlearn.
            admit.
          }
        admit.
+*)
      - intros.
        unfold X, FT.
        simpl.
        now ring_simplify.
+       Unshelve.
+       apply nnfsqr.
     Admitted.