\documentclass[12pt]{report} % You can also use 'article' or another class
% Import the geometry package to adjust margins
\usepackage[a4paper, margin=1in]{geometry} % Adjust margin as needed
\usepackage{fullpage}
\usepackage{wrapfig}
\usepackage{graphicx}
\usepackage[utf8]{inputenc}
\usepackage{multirow}
\usepackage[table]{xcolor}
\usepackage{color,soul}
\usepackage{chngcntr}
\usepackage{listings}
\usepackage{blindtext}
\usepackage{amsmath}
\usepackage{subfig} % Use this instead of subfigure
\usepackage{multicol}
\usepackage{adjustbox}
\usepackage{tabularx} % For table resizing
\usepackage{float} % For table placement
\usepackage{caption} % For better table captions
\usepackage{booktabs}
\usepackage[hidelinks]{hyperref}
\usepackage{titlesec} % For section/chapter font customization
\usepackage{longtable}
\usepackage{lscape} % Use landscape for larger tables if needed
\definecolor{myblue}{RGB}{135,206,250}
\definecolor{dkgreen}{rgb}{0,0.6,0}
\definecolor{gray}{rgb}{0.5,0.5,0.5}
\definecolor{mauve}{rgb}{0.58,0,0.82}
\lstset{frame=tb,
language=Python,
aboveskip=3mm,
belowskip=3mm,
showstringspaces=false,
columns=flexible,
basicstyle={\small\ttfamily},
numbers=none,
numberstyle=\tiny\color{gray},
keywordstyle=\color{blue},
commentstyle=\color{dkgreen},
stringstyle=\color{mauve},
breaklines=true,
breakatwhitespace=true,
tabsize=3
}
\title{Pseudo Spectral Methods for Optimal Control}
\date{August 19, 2024}
\author{}
\begin{document}
\maketitle
\tableofcontents
\listoffigures
\chapter{Introduction}
Pseudospectral methods for optimal control are a class of numerical techniques for solving optimal control problems by approximating the state and control variables with spectral (global polynomial) expansions.
\section{Spectral Methods and Pseudospectral Methods}
\subsection{Spectral Methods}
\textbf{Definition}: Spectral methods are numerical techniques used for solving differential and integral equations by expanding the solution in terms of globally defined basis functions, such as orthogonal polynomials or trigonometric functions.
\subsubsection{Core Idea}
The core idea is to represent the solution \( u(x) \) as a series:
\[
u(x) \approx \sum_{n=0}^{N} a_n \phi_n(x),
\]
where \( \phi_n(x) \) are the chosen basis functions, and \( a_n \) are coefficients determined by projecting the original problem onto the basis functions.
\subsubsection{Types of Spectral Methods}
\begin{itemize}
\item \textbf{Fourier Spectral Method}:
\begin{itemize}
\item \textbf{Basis Functions}: Trigonometric functions such as sine and cosine.
\item \textbf{Application}: Best suited for problems with periodic boundary conditions.
\item \textbf{Formulation}:
\[
u(x) \approx \sum_{k=-N/2}^{N/2} \hat{u}_k e^{ikx},
\]
where \( \hat{u}_k \) are the Fourier coefficients.
\end{itemize}
\item \textbf{Polynomial Spectral Method}:
\begin{itemize}
\item \textbf{Basis Functions}: Orthogonal polynomials, e.g., Chebyshev or Legendre polynomials.
\item \textbf{Application}: Suitable for non-periodic boundary conditions.
\item \textbf{Formulation}:
\[
u(x) \approx \sum_{n=0}^{N} a_n \phi_n(x),
\]
where \( a_n \) are coefficients found through projection.
\end{itemize}
\end{itemize}
\subsubsection{Advantages and Disadvantages}
\begin{itemize}
\item \textbf{Advantages}:
\begin{itemize}
\item High accuracy for smooth problems (exponential convergence).
\item Efficient for global behavior of solutions.
\end{itemize}
\item \textbf{Disadvantages}:
\begin{itemize}
\item Not well-suited for non-smooth solutions.
\item More complex for problems with sharp discontinuities.
\end{itemize}
\end{itemize}
\subsection{Pseudospectral Methods}
\textbf{Definition}: Pseudospectral methods approximate derivatives and solve differential equations by evaluating the function at selected collocation points, instead of directly manipulating the coefficients in the basis function expansion.
\subsubsection{Core Idea}
The function \( u(x) \) is expanded similarly to spectral methods:
\[
u(x) \approx \sum_{n=0}^{N} a_n \phi_n(x).
\]
However, the derivatives are computed at discrete points, known as collocation points, leading to a practical discretization.
\subsubsection{Collocation Points}
Common choices for collocation points include:
\begin{itemize}
\item \textbf{Chebyshev Nodes}:
\[
x_k = \cos\left(\frac{2k - 1}{2N} \pi\right), \quad k = 1, 2, \dots, N.
\]
\item \textbf{Gauss-Lobatto Points}: Nodes used for polynomial interpolation, chosen to enhance numerical stability and minimize errors.
\end{itemize}
\subsubsection{Types of Pseudospectral Methods}
\begin{itemize}
\item \textbf{Fourier Pseudospectral Method}:
\begin{itemize}
\item Utilizes FFT for fast computation.
\item Ideal for periodic problems.
\end{itemize}
\item \textbf{Chebyshev Pseudospectral Method}:
\begin{itemize}
\item Uses Chebyshev polynomials and their nodes for derivatives.
\item Suitable for non-periodic problems.
\end{itemize}
\end{itemize}
\subsubsection{Formulation}
For a function \( u(x) \) and its derivative \( u'(x) \), the derivative at collocation points \( x_k \) can be approximated as:
\[
u'(x_k) \approx \sum_{j=0}^{N} D_{kj} u(x_j),
\]
where \( D_{kj} \) is the differentiation matrix based on the basis functions.
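As a concrete illustration of such a differentiation matrix (this sketch is not taken from the lecture material), the following Python snippet builds the standard Chebyshev differentiation matrix on the Gauss--Lobatto points \( x_j = \cos(j\pi/N) \); the function name, node choice, and test polynomial are illustrative assumptions.
\begin{lstlisting}
import numpy as np

def cheb_diff_matrix(N):
    """Differentiation matrix D and Chebyshev-Gauss-Lobatto nodes x,
    so that D @ u approximates u'(x) at the nodes."""
    if N == 0:
        return np.zeros((1, 1)), np.array([1.0])
    x = np.cos(np.pi * np.arange(N + 1) / N)          # nodes on [-1, 1]
    c = np.hstack([2.0, np.ones(N - 1), 2.0]) * (-1.0) ** np.arange(N + 1)
    dX = x[:, None] - x[None, :]
    D = np.outer(c, 1.0 / c) / (dX + np.eye(N + 1))   # off-diagonal entries
    D -= np.diag(D.sum(axis=1))                       # diagonal from row sums
    return D, x

# Check: differentiate u(x) = x**3, whose exact derivative is 3 x**2.
D, x = cheb_diff_matrix(8)
print(np.max(np.abs(D @ x**3 - 3 * x**2)))            # ~ machine precision
\end{lstlisting}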
\subsubsection{Advantages and Disadvantages}
\begin{itemize}
\item \textbf{Advantages}:
\begin{itemize}
\item Flexible for complex boundary conditions.
\item High accuracy, similar to spectral methods, with a practical approach to discretization.
\end{itemize}
\item \textbf{Disadvantages}:
\begin{itemize}
\item Computational cost due to dense matrix operations.
\item Sensitive to the choice of collocation points.
\end{itemize}
\end{itemize}
\subsection{Comparison Between Spectral and Pseudospectral Methods}
\begin{itemize}
\item \textbf{Approach to Differentiation}:
\begin{itemize}
\item \textbf{Spectral Methods}: Operate in the spectral space by manipulating expansion coefficients.
\item \textbf{Pseudospectral Methods}: Operate in the physical space by evaluating derivatives at collocation points.
\end{itemize}
\item \textbf{Flexibility}:
\begin{itemize}
\item \textbf{Spectral Methods}: Less flexible for complex boundaries.
\item \textbf{Pseudospectral Methods}: More adaptable for varied boundary conditions.
\end{itemize}
\item \textbf{Implementation}:
\begin{itemize}
\item \textbf{Spectral Methods}: Require exact integration for coefficient projection.
\item \textbf{Pseudospectral Methods}: Use discrete points and fast algorithms like FFT.
\end{itemize}
\end{itemize}
\section{Introduction}
Pseudospectral methods are a class of numerical techniques used primarily for solving differential equations, particularly in the context of optimal control problems and partial differential equations. They combine the strengths of spectral methods and finite difference methods. Spectral methods approximate the solution to a differential equation by expressing it as a sum of global basis functions, typically orthogonal polynomials like Chebyshev polynomials or trigonometric functions. Pseudospectral methods are a specific type of spectral method where the differential equation is enforced at a set of discrete points in the domain, known as collocation points.
The concepts of direct and indirect methods are widely used in optimal control theory and numerical optimization. The two approaches differ fundamentally in the sequence in which they handle the discretization and optimization processes. In direct methods, the problem is first discretized and then optimized.
In this course, we will be using direct methods with a major focus on the discretization part.
\section{Important Terms}
Brief introduction to the important terms we will be using in the discussions.
\subsection{Optimal Control Problem}
Components of an Optimal Control Problem:
\begin{enumerate}
\item \textbf{State Variables} \(x(t)\): These describe the state of the system at any time \(t\). The evolution of these states is typically governed by differential equations.
\item \textbf{Control Variables} \(u(t)\): These are the inputs or decisions that can be manipulated over time to influence the behavior of the state variables.
\item \textbf{Dynamics (State Equations)}:
\[
\dot{x}(t) = f(x(t), u(t), t)
\]
This is a set of differential equations describing how the state variables evolve over time as a function of the current state, control inputs, and time.
\item \textbf{Cost Function (Objective Function)}:
\[
J = \int_{t_0}^{t_f} L(x(t), u(t), t)\, dt + \Phi(x(t_f))
\]
Here, \(L(x(t), u(t), t)\) is the running cost (instantaneous cost rate), and \(\Phi(x(t_f))\) is the terminal cost at the final time \(t_f\). The goal is to minimize (or maximize) this cost function.
\item \textbf{Boundary Conditions}:
\[
x(t_0) = x_0 \quad \text{and possibly} \quad x(t_f) = x_f
\]
These specify the initial state (and sometimes the final state) of the system.
\item \textbf{Constraints}: These can include state constraints, control constraints, and mixed constraints.
\begin{itemize}
\item \textbf{State Constraints}: \(g(x(t)) \leq 0\)
\item \textbf{Control Constraints}: \(h(u(t)) \leq 0\)
\item \textbf{Mixed Constraints}: Involving both state and control variables.
\end{itemize}
\end{enumerate}
\section{Least Squares Approach}
The \textbf{Least Squares approach} is a mathematical method used to find the best-fitting solution to a set of data by minimizing the sum of the squares of the differences between the observed and predicted values. Suppose you have a set of observed data points $(x_i, y_i)$, and you want to fit a model $f(x)$ to these data points. The least squares method minimizes the following sum:
\begin{equation}
S = \sum_{i=1}^{n} \left( y_i - f(x_i) \right)^2
\end{equation}
In the least squares method, the approximated function is not guaranteed to be equal to the value of the actual function at the node points.
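As a quick numerical illustration (not part of the class material), the sketch below fits a low-order polynomial to sampled data in the least-squares sense; the sampled function and polynomial degree are arbitrary choices, and the fitted curve does not, in general, pass through the data points.
\begin{lstlisting}
import numpy as np

# Sample a function at a handful of points (illustrative choice).
x = np.linspace(0.0, 1.0, 8)
y = np.sin(2 * np.pi * x)

# Least-squares fit of a cubic: minimizes sum_i (y_i - f(x_i))**2 over the
# polynomial coefficients; the fit need not match the data at the nodes.
coeffs = np.polyfit(x, y, deg=3)
residual = y - np.polyval(coeffs, x)
print("sum of squared residuals:", np.sum(residual**2))   # nonzero in general
\end{lstlisting}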
\section{Interpolation}
\textbf{Interpolation} is a mathematical technique used to estimate unknown values that fall within the range of known data points. It is widely used in numerical analysis, data science, engineering, and other fields where it is necessary to predict values for intermediate points based on a set of discrete data points. Unlike the least squares approach, in interpolation, the modeled function passes through the selected data points.
One common interpolation method is the \textbf{Lagrange Interpolation}, which constructs the interpolating polynomial using a linear combination of Lagrange basis polynomials. The interpolating polynomial $P(x)$ is given by:
\begin{equation}
P(x) = \sum_{i=0}^{n} y_i \prod_{\substack{0 \leq j \leq n \\ j \neq i}} \frac{x - x_j}{x_i - x_j}
\end{equation}
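A minimal Python sketch of this formula is given below (illustrative nodes and data only); it evaluates \( P(x) \) directly from the Lagrange form and, by construction, reproduces the data exactly at the nodes.
\begin{lstlisting}
import numpy as np

def lagrange_eval(x_nodes, y_nodes, x):
    """Evaluate the Lagrange interpolating polynomial P(x) passing
    through the points (x_nodes[i], y_nodes[i])."""
    total = 0.0
    n = len(x_nodes)
    for i in range(n):
        term = y_nodes[i]
        for j in range(n):
            if j != i:
                term *= (x - x_nodes[j]) / (x_nodes[i] - x_nodes[j])
        total += term
    return total

x_nodes = np.array([0.0, 0.5, 1.0])
y_nodes = np.exp(x_nodes)
print(lagrange_eval(x_nodes, y_nodes, 0.5), np.exp(0.5))  # exact at a node
\end{lstlisting}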
\section{Class Discussion - MATLAB Plot: Least Squares vs Interpolation}
As discussed in class, the interpolation method ensures that the value of the approximated function is exactly the value of the actual function at the selected node points. Figure \ref{fig:comparison_plot} shows the difference between the two approximation methods (Least Squares and Interpolation) via a simple MATLAB plot discussed during the lecture.
\begin{figure}[h]
\centering
\includegraphics[width=0.7\textwidth]{least_squares_vs_interpolation.png} % Include your plot here
\caption{Comparison of Least Squares and Interpolation Methods.}
\label{fig:comparison_plot}
\end{figure}
\section{Collocation Method}
The collocation method is a technique used to approximate solutions of differential equations by forcing the solution to satisfy the differential equation at a finite number of points called collocation points.
For a given set of nodes, the interpolating polynomial can be written in either of two forms:
\begin{equation}
P_n(x) = \sum_{i=0}^{n} a_i(x)\, f(x_i)
\end{equation}
or
\begin{equation}
P_n(x) = \sum_{i=0}^{n} b_i x^i
\end{equation}
where the \( a_i(x) \) are basis functions (such as the Lagrange basis polynomials) and the \( b_i \) are the coefficients in the monomial basis. Both are interpolation representations: the value of the approximating polynomial equals the value of the actual function at the node points.
\subsection{Vandermonde Matrix}
Given a vector of elements \( x = [x_1, x_2, \dots, x_n] \), the Vandermonde matrix \( V \) associated with this vector is an \( n \times n \) matrix, where each row corresponds to a geometric progression of the elements \( x_i \). Specifically, the matrix is defined as follows:
\[
V = \begin{pmatrix}
1 & x_1 & x_1^2 & \dots & x_1^{n-1} \\
1 & x_2 & x_2^2 & \dots & x_2^{n-1} \\
\vdots & \vdots & \vdots & \dots & \vdots \\
1 & x_n & x_n^2 & \dots & x_n^{n-1}
\end{pmatrix}
\]
Each element \( V_{ij} \) in the matrix is given by:
\begin{equation}
V_{ij} = x_i^{j-1}
\end{equation}
The determinant of the Vandermonde matrix has a special form and is given by:
\begin{equation}
\det(V) = \prod_{1 \leq i < j \leq n} (x_j - x_i)
\end{equation}
This product is non-zero if and only if all the \( x_i \) are distinct, meaning the determinant of the Vandermonde matrix is non-zero (and hence the matrix is invertible) if all the elements in the vector \( x \) are distinct. The proof for this will be discussed later.
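For illustration (with arbitrarily chosen values), the monomial coefficients of the interpolating polynomial can be obtained by solving a linear system with the Vandermonde matrix, which is invertible here because the nodes are distinct.
\begin{lstlisting}
import numpy as np

x = np.array([0.0, 0.5, 1.0, 1.5])        # distinct nodes
y = np.exp(x)                             # sampled function values

# V[i, j] = x_i**j; solving V a = y gives the coefficients of
# P(x) = a_0 + a_1 x + ... + a_n x**n.
V = np.vander(x, increasing=True)
a = np.linalg.solve(V, y)
print("det(V) =", np.linalg.det(V))       # nonzero since the nodes are distinct
print("P(x_i) - y_i =", V @ a - y)        # interpolation conditions hold
\end{lstlisting}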
\section{Runge Phenomenon}
When interpolating a set of points, one common approach is to fit a polynomial that passes through all the given data points. The idea is that the polynomial will closely approximate the underlying function from which the data points are sampled. However, when the function is sampled at equally spaced points and a high-degree polynomial is used, the interpolation error can become quite significant near the endpoints of the interval. This is known as \textbf{Runge's Phenomenon}.
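The effect is easy to reproduce numerically; the sketch below (an illustration, using the classic example \( f(x) = 1/(1+25x^2) \)) interpolates at equally spaced nodes and shows the maximum error growing with the polynomial degree.
\begin{lstlisting}
import numpy as np
from scipy.interpolate import BarycentricInterpolator

f = lambda x: 1.0 / (1.0 + 25.0 * x**2)       # classic Runge example
x_fine = np.linspace(-1.0, 1.0, 1001)

for n in (5, 10, 20):
    nodes = np.linspace(-1.0, 1.0, n + 1)     # equally spaced nodes
    p = BarycentricInterpolator(nodes, f(nodes))
    err = np.max(np.abs(p(x_fine) - f(x_fine)))
    print(f"degree {n:2d}: max error = {err:.2e}")   # grows near the endpoints
\end{lstlisting}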
\section{Chebyshev Nodes}
Chebyshev nodes are specific points used in numerical interpolation, particularly in polynomial interpolation, to reduce the problems associated with Runge's phenomenon and to achieve more accurate approximations.\\\\
Chebyshev nodes are the roots of Chebyshev polynomials of the first kind. For a given interval \([-1, 1]\), the Chebyshev nodes \(x_i\) for \(n\) interpolation points are defined as:
\begin{equation}
x_i = \cos\left(\frac{2i - 1}{2n} \pi\right), \quad i = 1, 2, \dots, n
\end{equation}
These nodes are spaced more densely near the endpoints of the interval \([-1, 1]\) and more sparsely near the center. When interpolating over a different interval \([a, b]\), you can linearly transform the Chebyshev nodes from \([-1, 1]\) to \([a, b]\) using the transformation:
\[
x'_i = \frac{b-a}{2} \left( x_i + 1 \right) + a
\]
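A small sketch of this construction is shown below (illustrative, reusing the Runge example from the previous section): it generates Chebyshev nodes, maps them to a general interval, and shows the interpolation error decreasing as the degree grows.
\begin{lstlisting}
import numpy as np
from scipy.interpolate import BarycentricInterpolator

def chebyshev_nodes(n, a=-1.0, b=1.0):
    """Roots of the degree-n Chebyshev polynomial, mapped from [-1, 1] to [a, b]."""
    i = np.arange(1, n + 1)
    x = np.cos((2 * i - 1) * np.pi / (2 * n))   # nodes on [-1, 1]
    return 0.5 * (b - a) * (x + 1.0) + a        # affine map to [a, b]

f = lambda x: 1.0 / (1.0 + 25.0 * x**2)
x_fine = np.linspace(-1.0, 1.0, 1001)
for n in (6, 11, 21):
    nodes = chebyshev_nodes(n)
    p = BarycentricInterpolator(nodes, f(nodes))
    print(n, np.max(np.abs(p(x_fine) - f(x_fine))))   # error shrinks with n
\end{lstlisting}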
\section{Weierstrass Theorem}
The \textbf{Weierstrass Approximation Theorem} states that for every continuous function \(f\) defined on a closed interval \([a, b]\), and for every \(\epsilon > 0\), there exists a polynomial \(P(x)\) such that:
\begin{equation}
|f(x) - P(x)| < \epsilon
\end{equation}
for all \(x\) in \([a, b]\). In other words, any continuous function on a closed interval can be uniformly approximated as closely as desired by a polynomial function. The proof of the Weierstrass Approximation Theorem typically involves constructing a sequence of polynomials that converge uniformly to the given continuous function. One standard approach uses Bernstein polynomials, which are constructed from binomial coefficients and shown to converge uniformly to the function \(f(x)\).
\section{Trapezoidal Method}
The \textbf{Trapezoidal Method} is used to approximate the area under a curve by dividing the region into a series of trapezoids. The area of each trapezoid is computed and summed to give an approximation of the definite integral.
Consider a function \(f(x)\) defined on the interval \([a, b]\). The goal is to approximate the definite integral:
\begin{equation}
\int_a^b f(x) dx
\end{equation}
Using the trapezoidal method, the interval \([a, b]\) is divided into \(n\) subintervals of equal width:
\[
h = \frac{b - a}{n}
\]
The points dividing the interval are:
\[
x_0 = a, \quad x_1 = a + h, \quad x_2 = a + 2h, \dots, x_n = b
\]
The trapezoidal rule then approximates the integral as:
\begin{equation}
\int_a^b f(x) dx \approx \frac{h}{2} \left[ f(x_0) + 2f(x_1) + 2f(x_2) + \dots + 2f(x_{n-1}) + f(x_n) \right]
\end{equation}
This formula sums the function values at the endpoints (with weight \( h/2 \)) and at the interior points (with weight \( h \)) to give a numerical estimate of the integral.
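A direct translation of the formula into code (a minimal sketch, with \( \sin \) on \( [0, \pi] \) as an arbitrary test integrand) is:
\begin{lstlisting}
import numpy as np

def trapezoid(f, a, b, n):
    """Composite trapezoidal approximation of the integral of f over [a, b]."""
    x = np.linspace(a, b, n + 1)
    h = (b - a) / n
    return h * (0.5 * f(x[0]) + np.sum(f(x[1:-1])) + 0.5 * f(x[-1]))

# The exact value of the test integral is 2; the error decays like O(h**2).
for n in (4, 8, 16):
    approx = trapezoid(np.sin, 0.0, np.pi, n)
    print(n, approx, abs(approx - 2.0))
\end{lstlisting}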
\subsection{Example}
The Trapezoidal method provides good accuracy for periodic functions and becomes exact after a certain number of nodes are used for discretization. (The proof for this can be found in the class notes on Moodle.) Figure \ref{fig:trapezoid_accuracy} demonstrates the accuracy of the trapezoidal method, as shown by the MATLAB code discussed in class.
\begin{figure}[h!]
\centering
\includegraphics[width=0.8\textwidth]{trapezoid_accuracy.png} % Assuming you have an image here.
\caption{Trapezoid method accuracy.}
\label{fig:trapezoid_accuracy}
\end{figure}
\section{Central Difference Method}
The Central Difference method is a finite difference scheme used to approximate derivatives by expanding around two neighboring points. It is second-order accurate, meaning it approximates derivatives to second-order accuracy in terms of the grid spacing \(\Delta x\).
\subsection{Expansion Around \(x_{i+1} = x_i + \Delta x\)}
By expanding the function \(f(x)\) around \(x_{i+1} = x_i + \Delta x\), we obtain the following Taylor series expansion:
\begin{equation}
f(x_{i+1}) = f(x_i + \Delta x) = f(x_i) + \frac{f'(x_i)}{1!} \Delta x + \frac{f''(x_i)}{2!} (\Delta x)^2 + \frac{f'''(x_i)}{3!} (\Delta x)^3 + O((\Delta x)^4)
\end{equation}
\subsection{Expansion Around \(x_{i-1} = x_i - \Delta x\)}
Similarly, expanding the function around \(x_{i-1} = x_i - \Delta x\), we get:
\begin{equation}
f(x_{i-1}) = f(x_i - \Delta x) = f(x_i) - \frac{f'(x_i)}{1!} \Delta x + \frac{f''(x_i)}{2!} (\Delta x)^2 - \frac{f'''(x_i)}{3!} (\Delta x)^3 + O((\Delta x)^4)
\end{equation}
\subsection{Central Difference Formula}
Taking the difference between \(f(x_{i+1})\) and \(f(x_{i-1})\), we get:
\begin{equation}
f(x_{i+1}) - f(x_{i-1}) = 2 f'(x_i) \Delta x + \frac{2 f'''(x_i)}{6} (\Delta x)^3 + O((\Delta x)^4)
\end{equation}
Dividing by \(2\Delta x\) and neglecting the higher-order terms, the first derivative \(f'(x_i)\) is approximated as:
\begin{equation}
f'(x_i) \approx \frac{f(x_{i+1}) - f(x_{i-1})}{2\Delta x}
\end{equation}
Since the leading error term is \( \frac{f'''(x_i)}{6} (\Delta x)^2 \), the central difference scheme is second-order accurate; it gives exact results whenever \( f \) is a polynomial of degree at most two.
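A short numerical check of these statements (illustrative values of \( \Delta x \)):
\begin{lstlisting}
def central_diff(f, x, dx):
    """Second-order accurate central-difference estimate of f'(x)."""
    return (f(x + dx) - f(x - dx)) / (2.0 * dx)

# Exact for f(x) = x**2 (f'(1) = 2), while for f(x) = x**3 the estimate of
# f'(1) = 3 carries the O(dx**2) truncation error.
for dx in (0.1, 0.05):
    print(central_diff(lambda t: t**2, 1.0, dx),
          central_diff(lambda t: t**3, 1.0, dx))
\end{lstlisting}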
\subsection{Higher-Order Central Difference}
By increasing the size of the stencil (i.e., including more neighboring points), we can reduce the truncation error and improve the order of accuracy of the method: a wider stencil differentiates polynomials of correspondingly higher degree exactly.
\section{Example Problem}
Consider a block with the following boundary conditions:
\[
x(0) = 0, \quad v(0) = 0, \quad x(1) = 1, \quad v(1) = 0
\]
The system is governed by the equations:
\[
\dot{x} = v, \quad \dot{v} = u
\]
The objective is to minimize the cost functional:
\[
J = \int_0^1 u(t)^2 \, dt
\]
Using the trapezoidal method, we can approximate the integral as:
\[
J \approx \left( \frac{u_0^2}{2} + u_1^2 + \cdots + u_{n-1}^2 + \frac{u_n^2}{2} \right) \Delta t
\]
\subsection{Two Methods for Nonlinear Equality Constraints}
\subsubsection{Central Difference Method}
In the central difference method, the constraint conditions are formed as follows for the interior points:
\[
\frac{x_{k+1} - x_{k-1}}{2h} = v_k, \quad \frac{v_{k+1} - v_{k-1}}{2h} = u_k
\]
For the boundary points, forward and backward Euler methods are used to satisfy the conditions at the boundaries.
\subsubsection{Trapezoidal Method}
In the trapezoidal method, the constraint conditions are written as:
\[
x_{k+1} - x_k = \frac{v_{k+1} + v_k}{2} \Delta t, \quad v_{k+1} - v_k = \frac{u_{k+1} + u_k}{2} \Delta t
\]
The full implementation of this method can be found in the \texttt{nonlincon.m} file, as discussed in class.
To perform the optimization, we use the \texttt{fmincon} function from MATLAB. \texttt{fmincon} is used for solving constrained nonlinear optimization problems, and it finds the minimum of a scalar function subject to the specified constraints.
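The class implementation uses MATLAB's \texttt{fmincon} together with \texttt{nonlincon.m}; as a rough analogue (not the course code), the sketch below sets up the same trapezoidal transcription in Python and hands it to \texttt{scipy.optimize.minimize} with the SLSQP solver. The node count, initial guess, and solver options are arbitrary choices made for illustration.
\begin{lstlisting}
import numpy as np
from scipy.optimize import minimize

N = 20                         # number of trapezoidal intervals
dt = 1.0 / N
n = N + 1                      # number of nodes

def unpack(z):
    return z[:n], z[n:2 * n], z[2 * n:]              # x, v, u at the nodes

def objective(z):
    _, _, u = unpack(z)
    w = np.full(n, dt)                               # trapezoidal weights
    w[0] = w[-1] = dt / 2
    return np.sum(w * u**2)                          # J = integral of u^2

def defects(z):
    x, v, u = unpack(z)
    dx = x[1:] - x[:-1] - dt / 2 * (v[1:] + v[:-1])  # trapezoidal dynamics
    dv = v[1:] - v[:-1] - dt / 2 * (u[1:] + u[:-1])
    bc = [x[0], v[0], x[-1] - 1.0, v[-1]]            # boundary conditions
    return np.concatenate([dx, dv, bc])

res = minimize(objective, np.zeros(3 * n), method="SLSQP",
               constraints={"type": "eq", "fun": defects},
               options={"maxiter": 500})
x, v, u = unpack(res.x)
print("J =", res.fun)          # analytic optimum: u(t) = 6 - 12 t, J = 12
print("u(0), u(1) =", u[0], u[-1])
\end{lstlisting}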
\section{Discussion of MATLAB and Mathematica Presentation in Class}
With the same boundary conditions as the previous problem, a related problem discussed in class has 5 unknowns and only 4 conditions; the remaining degree of freedom is what makes it an optimization problem, and similar numerical methods are used for its solution.
\section{Rolle's Theorem}
Rolle's Theorem is a fundamental result in calculus that provides a specific condition under which a function must have a horizontal tangent line (a point where the derivative is zero) at least once within a given interval. It is a special case of the Mean Value Theorem and is often used in optimization and root-finding arguments.
Let \( f(x) \) be a function that satisfies the following three conditions:
\begin{enumerate}
\item \textbf{Continuity:} \( f(x) \) is continuous on the closed interval \([a, b]\).
\item \textbf{Differentiability:} \( f(x) \) is differentiable on the open interval \((a, b)\).
\item \textbf{Equal Endpoints:} \( f(a) = f(b) \).
\end{enumerate}
If these conditions are met, then there exists at least one point \( c \in (a, b) \) such that the derivative of \( f(x) \) at \( c \) is zero:
\[
f'(c) = 0
\]
\section{Derivatives of Lagrange Polynomial}
Consider a function approximated with a second-order Lagrange polynomial using equally spaced nodes at \(0\), \(h\), and \(2h\). The polynomial can be expressed as:
\[
P(x) = y_0 \cdot \ell_0(x) + y_1 \cdot \ell_1(x) + y_2 \cdot \ell_2(x)
\]
Where the Lagrange basis polynomials are defined as follows:
\[
\ell_0(x) = \frac{(x - x_1)(x - x_2)}{(x_0 - x_1)(x_0 - x_2)}
\]
\[
\ell_1(x) = \frac{(x - x_0)(x - x_2)}{(x_1 - x_0)(x_1 - x_2)}
\]
\[
\ell_2(x) = \frac{(x - x_0)(x - x_1)}{(x_2 - x_0)(x_2 - x_1)}
\]
The derivative of the polynomial \( P'(x) \) is given by:
\[
P'(x) = y_0 \cdot \frac{2x - (x_1 + x_2)}{(x_0 - x_1)(x_0 - x_2)} + y_1 \cdot \frac{2x - (x_0 + x_2)}{(x_1 - x_0)(x_1 - x_2)} + y_2 \cdot \frac{2x - (x_0 + x_1)}{(x_2 - x_0)(x_2 - x_1)}
\]
To represent the derivatives at the node points in matrix format, we can express it as:
\[
\begin{bmatrix}
P'(x_0) \\
P'(x_1) \\
P'(x_2)
\end{bmatrix} =
\begin{bmatrix}
-\frac{3}{2h} & \frac{2}{h} & -\frac{1}{2h} \\
-\frac{1}{2h} & 0 & \frac{1}{2h} \\
\frac{1}{2h} & -\frac{2}{h} & \frac{3}{2h}
\end{bmatrix}
\begin{bmatrix}
P(x_0) \\
P(x_1) \\
P(x_2)
\end{bmatrix}
\]
This shows that the derivatives of the approximated function at the node points are a linear combination of the actual function values at the node points.
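The same construction can be carried out numerically for arbitrary nodes; the sketch below (illustrative) assembles the differentiation matrix \( D_{kj} = \ell_j'(x_k) \) from the Lagrange basis and, for the nodes \( 0, h, 2h \), reproduces the matrix above.
\begin{lstlisting}
import numpy as np

def diff_matrix(nodes):
    """D[k, j] = l_j'(x_k), so D @ f(nodes) gives the derivative of the
    interpolating polynomial at the nodes."""
    nodes = np.asarray(nodes, dtype=float)
    n = len(nodes)
    D = np.zeros((n, n))
    for k in range(n):
        for j in range(n):
            if j == k:
                D[k, j] = sum(1.0 / (nodes[k] - nodes[m])
                              for m in range(n) if m != k)
            else:
                num = np.prod([nodes[k] - nodes[m]
                               for m in range(n) if m not in (j, k)])
                den = np.prod([nodes[j] - nodes[m]
                               for m in range(n) if m != j])
                D[k, j] = num / den
    return D

h = 0.1
print(diff_matrix([0.0, h, 2 * h]) * (2 * h))  # [[-3, 4, -1], [-1, 0, 1], [1, -4, 3]]
\end{lstlisting}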
\section{Controlling Error by Altering the Location of Nodes}
\textbf{Claim:} When we approximate a function using a polynomial of order \( n \), the error in the representation can be altered by changing the position of the nodes (collocation points).\\\\
\textbf{Proof:} Consider the error term defined as:
\[
W(t) = f(t) - p_n(f; t) - (t - x_0)(t - x_1) \cdots (t - x_n) K(x)
\]
Where \( K(x) \) is defined as:
\[
K(x) = \frac{f(x) - p_n(f; x)}{(x - x_0)(x - x_1) \cdots (x - x_n)}
\]
For an \( n \)-th order polynomial, there are \( (n + 1) \) coefficients. The function \( W(t) \) vanishes at the following \( (n + 2) \) points:
\begin{itemize}
\item \( t = x_i \) for all \( i = 0, 1, \ldots, n \) (the node points)
\item \( t = x \)
\end{itemize}
Since \( W(t) \) has \( n + 2 \) zeros, repeated application of Rolle's Theorem guarantees a point \( \xi \) in the interval containing the nodes and \( x \) at which \( W^{(n+1)} \) vanishes. Differentiating \( W(t) \) a total of \( (n+1) \) times gives:
\[
W^{(n+1)}(t) = f^{(n+1)}(t) - (n + 1)! K(x)
\]
Thus, at point \( \xi \):
\[
0 = W^{(n+1)}(\xi) = f^{(n+1)}(\xi) - (n + 1)! K(x)
\]
From this, we can express \( K(x) \) as:
\[
K(x) = \frac{1}{(n + 1)!} f^{(n+1)}(\xi)
\]
Consequently, the error \( e(x) \) can be written as:
\[
e(x) = f(x) - p_n(x) = \frac{1}{(n + 1)!} f^{(n+1)}(\xi) (x - x_0)(x - x_1) \cdots (x - x_n)
\]
This shows that while we do not have direct control over \( f^{(n+1)} \) to reduce the error, we can alter the location of the nodes to change the error. \\\\
Figure 4 shows the class representation in Mathematica, illustrating that for a function approximated using a second-order polynomial, taking the derivative at the central control point is the same as applying the central difference at that point.
\section{Difference Between a Polynomial and Its \( n \)th Order Interpolation Approximation}
The error in approximating a function \( f(x) \) using an \( n \)th order polynomial \( p_n(x) \) can be expressed as:
\[
e(x) = f(x) - p_n(x) = \frac{1}{(n + 1)!} f^{(n+1)}(\xi)(x - x_0)(x - x_1) \cdots (x - x_n)
\]
From MATLAB simulations, we found that the optimal node locations for approximating a second-degree polynomial with a linear (first-degree) Lagrange interpolant, in the symmetric case, are as follows:
\begin{itemize}
\item For least RMS error: \( x_0 = -\sqrt{\frac{1}{3}} \) and \( x_1 = \sqrt{\frac{1}{3}} \)
\item For least maximum error: \( x_0 = -\sqrt{\frac{1}{2}} \) and \( x_1 = \sqrt{\frac{1}{2}} \)
\end{itemize}
Figure 5 shows the Mathematica representation of these findings as discussed in class. A similar analysis applies to a third-degree polynomial; refer to the MATLAB code for the optimal node points in that case.
\section*{Example Problem}
Consider the function \( x^{n+1} \) represented using an \( n \)th order polynomial:
\[
P_n(x) = \sum_{i=0}^{n} a_i x^i
\]
We need to find the leading coefficient of the approximation, \( a_n \). Equally spaced node points are chosen from 0 to 1 (including both endpoints), i.e.\ \( x_i = i/n \).
\textbf{Solution:} Using the interpolation remainder form:
\[
x^{n+1} - \sum_{i=0}^{n} a_i x^i = A\,(x - 0)\left(x - \frac{1}{n}\right)\left(x - \frac{2}{n}\right) \cdots \left(x - \frac{n-1}{n}\right)(x - 1)
\]
Comparing the coefficients of \( x^{n+1} \) on both sides gives \( A = 1 \). Comparing the coefficients of \( x^{n} \), the left-hand side gives \( -a_n \) while the right-hand side gives \( -\sum_{i=0}^{n} x_i = -\frac{n+1}{2} \), so the leading coefficient is \( a_n = \frac{n + 1}{2} \).
\section{Condition for Vandermonde Matrix Being Invertible}
The Vandermonde matrix is always invertible except when two node points are the same. To understand this, we can observe the Vandermonde matrix defined as:
\[
V =
\begin{pmatrix}
1 & x_0 & x_0^2 & \cdots & x_0^n \\
1 & x_1 & x_1^2 & \cdots & x_1^n \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
1 & x_n & x_n^2 & \cdots & x_n^n
\end{pmatrix}
\]
If two node points become the same, two rows of the matrix become identical, causing the determinant to be zero, thus making the matrix non-invertible.
Now, consider the Vandermonde matrix with the last row replaced by \( x \) instead of \( x_n \):
\[
V =
\begin{pmatrix}
1 & x_0 & x_0^2 & \cdots & x_0^n \\
1 & x_1 & x_1^2 & \cdots & x_1^n \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
1 & x & x^2 & \cdots & x^n
\end{pmatrix}
\]
If \( x \) takes any of the values \( x_0, x_1, \ldots, x_{n-1} \), two rows coincide and the determinant is zero. Viewed as a polynomial of degree \( n \) in \( x \), the determinant therefore factors as
\[
\det(V) = A \prod_{k=0}^{n-1} (x - x_k)
\]
where \( A \) is a coefficient that does not contain \( x \). Setting \( x = x_n \) recovers the determinant of the original Vandermonde matrix, \( A \prod_{k=0}^{n-1} (x_n - x_k) \). Further observation shows that \( A \) is itself the determinant of the Vandermonde matrix built from \( x_0, \ldots, x_{n-1} \); applying the same argument recursively gives:
\[
\text{det}(V) = (x_1 - x_0) \prod_{k=0}^{1} (x_2 - x_k) \prod_{k=0}^{2} (x_3 - x_k) \cdots \prod_{k=0}^{n-1} (x_n - x_k)
\]
This form shows that the determinant of the Vandermonde matrix is non-zero unless two nodes coincide.
\section{Lagrange Basis Polynomial}
\subsection{Cramer’s Rule}
Consider a system of linear equations:
\[
Ax = b
\]
where:
\begin{itemize}
\item \( A \) is an \( n \times n \) matrix of coefficients,
\item \( x \) is a column vector of unknowns,
\item \( b \) is a column vector of constants.
\end{itemize}
Cramer’s Rule states that if \( \det(A) \neq 0 \), the solution to the system is given by:
\[
x_i = \frac{\det(A_i)}{\det(A)} \quad \text{for } i = 1, 2, \ldots, n,
\]
where \( A_i \) is obtained by replacing the \( i \)-th column of \( A \) with \( b \).
\subsubsection{Proof}
For a particular \( m \) with \( 0 \leq m \leq n \), consider:
\[
x^m = \sum_{j=0}^{n} W(x_j)\, x_j^{m}
\]
Note that even though for \( m < n \) it appears that the right-hand side has a higher power of \( x \) than \( m \) (the Lagrange basis polynomials are of degree \( n \)), the higher-order terms cancel so that both sides match.
The aim is to prove that
\[
P_n(x) = \sum_{i=0}^{n} a_ix^i
\]
can be represented as
\[
P_n(x) = \sum_{i=0}^{n} W_i f(x_i)
\]
where the \( W_i \) are the Lagrange bases.
Thus,
\[
P_n(x) = \sum_{k=0}^{n} a_k x^k
\]
can be rearranged, using \( x^k = \sum_{j=0}^{n} W(x_j)\, x_j^{k} \), as follows:
\[
P_n(x) = \sum_{k=0}^{n} a_k \sum_{j=0}^{n} W(x_j)\, x_j^{k}
\]
Rearranging gives:
\[
P_n(x) = \sum_{j=0}^{n} W(x_j) \sum_{k=0}^{n} a_k x_j^{k}
\]
Therefore,
\[
P_n(x) = \sum_{j=0}^{n} W(x_j)\, P_n(x_j)
\]
To find the Lagrange basis, we write the system of equations in matrix format:
\[
\begin{pmatrix}
1 & 1 & \cdots & 1 \\
x_0 & x_1 & \cdots & x_n \\
x_0^2 & x_1^2 & \cdots & x_n^2 \\
\vdots & \vdots & \ddots & \vdots \\
x_0^n & x_1^n & \cdots & x_n^n
\end{pmatrix}
\begin{pmatrix}
W_0 \\
W_1 \\
W_2 \\
\vdots \\
W_n
\end{pmatrix}
=
\begin{pmatrix}
1 \\
x \\
x^2 \\
\vdots \\
x^n
\end{pmatrix}
\]
Notably, the coefficient matrix is the transpose of the Vandermonde matrix:
\[
V^T =
\begin{pmatrix}
1 & 1 & \cdots & 1 \\
x_0 & x_1 & \cdots & x_n \\
x_0^2 & x_1^2 & \cdots & x_n^2 \\
\vdots & \vdots & \ddots & \vdots \\
x_0^n & x_1^n & \cdots & x_n^n
\end{pmatrix}
\]
Using Cramer’s rule, we know that:
\[
W_j = \frac{\Delta_j}{\Delta}
\]
where \( \Delta \) is the determinant of the Vandermonde matrix and \( \Delta_j \) is the determinant of the Vandermonde matrix with the elements \( x_j \) replaced with \( x \).
To find the Lagrange polynomials:
\[
\Delta = \det(V) = (x_1 - x_0) \prod_{k=0}^{1} (x_2 - x_k) \prod_{k=0}^{2} (x_3 - x_k) \cdots \prod_{k=0}^{n-1} (x_n - x_k)
\]
Separating all terms that have an \( x_j \) in them:
\[
\det(V) = \lambda_j \prod_{k=0}^{j-1} (x_j - x_k) \prod_{k=j+1}^{n} (x_k - x_j)
\]
Here, \( \lambda_j \) is the part of the determinant that does not have any \( x_j \) terms. Replacing \( x_j \) with \( x \):
\[
\Delta_j = \lambda_j \prod_{k=0}^{j-1} (x - x_k) \prod_{k=j+1}^{n} (x_k - x)
\]
Thus,
\[
W(x_j) = \ell_j(x) = \prod_{\substack{0 \leq k \leq n \\ k \neq j}} \frac{x - x_k}{x_j - x_k}
\]
We can conclude that all the Lagrange basis polynomials are of the same order, and for \( n + 1 \) node points over which the polynomial is approximated, the basis function will be of order \( n \). Note that the Lagrange basis polynomial \( \ell_j \) has a value of 1 at the node \( x_j \) and zero at all other node points. The polynomial cannot have more zeros than the number of remaining node points, as verified from the degree of the polynomial.
\subsection{Important Points}
\subsubsection{Intuitive Understanding of Why the Sum of Weights is One}
Every approximation of a function must be able to model a constant function as well. Thus, the sum of weights or the coefficients must equal one. This can be mathematically represented as:
\[
P(x) = \sum_{k=0}^{n} W_k(x) f(x_k)
\]
For a constant function, say \( f(x) = c \):
\[
P(x) = c = \sum_{k=0}^{n} W_k(x) c
\]
Canceling \( c \) on both sides gives:
\[
\sum_{k=0}^{n} W_k(x) = 1
\]
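This property is easy to verify numerically; the short sketch below (with arbitrarily chosen nodes) evaluates the Lagrange basis polynomials at a few points and checks that they sum to one everywhere.
\begin{lstlisting}
import numpy as np

def lagrange_basis(x_nodes, j, x):
    """l_j(x): equals 1 at x_nodes[j] and 0 at the other nodes."""
    terms = [(x - xk) / (x_nodes[j] - xk)
             for k, xk in enumerate(x_nodes) if k != j]
    return np.prod(terms, axis=0)

x_nodes = np.array([0.0, 0.3, 0.7, 1.0])
x = np.linspace(0.0, 1.0, 5)
W = np.array([lagrange_basis(x_nodes, j, x) for j in range(len(x_nodes))])
print(W.sum(axis=0))           # all ones: the weights sum to 1 at every x
\end{lstlisting}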
\section{Derivatives of Lagrange Polynomial / Differentiation Matrix}
The derivatives of a Lagrange interpolating polynomial at the node points can be collected into a differentiation matrix that maps the vector of nodal function values to the vector of nodal derivative values, as in the second-order example discussed earlier.
\chapter{Runge Phenomenon}
The Runge phenomenon refers to the large oscillations that can occur when using high-degree polynomial interpolation at equidistant points.
\section{Controlling Error by Controlling the Location of Nodes}
By strategically choosing the locations of interpolation nodes, one can minimize the error in polynomial interpolation.
\section{Chebyshev Nodes}
Chebyshev nodes are specific points used in polynomial interpolation to minimize the interpolation error, particularly to avoid the Runge phenomenon.
\chapter{Weierstrass Theorem}
The Weierstrass approximation theorem states that any continuous function defined on a closed interval can be uniformly approximated by polynomial functions to any degree of accuracy.
\section*{Weierstrass Theorem (Bernstein polynomial)}
\[
\exists \ P_n(x) \text{ such that } |f(x) - P_n(x)| < \epsilon
\]
\[
f(x), \ x \in [0, 1]
\]
\[
|f(x) - f(x_0)| < \epsilon
\]
\[
x \in [x_0 - \delta, x_0 + \delta] \quad \text{closed interval (can go to $\infty$ at endpoints)}
\]
\[
(a + b)^n = \sum_{k=0}^{n} \binom{n}{k} a^{k} b^{n-k}
\]
\[
(x + (1 - x))^n = \sum_{k=0}^{n} \binom{n}{k} x^k (1 - x)^{n-k} = 1
\]
Bernstein basis polynomial terms: \( b_k(x) = \binom{n}{k} x^k (1 - x)^{n-k} \)
\[
B_n(f, x) = \sum_{k=0}^{n} f\left( \frac{k}{n} \right) b_k(x)
\]
\[
\Rightarrow B_n(f) \to f
\]
\[
\left| B_n(f, x) - f(x) \right| = \left| \sum_{k=0}^{n} \left( f\left( \frac{k}{n} \right) - f(x) \right) b_k(x) \right|
\]
We now seek an upper bound for this quantity.
\[
|x - \frac{k}{n}| \leq \delta
\]
\[
|f(x)| \leq M \quad \text{($f$ is bounded, being continuous on the closed interval)}
\]
\[
|f(x) - f(x_0)| \leq 2M
\]
\section*{Further Analysis on Bernstein Polynomials}
\[
\left| \sum_{k=0}^{n} \left( f\left( \frac{k}{n} \right) - f(x) \right) b_k(x) \right| \leq \sum_{k=0}^{n} \left| f\left( \frac{k}{n} \right) - f(x) \right| b_k(x)
\]
\[
|B_n(f, x) - f(x)| \leq \sum_{|x - \frac{k}{n}| \leq \delta} \left| f\left( \frac{k}{n} \right) - f(x) \right| b_k(x) + \sum_{|x - \frac{k}{n}| > \delta} \left| f\left( \frac{k}{n} \right) - f(x) \right| b_k(x)
\]
\[
\leq \sum_{|x - \frac{k}{n}| \leq \delta} \frac{\epsilon}{2} b_k(x) + \sum_{|x - \frac{k}{n}| > \delta} \frac{2M}{n \delta^2} b_k(x)
\]
Using bounds and conditions, we proceed as follows:
\[
= \sum_{|x - \frac{k}{n}| \leq \delta} \frac{\epsilon}{2} b_k(x) + \frac{2M}{n \delta^2} \sum_{|x - \frac{k}{n}| > \delta} b_k(x)
\]
Expanding on this:
\[
= \frac{\epsilon}{2} \sum b_k(x) + \frac{2M}{n \delta^2} \sum \left( \frac{k}{n} - x \right)^2 b_k(x)
\]
Define parameters for probability and expectation:
\[
\mu = nx, \quad \sigma = \sqrt{n x (1 - x)}, \quad E[(k - \mu)^2] = \sigma^2 = E[k^2] - E[k]^2
\]
\[
E[k] = \sum k \cdot p(k), \quad \sigma^2 = E[(k - \mu)^2]
\]
Where:
\[
E[(k - \mu)^2] = E[k^2] - E[k]^2 \quad \text{with} \quad \mu = nx
\]
\[
B_n(f, x) = \sum_{k=0}^{n} f\left( \frac{k}{n} \right) \binom{n}{k} x^k (1 - x)^{n-k}
\]
\[
|B_n(f, x) - f(x)| = \left| \sum_{k=0}^{n} \left( f\left( \frac{k}{n} \right) - f(x) \right) \binom{n}{k} x^k (1 - x)^{n-k} \right|
\]
\[
\leq \sum \left| f\left( \frac{k}{n} \right) - f(x) \right| \binom{n}{k} x^k (1 - x)^{n-k}
\]
\[
= \sum_{|x - \frac{k}{n}| \leq \delta} \left| f\left( \frac{k}{n} \right) - f(x) \right| \binom{n}{k} x^k (1 - x)^{n-k} + \sum_{|x - \frac{k}{n}| > \delta} \left| f\left( \frac{k}{n} \right) - f(x) \right| \binom{n}{k} x^k (1 - x)^{n-k}
\]
Define these two regions as:
\[
A = \sum_{|x - \frac{k}{n}| \leq \delta} \left| f\left( \frac{k}{n} \right) - f(x) \right| \binom{n}{k} x^k (1 - x)^{n-k} \leq \frac{\epsilon}{2}
\]
\[
B = \sum_{|x - \frac{k}{n}| > \delta} \left| f\left( \frac{k}{n} \right) - f(x) \right| \binom{n}{k} x^k (1 - x)^{n-k}
\]
For \( B \):
\[
\leq \sum_{k \in B} \frac{2M}{n^2 \delta^2} \binom{n}{k} x^k (1 - x)^{n-k}
\]
Using the probability statistics:
\[
\leq \frac{\epsilon}{2} + \frac{2M}{n \delta^2}
\]
We now choose \( n \) large enough that
\[
\frac{2M}{n \delta^2} \leq \frac{\epsilon}{2}
\]
Thus:
\[
|B_n(f, x) - f(x)| \leq \epsilon
\]
Hence \( B_n(f, x) \) converges uniformly to \( f(x) \), which justifies approximating continuous functions by polynomials.
\section*{Quadrature Weights from Lagrange Interpolation}
The cost to be approximated is an integral of the form
\[
\int_0^1 h(t) \, dt
\]
together with constraints of the form
\[
\mathbf{D} \mathbf{x} = A
\]
Approximating \( h(t) \) by its Lagrange interpolant at the nodes \( t_k \) gives
\[
\int_{0}^{T} h(t) \, dt \approx \int_{0}^{T} \sum_{k=0}^{n} h(t_k) L_k(t) \, dt
= \sum_{k=0}^{n} h(t_k) \underbrace{\int_{0}^{T} L_k(t) \, dt}_{w_k}
\]
which is exact whenever
\[
h \in \mathbb{P}_n
\]
\noindent
\textit{Remark from class: can this also be reversed? We are approximating the integral to a high enough order to make it exact, rather than approximating the function itself.}
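The weights \( w_k \) can be computed directly by integrating each Lagrange basis polynomial; the sketch below (an illustration, with three equally spaced nodes on \( [0, 1] \)) recovers the familiar Simpson-type weights.
\begin{lstlisting}
import numpy as np

def quadrature_weights(nodes, a, b):
    """w_k = integral over [a, b] of the k-th Lagrange basis polynomial."""
    nodes = np.asarray(nodes, dtype=float)
    n = len(nodes)
    w = np.zeros(n)
    for k in range(n):
        # Coefficients of l_k(t) = prod_{j != k} (t - t_j) / (t_k - t_j).
        coeffs = np.poly([t for j, t in enumerate(nodes) if j != k])
        coeffs = coeffs / np.prod([nodes[k] - t
                                   for j, t in enumerate(nodes) if j != k])
        antiderivative = np.polyint(coeffs)
        w[k] = np.polyval(antiderivative, b) - np.polyval(antiderivative, a)
    return w

nodes = np.array([0.0, 0.5, 1.0])
w = quadrature_weights(nodes, 0.0, 1.0)
print(w)                        # [1/6, 2/3, 1/6]: Simpson's rule weights
print(np.dot(w, nodes**2))      # integrates t**2 exactly: 1/3
\end{lstlisting}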
\section{Bernstein Polynomials and Their Applications}
\textbf{Definition}: Bernstein polynomials are a particular set of polynomials that are used to approximate continuous functions on a closed interval \( [0, 1] \). They are fundamental in approximation theory and are used to prove the Weierstrass approximation theorem, which states that any continuous function can be uniformly approximated by polynomials.
\subsection{Mathematical Definition}
Given a function \( f(x) \) defined on the interval \( [0, 1] \), the Bernstein polynomial \( B_n(f, x) \) of degree \( n \) is defined as:
\[
B_n(f, x) = \sum_{k=0}^{n} f\left( \frac{k}{n} \right) \binom{n}{k} x^k (1 - x)^{n-k},
\]
where \( \binom{n}{k} \) is the binomial coefficient.
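A direct implementation of this definition (illustrative; the test function is an arbitrary continuous, non-smooth choice) shows the slow but steady convergence:
\begin{lstlisting}
import numpy as np
from math import comb

def bernstein(f, n, x):
    """Degree-n Bernstein approximation B_n(f, x) of f on [0, 1]."""
    x = np.asarray(x, dtype=float)
    result = np.zeros_like(x)
    for k in range(n + 1):
        result += f(k / n) * comb(n, k) * x**k * (1.0 - x)**(n - k)
    return result

f = lambda t: abs(t - 0.5)            # continuous but not smooth
x = np.linspace(0.0, 1.0, 101)
for n in (10, 50, 200):
    print(n, np.max(np.abs(bernstein(f, n, x) - f(x))))   # error decreases with n
\end{lstlisting}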
\subsection{Properties}
\begin{itemize}
\item \textbf{Uniform Convergence}: The sequence \( B_n(f, x) \) converges uniformly to \( f(x) \) on \( [0, 1] \) as \( n \to \infty \) for any continuous function \( f(x) \).
\item \textbf{Preservation of Monotonicity}: If \( f(x) \) is a monotonically increasing or decreasing function, then so is \( B_n(f, x) \).
\item \textbf{Preservation of Convexity}: If \( f(x) \) is convex, then \( B_n(f, x) \) is also convex for every \( n \).
\end{itemize}
\subsection{Weierstrass Approximation Theorem}
The Weierstrass approximation theorem states that for any continuous function \( f(x) \) defined on a closed interval \( [0, 1] \) and for any \( \epsilon > 0 \), there exists a polynomial \( P_n(x) \) such that:
\[
|f(x) - P_n(x)| < \epsilon \quad \text{for all } x \in [0, 1].
\]
Using Bernstein polynomials, this theorem can be demonstrated by showing that:
\[
\lim_{n \to \infty} B_n(f, x) = f(x).