From 7edd2bc910623e372f72975376f5ef1adbbe5388 Mon Sep 17 00:00:00 2001 From: Gilles Peiffer Date: Thu, 24 Dec 2020 22:44:31 +0100 Subject: [PATCH] LINMA2380: Update exercise solutions for 2020--2021 --- src/q7/matrix-INMA2380/exercises/ch1.tex | 607 ++++++++++++------ src/q7/matrix-INMA2380/exercises/ch2.tex | 464 ++++--------- src/q7/matrix-INMA2380/exercises/ch3.tex | 364 ++++++----- src/q7/matrix-INMA2380/exercises/ch4.tex | 252 ++++---- src/q7/matrix-INMA2380/exercises/ch5.tex | 135 +++- src/q7/matrix-INMA2380/exercises/ch6.tex | 158 ++--- src/q7/matrix-INMA2380/exercises/ch7.tex | 122 +++- .../exercises/matrix-INMA2380-exercises.tex | 7 +- src/q7/matrix-INMA2380/exercises/old.tex | 122 ++++ 9 files changed, 1304 insertions(+), 927 deletions(-) create mode 100644 src/q7/matrix-INMA2380/exercises/old.tex diff --git a/src/q7/matrix-INMA2380/exercises/ch1.tex b/src/q7/matrix-INMA2380/exercises/ch1.tex index d2c5cf818..82c2ed14d 100644 --- a/src/q7/matrix-INMA2380/exercises/ch1.tex +++ b/src/q7/matrix-INMA2380/exercises/ch1.tex @@ -1,67 +1,153 @@ -\section{Matrix' algebra} +\section{Matrix algebras} \exo{1} -Prove that the addition of matrices is commutative and associative and that its neutral element is the zero matrix: $O = [0]_{i,j=1}^{m,n}$. +Show that addition of matrices is commutative, associative and that the neutral element is the zero matrix: $0_{m \times n} = [0]_{i,j=1}^{m,n}$. \begin{solution} - Let $\mathcal{R}$ be the ring in which the elements of the matrix are + Let $\mathcal{R}$ be the ring to which the elements of the matrix belong. \begin{description} \item[Commutativity] We should have $A+B=B+A$ or - $[a_{ij} + b_{ij}]_{i,j=1}^{m,n} = [b_{ij} + a_{ij}]_{i,j=1}^{m,n}$ - which is true since $\mathcal{R}$ is commutative. + $[a_{ij} + b_{ij}]_{i,j=1}^{m,n} = [b_{ij} + a_{ij}]_{i,j=1}^{m,n}$, + which is true since addition is commutative in $\mathcal{R}$ by virtue of it being a ring (strictly speaking, a commutative group would be sufficient). \item[Associativity] $A+(B+C)=(A+B)+C$ We should have $A+(B+C)=(A+B)+C$ or $[a_{ij} + (b_{ij} + c_{ij})]_{i,j=1}^{m,n} = [(a_{ij} + b_{ij}) + c_{ij}]_{i,j=1}^{m,n}$ - which is true since $\mathcal{R}$ is associative. + which is true since addition is commutative in $\mathcal{R}$ by virtue of it being a ring (strictly speaking, a semigroup would be sufficient). \item[Neutral element] Let $B$ be the neutral element. We have $A + B = [a_{ij} + b_{ij}]_{i,j=1}^{m,n}$ and $A = [a_{ij}]_{i,j=1}^{m,n}$. - Two matrices are equal if all their elements are equal so $b_{ij}$ - should be the neutral element of $\mathcal{R}$. + Two matrices are equal if all their elements are equal, so $b_{ij}$ + should be the neutral element of $\mathcal{R}$, which exists by virtue of it being a ring (strictly speaking, a monoid would be sufficient). \end{description} \end{solution} \exo{1} +Verify that, for all matrices \(A\) and \(B\) belonging to \(M\), the set of matrices of fixed dimensions \(m \times n\), and for all scalars \(\alpha, \beta\) belonging to a field \(\mathcal{F}\) or a ring \(\mathcal{R}\), it holds that +\begin{align*} +0A &= 0,\\ +1A &= A,\\ +(\alpha + \beta)A &= \alpha A + \beta A,\\ +\alpha(A + B) &= \alpha A + \alpha B,\\ +\alpha(\beta A) &= (\alpha \beta) A. +\end{align*} + \begin{solution} +\begin{comment} % old solution Simply develop $\alpha A_{m \times n}=[\alpha a_{ij}]_{i,j=1}^{m,n}$. 
+\end{comment} + +\begin{itemize} + \item By the definition, \(0A = [0 a_{ij}]_{i,j = 1}^{m, n}\), and thus \(0A = [0]_{i, j = 1}^{m, n} = 0_{m, n}\). + \item By the definition, \(1A = [1 a_{ij}]_{i,j = 1}^{m, n}\), and thus \(1A = [a_{ij}]_{i, j = 1}^{m, n} = A\). + \item By the definition, \((\alpha + \beta)A = [(\alpha + \beta) a_{ij}]_{i,j = 1}^{m, n} = [\alpha a_{ij} + \beta a_{ij}]_{i, j = 1}^{m, n} = \alpha A + \beta A\). + \item By the definition, \(\alpha (A + B) = [\alpha (a_{ij} + b_{ij})]_{i,j = 1}^{m, n} = [\alpha a_{ij} + \alpha b_{ij}]_{i, j = 1}^{m, n} = \alpha A + \alpha B\). + \item By the definition, \(\alpha (\beta A) = [\alpha (\beta a_{ij})]_{i,j = 1}^{m, n} = \alpha [\beta a_{ij}]_{i, j = 1}^{m, n} = [(\alpha \beta) a_{ij}]_{i, j = 1}^{m, n} = (\alpha \beta) A\). +\end{itemize} + \end{solution} \exo{1} +Show that the following properties hold true: +\begin{align*} +AI &= IA = A, \tag{neutral element}\\ +A(B + C) &= AB + AC, \tag{distributivity}\\ +(B + C) D &= BD + CD, \tag{distributivity}\\ +A(BC) &= (AB)C. \tag{associativity} +\end{align*} + \begin{solution} +\begin{comment} % old solution Develop the products. +\end{comment} + +\begin{itemize} + \item We use \(\delta_{ij}\), the Kronecker delta, which is \(1\) when \(i = j\) and \(0\) otherwise. + \begin{align*} + AI &= \left[\sum_k a_{ik} \delta_{kj}\right]_{i, j = 1}^{m, n}\\ + &= [a_{ij}]_{i, j = 1}^{m, n}\\ + &= \left[\sum_k \delta_{ik} a_{kj}\right]_{i, j = 1}^{m, n}\\ + &= IA. + \end{align*} + \item \begin{align*} + A (B + C) &= \left[\sum_k a_{ik} (b_{kj} + c_{kj})\right]_{i, j = 1}^{m, n}\\ + &= \left[\sum_k \left(a_{ik} b_{kj} + a_{ik} c_{kj}\right)\right]_{i, j = 1}^{m, n}\\ + &= \left[\sum_k a_{ik} b_{kj} + \sum_k a_{ik} c_{kj}\right]_{i, j = 1}^{m, n}\\ + &= AB + AC. + \end{align*} + \item \begin{align*} + (B + C) D &= \left[\sum_k (b_{ik} + c_{ik}) d_{kj}\right]_{i, j = 1}^{m, n}\\ + &= \left[\sum_k \left(b_{ik} d_{kj} + c_{ik} d_{kj}\right)\right]_{i, j = 1}^{m, n}\\ + &= \left[\sum_k b_{ik} d_{kj} + \sum_k c_{ik} d_{kj}\right]_{i, j = 1}^{m, n}\\ + &= BD + CD. + \end{align*} + \item \begin{align*} + A(BC) &= \left[\sum_\ell a_{i\ell} \left(\sum_k b_{\ell k} c_{k j}\right)\right]_{i, j = 1}^{m, n}\\ + &= \left[\sum_\ell \sum_k a_{i\ell} (b_{\ell k} c_{kj})\right]_{i, j = 1}^{m, n}\\ + &= \left[\sum_\ell \sum_k (a_{i\ell} b_{\ell k}) c_{kj}\right]_{i, j = 1}^{m, n}\\ + &= \left[\sum_\ell \left(\sum_k a_{i\ell} b_{\ell k}\right) c_{kj}\right]_{i, j = 1}^{m, n}\\ + &= (AB)C. + \end{align*} +\end{itemize} + \end{solution} \exo{1} +Verify that the matrices +\[ +A = \begin{bmatrix} 2 & 1 \\ 0 & 0 \\ 1 & 0 \end{bmatrix},\quad B = \begin{bmatrix} 0 & 1 & 2 \\ -1 & 0 & 0 \end{bmatrix} +\] +and +\[ +A = \begin{bmatrix} 2 & 1 \\ 0 & 0 \end{bmatrix},\quad B = \begin{bmatrix} 1 & 0 \\ -1 & 0 \end{bmatrix} +\] +do not satisfy \(AB = BA\). + \begin{solution} +\begin{comment} % old solution Develop the products. +\end{comment} + +For the first pair, \(AB\) is a \(3 \times 3\) matrix, whereas \(BA\) is a \(2 \times 2\) matrix. +They can hence not be equal. + +For the second pair, computation yields +\[ +AB = \begin{bmatrix} 1 & 0 \\ 0 & 0 \end{bmatrix} \neq BA = \begin{bmatrix} 2 & 1 \\ -2 & -1 \end{bmatrix}. +\] \end{solution} \exo{2} +Show that the set of square matrices of dimension \(n\) forms a ring. 
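+
+Note, as a quick illustration borrowed from the computation of Exercise~1.4 above, that this ring is in general not commutative:
+\[
+\begin{bmatrix} 2 & 1 \\ 0 & 0 \end{bmatrix}
+\begin{bmatrix} 1 & 0 \\ -1 & 0 \end{bmatrix}
+=
+\begin{bmatrix} 1 & 0 \\ 0 & 0 \end{bmatrix}
+\neq
+\begin{bmatrix} 2 & 1 \\ -2 & -1 \end{bmatrix}
+=
+\begin{bmatrix} 1 & 0 \\ -1 & 0 \end{bmatrix}
+\begin{bmatrix} 2 & 1 \\ 0 & 0 \end{bmatrix},
+\]
+so only a ring structure, and not a commutative ring structure, can be expected here.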
+ \begin{solution} We have to prove that \begin{itemize} - \item $(\mathcal{R}^{n \times n}, +)$ is a commutative group - \item $(\mathcal{R}^{n \times n}, \cdot)$ is a monoïd - \item $\cdot$ is distributive over $+$ + \item $(\mathcal{R}^{n \times n}, +)$ is a commutative group; + \item $(\mathcal{R}^{n \times n}, \cdot)$ is a monoid; + \item $\cdot$ is distributive over $+$, \end{itemize} which can be shown with the help of the previous exercises. \begin{itemize} - \item If $A$ and $B$ are $\mathcal{R}^{n \times n}$, $A+B \in \mathcal{R}^{n \times n}$ + \item If $A$ and $B$ are in $\mathcal{R}^{n \times n}$, $A+B \in \mathcal{R}^{n \times n}$, so $+$ is a composition law in $\mathcal{R}^{n \times n}$. - We have seen in exercise~1.1 that the addition is associative, and that the neutral element is - $[0]_{i,j=1}^{n,n}$. - The symmetric element of $A$ exists and is $[-a_{ij}]_{i,j=1}^{n,n}$ since - \[ [a_{ij}]_{i,j=1}^{n,n} + [-a_{ij}]_{i,j=1}^{n,n} = [a_{ij}-a_{ij}]_{i,j=1}^{n,n} = [0]_{i,j=1}^{n,n}. \] - It is also commutative as we have seen in exercise~1.1. - \item If $A$ and $B$ are $\mathcal{R}^{n \times n}$, $A \cdot B \in \mathcal{R}^{n \times n}$ + We have seen in Exercise~1.1 that addition of matrices is associative, and that the neutral element is $[0]_{i,j=1}^{n}$. + The symmetric element of $A$ exists and is $[-a_{ij}]_{i,j=1}^{n}$ since + \[ [a_{ij}]_{i,j=1}^{n} + [-a_{ij}]_{i,j=1}^{n} = [a_{ij}-a_{ij}]_{i,j=1}^{n} = [0]_{i,j=1}^{n}. \] + It is also commutative as we have seen in Exercise~1.1. + \item If $A$ and $B$ are in $\mathcal{R}^{n \times n}$, $A \cdot B \in \mathcal{R}^{n \times n}$, so $\cdot$ is a composition law in $\mathcal{R}^{n \times n}$. - It is associative and has a neutral element as seen in exercise~1.3. - \item We have seen in exercise~1.3 that $\cdot$ is distributive over $+$. + It is associative and has a neutral element as seen in Exercise~1.3. + \item We have seen in Exercise~1.3 that $\cdot$ is distributive over $+$. \end{itemize} \end{solution} \exo{2} +Show that if \(AC\) and \(BD\) are well defined, then +\[ +(A \otimes B)(C \otimes D) = (AC) \otimes (BD). +\] + \begin{solution} Let $m,n,o,p,q,s \in \mathbb{N}$ such that $A \in \mathcal{R}^{m \times n}, B \in \mathcal{R}^{p \times q}, C \in \mathcal{R}^{n \times o}, D \in \mathcal{R}^{q \times s}$. @@ -73,28 +159,28 @@ \section{Matrix' algebra} \begin{align*} (A \otimes B) (C \otimes D) & = - \begin{pmatrix} + \begin{bmatrix} a_{11}B & \cdots & a_{1n}B\\ \vdots & \ddots & \vdots\\ a_{m1}B & \cdots & a_{mn}B - \end{pmatrix} - \begin{pmatrix} + \end{bmatrix} + \begin{bmatrix} c_{11}D & \cdots & c_{1o}D\\ \vdots & \ddots & \vdots\\ c_{n1}D & \cdots & c_{no}D - \end{pmatrix}\\ + \end{bmatrix}\\ & = - \begin{pmatrix} + \begin{bmatrix} \sum_{i=1}^n a_{1i}c_{i1}BD & \cdots & \sum_{i=1}^n a_{1i}c_{io}BD\\ \vdots & \ddots & \vdots\\ \sum_{i=1}^n a_{mi}c_{i1}BD & \cdots & \sum_{i=1}^n a_{mi}c_{io}BD - \end{pmatrix}\\ + \end{bmatrix}\\ & = - \begin{pmatrix} + \begin{bmatrix} \sum_{i=1}^n a_{1i}c_{i1} & \cdots & \sum_{i=1}^n a_{1i}c_{io}\\ \vdots & \ddots & \vdots\\ \sum_{i=1}^n a_{mi}c_{i1} & \cdots & \sum_{i=1}^n a_{mi}c_{io} - \end{pmatrix} + \end{bmatrix} \otimes BD\\ & = AC \otimes BD. @@ -102,6 +188,11 @@ \section{Matrix' algebra} \end{solution} \exo{1} +Show that for the matrix powers the classical laws of exponentiation hold true: that is, for all nonnegative integers \(p, q\), and letting \(A^0 \coloneqq I\), we have +\[ +A^pA^q = A^{p+q} = A^qA^p \quad \textnormal{and} \quad (A^{p})^q = A^{pq}. 
+\] + \begin{solution} \begin{align*} A^pA^q @@ -110,92 +201,92 @@ \section{Matrix' algebra} & = A^{p+q}\\ & = \underbrace{A \cdots A}_{q+p}\\ & = \underbrace{A \cdots A}_{q} \underbrace{A \cdots A}_{p}\\ - & = A^qA^p\\ + & = A^qA^p.\\ (A^p)^q & = \underbrace{\underbrace{A \cdots A}_{p} \cdots \underbrace{A \cdots A}_{p}}_q\\ & = \underbrace{A \cdots A}_{pq}\\ - & = A^{pq} + & = A^{pq}. \end{align*} \end{solution} \exo{2} -Let's consider (upper) triangular Toeplitz, -i.e. the (upper) triangular matrices having equal elements - along the diagonal ($t_{ij} = t_{i+k,j+k}$): +Consider the set of upper triangular Toeplitz matrices, i.e., the upper triangular matrices with equal elements + along the diagonals ($t_{ij} = t_{i+k,j+k}$): \[ T = - \begin{pmatrix} + \begin{bmatrix} t_1 & t_2 & \cdots & t_n\\ 0 & t_1 & \ddots & \vdots\\ \vdots & \ddots & \ddots & t_2\\ 0 & \cdots & 0 & t_1 - \end{pmatrix}. + \end{bmatrix}. \] -Prove that these matrices commute. +Show that these matrices commute. \begin{solution} - Let $T,U$ be upper triangular Toeplitz, + Let $T,U$ be upper triangular Toeplitz matrices. + Then \begin{align*} TU & = - \begin{pmatrix} + \begin{bmatrix} t_1 & t_2 & \cdots & t_n\\ 0 & t_1 & \ddots & \vdots\\ \vdots & \ddots & \ddots & t_2\\ 0 & \cdots & 0 & t_1 - \end{pmatrix} - \begin{pmatrix} + \end{bmatrix} + \begin{bmatrix} u_1 & u_2 & \cdots & u_n\\ 0 & u_1 & \ddots & \vdots\\ \vdots & \ddots & \ddots & u_2\\ 0 & \cdots & 0 & u_1 - \end{pmatrix}\\ + \end{bmatrix}\\ & = - \begin{pmatrix} + \begin{bmatrix} t_1u_1 & t_1u_2 + t_2u_1 & \cdots & t_1u_n + \cdots + t_nu_1\\ 0 & t_1u_1 & \ddots & \vdots\\ \vdots & \ddots & \ddots & t_1u_2 + t_2u_1\\ 0 & \cdots & 0 & t_1u_1 - \end{pmatrix}\\ + \end{bmatrix}\\ & = - \begin{pmatrix} + \begin{bmatrix} u_1t_1 & u_1t_2 + u_2t_1 & \cdots & u_1t_n + \cdots + u_nt_1\\ 0 & u_1t_1 & \ddots & \vdots\\ \vdots & \ddots & \ddots & u_1t_2 + u_2t_1\\ 0 & \cdots & 0 & u_1t_1 - \end{pmatrix}\\ + \end{bmatrix}\\ & = UT. \end{align*} \end{solution} \exo{3} -Let's consider square circulant matrices: +Consider the set of square, circulant matrices: \[ C = - \begin{pmatrix} + \begin{bmatrix} c_1 & c_2 & \cdots & c_n\\ c_n & c_1 & \ddots & \vdots\\ \vdots & \ddots & \ddots & c_2\\ c_2 & \cdots & c_n & c_1 - \end{pmatrix}. + \end{bmatrix}. \] -Prove that these matrices commute. +Show that these matrices commute. \begin{solution} - Let $C,D$ be square circulant matrices and let's define - \[ \sigma : (i,j) \mapsto (j - i) \pmod{n}+1 \] + Let $C,D$ be square circulant matrices and define + \[ \sigma \colon (i,j) \mapsto \big((j - i) \bmod{n}\big)+1 \] such that - $C = [c_{\sigma(i,j)}]_{i,j=1}^{n,n}$ and - $D = [d_{\sigma(i,j)}]_{i,j=1}^{n,n}$. + $C = [c_{\sigma(i,j)}]_{i,j=1}^{n}$ and + $D = [d_{\sigma(i,j)}]_{i,j=1}^{n}$. We can observe that \begin{equation} \label{eq:cycln} - \sigma(i,j+n) = \sigma(i+n,j) = \sigma(i,j). + \sigma(i,j+n) = \sigma(i+n,j) = \sigma(i,j), \end{equation} and \begin{equation} \label{eq:cycldiff} - \sigma(i,j) = \sigma(i+a,j+a) + \sigma(i,j) = \sigma(i+a,j+a), \end{equation} for all $a \in \mathbb{N}$. 
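+  As a quick illustration of this index map, for \(n = 3\) it reproduces exactly the circulant pattern:
+  \[
+  C = [c_{\sigma(i,j)}]_{i,j=1}^{3}
+  =
+  \begin{bmatrix}
+  c_1 & c_2 & c_3\\
+  c_3 & c_1 & c_2\\
+  c_2 & c_3 & c_1
+  \end{bmatrix},
+  \qquad \textnormal{e.g.} \quad \sigma(2,1) = \big((1-2) \bmod 3\big) + 1 = 3.
+  \]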
We can now observe that @@ -205,94 +296,103 @@ \section{Matrix' algebra} \left[ \sum_{k=1}^n c_{\sigma(i,k)}d_{\sigma(k,j)} - \right]_{i,j=1}^{n,n}\\ + \right]_{i,j=1}^{n}\\ & = \left[ \sum_{k=1}^{i+j-1} - d_{\sigma(i,k)}c_{\sigma(k,j)} + c_{\sigma(i,k)}d_{\sigma(k,j)} + \sum_{k=i+j}^{n} - d_{\sigma(i,k)}c_{\sigma(k,j)} - \right]_{i,j=1}^{n,n}\\ + c_{\sigma(i,k)}d_{\sigma(k,j)} + \right]_{i,j=1}^{n}\\ & = \left[ \sum_{k=1}^{i+j-1} - d_{\sigma(i,i+j-k)}c_{\sigma(i+j-k,j)} + c_{\sigma(i,i+j-k)}d_{\sigma(i+j-k,j)} + \sum_{k=i+j}^{n} - d_{\sigma(i,n+i+j-k)}c_{\sigma(n+i+j-k,j)} - \right]_{i,j=1}^{n,n}\\ + c_{\sigma(i,n+i+j-k)}d_{\sigma(n+i+j-k,j)} + \right]_{i,j=1}^{n}\\ & \stackrel{\eqref{eq:cycln}}{=} \left[ \sum_{k=1}^{i+j-1} - d_{\sigma(i,i+j-k)}c_{\sigma(i+j-k,j)} + c_{\sigma(i,i+j-k)}d_{\sigma(i+j-k,j)} + \sum_{k=i+j}^{n} - d_{\sigma(i,i+j-k)}c_{\sigma(i+j-k,j)} - \right]_{i,j=1}^{n,n}\\ + c_{\sigma(i,i+j-k)}d_{\sigma(i+j-k,j)} + \right]_{i,j=1}^{n}\\ & = \left[ \sum_{k=1}^n c_{\sigma(i,i+j-k)}d_{\sigma(i+j-k,j)} - \right]_{i,j=1}^{n,n}\\ + \right]_{i,j=1}^{n}\\ & \stackrel{\eqref{eq:cycldiff}}{=} \left[ \sum_{k=1}^n c_{\sigma(k,j)}d_{\sigma(i,k)} - \right]_{i,j=1}^{n,n}\\ - & = DC + \right]_{i,j=1}^{n}\\ + &= + \left[ + \sum_{k=1}^n + d_{\sigma(i,k)}c_{\sigma(k,j)} + \right]_{i,j=1}^{n}\\ + & = DC. \end{align*} \end{solution} \exo{2} +Show that a square matrix of dimension \(n\) commuting with all the other matrices of the same dimension is necessarily a ``scalar'' matrix, i.e., it has the form \(cI\). + \begin{solution} - Asking that they commute to every matrix is actually equivalent to asking - that they commute to every element of the base of $\mathcal{R}^{n \times n}$. - The canonical base is $e_ie_j^T$ for $i,j \in \{1, \ldots, n\}$ where - $e_k$ is a column vector with 1 at $k$ and 0 elsewhere. - We can see that $Ae_ie_j^T = A_{:i}e_j^T$ and $e_ie_j^TA = e_iA_{j:}$. - - If $A_{:i}e_j^T = e_iA_{j:}$, the LHS imposes that only the $j$th column is non-zero - and the RHS imposes that only the $i$th row is non-zero. + Asking that a matrix commutes with every matrix is actually equivalent to asking + that it commutes with every element of the base of $\mathcal{R}^{n \times n}$. + The canonical base is $e_ie_j^\top$ for $i,j \in \{1, \dots, n\}$ where + $e_k$ is a column vector with \(1\) at position $k$ and \(0\) elsewhere. + We can see that $Ae_ie_j^\top = A_{:i}e_j^\top$ and $e_ie_j^\top A = e_iA_{j:}$. + + If $A_{:i}e_j^\top = e_iA_{j:}$, the LHS imposes that only the $j$-th column is nonzero + and the RHS imposes that only the $i$-th row is nonzero. We therefore need $A_{ki} = 0$ for $k \neq i$ and $A_{jk} = 0$ for $k \neq j$. - What's left is $e_j^Te_iA_{ii} = A_{jj}e_j^Te_i$. + What's left is $e_j^\top e_iA_{ii} = A_{jj}e_j^\top e_i$. Therefore $A_{ii} = A_{jj}$. - Since this must hold for every $i,j \in \{1, \ldots, n\}$, $A$ must be diagonal - with equal elements at the diagonal. + Since this must hold for every $i,j \in \{1, \dots, n\}$, $A$ must be diagonal + with equal elements on its diagonal, that is, a scalar multiple of the identity matrix: \(A = cI\). \end{solution} \exo{2} +Show that a square matrix of dimension \(n\) commuting with a diagonal matrix \(\diag\{a_1, \dots, a_n\}\), where \(a_i \neq a_j\) for all \(i \neq j\), is also diagonal. 
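+
+For intuition, consider first the \(2 \times 2\) case with \(A = \diag\{a_1, a_2\}\) and \(a_1 \neq a_2\): equating
+\[
+BA =
+\begin{bmatrix}
+b_{11} a_1 & b_{12} a_2\\
+b_{21} a_1 & b_{22} a_2
+\end{bmatrix}
+\quad \textnormal{and} \quad
+AB =
+\begin{bmatrix}
+a_1 b_{11} & a_1 b_{12}\\
+a_2 b_{21} & a_2 b_{22}
+\end{bmatrix}
+\]
+entrywise forces \(b_{12}(a_2 - a_1) = 0\) and \(b_{21}(a_1 - a_2) = 0\), hence \(b_{12} = b_{21} = 0\); the general argument is given below.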
+ \begin{solution} \begin{description} \item[Solution 1] - Asking that a matrix $A$ commutes to a diagonal matrix is actually equivalent to asking - that it commutes to every element of the base - $e_ie_i^T$ for $i \in \{1, \ldots, n\}$ where - $e_k$ is a column vector with 1 at $k$ and 0 elsewhere. - We can see that $Ae_ie_i^T = A_{:i}e_i^T$ and $e_ie_i^TA = e_iA_{i:}$. + Asking that a matrix $A$ commutes with a diagonal matrix is actually equivalent to asking + that it commutes with every element of the base + $e_ie_i^\top$ for $i \in \{1, \ldots, n\}$ where + $e_k$ is a column vector with \(1\) at position $k$ and \(0\) elsewhere. + We can see that $Ae_ie_i^\top = A_{:i}e_i^\top$ and $e_ie_i^\top A = e_iA_{i:}$. - If $A_{:i}e_i^T = e_iA_{i:}$, the LHS imposes that only the $i$th column is non-zero - and the RHS imposes that only the $i$th line is non-zero. + If $A_{:i}e_i^\top = e_iA_{i:}$, the LHS imposes that only the $i$-th column is nonzero + and the RHS imposes that only the $i$-th row is nonzero. We therefore need $A_{ki} = 0$ for $k \neq i$ and $A_{ik} = 0$ for $k \neq i$. Since this must hold for every $i \in \{1, \ldots, n\}$, $A$ must be diagonal. \item[Solution 2] - Let $B$ a square matrix of dimension $n$ commuting with $A = \diag\{a_1,\dots,a_n\}$, - we have that + Let $B$ be a square matrix of dimension $n$ commuting with $A = \diag\{a_1,\dots,a_n\}$. + We then have that \[ B A = - \begin{pmatrix} + \begin{bmatrix} b_{11} a_1 & \dots & b_{1n} a_n \\ \vdots & \ddots & \vdots \\ b_{n1} a_1 & \dots & b_{nn} a_n \\ - \end{pmatrix} + \end{bmatrix} = - \begin{pmatrix} + \begin{bmatrix} b_{11} a_1 & \dots & b_{1n} a_1 \\ \vdots & \ddots & \vdots \\ b_{n1} a_n & \dots & b_{nn} a_n \\ - \end{pmatrix} - = A B + \end{bmatrix} + = A B. \] We thus need $b_{ij} a_j = b_{ij} a_i$ for every $i \neq j$ and since $a_i \neq a_j$ for $i \neq j$, $B$ has to be diagonal itself. @@ -300,106 +400,127 @@ \section{Matrix' algebra} \end{solution} \exo{1} +Show that for every matrix \(A \in \C^{m \times n}\), the matrices \(AA^\top\) and \(A^\top A\) are symmetric, and the matrices \(AA^*\) and \(A^*A\) are Hermitian. + \begin{solution} - Using the properties $(AB)^T = B^TA^T$ and $(AB)^* = B^*A^*$, + Using the properties $(AB)^\top = B^\top A^\top$ and $(AB)^* = B^*A^*$, \begin{align*} - (AA^T)^T & = (A^T)^TA^T\\ - & = AA^T\\ - (A^TA)^T & = A^T(A^T)^T\\ - & = A^TA\\ + (AA^\top)^\top & = (A^\top)^\top A^\top\\ + & = AA^\top,\\ + (A^\top A)^\top & = A^\top(A^\top)^\top\\ + & = A^\top A,\\ (AA^*)^* & = (A^*)^*A^*\\ - & = AA^*\\ + & = AA^*,\\ (A^*A)^* & = A^*(A^*)^*\\ & = A^*A. \end{align*} \end{solution} \exo{1} +Show that for a square matrix \(A \in \C^{n \times n}\), the matrix \(A + A^\top\) is symmetric, the matrix \(A + A^*\) is Hermitian, the matrix \(A - A^\top\) is antisymmetric and the matrix \(A - A^*\) is anti-Hermitian. + \begin{solution} We have \begin{align*} - (A + A^T)^T & = A^T + A\\ - & = A + A^T\\ - (A - A^T)^T & = A^T - A\\ - & = -(A - A^T)\\ + (A + A^\top)^\top & = A^\top + A\\ + & = A + A^\top,\\ + (A - A^\top)^\top & = A^\top - A\\ + & = -(A - A^\top),\\ (A + A^*)^* & = A^* + A\\ - & = A + A^*\\ + & = A + A^*,\\ (A - A^*)^* & = A^* - A\\ & = -(A - A^*). \end{align*} \end{solution} \exo{2} +Show that every complex matrix can be written as the sum of a symmetric matrix and an antisymmetric matrix, and as the sum of a Hermitian matrix and an anti-Hermitian matrix. 
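+
+As a small concrete example of the first decomposition, take \(A = \begin{bmatrix} 0 & 1 \\ 0 & 0 \end{bmatrix}\); then
+\[
+A =
+\underbrace{\begin{bmatrix} 0 & 1/2 \\ 1/2 & 0 \end{bmatrix}}_{\textnormal{symmetric}} +
+\underbrace{\begin{bmatrix} 0 & 1/2 \\ -1/2 & 0 \end{bmatrix}}_{\textnormal{antisymmetric}},
+\]
+and the general construction below shows where these two terms come from.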
+ \begin{solution} We simply notice that \begin{align*} - A & = \frac{A + A^T}{2} + \frac{A - A^T}{2}\\ + A & = \frac{A + A^\top}{2} + \frac{A - A^\top}{2}\\ & = \frac{A + A^*}{2} + \frac{A - A^*}{2}. \end{align*} \end{solution} \exo{1} +Show that the Kronecker product satisfies +\[ +(A \otimes B)^* = A^* \otimes B^*. +\] + \begin{solution} It is shown by \begin{align*} (A \otimes B)^* & = - \begin{pmatrix} + \begin{bmatrix} a_{11}B & \cdots & a_{1n}B\\ \vdots & \ddots & \vdots\\ a_{m1}B & \cdots & a_{mn}B - \end{pmatrix}^*\\ + \end{bmatrix}^*\\ & = - \begin{pmatrix} + \begin{bmatrix} (a_{11}B)^* & \cdots & (a_{m1}B)^*\\ \vdots & \ddots & \vdots\\ (a_{1n}B)^* & \cdots & (a_{mn})B^* - \end{pmatrix}\\ + \end{bmatrix}\\ & = - \begin{pmatrix} + \begin{bmatrix} a_{11}^*B^* & \cdots & a_{m1}^*B^*\\ \vdots & \ddots & \vdots\\ a_{1n}^*B^* & \cdots & a_{mn}^*B^* - \end{pmatrix}\\ + \end{bmatrix}\\ & = A^* \otimes B^*. \end{align*} \end{solution} \exo{2} +Show with the help of the previous exercise that if \(U_1\) and \(U_2\) are unitary matrices, then \(U_1 \otimes U_2\) is unitary as well. + \begin{solution} - If $U_1$ and $U_2$ are unitary, - using exercise~1.6 and exercise~1.15, + If $U_1$ and $U_2$ are unitary, then using Exercise~1.6 and Exercise~1.15 we know that \begin{align*} (U_1 \otimes U_2) (U_1 \otimes U_2)^* & = (U_1 \otimes U_2) (U_1^* \otimes U_2^*)\\ & = U_1U_1^* \otimes U_2U_2^*\\ & = I \otimes I\\ - & = I + & = I, \end{align*} which implies that $(U_1 \otimes U_2)^*$ is the inverse of $U_1 \otimes U_2$ - (also its left inverse since it is a square matrix). + (it is also its left inverse since it is a square matrix). \end{solution} \exo{4} +Show that for arbitrary matrices \(A_{m \times n}\) and \(B_{n \times m}\), we have +\[ +\det\begin{bmatrix} +A & 0\\ +-I_n & B +\end{bmatrix} = +\det\begin{bmatrix} +A & AB\\ +-I_n & 0 +\end{bmatrix}. +\] \begin{solution} Let's prove the equivalent property - \[ - \det\begin{pmatrix} - A^T & -I_n\\ - 0 & B^T - \end{pmatrix} = - \det\begin{pmatrix} - A^T & -I_n\\ - B^TA^T & 0 - \end{pmatrix} + \det\begin{bmatrix} + A^\top & -I_n\\ + 0 & B^\top + \end{bmatrix} = + \det\begin{bmatrix} + A^\top & -I_n\\ + B^\top A^\top & 0 + \end{bmatrix} \] - - to work on the columns since the properties are expressed + to work on the columns, since the properties are expressed on the columns. Let $C$ be the matrix of the LHS and @@ -407,54 +528,58 @@ \section{Matrix' algebra} We can see that \begin{align*} - d_{i:} &= c_{i:} & i & = 1, \ldots, n\\ - d_{i:} &= c_{i:} + \sum_{k=1}^n b_{k(i-n)} c_{k:} & i & = n+1, \ldots, 2n + d_{i:} &= c_{i:} & i & = 1, \dots, n,\\ + d_{i:} &= c_{i:} + \sum_{k=1}^n b_{k(i-n)} c_{k:} & i & = n+1, \dots, 2n, \end{align*} since \begin{align*} - \begin{pmatrix} - B^TA^T & 0 - \end{pmatrix} + \begin{bmatrix} + B^\top A^\top & 0 + \end{bmatrix} & = - \begin{pmatrix} - 0 & B^T - \end{pmatrix} + \begin{bmatrix} + 0 & B^\top + \end{bmatrix} + - \begin{pmatrix} - B^TA^T & -B^T - \end{pmatrix}\\ + \begin{bmatrix} + B^\top A^\top & -B^\top + \end{bmatrix}\\ & = - \begin{pmatrix} - 0 & B^T - \end{pmatrix} + \begin{bmatrix} + 0 & B^\top + \end{bmatrix} + - B^T - \begin{pmatrix} - A^T & -I_n - \end{pmatrix}. + B^\top + \begin{bmatrix} + A^\top & -I_n + \end{bmatrix}. \end{align*} - This gives us what we want using properties 3. and 8. - + This gives us what we want using Properties~3 and~8 from Proposition~1.2. 
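+
+  As a quick sanity check in the scalar case \(m = n = 1\), with \(A = [a]\) and \(B = [b]\), both sides indeed agree:
+  \[
+  \det\begin{bmatrix} a & 0 \\ -1 & b \end{bmatrix} = ab
+  \qquad \textnormal{and} \qquad
+  \det\begin{bmatrix} a & ab \\ -1 & 0 \end{bmatrix} = 0 - (-ab) = ab.
+  \]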
\end{solution} \exo{3} +Using the previous exercise, show that for two arbitrary square matrices \(A_{n \times n}\) and \(B_{n \times n}\), it holds that +\[ +\det(A) \det(B) = \det(AB). +\] + \begin{solution} - We know that for $C \in \mathcal{R}^{2n}$ + We know that for $C \in \mathcal{R}^{2n \times 2n}$ \[ \det(C) = \sum_{\mathbf{j}} (-1)^{t(\mathbf{j})} c_{1j_1} \cdots c_{(2n)j_{2n}}. \] For \[ - C = \begin{pmatrix} + C = \begin{bmatrix} A & 0\\ -I_n & B - \end{pmatrix}, + \end{bmatrix}, \] we see that if $j_i > n$ for $i \leq n$, $c_{ij_i} = 0$. Therefore the first $n$ elements of $\mathbf{j}$ must be a permutation of $\{1, \ldots, n\}$ - for the product $c_{1j_1} \cdots c_{(2n)j_{2n}}$ to be non-zero. - The last $n$ elements of $\mathcal{j}$ are therefore a permutation of $\{n+1, \ldots, 2n\}$. + for the product $c_{1j_1} \cdots c_{(2n)j_{2n}}$ to be nonzero. + The last $n$ elements of $\mathbf{j}$ are therefore a permutation of $\{n+1, \ldots, 2n\}$. Consequently, we have (defining $J$ as the set of such permutations) \begin{align*} \det(C) @@ -467,155 +592,197 @@ \section{Matrix' algebra} For \[ - D = \det\begin{pmatrix} + D = \begin{bmatrix} A & AB\\ -I_n & 0 - \end{pmatrix}, + \end{bmatrix}, \] we see using the same reasoning that we can restrict ourself to $\mathbf{j} = \mathbf{j}_1 \times \mathbf{j}_2$ where $\mathbf{j}_1$ is a permutation of $\{n+1, \ldots, 2n\}$ and $\mathbf{j}_2$ is a permutation of $\{1, \ldots, n\}$. This time, we have $t(\mathbf{j}) = t(\mathbf{j_1}) + t(\mathbf{j_2}) + n^2$ so - (defining $J$ as expected, noting $AB = [x_{ij}]_{i,j=1}^{n,n}$) + (defining $J$ as expected, noting $AB = [x_{ij}]_{i,j=1}^{n}$ and with \(\delta_{ij}\) the Kronecker delta function) \begin{align*} \det(D) & = \sum_{\mathbf{j} \in J} (-1)^{t(\mathbf{j})} d_{1j_1} \cdots d_{nj_n} d_{(n+1)j_{n+1}} \cdots d_{(2n)j_{2n}}\\ - & = \sum_{\mathbf{j} \in J} (-1)^{t(\mathbf{j}_1)} x_{1(j_1-n)} \cdots x_{n(j_n-n)} (-1)^{t(\mathbf{j}_2)+n^2} (-\delta_{1j_{n+1})}) \cdots (-\delta_{nj_{2n})})\\ + & = \sum_{\mathbf{j} \in J} (-1)^{t(\mathbf{j}_1)} x_{1(j_1-n)} \cdots x_{n(j_n-n)} (-1)^{t(\mathbf{j}_2)+n^2} (-\delta_{1(j_{n+1})}) \cdots (-\delta_{n(j_{2n})})\\ & = \left(\sum_{\mathbf{j}} (-1)^{t(\mathbf{j})} x_{1j_1} \cdots x_{nj_n}\right) (-1)^{n^2}\left(\sum_{\mathbf{j}} (-1)^{t(\mathbf{j})} (-\delta_{1(j_1)}) \cdots (-\delta_{n(j_n)})\right)\\ & = \det(AB) (-1)^{n^2}\det(-I_n)\\ & = \det(AB) (-1)^{n^2+n}\\ & = \det(AB) (-1)^{n(n+1)}\\ - & = \det(AB) + & = \det(AB), \end{align*} since either $n$ or $n+1$ is even. + Since we know \(\det C = \det D\), we thus conclude that \(\det A \det B = \det(AB)\). \end{solution} \exo{2} +Show that we have +\begin{align*} +\det(\lambda I_n - C) &= \det \begin{bmatrix} +\lambda & -1 & & & 0\\ +& \lambda & \ddots & & \\ +&& \ddots & \ddots &\\ +0 &&& \lambda & -1\\ +a_0 & a_1 & \cdots & a_{n-2} & \lambda + a_{n-1} +\end{bmatrix}\\ +&= a_0 + a_1 \lambda + \dots + a_{n-1}\lambda^{n-1} + \lambda^n, +\end{align*} +where the matrix \(C_{n \times n}\) above is called the \emph{companion matrix} of the polynomial. 
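+
+For instance, for \(n = 2\) the claim can be checked directly:
+\[
+\det \begin{bmatrix} \lambda & -1 \\ a_0 & \lambda + a_1 \end{bmatrix}
+= \lambda(\lambda + a_1) + a_0
+= a_0 + a_1 \lambda + \lambda^2.
+\]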
+ \begin{solution} We first notice that \begin{align*} - \det\begin{pmatrix} + \det\begin{bmatrix} \lambda & -1 &\\ & \ddots & \ddots\\ & & \lambda & -1\\ & & & \lambda - \end{pmatrix}^{n \times n} + \end{bmatrix}^{n \times n} & = \lambda - \det\begin{pmatrix} + \det\begin{bmatrix} \lambda & -1 &\\ & \ddots & \ddots\\ & & \lambda & -1\\ & & & \lambda - \end{pmatrix}^{n-1 \times n-1} \\ - & = \lambda^n + \end{bmatrix}^{(n-1) \times (n-1)} \\ + & = \lambda^n, \end{align*} where $n$ is the dimension of the original matrix. Using the cofactor technique on the last column recursively, \begin{align*} \det(\lambda I_n - C) & = (\lambda + a_{n-1})\lambda^{n-1} - + \det(\lambda I_{n-1} - C_{n-1} - \lambda e_{n-1}e_{n-1}^T)\\ + + \det(\lambda I_{n-1} - C_{n-1} - \lambda e_{n-1}e_{n-1}^\top)\\ & = \lambda^n + a_{n-1}\lambda^{n-1} + a_{n-2} \lambda^{n-1} - + \det(\lambda I_{n-2} - C_{n-2} - \lambda e_{n-2}e_{n-2}^T)\\ + + \det(\lambda I_{n-2} - C_{n-2} - \lambda e_{n-2}e_{n-2}^\top)\\ & = \lambda^n + a_{n-1}\lambda^{n-1} + \cdots + a_0. \end{align*} \end{solution} \exo{2} +Show that the determinant of a tridiagonal matrix (sometimes referred to as a \emph{Jacobi matrix}) +\[ +J_n = \begin{bmatrix} +a_1 & b_2 & & & 0\\ +c_2 & a_2 & \ddots & & \\ +& \ddots & \ddots & \ddots &\\ +&& \ddots & \ddots & b_n\\ +0 &&& c_n & a_n\\ +\end{bmatrix} +\] +satisfies the following recurrence relation: +\[ +\det(J_n) = a_n \det(J_{n-1}) - b_n c_n \det(J_{n-2}). +\] + \begin{solution} Using the cofactor technique on the last column then on the last line, \begin{align*} \det(J_n) & = a_n\det(J_{n-1}) - b_n \det - \begin{pmatrix} + \begin{bmatrix} J_{n-2} & & 0 \\ & & b_{n-1} \\ 0 & 0 & c_n - \end{pmatrix}\\ + \end{bmatrix}\\ & = a_n\det(J_{n-1}) - b_n c_n \det(J_{n-2}). \end{align*} \end{solution} \exo{4} +Verify that, for a \emph{Vandermonde matrix}, the following identity holds true: +\[ +\det +\begin{bmatrix} + 1 & 1 & \cdots & 1\\ + x_1 & x_2 & \cdots & x_n\\ + x_1^2 & x_2^2 & \cdots & x_n^2\\ + \vdots & \vdots & & \vdots\\ + x_1^{n-1} & x_2^{n-1} & \cdots & x_n^{n-1} +\end{bmatrix} = \prod_{j < i} (x_i - x_j). +\] + \begin{solution} Let's prove it by induction. \begin{itemize} \item If $n = 1$, we have \[ \det - \begin{pmatrix} + \begin{bmatrix} 1 - \end{pmatrix} - = 1 + \end{bmatrix} + = 1, \] and for $n = 2$, we have \[ \det - \begin{pmatrix} + \begin{bmatrix} 1 & 1\\ x_1 & x_2 - \end{pmatrix} - = (x_2 - x_1) + \end{bmatrix} + = (x_2 - x_1). \] \item - For $i = 1, \ldots, n-1$, replacing the line $i+1$ - by the line $i+1$ subtracted by $x_n$ times the line $i$ + For $i = 1, \ldots, n-1$, replacing line $i+1$ + by line $i+1$ from which we subtract $x_n$ times line $i$ does not change the determinant since - its an elementary line transformation (property 9.). - We have therefore + it is an elementary line transformation (Property~9 of Proposition~1.2). 
+ We therefore have \begin{align*} \det - \begin{pmatrix} + \begin{bmatrix} 1 & 1 & \cdots & 1\\ x_1 & x_2 & \cdots & x_n\\ x_1^2 & x_2^2 & \cdots & x_n^2\\ \vdots & \vdots & \ddots & \vdots\\ x_1^{n-1} & x_2^{n-1} & \cdots & x_n^{n-1} - \end{pmatrix} + \end{bmatrix} & = \det - \begin{pmatrix} + \begin{bmatrix} 1 & 1 & \cdots & 1\\ x_1-x_n & x_2-x_n & \cdots & 0\\ x_1^2-x_1x_n & x_2^2 - x_2x_n & \cdots & 0\\ \vdots & \vdots & \ddots & \vdots\\ x_1^{n-1}-x_1^{n-2}x_n & x_2^{n-1}-x_2^{n-2}x_n & \cdots & 0 - \end{pmatrix}\\ + \end{bmatrix}\\ & = \det - \begin{pmatrix} + \begin{bmatrix} 1 & 1 & \cdots & 1\\ (x_1-x_n) & x_2-x_n & \cdots & 0\\ (x_1-x_n)x_1 & (x_2-x_n)x_2 & \cdots & 0\\ \vdots & \vdots & \ddots & \vdots\\ (x_1-x_n)x_1^{n-2} & (x_2-x_n)x_2^{n-2} & \cdots & 0 - \end{pmatrix}\\ + \end{bmatrix}\\ & = (-1)^{n+1} \det - \begin{pmatrix} + \begin{bmatrix} (x_1-x_n) & x_2-x_n & \cdots\\% & x_{n-1}-x_n\\ (x_1-x_n)x_1 & (x_2-x_n)x_2 & \cdots\\% & (x_{n-1}-x_n)x_{n-1}\\ \vdots & \vdots & \ddots\\% & \vdots\\ (x_1-x_n)x_1^{n-2} & (x_2-x_n)x_2^{n-2} & \cdots\\% & (x_{n-1}-x_n)x_{n-1}^{n-2} - \end{pmatrix}\\ + \end{bmatrix}\\ & = (-1)^{n-1} \prod_{j < n} (x_j - x_n) \det - \begin{pmatrix} + \begin{bmatrix} 1 & 1 & \cdots & 1\\ x_1 & x_2 & \cdots & x_{n-1}\\ \vdots & \vdots & \ddots & \vdots\\ x_1^{n-2} & x_2^{n-2} & \cdots & x_{n-1}^{n-2} - \end{pmatrix}\\ + \end{bmatrix}\\ & = \left(\prod_{j < i = n} (x_i - x_j)\right) - \left(\prod_{j < i < n} (x_i - x_j)\right). + \left(\prod_{j < i < n} (x_i - x_j)\right)\\ + &= \prod_{j < i} (x_i - x_j). \end{align*} \end{itemize} \end{solution} @@ -719,13 +886,14 @@ \subsection{} \nosolution \exo{1} +Show that the set of square invertible matrices forms a multiplicative group. \begin{solution} - For the set of \emph{invertible square} matrices $\mathcal{M}$, + For the set of \emph{square invertible} matrices $\mathcal{M}$, to be a \emph{multiplicative group}, it has to verify the four following properties, which we will demonstrate. \begin{enumerate} \item \emph{Closure}. If $A,B \in \mathcal{M}$ then $AB \in \mathcal{M}$. - Using the Cauchy-Binet formula, when $A$ and $B$ have $n \times n$ dimensions + Using the Cauchy--Binet formula, when $A$ and $B$ are $n \times n$ matrices \[ \det(AB) = \sum_{\mathbf{j}_n} A @@ -739,63 +907,80 @@ \subsection{} \mathbf{n} \\ \end{pmatrix} =\det\left([a_{i_k,j_l}]_{k,l=1}^{n}\right) \det\left([b_{i_k,j_l}]_{k,l=1}^{n}\right) - =\det(A) \det(B) + =\det(A) \det(B), \] - because the only set $\mathbf{j}_n$ possible is $\mathbf{j}_n = \{1,\dots,n\}$. + because the only possible tuple $\mathbf{j}_n$ is $\mathbf{j}_n = (1,\dots,n)$. We also know that $\det(A), \det(B) \neq 0$ because $A, B \in \mathcal{M}$, therefore $\det(AB) \neq 0$ and thus $AB \in \mathcal{M}$. - \item \emph{Associativity}. See exercise~1.3. - \item \emph{Identity element}. See exercise~1.3. - \item \emph{Invertibility}. Using theorem~1.3 (A non singular matrix $A$ defined on a - field $\mathcal{F}$ has an inverse.), + \item \emph{Associativity}. See Exercise~1.3. + \item \emph{Identity element}. See Exercise~1.3. + \item \emph{Invertibility}. Using Theorem~1.3 (``a non-singular matrix $A$ defined on a + field $\mathcal{F}$ has an inverse''), we know that if $A \in \mathcal{M}$, - there exists a matrix $A^{-1} = (\det A)^{-1} \text{adj}A$ + there exists a matrix $A^{-1} = (\det A)^{-1} \adj A$ such that $A A^{-1} = A^{-1} A = I$. \end{enumerate} - We conclude that invertible square matrices form a multiplicative group. 
+ We conclude that square invertible matrices form a multiplicative group. \end{solution} \exo{1} +Show that the following properties hold true. + +\begin{align*} +\adj(A^\top) &= \adj(A)^\top, & \adj(A^*) &= \adj(A)^*,\\ +\adj(I) &= I, & \adj(kA) &= k^{n-1} \adj(A),\\ +(A^\top)^{-1} &= (A^{-1})^\top, & (A^*)^{-1} &= (A^{-1})^*,\\ +\det(A^{-1}) &= \det(A)^{-1}, & (AB)^{-1} &= B^{-1}A^{-1}. +\end{align*} + \begin{solution} \begin{itemize} - \item $\adj(A^T) = [A_{ji}^T]_{i,j=1}^n = [A_{ij}]_{i,j=1}^n = (\adj A)^T$ - \item $\adj I = [I_{ji}]_{i,j=1}^n = [1_{ii}]_{i=1}^n = I$ - \item Here we need to verify that the inverse of $A^T$ is $(A^{-1})^T$. + \item $\adj(A^\top) = [A_{ji}^\top]_{i,j=1}^n = [A_{ij}]_{i,j=1}^n = (\adj A)^\top$. + \item $\adj I = [I_{ji}]_{i,j=1}^n = [1_{ii}]_{i=1}^n = I$. + \item Here we need to verify that the inverse of $A^\top$ is $(A^{-1})^\top$. More precisely \[ \left\{ \begin{array}{l} - (A^T)(A^{-1})^T = (A^{-1}A)^T = I^T = I \\ - (A^{-1})^T (A^T) = (A A^{-1})^T = I^T = I \\ + (A^\top)(A^{-1})^\top = (A^{-1}A)^\top = I^\top = I,\\ + (A^{-1})^\top (A^\top) = (A A^{-1})^\top = I^\top = I.\\ \end{array}\right. \] \item We know that $\det(A^{-1}) \det(A) = \det(A^{-1}A) = \det(I) = 1$ and thus that $\det(A^{-1}) = (\det(A))^{-1}$. - \item $\adj(A^*) = (\adj A)^*$ : same as the first point except we take the - complex conjugate. - \item Using determinant and inverse matrix properties + \item $\adj(A^*) = (\adj A)^*$: same as the first point except we take the + complex conjugate instead of the transpose. + \item Using determinant and inverse matrix properties, \begin{align*} \adj(kA) &= (kA)^{-1} \det(kA) \\ &= k^{-1} A^{-1} k^n \det(A) \\ - &= k^{n-1} \adj(A) + &= k^{n-1} \adj(A). \end{align*} \item Here we need to verify that the inverse of $A^*$ is $(A^{-1})^*$. - More precisely + More precisely, \[ \left\{ \begin{array}{l} - (A^*)(A^{-1})^* = (A^{-1}A)^* = I^* = I \\ - (A^{-1})^* (A^*) = (A A^{-1})^* = I^* = I \\ + (A^*)(A^{-1})^* = (A^{-1}A)^* = I^* = I,\\ + (A^{-1})^* (A^*) = (A A^{-1})^* = I^* = I.\\ \end{array}\right. \] \item Here we need to verify that the inverse of $AB$ is $B^{-1}A^{-1}$. - More precisely + More precisely, \[ \left\{ \begin{array}{l} - (AB) (B^{-1}A^{-1}) = A I A^{-1} = A A^{-1} = I \\ - (B^{-1}A^{-1}) (AB) = B^{-1} I B = B^{-1} B = I \\ + (AB) (B^{-1}A^{-1}) = A I A^{-1} = A A^{-1} = I, \\ + (B^{-1}A^{-1}) (AB) = B^{-1} I B = B^{-1} B = I. \\ \end{array}\right. \] \end{itemize} \end{solution} \subsection{} +The LDU decomposition of a matrix \(A\) (whose columns and rows can be permuted) is given by: +\[ +P_1 A P_2 = LDU, +\] +where \(P_1\) and \(P_2\) are permutation matrices, \(L\) and \(U\) are triangular (lower and upper respectively) matrices with ones on the diagonal and \(D\) is a diagonal matrix. +Such a decomposition is the result of Gaussian elimination with complete pivoting. +Compare it to the canonical form (1.14). + \nosolution diff --git a/src/q7/matrix-INMA2380/exercises/ch2.tex b/src/q7/matrix-INMA2380/exercises/ch2.tex index 74fdbcf58..a877b5b08 100644 --- a/src/q7/matrix-INMA2380/exercises/ch2.tex +++ b/src/q7/matrix-INMA2380/exercises/ch2.tex @@ -1,50 +1,62 @@ -\section{Linear system} +\section{Linear applications, orthogonalization +and the \(QR\) factorization} + \exo{1} +Verify that \(\sset_1 \cap \sset_2, \sset_1 + \sset_2, \Ker(A), \Ima(A)\) and \(A\sset\) are linear subspaces. 
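+
+As a concrete instance of the last three sets, take
+\[
+A = \begin{bmatrix} 1 & 0 \\ 0 & 0 \end{bmatrix}, \qquad \Ker(A) = \mathop{\mathrm{span}}\{\mathbf{e}_2\}, \qquad \Ima(A) = \mathop{\mathrm{span}}\{\mathbf{e}_1\},
+\]
+and \(A\R^2 = \Ima(A)\); each of these is indeed a linear subspace, as verified below.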
\begin{solution} \begin{itemize} - \item If $a, b \in \mathcal{S}_1 \cap \mathcal{S}_2$, - it means that $a \in \mathcal{S}_1, a \in \mathcal{S}_2$ - and $b \in \mathcal{S}_1, b \in \mathcal{S}_2$. + \item If $\mathbf{a}, \mathbf{b} \in \mathcal{S}_1 \cap \mathcal{S}_2$, + it means that $\mathbf{a} \in \mathcal{S}_1, \mathbf{a} \in \mathcal{S}_2$ + and $\mathbf{b} \in \mathcal{S}_1, \mathbf{b} \in \mathcal{S}_2$. Since $\mathcal{S}_1$ and $\mathcal{S}_2$ - are linear subspaces, $\alpha a + \beta b \in \mathcal{S}_1$ - and $\alpha a + \beta b \in \mathcal{S}_2$. - Finally, $\alpha a + \beta b \in \mathcal{S}_1 \cap \mathcal{S}_2$. - \item If $a,b \in \mathcal{S}_1 + \mathcal{S}_2$, - it means that $a$ can be written as $a_1 + a_2$ - such that $a_1 \in \mathcal{S}_1, a_2 \in \mathcal{S}_2$ - and that $b$ can be written as $b_1 + b_2$ - such that $b_1 \in \mathcal{S}_1, b_2 \in \mathcal{S}_2$. - Next, $\alpha a + \beta b = \alpha (a_1 + a_2) + \beta (b_1 + b_2) - =(\alpha a_1 + \beta b_1) + (\alpha a_2 + \beta b_2)$, + are linear subspaces, $\alpha \mathbf{a} + \beta \mathbf{b} \in \mathcal{S}_1$ + and $\alpha \mathbf{a} + \beta \mathbf{b} \in \mathcal{S}_2$. + Finally, $\alpha \mathbf{a} + \beta \mathbf{b} \in \mathcal{S}_1 \cap \mathcal{S}_2$. + \item If $\mathbf{a},\mathbf{b} \in \mathcal{S}_1 + \mathcal{S}_2$, + it means that $\mathbf{a}$ can be written as $\mathbf{a}_1 + \mathbf{a}_2$ + such that $\mathbf{a}_1 \in \mathcal{S}_1, \mathbf{a}_2 \in \mathcal{S}_2$ + and that $\mathbf{b}$ can be written as $\mathbf{b}_1 + \mathbf{b}_2$ + such that $\mathbf{b}_1 \in \mathcal{S}_1, \mathbf{b}_2 \in \mathcal{S}_2$. + Next, $\alpha \mathbf{a} + \beta \mathbf{b} = \alpha (\mathbf{a}_1 + \mathbf{a}_2) + \beta (\mathbf{b}_1 + \mathbf{b}_2) + =(\alpha \mathbf{a}_1 + \beta \mathbf{b}_1) + (\alpha \mathbf{a}_2 + \beta \mathbf{b}_2)$, which is the sum of an element of $\mathcal{S}_1$ and an element of $\mathcal{S}_2$, and thus $\mathcal{S}_1 + \mathcal{S}_2$ is a linear subspace. - \item We need to show that $\alpha a + \beta b \in \Ker A$, - or equivalently $A(\alpha a + \beta b) = 0$ - if $a, b \in \Ker A$. - We have that $A(\alpha a + \beta b) = \alpha Aa + \beta Ab = 0$. + \item We need to show that $\alpha \mathbf{a} + \beta \mathbf{b} \in \Ker A$, + or equivalently $A(\alpha \mathbf{a} + \beta \mathbf{b}) = 0$ + if $\mathbf{a}, \mathbf{b} \in \Ker A$. + We have that $A(\alpha \mathbf{a} + \beta \mathbf{b}) = \alpha A\mathbf{a} + \beta A\mathbf{b} = 0$. \item Same reasoning as third point. \item Same reasoning as third point. \end{itemize} \end{solution} \exo{1} +Show that for all matrices \(R\) (with suitable dimensions), we have +\[ +\Ker(RA) \supseteq \Ker(A), \quad \Ima(RA) \subseteq \Ima(A). +\] \begin{solution} \begin{itemize} \item If $x \in \Ker A$, then $Ax = 0$ which also means that $RAx = R0 = 0$ so $x \in \Ker(RA)$. - \item If $y \in \Ima(AR)$, then there is a $x$ + \item If $y \in \Ima(AR)$, then there is an $x$ such that $ARx = y$ - which also means that there is a $x'$ such that - $Ax' = y$ since we can just take $x' = Rx$. + which also means that there is an $x'$ such that + $Ax' = y$, since we can just take $x' = Rx$. So $y \in \Ima A$. \end{itemize} \end{solution} \exo{2} +Show that all vectors \(\mathbf{x} \in \sset = \mathop{\mathrm{span}}\{\mathbf{a}_1, \dots, \mathbf{a}_k\}\) have a unique representation +\[ +\mathbf{x} = \sum_{i=1}^k \alpha_i \mathbf{a}_i +\] +if \(\{\mathbf{a}_i \mid i = 1, \dots, k\}\) is a basis of \(\sset\). 
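+
+The independence assumption is essential here: with the non-basis family \(\mathbf{a}_1 = \mathbf{e}_1\), \(\mathbf{a}_2 = \mathbf{e}_2\), \(\mathbf{a}_3 = \mathbf{e}_1 + \mathbf{e}_2\) in \(\R^2\), the vector \(\mathbf{e}_1 + \mathbf{e}_2\) admits two distinct representations,
+\[
+\mathbf{e}_1 + \mathbf{e}_2 = 1 \cdot \mathbf{a}_1 + 1 \cdot \mathbf{a}_2 + 0 \cdot \mathbf{a}_3 = 0 \cdot \mathbf{a}_1 + 0 \cdot \mathbf{a}_2 + 1 \cdot \mathbf{a}_3,
+\]
+which is exactly what the proof below rules out for a basis.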
\begin{solution} - Let's suppose by contradiction that there are 2 ways - to decompose $\mathbf{x}$ + Let's suppose by contradiction that there are two ways + to decompose $\mathbf{x}$. \begin{align} \label{eq:2.3.1} @@ -53,59 +65,61 @@ \section{Linear system} & = \sum_{i=1}^k \beta_i \mathbf{a}_i \end{align} with $\beta_i \neq \alpha_i$ for at least one $i \in \{0, \ldots, k\}$. - $\eqref{eq:2.3.2}-\eqref{eq:2.3.1}$ gives + However, $\eqref{eq:2.3.2}-\eqref{eq:2.3.1}$ gives \[ 0 = \sum_{i=1}^k (\beta_i-\alpha_i) \mathbf{a}_i. \] - by hypothesis, there is a $i$ such that $\beta_i-\alpha_i \neq 0$ - which is a contradiction with the linear independence of the $\mathbf{a}_i$. + By hypothesis, there is an $i$ such that $\beta_i-\alpha_i \neq 0$ + which is in contradiction with the linear independence of the vectors $\mathbf{a}_i$. \end{solution} \exo{4} +Show that two bases of a same space \(\sset\) have the same number of elements. \begin{solution} - Let's suppose by contradiction that there are 2 - basis $a_1, \ldots, a_n$ and $b_1, \ldots, b_m$ with $n < m$. - - Let's just remember that $a_1, \ldots, a_n$ is spanning - and $b_1, \ldots, b_m$ are linearly independent. - - We have therefore that - \[ b_1 = \alpha_1 a_1 + \ldots + \alpha_n a_n\] - with at least one $\alpha_i \neq 0$ since $b_1 \neq 0$. - Wlog, let's say that $\alpha_1 \neq 0$. - Therefore - \[ a_1 = \frac{1}{\alpha_1}b_1 + \frac{\alpha_2}{\alpha_1} a_2 + \ldots + \frac{\alpha_n}{\alpha_1} a_n\] - so $(b_1, a_2, \ldots, a_n)$ is spanning. + Let's suppose by contradiction that there are two + bases $\{\mathbf{a}_1, \ldots, \mathbf{a}_n\}$ and $\{\mathbf{b}_1, \ldots, \mathbf{b}_m\}$ for \(\sset\), with $n < m$. + + Remembering that $\mathbf{a}_1, \ldots, \mathbf{a}_n$ is spanning + and $\mathbf{b}_1, \ldots, \mathbf{b}_m$ are linearly independent, we have that + \[ \mathbf{b}_1 = \alpha_1 \mathbf{a}_1 + \dots + \alpha_n \mathbf{a}_n,\] + with at least one $\alpha_i \neq 0$ since $\mathbf{b}_1 \neq 0$. + Without loss of generality, let's say that $\alpha_1 \neq 0$. + Therefore, + \[ \mathbf{a}_1 = \frac{1}{\alpha_1}\mathbf{b}_1 + \frac{\alpha_2}{\alpha_1} \mathbf{a}_2 + \dots + \frac{\alpha_n}{\alpha_1} \mathbf{a}_n,\] + so $(\mathbf{b}_1, \mathbf{a}_2, \ldots, \mathbf{a}_n)$ is spanning. We now have - \[ b_2 = \beta_1 b_1 + \beta_2 a_2 + \ldots + \beta_n a_n\] - if $\beta_2 = \cdots = \beta_n = 0$, $b_1, b_2$ are not linearly independent. - Wlog, let's say that $\beta_2 \neq 0$, - $(b_1,b_2,a_3,\ldots,a_n)$ is therefore spanning. + \[ \mathbf{b}_2 = \beta_1 \mathbf{b}_1 + \beta_2 \mathbf{a}_2 + \dots + \beta_n \mathbf{a}_n.\] + If $\beta_2 = \cdots = \beta_n = 0$, $\mathbf{b}_1$ and $\mathbf{b}_2$ are not linearly independent. + Without loss of generality, let's say that $\beta_2 \neq 0$; + $(\mathbf{b}_1,\mathbf{b}_2,\mathbf{a}_3,\ldots,\mathbf{a}_n)$ is therefore spanning. - Continuing this reasoning, $(b_1,\ldots,b_n)$ is spanning. - $b_{n+1}$ is therefore a linear combination of $b_1, \ldots, b_n$ which is a contradiction. + Continuing this reasoning, $(\mathbf{b}_1,\ldots,\mathbf{b}_n)$ is spanning. + $\mathbf{b}_{n+1}$ would therefore be a linear combination of $\mathbf{b}_1, \ldots, \mathbf{b}_n$, which is a contradiction. \end{solution} \exo{2} +Using the Schwarz inequality, show that +\[ +\abs{\trace(Y^*X)} \leqslant \norm{X}_F \norm{Y}_F +\] +where \(X, Y \in \C^{m \times n}\) and \(\norm{\cdot}_F\) is the Frobenius norm. 
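+
+As a quick numerical check with \(m = n = 2\), take \(X = I_2\) and \(Y = \begin{bmatrix} 1 & 0 \\ 0 & -1 \end{bmatrix}\): then
+\[
+\abs{\trace(Y^*X)} = \abs{1 - 1} = 0 \leqslant \norm{X}_F \norm{Y}_F = \sqrt{2} \cdot \sqrt{2} = 2.
+\]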
\begin{solution} Since $\mathbb{C}^{m \times n}$ is a vector space and if $X,Y \in \mathbb{C}^{m \times n}$, $\trace(Y^*X) \in \mathbb{C}$ which is a field, - $\trace(Y^*X)$ could be the scalar product $(X,Y)$. - Let's check. + $\trace(Y^*X)$ could be a scalar product $\langle X,Y \rangle$. Since - \begin{align*} \trace(X^*X) - & = \sum_{i = 1}^m \sum_{j = 1}^n \overline{x_{ij}}x_{ij}\\ - & = \sum_{i = 1}^m \sum_{j = 1}^n |x_{ij}|^2, + & = \sum_{i = 1}^n \sum_{j = 1}^m \overline{x_{ji}}x_{ji}\\ + & = \sum_{i = 1}^n \sum_{j = 1}^m \abs{x_{ji}}^2, \end{align*} we clearly have $\trace(X^*X) \geq 0$ for all $X \in \mathbb{C}^{m \times n}$ and - $\trace(X^*X) = 0 \Longleftrightarrow X = 0$ for all $X \in \mathbb{C}^{m \times n}$. + $\trace(X^*X) = 0 \iff X = 0$. We also have \begin{align*} \trace(Z^*(\alpha X + \beta Y)) & = \trace(\alpha Z^*X + \beta Z^*Y)\\ - & = \alpha\trace(Z^*X) + \beta\trace(Z^*Y)\\ + & = \alpha\trace(Z^*X) + \beta\trace(Z^*Y) \end{align*} and \begin{align*} @@ -114,19 +128,18 @@ \section{Linear system} & = \overline{\trace(X^*Y)}. \end{align*} - We just proved it is a scalar product and we can - therefore apply the \emph{Schwarz inequality} which gives + We thus just proved that the trace can be used to define a scalar product and we can therefore apply the \emph{Schwarz inequality} which gives \begin{align*} - |\trace(Y^*X)| + \abs{\trace(Y^*X)} & \leq \sqrt{\trace(X^*X)} \sqrt{\trace(Y^*Y)}\\ - & = \|X\|_F \|Y\|_F. + & = \norm{X}_F \norm{Y}_F. \end{align*} \end{solution} \exo{1} -Prove that $R$ is the Cholesky factor of the positive definite matrix $A^*A$. +Show that $R$ is the \emph{Cholesky factor} of the positive definite matrix $A^*A$. \begin{solution} - Let us recall that the Cholesky decomposition of a matrix $A$ consist in finding + Let us recall that the Cholesky decomposition of a matrix $A$ consists in finding a lower triangular matrix $L$ such that $A = L L^*$. In this case we have @@ -137,40 +150,43 @@ \section{Linear system} & = R^*R. \end{align*} - Note that $A$ is \emph{squared} + Note that $A$ is \emph{square} (since otherwise it wouldn't be defined for it to be positive definite) - and is \emph{full rank} (if not, it has an zero eigenvalue and cannot be positive definite). - Therefore $R$ is an upper triangular square matrix with no zero element in this case. + and has \emph{full rank} (if not, it must have a zero eigenvalue and cannot be positive definite). + Therefore, $R$ is an upper triangular square matrix with no zero element in this case. \end{solution} \exo{2} +Let \(X\) and \(Y\) be two matrices whose columns form bases of the subspaces \(\mathcal{X}\) and \(\mathcal{Y}\) respectively. +Show that \(\mathcal{X} \perp \mathcal{Y}\) if and only if \(Y^*X = 0\). \begin{solution} \begin{description} - \item[$\Rightarrow$] + \item[$\implies$] $\mathcal{X} \perp \mathcal{Y}$ implies that - $(\mathbf{x},\mathbf{y}) = 0$, $\forall \mathbf{x} \in \mathcal{X}$ + $\langle \mathbf{x},\mathbf{y} \rangle = 0$, $\forall \mathbf{x} \in \mathcal{X}$ and $\forall \mathbf{y} \in \mathcal{Y}$. - In particular, it should be true for every element of the base. - We need to have $(x_i,y_j) = 0$, $\forall i,j$ so in other words + In particular, it should be true for every element of the basis. + We need to have $\langle \mathbf{x}_i,\mathbf{y}_j\rangle = 0$, $\forall i,j$. + In other words \[ - \begin{pmatrix} - y_1 & y_2 & \cdots & y_n - \end{pmatrix}^* - \begin{pmatrix} - x_1 & x_2 & \cdots & x_n - \end{pmatrix} - = 0. 
+ \begin{bmatrix} + \mathbf{y}_1 & \mathbf{y}_2 & \cdots & \mathbf{y}_n + \end{bmatrix}^* + \begin{bmatrix} + \mathbf{x}_1 & \mathbf{x}_2 & \cdots & \mathbf{x}_n + \end{bmatrix} + = 0, \] and thus $Y^* X = 0$. - \item[$\Leftarrow$] - Let $\sum a_ix_i$ be an element of $\mathcal{X}$ and $\sum b_iy_i$ be an element of $\mathcal{Y}$. + \item[$\impliedby$] + Let $\sum_i a_i\mathbf{x}_i$ be an element of $\mathcal{X}$ and $\sum_i b_i\mathbf{y}_i$ be an element of $\mathcal{Y}$. We need \begin{align*} - (\sum_i a_ix_i, \sum_i b_iy_i) - & = \sum_i a_i (x_i, \sum_j b_jy_j)\\ - & = \sum_i a_i \overline{(\sum_j b_jy_j, x_i)}\\ - & = \sum_i \sum_j a_i\overline{b_j} \, \overline{(y_j, x_i)}\\ - & = \sum_i \sum_j a_i\overline{b_j} (x_i, y_j)\\ + \left\langle\sum_i a_i\mathbf{x}_i, \sum_i b_i\mathbf{y}_i\right\rangle + & = \sum_i a_i \left\langle \mathbf{x}_i, \sum_j b_j\mathbf{y}_j\right\rangle\\ + & = \sum_i a_i \overline{\left\langle \sum_j b_j\mathbf{y}_j, \mathbf{x}_i\right\rangle}\\ + & = \sum_i \sum_j a_i\overline{b_j} \, \overline{\langle \mathbf{y}_j, \mathbf{x}_i\rangle}\\ + & = \sum_i \sum_j a_i\overline{b_j} \langle \mathbf{x}_i, \mathbf{y}_j\rangle\\ & = \sum_i \sum_j a_i \overline{b_j} 0\\ & = 0. \end{align*} @@ -178,43 +194,46 @@ \section{Linear system} \end{solution} \exo{3} +Show that the orthogonal complement satisfies the following properties: +\begin{align*} +(\sset^\perp)^\perp &= \sset,\\ +(\sset_1 + \sset_2)^\perp &= \sset_1^\perp \cap \sset_2^\perp,\\ +(\sset_1 \cap \sset_2)^\perp &= \sset_1^\perp + \sset_2^\perp. +\end{align*} \begin{solution} \begin{enumerate} \item \begin{align*} - (S^\perp)^\perp - & = \{x | (x,y) = 0, \forall y \in \mathcal{S}^\perp\}\\ - & = \{x | (x,y) = 0, \forall y \in \{z | (z,w) = 0, \forall w \in \mathcal{S}\}\}. + (\sset^\perp)^\perp + & = \{\mathbf{x} \mid \langle \mathbf{x},\mathbf{y}\rangle = 0, \forall \mathbf{y} \in \mathcal{S}^\perp\}\\ + & = \big\{\mathbf{x} \mid \langle \mathbf{x},\mathbf{y}\rangle = 0, \forall \mathbf{y} \in \{\mathbf{z} \mid \langle \mathbf{z},\mathbf{w}\rangle = 0, \forall \mathbf{w} \in \mathcal{S}\}\big\}. \end{align*} - In other word, $x$ should be perpendicular to every vector which is perpendicular to all vectors of $\mathcal{S}$. - This is clearly true for $x \in \mathcal{S}$. - If $x \notin \mathcal{S}$, since it is perpendicular to every vector of $\mathcal{S}^\perp$, $x \notin \mathcal{S}^\perp$, - this contradicts the lemma~2.8. + In other words, $\mathbf{x}$ should be perpendicular to every vector which is perpendicular to all vectors of $\mathcal{S}$. + This is clearly true for $\mathbf{x} \in \mathcal{S}$. + If $\mathbf{x} \notin \mathcal{S}$, since it is perpendicular to every vector of $\mathcal{S}^\perp$, $\mathbf{x} \notin \mathcal{S}^\perp$, which contradicts Lemma~2.8. 
\item \begin{align*} (\mathcal{S}_1 + \mathcal{S}_2)^\perp - & = \{x | (x,y) = 0, \forall y \in (\mathcal{S}_1+\mathcal{S}_2)\}\\ - & = \{x | (x,\alpha y_1 + \beta y_2) = 0, \forall \alpha,\beta, \forall y_1 \in \mathcal{S}_1, \forall y_2 \in \mathcal{S}_2\}\\ - & = \{x | ((x, y_1) = 0, \forall y_1 \in \mathcal{S}_1) \land ((x, y_2) = 0, \forall y_2 \in \mathcal{S}_2)\}\\ - & = \{x | (x, y_1) = 0, \forall y_1 \in \mathcal{S}_1\} \cap \{x | (x, y_2) = 0, \forall y_2 \in \mathcal{S}_2\}\\ + & = \{\mathbf{x} \mid \langle\mathbf{x},\mathbf{y}\rangle = 0, \forall \mathbf{y} \in (\mathcal{S}_1+\mathcal{S}_2)\}\\ + & = \{\mathbf{x} \mid \langle \mathbf{x},\alpha \mathbf{y}_1 + \beta \mathbf{y}_2\rangle = 0, \forall \alpha,\beta, \forall \mathbf{y}_1 \in \mathcal{S}_1, \forall \mathbf{y}_2 \in \mathcal{S}_2\}\\ + & = \{\mathbf{x} \mid (\langle\mathbf{x}, \mathbf{y}_1\rangle = 0, \forall \mathbf{y}_1 \in \mathcal{S}_1) \land (\langle\mathbf{x}, \mathbf{y}_2\rangle = 0, \forall \mathbf{y}_2 \in \mathcal{S}_2)\}\\ + & = \{\mathbf{x} \mid \langle\mathbf{x}, \mathbf{y}_1\rangle = 0, \forall \mathbf{y}_1 \in \mathcal{S}_1\} \cap \{\mathbf{x} \mid \langle\mathbf{x}, \mathbf{y}_2\rangle = 0, \forall \mathbf{y}_2 \in \mathcal{S}_2\}\\ & = \mathcal{S}_1^\perp \cap \mathcal{S}_2^\perp. \end{align*} \item - $(\mathcal{S}^\perp)^\perp$ actually ensures us that - 2 subspaces are equal iff their orthogonal complements are equal. - If they are equal, it is obvious that their complement is equal - and if their complement is equal, the complement of their complement is equal so their are equal. - - We can therefore prove that the complements are equals + $(\mathcal{S}^\perp)^\perp = \sset$ actually ensures that + two subspaces are equal if and only if their orthogonal complements are equal. + If they are equal, it is obvious that their complements are equal + and if their complements are equal, then the complements of their complements are equal so they are equal. + We can therefore prove that the complements are equal: \begin{align*} ((\mathcal{S}_1 \cap \mathcal{S}_2)^\perp)^\perp & = (\mathcal{S}_1^\perp + \mathcal{S}_2^\perp)^\perp\\ \mathcal{S}_1 \cap \mathcal{S}_2 - & = (\mathcal{S}_1^\perp + \mathcal{S}_2^\perp)^\perp + & = (\mathcal{S}_1^\perp + \mathcal{S}_2^\perp)^\perp. \end{align*} - Using the previous property, we have - + Using the previous property, we then have \begin{align*} (\mathcal{S}_1^\perp + \mathcal{S}_2^\perp)^\perp & = (\mathcal{S}_1^\perp)^\perp \cap (\mathcal{S}_2^\perp)^\perp\\ @@ -224,234 +243,31 @@ \section{Linear system} \end{solution} \exo{2} -\begin{solution} - Let $U_{up} = [u_{ij}]_{i,j=1}^{n,n}$. - We must have - \[ U_{up}\Lambda = \Lambda U_{up}. \] - At the elements $i,j$, we have - \[ u_{ij} \lambda_j = \lambda_i u_{ij} \] - or - \[ u_{ij} (\lambda_j - \lambda_i) = 0. \] - If $i \neq j$, this means that $u_{ij} = 0$ since $\lambda_j - \lambda_i \neq 0$. - - $U_{up}$ must therefore be diagonal. - However it also needs to be hermitian for $UU_{up}$ to be hermitian. - Indeed, we need to have - - \begin{align*} - UU_{up} U_{up}^*U^* & = I\\ - U_{up} U_{up}^* & = U^*U\\ - U_{up} U_{up}^* & = I. - \end{align*} - This means that we need to have $1 = u_{ii}\overline{u_{ii}} = |u_{ii}|^2$ - so $u_{ii}$ is on the unit circle. -\end{solution} +How to construct the complex Givens transformation \(G \in \C^{2 \times 2}\) that transforms an arbitrary vector \(\mathbf{x} \in \C^2\) into \(\begin{bmatrix}\norm{\mathbf{x}} \\ 0\end{bmatrix}\)? 
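+
+As a real-valued numerical instance of the construction derived below, for \(\mathbf{x} = \begin{bmatrix} 3 \\ 4 \end{bmatrix}\) one may take \(c = 3/5\) and \(s = 4/5\), giving
+\[
+G\mathbf{x} = \begin{bmatrix} 3/5 & 4/5 \\ -4/5 & 3/5 \end{bmatrix} \begin{bmatrix} 3 \\ 4 \end{bmatrix} = \begin{bmatrix} 5 \\ 0 \end{bmatrix} = \begin{bmatrix} \norm{\mathbf{x}} \\ 0 \end{bmatrix}.
+\]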
-\exo{4} \begin{solution} - \emph{Hint:} - Use the property $\trace(AB) = \trace(BA)$. - - Since $H$ is hermitian, there is an unitary $U$ and diagonal $\Lambda$ such that $H = U \Lambda U^*$. - Let's first analyse the RHS - - \begin{align*} - \trace(H) & = \trace(U \Lambda U^*)\\ - & = \trace(\Lambda U^*U)\\ - & = \trace(\Lambda)\\ - & = \sum_{i=1}^n \lambda_i. - \end{align*} - For the LHS, we first need to analyse $Q$. - It is unitary so its eigenvalues are on the unit circles therefore $|x^* Q x| \leq 1$ for all $x$ such that $\|x\|=1$. - Using the triangle inequality and the fact that the $\lambda_i$ are positive - - \begin{align*} - \trace(HQ) - & = \trace(U \Lambda U^* Q)\\ - & = \trace(\Lambda U^* Q U)\\ - & = \sum_{i=1}^n \lambda_i u_{:i}^* Q u_{:i}\\ - & \leq \left|\sum_{i=1}^n \lambda_i u_{:i}^* Q u_{:i}\right|\\ - & \leq \sum_{i=1}^n |\lambda_i| \cdot |u_{:i}^* Q u_{:i}|\\ - & = \sum_{i=1}^n \lambda_i |u_{:i}^* Q u_{:i}|\\ - & \leq \sum_{i=1}^n \lambda_i. - \end{align*} +Suppose \(G\) has the form +\[ +G = \begin{bmatrix} c & s \\ -\bar{s} & \bar{c} \end{bmatrix}. +\] +It is easy to see that \(G^*G = I\), as required, when \(\abs{c}^2 + \abs{s}^2 = 1\). +Next, we observe that +\[ +G\mathbf{x} = \begin{bmatrix} c & s \\ -\bar{s} & \bar{c} \end{bmatrix} \begin{bmatrix} x_1 \\ x_2 \end{bmatrix} \triangleq \begin{bmatrix} \norm{\mathbf{x}} \\ 0 \end{bmatrix}. +\] +The choices +\[ +c = \frac{\bar{x}_1}{\sqrt{\abs{x_1}^2 + \abs{x_2}^2}}, \quad s = \frac{\bar{x}_2}{\sqrt{\abs{x_1}^2 + \abs{x_2}^2}} +\] +satisfy this condition, and thus we have a formula for \(G\). \end{solution} \exo{3} +How to construct the complex Householder transformation \(H \in \C^{n \times n}\) that transforms an arbitrary vector \(\mathbf{x} \in \C^n\) into \[ +H\mathbf{x} = \begin{bmatrix}\pm\norm{\mathbf{x}} \\ 0 \\ \vdots \\ 0\end{bmatrix} +\] +and satisfies \(H^*H = HH^* = I_n\)? \begin{solution} - If $A$ is $m \times n$, then $H$ is $m \times r$ and $Q$ is $r \times n$ for some $r$. - We want $H$ to be positive definite so we need $H$ to be square for it to be defined. - That means that $Q$ is $m \times n$. - - The problem with the proof when $m \neq n$ is that we cannot say - $U \Sigma V^* = U \Sigma U^* U V^*$ since the product is not defined because $U$ is $m \times m$ - and $V$ is $n \times n$. - \begin{itemize} - \item If $m < n$, we have - \begin{align*} - A - & = U - \begin{pmatrix} - \Sigma & 0 - \end{pmatrix} - \begin{pmatrix} - V_1 & V_2 - \end{pmatrix}^*\\ - & = U \Sigma V_1^*\\ - & = (U \Sigma U^*) (U V_1^*) - \end{align*} - with $V_1^*V_1 = I$ (but not $V_1V_1^* = I$, since $I$ is full rank and $V_1$ is not full line rank) - since - \begin{align*} - I - & = - \begin{pmatrix} - V_1 & V_2 - \end{pmatrix}^* - \begin{pmatrix} - V_1 & V_2 - \end{pmatrix}\\ - & = - \begin{pmatrix} - V_1^*V_1 & V_1^*V_2\\ - V_2^*V_1 & V_2^*V_2 - \end{pmatrix} - \end{align*} - - Therefore $QQ^* = (U V_1^*) (U V_1)^* = I$ but $Q^*Q = (U V_1^*)^* (U V_1^*) = V_1V_1^* \neq I$. - \item If $m > n$, we have - \begin{align*} - A - & = - \begin{pmatrix} - U_1 & U_2 - \end{pmatrix} - \begin{pmatrix} - \Sigma \\ 0 - \end{pmatrix} - V^*\\ - & = U_1 \Sigma V^*\\ - & = (U_1 \Sigma U_1^*) (U_1 V^*) - \end{align*} - with $U_1^*U_1 = I$ but not $U_1U_1^* = I$ like for the previous point. - - This time it's $Q^*Q = I$ and $QQ^* = I$. - \end{itemize} -\end{solution} - -\exo{3} -\begin{solution} - \emph{Hint} Use the property $\|A\|_F = \trace(A^TA) = \trace(AA^T)$ and the property $\trace(AB) = \trace(BA)$. 
- - We have (also using $\trace(A^T) = \trace(A)$). - - \begin{align*} - \|AQ^T - B\|_F^2 - & = \trace((AQ^T - B)(AQ^T - B)^T)\\ - & = \trace((AQ^T - B)(QA^T - B^T))\\ - & = \trace(AQ^TQA^T) + \trace(BB^T) - \trace(AQ^TB^T) - \trace(BQA^T)\\ - & = \trace(AQ^TQA^T) + \trace(BB^T) - \trace(AQ^TB^T) - \trace(BQA^T)\\ - & = \trace(AA^T) + \trace(BB^T) - \trace(B^TAQ^T) - \trace(QA^TB)\\ - & = \|A\|_F^2 + \|B\|_F^2 - \trace(B^TAQ^T) - \trace(B^TAQ^T)\\ - & = \|A\|_F^2 + \|B\|_F^2 - 2\trace(B^TAQ^T)\\ - & = \|A\|_F^2 + \|B\|_F^2 - 2\trace(\tilde{H}\tilde{Q} Q^T)\\ - & \geq \|A\|_F^2 + \|B\|_F^2 - 2\trace(\tilde{H}) - \end{align*} - using exercise~2.10 since $\tilde{Q}Q^T$ is unitary and $\tilde{H}$ is hermitian positive semi-definite. - However, taking $Q = \tilde{Q}$, we have the equality. It is therefore optimal. -\end{solution} - -\exo{2} -\begin{solution} - Let $\mathcal{S}_1$ of dimension $m$ and $\mathcal{S}_2$ of dimension $n < m$. - We can have $n$ canonical angles using - - \begin{align*} - S_1^*S_2 - & = U_1 - \begin{pmatrix} - \Sigma\\0 - \end{pmatrix} - U_2^*\\ - & = - \begin{pmatrix} - U_{1,1} & U_{1,2} - \end{pmatrix} - \begin{pmatrix} - \Sigma\\0 - \end{pmatrix} - U_2\\ - & = U_{1,1} \Sigma U_2 - \end{align*} - $\Sigma = \cos(\Theta)$ where the $\theta_i$ are the angles between the $n$ vectors of $S_1U_{1,1}$ - and the $n$ vectors of $S_2U_2$. -\end{solution} - -\exo{2} -\begin{solution} - See syllabus. -\end{solution} - -\exo{2} -\begin{solution} - See syllabus. -\end{solution} - -\exo{2} -\begin{solution} - \begin{itemize} - \item - \begin{align*} - AXAX - & = (AXA)X\\ - & = AX. - \end{align*} - \item - $AX$ is a projector since (1) is satisfied. - - From the theorem~2.18, $\Ker(AX) = \Ima((AX)^*)^\perp$. - Therefore we can just show that $\Ima((AX)^*) = \Ima(AX)$ which is obvious - since (3) is satisfied. - \item - \begin{align*} - XAXA - & = (XAX)A\\ - & = XA. - \end{align*} - \item - $XA$ is a projector since (1) is satisfied. - - From the theorem~2.18, $\Ker(XA) = \Ima((XA)^*)^\perp$. - Therefore we can just show that $\Ima((XA)^*) = \Ima(XA)$ which is obvious - since (4) is satisfied. - \end{itemize} -\end{solution} - -\exo{3} -\begin{solution} - See syllabus. - - If $P$ is orthogonal, we have - \begin{align*} - PP & = P\\ - PPP^* & = PP^*\\ - P & = PP^* - \end{align*} - and $P^*P = I$. -\end{solution} - -\exo{0} -\nosolution - -\exo{0} -\nosolution - -\exo{3} -\begin{solution} - For $s_i \in [\sigma_i - \sigma_{s+1}(A), \sigma_i + \sigma_{s+1}(A)]$ for $i = 1, \ldots, s$, - and - \[ b = \sum_{i=1}^s u_i s_i v_i^T, \] - we have - \[ A - B = \sum_{i=1}^s u_i(\sigma_i - s_i)v_i^T + \sum_{i=s+1}^r u_i\sigma_iv_i^T. \] - which also has its maximum singular value equal to $\sigma_{s+1}(A)$ by definition of the $s_i$. -\end{solution} +We write \(x_1\) as \(r e^{i \theta}\), for \(r, \theta \in \R\). +Taking \(\mathbf{v} = \mathbf{x} \pm e^{i \theta} \norm{\mathbf{x}} \mathbf{e}_1\), and \(H = I_n - 2\frac{\mathbf{v}\mathbf{v}^*}{\mathbf{v}^*\mathbf{v}}\) yields \(H\mathbf{x} = \mp e^{i \theta} \norm{\mathbf{x}} \mathbf{e}_1\). 
+\end{solution} \ No newline at end of file diff --git a/src/q7/matrix-INMA2380/exercises/ch3.tex b/src/q7/matrix-INMA2380/exercises/ch3.tex index 8d2c10211..5c8d2b63e 100644 --- a/src/q7/matrix-INMA2380/exercises/ch3.tex +++ b/src/q7/matrix-INMA2380/exercises/ch3.tex @@ -1,187 +1,229 @@ -\section{Eigenvalues, eigenvectors and similitude} -\exo{1} -\begin{solution} - If $A \Ima(x) \subseteq \Ima(x)$, since $\Ima(x)$ and $A\Ima(x)$ - are vector subspaces and $\Ima(x)$ is of dimension 1, - that means that either $A\Ima(x) = \Ima(x)$ or $A\Ima(x) = \{0\}$. - In the first case, it is an eigenvector of a nonzero eigenvalue. - In the second case, it is an eigenvector of 0. -\end{solution} +\section{Unitary transformations and the Singular Value Decomposition} \exo{2} -\begin{solution} - Let's first show that if $A$ is normal, - it is still normal after an unitary transformation. - - \begin{align*} - (U^TAU)^T(U^TAU) - & = U^TA^TAU\\ - & = U^TAA^TU\\ - & = U^TAUU^TA^TU\\ - & = U^TAU(U^TAU)^T. - \end{align*} - That means that the Schur form of $A$ is normal too. +Assuming that all the diagonal elements of \(\Lambda\) are distinct, show that the matrix \(U_{up}\) is necessarily diagonal and consists only of phases: +\[ +U_{up} = \diag\{e^{i\psi_1}, \dots, e^{i \psi_n}\}. +\] - We can now use the same argument as in theorem~3.3 since $A_s^* = A_s^T$ - and apply it for all $n_1+n_2 = n$ such that $n_1$ is at the end of a block. +\begin{solution} + Let $U_{up} = [u_{ij}]_{i,j=1}^{n}$. + We must have + \[ U_{up}\Lambda = \Lambda U_{up}. \] + At the position $i,j$, we have + \[ u_{ij} \lambda_j = \lambda_i u_{ij} \] + or + \[ u_{ij} (\lambda_j - \lambda_i) = 0. \] + If $i \neq j$, this means that $u_{ij} = 0$ since $\lambda_j - \lambda_i \neq 0$. + + $U_{up}$ must therefore be diagonal. + However it also needs to be hermitian for $UU_{up}$ to be Hermitian. + Indeed, we need to have + \begin{align*} + UU_{up} U_{up}^*U^* & = I\\ + U_{up} U_{up}^* & = U^*U\\ + U_{up} U_{up}^* & = I. + \end{align*} + This means that we need to have $1 = u_{ii}\overline{u_{ii}} = \abs{u_{ii}}^2$ + so $u_{ii}$ is on the unit circle and is a phase. \end{solution} \exo{2} +Show that if \(X\) satisfies +\begin{itemize} + \item (1), then \(AX\) is a projection; + \item (1) and (3), then \(AX\) is an orthogonal projection; + \item (2), then \(XA\) is a projection; + \item (2) and (4), then \(XA\) is an orthogonal projection. +\end{itemize} + \begin{solution} - Let's first show that if $A$ is anti-symmetric, - it is still anti-symmetric after an unitary transformation. + \begin{itemize} + \item + \begin{align*} + AXAX + & = (AXA)X\\ + & = AX. + \end{align*} + \item + $AX$ is a projection since (1) is satisfied. + + From Theorem~3.6, $\Ker(AX) = \Ima((AX)^*)^\perp$. + Therefore, we can just show that $\Ima((AX)^*) = \Ima(AX)$ which is obvious + since (3) is satisfied. + \item + \begin{align*} + XAXA + & = (XAX)A\\ + & = XA. + \end{align*} + \item + $XA$ is a projection since (2) is satisfied. + + From Theorem~3.6, $\Ker(XA) = \Ima((XA)^*)^\perp$. + Therefore we can just show that $\Ima((XA)^*) = \Ima(XA)$ which is obvious + since (4) is satisfied. + \end{itemize} +\end{solution} - \begin{align*} - (U^TAU)^T - & = U^TA^TU\\ - & = U^T(-A)U\\ - & = -U^TAU. - \end{align*} - That means that the Schur form of $A$ is anti-symmetric too. - Since it is anti-symmetric, $A^TA = -A^2 = AA^T$ so it is also normal. +\exo{3} +How can we define the notion of canonical angles between two spaces of different dimension? 
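+As a small illustration (with subspaces chosen here only as an example), take in \(\R^3\) the plane \(\mathcal{S}_1\) spanned by the columns of \(S_1 = \begin{bmatrix} \mathbf{e}_1 & \mathbf{e}_2 \end{bmatrix}\) and the line \(\mathcal{S}_2\) spanned by \(S_2 = \frac{1}{\sqrt{2}}(\mathbf{e}_1 + \mathbf{e}_3)\). Then
+\[
+S_1^* S_2 = \frac{1}{\sqrt{2}}\begin{bmatrix} 1 \\ 0 \end{bmatrix},
+\]
+whose single singular value is \(1/\sqrt{2} = \cos(\pi/4)\), so these two spaces share one canonical angle of \(\pi/4\); the construction in the solution below produces exactly \(\min(\dim \mathcal{S}_1, \dim \mathcal{S}_2)\) such angles.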
- By the exercise~3.2, we know that the Schur form is block diagonal. - But since $A_s = -A_s^T$, - we must have $\alpha_j = -\alpha_j$ which means that $\alpha_j = 0$ - (note that it imposes no restriction on $\beta_j$). - Same for $A_{ii}$. -\end{solution} +% i feel like there's some fuckery going on with matrix dimensions here :( -\exo{1} \begin{solution} - We simply use the corollary since we remove the last line and column. - - If the $\beta_i$ are nonzero, we can see that it is strict since - if $p_i(\lambda) = p_{i-1}(\lambda) = 0$, we have - \[ 0 = -\beta_i^2 p_{i-2}(\lambda) \] - so $p_{i-2}(\lambda) = 0$. - By induction, we have $p_0(\lambda) = 0$ which is absurd. + Let $\mathcal{S}_1$ be of dimension $m$ and $\mathcal{S}_2$ be of dimension $n < m$. + We can have $n$ canonical angles using + \begin{align*} + S_1^*S_2 + & = U_1 + \begin{pmatrix} + \Sigma\\0 + \end{pmatrix} + U_2^*\\ + & = + \begin{pmatrix} + U_{1,1} & U_{1,2} + \end{pmatrix} + \begin{pmatrix} + \Sigma\\0 + \end{pmatrix} + U_2\\ + & = U_{1,1} \Sigma U_2, + \end{align*} + with $\Sigma = \cos(\Theta)$, where $\theta_i$ are the angles between the $n$ vectors of $S_1U_{1,1}$ + and the $n$ vectors of $S_2U_2$. \end{solution} -\exo{2} +\exo{4} +Show that for every positive semidefinite matrix \(H\) and every unitary matrix \(Q\), we have +\[ +\trace(HQ) \leq \trace(H). +\] + \begin{solution} - See syllabus. - You should obtain - \[ \sigma_k^2 - \|m_{i:}\|_2^2 \leq \hat{\sigma}_k^2 \leq \sigma_k^2 + \|m_{i:}\|_2^2 \] - and not - \[ \sigma_k^2 - \|m_{i:}\|_2^2 \leq \hat{\sigma}_k^2 \leq \sigma_k^2 \] - like written in the syllabus. + \emph{Hint:} + Use the property $\trace(AB) = \trace(BA)$. + + Since $H$ is Hermitian, there is a unitary $U$ and diagonal $\Lambda$ such that $H = U \Lambda U^*$. + Let's first analyse the RHS + \begin{align*} + \trace(H) & = \trace(U \Lambda U^*)\\ + & = \trace(\Lambda U^*U)\\ + & = \trace(\Lambda)\\ + & = \sum_{i=1}^n \lambda_i. + \end{align*} + For the LHS, we first need to analyse $Q$. + It is unitary, so its eigenvalues are on the unit circle therefore $\abs{x^* Q x} \leq 1$ for all $x$ such that $\norm{x}=1$. + + Using the triangle inequality and the fact that the $\lambda_i$ are positive + \begin{align*} + \trace(HQ) + & = \trace(U \Lambda U^* Q)\\ + & = \trace(\Lambda U^* Q U)\\ + & = \sum_{i=1}^n \lambda_i u_{:i}^* Q u_{:i}\\ + & \leq \abs{\sum_{i=1}^n \lambda_i u_{:i}^* Q u_{:i}}\\ + & \leq \sum_{i=1}^n \abs{\lambda_i} \, \abs{u_{:i}^* Q u_{:i}}\\ + & = \sum_{i=1}^n \lambda_i \abs{u_{:i}^* Q u_{:i}}\\ + & \leq \sum_{i=1}^n \lambda_i. + \end{align*} \end{solution} \exo{3} -\begin{solution} - See syllabus. -\end{solution} +How could you extend the polar decomposition to the case \(m \neq n\)? -\exo{2} \begin{solution} - See syllabus. - - \begin{align*} - \sum_{i=0}^\infty \frac{(A+B)^i}{i!} - & = \sum_{i=0}^\infty \frac{\sum_{k=0}^i \frac{i!}{k!(i-k)!} A^{i-k}B^k}{i!}\\ - & = \sum_{i=0}^\infty \sum_{k=0}^i \frac{A^{i-k}}{(i-k)!} \frac{B^k}{k!}\\ - & = - \left(\sum_{i=0}^\infty \frac{A^i}{i!}\right) - \left(\sum_{i=0}^\infty \frac{B^i}{i!}\right). - \end{align*} - because each term $A^aB^b$ is present with the term $\frac{1}{a!b!}$. + If $A$ is $m \times n$, then $H$ is $m \times r$ and $Q$ is $r \times n$ for some $r$. + We want $H$ to be positive semidefinite so we need $H$ to be square for it to be defined. + That means that $Q$ is $m \times n$. 
+ + The problem with the proof when $m \neq n$ is that we cannot say + $U \Sigma V^* = U \Sigma U^* U V^*$ since the product is not defined because $U$ is $m \times m$ + and $V$ is $n \times n$. + \begin{itemize} + \item If $m < n$, we have + \begin{align*} + A + & = U + \begin{pmatrix} + \Sigma & 0 + \end{pmatrix} + \begin{pmatrix} + V_1 & V_2 + \end{pmatrix}^*\\ + & = U \Sigma V_1^*\\ + & = (U \Sigma U^*) (U V_1^*) + \end{align*} + with $V_1^*V_1 = I$ (but $V_1V_1^* \neq I$, since $I$ has full rank but $V_1$ does not have full row rank) + since + \begin{align*} + I + & = + \begin{pmatrix} + V_1 & V_2 + \end{pmatrix}^* + \begin{pmatrix} + V_1 & V_2 + \end{pmatrix}\\ + & = + \begin{pmatrix} + V_1^*V_1 & V_1^*V_2\\ + V_2^*V_1 & V_2^*V_2 + \end{pmatrix}. + \end{align*} + + Therefore $QQ^* = (U V_1^*) (U V_1^*)^* = I$ but $Q^*Q = (U V_1^*)^* (U V_1^*) = V_1V_1^* \neq I$. + \item If $m > n$, we have + \begin{align*} + A + & = + \begin{pmatrix} + U_1 & U_2 + \end{pmatrix} + \begin{pmatrix} + \Sigma \\ 0 + \end{pmatrix} + V^*\\ + & = U_1 \Sigma V^*\\ + & = (U_1 \Sigma U_1^*) (U_1 V^*) + \end{align*} + with $U_1^*U_1 = I$ but $U_1U_1^* \neq I$ like for the previous point. + + This time it's $Q^*Q = I$ and $QQ^* \neq I$. + \end{itemize} \end{solution} -\exo{1} -\begin{solution} - We have - - \begin{align*} - \begin{pmatrix} - \lambda_0 & 1 & & \\ - & \ddots & \ddots & \\ - & & \ddots & 1\\ - & & & \lambda_0\\ - \end{pmatrix} - & = - \lambda_0 I + N - \end{align*} - for - \[ - N = - \begin{pmatrix} - & 1 & & \\ - & & \ddots & \\ - & & & 1\\ - & & & \\ - \end{pmatrix}. - \] - The rest is a simple consequence of the exercise~3.7. - - It is important to note for the next page that $N^n = 0$. - This is indeed a consequence of the fact that $N = J - \lambda_0I$ - where $J$ is a Jordan block of $\lambda_0$ of size $n$. - $J$ is therefore a matrix with only one eigenvalue $\lambda_0$ - of algebraic multiplicity $n$ but geometric multiplicity $1$. - Hence the whole set $\mathbb{C}^n$ is an invariant subspace of $N$ - which means that $(J - \lambda_0I)^n = 0$. - - We can see for example for $n = 4$ that - - \begin{align*} - N & = - \begin{pmatrix} - 0 & 1 & 0 & 0\\ - 0 & 0 & 1 & 0\\ - 0 & 0 & 0 & 1\\ - 0 & 0 & 0 & 0 - \end{pmatrix}\\ - N^2 & = - \begin{pmatrix} - 0 & 0 & 1 & 0\\ - 0 & 0 & 0 & 1\\ - 0 & 0 & 0 & 0\\ - 0 & 0 & 0 & 0 - \end{pmatrix}\\ - N^3 & = - \begin{pmatrix} - 0 & 0 & 0 & 1\\ - 0 & 0 & 0 & 0\\ - 0 & 0 & 0 & 0\\ - 0 & 0 & 0 & 0 - \end{pmatrix}\\ - N^4 & = - \begin{pmatrix} - 0 & 0 & 0 & 0\\ - 0 & 0 & 0 & 0\\ - 0 & 0 & 0 & 0\\ - 0 & 0 & 0 & 0 - \end{pmatrix} - \end{align*} - We can see that $e_1$ is an eigenvector of $\lambda_0$ but - $e_2$, $e_3$, $e_4$ are not. - \begin{itemize} - \item $Je_2 = \lambda_0 e_2 + e_1$ - so $(J - \lambda_0 I)e_2 = e_1$. - \item $(J - \lambda_0 I)e_3 = e_2$ and $(J - \lambda_0 I)^2e_3 = e_1$. - \item $(J - \lambda_0 I)e_4 = e_3$, $(J - \lambda_0 I)^2e_3 = e_2$ - and $(J - \lambda_0 I)^3e_4 = e_1$. - \end{itemize} - - Note that $J^4 \mathbb{C}^n = \mathbb{C}^n$ if $\lambda_0 \neq 0$. - It is not to be mistaken from $(J - \lambda_0 I)^4 \mathbb{C}^n = \{0\}$. -\end{solution} +\exo{3} +Show that the polar decomposition of \(B^\top A\) leads to an optimal rotation \(Q\) that minimizes \(\norm{AQ^\top - B}_F^2\). 
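+As a sanity check on a special case (chosen here only for illustration): if \(A = I\) and \(B\) is itself a rotation \(R\), then \(B^\top A = R^\top\) is already orthogonal, so its polar decomposition \(B^\top A = \tilde{H}\tilde{Q}\) has \(\tilde{H} = I\) and \(\tilde{Q} = R^\top\). The rotation given by the statement is then \(Q = \tilde{Q} = R^\top\), which is clearly optimal since
+\[
+\norm{AQ^\top - B}_F = \norm{R - R}_F = 0.
+\]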
-\exo{1} \begin{solution} - \begin{align*} - \fdif{}{t}e^{At} - & = \fdif{I}{t} + \sum_{i=1}^\infty \frac{A^i}{i!} \fdif{t^i}{t}\\ - & = \sum_{i=1}^\infty \frac{iA^it^{i-1}}{i!}\\ - & = A\sum_{i=1}^\infty \frac{(At)^{i-1}}{(i-1)!}\\ - & = A \sum_{i=0}^\infty \frac{(At)^i}{i!} = A e^{At}\\ - & = \sum_{i=0}^\infty \frac{A^{i+1} t^i}{i!}\\ - & = \sum_{i=0}^\infty \frac{(At)^i}{i!} A = e^{At} A. - \end{align*} + \emph{Hint} Use the property $\norm{A}_F = \trace(A^\top A) = \trace(AA^\top)$ and the property $\trace(AB) = \trace(BA)$. + + We have (also using $\trace(A^\top) = \trace(A)$). + + \begin{align*} + \norm{AQ^\top - B}_F^2 + & = \trace((AQ^\top - B)(AQ^\top - B)^\top)\\ + & = \trace((AQ^\top - B)(QA^\top - B^\top))\\ + & = \trace(AQ^\top QA^\top) + \trace(BB^\top) - \trace(AQ^\top B^\top ) - \trace(BQA^\top)\\ + & = \trace(AA^\top) + \trace(BB^\top) - \trace(B^\top AQ^\top) - \trace(QA^\top B)\\ + & = \norm{A}_F^2 + \norm{B}_F^2 - \trace(B^\top AQ^\top) - \trace(B^\top AQ^\top)\\ + & = \norm{A}_F^2 + \norm{B}_F^2 - 2\trace(B^\top AQ^\top)\\ + & = \norm{A}_F^2 + \norm{B}_F^2 - 2\trace(\tilde{H}\tilde{Q} Q^\top)\\ + & \geq \norm{A}_F^2 + \norm{B}_F^2 - 2\trace(\tilde{H}) + \end{align*} + using Exercise~3.4 since $\tilde{Q}Q^\top$ is unitary and $\tilde{H}$ is Hermitian positive semidefinite. + Taking $Q = \tilde{Q}$, we have equality. It is therefore the optimal rotation. \end{solution} -\exo{1} +\exo{3} +Construct a matrix \(B\), with \(\mathop{\mathrm{rank}}(B) \leq s\), that is different from (3.15) and reaches the same bound on \(\norm{A - B}\). + \begin{solution} - See syllabus. -\end{solution} + For $s_i \in [\sigma_i - \sigma_{s+1}(A), \sigma_i + \sigma_{s+1}(A)]$ for $i = 1, \ldots, s$, + and + \[ B = \sum_{i=1}^s \mathbf{u}_i s_i \mathbf{v}_i^\top, \] + we have + \[ A - B = \sum_{i=1}^s \mathbf{u}_i(\sigma_i - s_i)\mathbf{v}_i^\top + \sum_{i=s+1}^r \mathbf{u}_i\sigma_i \mathbf{v}_i^\top. \] + which also has its maximum singular value equal to $\sigma_{s+1}(A)$ by definition of the $s_i$. +\end{solution} \ No newline at end of file diff --git a/src/q7/matrix-INMA2380/exercises/ch4.tex b/src/q7/matrix-INMA2380/exercises/ch4.tex index 5d54aee98..df70324fd 100644 --- a/src/q7/matrix-INMA2380/exercises/ch4.tex +++ b/src/q7/matrix-INMA2380/exercises/ch4.tex @@ -1,128 +1,152 @@ -\section{Inertia and stability of matrices} -\exo{3} +\section{Eigenvalues, eigenvectors and similarity transformations} + +\exo{1} +Show that the one-dimensional invariant subspaces of a matrix are those generated by its eigenvectors. + \begin{solution} - Using the exercise~1.6 and the exercise~1.15, - \begin{align*} - V^* (I_n \otimes A^* + A^T \otimes I_n) V - & = ((U^*)^T \otimes U^*) (I_n \otimes A^* + A^T \otimes I_n) (U^T \otimes U)\\ - & = ((U^*)^T \otimes U^*) (I_n \otimes A^*) (U^T \otimes U)\\ - & \quad + ((U^*)^T \otimes U^*) (A^T \otimes I_n) (U^T \otimes U)\\ - & = ((U^*)^TU^T) \otimes (U^*A^*U) + ((U^*)^TA^TU^T) \otimes (U^*U)\\ - & = I_n \otimes A_S^* + A_S^T \otimes I_n. - \end{align*} - We know that since $U$ is unitary, $U^T$ is unitary too. - With the help of the exercise~1.16, we conclude that $U^T \otimes U$ is unitary. - Therefore, - $I_n \otimes A^* + A^T \otimes I_n$ has same spectrum than - $I_n \otimes A_S^* + A_S^T \otimes I_n$. 
- We can now see that - \begin{align*} - A_S & = - \begin{pmatrix} - \lambda_1 & & & 0\\ - \times & \lambda_2 & & \\ - \vdots & \ddots & & \\ - \times & \cdots & \times & \lambda_n - \end{pmatrix}\\ - I_n \otimes A_S^* & = - \begin{pmatrix} - A_S^* & & & 0\\ - & A_S^* & & \\ - & & \ddots & \\ - 0 & & & A_S^* - \end{pmatrix}\\ - A_S^T \otimes I_n & = - \begin{pmatrix} - \lambda_1 I_n & \times I_n & \cdots & \times I_n\\ - & \lambda_2 I_n & \ddots & \vdots\\ - & & \ddots & \times I_n\\ - 0 & & & \lambda_n I_n - \end{pmatrix} - \end{align*} - $I_n \otimes A_S^* + A_S^T \otimes I_n$ is therefore - upper diagonal matrices with the $in + j$th element of its - diagonal being $\overline{\lambda_j} + \lambda_i$. - Since it is upper diagonal, it is in its Schur form - and the elements in its diagonal are its eigenvalues. + If $A \Ima(x) \subseteq \Ima(\mathbf{x})$, since $\Ima(\mathbf{x})$ and $A\Ima(\mathbf{x})$ + are vector subspaces and $\Ima(\mathbf{x})$ is of dimension \(1\), + that means that either $A\Ima(\mathbf{x}) = \Ima(\mathbf{x})$ or $A\Ima(\mathbf{x}) = \{0\}$. + In the first case, it is an eigenvector of a nonzero eigenvalue. + In the second case, it is an eigenvector of \(0\). \end{solution} -\exo{3} +\exo{2} +Show that if \(A \in \R^{n \times n}\) satisfies \(A^\top A = AA^\top\) (i.e., \(A\) is normal), then its real Schur form is block-diagonal with blocks +\[ +A_{ii} = \alpha_i \quad \textnormal{or} \quad A_{jj} = \begin{bmatrix} \alpha_j & \beta_j \\ -\beta_j & \alpha_j \end{bmatrix} +\] +for each real eigenvalue \(\alpha_i\) and each complex eigenvalue \(\alpha_j \pm j \beta_j\). + \begin{solution} - Using the exercise~1.6 and the exercise~1.15, - \begin{align*} - V^* (A^T \otimes A^* + I_n \otimes I_n) V - & = ((U^*)^T \otimes U^*) (A^T \otimes A^* + I_n \otimes I_n) (U^T \otimes U)\\ - & = ((U^*)^T \otimes U^*) (A^T \otimes A^*) (U^T \otimes U)\\ - & \quad + ((U^*)^T \otimes U^*) (I_n \otimes I_n) (U^T \otimes U)\\ - & = (((U^*)^TA^TU^T) \otimes (U^*A^*U) + (U^*)^TU^T) \otimes (U^*U)\\ - & = A_S^T \otimes A_S^* + I_n \otimes I_n. - \end{align*} - We know that since $U$ is unitary, $U^T$ is unitary too. - With the help of the exercise~1.16, we conclude that $U^T \otimes U$ is unitary. - Therefore, - $A^T \otimes A^* + I_n \otimes I_n$ has same spectrum than - $A_S^T \otimes A_S^* + I_n \otimes I_n$. - We can now see that - \begin{align*} - A_S & = - \begin{pmatrix} - \lambda_1 & & & 0\\ - \times & \lambda_2 & & \\ - \vdots & \ddots & & \\ - \times & \cdots & \times & \lambda_n - \end{pmatrix}\\ - A_S^T \otimes A_S^* & = - \begin{pmatrix} - \lambda_1 A_S^* & \times A_S^* & \cdots & \times A_S^*\\ - & \lambda_2 A_S^* & \ddots & \vdots\\ - & & \ddots & \times A_S^*\\ - 0 & & & \lambda_n A_S^* - \end{pmatrix} - \end{align*} - $A_S^T \otimes A_S^* + I_n \otimes I_n$ is therefore - upper diagonal with the $in + j$th element of its - diagonal being $\overline{\lambda_j}\lambda_i + 1$. - Since it is upper diagonal, it is in its Schur form - and the elements in its diagonal are its eigenvalues. + Let's first show that if $A$ is normal, + it is still normal after a unitary transformation. + \begin{align*} + (U^\top AU)^\top (U^\top AU) + & = U^\top A^\top AU\\ + & = U^\top AA^\top U\\ + & = U^\top AUU^\top A^\top U\\ + & = U^\top AU(U^\top AU)^\top. + \end{align*} + That means that the Schur form of $A$ is normal too. + + We can now use the same argument as in the proof of Theorem~4.4 since $A_S^* = A_S^\top$ + and apply it for all $n_1+n_2 = n$ such that $n_1$ is at the end of a block. 
+ This shows that the real Schur form of the matrix is block-diagonal, and combining it with results on what the blocks are in the lecture notes, we arrive at a solution. \end{solution} \exo{2} +Show that if \(A \in \R^{n \times n}\) is anti-symmetric (i.e., \(A = -A^\top\)), then the real Schur form is block-diagonal with blocks +\[ +A_{ii} = 0 \quad \textnormal{or} \quad A_{jj} = \begin{bmatrix} 0 & \beta_j \\ -\beta_j & 0 \end{bmatrix}. +\] + \begin{solution} - Let $A_s$ be the Schur form of $A$ and $B_S$ the lower Schur - form of $B$. - We have unitary $U_A, U_B$ such that - $A = U_AA_SU_A^*$ and $B = U_BB_sU_B^*$. - Let's define the dense matrices $\tilde{X}$ and $\tilde{C}$ such that - $\tilde{X} = U_A\tilde{X}U_B^*$ and $C = U_A\tilde{C}U_B^*$, we have - \begin{align*} - (U_AA_SU_A^*)(U_A\tilde{X}U_B^*) + (U_A\tilde{X}U_B^*)(U_BB_sU_B^*) - & = U_A\tilde{C}U_B^*\\ - U_AA_S\tilde{X}U_B^* + U_A\tilde{X}B_sU_B^* & = U_A\tilde{C}U_B^*\\ - A_S\tilde{X} + \tilde{X}B_s & = \tilde{C}\\ - \end{align*} - Using $\vect(BPA) = (A^T \otimes B) \vect(P)$, we have - \begin{align*} - \vect(A_s\tilde{X} + \tilde{X}B_S) & = \vect(\tilde{C})\\ - \vect(A_s\tilde{X}I_n) + \vect(I_n\tilde{X}B_S) & = \vect(\tilde{C})\\ - (I_n \otimes A_s) \vect(X) + (B_S^T \otimes I_n) \vect(\tilde{X}) & = \vect(\tilde{C})\\ - (I_n \otimes A_s + B_S^T \otimes I_n) \vect(\tilde{X}) & = \vect(\tilde{C}) - \end{align*} - where $(I_n \otimes A_s + B_S^T \otimes I_n)$ is upper diagonal - since $A_s$ and $B_S^T$ are upper diagonal. + Let's first show that if $A$ is anti-symmetric, + it is still anti-symmetric after a unitary transformation. + \begin{align*} + (U^\top AU)^\top + & = U^\top A^\top U\\ + & = U^\top (-A)U\\ + & = -U^\top AU. + \end{align*} + That means that the Schur form of $A$ is anti-symmetric too. + Since it is anti-symmetric, $A^\top A = -A^2 = AA^\top$, so it is also normal. + + By Exercise~4.2, we know that the Schur form is block-diagonal. + But since $A_S = -A_S^\top$, + we must have $\alpha_j = -\alpha_j$, which means that $\alpha_j = 0$ + (note that it imposes no restrictions on $\beta_j$). + The same reasoning also implies $A_{ii} = 0$. \end{solution} -\exo{2} +\exo{1} +The orthogonal polynomials defined by the recurrence +\[ +\left\{\begin{array}{rcl} +p_0(\lambda) & = & 1,\\ +p_1(\lambda) & = & \lambda - \alpha_1,\\ +p_i(\lambda) & = & (\lambda - \alpha_i) p_{i-1}(\lambda) - \beta_i^2 p_{i-2}(\lambda), \quad i = 2, \dots, n, +\end{array}\right. +\] +are the characteristic polynomials of the matrices +\[ +T_i = \begin{bmatrix} +\alpha_1 & \beta_2 & & \\ +\beta_2 & \alpha_2 & \ddots &\\ +& \ddots & \ddots & \beta_i\\ +& & \beta_i & \alpha_i \\ +\end{bmatrix}. +\] +Show that the roots of two consecutive polynomials are interlacing. +The interweaving is strict if the \(\beta_j\)'s are nonzero. + +\begin{solution} + We simply use the corollary since we remove the last line and column of \(T_i\) to obtain \(T_{i-1}\). + + If the $\beta_j$ are nonzero, we can see that it is strict since + if $p_i(\lambda') = p_{i-1}(\lambda') = 0$ for some hypothetical root \(\lambda'\), we have + \[ 0 = -\beta_i^2 p_{i-2}(\lambda') \] + so $p_{i-2}(\lambda') = 0$. + By induction, we have $p_0(\lambda') = 0$ which is in contradiction with the base case, hence \(\lambda'\) cannot be a root of two consecutive polynomials. 
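+
+ As a concrete illustration (with values chosen here only as an example), take \(\alpha_i = 0\) and \(\beta_i = 1\) for all \(i\). The recurrence gives
+ \[
+ p_1(\lambda) = \lambda, \qquad p_2(\lambda) = \lambda^2 - 1, \qquad p_3(\lambda) = \lambda(\lambda^2 - 1) - \lambda = \lambda(\lambda^2 - 2),
+ \]
+ whose root sets are \(\{0\}\), \(\{-1, 1\}\) and \(\{-\sqrt{2}, 0, \sqrt{2}\}\), so consecutive polynomials indeed have strictly interlacing roots: \(-1 < 0 < 1\) and \(-\sqrt{2} < -1 < 0 < 1 < \sqrt{2}\).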
+\end{solution} + +\exo{1} +If \(\hat{M}_{m \times n}\) is the matrix \(M_{m \times n}\) in which we have replaced a row \(\mathbf{m}_{i:}\) with a row of zeros, then the singular values \(\{\sigma_1, \dots, \sigma_n\}\) and \(\hat{\sigma}_1, \dots, \hat{\sigma}_n\) of \(M\) and \(\hat{M}\) satisfy +\[ +\sigma_k^2 - \norm{\mathbf{m}_{i:}}_2^2 \leq \hat{\sigma}_k^2 \leq \sigma_k^2 + \norm{\mathbf{m}_{i:}}_2^2. +\] + +\begin{solution} + We remember that the singular values of a matrix \(A\) are the square roots of the eigenvalues of the matrix \(A^\top A\). + We also note that \(\hat{M}^\top \hat{M} = M^\top M - \mathbf{m}_{i:}^T\mathbf{m}_{i:}\). + Applying the result derived from Corollary~4.32 in the lecture notes, with \(A = M\), \(\Delta = -\mathbf{m}_{i:}^\top \mathbf{m}_{i:}\) and \(\hat{A} = \hat{M}\) we then find that + \[ + \sigma_k^2 - \norm{\mathbf{m}_{i:}}_2^2 \leq \hat{\sigma}_k^2 \leq \sigma_k^2 + \norm{\mathbf{m}_{i:}}_2^2. + \] +\end{solution} + +\exo{1} +Show that +\[ +e^{J_k(\lambda_0)t} = e^{(\lambda_0I_kt + J_k(0)t)} = e^{\lambda_0t} e^{J_k(0)t}. +\] + \begin{solution} - For 4.7, let - \[ \lambda = \argmin_{\lambda^* = j\omega} \sigma_{\mathrm{min}}(A - \lambda^*I). \] - Let $C = A - \lambda I$. - If $C = U^* \Sigma V$, we can take $\Delta = -u_n \sigma_n v_n^*$ which gives - \[ C + \Delta = A - \lambda I + \Delta = (A + \Delta) - \lambda I \] - of rank $n-1$. + We start by noticing that \(J_k(\lambda_0) = \lambda_0 I + J_k(0)\). + This explains the first equality. + The second equality is then a direct consequence of Proposition~4.39, as multiples of the identity matrix commute with any matrix (including \(J_k(0)\)). +\end{solution} + +\exo{1} +From the Taylor expansion, show that +\[ +\fdif{}{t} e^{At} = A e^{At} = e^{At} A. +\] - For 4.8, it is the same except that we take - \[ \lambda = \argmin_{\lambda^* = \exp(j\omega)} \sigma_{\mathrm{min}}(A - \lambda^*I). \] +\begin{solution} + \begin{align*} + \fdif{}{t}e^{At} + & = \fdif{I}{t} + \sum_{i=1}^\infty \frac{A^i}{i!} \fdif{t^i}{t}\\ + & = \sum_{i=1}^\infty \frac{iA^it^{i-1}}{i!}\\ + & = A\sum_{i=1}^\infty \frac{(At)^{i-1}}{(i-1)!}\\ + & = A \sum_{i=0}^\infty \frac{(At)^i}{i!} = A e^{At}\\ + & = \sum_{i=0}^\infty \frac{A^{i+1} t^i}{i!}\\ + & = \sum_{i=0}^\infty \frac{(At)^i}{i!} A = e^{At} A. + \end{align*} \end{solution} -\exo{0} -\nosolution +\exo{2} +Show that, in the general case, we have +\[ +\mathbf{x}(t) = e^{At}\mathbf{x}(0) + \int_0^t e^{A(t - \tau)} \mathbf{f}(\tau) \dif \tau. +\] +\begin{solution} +We can compute the derivative of \(\mathbf{x}(t)\) as follows +\begin{align*} +\dot{\mathbf{x}}(t) &= Ae^{At}\mathbf{x}(0) + \mathbf{f}(t) + \int_0^t A e^{A(t - \tau)} \mathbf{f}(\tau) \dif \tau\\ +&= A \left(e^{At}\mathbf{x}(0) + \int_0^t e^{A(t - \tau)} \mathbf{f}(\tau) \dif \tau\right) + \mathbf{f}(t)\\ +&= A \mathbf{x}(t) + \mathbf{f}(t), +\end{align*} +which shows (thanks to the Leibniz integral rule) that our general solution satisfies the differential equation given in (4.22) in the lecture notes. 
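+Note also that at \(t = 0\) the integral term vanishes, so the formula reproduces the initial condition \(\mathbf{x}(0)\). As a small scalar sanity check (with data chosen here only as an example: \(n = 1\), \(A = a \neq 0\) and \(f(t) \equiv 1\)), the formula gives
+\[
+x(t) = e^{at}x(0) + \int_0^t e^{a(t - \tau)} \dif \tau = e^{at}x(0) + \frac{e^{at} - 1}{a},
+\]
+which indeed satisfies \(\dot{x}(t) = a x(t) + 1\) with the prescribed value at \(t = 0\).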
+\end{solution} \ No newline at end of file diff --git a/src/q7/matrix-INMA2380/exercises/ch5.tex b/src/q7/matrix-INMA2380/exercises/ch5.tex index 14feafeef..a4cd4327e 100644 --- a/src/q7/matrix-INMA2380/exercises/ch5.tex +++ b/src/q7/matrix-INMA2380/exercises/ch5.tex @@ -1,12 +1,129 @@ -\section{Polynomial matrices} -\subsection{} -\nosolution +\section{Inertia and stability of matrices} +\exo{3} +Show that +\[ +V^*(I_n \otimes A^* + A^\top \otimes I_n) V = I_n \otimes A^*_S + A_S^\top \otimes I_n +\] +whose eigenvalues are \(\bar{\lambda}_j + \lambda_i\), \(i = 1, \dots, n, j = 1, \dots, n)\). -\subsection{} -\nosolution +\begin{solution} + Using Exercises~1.6 and~1.15, + \begin{align*} + V^* (I_n \otimes A^* + A^\top \otimes I_n) V + & = ((U^*)^\top \otimes U^*) (I_n \otimes A^* + A^\top \otimes I_n) (U^\top \otimes U)\\ + & = ((U^*)^\top \otimes U^*) (I_n \otimes A^*) (U^\top \otimes U)\\ + & \quad + ((U^*)^\top \otimes U^*) (A^\top \otimes I_n) (U^\top \otimes U)\\ + & = ((U^*)^\top U^\top) \otimes (U^*A^*U) + ((U^*)^\top A^\top U^\top) \otimes (U^*U)\\ + & = I_n \otimes A_S^* + A_S^\top \otimes I_n. + \end{align*} + We know that since $U$ is unitary, $U^\top$ is unitary too. + With the help of Exercise~1.16, we conclude that $U^\top \otimes U$ is unitary. + Therefore, + $I_n \otimes A^* + A^\top \otimes I_n$ has the same spectrum as + $I_n \otimes A_S^* + A_S^\top \otimes I_n$. + We can now see that + \begin{align*} + A_S & = + \begin{bmatrix} + \lambda_1 & & & 0\\ + \times & \lambda_2 & & \\ + \vdots & \ddots & & \\ + \times & \cdots & \times & \lambda_n + \end{bmatrix},\\ + I_n \otimes A_S^* & = + \begin{bmatrix} + A_S^* & & & 0\\ + & A_S^* & & \\ + & & \ddots & \\ + 0 & & & A_S^* + \end{bmatrix},\\ + A_S^\top \otimes I_n & = + \begin{bmatrix} + \lambda_1 I_n & \times I_n & \cdots & \times I_n\\ + & \lambda_2 I_n & \ddots & \vdots\\ + & & \ddots & \times I_n\\ + 0 & & & \lambda_n I_n + \end{bmatrix}. + \end{align*} + $I_n \otimes A_S^* + A_S^\top \otimes I_n$ is therefore an + upper diagonal matrix with the $in + j$th element of its + diagonal being $\bar{\lambda}_j + \lambda_i$. + Since it is upper diagonal, it is in its Schur form + and the elements on its diagonal are its eigenvalues. +\end{solution} -\subsection{} -\nosolution +\exo{3} +Show that +\[ +V^*(A^\top \otimes A^* - I_{n^2}) V = A_S^\top \otimes A_S^* - I_{n^2} +\] +whose eigenvalues are \(\bar{\lambda}_j \lambda_i - 1\), \((i = 1, \dots, n, j = 1, \dots, n)\). -\subsection{} -\nosolution +\begin{solution} + Using Exercises~1.6 and~1.15, + \begin{align*} + V^* (A^\top \otimes A^* - I_{n^2}) V + & = ((U^*)^\top \otimes U^*) (A^\top \otimes A^* - I_{n^2}) (U^\top \otimes U)\\ + & = ((U^*)^\top \otimes U^*) (A^\top \otimes A^*) (U^\top \otimes U)\\ + & \quad - ((U^*)^\top \otimes U^*) (I_n \otimes I_n) (U^\top \otimes U)\\ + & = (((U^*)^\top A^\top U^\top) \otimes (U^*A^*U) - (U^*)^\top U^\top) \otimes (U^*U)\\ + & = A_S^\top \otimes A_S^* - I_{n^2}. + \end{align*} + We know that since $U$ is unitary, $U^\top$ is unitary too. + With the help of Exercise~1.16, we conclude that $U^\top \otimes U$ is unitary. + Therefore, + $A^\top \otimes A^* - I_{n^2}$ has the same spectrum as + $A_S^\top \otimes A_S^* - I_{n^2}$. 
+ We can now see that + \begin{align*} + A_S & = + \begin{bmatrix} + \lambda_1 & & & 0\\ + \times & \lambda_2 & & \\ + \vdots & \ddots & & \\ + \times & \cdots & \times & \lambda_n + \end{bmatrix},\\ + A_S^\top \otimes A_S^* & = + \begin{bmatrix} + \lambda_1 A_S^* & \times A_S^* & \cdots & \times A_S^*\\ + & \lambda_2 A_S^* & \ddots & \vdots\\ + & & \ddots & \times A_S^*\\ + 0 & & & \lambda_n A_S^* + \end{bmatrix}. + \end{align*} + $A_S^\top \otimes A_S^* - I_{n^2}$ is therefore + upper diagonal with the $in + j$th element of its + diagonal being $\bar{\lambda}_j\lambda_i - 1$. + Since it is upper diagonal, it is in its Schur form + and the elements on its diagonal are its eigenvalues. +\end{solution} + +\exo{2} +Apply these techniques to \emph{Sylvester's equation} +\[ +AX + XB = C. +\] + +\begin{solution} + Let $A_S$ be the Schur form of $A$ and $B_S$ the lower Schur + form of $B$. + We have unitary $U_A, U_B$ such that + $A = U_AA_SU_A^*$ and $B = U_BB_SU_B^*$. + Let's define the dense matrices $\tilde{X}$ and $\tilde{C}$ such that + $X = U_A\tilde{X}U_B^*$ and $C = U_A\tilde{C}U_B^*$, we have + \begin{align*} + (U_AA_SU_A^*)(U_A\tilde{X}U_B^*) + (U_A\tilde{X}U_B^*)(U_BB_SU_B^*) + & = U_A\tilde{C}U_B^*\\ + U_AA_S\tilde{X}U_B^* + U_A\tilde{X}B_SU_B^* & = U_A\tilde{C}U_B^*\\ + A_S\tilde{X} + \tilde{X}B_S & = \tilde{C}. + \end{align*} + Using $\vect(BPA) = (A^\top \otimes B) \vect(P)$, we have + \begin{align*} + \vect(A_S\tilde{X} + \tilde{X}B_S) & = \vect(\tilde{C})\\ + \vect(A_S\tilde{X}I_n) + \vect(I_n\tilde{X}B_S) & = \vect(\tilde{C})\\ + (I_n \otimes A_S) \vect(\tilde{X}) + (B_S^\top \otimes I_n) \vect(\tilde{X}) & = \vect(\tilde{C})\\ + (I_n \otimes A_S + B_S^\top \otimes I_n) \vect(\tilde{X}) & = \vect(\tilde{C}) + \end{align*} + where $(I_n \otimes A_S + B_S^\top \otimes I_n)$ is upper diagonal + since $A_S$ and $B_S^\top$ are upper diagonal. +\end{solution} \ No newline at end of file diff --git a/src/q7/matrix-INMA2380/exercises/ch6.tex b/src/q7/matrix-INMA2380/exercises/ch6.tex index dcc575e2d..f735b26f0 100644 --- a/src/q7/matrix-INMA2380/exercises/ch6.tex +++ b/src/q7/matrix-INMA2380/exercises/ch6.tex @@ -1,112 +1,62 @@ -\section{Positive matrices} -\exo{1} +\section{Polynomial matrices} + +\exo{2} +Show that the invertible polynomial matrices are exactly the unimodular matrices. + \begin{solution} - The following inequalities are equivalent - \begin{align*} - \rho x & \leq Sx\\ - \rho x_i & \leq (Sx)_i & \forall i\\ - \rho & \leq \frac{(Sx)_i}{x_i} & \forall i\\ - \rho & \leq \min_i \frac{(Sx)_i}{x_i}. - \end{align*} +We start by showing that unimodular matrices are invertible. +This is trivially true, by considering (6.1) in the lecture notes. +We then need to show that invertible matrices must be unimodular. +Consider two invertible polynomial matrices \(E(\lambda), F(\lambda)\). +Since \(E(\lambda)F(\lambda) = I_n\), then \(\det(E(\lambda)) = 1/\det(F(\lambda))\), which means that both determinants are nonzero constants. +\(E(\lambda)\) and \(F(\lambda)\) are thus unimodular matrices. \end{solution} -\exo{3} -\begin{solution} - In can be seen as a sort of consequence of the power method - applied on the column of $S$ - ($S^\infty = S^\infty - \begin{pmatrix} - a_{:1} & \cdots & a_{:n} - \end{pmatrix}$). +\exo{0} +Show that the elementary transformations of type 1 and 2 defined in (6.2) applied on the rows and on the columns of a polynomial matrix define a multiplicative group. + +\nosolution - Let's prove it simply using the Jordan form of $S$. 
- Without loss of generality, the eigenvalues of S are $1 = \rho(S) > |\lambda_2| \geq \cdots \geq |\lambda_k|$). - \begin{equation} - T^{-1}ST = - \begin{pmatrix} - 1 & 0\\ - 0 & - \diag_{i = 2, \ldots, k} - \{J_i\} - \end{pmatrix} \triangleq D \label{D} - \end{equation} +\exo{1} +The normal rank of \(P(\lambda)\) is equal to the rank of \(P(\lambda_0)\) for almost every (in the Lebesgue sense) \(\lambda_0 \in \C\) (or \(\R\)). +When they are not equal, the rank of \(P(\lambda_0)\) is smaller than the normal rank. - where - \[ J_i = - \begin{pmatrix} - \lambda_i & 1 & & 0\\ - & \lambda_i & \ddots &\\ - & & \ddots & 1\\ - 0 & & & \lambda_i - \end{pmatrix} - \] - we have - \begin{align*} - S^k & = - T - \begin{pmatrix} - 1 & 0\\ - 0 & - \diag_{i = 2, \ldots, k} - \{J_i^k\} - \end{pmatrix} - T^{-1}. - \end{align*} - We can check now that - \begin{align*} - J_i^k & = - \begin{pmatrix} - \lambda_i^k & {k \choose 1}\lambda_i^{k-1} & {k \choose 2}\lambda_i^{k-2} & \cdots\\ - & \lambda_i^k & \ddots & \\ - & & \ddots & {k \choose 1} \lambda_i^{k-1}\\ - 0 & & & \lambda_i^k - \end{pmatrix}\\ - J_i^k & = - \lambda_i^k - \begin{pmatrix} - 1 & {k \choose 1}\lambda_i^{-1} & {k \choose 2}\lambda_i^{-2}k & \cdots\\ - & 1 & \ddots & \\ - & & \ddots & {k \choose 1}\lambda_i^{-1}\\ - 0 & & & 1 - \end{pmatrix} - \end{align*} - and ${k \choose j} \lambda_i^{-j}$ is a polynomial in $k$ while $\lambda_i^k$ is a decreasing exponential ($|\lambda_i| < 1$) in $k$ so - $J_i^k$ converges to $0$ and $S^k$ to - \begin{align} - T - \begin{pmatrix} - 1 & 0\\ - 0 & 0 - \end{pmatrix} - T^{-1} - & = - t_{:1}(t^{-1})_{1:} - \end{align} +\begin{solution} +As \(M(\lambda), N(\lambda)\) are unimodular (and hence invertible), they don't influence the rank of \(P(\lambda_0)\). +We thus have \(\mathop{\mathrm{rank}}(P(\lambda_0)) = \mathop{\mathrm{rank}}(\diag\{e_i(\lambda_0)\}))\). +The rank of the latter is only reduced when \(\lambda_0\) is one of the roots of \(e_i(\lambda)\), which has a finite amount of roots (and the reduction thus occurs ``almost never''). +\end{solution} - However, since $T^{-1}T = I$, $(t^{-1})_{1:}t_{:1} = 1$ and $(t^{-1})_{1:}t_{:j} = 0 = (t^{-1})_{j:}t_{:1}$ for $j \neq 1$. - We can see that - \begin{align*} - St_{:1} - & = TDT^{-1}t_{:1}\\ - & = TDe_1\\ - & = Te_1\\ - & = t_{:1}\\ - (t^{-1})_{1:}S - & = (t^{-1})_{1:}TDT^{-1}\\ - & = e_1^TDT^{-1}\\ - & = e_1^TT^{-1}\\ - & = (t^{-1})_{1:}. - \end{align*} - so $t_{:i}$ is a eigenvector of 1. - Since the geometric multiplicity is 1, $t_{:i}$ is a multiple of $\mathbf{x}$, - the normalised ($\1^T t_{:i} = 1$) Perron eigenvector. - Also $(t^{-1})_{1:}$ is a multiple to the \emph{left} eigenvector $\1^T$. - So, there are $\alpha,\beta$ such that - \begin{align*} - t_{:i} & = \alpha \mathbf{x}\\ - (t^{-1})_{1:} & = \beta \1^T - \end{align*} - but since $(t^{-1})_{1:}t_{:1} = 1$, $\alpha\beta = 1$ so - \[ \lim_{n \to \infty} S^n = \mathbf{x}\1^T. \] +\exo{2} +Verify that the Smith form of the matrix +\[ +\lambda I_4 - \begin{bmatrix} +1 & & &\\ +& 1 & &\\ +&& 2 & 1\\ +&&&2\\ +\end{bmatrix} +\] +is \(\diag\{1, 1, \lambda-1, (\lambda-1)(\lambda-2)^2\}\). + +\begin{solution} +This matrix looks like +\[ +\begin{bmatrix} +\lambda - 1 & & &\\ +& \lambda- 1 & &\\ +&& \lambda- 2 & -1\\ +&&&\lambda- 2\\ +\end{bmatrix} +\] +By applying Algorithm~6.1 from the lecture notes, we obtain +\[ +\begin{bmatrix} +1 & & &\\ +& 1 & &\\ +&& \lambda- 1 &\\ +&&&(\lambda-1)(\lambda- 2)^2\\ +\end{bmatrix}. 
+\] \end{solution} diff --git a/src/q7/matrix-INMA2380/exercises/ch7.tex b/src/q7/matrix-INMA2380/exercises/ch7.tex index 91e489f36..5388b7822 100644 --- a/src/q7/matrix-INMA2380/exercises/ch7.tex +++ b/src/q7/matrix-INMA2380/exercises/ch7.tex @@ -1,3 +1,119 @@ -\section{Structured matrices} -\exo{0} -\nosolution +\section{Positive matrices} + +\exo{1} +Show that for all \(\mathbf{x} \gneq 0 \in \R^n\), \(\rho \mathbf{x} \leq A\mathbf{x}\) if and only if \(\rho \leq r(\mathbf{x})\). + +\begin{solution} + The following inequalities are equivalent + \begin{align*} + \rho \mathbf{x} & \leq A\mathbf{x}\\ + \rho x_i & \leq [A\mathbf{x}]_i & \forall i: x_i \neq 0\\ + \rho & \leq \frac{[A\mathbf{x}]_i}{x_i} & \forall i: x_i \neq 0\\ + \rho & \leq \min_{i: x_i \neq 0} \frac{[A\mathbf{x}]_i}{x_i}. + \end{align*} +\end{solution} + +\exo{3} +Show that if \(\abs{\lambda_2(S)} < \rho(S)\), then +\[ +\lim_{n \to \infty} S^n = \mathbf{x}[1, 1, \dots, 1] +\] +where \(\mathbf{x}\) is the Perron eigenvector of \(S\), normalized such that \(\sum x_i = 1\). + +\begin{solution} + This result can be seen as a sort of consequence of the power method + applied on the columns of $S$ + ($S^\infty = S^\infty + \begin{bmatrix} + a_{:1} & \cdots & a_{:n} + \end{bmatrix}$). + + Let's prove it simply using the Jordan form of $S$. + Without loss of generality, the eigenvalues of S are $1 = \rho(S) > \abs{\lambda_2} \geq \cdots \geq \abs{\lambda_k}$). + \begin{equation} + T^{-1}ST = + \begin{bmatrix} + 1 & 0\\ + 0 & + \diag_{i = 2, \ldots, k} + \{J_i\} + \end{bmatrix} \triangleq D \label{D} + \end{equation} + where + \[ J_i = + \begin{bmatrix} + \lambda_i & 1 & & 0\\ + & \lambda_i & \ddots &\\ + & & \ddots & 1\\ + 0 & & & \lambda_i + \end{bmatrix}. + \] + We have + \begin{align*} + S^k & = + T + \begin{bmatrix} + 1 & 0\\ + 0 & + \diag_{i = 2, \ldots, k} + \{J_i^k\} + \end{bmatrix} + T^{-1}. + \end{align*} + We can check now that + \begin{align*} + J_i^k & = + \begin{bmatrix} + \lambda_i^k & \binom{k}{1}\lambda_i^{k-1} & \binom{k}{2}\lambda_i^{k-2} & \cdots\\ + & \lambda_i^k & \ddots & \\ + & & \ddots & \binom{k}{1} \lambda_i^{k-1}\\ + 0 & & & \lambda_i^k + \end{bmatrix}\\ + J_i^k & = + \lambda_i^k + \begin{bmatrix} + 1 & \binom{k}{1}\lambda_i^{-1} & \binom{k}{2}\lambda_i^{-2} & \cdots\\ + & 1 & \ddots & \\ + & & \ddots & \binom{k}{1}\lambda_i^{-1}\\ + 0 & & & 1 + \end{bmatrix} + \end{align*} + and $\binom{k}{j} \lambda_i^{-j}$ is a polynomial in $k$ while $\lambda_i^k$ is a decreasing exponential ($\abs{\lambda_i} < 1$) in $k$ so + $J_i^k$ converges to $0$ and $S^k$ to + \begin{align} + T + \begin{bmatrix} + 1 & 0\\ + 0 & 0 + \end{bmatrix} + T^{-1} + & = + t_{:1}(t^{-1})_{1:} + \end{align} + + However, since $T^{-1}T = I$, $(t^{-1})_{1:}t_{:1} = 1$ and $(t^{-1})_{1:}t_{:j} = 0 = (t^{-1})_{j:}t_{:1}$ for $j \neq 1$. + We can see that + \begin{align*} + St_{:1} + & = TDT^{-1}t_{:1}\\ + & = TD\mathbf{e}_1\\ + & = T\mathbf{e}_1\\ + & = t_{:1}\\ + (t^{-1})_{1:}S + & = (t^{-1})_{1:}TDT^{-1}\\ + & = \mathbf{e}_1^TDT^{-1}\\ + & = \mathbf{e}_1^TT^{-1}\\ + & = (t^{-1})_{1:}, + \end{align*} + so $t_{:i}$ is an eigenvector of 1. + Since the geometric multiplicity is 1, $t_{:i}$ is a multiple of $\mathbf{x}$, + the normalised ($\1^\top t_{:i} = 1$) Perron eigenvector. + Also $(t^{-1})_{1:}$ is a multiple of the \emph{left} eigenvector $\1^\top$. 
+ So, there are $\alpha,\beta$ such that + \begin{align*} + t_{:i} & = \alpha \mathbf{x}\\ + (t^{-1})_{1:} & = \beta \1^\top + \end{align*} + but since $(t^{-1})_{1:}t_{:1} = 1$, $\alpha\beta = 1$ so + \[ \lim_{n \to \infty} S^n = \mathbf{x}\1^\top. \] +\end{solution} \ No newline at end of file diff --git a/src/q7/matrix-INMA2380/exercises/matrix-INMA2380-exercises.tex b/src/q7/matrix-INMA2380/exercises/matrix-INMA2380-exercises.tex index de74284fe..5d9111c14 100644 --- a/src/q7/matrix-INMA2380/exercises/matrix-INMA2380-exercises.tex +++ b/src/q7/matrix-INMA2380/exercises/matrix-INMA2380-exercises.tex @@ -1,5 +1,7 @@ \documentclass[en]{../../../eplexercises} +\usepackage{comment} + \DeclareMathOperator{\Ker}{Ker} \DeclareMathOperator{\Ima}{Im} \DeclareMathOperator{\trace}{trace} @@ -7,6 +9,7 @@ \DeclareMathOperator{\vect}{vec} \DeclareMathOperator{\adj}{adj} \newcommand{\1}{\mathbf{1}} +\newcommand{\sset}{\mathcal{S}} \usepackage{forloop} @@ -16,7 +19,7 @@ \usepackage{url} \hypertitle{Matrix Theory}{7}{INMA}{2380} -{Beno\^it Legat} +{Beno\^it Legat \and Gilles Peiffer} {Paul Van Dooren} \include{ch1} @@ -26,5 +29,7 @@ \include{ch5} \include{ch6} \include{ch7} +\appendix +\include{old} \end{document} diff --git a/src/q7/matrix-INMA2380/exercises/old.tex b/src/q7/matrix-INMA2380/exercises/old.tex new file mode 100644 index 000000000..d7818f488 --- /dev/null +++ b/src/q7/matrix-INMA2380/exercises/old.tex @@ -0,0 +1,122 @@ +\section{Old questions} + +\exo{3} +\begin{solution} + See syllabus. + + If $P$ is orthogonal, we have + \begin{align*} + PP & = P\\ + PPP^* & = PP^*\\ + P & = PP^* + \end{align*} + and $P^*P = I$. +\end{solution} + +\exo{2} +\begin{solution} + See syllabus. + + \begin{align*} + \sum_{i=0}^\infty \frac{(A+B)^i}{i!} + & = \sum_{i=0}^\infty \frac{\sum_{k=0}^i \frac{i!}{k!(i-k)!} A^{i-k}B^k}{i!}\\ + & = \sum_{i=0}^\infty \sum_{k=0}^i \frac{A^{i-k}}{(i-k)!} \frac{B^k}{k!}\\ + & = + \left(\sum_{i=0}^\infty \frac{A^i}{i!}\right) + \left(\sum_{i=0}^\infty \frac{B^i}{i!}\right). + \end{align*} + because each term $A^aB^b$ is present with the term $\frac{1}{a!b!}$. +\end{solution} + +\exo{1} +\begin{solution} + We have + + \begin{align*} + \begin{bmatrix} + \lambda_0 & 1 & & \\ + & \ddots & \ddots & \\ + & & \ddots & 1\\ + & & & \lambda_0\\ + \end{bmatrix} + & = + \lambda_0 I + N + \end{align*} + for + \[ + N = + \begin{bmatrix} + & 1 & & \\ + & & \ddots & \\ + & & & 1\\ + & & & \\ + \end{bmatrix}. + \] + The rest is a simple consequence of the exercise~3.7. + + It is important to note for the next page that $N^n = 0$. + This is indeed a consequence of the fact that $N = J - \lambda_0I$ + where $J$ is a Jordan block of $\lambda_0$ of size $n$. + $J$ is therefore a matrix with only one eigenvalue $\lambda_0$ + of algebraic multiplicity $n$ but geometric multiplicity $1$. + Hence the whole set $\mathbb{C}^n$ is an invariant subspace of $N$ + which means that $(J - \lambda_0I)^n = 0$. 
+
+ For example, for $n = 4$, we can see that
+
+ \begin{align*}
+ N & =
+ \begin{bmatrix}
+ 0 & 1 & 0 & 0\\
+ 0 & 0 & 1 & 0\\
+ 0 & 0 & 0 & 1\\
+ 0 & 0 & 0 & 0
+ \end{bmatrix}\\
+ N^2 & =
+ \begin{bmatrix}
+ 0 & 0 & 1 & 0\\
+ 0 & 0 & 0 & 1\\
+ 0 & 0 & 0 & 0\\
+ 0 & 0 & 0 & 0
+ \end{bmatrix}\\
+ N^3 & =
+ \begin{bmatrix}
+ 0 & 0 & 0 & 1\\
+ 0 & 0 & 0 & 0\\
+ 0 & 0 & 0 & 0\\
+ 0 & 0 & 0 & 0
+ \end{bmatrix}\\
+ N^4 & =
+ \begin{bmatrix}
+ 0 & 0 & 0 & 0\\
+ 0 & 0 & 0 & 0\\
+ 0 & 0 & 0 & 0\\
+ 0 & 0 & 0 & 0
+ \end{bmatrix}
+ \end{align*}
+ We can see that $e_1$ is an eigenvector of $\lambda_0$ but
+ $e_2$, $e_3$, $e_4$ are not.
+ \begin{itemize}
+ \item $Je_2 = \lambda_0 e_2 + e_1$,
+ so $(J - \lambda_0 I)e_2 = e_1$.
+ \item $(J - \lambda_0 I)e_3 = e_2$ and $(J - \lambda_0 I)^2e_3 = e_1$.
+ \item $(J - \lambda_0 I)e_4 = e_3$, $(J - \lambda_0 I)^2e_4 = e_2$
+ and $(J - \lambda_0 I)^3e_4 = e_1$.
+ \end{itemize}
+
+ Note that $J^4 \mathbb{C}^n = \mathbb{C}^n$ if $\lambda_0 \neq 0$.
+ It should not be confused with $(J - \lambda_0 I)^4 \mathbb{C}^n = \{0\}$.
+\end{solution}
+
+\exo{2}
+\begin{solution}
+ For 4.7, let
+ \[ \lambda = \argmin_{\lambda^* = j\omega} \sigma_{\mathrm{min}}(A - \lambda^*I). \]
+ Let $C = A - \lambda I$.
+ If $C = U \Sigma V^*$, we can take $\Delta = -u_n \sigma_n v_n^*$, which gives
+ \[ C + \Delta = A - \lambda I + \Delta = (A + \Delta) - \lambda I \]
+ of rank $n-1$, since $C + \Delta = U(\Sigma - \sigma_n e_n e_n^\top)V^*$; hence $\lambda$ is an eigenvalue of $A + \Delta$.
+
+ For 4.8, it is the same except that we take
+ \[ \lambda = \argmin_{\lambda^* = \exp(j\omega)} \sigma_{\mathrm{min}}(A - \lambda^*I). \]
+\end{solution}