\documentclass[8pt,a4paper]{scrartcl}
\input{definitions}
\input{packages}
\geometry{left=10mm,right=10mm, top=12mm, bottom=5mm, paperwidth=210mm, paperheight=297mm}
\fancyhf{} %Kopf-/Fußzeilenfelder leeren
\pagestyle{fancy} %Seitenstil auf fancy setzen
\fancyhead[L]{SR} %im Kopf links den Titel schreiben
\fancyhead[R]{\copyright \hspace{1mm} Lst Oekonometrie, Uni Regensburg, \todayV}
\renewcommand{\headrulewidth}{0pt} %Linie unter der Kopfzeile entfernen
\fancypagestyle{plain}{} % damit auch "plain" Seiten fancy werden
\setlength{\headheight}{14.5pt}
\renewcommand{\baselinestretch}{1.25}
\newcommand{\rtsum}{\sum_{t=1}^{\lfloor rT \rfloor} \;}
\newcommand{\tfloor}{\lfloor T/2 \rfloor}
\renewcommand{\vec}{\operatorname{vec}}
\definecolor{DarkGreen}{RGB}{85,107,47}
\definecolor{DarkRed}{rgb}{0.6,0.08,0.1}
\definecolor{Orange}{RGB}{255,165,0}
\begin{document}
\section*{Why is stationarity so important?}
Remember what stationarity means:
\bit
\item \textbf{Covariance Stationarity (CS)}\\
$\lbrace y_t\rbrace_{t=0}^{\infty}$ is called covariance stationary iff
\beqs \E{y_t}:=\mu_t=\mu \text{ constant over time} \hd \text{ and } \hd \Cov{y_t, y_{t+k}}=\gamma(k) \text{ is a function of $k$ and NOT of $t$} \hd (\Ra \Var{y_t}=\sigma^2 \text{ constant over time}) \eeqs
(in words: constant expected value and variance; the autocovariance just depends on the time difference between two realizations and not on the time itself; this kind of stationarity does not assume anything about the underlying distribution, i.\,e.\,the distribution might change over time.)
\item \textbf{Strict Stationarity (SS)}\\
$\lbrace y_t\rbrace_{t=0}^{\infty}$ is called strictly stationary iff
\beqs F_{y_{t}, \ldots, y_{t+k}}(c_0, \ldots, c_k)\equiv F_{y_{s}, \ldots, y_{s+k}}(c_0, \ldots, c_k) \hd \text{for all } t,\, s,\, k \text{ and all } (c_0, \ldots, c_k) \eeqs
(in words: for every finite collection of random variables, the joint distribution is identical to the joint distribution of the equally sized collection shifted in time).\\
This implies (if all moments exist) that all moments are constant and therefore strict stationarity implies (if the first two moments exist) covariance stationarity.
\eit
Remember what we are interested in:\\
Although there is \textbf{only one realization} in each period, the aim is to find the underlying distribution in \textbf{each} period, the \textbf{ensemble distribution} (which we need for forecasts, statistical tests, cointegration, etc.). But how can we obtain a whole distribution from a sample of size one? \\
The theoretical (empirically unprovable) background is called the ergodic theorem. \\
For illustration, assume there are two trajectories $\textcolor{DarkRed}{\vx_1}$ and $\textcolor{DarkGreen}{\vx_2}$ each consisting of four elements. Every element was drawn from the constant (cf.\,picture) true, underlying (marginal) \textbf{ensemble density} $f_e(x)$ which we do not know, i.\,e.\,in particular, we do not know whether the realization is near the true mean $\textcolor{Orange}{\mu_e}$ or far away (the same with the true variance $\textcolor{Orange}{\sigma^2_e}$). Therefore, the basic assumption for the existence of such a constant distribution is a "`constant"' time series, i.\,e.\,a stationary time series.
\begin{center}
\includegraphics[width=0.6\textwidth]{Ergodentheorem.pdf}
\end{center}
Having established the existence of such a distribution, there is still the problem of how to estimate it. Since we have more than one realization over time, we can build the empirical \textbf{time densities} $\textcolor{DarkRed}{\hat{f}_1(x)}$ and $\textcolor{DarkGreen}{\hat{f}_2(x)}$ with average values $\textcolor{DarkRed}{\hat{\mu}_1}$ and $\textcolor{DarkGreen}{\hat{\mu}_2}$ and empirical variances $\textcolor{DarkRed}{\hat{\sigma}^2_1}$ and $\textcolor{DarkGreen}{\hat{\sigma}^2_2}$. The connection between the moments of the \textbf{given, empirical time densities} and the \textbf{unknown, underlying ensemble density} is made by the \textbf{Ergodic Theorem}:\\
Assumptions: For an asymptotically covariance stationary time series, e.\,g.\, $\textcolor{DarkRed}{\vx_1}$, which is ergodic (not empirically provable), it holds that
\begin{align*}
& \limTi \text{E}\left[\left(\underbrace{\frac{1}{T}\sum_{t=1}^T \textcolor{DarkRed}{x_{1t}}}_{\textcolor{DarkRed}{\hat{\mu}_{1}}} -\textcolor{Orange}{\mu_e}\right)^2\right]=0 \hd \text{where "`ergodicity for the mean"' means: } \hd \frac{1}{T}\sum_{t=1}^T \textcolor{DarkRed}{x_{1t}} \PRa \textcolor{DarkRed}{\mu_{1}} \text{ exists}\\
& \limTi \text{E}\left[\left(\underbrace{\frac{1}{T}\sum_{t=1}^T \left(\textcolor{DarkRed}{x_{1t}}-\textcolor{DarkRed}{\hat{\mu}_1}\right)^2}_{\textcolor{DarkRed}{\hat{\sigma}^2_{1}}} -\textcolor{Orange}{\sigma^2_e}\right)^2\right]=0
\hd \text{where "`ergodicity for the variance"' means: } \hd \frac{1}{T}\sum_{t=1}^T \left(\textcolor{DarkRed}{x_{1t}}-\textcolor{DarkRed}{\hat{\mu}_1}\right)^2 \PRa \textcolor{DarkRed}{\sigma^2_{1}} \text{ exists} \end{align*}
which means that for increasing sample size $T$ the time average value $\textcolor{DarkRed}{\hat{\mu}_1}$ converges to the ensemble mean $\textcolor{Orange}{\mu_e}$ (the same for variances $\textcolor{DarkRed}{\hat{\sigma}^2_1}$ and $\textcolor{Orange}{\sigma^2_e}$) and therefore connects the moments of those a priori different densities. An important sufficient condition for both ergodicity for the mean and variance for Gaussian processes is $\sum_{j=0}^\infty |\gamma(j)| < \infty$ (in this case, this condition even ensures ergodicity for all moments).\\
Finally, remember that the basic assumption is a form of \textbf{stationarity}. \\
\end{document}