wip, replaced bg figures

Jan Kowalczyk
2025-09-28 12:50:58 +02:00
parent e00d1a33e3
commit 52dabf0f89
8 changed files with 263 additions and 53 deletions


@@ -484,6 +484,52 @@
\verb http://dx.doi.org/10.1147/rd.33.0210
\endverb
\endentry
\entry{ml_supervised_unsupervised_figure_source}{article}{}{}
\name{author}{2}{}{%
{{hash=a2d0bb06d071c3e2754c88d06de7ca88}{%
family={Morimoto},
familyi={M\bibinitperiod},
given={Juliano},
giveni={J\bibinitperiod}}}%
{{hash=947d2c0bc83a50a1d81f474d59c8cb6e}{%
family={Ponton},
familyi={P\bibinitperiod},
given={Fleur},
giveni={F\bibinitperiod}}}%
}
\list{publisher}{2}{%
{Springer Science}%
{Business Media LLC}%
}
\strng{namehash}{cf208ca99fc29ae8a55dba952fd1abbe}
\strng{fullhash}{cf208ca99fc29ae8a55dba952fd1abbe}
\strng{fullhashraw}{cf208ca99fc29ae8a55dba952fd1abbe}
\strng{bibnamehash}{cf208ca99fc29ae8a55dba952fd1abbe}
\strng{authorbibnamehash}{cf208ca99fc29ae8a55dba952fd1abbe}
\strng{authornamehash}{cf208ca99fc29ae8a55dba952fd1abbe}
\strng{authorfullhash}{cf208ca99fc29ae8a55dba952fd1abbe}
\strng{authorfullhashraw}{cf208ca99fc29ae8a55dba952fd1abbe}
\field{sortinit}{1}
\field{sortinithash}{4f6aaa89bab872aa0999fec09ff8e98a}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{issn}{1936-6434}
\field{journaltitle}{Evolution: Education and Outreach}
\field{month}{5}
\field{number}{1}
\field{title}{Virtual reality in biology: could we become virtual naturalists?}
\field{volume}{14}
\field{year}{2021}
\verb{doi}
\verb 10.1186/s12052-021-00147-x
\endverb
\verb{urlraw}
\verb http://dx.doi.org/10.1186/s12052-021-00147-x
\endverb
\verb{url}
\verb http://dx.doi.org/10.1186/s12052-021-00147-x
\endverb
\endentry
\entry{semi_ad_survey}{article}{}{}
\name{author}{6}{}{%
{{hash=587dbe7c422cc77291cc515bbf598cfe}{%
@@ -550,6 +596,36 @@
\verb http://dx.doi.org/10.1016/j.knosys.2021.106878
\endverb
\endentry
\entry{ml_autoencoder_figure_source}{article}{}{}
\name{author}{1}{}{%
{{hash=49d03d499031db786a0e61119024cf5a}{%
family={Weng},
familyi={W\bibinitperiod},
given={Lilian},
giveni={L\bibinitperiod}}}%
}
\strng{namehash}{49d03d499031db786a0e61119024cf5a}
\strng{fullhash}{49d03d499031db786a0e61119024cf5a}
\strng{fullhashraw}{49d03d499031db786a0e61119024cf5a}
\strng{bibnamehash}{49d03d499031db786a0e61119024cf5a}
\strng{authorbibnamehash}{49d03d499031db786a0e61119024cf5a}
\strng{authornamehash}{49d03d499031db786a0e61119024cf5a}
\strng{authorfullhash}{49d03d499031db786a0e61119024cf5a}
\strng{authorfullhashraw}{49d03d499031db786a0e61119024cf5a}
\field{sortinit}{1}
\field{sortinithash}{4f6aaa89bab872aa0999fec09ff8e98a}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{journaltitle}{lilianweng.github.io}
\field{title}{From Autoencoder to Beta-VAE}
\field{year}{2018}
\verb{urlraw}
\verb https://lilianweng.github.io/posts/2018-08-12-vae/
\endverb
\verb{url}
\verb https://lilianweng.github.io/posts/2018-08-12-vae/
\endverb
\endentry
\entry{bg_autoencoder_ad}{inbook}{}{}
\name{author}{4}{}{%
{{hash=976ff3d638254bc84287783be910c8ab}{%
@@ -585,8 +661,8 @@
\strng{authornamehash}{5a02f1dae7725de83d23363f0cb28a7a}
\strng{authorfullhash}{93d83b236516fbec4b1c4285b0904114}
\strng{authorfullhashraw}{93d83b236516fbec4b1c4285b0904114}
\field{sortinit}{2}
\field{sortinithash}{8b555b3791beccb63322c22f3320aa9a}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{booktitle}{Proceedings of the 2017 SIAM International Conference on Data Mining}
@@ -655,8 +731,8 @@
\strng{authornamehash}{323e5563bff0d526ae6e607daa62c297}
\strng{authorfullhash}{26ef64d9b3261b4327d122e7ff089933}
\strng{authorfullhashraw}{26ef64d9b3261b4327d122e7ff089933}
\field{sortinit}{2}
\field{sortinithash}{8b555b3791beccb63322c22f3320aa9a}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{booktitle}{2019 IEEE/CVF International Conference on Computer Vision (ICCV)}
@@ -736,6 +812,58 @@
\verb http://dx.doi.org/10.1155/2018/7212307
\endverb
\endentry
\entry{bg_lidar_figure_source}{inproceedings}{}{}
\name{author}{6}{}{%
{{hash=78463b705728596a1a89b14b774f7a2f}{%
family={Druml},
familyi={D\bibinitperiod},
given={Norbert},
giveni={N\bibinitperiod}}}%
{{hash=9cfc1b30b53c611753143927152cb9e7}{%
family={Maksymova},
familyi={M\bibinitperiod},
given={Ievgeniia},
giveni={I\bibinitperiod}}}%
{{hash=797c01f47dbc34ec07dea93986d6c043}{%
family={Thurner},
familyi={T\bibinitperiod},
given={Thomas},
giveni={T\bibinitperiod}}}%
{{hash=f1ae05f2020e221c28b96630587e12a3}{%
family={Lierop},
familyi={L\bibinitperiod},
given={{D. van}},
giveni={D\bibinitperiod}}}%
{{hash=64e54a363d6222a40b061c1898e0ebbc}{%
family={Hennecke},
familyi={H\bibinitperiod},
given={{Marcus E.}},
giveni={M\bibinitperiod}}}%
{{hash=5b9fa38d48fbce8f6297cb24871ad774}{%
family={Foroutan},
familyi={F\bibinitperiod},
given={Andreas},
giveni={A\bibinitperiod}}}%
}
\list{language}{1}{%
{English}%
}
\strng{namehash}{49fb5ab08e445d87e0d509dbb70f05d3}
\strng{fullhash}{b738238f3b8a7196a1e179b7824364a6}
\strng{fullhashraw}{b738238f3b8a7196a1e179b7824364a6}
\strng{bibnamehash}{b738238f3b8a7196a1e179b7824364a6}
\strng{authorbibnamehash}{b738238f3b8a7196a1e179b7824364a6}
\strng{authornamehash}{49fb5ab08e445d87e0d509dbb70f05d3}
\strng{authorfullhash}{b738238f3b8a7196a1e179b7824364a6}
\strng{authorfullhashraw}{b738238f3b8a7196a1e179b7824364a6}
\field{sortinit}{2}
\field{sortinithash}{8b555b3791beccb63322c22f3320aa9a}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{month}{9}
\field{title}{1D MEMS Micro-Scanning LiDAR}
\field{year}{2018}
\endentry
\entry{lidar_denoising_survey}{article}{}{}
\name{author}{4}{}{%
{{hash=30663aad72dc59a49b7023f9c332b58a}{%
@@ -839,8 +967,8 @@
\strng{authornamehash}{0a27e8ca9417e034303de05677d255d7}
\strng{authorfullhash}{c25daf229a4778975b71dde19e2ed0c8}
\strng{authorfullhashraw}{c25daf229a4778975b71dde19e2ed0c8}
\field{sortinit}{3}
\field{sortinithash}{ad6fe7482ffbd7b9f99c9e8b5dccd3d7}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{issn}{2169-3536}
@@ -890,8 +1018,8 @@
\strng{authorfullhash}{f7ac305c57bc05cdd9d7b32cd2c70620}
\strng{authorfullhashraw}{f7ac305c57bc05cdd9d7b32cd2c70620}
\field{extraname}{1}
\field{sortinit}{3}
\field{sortinithash}{ad6fe7482ffbd7b9f99c9e8b5dccd3d7}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{booktitle}{IECON 2023- 49th Annual Conference of the IEEE Industrial Electronics Society}
@@ -941,8 +1069,8 @@
\strng{authornamehash}{2348f5826634af872a0634ea83f5916a}
\strng{authorfullhash}{6bbe9b21a1058838c2696c645e510766}
\strng{authorfullhashraw}{6bbe9b21a1058838c2696c645e510766}
\field{sortinit}{3}
\field{sortinithash}{ad6fe7482ffbd7b9f99c9e8b5dccd3d7}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{booktitle}{2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)}
@@ -1080,8 +1208,8 @@
\strng{authornamehash}{e9af9fcd8483f077f0dcdbd95213a56e}
\strng{authorfullhash}{8179a2c222d1565711a7f216e4da6e56}
\strng{authorfullhashraw}{8179a2c222d1565711a7f216e4da6e56}
\field{sortinit}{5}
\field{sortinithash}{20e9b4b0b173788c5dace24730f47d8c}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{month}{05}
@@ -1122,8 +1250,8 @@
\strng{authornamehash}{01a32420f9995c8592740c3ad622e775}
\strng{authorfullhash}{c0310d5b84b91b546714624d9baf92c2}
\strng{authorfullhashraw}{c0310d5b84b91b546714624d9baf92c2}
\field{sortinit}{5}
\field{sortinithash}{20e9b4b0b173788c5dace24730f47d8c}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{issn}{1424-8220}
@@ -1430,8 +1558,8 @@
\strng{authornamehash}{ea684bebf6033a20ad34a33644ec89fc}
\strng{authorfullhash}{d6ad1c32e8f7738554f79d65d954b4f9}
\strng{authorfullhashraw}{d6ad1c32e8f7738554f79d65d954b4f9}
\field{sortinit}{6}
\field{sortinithash}{b33bc299efb3c36abec520a4c896a66d}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{issn}{1556-4967}
@@ -1521,8 +1649,8 @@
\strng{authornamehash}{c4d64624ede10e1baa66843e963d7c13}
\strng{authorfullhash}{c4d64624ede10e1baa66843e963d7c13}
\strng{authorfullhashraw}{c4d64624ede10e1baa66843e963d7c13}
\field{sortinit}{7}
\field{sortinithash}{108d0be1b1bee9773a1173443802c0a3}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{title}{ODDS Library}
@@ -1569,8 +1697,8 @@
\strng{authornamehash}{dd2ddc978fe083bcff1aa1379cd19643}
\strng{authorfullhash}{4dd3ca3cdc8023700c28169734d6ad61}
\strng{authorfullhashraw}{4dd3ca3cdc8023700c28169734d6ad61}
\field{sortinit}{7}
\field{sortinithash}{108d0be1b1bee9773a1173443802c0a3}
\field{labelnamesource}{author}
\field{labeltitlesource}{title}
\field{issn}{0018-9219}

Binary file not shown.


@@ -397,21 +397,30 @@ Among the techniques employed in machine learning algorithms, neural networks ha
\item Output layer, which produces the network's final prediction.
\end{itemize}
As outlined above, neural network training is formulated as an optimization problem: we define an objective function that measures how well the model is achieving its task and then we adjust the network's parameters to optimize that objective. The most common approach is stochastic gradient descent (SGD) or one of its variants (e.g., Adam). In each training iteration, the network first performs a forward pass to compute its outputs and evaluate the objective, then a backward pass—known as backpropagation—to calculate gradients of the objective with respect to every weight in the network. These gradients indicate the direction in which each weight should change to improve performance, and the weights are updated accordingly. Repeating this process over many iterations (also called epochs) allows the network to progressively refine its parameters and better fulfill its task.
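To make this loop concrete, the following minimal sketch shows minibatch SGD training in PyTorch; the model, data, loss, and learning rate are placeholders chosen only for illustration and are not taken from the thesis code.
\begin{verbatim}
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Placeholder model and data, chosen only to make the example runnable.
model = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Linear(32, 1))
criterion = nn.MSELoss()                    # objective function
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

data = TensorDataset(torch.randn(256, 16), torch.randn(256, 1))
loader = DataLoader(data, batch_size=32, shuffle=True)

for epoch in range(5):                      # repeat over several epochs
    for inputs, targets in loader:          # one minibatch per iteration
        outputs = model(inputs)             # forward pass
        loss = criterion(outputs, targets)  # evaluate the objective
        optimizer.zero_grad()
        loss.backward()                     # backpropagation: gradients for every weight
        optimizer.step()                    # adjust weights to improve the objective
\end{verbatim}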
%To train neural networks, we express the task as an optimization problem: we define a loss function that quantifies the discrepancy between the networks predictions and the ground-truth labels (or target values), and we seek to minimize this loss across a dataset. The most common method for doing so is stochastic gradient descent (SGD) or one of its variants (e.g., Adam). During each training iteration, the network performs a forward pass to compute its predictions and the associated loss and then a backward pass—known as backpropagation—to calculate gradients of the loss with respect to each weight in the network. These gradients indicate how to adjust the weights to reduce the loss, and the weights are updated accordingly. Over many iterations also known as epochs, the network progressively refines its parameters, improving its ability to optimally fulfil the given task.
%
% In supervised learning, each input sample is paired with a “ground-truth” label representing the desired output. During training, the model makes a prediction and a loss function quantifies the difference between the prediction and the true label. The learning algorithm then adjusts its parameters to minimize this loss, improving its performance over time. Labels are typically categorical (used for classification tasks, such as distinguishing “cat” from “dog”) or continuous (used for regression tasks, like predicting a temperature or distance).
%\fig{ml_learning_schema_concept}{figures/ml_learning_schema_concept.png}{}
Aside from the underlying technique, one can also categorize machine learning algorithms by the type of feedback provided during learning, for the network to improve. Broadly speaking, three main categories—supervised, unsupervised and reinforcement learning—exist, although many other approaches do not exactly fit any of these categories and have spawned less common categories like semi-supervised or self-supervised learning.
In supervised learning, each input sample is paired with a “ground-truth” label representing the desired output. During training, the model makes a prediction and a loss function quantifies the difference between the prediction and the true label. The learning algorithm then adjusts its parameters to minimize this loss, improving its performance over time. Labels are typically categorical (used for classification tasks, such as distinguishing “cat” from “dog”) or continuous (used for regression tasks, like predicting a temperature or distance). Figure~\ref{fig:ml_learning_schema_concept}b illustrates this principle with a classification example, where labelled data is used to learn a boundary between two classes.
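As a minimal supervised example (the data points, labels, and choice of classifier below are invented purely for illustration), a model is fit on labelled samples and then queried on a new input:
\begin{verbatim}
from sklearn.linear_model import LogisticRegression

# Toy labelled data: two features per sample, binary class labels.
X_train = [[0.2, 1.1], [0.4, 0.9], [3.1, 2.8], [2.9, 3.2]]
y_train = [0, 0, 1, 1]

clf = LogisticRegression()
clf.fit(X_train, y_train)           # minimize a classification loss on the labelled data
print(clf.predict([[0.3, 1.0]]))    # expected output: [0]
\end{verbatim}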
\begin{figure}
\centering
\includegraphics[width=0.6\textwidth]{figures/ml_learning_schema_concept.png}
\caption{Conceptual illustration of unsupervised (a) and supervised (b) learning. Unsupervised methods such as clustering group data based on similarity without labels, while supervised methods such as classification learn decision boundaries from labelled data. Reproduced from~\cite{ml_supervised_unsupervised_figure_source}}
\label{fig:ml_learning_schema_concept}
\end{figure}
In unsupervised learning, models work directly with raw data, without any ground-truth labels to guide the learning process. Instead, they optimize an objective that reflects the discovery of useful structure—whether that is grouping similar data points together or finding a compact representation of the data. For example, cluster analysis partitions the dataset into groups so that points within the same cluster are more similar to each other (according to a chosen similarity metric) than to points in other clusters. Dimensionality reduction methods, on the other hand, project high-dimensional data into a lower-dimensional space, optimizing for minimal loss of the original data's meaningful information. As shown in Figure~\ref{fig:ml_learning_schema_concept}a, clustering is a typical example of unsupervised learning, since the groups are discovered directly from the unlabelled data.
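A correspondingly minimal unsupervised example (k-means is an arbitrary illustrative choice of clustering algorithm) discovers the grouping without any labels:
\begin{verbatim}
import numpy as np
from sklearn.cluster import KMeans

# Unlabelled data: two loose groups of 2-D points.
X = np.array([[0.2, 1.1], [0.4, 0.9], [0.1, 1.2],
              [3.1, 2.8], [2.9, 3.2], [3.3, 3.0]])

labels = KMeans(n_clusters=2, n_init=10, random_state=0).fit_predict(X)
print(labels)  # cluster assignments inferred from the data alone
\end{verbatim}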
% In unsupervised learning, models work directly with raw data, without any ground-truth labels to guide the learning process. Instead, they optimize an objective that reflects the discovery of useful structure—whether that is grouping similar data points together or finding a compact representation of the data. For example, cluster analysis partitions the dataset into groups so that points within the same cluster are more similar to each other (according to a chosen similarity metric) than to points in other clusters. Dimensionality reduction methods, on the other hand, project high-dimensional data into a lower-dimensional space, optimizing for minimal loss of the original datas meaningful information. By focusing purely on the data itself, unsupervised algorithms can reveal hidden patterns and relationships that might be difficult to uncover with manual analysis.
%A more interactive approach to learning is taken by reinforcement learning, which provides the algorithm with an environment and an interpreter of the environment's state. During training the algorithm explores new possible actions and their impact on the provided environment. The interpreter can then reward or punish the algorithm based on the outcome of its actions. To improve the algorithms capability it will try to maximize the rewards received from the interpreter, retaining some randomness as to enable the exploration of different actions and their outcomes. Reinforcement learning is usually used for cases where an algorithm has to make sequences of decisions in complex environments e.g., autonomous driving tasks.
@@ -436,7 +445,7 @@ Machine learning based anomaly detection methods can utilize techniques from all
Autoencoders are a type of neural network architecture, whose main goal is learning to encode input data into a representative state, from which the same input can be reconstructed, hence the name. They typically consist of two functions, an encoder and a decoder with a latent space in between them as depicted in the toy example in figure~\ref{fig:autoencoder_general}. The encoder learns to extract the most significant features from the input and to convert them into the input's latent space representation. The reconstruction goal ensures that the most prominent features of the input get retained during the encoding phase, due to the inherent inability to reconstruct the input if too much relevant information is missing. The decoder simultaneously learns to reconstruct the original input from its encoded latent space representation, by minimizing the error between the input sample and the autoencoder's output. This optimization goal creates uncertainty when categorizing autoencoders as an unsupervised method, although literature commonly defines them as such. While they do not require any labeling of the input data, their optimization target can still calculate the error between the output and the optimal target, which is typically not available for unsupervised methods. For this reason, they are sometimes proposed to be a case of self-supervised learning, a type of machine learning where the data itself can be used to generate a supervisory signal without the need for a domain expert to provide one.
\fig{autoencoder_general}{figures/autoencoder_principle.png}{An illustration of autoencoders' general architecture and reconstruction task. Reproduced from~\cite{ml_autoencoder_figure_source}}
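A compact PyTorch sketch (layer sizes and input dimensionality are illustrative assumptions, not the encoder architectures evaluated later in this thesis) makes the encoder, latent bottleneck, decoder, and reconstruction objective explicit:
\begin{verbatim}
import torch
from torch import nn

class Autoencoder(nn.Module):
    def __init__(self, input_dim=784, latent_dim=32):
        super().__init__()
        # Encoder: compress the input into a low-dimensional latent representation.
        self.encoder = nn.Sequential(nn.Linear(input_dim, 128), nn.ReLU(),
                                     nn.Linear(128, latent_dim))
        # Decoder: reconstruct the input from its latent representation.
        self.decoder = nn.Sequential(nn.Linear(latent_dim, 128), nn.ReLU(),
                                     nn.Linear(128, input_dim))

    def forward(self, x):
        z = self.encoder(x)                 # latent space representation
        return self.decoder(z)              # reconstruction of the input

model = Autoencoder()
batch = torch.rand(8, 784)                  # dummy input batch
reconstruction = model(batch)
loss = nn.functional.mse_loss(reconstruction, batch)  # reconstruction error drives training
\end{verbatim}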
%\todo[inline, color=green!40]{explain figure}
%\todo[inline, color=green!40]{Paragraph about Variational Autoencoders? generative models vs discriminative models, enables other common use cases such as generating new data by changing parameterized generative distribution in latent space - VAES are not really relevant, maybe leave them out and just mention them shortly, with the hint that they are important but too much to explain since they are not key knowledge for this thesis}
@@ -459,7 +468,7 @@ LiDAR (Light Detection and Ranging) measures distance by emitting short laser pu
Because the speed of light in air is effectively constant, multiplying half the roundtrip time by that speed gives the distance between the lidar sensor and the reflecting object, as can be seen visualized in figure~\ref{fig:lidar_working_principle}. Modern spinning multi-beam LiDAR systems emit millions of these pulses every second. Each pulse is sent at a known combination of horizontal and vertical angles, creating a regular grid of measurements: for example, 32 vertical channels swept through 360° horizontally at a fixed angular spacing. While newer solid-state designs (flash, MEMS, phased-array) are emerging, spinning multi-beam LiDAR remains the most commonly seen type in autonomous vehicles and robotics because of its proven range, reliability, and mature manufacturing base.
\fig{lidar_working_principle}{figures/bg_lidar_principle.png}{An illustration of lidar sensors' working principle. Reproduced from~\cite{bg_lidar_figure_source}}
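Stated compactly (a standard relation rather than anything specific to this thesis), the working principle above reduces to computing the range $d$ from the speed of light $c$ and the measured roundtrip time $\Delta t$:
\[
d = \frac{c \, \Delta t}{2}
\]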
Each time a lidar emits and receives a laser pulse, it can use the ray's direction and the calculated distance to produce a single three-dimensional point. By collecting millions of such points each second, the sensor constructs a “point cloud”—a dense set of 3D coordinates relative to the LiDAR's own position. In addition to X, Y, and Z, many LiDARs also record the intensity or reflectivity of each return, providing extra information about the surface properties of the object hit by the pulse.
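A small sketch of this conversion (angle conventions vary between sensor models, so the ones used here are an illustrative assumption) maps one return to a Cartesian point:
\begin{verbatim}
import numpy as np

def return_to_point(range_m, azimuth_rad, elevation_rad):
    """Convert one lidar return (range plus beam angles) into an (x, y, z) point."""
    x = range_m * np.cos(elevation_rad) * np.cos(azimuth_rad)
    y = range_m * np.cos(elevation_rad) * np.sin(azimuth_rad)
    z = range_m * np.sin(elevation_rad)
    return np.array([x, y, z])

# Example: a 10 m return at 45 degrees azimuth and 2 degrees elevation.
print(return_to_point(10.0, np.deg2rad(45.0), np.deg2rad(2.0)))
\end{verbatim}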
@@ -1563,7 +1572,7 @@ Since only per-sample reconstruction losses were retained during pretraining, we
Due to the challenges of ground truth quality, evaluation results must be interpreted with care. As introduced earlier, we consider two complementary evaluation schemes:
\begin{itemize}
\item \textbf{Experiment-based labels:} An objective way to assign anomaly labels to all frames from degraded runs. However, this also marks many near-normal frames at the start and end of runs as anomalous. These knowingly mislabeled frames lower the maximum achievable AP, because even an ideal model would rank them among the normal data, so the evaluation counts them as missed anomalies.
\item \textbf{Hand labels:} A cleaner ground truth, containing only clearly degraded frames. This removes mislabeled intervals and allows nearly perfect separation. However, it also simplifies the task too much, because borderline cases are excluded.
\end{itemize}
@@ -1613,7 +1622,7 @@ Table~\ref{tab:results_ap} summarizes average precision (AP) across latent dimen
\end{table}
The precision--recall curves (Figure~\ref{fig:prc_representative}) illustrate these effects more clearly. For DeepSAD, precision stays close to 1 until about 0.5 recall, after which it drops off sharply. This plateau corresponds to the fraction of truly degraded frames in the anomalous set. Once recall moves beyond this point, the evaluation demands that the model also “find” the mislabeled anomalies near the run boundaries. To do so, the decision threshold must be lowered so far that many normal frames are also flagged, which causes precision to collapse. The baselines behave differently: OC-SVM shows a smooth but weaker decline without a strong high-precision plateau, while Isolation Forest detects only a few extreme anomalies before collapsing to near-random performance. These operational differences are hidden in a single AP number but are important for judging how the methods would behave in deployment.
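For reference, such precision--recall curves and AP values can be computed with standard tooling; the following minimal sketch uses dummy labels and scores rather than the thesis data:
\begin{verbatim}
import numpy as np
from sklearn.metrics import precision_recall_curve, average_precision_score

# Dummy ground truth (1 = anomalous) and anomaly scores, for illustration only.
y_true = np.array([0, 0, 0, 0, 1, 1, 0, 1, 1, 1])
scores = np.array([0.10, 0.20, 0.15, 0.30, 0.80, 0.75, 0.40, 0.90, 0.35, 0.85])

precision, recall, _ = precision_recall_curve(y_true, scores)
ap = average_precision_score(y_true, scores)
print(f"AP = {ap:.3f}")  # single-number summary of the whole curve
\end{verbatim}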
Taken together, the two evaluation schemes provide complementary insights. The experiment-based labels offer a noisy but realistic setting that shows how methods cope with ambiguous data, while the hand labels confirm that DeepSAD can achieve nearly perfect separation when the ground truth is clean. The combination of both evaluations makes clear that (i) DeepSAD is stronger than the baselines under both conditions, (ii) the apparent performance limits under experiment-based labels are mainly due to label noise, and (iii) interpreting results requires care, since performance drops in the curves often reflect mislabeled samples rather than model failures. At the same time, both schemes remain binary classifications and therefore cannot directly evaluate the central question of whether anomaly scores can serve as a continuous measure of degradation. For this reason, we extend the analysis in Section~\ref{sec:results_inference}, where inference on entire unseen experiments is used to provide a more intuitive demonstration of the method's potential for quantifying LiDAR degradation in practice.
@@ -1622,7 +1631,7 @@ Taken together, the two evaluation schemes provide complementary insights. The e
\paragraph{Effect of latent space dimensionality.}
Figure~\ref{fig:latent_dim_ap} shows how average precision changes with latent dimension under the experiment-based evaluation. The best performance is reached with compact latent spaces (32--128), while performance drops as the latent dimension grows. This can be explained by how the latent space controls the separation between normal and anomalous samples. Small bottlenecks act as a form of regularization, keeping the representation compact and making it easier to distinguish clear anomalies from normal frames. Larger latent spaces increase model capacity, but this extra flexibility also allows more overlap between normal frames and the mislabeled anomalies from the evaluation data. As a result, the model struggles more to keep the two groups apart.
This effect is clearly visible in the precision--recall curves. For DeepSAD at all dimensionalities we observe high initial precision and a steep drop once the evaluation demands that mislabeled anomalies be included. However, the sharpness of this drop depends on the latent size: at 32 dimensions the fall is comparably more gradual, while at 1024 it is almost vertical. In practice, this means that higher-dimensional latent spaces amplify the label-noise problem and lead to sudden precision collapses once the clear anomalies have been detected. Compact latent spaces are therefore more robust under noisy evaluation conditions and appear to be the safer choice for real-world deployment.
\fig{latent_dim_ap}{figures/results_ap_over_latent.png}{AP as a function of latent dimension (experiment-based evaluation). DeepSAD shows an inverse correlation between AP and latent space size.}
@@ -1661,45 +1670,87 @@ The red method curves can also be compared with the blue and green statistical i
While some similarities in shape may suggest that the methods partly capture these statistics, such interpretations should be made with caution.
The anomaly detection models are expected to have learned additional patterns that are not directly observable from simple statistics, and these may also contribute to their ability to separate degraded from clean data.
% -------------------------------
% Conclusion & Future Work (intro)
% -------------------------------
\newchapter{conclusion_future_work}{Conclusion and Future Work}
This thesis set out to answer the research question stated in Chapter~\ref{chp:introduction}:
\begin{quote}
Can autonomous robots quantify the reliability of lidar sensor data in hazardous environments to make more informed decisions?
\end{quote}
Our results indicate a qualified “yes.” Using anomaly detection (AD)—in particular DeepSAD—we can obtain scores that (i) separate clearly normal from clearly degraded scans and (ii) track degradation trends over time on held-out traversals (see Sections~\ref{sec:results_deepsad} and \ref{sec:results_inference}). At the same time, the absence of robust ground truth limits how confidently we can assess \emph{continuous} quantification quality and complicates cross-method comparisons. The remainder of this chapter summarizes what we contribute, what we learned, and what is still missing.
\paragraph{Main contributions.}
\begin{itemize}
\item \textbf{Empirical comparison for lidar degradation.} A systematic evaluation of DeepSAD against Isolation Forest and OC-SVM across latent sizes and labeling regimes, showing that DeepSAD consistently outperforms the baselines under both evaluation schemes (Section~\ref{sec:results_deepsad}).
\item \textbf{Two-track evaluation protocol.} We frame and use two complementary label sets: (i) \emph{experiment-based} labels (objective but noisy at run boundaries), and (ii) \emph{hand-labeled} intervals (clean but simplified). This pairing clarifies what each scheme can—and cannot—tell us about real performance (Section~\ref{sec:results_deepsad}).
\item \textbf{Latent dimensionality insight.} Compact bottlenecks (32--128) are more robust under noisy labels and yield the best AP; larger latent spaces amplify precision collapses beyond the high-precision plateau (Figure~\ref{fig:latent_dim_ap}). The high-dimensional lidar input can apparently be compressed quite strongly, which may lead to improved performance and better generalization.
\item \textbf{Semi-supervision insight.} In our data, \emph{unsupervised} DeepSAD performed best; \emph{light} labeling (50/10) performed worst; \emph{many} labels (500/100) partially recovered performance but did not surpass unsupervised. Evidence from PRC shapes and fold variance points to \emph{training-side overfitting to a small labeled set}, an effect that persists even under clean hand-labeled evaluation (Table~\ref{tab:results_ap}, Figure~\ref{fig:prc_over_semi}).
\item \textbf{Encoder architecture matters.} The Efficient encoder outperformed the LeNet-inspired variant in pretraining and downstream AD, indicating that representation quality substantially affects DeepSAD performance (Section~\ref{sec:results_pretraining}, Section~\ref{sec:results_deepsad}).
\item \textbf{Temporal inference recipe.} For deployment-oriented analysis we propose clean-run $z$-score normalization and causal EMA smoothing to obtain interpretable time-series anomaly scores on full traversals (Section~\ref{sec:results_inference}); a sketch of this step follows after this list.
\end{itemize}
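The temporal inference item above refers to the following sketch: anomaly scores are $z$-score normalized against a clean run and then smoothed with a causal exponential moving average; the smoothing factor below is a placeholder, not the value used in the experiments.
\begin{verbatim}
import numpy as np

def normalize_and_smooth(scores, clean_scores, alpha=0.1):
    """Z-score scores against clean-run statistics, then apply a causal EMA."""
    mu, sigma = clean_scores.mean(), clean_scores.std()
    z = (scores - mu) / sigma          # z-score w.r.t. the clean run

    smoothed = np.empty_like(z)
    ema = z[0]
    for i, value in enumerate(z):      # causal: only past and current values are used
        ema = alpha * value + (1.0 - alpha) * ema
        smoothed[i] = ema
    return smoothed

# Illustration: reference statistics from a clean run, then a run that degrades halfway.
clean = np.random.normal(1.0, 0.2, size=500)
test = np.concatenate([np.random.normal(1.0, 0.2, 300), np.random.normal(2.5, 0.4, 200)])
print(normalize_and_smooth(test, clean)[-5:])  # persistently high values indicate degradation
\end{verbatim}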
\paragraph{Practical recommendations.}
%For settings similar to ours (heavy imbalance, label noise), we recommend: (i) use PRC/AP for model selection and reporting over ROC which is suspect to distorted results under strong class imbalance; (ii) operate on the high-precision plateau observed in PRC before the recall “cliff”; (iii) prefer compact latent spaces and explore what the smallest effective latent dimensionality allowed by your usecase is; (iv) explore multiple autoencoder architectures with modern techniques to improve performance, efficiency and stability ; (v) avoid very small labeled sets—if labels are used, collect many and diverse examples, noting that unsupervised training may still generalize best.
For settings similar to ours, we recommend:
(i) use PRC/AP for model selection and reporting, since ROC/AUC can give overly optimistic results under strong class imbalance;
(ii) prefer compact latent spaces (e.g., 32--128) and determine the smallest dimensionality that still preserves task-relevant information;
(iii) evaluate multiple encoder architectures, as design choices strongly affect performance and robustness;
(iv) avoid very small labeled sets, which can cause overfitting to narrow anomaly exemplars. If labels are used, collect many and diverse examples—though unsupervised training may still generalize best.
\medskip
We now turn to the main limiting factor that emerged throughout this work: the lack of robust, expressive ground truth for lidar degradation and its downstream impact.
% \newsection{conclusion_data}{Missing Ground Truth as an Obstacle}
% The most significant obstacle identified in this work is the absence of robust and comprehensive ground truth for lidar degradation. As discussed in Chapter~\ref{chp:data_preprocessing}, it is not trivial to define what “degradation” precisely means in practice. Although error models for lidar and theoretical descriptions of how airborne particles affect laser returns exist, these models typically quantify errors at the level of individual points (e.g., missing returns, spurious near-range hits). Such metrics, however, may not be sufficient to assess the impact of degraded data on downstream users. A point cloud with relatively few, but highly localized errors—such as those caused by a dense smoke cloud—may lead a SLAM algorithm to misinterpret the region as a solid obstacle. In contrast, a point cloud with a greater number of dispersed errors might be easier to filter and thus cause little to no disruption in mapping. Consequently, the notion of “degradation” must extend beyond point-level error statistics to encompass how different error patterns propagate to downstream perception and decision-making modules.
%
% Collecting objective ground truth is complicated by the mismatch between available sensors. Smoke sensors provide only local measurements of density and particle size, while lidar captures environmental structure from a distance. Neither modality alone can serve as a perfect proxy for the other, and their differing measurement principles mean that ground truth derived from one may not fully reflect the challenges faced by the other.
%
% One promising direction is to evaluate degradation not directly on raw lidar frames but via its downstream impact. For example, future work could assess degradation based on discrepancies between a previously mapped 3D environment model and the output of a SLAM algorithm operating under degraded conditions. In such a setup, subjective labeling may still be required in special cases (e.g., dense smoke clouds treated as solid objects by SLAM), but it would anchor evaluation closer to the ultimate users of the data.
%
% Finally, the binary ground truth employed here is insufficient for the quantification goal. As shown in Section~\ref{sec:results_inference}, DeepSADs anomaly scores appear suitable not only for classification but also for expressing intermediate levels of degradation. Analog evaluation targets would therefore be highly valuable, as they would allow assessing whether anomaly scores correlate linearly or monotonically with degradation severity rather than only separating “normal” from “degraded.”
%
\newsection{conclusion_data}{Missing Ground Truth as an Obstacle}
The most significant obstacle identified in this work is the absence of robust and comprehensive ground truth for lidar degradation. As discussed in Chapter~\ref{chp:data_preprocessing}, it is not trivial to define what “degradation” precisely means in practice. Although error models for lidar and theoretical descriptions of how airborne particles affect laser returns exist, these models typically quantify errors at the level of individual points (e.g., missing returns, spurious near-range hits). Such metrics, however, may not be sufficient to assess the impact of degraded data on downstream perception. For example, a point cloud with relatively few but highly localized errors—such as those caused by a dense smoke cloud—may cause a SLAM algorithm to misinterpret the region as a solid obstacle. In contrast, a point cloud with a greater number of dispersed errors might be easier to filter and thus cause little or no disruption in mapping. Consequently, the notion of “degradation” must extend beyond point-level error statistics to include how different error patterns propagate to downstream modules.
Collecting objective ground truth is further complicated by the mismatch between available sensors. Smoke sensors provide only local measurements of density and particle size at a single point in space, whereas lidar observes many points across the environment from a distance. Because of these differing measurement principles, neither modality is a reliable proxy for the other, and ground truth derived from one may not reflect the challenges faced by the other. To our knowledge, no public datasets with explicit ground truth for lidar degradation exist, and even if such data were collected, for example with additional smoke sensors, it is unclear whether they would yield a usable ground truth for lidar-centric evaluation.

In our dataset, we relied on the fact that clean and degraded experiments were clearly separated: data from degraded runs was collected only after artificial smoke had been released. However, the degree of degradation varied strongly within each run. Because the smoke originated from a single machine in the middle of the sensor platform's traversal path, early and late frames were often nearly as clear as those from clean experiments. This led to mislabeled frames at the run boundaries and limited the reliability of experiment-based evaluation. As shown in Section~\ref{sec:results_deepsad}, this effect capped achievable AP scores even for strong models. The underlying difficulty is therefore not only label noise, but also the challenge of collecting labeled subsets that are representative of the full range of anomalies.
One promising direction is to evaluate degradation not directly on raw lidar frames but via its downstream impact. For example, future work could assess degradation based on discrepancies between a previously mapped 3D environment model and the output of a SLAM algorithm operating under degraded conditions. In such a setup, subjective labeling may still be required in special cases (e.g., dense smoke clouds treated as solid obstacles by SLAM), but it would anchor evaluation much more closely to the ultimate users of the data.
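As a rough illustration of such a downstream metric, the following Python sketch measures the discrepancy between a prior reference map and a SLAM output as nearest-neighbour distances between two point clouds; the assumption that both clouds are already expressed in a common frame, the RMSE summary, and the 20\,cm threshold are illustrative choices, not a validated evaluation protocol.
\begin{verbatim}
# Sketch: downstream impact as the discrepancy between a prior reference
# map and a SLAM output, both given as N x 3 point clouds in the same
# frame. Alignment, outlier handling and the chosen summary statistics
# are illustrative assumptions.
import numpy as np
from scipy.spatial import cKDTree

def map_discrepancy(reference_map: np.ndarray, slam_map: np.ndarray) -> dict:
    """Nearest-neighbour distances from SLAM points to the reference map."""
    tree = cKDTree(reference_map)
    dists, _ = tree.query(slam_map, k=1)
    return {
        "rmse": float(np.sqrt(np.mean(dists ** 2))),
        "frac_above_20cm": float(np.mean(dists > 0.2)),
    }

# Example with synthetic data: the SLAM map is the reference plus noise
# and a small cluster of spurious points (a stand-in for a smoke artefact).
rng = np.random.default_rng(1)
reference = rng.uniform(-10.0, 10.0, size=(5000, 3))
slam = reference + rng.normal(scale=0.02, size=reference.shape)
smoke_blob = rng.normal(loc=[2.0, 2.0, 1.0], scale=0.3, size=(200, 3))
print(map_discrepancy(reference, np.vstack([slam, smoke_blob])))
\end{verbatim}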
Finally, the binary ground truth employed here is insufficient for the quantification goal. As shown in Section~\ref{sec:results_inference}, DeepSAD's anomaly scores appear suitable not only for classification but also for expressing intermediate levels of degradation. Analog evaluation targets would therefore be highly valuable, as they would allow testing whether anomaly scores increase monotonically with degradation severity, rather than only separating “normal” from “degraded.”
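If such analog targets were available, monotonicity could be checked directly, for example with Spearman's rank correlation as in the following sketch; the per-frame severity values used here are synthetic stand-ins for a hypothetical analog ground truth.
\begin{verbatim}
# Sketch: checking whether anomaly scores increase monotonically with an
# analog degradation severity signal. Spearman's rank correlation tests
# monotonicity without assuming a linear relationship; all data below is
# synthetic and purely illustrative.
import numpy as np
from scipy.stats import spearmanr

rng = np.random.default_rng(2)
severity = np.clip(rng.normal(0.5, 0.25, size=500), 0.0, 1.0)  # hypothetical targets
scores = 2.0 * severity ** 2 + rng.normal(scale=0.1, size=500)  # monotone, non-linear

rho, p_value = spearmanr(scores, severity)
print(f"Spearman rho = {rho:.3f} (p = {p_value:.2g})")
\end{verbatim}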
\newsection{conclusion_ad}{Insights into DeepSAD and AD for Degradation Quantification}
This work has shown that the DeepSAD principle is applicable to lidar degradation data and yields promising detection performance as well as runtime feasibility (see Section~\ref{sec:setup_experiments_environment}). Compared to simpler baselines such as Isolation Forest and OC-SVM, DeepSAD achieved much stronger separation between clean and degraded data: while OC-SVM showed smoother but weaker separation and Isolation Forest produced many false positives even in clean runs, both DeepSAD variants maintained large high-precision regions before collapsing under mislabeled evaluation targets.
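For reference, the shallow baselines can be reproduced in a few lines with scikit-learn, as sketched below on random placeholder features; the feature vectors, data split, and model parameters are illustrative and do not correspond to the experimental setup of this work.
\begin{verbatim}
# Sketch: shallow anomaly-detection baselines (Isolation Forest, OC-SVM)
# fitted on feature vectors and scored with average precision. Larger
# score = more anomalous; all data here is random placeholder data.
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.svm import OneClassSVM
from sklearn.metrics import average_precision_score

rng = np.random.default_rng(3)
X_train = rng.normal(size=(1000, 64))                       # (mostly) clean frames
X_test = np.vstack([rng.normal(size=(400, 64)),
                    rng.normal(loc=1.5, size=(100, 64))])   # clean + degraded
y_test = np.concatenate([np.zeros(400), np.ones(100)])      # 1 = degraded

for name, model in [("IsolationForest", IsolationForest(random_state=0)),
                    ("OC-SVM", OneClassSVM(nu=0.1, gamma="scale"))]:
    model.fit(X_train)
    scores = -model.decision_function(X_test)  # flip sign: larger = more anomalous
    print(name, "AP =", round(average_precision_score(y_test, scores), 3))
\end{verbatim}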
However, the semi-supervised component of DeepSAD did not improve results in our setting. In fact, adding a small number of labels often reduced performance due to overfitting to narrow subsets of anomalies, while larger labeled sets stabilized training but still did not surpass the unsupervised regime (see Section~\ref{sec:results_deepsad}). This suggests that, without representative and diverse labeled anomalies, unsupervised training remains the safer choice.
We also observed that both the encoder architecture and the latent dimensionality are critical design choices. The Efficient encoder consistently outperformed the LeNet-inspired baseline, producing more stable precision–recall curves and stronger overall results. Similarly, compact latent spaces (32–128 dimensions) yielded the best performance and proved more robust under noisy evaluation conditions, whereas larger latent spaces amplified the impact of mislabeled samples and caused sharper precision collapses. These findings underline the importance of representation design for robust anomaly detection and merit further study under cleaner evaluation conditions, for example by benchmarking encoder architectures on datasets with high-quality ground truth to clarify how much of DeepSAD's performance gain stems from representation quality rather than optimization.
Finally, inference experiments showed that DeepSAD's anomaly scores can track degradation trends over time when normalized and smoothed, suggesting potential for real-world degradation quantification.
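A minimal sketch of such post-processing is shown below: anomaly scores are min-max normalized against calibration values from a clean run and then exponentially smoothed; the calibration strategy and smoothing factor are assumptions for illustration, not the settings used in the inference experiments.
\begin{verbatim}
# Sketch: turning raw per-frame anomaly scores into a smoothed 0-1
# degradation signal. Min-max normalisation against a clean calibration
# run and exponential smoothing are illustrative choices.
import numpy as np

def normalise_and_smooth(scores: np.ndarray,
                         calib_min: float, calib_max: float,
                         alpha: float = 0.2) -> np.ndarray:
    """Min-max normalise scores, then apply an exponential moving average."""
    norm = np.clip((scores - calib_min) / (calib_max - calib_min), 0.0, 1.0)
    smoothed = np.empty_like(norm)
    smoothed[0] = norm[0]
    for t in range(1, len(norm)):
        smoothed[t] = alpha * norm[t] + (1.0 - alpha) * smoothed[t - 1]
    return smoothed

# Example: scores ramp up as smoke density increases, then decay.
raw = np.concatenate([np.full(50, 1.0), np.linspace(1.0, 6.0, 100), np.full(50, 2.0)])
raw += np.random.default_rng(4).normal(scale=0.3, size=raw.size)
signal = normalise_and_smooth(raw, calib_min=1.0, calib_max=6.0)
print(signal.round(2)[::25])
\end{verbatim}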
Future work could also explore per-sample weighting of semi-supervised targets. If analog ground truth becomes available, treating supervision as a graded rather than binary signal may allow DeepSAD to better capture varying degrees of degradation.
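One conceivable form of such graded supervision is sketched below as a per-sample interpolation between the attracting and repelling terms of a DeepSAD-style objective; this variant is purely hypothetical and is neither the loss used in this work nor the original DeepSAD formulation.
\begin{verbatim}
# Sketch: a hypothetical graded variant of a DeepSAD-style loss, where each
# sample carries a severity weight in [0, 1] (0 = clean, 1 = fully degraded;
# unlabelled samples would receive severity 0). Illustration only.
import torch

def graded_deepsad_loss(z: torch.Tensor, c: torch.Tensor,
                        severity: torch.Tensor, eta: float = 1.0,
                        eps: float = 1e-6) -> torch.Tensor:
    """z: latent codes (B, D); c: hypersphere centre (D,); severity: (B,)."""
    dist_sq = torch.sum((z - c) ** 2, dim=1)
    normal_term = dist_sq                 # pull clean samples towards the centre
    anomaly_term = 1.0 / (dist_sq + eps)  # push degraded samples away from it
    per_sample = (1.0 - severity) * normal_term + eta * severity * anomaly_term
    return per_sample.mean()

# Example: a batch of 8 latent codes with mixed severities.
z = torch.randn(8, 32)
c = torch.zeros(32)
severity = torch.tensor([0.0, 0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0])
print(graded_deepsad_loss(z, c, severity))
\end{verbatim}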
\newsection{conclusion_open_questions}{Open Questions and Future Work}

View File

@@ -563,8 +563,39 @@
and Tian, Hao and Song, Dehai and Wei, Zhiqiang},
year = {2021},
month = aug,
},
@article{ml_supervised_unsupervised_figure_source,
title = {Virtual reality in biology: could we become virtual naturalists?},
volume = {14},
ISSN = {1936-6434},
url = {http://dx.doi.org/10.1186/s12052-021-00147-x},
DOI = {10.1186/s12052-021-00147-x},
number = {1},
journal = {Evolution: Education and Outreach},
publisher = {Springer Science and Business Media LLC},
author = {Morimoto, Juliano and Ponton, Fleur},
year = {2021},
month = may,
},
@article{ml_autoencoder_figure_source,
  title = {From Autoencoder to Beta-VAE},
  author = {Weng, Lilian},
  journal = {lilianweng.github.io},
  year = {2018},
  url = {https://lilianweng.github.io/posts/2018-08-12-vae/},
},
@conference{bg_lidar_figure_source,
  title = {1D MEMS Micro-Scanning LiDAR},
  author = {Norbert Druml and Ievgeniia Maksymova and Thomas Thurner and Lierop, {D. van} and Hennecke, {Marcus E.} and Andreas Foroutan},
  year = {2018},
  month = sep,
  day = {16},
  language = {English},
}

Binary file not shown (After: 134 KiB)

Binary file not shown (Before: 211 KiB)

Binary file not shown (After: 15 KiB)

Binary file not shown (After: 199 KiB)