feedback WIP
@@ -1164,6 +1164,53 @@
 \field{title}{1D MEMS Micro-Scanning LiDAR}
 \field{year}{2018}
 \endentry
+\entry{bg_slam}{article}{}{}
+\name{author}{2}{}{%
+{{hash=f20739d463254c239085b0098114da44}{%
+family={Smith},
+familyi={S\bibinitperiod},
+given={Randall\bibnamedelima C.},
+giveni={R\bibinitperiod\bibinitdelim C\bibinitperiod}}}%
+{{hash=9ec288d3d1be96333e0fae9796707e68}{%
+family={Cheeseman},
+familyi={C\bibinitperiod},
+given={Peter},
+giveni={P\bibinitperiod}}}%
+}
+\list{publisher}{1}{%
+{SAGE Publications}%
+}
+\strng{namehash}{7031c0ebfd4f9d2d33ef0ddcb231c367}
+\strng{fullhash}{7031c0ebfd4f9d2d33ef0ddcb231c367}
+\strng{fullhashraw}{7031c0ebfd4f9d2d33ef0ddcb231c367}
+\strng{bibnamehash}{7031c0ebfd4f9d2d33ef0ddcb231c367}
+\strng{authorbibnamehash}{7031c0ebfd4f9d2d33ef0ddcb231c367}
+\strng{authornamehash}{7031c0ebfd4f9d2d33ef0ddcb231c367}
+\strng{authorfullhash}{7031c0ebfd4f9d2d33ef0ddcb231c367}
+\strng{authorfullhashraw}{7031c0ebfd4f9d2d33ef0ddcb231c367}
+\field{sortinit}{3}
+\field{sortinithash}{ad6fe7482ffbd7b9f99c9e8b5dccd3d7}
+\field{labelnamesource}{author}
+\field{labeltitlesource}{title}
+\field{issn}{1741-3176}
+\field{journaltitle}{The International Journal of Robotics Research}
+\field{month}{12}
+\field{number}{4}
+\field{title}{On the Representation and Estimation of Spatial Uncertainty}
+\field{volume}{5}
+\field{year}{1986}
+\field{pages}{56\bibrangedash 68}
+\range{pages}{13}
+\verb{doi}
+\verb 10.1177/027836498600500404
+\endverb
+\verb{urlraw}
+\verb http://dx.doi.org/10.1177/027836498600500404
+\endverb
+\verb{url}
+\verb http://dx.doi.org/10.1177/027836498600500404
+\endverb
+\endentry
 \entry{lidar_denoising_survey}{article}{}{}
 \name{author}{4}{}{%
 {{hash=30663aad72dc59a49b7023f9c332b58a}{%
@@ -1413,8 +1460,8 @@
 \strng{authornamehash}{d17e6557c5836d2d978179999ea1037f}
 \strng{authorfullhash}{3ae53fe582e8a815b118d26947eaa326}
 \strng{authorfullhashraw}{3ae53fe582e8a815b118d26947eaa326}
-\field{sortinit}{5}
-\field{sortinithash}{20e9b4b0b173788c5dace24730f47d8c}
+\field{sortinit}{4}
+\field{sortinithash}{9381316451d1b9788675a07e972a12a7}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{note}{\url{http://www.deeplearningbook.org}}
@@ -1452,8 +1499,8 @@
 \strng{authornamehash}{e9af9fcd8483f077f0dcdbd95213a56e}
 \strng{authorfullhash}{8179a2c222d1565711a7f216e4da6e56}
 \strng{authorfullhashraw}{8179a2c222d1565711a7f216e4da6e56}
-\field{sortinit}{5}
-\field{sortinithash}{20e9b4b0b173788c5dace24730f47d8c}
+\field{sortinit}{4}
+\field{sortinithash}{9381316451d1b9788675a07e972a12a7}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{month}{05}
@@ -1494,8 +1541,8 @@
 \strng{authornamehash}{01a32420f9995c8592740c3ad622e775}
 \strng{authorfullhash}{c0310d5b84b91b546714624d9baf92c2}
 \strng{authorfullhashraw}{c0310d5b84b91b546714624d9baf92c2}
-\field{sortinit}{5}
-\field{sortinithash}{20e9b4b0b173788c5dace24730f47d8c}
+\field{sortinit}{4}
+\field{sortinithash}{9381316451d1b9788675a07e972a12a7}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{issn}{1424-8220}
@@ -1687,8 +1734,8 @@
 \strng{authornamehash}{1eed07a9c59db157d86a149850002efb}
 \strng{authorfullhash}{5cd0fc84a08d52373df410079c09015c}
 \strng{authorfullhashraw}{5cd0fc84a08d52373df410079c09015c}
-\field{sortinit}{5}
-\field{sortinithash}{20e9b4b0b173788c5dace24730f47d8c}
+\field{sortinit}{4}
+\field{sortinithash}{9381316451d1b9788675a07e972a12a7}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{issn}{1941-0468}
@@ -1753,8 +1800,8 @@
 \strng{authorfullhash}{31c8cde264eb0da1d45f468f719f7a54}
 \strng{authorfullhashraw}{31c8cde264eb0da1d45f468f719f7a54}
 \field{extraname}{2}
-\field{sortinit}{5}
-\field{sortinithash}{20e9b4b0b173788c5dace24730f47d8c}
+\field{sortinit}{4}
+\field{sortinithash}{9381316451d1b9788675a07e972a12a7}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{booktitle}{2023 31st Mediterranean Conference on Control and Automation (MED)}
@@ -1802,8 +1849,8 @@
 \strng{authornamehash}{ea684bebf6033a20ad34a33644ec89fc}
 \strng{authorfullhash}{d6ad1c32e8f7738554f79d65d954b4f9}
 \strng{authorfullhashraw}{d6ad1c32e8f7738554f79d65d954b4f9}
-\field{sortinit}{6}
-\field{sortinithash}{b33bc299efb3c36abec520a4c896a66d}
+\field{sortinit}{5}
+\field{sortinithash}{20e9b4b0b173788c5dace24730f47d8c}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{issn}{1556-4967}
@@ -1851,8 +1898,8 @@
 \strng{authornamehash}{5e0b9f9cab8ce61be5266767752c12dc}
 \strng{authorfullhash}{d932d7249aa0617596765b2fc72a8152}
 \strng{authorfullhashraw}{d932d7249aa0617596765b2fc72a8152}
-\field{sortinit}{6}
-\field{sortinithash}{b33bc299efb3c36abec520a4c896a66d}
+\field{sortinit}{5}
+\field{sortinithash}{20e9b4b0b173788c5dace24730f47d8c}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{abstract}{Autoencoder is an unsupervised learning model, which can automatically learn data features from a large number of samples and can act as a dimensionality reduction method. With the development of deep learning technology, autoencoder has attracted the attention of many scholars. Researchers have proposed several improved versions of autoencoder based on different application fields. First, this paper explains the principle of a conventional autoencoder and investigates the primary development process of an autoencoder. Second, We proposed a taxonomy of autoencoders according to their structures and principles. The related autoencoder models are comprehensively analyzed and discussed. This paper introduces the application progress of autoencoders in different fields, such as image classification and natural language processing, etc. Finally, the shortcomings of the current autoencoder algorithm are summarized, and prospected for its future development directions are addressed.}
@@ -1893,8 +1940,8 @@
 \strng{authornamehash}{c4d64624ede10e1baa66843e963d7c13}
 \strng{authorfullhash}{c4d64624ede10e1baa66843e963d7c13}
 \strng{authorfullhashraw}{c4d64624ede10e1baa66843e963d7c13}
-\field{sortinit}{7}
-\field{sortinithash}{108d0be1b1bee9773a1173443802c0a3}
+\field{sortinit}{6}
+\field{sortinithash}{b33bc299efb3c36abec520a4c896a66d}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{title}{ODDS Library}
@@ -1941,8 +1988,8 @@
 \strng{authornamehash}{dd2ddc978fe083bcff1aa1379cd19643}
 \strng{authorfullhash}{4dd3ca3cdc8023700c28169734d6ad61}
 \strng{authorfullhashraw}{4dd3ca3cdc8023700c28169734d6ad61}
-\field{sortinit}{7}
-\field{sortinithash}{108d0be1b1bee9773a1173443802c0a3}
+\field{sortinit}{6}
+\field{sortinithash}{b33bc299efb3c36abec520a4c896a66d}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{issn}{0018-9219}
@@ -2017,8 +2064,8 @@
 \strng{authornamehash}{0fca66725a9966a967fc7893b180ddef}
 \strng{authorfullhash}{0e37676c60146890b0c3819a1c8e441b}
 \strng{authorfullhashraw}{0e37676c60146890b0c3819a1c8e441b}
-\field{sortinit}{7}
-\field{sortinithash}{108d0be1b1bee9773a1173443802c0a3}
+\field{sortinit}{6}
+\field{sortinithash}{b33bc299efb3c36abec520a4c896a66d}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{issn}{2296-7745}
@@ -2091,8 +2138,8 @@
 \strng{authornamehash}{e1fc6cab9b6009340e110518e53868c4}
 \strng{authorfullhash}{cffcf38c642164887a370768f5701b8e}
 \strng{authorfullhashraw}{cffcf38c642164887a370768f5701b8e}
-\field{sortinit}{7}
-\field{sortinithash}{108d0be1b1bee9773a1173443802c0a3}
+\field{sortinit}{6}
+\field{sortinithash}{b33bc299efb3c36abec520a4c896a66d}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{title}{MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications}
@@ -2143,8 +2190,8 @@
 \strng{authorfullhash}{2b7b29fe45fee2bd5ddb1dd1cbbff521}
 \strng{authorfullhashraw}{2b7b29fe45fee2bd5ddb1dd1cbbff521}
 \field{extraname}{2}
-\field{sortinit}{7}
-\field{sortinithash}{108d0be1b1bee9773a1173443802c0a3}
+\field{sortinit}{6}
+\field{sortinithash}{b33bc299efb3c36abec520a4c896a66d}
 \field{labelnamesource}{author}
 \field{labeltitlesource}{title}
 \field{booktitle}{2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition}
BIN thesis/Main.pdf
Binary file not shown.
@@ -328,23 +328,23 @@ Because the speed of light in air is effectively constant, multiplying half the

\rev{Each time} a \rev{LiDAR} emits and receives a laser pulse, it can use the ray's direction and the calculated distance to produce a single three-dimensional point. By collecting up to millions of such points each second, the sensor constructs a “point cloud”—a dense set of 3D coordinates relative to the \rev{LiDAR}’s own position. In addition to \rev{$X$, $Y$, and $Z$}, many \rev{LiDAR}s also record the intensity or reflectivity of each return, providing extra information about the surface properties of the object hit by the pulse.
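As a minimal illustration of the time-of-flight geometry described above (not part of the thesis or its codebase; all names and values are illustrative), a single return can be converted into a 3D point in the sensor frame roughly as follows:

```python
import math

SPEED_OF_LIGHT = 299_792_458.0  # m/s, effectively constant in air

def tof_to_point(round_trip_time_s, azimuth_rad, elevation_rad):
    """Convert one LiDAR return into a 3D point in the sensor frame.

    round_trip_time_s: time between emitting the pulse and receiving its echo
    azimuth_rad, elevation_rad: direction of the emitted ray
    """
    # Half the round-trip time multiplied by the speed of light gives the range.
    r = 0.5 * round_trip_time_s * SPEED_OF_LIGHT
    # Spherical-to-Cartesian conversion along the ray direction.
    x = r * math.cos(elevation_rad) * math.cos(azimuth_rad)
    y = r * math.cos(elevation_rad) * math.sin(azimuth_rad)
    z = r * math.sin(elevation_rad)
    return x, y, z

# Example: an echo after ~0.33 microseconds corresponds to a point roughly 50 m away.
print(tof_to_point(0.33e-6, math.radians(10.0), math.radians(2.0)))
```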

\rev{LiDAR}’s high accuracy, long range, and full-circle field of view make it indispensable for tasks like obstacle detection, simultaneous localization and mapping~(SLAM)~\rev{\cite{bg_slam}}, and terrain modeling in autonomous driving and mobile robotics. While complementary sensors—such as time-of-flight cameras, ultrasonic sensors, and RGB cameras—have their strengths at short range or in particular lighting, only \rev{LiDAR} delivers the combination of precise 3D measurements over medium to long distances, consistent performance regardless of illumination, and the point cloud density needed for safe navigation. \rev{LiDAR} systems do exhibit intrinsic noise (e.g., range quantization or occasional multi-return ambiguities), but in most robotic applications these effects are minor compared to environmental degradation.

In subterranean and rescue domain scenarios, the dominant challenge is airborne particles: dust kicked up by debris or smoke from fires. These aerosols create early returns that can mask real obstacles and cause missing data behind particle clouds, undermining SLAM and perception algorithms designed for cleaner data. This degradation is a type of atmospheric scattering, which can be caused by any kind of airborne particulates (e.g., snowflakes) or liquids (e.g., water droplets). Other kinds of environmental noise exist as well, such as specular reflections caused by smooth surfaces, beam occlusion due to close objects blocking the sensor's field of view, or even thermal drift, i.e., temperature affecting the sensor's circuits and mechanics and introducing biases in the measurements.

All of these may create unwanted noise in the point cloud created by the \rev{LiDAR}, making this domain an important research topic. \rev{In \cite{lidar_denoising_survey} an overview} of the current state of research into denoising methods for \rev{LiDAR} in adverse environments \rev{is given. It} categorizes them according to their approach (distance-, intensity- or learning-based) and concludes that all approaches have merits but also open challenges to solve for autonomous systems to safely navigate these adverse environments. The current research is heavily focused on the automotive domain, which can be observed by the vastly higher number of methods filtering noise from adverse weather effects\rev{--}environmental scattering from rain, snow and fog\rev{--}than from dust, smoke or other particles occurring rarely in the automotive domain.

A learning-based method to filter dust-caused degradation from \rev{LiDAR} is introduced in \rev{\cite{lidar_denoising_dust}}. The authors employ a convolutional neural network to classify dust particles in \rev{LiDAR} point clouds as such, enabling the filtering of those points, and compare their methods to more conservative approaches, such as various outlier removal algorithms. Another relevant example would be the filtering method proposed in \rev{\cite{lidar_subt_dust_removal}}, which enables the filtration of point clouds degraded by smoke or dust in subterranean environments, with a focus on the search and rescue domain. To achieve this, they formulated a filtration framework that relies on dynamic onboard statistical cluster outlier removal to classify and remove dust particles in point clouds.

Our method does not aim to remove the noise or degraded points in the \rev{LiDAR} data, but quantify its degradation to inform other systems of the autonomous robot about the data's quality, enabling more informed decisions. One such approach, though from the autonomous driving and not from the search and rescue domain, can be found in \rev{\cite{degradation_quantification_rain}, where a} learning-based method to quantify the \rev{LiDAR} sensor data degradation caused by adverse weather effects was proposed. \rev{They posed} the problem as an anomaly detection task and \rev{utilized} DeepSAD to learn degraded data to be an anomaly and high quality data to be normal behaviour. DeepSAD's anomaly score was used as the degradation's quantification score. From this example we decided to imitate this method and adapt it for the search and rescue domain, although this proved challenging due to the more limited data availability. Since it was effective for this closely related use case, we also employed DeepSAD, whose detailed workings we present in the following chapter.

\newchapter{deepsad}{DeepSAD: Semi-Supervised Anomaly Detection}

In this chapter, we explore the method \rev{DeepSAD}~\cite{deepsad}, which we employ to quantify the degradation of \rev{LiDAR} scans caused by airborne particles in the form of artificially introduced water vapor from a theater smoke machine. A similar approach—modeling degradation quantification as an anomaly detection task—was successfully applied in \rev{\cite{degradation_quantification_rain}} to assess the impact of adverse weather conditions on \rev{LiDAR} data for autonomous driving applications. DeepSAD leverages deep learning to capture complex anomalous patterns that classical statistical methods might miss. Furthermore, by incorporating a limited amount of hand-labeled data (both normal and anomalous), it can more effectively differentiate between known anomalies and normal data compared to purely unsupervised methods, which typically learn only the most prevalent patterns in the dataset~\cite{deepsad}.

\newsection{algorithm_description}{Algorithm Description}

DeepSAD's overall mechanics are similar to clustering-based anomaly detection methods, which according to \rev{\cite{anomaly_detection_survey}} typically follow a two-step approach. First, a clustering algorithm groups data points around a centroid; then, the distances of individual data points from this centroid are calculated and used as anomaly scores. In DeepSAD, these concepts are implemented by employing a neural network, which is jointly trained to map input data onto a latent space and to minimize the volume of a data-encompassing hypersphere, whose center is the aforementioned centroid. The data's geometric distance in the latent space to the hypersphere center is used as the anomaly score, where a larger distance between data and centroid corresponds to a higher probability of a sample being anomalous. This is achieved by shrinking the data-encompassing hypersphere during training, proportionally to all training data, which is required to contain significantly more normal than anomalous data. The outcome of this approach is that normal data gets clustered more closely around the centroid, while anomalies appear further away from it, as can be seen in the toy example depicted in \rev{Figure}~\ref{fig:deep_svdd_transformation}.

\fig{deep_svdd_transformation}{figures/deep_svdd_transformation}{DeepSAD teaches a neural network to transform data into a latent space and minimize the volume of a data-encompassing hypersphere centered around a predetermined centroid $\textbf{c}$. \\Reproduced from~\cite{deep_svdd}.}

@@ -359,13 +359,13 @@ In the main training step, DeepSAD's network is trained using SGD backpropagatio

\fig{deepsad_procedure}{diagrams/deepsad_procedure/deepsad_procedure}{Overview of the DeepSAD workflow. Training starts with unlabeled data and optional labeled samples, which are used to pre-train an autoencoder, compute the hypersphere center, and then perform main training with adjustable weighting of labeled versus unlabeled data. During inference, new samples are encoded and their distance to the hypersphere center is used as an anomaly score, with larger distances indicating stronger anomalies.}
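As a concrete sketch of the center-computation step in the workflow above (illustrative only; `encoder` and `train_loader` are assumed placeholders, not names from any actual codebase), the center $\mathbf{c}$ can be taken as the mean latent representation of the training data under the pretrained encoder, with a small safeguard against a trivial all-zero center as commonly done in published Deep SVDD/DeepSAD implementations:

```python
import torch

@torch.no_grad()
def init_center(encoder, train_loader, eps=0.1, device="cpu"):
    """Initialize the hypersphere center c as the mean latent vector of the training data."""
    encoder.eval()
    n_samples, c = 0, None
    for batch in train_loader:
        # Assumption: each batch is either a tensor or an (inputs, ...) tuple.
        x = batch[0] if isinstance(batch, (tuple, list)) else batch
        z = encoder(x.to(device))                 # latent representations, shape (batch, d)
        c = z.sum(dim=0) if c is None else c + z.sum(dim=0)
        n_samples += z.shape[0]
    c = c / n_samples
    # Keep coordinates away from exactly zero so the distance-based score
    # cannot collapse towards a trivial solution.
    c[(c.abs() < eps) & (c < 0)] = -eps
    c[(c.abs() < eps) & (c >= 0)] = eps
    return c
```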

To infer if a previously unknown data sample is normal or anomalous, the sample is fed in a forward-pass through the fully trained network. During inference, the centroid $\mathbf{c}$ needs to be known, to calculate the geometric distance between the sample's latent representation and $\mathbf{c}$. This distance \rev{serves as} an anomaly score, which correlates with the likelihood of the sample being anomalous. Due to differences in input data type, training success and latent space dimensionality, the anomaly score's magnitude has to be judged on an individual basis for each trained network. This means that scores produced by one network that signify normal data may very well clearly indicate an anomaly for another network. The geometric distance between two points in space is a scalar analog value; therefore, post-processing of the score is necessary to achieve a binary classification of normal and anomalous if desired.
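A minimal sketch of this scoring step, assuming a trained PyTorch-style encoder and a known center `c` (the threshold and all names are illustrative assumptions, not values from the thesis):

```python
import torch

def anomaly_score(encoder, x, c):
    """Distance of each sample's latent representation to the center c."""
    with torch.no_grad():
        z = encoder(x)                          # shape (batch, d)
    return ((z - c) ** 2).sum(dim=1).sqrt()     # one scalar score per sample

def classify(scores, threshold):
    """Optional post-processing: binarize the analog score.

    The threshold is network-specific and has to be chosen per trained model,
    e.g. on a validation set, since score magnitudes are not comparable
    across different networks.
    """
    return scores > threshold                   # True = anomalous
```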

DeepSAD's full training and inference procedure is visualized in \rev{Figure}~\ref{fig:deepsad_procedure}, which gives a comprehensive overview of the dataflows, tuneable hyperparameters and individual steps involved.

\newsection{algorithm_details}{Algorithm Details and Hyperparameters}

Since DeepSAD is heavily based on its predecessor \rev{Deep SVDD}~\cite{deep_svdd}, it is helpful to first understand Deep SVDD's optimization objective, so we start with explaining it here. For input space $\mathcal{X} \subseteq \mathbb{R}^D$, output space $\mathcal{Z} \subseteq \mathbb{R}^d$ and a neural network $\phi(\wc; \mathcal{W}) : \mathcal{X} \to \mathcal{Z}$, where $\mathcal{W}$ denotes the neural network's weights with $L$ layers $\{\mathbf{W}_1, \dots, \mathbf{W}_L\}$, $n$ the number of unlabeled training samples $\{\mathbf{x}_1, \dots, \mathbf{x}_n\}$, and $\mathbf{c}$ the center of the hypersphere in the latent space, Deep SVDD teaches the neural network to cluster normal data closely together in the latent space by defining its optimization objective as \rev{follows.}

\begin{equation}
\label{eq:deepsvdd_optimization_objective}
@@ -378,7 +378,9 @@ Deep SVDD is an unsupervised method which does not rely on labeled data to train

\citeauthor{deepsad} argue that the pre-training step employing an autoencoder—originally introduced in Deep SVDD—not only allows a geometric interpretation of the method as minimum volume estimation, i.e., the shrinking of the data-encompassing hypersphere, but also a probabilistic one as entropy minimization over the latent distribution. The autoencoding objective during pre-training implicitly maximizes the mutual information between the data and its latent representation, aligning the approach with the Infomax principle while encouraging a latent space with minimal entropy. This insight enabled \citeauthor{deepsad} to introduce an additional term in DeepSAD’s objective, beyond that of its predecessor Deep SVDD, which incorporates labeled data to better capture the characteristics of normal and anomalous data. They demonstrate that DeepSAD’s objective effectively models the latent distribution of normal data as having low entropy, while that of anomalous data is characterized by higher entropy. In this framework, anomalies are interpreted as being generated from an infinite mixture of distributions that differ from the normal data distribution. The introduction of this aforementioned term in DeepSAD's objective allows it to learn in a semi-supervised way, which helps the model better position known normal samples near the hypersphere center and push known anomalies farther away, thereby enhancing its ability to differentiate between normal and anomalous data.

From \rev{Equation}~\ref{eq:deepsvdd_optimization_objective} it is easy to understand DeepSAD's optimization objective seen in \rev{Equation}~\ref{eq:deepsad_optimization_objective}, which additionally \rev{uses} $m$ labeled data samples $\{(\mathbf{\tilde{x}}_1, \tilde{y}_1), \dots, (\mathbf{\tilde{x}}_m, \tilde{y}_m)\} \in \mathcal{X} \times \mathcal{Y}$ with $\mathcal{Y} = \{-1,+1\}$, for which $\tilde{y} = +1$ denotes normal and $\tilde{y} = -1$ anomalous samples, as well as a new hyperparameter $\eta > 0$ which can be used to balance the strength with which labeled and unlabeled samples contribute to the training.

\rev{The objective is}

\begin{equation}
\label{eq:deepsad_optimization_objective}
@@ -388,7 +390,7 @@ From equation~\ref{eq:deepsvdd_optimization_objective} it is easy to understand
+\frac{\lambda}{2}\sum_{\ell=1}^{L}\|\mathbf{W}^{\ell}\|_{F}^{2}.
\end{equation}

The first term of \rev{Equation}~\ref{eq:deepsad_optimization_objective} stays \rev{almost} the same, differing only in its consideration of the introduced $m$ labeled data samples for its proportionality. The second term is newly introduced to incorporate the labeled data samples with hyperparameter $\eta$'s strength, by either minimizing or maximizing the distance between the sample's latent representation and $\mathbf{c}$ depending on each data sample's label $\tilde{y}$. The standard L2 regularization is kept identical to Deep SVDD's optimization objective. It can also be observed that in case of $m = 0$ labeled samples, DeepSAD falls back to Deep SVDD's optimization objective and can therefore be used in a completely unsupervised fashion as well.
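For illustration, a per-batch version of this objective could be sketched in PyTorch roughly as follows (the L2 weight-decay term is assumed to be handled by the optimizer; all names are illustrative rather than taken from the thesis code):

```python
import torch

def deepsad_batch_loss(z, y, c, eta=1.0, eps=1e-6):
    """Per-batch DeepSAD objective, without the weight-decay (L2) term.

    z:   latent representations phi(x; W), shape (batch, d)
    y:   per-sample labels: 0 = unlabeled, +1 = labeled normal, -1 = labeled anomaly
    c:   hypersphere center in latent space, shape (d,)
    eta: weighting of labeled relative to unlabeled samples
    """
    dist_sq = torch.sum((z - c) ** 2, dim=1)
    losses = torch.where(
        y == 0,
        dist_sq,                               # unlabeled term: pull towards the center
        eta * (dist_sq + eps) ** y.float(),    # labeled term: distance^y pulls normals in,
    )                                          # pushes labeled anomalies (y = -1) away
    return losses.mean()
```

With $\eta = 1$ labeled and unlabeled samples contribute with equal weight, and a batch containing only unlabeled samples (y == 0 everywhere) reduces to the Deep SVDD term, matching the fallback behaviour noted above.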

\paragraph{Hyperparameters}
@@ -400,7 +402,7 @@ DeepSAD relies on several tuneable hyperparameters that influence different stag
\item \textbf{Latent space dimensionality $\mathbb{R}^d$} \\
The size of the latent bottleneck is a critical parameter. If $\mathbb{R}^d$ is too small, the network cannot encode all relevant information, leading to information loss and weak representations. If $\mathbb{R}^d$ is too large, the network risks overfitting by encoding irrelevant detail, while also increasing computational cost. These insights stem from autoencoder literature \cite{deep_learning_book}, but it is unclear whether they apply directly to DeepSAD: here the autoencoder serves only for pretraining, and the encoder is subsequently fine-tuned with a different objective. Thus, the optimal choice of $\mathbb{R}^d$ may not coincide with the value that would be ideal for autoencoder reconstruction alone.
\item \textbf{Label weighting $\eta$} \\
The parameter $\eta$ controls the relative contribution of labeled versus unlabeled data in the DeepSAD objective. With $\eta = 1$, both groups contribute equally (normalized by their sample counts). Larger values of $\eta$ emphasize the labeled data, pulling labeled \rev{normal data} closer to the center and pushing labeled anomalies further away. Smaller values emphasize the unlabeled data, effectively reducing the influence of labels. Its impact depends not only on its numerical value but also on the quantity and quality of available labels.
\item \textbf{Learning rates $L_A$ and $L_M$} \\
Two learning rates are defined: $L_A$ for the autoencoder pretraining and $L_M$ for the main DeepSAD training. The learning rate sets the step size used during gradient descent updates and thereby controls the stability and speed of training. If it is too high, the optimization may diverge or oscillate; if too low, convergence becomes excessively slow and may get stuck in poor local minima. Schemes with adaptive learning rates such as ADAM may be applied to prevent poor choices.
\item \textbf{Number of epochs $E_A$ and $E_M$} \\
@@ -422,7 +424,7 @@ To ensure our chosen dataset meets the needs of reliable degradation quantificat

\begin{enumerate}
\item \textbf{Data Modalities:}\\
The dataset must include \rev{LiDAR} sensor data, since we decided to train and evaluate our method on what should be the most universally used sensor type in the given domain. To keep our method as generalized as possible, we chose to only require range-based point cloud data and \rev{opt out of} sensor-specific data such as intensity or reflectivity, though it may be of interest for future work. It is also desirable to have complementary visual data such as camera images, for better context, manual verification and understanding of the data.

\item \textbf{Context \& Collection Method:}\\
To mirror the real-world conditions of autonomous rescue robots, the data should originate from locations such as subterranean environments (tunnels, caves, collapsed structures), which closely reflect what would be encountered during rescue missions. Ideally, it should be captured from a ground-based, self-driving robot platform in motion instead of aerial, handheld, or stationary collection, to ensure similar circumstances to the target domain.
@@ -442,16 +444,16 @@ To ensure our chosen dataset meets the needs of reliable degradation quantificat

Quantitative benchmarking of degradation quantification requires a degradation label for every scan. Ideally that label would be a continuous degradation score, although a binary label would still enable meaningful comparison. As the rest of this section shows, producing any reliable label is already challenging and assigning meaningful analog scores may not be feasible at all. Compounding the problem, no public search-and-rescue (SAR) \rev{LiDAR} data set offers such ground truth as far as we know. To understand the challenges around labeling \rev{LiDAR} data degradation, we will look at what constitutes degradation in this context.

In \rev{Section}~\ref{sec:lidar_related_work} we discussed some internal and environmental error causes of \rev{LiDAR} sensors, such as multi-return ambiguities or atmospheric scattering, respectively. While we are aware of research into singular failure \rev{modes~\cite{lidar_errormodel_particles}} or research trying to model the totality of error sources occurring in other \rev{domains~\cite{lidar_errormodel_automotive}}, there appears to be no such model for the search and rescue domain and its unique environmental circumstances. Although scientific consensus appears to be that airborne particles are the biggest contributor to degradation in SAR~\cite{lidar_errormodel_consensus}, we think that a more versatile definition is required to ensure confidence during critical SAR missions, which are often of a volatile nature. We are left with an ambiguous definition of what constitutes \rev{LiDAR} point cloud degradation in the SAR domain.

We considered which types of objective measurements may be available to produce ground-truth labels, such as particulate matter sensors, \rev{LiDAR} point clouds' inherent properties such as range-dropout rate and others, but we fear that using purely objective measures to label the data would limit our learning-based method to imitating the labels' sources instead of differentiating all possible degradation patterns from high quality data. Due to the incomplete error model in this domain, there may be novel or compound error sources that would not be captured using such an approach. As an example, we did observe dense smoke reflecting enough rays to produce phantom objects, which may fool SLAM algorithms. Such a case may even be labeled incorrectly as normal by one of the aforementioned objective measurement labeling options, if the surroundings do not exhibit enough dispersed smoke particles already.

To mitigate the aforementioned risks we adopt a human-centric, binary labelling strategy. We judged analog and multi-level discrete rating scales to be too subjective for human consideration, which only left us with the simplistic, but hopefully more reliable binary choice. We used two labeling approaches, producing two evaluation sets, whose motivation and details will be discussed in more detail in \rev{Section}~\ref{sec:preprocessing}. Rationale for the exact labeling procedures requires knowledge of the actual dataset we ended up choosing, which we will present in the next section.

\newsection{data_dataset}{\rev{Dataset}}


Based on the previously discussed requirements and the challenges of obtaining reliable labels, we selected the \citetitle{subter}~\cite{subter} for training and evaluation. This dataset comprises multimodal sensor data collected from a robotic platform navigating tunnels and rooms in a subterranean environment, an underground tunnel in Luleå, Sweden. Notably, some experiments incorporated an artificial smoke machine to simulate heavy degradation from aerosol particles, making the dataset particularly well-suited to our use case. A Pioneer 3-AT2 robotic platform, which can be seen in \rev{Figure}~\ref{fig:subter_platform_photo}, was used to mount a multitude of sensors that are described in \rev{Table}~\ref{tab:subter-sensors} and whose mounting locations are depicted in \rev{Figure}~\ref{fig:subter_platform_sketch}.

%-------------------------------------------------
% Compact sensor overview (row numbers follow Fig.~\ref{fig:subter_platform})
@@ -465,14 +467,14 @@ Based on the previously discussed requirements and the challenges of obtaining r
\rowcolors{2}{gray!08}{white}
\scriptsize
\begin{tabular}{cp{4cm}p{4.5cm}p{5.5cm}}
\textbf{\#} & \textbf{Sensor} & \textbf{Recorded Data} & \textbf{Key Specs} \\
1 & \sensorcell{Spinning 3-D \rev{LiDAR}}{Ouster OS1-32} & 3-D cloud, reflectivity & 10 Hz, 32 ch, 360° × 42.4°, $\leq$ 120 m \rule{0pt}{2.6ex} \\
2 & \sensorcell{mm-wave RADAR (×4)}{TI IWR6843AoP} & 4 × 60° RADAR point clouds & 30 Hz, 60 GHz, 9 m max, 0.05 m res. \\
3 & \sensorcell{Solid-state \rev{LiDAR}}{Velodyne Velarray M1600} & Forward \rev{LiDAR} cloud & 10 Hz, 160 ch, 120° × 32°, 0.1–30 m \\
4 & \sensorcell{RGB-D / stereo cam}{Luxonis OAK-D Pro} & \rev{Stereo} b/w images, depth map & 15 fps, 75 mm baseline, active IR 930 nm \\
5 & \sensorcell{LED flood-light}{RS PRO WL28R} & Illumination for stereo \rev{camera} & 7 W, 650 lm (no data stream) \\
6 & \sensorcell{IMU}{Pixhawk 2.1 Cube Orange} & Accel, gyro, mag, baro & 190 Hz, 9-DoF, vibration-damped \\
7 & \sensorcell{On-board PC}{Intel NUC i7} & Time-synced logging & Quad-core i7, 16 GB RAM, 500 GB SSD \\
\end{tabular}
\end{table}
@@ -501,7 +503,7 @@ During the measurement campaign, a total of 14 experiments were conducted—10 p
In the anomalous experiments, the artificial smoke machine appears to have been running for some time before data collection began, as evidenced by both camera images and \rev{LiDAR} data showing an even distribution of water vapor around the machine. The stationary experiment is particularly unique: the smoke machine was positioned very close to the sensor platform and was actively generating new, dense smoke, to the extent that the \rev{LiDAR} registered the surface of the fresh water vapor as if it were a solid object.

\rev{Figures}~\ref{fig:data_screenshot_pointcloud}~and~\ref{fig:data_screenshot_camera} show a representative depiction of the experiments' environment: an image from the IR camera and the point cloud created by the OS1 \rev{LiDAR} sensor, captured at practically the same time.
\fig{data_screenshot_pointcloud}{figures/data_screenshot_pointcloud.png}{Screenshot of a 3D rendering of an experiment's point cloud produced by the OS1-32 \rev{LiDAR} sensor without smoke and with illumination (same frame and roughly same alignment as \rev{Figure}~\ref{fig:data_screenshot_camera}). Point color corresponds to measurement range and the axis in the center of the figure marks the \rev{LiDAR}'s position.}
\fig{data_screenshot_camera}{figures/data_screenshot_camera.png}{Screenshot of the IR camera output of an experiment without smoke and with illumination (same frame and roughly same alignment as \rev{Figure}~\ref{fig:data_screenshot_pointcloud}).}
@@ -532,7 +534,7 @@ Figure~\ref{fig:data_projections} displays two examples of \rev{LiDAR} point clo
\fig{data_projections}{figures/data_2d_projections.png}{Two-dimensional projections of two point clouds, one from an experiment without degradation and one from an experiment with artificial smoke as degradation. To aid the reader's perception, the images are vertically stretched and a colormap has been applied to the pixels' reciprocal range values, while the actual training data is grayscale.}
The remaining challenge was labeling a large enough portion of the dataset in a reasonably accurate manner; we described the difficulties and our general approach in \rev{Section}~\ref{sec:data_req}. Since, to our knowledge, neither our chosen dataset nor any other publicly available one provides objective labels for \rev{LiDAR} data degradation in the SAR domain, we had to define our own labeling approach. With objective measures of degradation unavailable, we explored alternative labeling methods, such as using the data's statistical properties like the number of missing measurements per point cloud or the higher incidence of erroneous measurements near the sensor, which we described in \rev{Section}~\ref{sec:data_req}. Ultimately, we were concerned that these statistical approaches might lead the method to simply mimic the statistical evaluation rather than to quantify degradation in a generalized and robust manner. After considering these options, we decided to label all point clouds from experiments with artificial smoke as anomalies, while point clouds from experiments without smoke were labeled as normal data. This labeling strategy—based on the presence or absence of smoke—is fundamentally an environmental indicator, independent of the intrinsic data properties recorded during the experiments.

The simplicity of this labeling approach has both advantages and disadvantages. On the positive side, it is easy to implement and creates a clear distinction between normal and anomalous data. However, its simplicity is also its drawback: some point clouds from experiments with artificial smoke do not exhibit perceptible degradation, yet they are still labeled as anomalies. The reason for this is that during the three non-static anomalous experiments the sensor platform starts recording in a tunnel roughly 20 meters from the smoke machine's location. It begins by approaching the smoke machine, navigates close to the machine for some time and then leaves its perimeter once again. Since the artificial smoke's density is far higher near the machine it originates from, the time the sensor platform spent close to it produced highly degraded point clouds, whereas the beginnings and ends of the anomalous experiments capture point clouds which are subjectively not degraded and appear similar to ones from the normal experiments. This effect is clearly illustrated by the degradation indicators mentioned earlier (the proportion of missing points and the number of erroneous points close to the sensor per point cloud), as can be seen in \rev{Figure}~\ref{fig:data_anomalies_timeline}.
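For concreteness, the following is a minimal NumPy sketch of how these two indicators could be computed per projected range image. It assumes that missing returns are stored as zeros in the reciprocal-range projection; the 1\,m near-sensor threshold and all names are illustrative rather than the exact values used in our pipeline.

\begin{verbatim}
import numpy as np

def degradation_indicators(inv_range_img, near_thresh_m=1.0):
    """Compute two per-frame degradation indicators from a 2D projection
    whose pixels hold reciprocal range values (0 marks missing returns)."""
    total = inv_range_img.size
    # Proportion of missing points: pixels without a valid return.
    missing_ratio = np.count_nonzero(inv_range_img == 0) / total
    # Proportion of (likely erroneous) points closer than the threshold,
    # i.e. pixels with large reciprocal range values.
    valid = inv_range_img > 0
    near_mask = valid & (inv_range_img > 1.0 / near_thresh_m)
    near_ratio = np.count_nonzero(near_mask) / total
    return missing_ratio, near_ratio
\end{verbatim}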
@@ -572,7 +574,7 @@ Together, these components define the full experimental pipeline, from data load
DeepSAD's PyTorch implementation—our starting point—includes support for training on standardized datasets such as MNIST, CIFAR-10 and datasets from \citetitle{odds}~\cite{odds}. The framework can train and test DeepSAD as well as a number of baseline algorithms (namely SSAD, OCSVM, Isolation Forest, KDE and SemiDGM) on the loaded data and evaluate their performance by calculating the Receiver Operating Characteristic (ROC) and its Area Under the Curve (AUC) for all given algorithms. We adapted this implementation, originally developed for Python 3.7, to work with Python 3.12, added data loading for our chosen dataset, added DeepSAD models that work with the \rev{LiDAR} projection data type, and added further evaluation methods as well as an inference module.
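As a minimal illustration of this evaluation step, ROC and AUC can be computed from per-frame anomaly scores with scikit-learn; the toy labels and scores below are placeholders and not taken from the framework.

\begin{verbatim}
import numpy as np
from sklearn.metrics import roc_auc_score, roc_curve

# labels: 1 = anomalous (smoke experiment), 0 = normal
# scores: higher values indicate stronger predicted degradation
labels = np.array([0, 0, 1, 1, 0, 1])
scores = np.array([0.1, 0.3, 0.8, 0.6, 0.2, 0.9])

fpr, tpr, thresholds = roc_curve(labels, scores)
auc = roc_auc_score(labels, scores)
print(f"ROC AUC: {auc:.3f}")
\end{verbatim}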
The raw SubTER dataset is provided as one ROS bag file per experiment, each containing a dense 3D point cloud from the Ouster OS1-32 \rev{LiDAR}. To streamline training and avoid repeated heavy computation, we project these point clouds offline into 2D “range images” as described in \rev{Section}~\ref{sec:preprocessing} and export them to files as NumPy arrays. Storing precomputed projections allows rapid data loading during training and evaluation. Many modern \rev{LiDARs} can be configured to output range images directly, which bypasses the need for post-hoc projection. When available, such native range-image streams can further simplify preprocessing or even allow skipping this step completely.
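The sketch below outlines one way such a projection from an unorganized point cloud to a 2D reciprocal-range image could look. The image resolution, the symmetric split of the vertical field of view and the handling of pixel collisions (last point wins) are assumptions chosen for illustration and do not necessarily match the exact preprocessing described in \rev{Section}~\ref{sec:preprocessing}.

\begin{verbatim}
import numpy as np

def project_to_range_image(points, h=32, w=1024,
                           fov_up_deg=21.2, fov_down_deg=-21.2):
    """Project an (N, 3) point cloud into an (h, w) image of reciprocal ranges."""
    x, y, z = points[:, 0], points[:, 1], points[:, 2]
    r = np.linalg.norm(points, axis=1)
    valid = r > 0
    x, y, z, r = x[valid], y[valid], z[valid], r[valid]

    yaw = np.arctan2(y, x)            # azimuth in [-pi, pi]
    pitch = np.arcsin(z / r)          # elevation angle
    fov_up = np.radians(fov_up_deg)
    fov_down = np.radians(fov_down_deg)

    # Map angles to pixel coordinates.
    u = ((1.0 - (yaw + np.pi) / (2.0 * np.pi)) * w).astype(int) % w
    v = ((fov_up - pitch) / (fov_up - fov_down) * h).clip(0, h - 1).astype(int)

    img = np.zeros((h, w), dtype=np.float32)   # 0 encodes missing returns
    img[v, u] = 1.0 / r                        # reciprocal range per pixel
    return img
\end{verbatim}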
We extended the DeepSAD framework’s PyTorch \texttt{DataLoader} by implementing a custom \texttt{Dataset} class that ingests our precomputed NumPy range-image files and attaches appropriate evaluation labels. Each experiment’s frames are stored as a single \texttt{.npy} file of shape \((\text{Number of Frames}, H, W)\), containing the point clouds' reciprocal range values. Our \texttt{Dataset} initializer scans a directory of these files, loads the NumPy arrays from file into memory, transforms them into PyTorch tensors and assigns evaluation and training labels accordingly.
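A condensed sketch of such a \texttt{Dataset} class is given below. The file layout and label handling follow the description above, while the class name, the directory argument and the assumption that anomalous experiments can be identified from the file name are illustrative rather than the exact implementation used in our code.

\begin{verbatim}
import glob
import os
import numpy as np
import torch
from torch.utils.data import Dataset

class LidarProjectionDataset(Dataset):
    """Loads precomputed (frames, H, W) reciprocal-range images per experiment."""

    def __init__(self, root_dir):
        frames, labels = [], []
        for path in sorted(glob.glob(os.path.join(root_dir, "*.npy"))):
            arr = np.load(path)                      # shape: (frames, H, W)
            # Assumption: smoke experiments are recognizable by file name.
            label = 1 if "smoke" in os.path.basename(path) else 0
            frames.append(torch.from_numpy(arr).float())
            labels.extend([label] * arr.shape[0])
        self.frames = torch.cat(frames, dim=0).unsqueeze(1)   # (N, 1, H, W)
        self.labels = torch.tensor(labels)

    def __len__(self):
        return self.frames.shape[0]

    def __getitem__(self, idx):
        return self.frames[idx], self.labels[idx]
\end{verbatim}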
@@ -607,9 +609,9 @@ For inference (i.e.\ model validation on held-out experiments), we provide a sec
\section{Model Configuration \& Evaluation Protocol}
Since the neural network architecture trained in the DeepSAD method is not fixed, as described in \rev{Section}~\ref{sec:algorithm_details}, but rather chosen based on the input data, we also had to choose an autoencoder architecture befitting our preprocessed \rev{LiDAR} data projections. Because \citetitle{degradation_quantification_rain}~\cite{degradation_quantification_rain} reported success in training DeepSAD on similar data, we first adapted the network architecture they utilized, which is based on the simple and well-understood LeNet architecture~\cite{lenet}, to our use case. Additionally, we were interested in evaluating the importance and impact of a well-suited network architecture for DeepSAD's performance and therefore designed a second network architecture, henceforth referred to as the ``efficient architecture'', which incorporates a few modern techniques befitting our use case.

The LeNet-inspired autoencoder can be split into an encoder network (\rev{Figure}~\ref{fig:setup_arch_lenet_encoder}) and a decoder network (\rev{Figure}~\ref{fig:setup_arch_lenet_decoder}) with a latent space \rev{in between} the two parts. Such an arrangement is typical for autoencoder architectures, as discussed in \rev{Section}~\ref{sec:autoencoder}. The encoder network simultaneously serves as DeepSAD's main training architecture, which, once trained, is used to infer the degradation quantification in our use case.
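To illustrate this overall arrangement (encoder, latent bottleneck, decoder) in code, the skeleton below mimics the structure in purely schematic form. The layer counts, channel widths and latent dimension are placeholders; the actual configuration is the one shown in \rev{Figures}~\ref{fig:setup_arch_lenet_encoder} and~\ref{fig:setup_arch_lenet_decoder}.

\begin{verbatim}
import torch
import torch.nn as nn

class LeNetStyleEncoder(nn.Module):
    """Schematic encoder; its output is the latent code used by DeepSAD."""
    def __init__(self, latent_dim=32):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.LeakyReLU(), nn.MaxPool2d(2),
            nn.Conv2d(8, 4, kernel_size=5, padding=2), nn.LeakyReLU(), nn.MaxPool2d(2),
        )
        self.fc = nn.LazyLinear(latent_dim)      # latent space

    def forward(self, x):                        # x: (B, 1, H, W) range image
        return self.fc(self.features(x).flatten(1))

class LeNetStyleDecoder(nn.Module):
    """Schematic decoder used only for autoencoder pretraining."""
    def __init__(self, latent_dim=32, h=32, w=1024):
        super().__init__()
        self.h, self.w = h // 4, w // 4
        self.fc = nn.Linear(latent_dim, 4 * self.h * self.w)
        self.deconv = nn.Sequential(
            nn.ConvTranspose2d(4, 8, kernel_size=2, stride=2), nn.LeakyReLU(),
            nn.ConvTranspose2d(8, 1, kernel_size=2, stride=2),
        )

    def forward(self, z):
        x = self.fc(z).view(-1, 4, self.h, self.w)
        return self.deconv(x)
\end{verbatim}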
\figc{setup_arch_lenet_encoder}{diagrams/arch_lenet_encoder}{
Architecture of the LeNet-inspired encoder. The input is a \rev{LiDAR} range image of size
@@ -700,7 +702,7 @@ The decoder (see \rev{Figure}~\ref{fig:setup_arch_ef_decoder}) mirrors the encod
}
To compare the computational efficiency of the two architectures, we report the number of trainable parameters and the number of multiply–accumulate operations (MACs) for the different latent space sizes used in our experiments in \rev{Table}~\ref{tab:params_lenet_vs_efficient}. Even though the efficient architecture employs more layers and channels than the LeNet-inspired one, which allows the network to learn to recognize more types of patterns, the encoders' MACs are quite similar. The more complex decoder design of the efficient network contributes considerably more MACs, which leads to the longer pretraining times we report in \rev{Section}~\ref{sec:setup_experiments_environment}.
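The parameter counts in the table can be reproduced directly in PyTorch, and the MACs can be estimated with a profiling package; the snippet below is a generic sketch, and the example input shape is an illustrative assumption rather than the exact projection size.

\begin{verbatim}
import torch

def count_trainable_parameters(model: torch.nn.Module) -> int:
    # Sum over all parameters that receive gradients.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

# MACs can be estimated with a third-party profiler, e.g. (assuming thop
# is installed and the model accepts a (1, 1, 32, 1024) range image):
# from thop import profile
# macs, params = profile(model, inputs=(torch.zeros(1, 1, 32, 1024),))
\end{verbatim}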
\begin{table}[!ht]
\centering
@@ -798,7 +800,7 @@ Combining $7$ latent sizes, $2$ architectures, and $3$ labeling regimes yields $
\label{tab:exp_grid}
\end{table}

These experiments were run on a computational environment whose hardware and software stack we summarize in \rev{Table}~\ref{tab:system_setup}.
\begin{table}[p]
\centering
@@ -670,6 +670,20 @@
structures;Neuroscience;Genetics;System testing;Neural
networks;Constraint theory},
doi = {10.1109/2.36},
},

@article{bg_slam,
  title     = {On the Representation and Estimation of Spatial Uncertainty},
  volume    = {5},
  ISSN      = {1741-3176},
  url       = {http://dx.doi.org/10.1177/027836498600500404},
  DOI       = {10.1177/027836498600500404},
  number    = {4},
  journal   = {The International Journal of Robotics Research},
  publisher = {SAGE Publications},
  author    = {Smith, Randall C. and Cheeseman, Peter},
  year      = {1986},
  month     = dec,
  pages     = {56--68},
}