full upload so as not to lose anything important

This commit is contained in:
Jan Kowalczyk
2025-03-14 18:02:23 +01:00
parent 35fcfb7d5a
commit b824ff7482
33 changed files with 3539 additions and 353 deletions

View File

@@ -0,0 +1,220 @@
import pickle
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import PercentFormatter
from pointcloudset import Dataset
# Root folder with the raw .bag recordings and root folder for generated plots.
all_data_path = Path("/home/fedex/mt/data/subter")
output_path = Path("/home/fedex/mt/plots/data_particles_near_sensor")

# Each run writes into its own timestamped folder; "latest" mirrors the newest
# run while "archive" keeps every previous one.
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

# create the whole output folder structure up front if it is missing
for folder in (output_path, output_datetime_path, latest_folder_path, archive_folder_path):
    folder.mkdir(exist_ok=True, parents=True)

# Split the recordings into the two experiment groups: runs recorded with
# artificial smoke carry "smoke" in their filename and count as anomalies.
normal_experiment_paths, anomaly_experiment_paths = [], []
for bag_file_path in all_data_path.iterdir():
    if bag_file_path.suffix == ".bag":
        group = (
            anomaly_experiment_paths
            if "smoke" in bag_file_path.name
            else normal_experiment_paths
        )
        group.append(bag_file_path)

# report which experiments were picked up in each category
for header, experiment_group in (
    ("Normal experiments:", normal_experiment_paths),
    ("\nAnomaly experiments:", anomaly_experiment_paths),
):
    print(header)
    for experiment in experiment_group:
        print(experiment.name)

# process smaller bag files first: sort both groups by file size, ascending
for experiment_group in (anomaly_experiment_paths, normal_experiment_paths):
    experiment_group.sort(key=lambda path: path.stat().st_size)
# plots box plots of how many measurements lie closer to the sensor than each
# range threshold, comparing normal and anomalous (smoke) experiments
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
    """Plot near-sensor measurement counts for normal vs. anomalous experiments.

    For each range threshold the per-pointcloud number of measurements with
    ``range`` below the threshold is computed (cached as a pickle under
    ``output_path`` for faster reruns), then visualised as two box plots —
    one full-range and one zoomed to 5% — saved into ``output_datetime_path``.

    Args:
        normal_experiment_paths: .bag files of smoke-free experiments.
        anomaly_experiment_paths: .bag files of experiments with artificial smoke.
        title: retained for backward compatibility; unused since the density
            histograms it labelled were replaced by the box plots.
    """

    def find_particles_near_sensor(experiment_paths, range_threshold):
        """Count, per pointcloud, measurements with range below the threshold.

        Returns one list per experiment, each holding the count for every
        pointcloud in that experiment's bag file.
        """
        particles_near_sensor = []
        for dataset in (
            Dataset.from_file(experiment_path, topic="/ouster/points")
            for experiment_path in experiment_paths
        ):
            particles_near_sensor.append(
                [
                    pc.data[pc.data["range"] < range_threshold].shape[0]
                    for pc in dataset
                ]
            )
        return particles_near_sensor

    def save_boxplot(normal_fraction, anomaly_fraction, threshold_m, filename, ylim=None):
        """Render one normal-vs-anomaly box plot and save it to the run folder."""
        plt.clf()
        fig = plt.figure(figsize=(10, 5))
        plt.boxplot(
            [normal_fraction, anomaly_fraction],
            tick_labels=[
                "Normal Experiments (No Artificial Smoke)",
                "Anomaly Experiments (With Artificial Smoke)",
            ],
        )
        # format the y axis as percentages via the formatter so labels stay
        # attached to tick locations (avoids the FixedFormatter warning and
        # stale labels after autoscaling)
        plt.gca().yaxis.set_major_formatter(PercentFormatter(xmax=1.0, decimals=0))
        plt.title(f"Particles Closer than {threshold_m}m to the Sensor")
        plt.ylabel(f"Percentage of measurements closer than {threshold_m}m")
        if ylim is not None:
            plt.ylim(*ylim)
        plt.tight_layout()
        plt.savefig(output_datetime_path / filename)
        plt.close(fig)  # free the figure — one leaked per threshold otherwise

    # thresholds in the sensor's range units; rt=500 corresponds to the 0.5 m
    # used in the original analysis, i.e. millimetres — TODO confirm with the
    # Ouster driver configuration
    range_thresholds = [500, 750, 1000, 1250, 1500]
    for rt in range_thresholds:
        print(f"Processing range threshold {rt}...")
        cache_file = output_path / f"particles_near_sensor_counts_{rt}.pkl"
        # counting is expensive, so results are cached per threshold; delete
        # the pickle to force a recount (NOTE: the cache is keyed only by the
        # threshold, not by the set of bag files)
        if cache_file.exists():
            with open(cache_file, "rb") as file:
                particles_near_sensor_normal, particles_near_sensor_anomaly = (
                    pickle.load(file)
                )
        else:
            particles_near_sensor_normal = find_particles_near_sensor(
                normal_experiment_paths,
                rt,
            )
            particles_near_sensor_anomaly = find_particles_near_sensor(
                anomaly_experiment_paths,
                rt,
            )
            # for faster subsequent runs, save the data to a pickle file
            with open(cache_file, "wb") as file:
                pickle.dump(
                    (particles_near_sensor_normal, particles_near_sensor_anomaly),
                    file,
                    protocol=pickle.HIGHEST_PROTOCOL,
                )

        # flatten the per-experiment lists into one array of per-pointcloud
        # counts for each experiment group
        particles_near_sensor_normal = np.concatenate(particles_near_sensor_normal)
        particles_near_sensor_anomaly = np.concatenate(particles_near_sensor_anomaly)

        # plot the fraction of returns instead of raw counts; a full scan is
        # assumed to hold 32 x 2048 measurements — TODO confirm sensor mode
        data_resolution = 32 * 2048
        normal_fraction = particles_near_sensor_normal / data_resolution
        anomaly_fraction = particles_near_sensor_anomaly / data_resolution

        # label plots with the threshold actually used (the original version
        # hard-coded "0.5m" for every threshold)
        threshold_m = rt / 1000
        save_boxplot(
            normal_fraction,
            anomaly_fraction,
            threshold_m,
            f"particles_near_sensor_boxplot_{rt}.png",
        )
        # same box plot zoomed to 5% so the bulk of the distribution is visible
        save_boxplot(
            normal_fraction,
            anomaly_fraction,
            threshold_m,
            f"particles_near_sensor_boxplot_zoomed_{rt}.png",
            ylim=(0, 0.05),
        )
# generate the near-sensor particle plots for both experiment groups
plot_data_points(
    normal_experiment_paths,
    anomaly_experiment_paths,
    "Density of Number of Particles Near Sensor",
)

# refresh "latest": wipe it, recreate it, then mirror this run's output into it
shutil.rmtree(latest_folder_path, ignore_errors=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
for generated_file in output_datetime_path.iterdir():
    shutil.copy2(generated_file, latest_folder_path)

# preserve the code that produced the plots alongside them
for destination_folder in (output_datetime_path, latest_folder_path):
    shutil.copy2(__file__, destination_folder)

# finally, file the timestamped run folder away into the archive
shutil.move(output_datetime_path, archive_folder_path)