full upload so as not to lose anything important
tools/plot_scripts/data_missing_points.py (new file, 243 lines)
@@ -0,0 +1,243 @@
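"""Plot how many lidar points are missing per point cloud in the bag files.

Counts missing returns per scan for normal and smoke (anomaly) experiments,
caches the counts in a pickle file, and saves several histogram variants to
a timestamped output folder (mirrored to latest/ and moved to archive/).
"""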
import pickle
import shutil
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import PercentFormatter
from pointcloudset import Dataset

# define the data path containing the bag files
all_data_path = Path("/home/fedex/mt/data/subter")

output_path = Path("/home/fedex/mt/plots/data_missing_points")
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

latest_folder_path = output_path / "latest"
archive_folder_path = output_path / "archive"
output_datetime_path = output_path / datetime_folder_name

# if the output folders do not exist, create them
output_path.mkdir(exist_ok=True, parents=True)
output_datetime_path.mkdir(exist_ok=True, parents=True)
latest_folder_path.mkdir(exist_ok=True, parents=True)
archive_folder_path.mkdir(exist_ok=True, parents=True)

data_resolution = 32 * 2048
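# a full scan is presumably 32 beams x 2048 azimuth steps = 65,536 returns;
# any shortfall relative to this resolution is counted as missing points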

normal_experiment_paths, anomaly_experiment_paths = [], []

# find all bag files and split them into categories (experiments with "smoke" in the name are anomalies)
for bag_file_path in all_data_path.iterdir():
    if bag_file_path.suffix != ".bag":
        continue
    if "smoke" in bag_file_path.name:
        anomaly_experiment_paths.append(bag_file_path)
    else:
        normal_experiment_paths.append(bag_file_path)

# sort anomaly and normal experiments by file size, ascending
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)


# plot histograms of how many points are missing per point cloud for both normal and anomaly experiments
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
    # helper that finds the number of missing points per point cloud for a list of experiments
    def find_missing_points(experiment_paths):
        missing_points = []
        for dataset in (
            Dataset.from_file(experiment_path, topic="/ouster/points")
            for experiment_path in experiment_paths
        ):
            missing_points_per_pc = []
            for pc in dataset:
                missing_points_per_pc.append(data_resolution - pc.data.shape[0])
            missing_points.append(missing_points_per_pc)
            # FIXME temporary break to test the code on only one experiment
            # break
        return missing_points

    # check whether the data has already been calculated and saved to a pickle file
    if (output_path / "missing_points.pkl").exists():
        with open(output_path / "missing_points.pkl", "rb") as file:
            missing_points_normal, missing_points_anomaly = pickle.load(file)
    else:
        missing_points_normal = find_missing_points(normal_experiment_paths)
        missing_points_anomaly = find_missing_points(anomaly_experiment_paths)

        # for faster subsequent runs, save the data to a pickle file
        with open(output_path / "missing_points.pkl", "wb") as file:
            pickle.dump(
                (missing_points_normal, missing_points_anomaly),
                file,
                protocol=pickle.HIGHEST_PROTOCOL,
            )
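        # note: the cached pickle is never invalidated automatically, so delete
        # missing_points.pkl after adding or removing bag files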

    # combine all missing points into one flat array per experiment type
    missing_points_normal = np.concatenate(missing_points_normal)
    missing_points_anomaly = np.concatenate(missing_points_anomaly)

    # create a histogram of missing points for normal and anomaly experiments
    plt.figure(figsize=(10, 5))
    plt.hist(missing_points_normal, bins=100, alpha=0.5, label="Normal Experiments")
    plt.hist(missing_points_anomaly, bins=100, alpha=0.5, label="Anomaly Experiments")
    plt.title(title)
    plt.xlabel("Number of Missing Points")
    plt.ylabel("Number of Pointclouds")
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points.png")

    plt.clf()

    # alternatively, curves representing the data could be plotted

    # alternative version with missing points on the y axis and number of point clouds on the x axis
    plt.figure(figsize=(10, 5))
    plt.hist(
        missing_points_normal,
        bins=100,
        alpha=0.5,
        label="Normal Experiments",
        orientation="horizontal",
    )
    plt.hist(
        missing_points_anomaly,
        bins=100,
        alpha=0.5,
        label="Anomaly Experiments",
        orientation="horizontal",
    )
    plt.title(title)
    plt.xlabel("Number of Pointclouds")
    plt.ylabel("Number of Missing Points")
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points_alternative.png")

    # find the min and max across both categories so both plots share the same limits
    min_value = np.min([np.min(missing_points_normal), np.min(missing_points_anomaly)])
    max_value = np.max([np.max(missing_points_normal), np.max(missing_points_anomaly)])

    # create a bins array spanning the min and max values
    bins = np.linspace(min_value, max_value, 100)
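    # sharing one bins array between the two histograms makes their densities
    # directly comparable bin for bin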

    # since the two histograms (normal and anomalous) have different scales,
    # also plot a normalized density version
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.hist(
        missing_points_normal,
        bins=bins,
        alpha=0.5,
        label="Normal Experiments",
        color="blue",
        density=True,
    )
    plt.hist(
        missing_points_anomaly,
        bins=bins,
        alpha=0.5,
        color="red",
        label="Anomaly Experiments",
        density=True,
    )
    plt.title(title)
    plt.xlabel("Number of Missing Points")
    plt.ylabel("Density")
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points_density.png")

    # create another density version that plots the percentage of missing
    # measurements instead of absolute counts (the total number of points is 32 * 2048)
    bins = np.linspace(0, 1, 100)
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.hist(
        missing_points_normal / data_resolution,
        bins=bins,
        alpha=0.5,
        label="Normal Experiments (No Artificial Smoke)",
        color="blue",
        density=True,
    )
    plt.hist(
        missing_points_anomaly / data_resolution,
        bins=bins,
        alpha=0.5,
        color="red",
        label="Anomaly Experiments (With Artificial Smoke)",
        density=True,
    )
    plt.title(title)
    plt.xlabel("Percentage of Missing Lidar Measurements")
    plt.ylabel("Density")
    # display the x axis fractions as percentages (1.0 -> 100%)
    plt.gca().xaxis.set_major_formatter(PercentFormatter(xmax=1))
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points_density_percentage.png")

    # matplotlib's hist does not support scaling two histograms to the same
    # peak height directly, so we do it manually using numpy
    num_bins = 100
    bin_lims = np.linspace(0, 40000, num_bins + 1)
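    # the 40,000 upper limit is a manual cutoff (roughly 61% of the 65,536
    # possible returns); scans missing more points than this are not shown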
    bin_centers = 0.5 * (bin_lims[:-1] + bin_lims[1:])
    bin_widths = bin_lims[1:] - bin_lims[:-1]
    # calculate the histograms for normal and anomaly experiments
    normal_hist, _ = np.histogram(missing_points_normal, bins=bin_lims)
    anomaly_hist, _ = np.histogram(missing_points_anomaly, bins=bin_lims)
    # scale each histogram by its own peak so both have a maximum of 1
    normal_hist_normalized = np.array(normal_hist, dtype=float) / np.max(normal_hist)
    anomaly_hist_normalized = np.array(anomaly_hist, dtype=float) / np.max(anomaly_hist)
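    # unlike density=True above, which normalizes the area under each histogram,
    # this peak normalization makes the modes of the two distributions align at 1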
    # plot the normalized histograms
    plt.clf()
    plt.figure(figsize=(10, 5))
    plt.bar(
        bin_centers,
        normal_hist_normalized,
        width=bin_widths,
        align="center",
        alpha=0.5,
        label="Normal Experiments",
    )
    plt.bar(
        bin_centers,
        anomaly_hist_normalized,
        width=bin_widths,
        align="center",
        alpha=0.5,
        label="Anomaly Experiments",
    )
    plt.title(title)
    plt.xlabel("Number of Missing Points")
    plt.ylabel("Normalized Density")
    plt.legend()
    plt.tight_layout()
    plt.savefig(output_datetime_path / "missing_points_normalized.png")


# plot histograms of missing points for normal and anomaly experiments
plot_data_points(
    normal_experiment_paths,
    anomaly_experiment_paths,
    "Missing Lidar Measurements per Scan",
)

# delete the current latest folder
shutil.rmtree(latest_folder_path, ignore_errors=True)

# create a new latest folder
latest_folder_path.mkdir(exist_ok=True, parents=True)

# copy the contents of the output folder to the latest folder
for file in output_datetime_path.iterdir():
    shutil.copy2(file, latest_folder_path)

# copy this script to both output folders to preserve the code used to generate the plots
shutil.copy2(__file__, output_datetime_path)
shutil.copy2(__file__, latest_folder_path)

# move the dated output folder to the archive
shutil.move(output_datetime_path, archive_folder_path)