diff --git a/thesis/Main.pdf b/thesis/Main.pdf
index 3dc29ae..384d737 100644
Binary files a/thesis/Main.pdf and b/thesis/Main.pdf differ
diff --git a/thesis/Main.tex b/thesis/Main.tex
index b769c70..8249052 100755
--- a/thesis/Main.tex
+++ b/thesis/Main.tex
@@ -1426,7 +1426,15 @@ Together, these results provide a comprehensive overview of the computational re
 \newchapter{results_discussion}{Results and Discussion}
 \newsection{results}{Results}
-\todo[inline]{some results, ROC curves, for both global and local}
+
+% \threadtodo
+% {give overview about hardware setup and how long things take to train}
+% {we know what we trained but not how long that takes}
+% {table of hardware and of how long different trainings took}
+% {experiment setup understood $\rightarrow$ what were the experiments' results}
+
+\todo{}
+
 \newsection{hyperparameter_analysis}{Hyperparameter Analysis}
 \todo[inline]{result for different amounts of labeled data}
diff --git a/tools/plot_scripts/results_inference_timeline smoothed.py b/tools/plot_scripts/results_inference_timeline smoothed.py
deleted file mode 100644
index 2647775..0000000
--- a/tools/plot_scripts/results_inference_timeline smoothed.py
+++ /dev/null
@@ -1,269 +0,0 @@
-import json
-import pickle
-import shutil
-from datetime import datetime
-from pathlib import Path
-
-import matplotlib.pyplot as plt
-import numpy as np
-
-# =========================
-# User-configurable params
-# =========================
-# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23")
-EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23"
-
-# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"}
-methods_scores_path = Path(
-    "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference"
-)
-
-# Root data path containing .bag files used to build the cached stats
-all_data_path = Path("/home/fedex/mt/data/subter")
-
-# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/")
-output_path = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed")
-
-# Cache (stats + labels) directory — same as your original script
-cache_path = output_path
-
-# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original)
-data_resolution = 32 * 2048
-
-# Frames per second for x-axis time
-FPS = 10.0
-
-# Whether to try to align score sign so that higher = more degraded.
-ALIGN_SCORE_DIRECTION = True
-
-# =========================
-# Smoothing configuration
-# =========================
-# Options: "none", "moving_average", "gaussian", "ema"
-SMOOTHING_METHOD = "ema"
-
-# Moving average window size (in frames). Use odd number for symmetry; <=1 disables.
-MA_WINDOW = 11
-
-# Gaussian sigma (in frames). ~2-3 frames is mild smoothing.
-GAUSSIAN_SIGMA = 2.0
-
-# Exponential moving average factor in (0,1]; smaller = smoother. ~0.2 is a good start.
-EMA_ALPHA = 0.1 - -# ========================= -# Setup output folders -# ========================= -datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") -latest_folder_path = output_path / "latest" -archive_folder_path = output_path / "archive" -output_datetime_path = output_path / datetime_folder_name - -output_path.mkdir(exist_ok=True, parents=True) -output_datetime_path.mkdir(exist_ok=True, parents=True) -latest_folder_path.mkdir(exist_ok=True, parents=True) -archive_folder_path.mkdir(exist_ok=True, parents=True) - -# ========================= -# Discover experiments -# ========================= -normal_experiment_paths, anomaly_experiment_paths = [], [] -for bag_file_path in all_data_path.iterdir(): - if bag_file_path.suffix != ".bag": - continue - if "smoke" in bag_file_path.name: - anomaly_experiment_paths.append(bag_file_path) - else: - normal_experiment_paths.append(bag_file_path) - -normal_experiment_paths = sorted( - normal_experiment_paths, key=lambda p: p.stat().st_size -) -anomaly_experiment_paths = sorted( - anomaly_experiment_paths, key=lambda p: p.stat().st_size -) - -# Find experiment -exp_path, exp_is_anomaly = None, None -for p in anomaly_experiment_paths: - if p.stem == EXPERIMENT_NAME: - exp_path, exp_is_anomaly = p, True - break -if exp_path is None: - for p in normal_experiment_paths: - if p.stem == EXPERIMENT_NAME: - exp_path, exp_is_anomaly = p, False - break -if exp_path is None: - raise FileNotFoundError(f"Experiment '{EXPERIMENT_NAME}' not found") - -exp_index = ( - anomaly_experiment_paths.index(exp_path) - if exp_is_anomaly - else normal_experiment_paths.index(exp_path) -) - -# ========================= -# Load cached statistical data -# ========================= -with open(cache_path / "missing_points.pkl", "rb") as f: - missing_points_normal, missing_points_anomaly = pickle.load(f) -with open(cache_path / "particles_near_sensor_counts_500.pkl", "rb") as f: - near_sensor_normal, near_sensor_anomaly = pickle.load(f) - -if exp_is_anomaly: - missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float) - near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float) -else: - missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float) - near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float) - -missing_points_pct = (missing_points_series / data_resolution) * 100.0 -near_sensor_pct = (near_sensor_series / data_resolution) * 100.0 - -# ========================= -# Load manual anomaly frame borders -# ========================= -manually_labeled_anomaly_frames = {} -labels_json_path = cache_path / "manually_labeled_anomaly_frames.json" -if labels_json_path.exists(): - with open(labels_json_path, "r") as f: - labeled_json = json.load(f) - for file in labeled_json.get("files", []): - manually_labeled_anomaly_frames[file["filename"]] = ( - file.get("semi_target_begin_frame"), - file.get("semi_target_end_frame"), - ) -exp_npy_filename = exp_path.with_suffix(".npy").name -anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None)) - - -# ========================= -# Load method scores and normalize -# ========================= -def zscore_1d(x, eps=1e-12): - mu, sigma = np.mean(x), np.std(x, ddof=0) - return np.zeros_like(x) if sigma < eps else (x - mu) / sigma - - -def maybe_align_direction(z, window): - start, end = window - if start is None or end is None: - return z - inside_mean = np.mean(z[start:end]) if end > start else 0 - outside = 
np.concatenate([z[:start], z[end:]]) if start > 0 or end < len(z) else [] - outside_mean = np.mean(outside) if len(outside) else inside_mean - return z if inside_mean >= outside_mean else -z - - -methods = ["deepsad", "ocsvm", "isoforest"] -method_zscores = {} -for m in methods: - s = np.load(methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy") - s = np.asarray(s, dtype=float).ravel() - n = min(len(s), len(missing_points_pct)) - s, missing_points_pct, near_sensor_pct = ( - s[:n], - missing_points_pct[:n], - near_sensor_pct[:n], - ) - z = zscore_1d(s) - if ALIGN_SCORE_DIRECTION: - z = maybe_align_direction(z, anomaly_window) - method_zscores[m] = z - - -# ========================= -# Smoothing -# ========================= -def moving_average(x, window): - if window <= 1: - return x - if window % 2 == 0: - window += 1 - return np.convolve(x, np.ones(window) / window, mode="same") - - -def gaussian_smooth(x, sigma): - from scipy.ndimage import gaussian_filter1d - - return gaussian_filter1d(x, sigma=sigma, mode="nearest") if sigma > 0 else x - - -def ema(x, alpha): - y = np.empty_like(x) - y[0] = x[0] - for i in range(1, len(x)): - y[i] = alpha * x[i] + (1 - alpha) * y[i - 1] - return y - - -def apply_smoothing(x): - m = SMOOTHING_METHOD.lower() - if m == "none": - return x - if m == "moving_average": - return moving_average(x, MA_WINDOW) - if m == "gaussian": - return gaussian_smooth(x, GAUSSIAN_SIGMA) - if m == "ema": - return ema(x, EMA_ALPHA) - raise ValueError(f"Unknown SMOOTHING_METHOD: {SMOOTHING_METHOD}") - - -smoothed_z = {k: apply_smoothing(v) for k, v in method_zscores.items()} -smoothed_missing = apply_smoothing(missing_points_pct) -smoothed_near = apply_smoothing(near_sensor_pct) - -# ========================= -# Plot -# ========================= -t = np.arange(len(missing_points_pct)) / FPS - - -def plot_series(y2, ylabel, fname, title_suffix): - fig, axz = plt.subplots(figsize=(14, 6), constrained_layout=True) - axy = axz.twinx() - for m in methods: - axz.plot(t, smoothed_z[m], label=f"{m} (z)") - axy.plot(t, y2, linestyle="--", label=ylabel) - start, end = anomaly_window - if start and end: - axz.axvline(start / FPS, linestyle=":", alpha=0.6) - axz.axvline(end / FPS, linestyle=":", alpha=0.6) - axz.set_xlabel("Time (s)") - axz.set_ylabel("Anomaly score (z)") - axy.set_ylabel(ylabel) - axz.set_title(f"{EXPERIMENT_NAME}\n{title_suffix}\nSmoothing: {SMOOTHING_METHOD}") - lines1, labels1 = axz.get_legend_handles_labels() - lines2, labels2 = axy.get_legend_handles_labels() - axz.legend(lines1 + lines2, labels1 + labels2, loc="upper right") - axz.grid(True, alpha=0.3) - fig.savefig(output_datetime_path / fname, dpi=150) - plt.close(fig) - - -plot_series( - smoothed_missing, - "Missing points (%)", - f"{EXPERIMENT_NAME}_zscores_vs_missing.png", - "Degradation vs Missing Points", -) -plot_series( - smoothed_near, - "Near-sensor points (%)", - f"{EXPERIMENT_NAME}_zscores_vs_near.png", - "Degradation vs Near-Sensor Points (<0.5m)", -) - -# ========================= -# Save & archive -# ========================= -shutil.rmtree(latest_folder_path, ignore_errors=True) -latest_folder_path.mkdir(exist_ok=True, parents=True) -for f in output_datetime_path.iterdir(): - shutil.copy2(f, latest_folder_path) -shutil.copy2(__file__, output_datetime_path) -shutil.copy2(__file__, latest_folder_path) -shutil.move(output_datetime_path, archive_folder_path) -print("Done. 
Plots saved and archived.") diff --git a/tools/plot_scripts/results_inference_timeline_smoothed.py b/tools/plot_scripts/results_inference_timeline_smoothed.py new file mode 100644 index 0000000..a6fb0de --- /dev/null +++ b/tools/plot_scripts/results_inference_timeline_smoothed.py @@ -0,0 +1,459 @@ +import json +import pickle +import shutil +from datetime import datetime +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np +import polars as pl + +# ===================================== +# User-configurable params +# ===================================== +# Root directory that contains per-run outputs (your loader will scan this) +INFERENCE_ROOT = Path("/home/fedex/mt/results/inference/copy") + +# Path that holds cached stats (same as before) +CACHE_PATH = Path("/home/fedex/mt/plots/data_anomalies_timeline") + +# Root data path containing .bag files to rebuild ordering (for stats mapping) +ALL_DATA_PATH = Path("/home/fedex/mt/data/subter") + +# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/") +OUTPUT_PATH = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed") + +# Frames per second for x-axis time +FPS = 10.0 + +# ---- Smoothing: EMA only ---- +EMA_ALPHA = 0.1 # models (0,1], smaller = smoother +STATS_EMA_ALPHA = 0.1 # stats (absolute %); tweak independently if desired + +# Whether to z-score per-curve for the model methods (recommended) +Z_SCORE_MODELS = True + +# If some model's series is longer/shorter than others in a group, align to min length +ALIGN_TO_MIN_LENGTH = True + +# Whether to try to align model score sign so that higher = more degraded using manual window +ALIGN_SCORE_DIRECTION = True + +# LiDAR points per frame (for stats -> percent) +DATA_RESOLUTION = 32 * 2048 + +# ===================================== +# Setup output folders +# ===================================== +datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") +latest_folder_path = OUTPUT_PATH / "latest" +archive_folder_path = OUTPUT_PATH / "archive" +output_datetime_path = OUTPUT_PATH / datetime_folder_name + +OUTPUT_PATH.mkdir(exist_ok=True, parents=True) +archive_folder_path.mkdir(exist_ok=True, parents=True) +latest_folder_path.mkdir(exist_ok=True, parents=True) +output_datetime_path.mkdir(exist_ok=True, parents=True) + +# ===================================== +# Load Polars DataFrame via your helper +# ===================================== +from load_results import load_inference_results_dataframe + +df: pl.DataFrame = load_inference_results_dataframe(INFERENCE_ROOT) + +# sanity +expected_cols = { + "experiment", + "network", + "latent_dim", + "semi_normals", + "semi_anomalous", + "model", + "scores", + "folder", + "config_json", +} +missing_cols = expected_cols - set(df.columns) +if missing_cols: + raise KeyError(f"DataFrame missing required columns: {sorted(missing_cols)}") + + +# ===================================== +# Rebuild experiment → stats mapping (like your original) +# ===================================== +def rebuild_experiment_index(): + normals, anomalies = [], [] + if not ALL_DATA_PATH.exists(): + return [], [], {} + for bag in ALL_DATA_PATH.iterdir(): + if bag.suffix != ".bag": + continue + if "smoke" in bag.name: + anomalies.append(bag) + else: + normals.append(bag) + normals = sorted(normals, key=lambda p: p.stat().st_size) + anomalies = sorted(anomalies, key=lambda p: p.stat().st_size) + mapping = {} + for i, p in enumerate(normals): + mapping[p.stem] = (False, i, p) + for i, p 
in enumerate(anomalies): + mapping[p.stem] = (True, i, p) + return normals, anomalies, mapping + + +normal_paths, anomaly_paths, exp_map = rebuild_experiment_index() + +# Load cached statistical data (+ manual labels) +missing_points_cache = CACHE_PATH / "missing_points.pkl" +near_sensor_cache = CACHE_PATH / "particles_near_sensor_counts_500.pkl" +labels_json_path = CACHE_PATH / "manually_labeled_anomaly_frames.json" + +missing_points_normal = missing_points_anomaly = None +near_sensor_normal = near_sensor_anomaly = None +if missing_points_cache.exists(): + with open(missing_points_cache, "rb") as f: + missing_points_normal, missing_points_anomaly = pickle.load(f) +if near_sensor_cache.exists(): + with open(near_sensor_cache, "rb") as f: + near_sensor_normal, near_sensor_anomaly = pickle.load(f) + +manual_windows = {} +if labels_json_path.exists(): + with open(labels_json_path, "r") as f: + labeled_json = json.load(f) + for file in labeled_json.get("files", []): + manual_windows[file["filename"]] = ( + file.get("semi_target_begin_frame"), + file.get("semi_target_end_frame"), + ) + + +def get_stats_for_experiment(exp_name: str): + """ + Returns: + missing_pct (np.ndarray) | None, + near_pct (np.ndarray) | None, + anomaly_window (tuple(start,end)) | (None,None) + """ + if exp_name not in exp_map: + return None, None, (None, None) + is_anomaly, idx, path = exp_map[exp_name] + missing = None + near = None + if missing_points_normal is not None and missing_points_anomaly is not None: + series = ( + missing_points_anomaly[idx] if is_anomaly else missing_points_normal[idx] + ) + missing = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0 + if near_sensor_normal is not None and near_sensor_anomaly is not None: + series = near_sensor_anomaly[idx] if is_anomaly else near_sensor_normal[idx] + near = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0 + npy_key = path.with_suffix(".npy").name + window = manual_windows.get(npy_key, (None, None)) + return missing, near, window + + +# ===================================== +# Helpers +# ===================================== +def to_np(a): + """Convert a Polars list cell to a 1D NumPy array of float.""" + if a is None: + return None + return np.asarray(a, dtype=float).ravel() + + +def zscore_1d(x, eps=1e-12): + if x is None or len(x) == 0: + return x + mu = float(np.mean(x)) + sigma = float(np.std(x, ddof=0)) + return np.zeros_like(x) if sigma < eps else (x - mu) / sigma + + +def ema(x, alpha): + if x is None or len(x) == 0: + return x + y = np.empty_like(x, dtype=float) + y[0] = x[0] + for i in range(1, len(x)): + y[i] = alpha * x[i] + (1 - alpha) * y[i - 1] + return y + + +def apply_ema_models(x): + return ema(x, EMA_ALPHA) + + +def apply_ema_stats(x): + return ema(x, STATS_EMA_ALPHA) + + +def align_lengths(series_dict): + """Truncate all series to the shortest available length.""" + valid_lengths = [ + len(v) for v in series_dict.values() if v is not None and len(v) > 0 + ] + if not valid_lengths: + return series_dict + min_len = min(valid_lengths) + return {k: (v[:min_len] if v is not None else None) for k, v in series_dict.items()} + + +def maybe_align_direction(z: np.ndarray, window): + """Flip sign so that the anomaly window mean is higher than the outside mean, if labels exist.""" + if z is None: + return z + start, end = window + if start is None or end is None: + return z + start = int(max(0, start)) + end = int(min(len(z), end)) + if end <= start or end > len(z): + return z + inside_mean = float(np.mean(z[start:end])) + if 
start == 0 and end == len(z): + return z + outside_parts = [] + if start > 0: + outside_parts.append(z[:start]) + if end < len(z): + outside_parts.append(z[end:]) + if not outside_parts: + return z + outside_mean = float(np.mean(np.concatenate(outside_parts))) + return z if inside_mean >= outside_mean else -z + + +def safe_title(s: str) -> str: + return s.replace("_", " ") + + +# ===================================== +# Model selection per group (network names updated) +# ===================================== +group_cols = ["experiment", "latent_dim", "semi_normals", "semi_anomalous"] + + +def pick_rows(gdf: pl.DataFrame): + sel = {} + sel["DeepSAD (LeNet)"] = gdf.filter( + (pl.col("network") == "subter_LeNet") & (pl.col("model") == "deepsad") + ) + sel["DeepSAD (efficient)"] = gdf.filter( + (pl.col("network") == "subter_efficient") & (pl.col("model") == "deepsad") + ) + sel["OCSVM (LeNet)"] = gdf.filter( + (pl.col("network") == "subter_LeNet") & (pl.col("model") == "ocsvm") + ) + sel["IsoForest (LeNet)"] = gdf.filter( + (pl.col("network") == "subter_LeNet") & (pl.col("model") == "isoforest") + ) + chosen = {} + for k, dfk in sel.items(): + chosen[k] = dfk.row(0) if dfk.height > 0 else None + return chosen + + +# ===================================== +# Iterate groups and plot +# ===================================== +plots_made = 0 + +for keys, g in df.group_by(group_cols, maintain_order=True): + experiment, latent_dim, semi_normals, semi_anomalous = keys + + chosen = pick_rows(g) + + # Extract series for models + curves_raw = {} + for label, row in chosen.items(): + if row is None: + curves_raw[label] = None + continue + row_dict = {c: row[i] for i, c in enumerate(df.columns)} + scores = to_np(row_dict["scores"]) + curves_raw[label] = scores + + # If nothing to plot, skip group + if all(v is None or len(v) == 0 for v in curves_raw.values()): + continue + + # Stats for this experiment (absolute %; no z-scoring) + missing_pct, near_pct, anomaly_window = get_stats_for_experiment(experiment) + + # Optionally align lengths among model curves + curves = curves_raw.copy() + if ALIGN_TO_MIN_LENGTH: + curves = align_lengths(curves) + + # Prepare processed model curves: z-score (if enabled) + EMA smoothing + proc = {} + for k, v in curves.items(): + if v is None: + continue + x = zscore_1d(v) if Z_SCORE_MODELS else v.astype(float) + if ALIGN_SCORE_DIRECTION and anomaly_window != (None, None): + x = maybe_align_direction(x, anomaly_window) + x = apply_ema_models(x) + proc[k] = x + + if not proc: + continue + + # Establish time axis for model curves + any_len = len(next(iter(proc.values()))) + t_models = np.arange(any_len) / FPS + + # =========== Plot A: Scores-only (models z-scored; stats not shown) =========== + figA, axA = plt.subplots(figsize=(14, 6), constrained_layout=True) + for label, y in proc.items(): + if y is not None: + axA.plot(t_models, y, label=label) + axA.set_xlabel("Time (s)") + axA.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else "")) + titleA = ( + f"{safe_title(experiment)} | latent_dim={latent_dim}, " + f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n" + f"Smoothing: EMA(alpha={EMA_ALPHA})" + ) + axA.set_title(titleA) + axA.grid(True, alpha=0.3) + axA.legend(loc="upper right") + fnameA = ( + f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}" + f"_scores_EMA-{EMA_ALPHA}{'_z' if Z_SCORE_MODELS else ''}.png" + ) + figA.savefig(output_datetime_path / fnameA, dpi=150) + plt.close(figA) + + # =========== Plot B: Models 
(z-scored) + Missing Points (%) absolute =========== + if missing_pct is not None and len(missing_pct) > 0: + mp = missing_pct + if ALIGN_TO_MIN_LENGTH: + mp = mp[:any_len] + mp_s = apply_ema_stats(mp) + t_stats = np.arange(len(mp_s)) / FPS + + figB, axB = plt.subplots(figsize=(14, 6), constrained_layout=True) + axBy = axB.twinx() + for label, y in proc.items(): + if y is not None: + axB.plot(t_models, y, label=label) + axBy.plot(t_stats, mp_s, linestyle="--", label="Missing points (%)") + + if anomaly_window != (None, None): + start, end = anomaly_window + if isinstance(start, int) and isinstance(end, int) and 0 <= start < end: + axB.axvline(start / FPS, linestyle=":", alpha=0.6) + axB.axvline(end / FPS, linestyle=":", alpha=0.6) + + axB.set_xlabel("Time (s)") + axB.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else "")) + axBy.set_ylabel("Missing points (%)") + titleB = ( + f"{safe_title(experiment)} | latent_dim={latent_dim}, " + f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n" + f"Models: EMA({EMA_ALPHA}) | Stats: EMA({STATS_EMA_ALPHA}) — + Missing points (absolute %)" + ) + axB.set_title(titleB) + axB.grid(True, alpha=0.3) + lines1, labels1 = axB.get_legend_handles_labels() + lines2, labels2 = axBy.get_legend_handles_labels() + axB.legend(lines1 + lines2, labels1 + labels2, loc="upper right") + + fnameB = ( + f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}" + f"_scores_plus_missing_EMA-{EMA_ALPHA}_stats-{STATS_EMA_ALPHA}" + f"{'_z' if Z_SCORE_MODELS else ''}.png" + ) + figB.savefig(output_datetime_path / fnameB, dpi=150) + plt.close(figB) + + # =========== Plot C: Models (z-scored) + Near-sensor Points (%) absolute =========== + if near_pct is not None and len(near_pct) > 0: + ns = near_pct + if ALIGN_TO_MIN_LENGTH: + ns = ns[:any_len] + ns_s = apply_ema_stats(ns) + t_stats = np.arange(len(ns_s)) / FPS + + figC, axC = plt.subplots(figsize=(14, 6), constrained_layout=True) + axCy = axC.twinx() + for label, y in proc.items(): + if y is not None: + axC.plot(t_models, y, label=label) + axCy.plot(t_stats, ns_s, linestyle="--", label="Near-sensor <0.5m (%)") + + if anomaly_window != (None, None): + start, end = anomaly_window + if isinstance(start, int) and isinstance(end, int) and 0 <= start < end: + axC.axvline(start / FPS, linestyle=":", alpha=0.6) + axC.axvline(end / FPS, linestyle=":", alpha=0.6) + + axC.set_xlabel("Time (s)") + axC.set_ylabel("Model anomaly score" + (" (z-score)" if Z_SCORE_MODELS else "")) + axCy.set_ylabel("Near-sensor points (%)") + titleC = ( + f"{safe_title(experiment)} | latent_dim={latent_dim}, " + f"semi_normals={semi_normals}, semi_anomalous={semi_anomalous}\n" + f"Models: EMA({EMA_ALPHA}) | Stats: EMA({STATS_EMA_ALPHA}) — + Near-sensor <0.5m (absolute %)" + ) + axC.set_title(titleC) + axC.grid(True, alpha=0.3) + lines1, labels1 = axC.get_legend_handles_labels() + lines2, labels2 = axCy.get_legend_handles_labels() + axC.legend(lines1 + lines2, labels1 + labels2, loc="upper right") + + fnameC = ( + f"{experiment}_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}" + f"_scores_plus_nearsensor_EMA-{EMA_ALPHA}_stats-{STATS_EMA_ALPHA}" + f"{'_z' if Z_SCORE_MODELS else ''}.png" + ) + figC.savefig(output_datetime_path / fnameC, dpi=150) + plt.close(figC) + + plots_made += 1 + +# ===================================== +# Preserve latest/, archive/, copy script +# ===================================== +# delete current latest folder +shutil.rmtree(latest_folder_path, ignore_errors=True) +# create new latest folder 
+latest_folder_path.mkdir(exist_ok=True, parents=True) + +# copy contents of output folder to the latest folder +for file in output_datetime_path.iterdir(): + shutil.copy2(file, latest_folder_path) + +# copy this python script to preserve the code used +try: + shutil.copy2(__file__, output_datetime_path) + shutil.copy2(__file__, latest_folder_path) +except Exception: + # If running interactively, fall back to saving the config snapshot + (output_datetime_path / "run_config.json").write_text( + json.dumps( + { + "INFERENCE_ROOT": str(INFERENCE_ROOT), + "CACHE_PATH": str(CACHE_PATH), + "ALL_DATA_PATH": str(ALL_DATA_PATH), + "FPS": FPS, + "EMA_ALPHA": EMA_ALPHA, + "STATS_EMA_ALPHA": STATS_EMA_ALPHA, + "Z_SCORE_MODELS": Z_SCORE_MODELS, + "ALIGN_TO_MIN_LENGTH": ALIGN_TO_MIN_LENGTH, + "ALIGN_SCORE_DIRECTION": ALIGN_SCORE_DIRECTION, + "timestamp": datetime_folder_name, + }, + indent=2, + ) + ) + +# move output date folder to archive +shutil.move(output_datetime_path, archive_folder_path) + +print(f"Done. Plotted {plots_made} groups. Archived under: {archive_folder_path}") diff --git a/tools/plot_scripts/results_inference_timelines_exp_compare.py b/tools/plot_scripts/results_inference_timelines_exp_compare.py new file mode 100644 index 0000000..0aa62af --- /dev/null +++ b/tools/plot_scripts/results_inference_timelines_exp_compare.py @@ -0,0 +1,533 @@ +#!/usr/bin/env python3 +# results_inference_timelines_exp_compare.py + +import json +import pickle +import re +import shutil +from datetime import datetime +from pathlib import Path +from typing import Dict, Optional, Tuple + +import matplotlib.pyplot as plt +import numpy as np +import polars as pl + +# ===================================== +# User-configurable params +# ===================================== + +# Root directory that contains per-run outputs (your loader will scan this) +INFERENCE_ROOT = Path("/home/fedex/mt/results/inference/copy") + +# Cached stats + manual labels (same location as your earlier scripts) +CACHE_PATH = Path("/home/fedex/mt/plots/data_anomalies_timeline") + +# .bag directory (used only to rebuild experiment order for mapping stats) +ALL_DATA_PATH = Path("/home/fedex/mt/data/subter") + +# Output base directory (timestamped subfolder will be created here, archived, and copied to latest/) +OUTPUT_PATH = Path("/home/fedex/mt/plots/results_inference_exp_compare") + +# Two experiments to compare (exact strings as they appear in your DF’s `experiment` column) +EXPERIMENT_CLEAN = "2_static_no_artifacts_illuminated_2023-01-23-001" +EXPERIMENT_DEGRADED = "3_smoke_human_walking_2023-01-23" + +# Shared model configuration for BOTH experiments +LATENT_DIM = 32 +SEMI_NORMALS = 50 +SEMI_ANOMALOUS = 10 + +# Comparison y-axis mode for methods: "baseline_z" or "baseline_tailprob" +Y_MODE = "baseline_z" + +# Progress axis resolution (number of bins from 0% to 100%) +PROGRESS_BINS = 100 + +# Frames per second for building time axes before progress-binning (informational only) +FPS = 10.0 + +# ---- EMA smoothing only ---- +# Methods (scores) EMA alpha +EMA_ALPHA_METHODS = 0.1 # (0,1], smaller = smoother +# Stats (absolute %) EMA alpha +EMA_ALPHA_STATS = 0.1 # (0,1], smaller = smoother + +# LiDAR points per frame (for stats -> percent) +DATA_RESOLUTION = 32 * 2048 + +# Copy this script into outputs for provenance (best-effort if not running as a file) +COPY_SELF = True + +# ===================================== +# Setup output folders +# ===================================== +datetime_folder_name = 
datetime.now().strftime("%Y-%m-%d_%H-%M-%S") +latest_folder_path = OUTPUT_PATH / "latest" +archive_folder_path = OUTPUT_PATH / "archive" +output_datetime_path = OUTPUT_PATH / datetime_folder_name + +OUTPUT_PATH.mkdir(exist_ok=True, parents=True) +archive_folder_path.mkdir(exist_ok=True, parents=True) +latest_folder_path.mkdir(exist_ok=True, parents=True) +output_datetime_path.mkdir(exist_ok=True, parents=True) + +# ===================================== +# Load Polars DataFrame via your helper +# ===================================== +from load_results import load_inference_results_dataframe + +df: pl.DataFrame = load_inference_results_dataframe(INFERENCE_ROOT) + +required_cols = { + "experiment", + "network", + "latent_dim", + "semi_normals", + "semi_anomalous", + "model", + "scores", + "folder", + "config_json", +} +missing = required_cols - set(df.columns) +if missing: + raise KeyError(f"DataFrame missing required columns: {sorted(missing)}") + + +# ===================================== +# Rebuild experiment → stats mapping (like your original) +# ===================================== +def rebuild_experiment_index(): + normals, anomalies = [], [] + if not ALL_DATA_PATH.exists(): + return [], [], {} + for bag in ALL_DATA_PATH.iterdir(): + if bag.suffix != ".bag": + continue + if "smoke" in bag.name: + anomalies.append(bag) + else: + normals.append(bag) + normals = sorted(normals, key=lambda p: p.stat().st_size) + anomalies = sorted(anomalies, key=lambda p: p.stat().st_size) + mapping = {} + for i, p in enumerate(normals): + mapping[p.stem] = (False, i, p) + for i, p in enumerate(anomalies): + mapping[p.stem] = (True, i, p) + return normals, anomalies, mapping + + +normal_paths, anomaly_paths, exp_map = rebuild_experiment_index() + +# Load cached statistical data and manual labels +missing_points_cache = CACHE_PATH / "missing_points.pkl" +near_sensor_cache = CACHE_PATH / "particles_near_sensor_counts_500.pkl" +labels_json_path = CACHE_PATH / "manually_labeled_anomaly_frames.json" + +missing_points_normal = missing_points_anomaly = None +near_sensor_normal = near_sensor_anomaly = None +if missing_points_cache.exists(): + with open(missing_points_cache, "rb") as f: + missing_points_normal, missing_points_anomaly = pickle.load(f) +if near_sensor_cache.exists(): + with open(near_sensor_cache, "rb") as f: + near_sensor_normal, near_sensor_anomaly = pickle.load(f) + +manual_windows = {} +if labels_json_path.exists(): + with open(labels_json_path, "r") as f: + labeled_json = json.load(f) + for file in labeled_json.get("files", []): + manual_windows[file["filename"]] = ( + file.get("semi_target_begin_frame"), + file.get("semi_target_end_frame"), + ) + + +# ===================================== +# Helpers +# ===================================== +def ema(x: np.ndarray, alpha: float) -> np.ndarray: + if x is None or len(x) == 0: + return x + y = np.empty_like(x, dtype=float) + y[0] = x[0] + for i in range(1, len(x)): + y[i] = alpha * x[i] + (1 - alpha) * y[i - 1] + return y + + +def to_np_list(list_cell) -> Optional[np.ndarray]: + if list_cell is None: + return None + return np.asarray(list_cell, dtype=float).ravel() + + +def normalize_exp_name(name: str) -> str: + # strip trailing run suffix like -001, -002 if present + return re.sub(r"-\d{3}$", "", name) + + +def map_experiment_to_stats_stem(exp_name: str) -> Optional[str]: + """Try exact match, then prefix match with / without -### suffix stripped.""" + if exp_name in exp_map: + return exp_name + base = normalize_exp_name(exp_name) + if base in 
exp_map: + return base + for stem in exp_map.keys(): + if stem.startswith(exp_name) or stem.startswith(base): + return stem + return None + + +def get_stats_for_experiment( + exp_name: str, +) -> Tuple[ + Optional[np.ndarray], Optional[np.ndarray], Tuple[Optional[int], Optional[int]] +]: + key = map_experiment_to_stats_stem(exp_name) + if key is None: + return None, None, (None, None) + is_anomaly, idx, path = exp_map[key] + missing = near = None + if missing_points_normal is not None and missing_points_anomaly is not None: + series = ( + missing_points_anomaly[idx] if is_anomaly else missing_points_normal[idx] + ) + missing = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0 + if near_sensor_normal is not None and near_sensor_anomaly is not None: + series = near_sensor_anomaly[idx] if is_anomaly else near_sensor_normal[idx] + near = (np.asarray(series, dtype=float) / DATA_RESOLUTION) * 100.0 + npy_key = path.with_suffix(".npy").name + window = manual_windows.get(npy_key, (None, None)) + return missing, near, window + + +def _bin_to_progress(x: np.ndarray, bins: int = PROGRESS_BINS) -> np.ndarray: + """Average x into fixed #bins across its length (progress-normalized timeline).""" + if x is None or len(x) == 0: + return x + n = len(x) + edges = np.linspace(0, n, bins + 1, dtype=int) + out = np.empty(bins, dtype=float) + for i in range(bins): + a, b = edges[i], edges[i + 1] + if b <= a: + out[i] = out[i - 1] if i > 0 else x[0] + else: + out[i] = float(np.mean(x[a:b])) + return out + + +def _ecdf(x: np.ndarray): + xs = np.sort(np.asarray(x, dtype=float)) + n = len(xs) + + def F(t): + return float(np.searchsorted(xs, t, side="right")) / n + + return F + + +def baseline_transform(clean: np.ndarray, other: np.ndarray, mode: str): + """Transform using stats from clean only.""" + assert mode in ("baseline_z", "baseline_tailprob") + if clean is None or len(clean) == 0: + return clean, other, "raw" + if mode == "baseline_z": + mu = float(np.mean(clean)) + sd = float(np.std(clean, ddof=0)) + if sd < 1e-12: + zc = clean - mu + zo = other - mu if other is not None else None + else: + zc = (clean - mu) / sd + zo = (other - mu) / sd if other is not None else None + return zc, zo, "Anomaly score (σ above clean)" + else: + F = _ecdf(clean) + tp_clean = np.array([1.0 - F(v) for v in clean], dtype=float) + tp_other = ( + np.array([1.0 - F(v) for v in other], dtype=float) + if other is not None + else None + ) + return tp_clean, tp_other, "Tail probability vs clean (1 - F_clean)" + + +def pick_method_series(gdf: pl.DataFrame, label: str) -> Optional[np.ndarray]: + if label == "DeepSAD (LeNet)": + sel = gdf.filter( + (pl.col("network") == "subter_LeNet") & (pl.col("model") == "deepsad") + ) + elif label == "DeepSAD (efficient)": + sel = gdf.filter( + (pl.col("network") == "subter_efficient") & (pl.col("model") == "deepsad") + ) + elif label == "OCSVM (LeNet)": + sel = gdf.filter( + (pl.col("network") == "subter_LeNet") & (pl.col("model") == "ocsvm") + ) + elif label == "IsoForest (LeNet)": + sel = gdf.filter( + (pl.col("network") == "subter_LeNet") & (pl.col("model") == "isoforest") + ) + else: + sel = pl.DataFrame() + if sel.height == 0: + return None + row = sel.row(0) + row_dict = {c: row[i] for i, c in enumerate(sel.columns)} + return to_np_list(row_dict["scores"]) + + +def group_slice( + df: pl.DataFrame, + experiment: str, + latent_dim: int, + semi_normals: int, + semi_anomalous: int, +) -> pl.DataFrame: + return df.filter( + (pl.col("experiment") == experiment) + & (pl.col("latent_dim") == 
latent_dim) + & (pl.col("semi_normals") == semi_normals) + & (pl.col("semi_anomalous") == semi_anomalous) + ) + + +def compare_two_experiments_progress( + df: pl.DataFrame, + experiment_clean: str, + experiment_degraded: str, + latent_dim: int, + semi_normals: int, + semi_anomalous: int, + y_mode: str = "baseline_z", + include_stats: bool = True, +): + methods = [ + "DeepSAD (LeNet)", + "DeepSAD (efficient)", + "OCSVM (LeNet)", + "IsoForest (LeNet)", + ] + + g_clean = group_slice( + df, experiment_clean, latent_dim, semi_normals, semi_anomalous + ) + g_deg = group_slice( + df, experiment_degraded, latent_dim, semi_normals, semi_anomalous + ) + if g_clean.is_empty() or g_deg.is_empty(): + print( + f"[WARN] Missing one of the experiment groups: clean({g_clean.height}), degraded({g_deg.height}). Skipping." + ) + return 0 + + # Stats (% absolute, EMA smoothed later) + mp_clean, ns_clean, _ = get_stats_for_experiment(experiment_clean) + mp_deg, ns_deg, _ = get_stats_for_experiment(experiment_degraded) + + # Build baseline-anchored, progress-binned curves per method + curves_clean: Dict[str, np.ndarray] = {} + curves_deg: Dict[str, np.ndarray] = {} + y_label = "Anomaly" + + for label in methods: + s_clean = pick_method_series(g_clean, label) + s_deg = pick_method_series(g_deg, label) + if s_clean is None or s_deg is None: + continue + + # Smooth raw with EMA for stability before fitting baseline + s_clean_sm = ema(s_clean.astype(float), EMA_ALPHA_METHODS) + s_deg_sm = ema(s_deg.astype(float), EMA_ALPHA_METHODS) + + t_clean, t_deg, y_label = baseline_transform(s_clean_sm, s_deg_sm, y_mode) + + # Progress-bin both + curves_clean[label] = _bin_to_progress(t_clean, PROGRESS_BINS) + curves_deg[label] = _bin_to_progress(t_deg, PROGRESS_BINS) + + if not curves_clean: + print("[WARN] No method curves available for comparison in this config.") + return 0 + + x = np.linspace(0, 100, PROGRESS_BINS) + + # ---- Figure 1: scores only + fig1, ax1 = plt.subplots(figsize=(14, 6), constrained_layout=True) + for label in methods: + yc = curves_clean.get(label) + yd = curves_deg.get(label) + if yc is None or yd is None: + continue + ax1.plot(x, yd, label=f"{label} — degraded", linewidth=1.8) + ax1.plot(x, yc, linestyle="--", label=f"{label} — clean", linewidth=1.2) + ax1.set_xlabel("Progress through experiment (%)") + ax1.set_ylabel(y_label) + ax1.set_title( + f"Methods across experiments (progress-normalized)\n" + f"Clean: {experiment_clean} vs Degraded: {experiment_degraded}\n" + f"Transform: {y_mode} | EMA(methods α={EMA_ALPHA_METHODS})" + ) + ax1.grid(True, alpha=0.3) + ax1.legend(ncol=2, loc="upper right") + out1 = ( + f"compare_{experiment_clean}_vs_{experiment_degraded}" + f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}" + f"_methods_{y_mode}.png" + ) + fig1.savefig(output_datetime_path / out1, dpi=150) + plt.close(fig1) + + made = 1 + + if include_stats: + # Prep stats: absolute %, EMA, progress-binned + def prep_stat_pair(a, b): + if a is None or len(a) == 0 or b is None or len(b) == 0: + return None, None + a_s = ema(a.astype(float), EMA_ALPHA_STATS) + b_s = ema(b.astype(float), EMA_ALPHA_STATS) + return _bin_to_progress(a_s, PROGRESS_BINS), _bin_to_progress( + b_s, PROGRESS_BINS + ) + + mp_c, mp_d = prep_stat_pair(mp_clean, mp_deg) + ns_c, ns_d = prep_stat_pair(ns_clean, ns_deg) + + # ---- Figure 2: + Missing points (%) + if mp_c is not None and mp_d is not None: + fig2, ax2 = plt.subplots(figsize=(14, 6), constrained_layout=True) + axy2 = ax2.twinx() + for label in methods: + yc = 
curves_clean.get(label) + yd = curves_deg.get(label) + if yc is None or yd is None: + continue + ax2.plot(x, yd, label=f"{label} — degraded", linewidth=1.8) + ax2.plot(x, yc, linestyle="--", label=f"{label} — clean", linewidth=1.2) + axy2.plot(x, mp_d, linestyle="-.", label="Missing points — degraded (%)") + axy2.plot(x, mp_c, linestyle=":", label="Missing points — clean (%)") + ax2.set_xlabel("Progress through experiment (%)") + ax2.set_ylabel(y_label) + axy2.set_ylabel("Missing points (%)") + ax2.set_title( + f"Methods vs Missing points (absolute %) — progress-normalized\n" + f"Clean: {experiment_clean} vs Degraded: {experiment_degraded}\n" + f"Transform: {y_mode} | EMA(methods α={EMA_ALPHA_METHODS}, stats α={EMA_ALPHA_STATS})" + ) + ax2.grid(True, alpha=0.3) + L1, N1 = ax2.get_legend_handles_labels() + L2, N2 = axy2.get_legend_handles_labels() + ax2.legend(L1 + L2, N1 + N2, loc="upper right", ncol=2) + out2 = ( + f"compare_{experiment_clean}_vs_{experiment_degraded}" + f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}" + f"_{y_mode}_missing.png" + ) + fig2.savefig(output_datetime_path / out2, dpi=150) + plt.close(fig2) + made += 1 + + # ---- Figure 3: + Near-sensor (%) + if ns_c is not None and ns_d is not None: + fig3, ax3 = plt.subplots(figsize=(14, 6), constrained_layout=True) + axy3 = ax3.twinx() + for label in methods: + yc = curves_clean.get(label) + yd = curves_deg.get(label) + if yc is None or yd is None: + continue + ax3.plot(x, yd, label=f"{label} — degraded", linewidth=1.8) + ax3.plot(x, yc, linestyle="--", label=f"{label} — clean", linewidth=1.2) + axy3.plot(x, ns_d, linestyle="-.", label="Near-sensor — degraded (%)") + axy3.plot(x, ns_c, linestyle=":", label="Near-sensor — clean (%)") + ax3.set_xlabel("Progress through experiment (%)") + ax3.set_ylabel(y_label) + axy3.set_ylabel("Near-sensor points (%)") + ax3.set_title( + f"Methods vs Near-sensor (absolute %) — progress-normalized\n" + f"Clean: {experiment_clean} vs Degraded: {experiment_degraded}\n" + f"Transform: {y_mode} | EMA(methods α={EMA_ALPHA_METHODS}, stats α={EMA_ALPHA_STATS})" + ) + ax3.grid(True, alpha=0.3) + L1, N1 = ax3.get_legend_handles_labels() + L2, N2 = axy3.get_legend_handles_labels() + ax3.legend(L1 + L2, N1 + N2, loc="upper right", ncol=2) + out3 = ( + f"compare_{experiment_clean}_vs_{experiment_degraded}" + f"_ld{latent_dim}_sn{semi_normals}_sa{semi_anomalous}" + f"_{y_mode}_nearsensor.png" + ) + fig3.savefig(output_datetime_path / out3, dpi=150) + plt.close(fig3) + made += 1 + + return made + + +# ===================================== +# Run comparison & save +# ===================================== +plots_made = compare_two_experiments_progress( + df=df, + experiment_clean=EXPERIMENT_CLEAN, + experiment_degraded=EXPERIMENT_DEGRADED, + latent_dim=LATENT_DIM, + semi_normals=SEMI_NORMALS, + semi_anomalous=SEMI_ANOMALOUS, + y_mode=Y_MODE, + include_stats=True, +) + +# ===================================== +# Preserve latest/, archive/, copy script +# ===================================== +# delete current latest folder +shutil.rmtree(latest_folder_path, ignore_errors=True) +# create new latest folder +latest_folder_path.mkdir(exist_ok=True, parents=True) + +# copy contents of output folder to the latest folder +for file in output_datetime_path.iterdir(): + shutil.copy2(file, latest_folder_path) + +# copy this python script to preserve the code used (best effort) +if COPY_SELF: + try: + shutil.copy2(__file__, output_datetime_path) + shutil.copy2(__file__, latest_folder_path) + except Exception: + 
(output_datetime_path / "run_config.json").write_text( + json.dumps( + { + "INFERENCE_ROOT": str(INFERENCE_ROOT), + "CACHE_PATH": str(CACHE_PATH), + "ALL_DATA_PATH": str(ALL_DATA_PATH), + "EXPERIMENT_CLEAN": EXPERIMENT_CLEAN, + "EXPERIMENT_DEGRADED": EXPERIMENT_DEGRADED, + "LATENT_DIM": LATENT_DIM, + "SEMI_NORMALS": SEMI_NORMALS, + "SEMI_ANOMALOUS": SEMI_ANOMALOUS, + "Y_MODE": Y_MODE, + "PROGRESS_BINS": PROGRESS_BINS, + "FPS": FPS, + "EMA_ALPHA_METHODS": EMA_ALPHA_METHODS, + "EMA_ALPHA_STATS": EMA_ALPHA_STATS, + "DATA_RESOLUTION": DATA_RESOLUTION, + "timestamp": datetime_folder_name, + }, + indent=2, + ) + ) + +# move output date folder to archive +shutil.move(output_datetime_path, archive_folder_path) + +print(f"Done. Wrote {plots_made} figure(s). Archived under: {archive_folder_path}")