From e7624d2786f060da7a0b90e6fea8161088ca00e5 Mon Sep 17 00:00:00 2001 From: Jan Kowalczyk Date: Mon, 15 Sep 2025 11:21:30 +0200 Subject: [PATCH] wip inference --- Deep-SAD-PyTorch/src/baselines/isoforest.py | 74 +++++ Deep-SAD-PyTorch/src/baselines/ocsvm.py | 74 +++++ Deep-SAD-PyTorch/src/datasets/subter.py | 2 + Deep-SAD-PyTorch/src/main.py | 192 +++++++++-- Deep-SAD-PyTorch/src/optim/DeepSAD_trainer.py | 6 +- tools/plot_scripts/load_results.py | 141 +++++++- .../results_inference_timeline smoothed.py | 269 ++++++++++++++++ .../results_inference_timeline.py | 304 ++++++++++++++++++ 8 files changed, 1027 insertions(+), 35 deletions(-) create mode 100644 tools/plot_scripts/results_inference_timeline smoothed.py create mode 100644 tools/plot_scripts/results_inference_timeline.py diff --git a/Deep-SAD-PyTorch/src/baselines/isoforest.py b/Deep-SAD-PyTorch/src/baselines/isoforest.py index 0adc608..4d3c27d 100644 --- a/Deep-SAD-PyTorch/src/baselines/isoforest.py +++ b/Deep-SAD-PyTorch/src/baselines/isoforest.py @@ -261,6 +261,80 @@ class IsoForest(object): logger.info("Test Time: {:.3f}s".format(self.results["test_time"])) logger.info("Finished testing.") + def inference( + self, + dataset: BaseADDataset, + device: str = "cpu", + n_jobs_dataloader: int = 0, + batch_size: int = 32, + ): + """Perform inference on the dataset using the trained Isolation Forest model.""" + logger = logging.getLogger() + + # Get inference data loader + _, _, inference_loader = dataset.loaders( + batch_size=batch_size, num_workers=n_jobs_dataloader + ) + + # Get data from loader + X = () + idxs = [] + file_ids = [] + frame_ids = [] + + logger.info("Starting inference...") + start_time = time.time() + + for data in inference_loader: + inputs, idx, (file_id, frame_id) = data + inputs = inputs.to(device) + + if self.hybrid: + inputs = self.ae_net.encoder(inputs) + X_batch = inputs.view(inputs.size(0), -1) + X += (X_batch.cpu().data.numpy(),) + + # Store indices and metadata + idxs.extend(idx.cpu().data.numpy().tolist()) + file_ids.extend(file_id.cpu().data.numpy().tolist()) + frame_ids.extend(frame_id.cpu().data.numpy().tolist()) + + X = np.concatenate(X) + + # Get anomaly scores + scores = (-1.0) * self.model.decision_function(X) + scores = scores.flatten() + + # Store inference results + self.inference_time = time.time() - start_time + self.inference_indices = np.array(idxs) + self.inference_file_ids = np.array(file_ids) + self.inference_frame_ids = np.array(frame_ids) + + # Create index mapping similar to DeepSAD trainer + self.inference_index_mapping = { + "indices": self.inference_indices, + "file_ids": self.inference_file_ids, + "frame_ids": self.inference_frame_ids, + } + + # Log inference statistics + logger.info(f"Number of inference samples: {len(self.inference_indices)}") + logger.info( + f"Number of unique files: {len(np.unique(self.inference_file_ids))}" + ) + logger.info("Inference Time: {:.3f}s".format(self.inference_time)) + logger.info( + "Score statistics: " + f"min={scores.min():.3f}, " + f"max={scores.max():.3f}, " + f"mean={scores.mean():.3f}, " + f"std={scores.std():.3f}" + ) + logger.info("Finished inference.") + + return scores + def load_ae(self, dataset_name, model_path): """Load pretrained autoencoder from model_path for feature extraction in a hybrid Isolation Forest model.""" diff --git a/Deep-SAD-PyTorch/src/baselines/ocsvm.py b/Deep-SAD-PyTorch/src/baselines/ocsvm.py index 5b2f98b..3535c86 100644 --- a/Deep-SAD-PyTorch/src/baselines/ocsvm.py +++ b/Deep-SAD-PyTorch/src/baselines/ocsvm.py @@ -453,6 +453,80 @@ class OCSVM(object): logger.info("Test Time: {:.3f}s".format(self.results["test_time"])) logger.info("Finished testing.") + def inference( + self, + dataset: BaseADDataset, + device: str = "cpu", + n_jobs_dataloader: int = 0, + batch_size: int = 32, + ): + """Perform inference on the dataset using the trained OC-SVM model.""" + logger = logging.getLogger() + + # Get inference data loader + _, _, inference_loader = dataset.loaders( + batch_size=batch_size, num_workers=n_jobs_dataloader + ) + + # Get data from loader + X = () + idxs = [] + file_ids = [] + frame_ids = [] + + logger.info("Starting inference...") + start_time = time.time() + + for data in inference_loader: + inputs, idx, (file_id, frame_id) = data + inputs = inputs.to(device) + + if self.hybrid: + inputs = self.ae_net.encoder(inputs) + X_batch = inputs.view(inputs.size(0), -1) + X += (X_batch.cpu().data.numpy(),) + + # Store indices and metadata + idxs.extend(idx.cpu().data.numpy().tolist()) + file_ids.extend(file_id.cpu().data.numpy().tolist()) + frame_ids.extend(frame_id.cpu().data.numpy().tolist()) + + X = np.concatenate(X) + + # Get anomaly scores + scores = (-1.0) * self.model.decision_function(X) + scores = scores.flatten() + + # Store inference results + self.inference_time = time.time() - start_time + self.inference_indices = np.array(idxs) + self.inference_file_ids = np.array(file_ids) + self.inference_frame_ids = np.array(frame_ids) + + # Create index mapping similar to DeepSAD trainer + self.inference_index_mapping = { + "indices": self.inference_indices, + "file_ids": self.inference_file_ids, + "frame_ids": self.inference_frame_ids, + } + + # Log inference statistics + logger.info(f"Number of inference samples: {len(self.inference_indices)}") + logger.info( + f"Number of unique files: {len(np.unique(self.inference_file_ids))}" + ) + logger.info("Inference Time: {:.3f}s".format(self.inference_time)) + logger.info( + "Score statistics: " + f"min={scores.min():.3f}, " + f"max={scores.max():.3f}, " + f"mean={scores.mean():.3f}, " + f"std={scores.std():.3f}" + ) + logger.info("Finished inference.") + + return scores + def load_ae(self, model_path, net_name, device="cpu"): """Load pretrained autoencoder from model_path for feature extraction in a hybrid OC-SVM model.""" diff --git a/Deep-SAD-PyTorch/src/datasets/subter.py b/Deep-SAD-PyTorch/src/datasets/subter.py index 18d1657..f3f9d98 100644 --- a/Deep-SAD-PyTorch/src/datasets/subter.py +++ b/Deep-SAD-PyTorch/src/datasets/subter.py @@ -338,6 +338,8 @@ class SubTerInference(VisionDataset): self.frame_ids = np.arange(self.data.shape[0], dtype=np.int32) self.file_names = {0: experiment_file.name} + self.transform = transform if transform else transforms.ToTensor() + def __len__(self): return len(self.data) diff --git a/Deep-SAD-PyTorch/src/main.py b/Deep-SAD-PyTorch/src/main.py index 00433b5..b114306 100644 --- a/Deep-SAD-PyTorch/src/main.py +++ b/Deep-SAD-PyTorch/src/main.py @@ -638,57 +638,185 @@ def main( cfg.save_config(export_json=xp_path + "/config.json") elif action == "infer": + # Inference uses a deterministic, non-shuffled loader to preserve temporal order dataset = load_dataset( - dataset_name, + cfg.settings["dataset_name"], data_path, - normal_class, - known_outlier_class, - n_known_outlier_classes, - ratio_known_normal, - ratio_known_outlier, - ratio_pollution, + cfg.settings["normal_class"], + cfg.settings["known_outlier_class"], + cfg.settings["n_known_outlier_classes"], + cfg.settings["ratio_known_normal"], + cfg.settings["ratio_known_outlier"], + cfg.settings["ratio_pollution"], random_state=np.random.RandomState(cfg.settings["seed"]), + k_fold_num=False, inference=True, ) + # Log random sample of known anomaly classes if more than 1 class if n_known_outlier_classes > 1: logger.info("Known anomaly classes: %s" % (dataset.known_outlier_classes,)) - # Initialize DeepSAD model and set neural network phi - deepSAD = DeepSAD(latent_space_dim, cfg.settings["eta"]) - deepSAD.set_network(net_name) - - # If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights) - if not load_model: + # --- Expect a model DIRECTORY (aligned with 'retest') --- + if ( + (not load_model) + or (not Path(load_model).exists()) + or (not Path(load_model).is_dir()) + ): logger.error( - "For inference mode a model has to be loaded! Pass the --load_model option with the model path!" + "For inference mode a model directory has to be loaded! " + "Pass the --load_model option with the model directory path!" + ) + return + load_model = Path(load_model) + + # Resolve expected model artifacts (single-model / no k-fold suffixes) + deepsad_model_path = load_model / "model_deepsad.tar" + ae_model_path = load_model / "model_ae.tar" + ocsvm_model_path = load_model / "model_ocsvm.pkl" + isoforest_model_path = load_model / "model_isoforest.pkl" + + # Sanity check model files exist + model_paths = [ + deepsad_model_path, + ae_model_path, + ocsvm_model_path, + isoforest_model_path, + ] + missing = [p.name for p in model_paths if not p.exists() or not p.is_file()] + if missing: + logger.error( + "The following model files do not exist in the provided model directory: " + + ", ".join(missing) ) return - deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device) - logger.info("Loading model from %s." % load_model) + # Prepare output paths + inf_dir = Path(xp_path) / "inference" + inf_dir.mkdir(parents=True, exist_ok=True) + base_stem = Path(Path(dataset.root).stem) # keep your previous naming + # DeepSAD outputs (keep legacy filenames for backward compatibility) + deepsad_scores_path = inf_dir / Path( + base_stem.stem + "_deepsad_scores" + ).with_suffix(".npy") + deepsad_outputs_path = inf_dir / Path(base_stem.stem + "_outputs").with_suffix( + ".npy" + ) + # Baselines + ocsvm_scores_path = inf_dir / Path( + base_stem.stem + "_ocsvm_scores" + ).with_suffix(".npy") + isoforest_scores_path = inf_dir / Path( + base_stem.stem + "_isoforest_scores" + ).with_suffix(".npy") - inference_results, all_outputs = deepSAD.inference( - dataset, device=device, n_jobs_dataloader=n_jobs_dataloader - ) - inference_results_path = ( - Path(xp_path) - / "inference" - / Path(Path(dataset.root).stem).with_suffix(".npy") - ) - inference_outputs_path = ( - Path(xp_path) - / "inference" - / Path(Path(dataset.root).stem + "_outputs").with_suffix(".npy") + # Common loader settings + _n_jobs = ( + n_jobs_dataloader + if "n_jobs_dataloader" in locals() + else cfg.settings.get("n_jobs_dataloader", 0) ) - inference_results_path.parent.mkdir(parents=True, exist_ok=True) - np.save(inference_results_path, inference_results, fix_imports=False) - np.save(inference_outputs_path, all_outputs, fix_imports=False) + # ----------------- DeepSAD ----------------- + + deepSAD = DeepSAD(cfg.settings["latent_space_dim"], cfg.settings["eta"]) + deepSAD.set_network(cfg.settings["net_name"]) + deepSAD.load_model( + model_path=deepsad_model_path, load_ae=True, map_location=device + ) + logger.info("Loaded DeepSAD model from %s.", deepsad_model_path) + + deepsad_scores, deepsad_all_outputs = deepSAD.inference( + dataset, device=device, n_jobs_dataloader=_n_jobs + ) + + np.save(deepsad_scores_path, deepsad_scores) + # np.save(deepsad_outputs_path, deepsad_all_outputs) logger.info( - f"Inference: median={np.median(inference_results)} mean={np.mean(inference_results)} min={inference_results.min()} max={inference_results.max()}" + "DeepSAD inference: median=%.6f mean=%.6f min=%.6f max=%.6f", + float(np.median(deepsad_scores)), + float(np.mean(deepsad_scores)), + float(np.min(deepsad_scores)), + float(np.max(deepsad_scores)), ) + + # ----------------- OCSVM (hybrid) ----------------- + ocsvm_scores = None + ocsvm = OCSVM( + kernel=cfg.settings["ocsvm_kernel"], + nu=cfg.settings["ocsvm_nu"], + hybrid=True, + latent_space_dim=cfg.settings["latent_space_dim"], + ) + # load AE to build the feature extractor for hybrid OCSVM + ocsvm.load_ae( + net_name=cfg.settings["net_name"], + model_path=ae_model_path, + device=device, + ) + ocsvm.load_model(import_path=ocsvm_model_path) + + ocsvm_scores = ocsvm.inference( + dataset, device=device, n_jobs_dataloader=_n_jobs, batch_size=32 + ) + + if ocsvm_scores is not None: + np.save(ocsvm_scores_path, ocsvm_scores) + logger.info( + "OCSVM inference: median=%.6f mean=%.6f min=%.6f max=%.6f", + float(np.median(ocsvm_scores)), + float(np.mean(ocsvm_scores)), + float(np.min(ocsvm_scores)), + float(np.max(ocsvm_scores)), + ) + else: + logger.warning("OCSVM scores could not be determined; no array saved.") + + # ----------------- Isolation Forest ----------------- + isoforest_scores = None + Isoforest = IsoForest( + hybrid=False, + n_estimators=cfg.settings["isoforest_n_estimators"], + max_samples=cfg.settings["isoforest_max_samples"], + contamination=cfg.settings["isoforest_contamination"], + n_jobs=cfg.settings["isoforest_n_jobs_model"], + seed=cfg.settings["seed"], + ) + Isoforest.load_model(import_path=isoforest_model_path, device=device) + isoforest_scores = Isoforest.inference( + dataset, device=device, n_jobs_dataloader=_n_jobs + ) + if isoforest_scores is not None: + np.save(isoforest_scores_path, isoforest_scores) + logger.info( + "IsolationForest inference: median=%.6f mean=%.6f min=%.6f max=%.6f", + float(np.median(isoforest_scores)), + float(np.mean(isoforest_scores)), + float(np.min(isoforest_scores)), + float(np.max(isoforest_scores)), + ) + else: + logger.warning( + "Isolation Forest scores could not be determined; no array saved." + ) + + # Final summary (DeepSAD always runs; baselines are best-effort) + logger.info( + "Inference complete. Saved arrays to %s:\n" + " DeepSAD scores: %s\n" + " DeepSAD outputs: %s\n" + " OCSVM scores: %s\n" + " IsoForest scores: %s", + inf_dir, + deepsad_scores_path.name, + deepsad_outputs_path.name, + ocsvm_scores_path.name if ocsvm_scores is not None else "(not saved)", + isoforest_scores_path.name + if isoforest_scores is not None + else "(not saved)", + ) + elif action == "ae_elbow_test": # Load data once dataset = load_dataset( diff --git a/Deep-SAD-PyTorch/src/optim/DeepSAD_trainer.py b/Deep-SAD-PyTorch/src/optim/DeepSAD_trainer.py index 1be0633..05a100d 100644 --- a/Deep-SAD-PyTorch/src/optim/DeepSAD_trainer.py +++ b/Deep-SAD-PyTorch/src/optim/DeepSAD_trainer.py @@ -177,6 +177,8 @@ class DeepSADTrainer(BaseTrainer): batch_size=self.batch_size, num_workers=self.n_jobs_dataloader ) + latent_dim = net.rep_dim + # Set device for network net = net.to(self.device) @@ -184,7 +186,9 @@ class DeepSADTrainer(BaseTrainer): logger.info("Starting inference...") n_batches = 0 start_time = time.time() - all_outputs = np.zeros((len(inference_loader.dataset), 1024), dtype=np.float32) + all_outputs = np.zeros( + (len(inference_loader.dataset), latent_dim), dtype=np.float32 + ) scores = [] net.eval() diff --git a/tools/plot_scripts/load_results.py b/tools/plot_scripts/load_results.py index 8e7080d..3877086 100644 --- a/tools/plot_scripts/load_results.py +++ b/tools/plot_scripts/load_results.py @@ -96,6 +96,21 @@ PRETRAIN_SCHEMA = { "config_json": pl.Utf8, # full config.json as string (for reference) } +SCHEMA_INFERENCE = { + # identifiers / dims + "experiment": pl.Utf8, # e.g. "2_static_no_artifacts_illuminated_2023-01-23-001" + "network": pl.Utf8, # e.g. "LeNet", "efficient" + "latent_dim": pl.Int32, + "semi_normals": pl.Int32, + "semi_anomalous": pl.Int32, + "model": pl.Utf8, # "deepsad" | "isoforest" | "ocsvm" + # metrics + "scores": pl.List(pl.Float64), + # timings / housekeeping + "folder": pl.Utf8, + "config_json": pl.Utf8, # full config.json as string (for reference) +} + # ------------------------------------------------------------ # Helpers: curve/scores normalizers (tuples/ndarrays -> dict/list) @@ -233,11 +248,11 @@ def normalize_bool_list(a) -> Optional[List[bool]]: # ------------------------------------------------------------ # Low-level: read one experiment folder # ------------------------------------------------------------ -def read_config(exp_dir: Path) -> dict: +def read_config(exp_dir: Path, k_fold_required: bool = True) -> dict: cfg = exp_dir / "config.json" with cfg.open("r") as f: c = json.load(f) - if not c.get("k_fold"): + if k_fold_required and not c.get("k_fold"): raise ValueError(f"{exp_dir.name}: not trained as k-fold") return c @@ -589,7 +604,129 @@ def load_pretraining_results_dataframe( return df +def load_inference_results_dataframe( + root: Path, + allow_cache: bool = True, + models: List[str] = MODELS, +) -> pl.DataFrame: + """Load inference results from experiment folders. + + Args: + root: Path to root directory containing experiment folders + allow_cache: Whether to use/create cache file + models: List of models to look for scores + + Returns: + pl.DataFrame: DataFrame containing inference results + """ + if allow_cache: + cache = root / "inference_results_cache.parquet" + if cache.exists(): + try: + df = pl.read_parquet(cache) + print(f"[info] loaded cached inference frame from {cache}") + return df + except Exception as e: + print(f"[warn] failed to load inference cache {cache}: {e}") + + rows: List[dict] = [] + + exp_dirs = [p for p in root.iterdir() if p.is_dir()] + for exp_dir in sorted(exp_dirs): + try: + # Load and validate config + cfg = read_config(exp_dir, k_fold_required=False) + cfg_json = json.dumps(cfg, sort_keys=True) + + # Extract config values + network = cfg.get("net_name") + latent_dim = int(cfg.get("latent_space_dim")) + semi_normals = int(cfg.get("num_known_normal")) + semi_anomalous = int(cfg.get("num_known_outlier")) + + # Process each model's scores + inference_dir = exp_dir / "inference" + if not inference_dir.exists(): + print(f"[warn] no inference directory for {exp_dir.name}") + continue + + # Find all unique experiments in this folder's inference files + score_files = list(inference_dir.glob("*_scores.npy")) + if not score_files: + print(f"[warn] no score files in {inference_dir}") + continue + + # Extract unique experiment names from score files + # Format: {experiment}_{model}_scores.npy + experiments = set() + for score_file in score_files: + exp_name = score_file.stem.rsplit("_", 2)[0] + experiments.add(exp_name) + + # Load scores for each experiment and model + for experiment in sorted(experiments): + for model in models: + score_file = inference_dir / f"{experiment}_{model}_scores.npy" + if not score_file.exists(): + print(f"[warn] missing score file for {experiment}, {model}") + continue + + try: + scores = np.load(score_file) + rows.append( + { + "experiment": experiment, + "network": network, + "latent_dim": latent_dim, + "semi_normals": semi_normals, + "semi_anomalous": semi_anomalous, + "model": model, + "scores": scores.tolist(), + "folder": str(exp_dir), + "config_json": cfg_json, + } + ) + except Exception as e: + print( + f"[warn] failed to load scores for {experiment}, {model}: {e}" + ) + continue + + except Exception as e: + print(f"[warn] skipping {exp_dir.name}: {e}") + continue + + # If empty, return a typed empty frame + if not rows: + return pl.DataFrame(schema=SCHEMA_INFERENCE) + + df = pl.DataFrame(rows, schema=SCHEMA_INFERENCE) + + # Optimize datatypes + df = df.with_columns( + [ + pl.col("experiment", "network", "model").cast(pl.Categorical), + pl.col("latent_dim", "semi_normals", "semi_anomalous").cast(pl.Int32), + ] + ) + + # Cache if enabled + if allow_cache: + try: + df.write_parquet(cache) + print(f"[info] cached inference frame to {cache}") + except Exception as e: + print(f"[warn] failed to write cache {cache}: {e}") + + return df + + def main(): + inference_root = Path("/home/fedex/mt/results/inference/copy") + df_inference = load_inference_results_dataframe(inference_root, allow_cache=True) + + exit(0) + root = Path("/home/fedex/mt/results/copy") df1 = load_results_dataframe(root, allow_cache=True) exit(0) diff --git a/tools/plot_scripts/results_inference_timeline smoothed.py b/tools/plot_scripts/results_inference_timeline smoothed.py new file mode 100644 index 0000000..2647775 --- /dev/null +++ b/tools/plot_scripts/results_inference_timeline smoothed.py @@ -0,0 +1,269 @@ +import json +import pickle +import shutil +from datetime import datetime +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np + +# ========================= +# User-configurable params +# ========================= +# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23") +EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23" + +# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"} +methods_scores_path = Path( + "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference" +) + +# Root data path containing .bag files used to build the cached stats +all_data_path = Path("/home/fedex/mt/data/subter") + +# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/") +output_path = Path("/home/fedex/mt/plots/results_inference_timeline_smoothed") + +# Cache (stats + labels) directory — same as your original script +cache_path = output_path + +# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original) +data_resolution = 32 * 2048 + +# Frames per second for x-axis time +FPS = 10.0 + +# Whether to try to align score sign so that higher = more degraded. +ALIGN_SCORE_DIRECTION = True + +# ========================= +# Smoothing configuration +# ========================= +# Options: "none", "moving_average", "gaussian", "ema" +SMOOTHING_METHOD = "ema" + +# Moving average window size (in frames). Use odd number for symmetry; <=1 disables. +MA_WINDOW = 11 + +# Gaussian sigma (in frames). ~2-3 frames is mild smoothing. +GAUSSIAN_SIGMA = 2.0 + +# Exponential moving average factor in (0,1]; smaller = smoother. ~0.2 is a good start. +EMA_ALPHA = 0.1 + +# ========================= +# Setup output folders +# ========================= +datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") +latest_folder_path = output_path / "latest" +archive_folder_path = output_path / "archive" +output_datetime_path = output_path / datetime_folder_name + +output_path.mkdir(exist_ok=True, parents=True) +output_datetime_path.mkdir(exist_ok=True, parents=True) +latest_folder_path.mkdir(exist_ok=True, parents=True) +archive_folder_path.mkdir(exist_ok=True, parents=True) + +# ========================= +# Discover experiments +# ========================= +normal_experiment_paths, anomaly_experiment_paths = [], [] +for bag_file_path in all_data_path.iterdir(): + if bag_file_path.suffix != ".bag": + continue + if "smoke" in bag_file_path.name: + anomaly_experiment_paths.append(bag_file_path) + else: + normal_experiment_paths.append(bag_file_path) + +normal_experiment_paths = sorted( + normal_experiment_paths, key=lambda p: p.stat().st_size +) +anomaly_experiment_paths = sorted( + anomaly_experiment_paths, key=lambda p: p.stat().st_size +) + +# Find experiment +exp_path, exp_is_anomaly = None, None +for p in anomaly_experiment_paths: + if p.stem == EXPERIMENT_NAME: + exp_path, exp_is_anomaly = p, True + break +if exp_path is None: + for p in normal_experiment_paths: + if p.stem == EXPERIMENT_NAME: + exp_path, exp_is_anomaly = p, False + break +if exp_path is None: + raise FileNotFoundError(f"Experiment '{EXPERIMENT_NAME}' not found") + +exp_index = ( + anomaly_experiment_paths.index(exp_path) + if exp_is_anomaly + else normal_experiment_paths.index(exp_path) +) + +# ========================= +# Load cached statistical data +# ========================= +with open(cache_path / "missing_points.pkl", "rb") as f: + missing_points_normal, missing_points_anomaly = pickle.load(f) +with open(cache_path / "particles_near_sensor_counts_500.pkl", "rb") as f: + near_sensor_normal, near_sensor_anomaly = pickle.load(f) + +if exp_is_anomaly: + missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float) + near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float) +else: + missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float) + near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float) + +missing_points_pct = (missing_points_series / data_resolution) * 100.0 +near_sensor_pct = (near_sensor_series / data_resolution) * 100.0 + +# ========================= +# Load manual anomaly frame borders +# ========================= +manually_labeled_anomaly_frames = {} +labels_json_path = cache_path / "manually_labeled_anomaly_frames.json" +if labels_json_path.exists(): + with open(labels_json_path, "r") as f: + labeled_json = json.load(f) + for file in labeled_json.get("files", []): + manually_labeled_anomaly_frames[file["filename"]] = ( + file.get("semi_target_begin_frame"), + file.get("semi_target_end_frame"), + ) +exp_npy_filename = exp_path.with_suffix(".npy").name +anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None)) + + +# ========================= +# Load method scores and normalize +# ========================= +def zscore_1d(x, eps=1e-12): + mu, sigma = np.mean(x), np.std(x, ddof=0) + return np.zeros_like(x) if sigma < eps else (x - mu) / sigma + + +def maybe_align_direction(z, window): + start, end = window + if start is None or end is None: + return z + inside_mean = np.mean(z[start:end]) if end > start else 0 + outside = np.concatenate([z[:start], z[end:]]) if start > 0 or end < len(z) else [] + outside_mean = np.mean(outside) if len(outside) else inside_mean + return z if inside_mean >= outside_mean else -z + + +methods = ["deepsad", "ocsvm", "isoforest"] +method_zscores = {} +for m in methods: + s = np.load(methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy") + s = np.asarray(s, dtype=float).ravel() + n = min(len(s), len(missing_points_pct)) + s, missing_points_pct, near_sensor_pct = ( + s[:n], + missing_points_pct[:n], + near_sensor_pct[:n], + ) + z = zscore_1d(s) + if ALIGN_SCORE_DIRECTION: + z = maybe_align_direction(z, anomaly_window) + method_zscores[m] = z + + +# ========================= +# Smoothing +# ========================= +def moving_average(x, window): + if window <= 1: + return x + if window % 2 == 0: + window += 1 + return np.convolve(x, np.ones(window) / window, mode="same") + + +def gaussian_smooth(x, sigma): + from scipy.ndimage import gaussian_filter1d + + return gaussian_filter1d(x, sigma=sigma, mode="nearest") if sigma > 0 else x + + +def ema(x, alpha): + y = np.empty_like(x) + y[0] = x[0] + for i in range(1, len(x)): + y[i] = alpha * x[i] + (1 - alpha) * y[i - 1] + return y + + +def apply_smoothing(x): + m = SMOOTHING_METHOD.lower() + if m == "none": + return x + if m == "moving_average": + return moving_average(x, MA_WINDOW) + if m == "gaussian": + return gaussian_smooth(x, GAUSSIAN_SIGMA) + if m == "ema": + return ema(x, EMA_ALPHA) + raise ValueError(f"Unknown SMOOTHING_METHOD: {SMOOTHING_METHOD}") + + +smoothed_z = {k: apply_smoothing(v) for k, v in method_zscores.items()} +smoothed_missing = apply_smoothing(missing_points_pct) +smoothed_near = apply_smoothing(near_sensor_pct) + +# ========================= +# Plot +# ========================= +t = np.arange(len(missing_points_pct)) / FPS + + +def plot_series(y2, ylabel, fname, title_suffix): + fig, axz = plt.subplots(figsize=(14, 6), constrained_layout=True) + axy = axz.twinx() + for m in methods: + axz.plot(t, smoothed_z[m], label=f"{m} (z)") + axy.plot(t, y2, linestyle="--", label=ylabel) + start, end = anomaly_window + if start and end: + axz.axvline(start / FPS, linestyle=":", alpha=0.6) + axz.axvline(end / FPS, linestyle=":", alpha=0.6) + axz.set_xlabel("Time (s)") + axz.set_ylabel("Anomaly score (z)") + axy.set_ylabel(ylabel) + axz.set_title(f"{EXPERIMENT_NAME}\n{title_suffix}\nSmoothing: {SMOOTHING_METHOD}") + lines1, labels1 = axz.get_legend_handles_labels() + lines2, labels2 = axy.get_legend_handles_labels() + axz.legend(lines1 + lines2, labels1 + labels2, loc="upper right") + axz.grid(True, alpha=0.3) + fig.savefig(output_datetime_path / fname, dpi=150) + plt.close(fig) + + +plot_series( + smoothed_missing, + "Missing points (%)", + f"{EXPERIMENT_NAME}_zscores_vs_missing.png", + "Degradation vs Missing Points", +) +plot_series( + smoothed_near, + "Near-sensor points (%)", + f"{EXPERIMENT_NAME}_zscores_vs_near.png", + "Degradation vs Near-Sensor Points (<0.5m)", +) + +# ========================= +# Save & archive +# ========================= +shutil.rmtree(latest_folder_path, ignore_errors=True) +latest_folder_path.mkdir(exist_ok=True, parents=True) +for f in output_datetime_path.iterdir(): + shutil.copy2(f, latest_folder_path) +shutil.copy2(__file__, output_datetime_path) +shutil.copy2(__file__, latest_folder_path) +shutil.move(output_datetime_path, archive_folder_path) +print("Done. Plots saved and archived.") diff --git a/tools/plot_scripts/results_inference_timeline.py b/tools/plot_scripts/results_inference_timeline.py new file mode 100644 index 0000000..f758e32 --- /dev/null +++ b/tools/plot_scripts/results_inference_timeline.py @@ -0,0 +1,304 @@ +import json +import pickle +import shutil +from datetime import datetime +from pathlib import Path + +import matplotlib.pyplot as plt +import numpy as np + +# ========================= +# User-configurable params +# ========================= +# Single experiment to plot (stem of the .bag file, e.g. "3_smoke_human_walking_2023-01-23") +EXPERIMENT_NAME = "3_smoke_human_walking_2023-01-23" + +# Directory that contains {EXPERIMENT_NAME}_{method}_scores.npy for methods in {"deepsad","ocsvm","isoforest"} +# Adjust this to where you save your per-method scores. +methods_scores_path = Path( + "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/test/inference" +) + +# Root data path containing .bag files used to build the cached stats +all_data_path = Path("/home/fedex/mt/data/subter") + +# Output base directory (timestamped subfolder will be created here, then archived and copied to "latest/") +output_path = Path("/home/fedex/mt/plots/results_inference_timeline") + +# Cache (stats + labels) directory — same as your original script +cache_path = output_path + +# Assumed LiDAR frame resolution to convert counts -> percent (unchanged from original) +data_resolution = 32 * 2048 + +# Frames per second for x-axis time +FPS = 10.0 + +# Whether to try to align score sign so that higher = more degraded. +# If manual labels exist for this experiment, alignment uses anomaly window mean vs. outside. +ALIGN_SCORE_DIRECTION = True + +# ========================= +# Setup output folders +# ========================= +datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") +latest_folder_path = output_path / "latest" +archive_folder_path = output_path / "archive" +output_datetime_path = output_path / datetime_folder_name + +output_path.mkdir(exist_ok=True, parents=True) +output_datetime_path.mkdir(exist_ok=True, parents=True) +latest_folder_path.mkdir(exist_ok=True, parents=True) +archive_folder_path.mkdir(exist_ok=True, parents=True) + +# ========================= +# Discover experiments to reconstruct indices consistent with caches +# ========================= +normal_experiment_paths, anomaly_experiment_paths = [], [] +if not all_data_path.exists(): + raise FileNotFoundError(f"all_data_path does not exist: {all_data_path}") + +for bag_file_path in all_data_path.iterdir(): + if bag_file_path.suffix != ".bag": + continue + if "smoke" in bag_file_path.name: + anomaly_experiment_paths.append(bag_file_path) + else: + normal_experiment_paths.append(bag_file_path) + +# Sort by filesize to match original ordering used when caches were generated +normal_experiment_paths = sorted( + normal_experiment_paths, key=lambda p: p.stat().st_size +) +anomaly_experiment_paths = sorted( + anomaly_experiment_paths, key=lambda p: p.stat().st_size +) + +# Find the path for the requested experiment +exp_path = None +exp_is_anomaly = None +for p in anomaly_experiment_paths: + if p.stem == EXPERIMENT_NAME: + exp_path = p + exp_is_anomaly = True + break +if exp_path is None: + for p in normal_experiment_paths: + if p.stem == EXPERIMENT_NAME: + exp_path = p + exp_is_anomaly = False + break +if exp_path is None: + raise FileNotFoundError( + f"Experiment '{EXPERIMENT_NAME}' not found as a .bag in {all_data_path}" + ) + +# Get the index within the appropriate list +if exp_is_anomaly: + exp_index = anomaly_experiment_paths.index(exp_path) +else: + exp_index = normal_experiment_paths.index(exp_path) + +# ========================= +# Load cached statistical data +# ========================= +missing_points_cache = Path(cache_path / "missing_points.pkl") +near_sensor_cache = Path(cache_path / "particles_near_sensor_counts_500.pkl") + +if not missing_points_cache.exists(): + raise FileNotFoundError(f"Missing points cache not found: {missing_points_cache}") +if not near_sensor_cache.exists(): + raise FileNotFoundError(f"Near-sensor cache not found: {near_sensor_cache}") + +with open(missing_points_cache, "rb") as f: + missing_points_normal, missing_points_anomaly = pickle.load(f) +with open(near_sensor_cache, "rb") as f: + near_sensor_normal, near_sensor_anomaly = pickle.load(f) + +if exp_is_anomaly: + missing_points_series = np.asarray(missing_points_anomaly[exp_index], dtype=float) + near_sensor_series = np.asarray(near_sensor_anomaly[exp_index], dtype=float) +else: + missing_points_series = np.asarray(missing_points_normal[exp_index], dtype=float) + near_sensor_series = np.asarray(near_sensor_normal[exp_index], dtype=float) + +# Convert counts to percentages of total points +missing_points_pct = (missing_points_series / data_resolution) * 100.0 +near_sensor_pct = (near_sensor_series / data_resolution) * 100.0 + +# ========================= +# Load manual anomaly frame borders (optional; used for sign alignment + vertical markers) +# ========================= +manually_labeled_anomaly_frames = {} +labels_json_path = cache_path / "manually_labeled_anomaly_frames.json" +if labels_json_path.exists(): + with open(labels_json_path, "r") as frame_borders_file: + manually_labeled_anomaly_frames_json = json.load(frame_borders_file) + for file in manually_labeled_anomaly_frames_json.get("files", []): + manually_labeled_anomaly_frames[file["filename"]] = ( + file.get("semi_target_begin_frame", None), + file.get("semi_target_end_frame", None), + ) + +# The JSON uses .npy filenames (as in original script). Create this experiment’s key. +exp_npy_filename = exp_path.with_suffix(".npy").name +anomaly_window = manually_labeled_anomaly_frames.get(exp_npy_filename, (None, None)) + + +# ========================= +# Load method scores and z-score normalize per method +# ========================= +def zscore_1d(x: np.ndarray, eps=1e-12): + x = np.asarray(x, dtype=float) + mu = np.mean(x) + sigma = np.std(x, ddof=0) + if sigma < eps: + return np.zeros_like(x) + return (x - mu) / sigma + + +def maybe_align_direction(z: np.ndarray, window): + """Flip sign so that the anomaly window mean is higher than the outside mean, if labels exist.""" + start, end = window + if start is None or end is None: + return z # no labels → leave as-is + start = int(max(0, start)) + end = int(min(len(z), end)) + if end <= start or end > len(z): + return z + inside_mean = float(np.mean(z[start:end])) + # outside: everything except [start:end]; handle edge cases + if start == 0 and end == len(z): + return z + outside_parts = [] + if start > 0: + outside_parts.append(z[:start]) + if end < len(z): + outside_parts.append(z[end:]) + if not outside_parts: + return z + outside_mean = float(np.mean(np.concatenate(outside_parts))) + return z if inside_mean >= outside_mean else -z + + +methods = ["deepsad", "ocsvm", "isoforest"] +method_scores = {} +method_zscores = {} + +if not methods_scores_path.exists(): + raise FileNotFoundError( + f"Methods scores path does not exist: {methods_scores_path}" + ) + +for m in methods: + file_path = methods_scores_path / f"{EXPERIMENT_NAME}_{m}_scores.npy" + if not file_path.exists(): + raise FileNotFoundError(f"Missing scores file for method '{m}': {file_path}") + s = np.load(file_path) + s = np.asarray(s, dtype=float).reshape(-1) + # If needed, truncate or pad to match stats length (should match if generated consistently) + n = min(len(s), len(missing_points_pct)) + if len(s) != len(missing_points_pct): + # Align by truncation to the shortest length + s = s[:n] + # Also truncate stats to match + missing_points_pct = missing_points_pct[:n] + near_sensor_pct = near_sensor_pct[:n] + z = zscore_1d(s) + if ALIGN_SCORE_DIRECTION: + z = maybe_align_direction(z, anomaly_window) + method_scores[m] = s + method_zscores[m] = z + +# Common time axis in seconds +num_frames = len(missing_points_pct) +t = np.arange(num_frames) / FPS + +# ========================= +# Plot 1: Missing points (%) vs. method z-scores +# ========================= +fig1, axz1 = plt.subplots(figsize=(14, 6), constrained_layout=True) +axy1 = axz1.twinx() + +# plot z-scores +for m in methods: + axz1.plot(t, method_zscores[m], label=f"{m} (z)", alpha=0.9) + +# plot missing points (%) +axy1.plot(t, missing_points_pct, linestyle="--", alpha=0.7, label="Missing points (%)") + +# vertical markers for anomaly window if available +start, end = anomaly_window +if start is not None and end is not None and 0 <= start < end <= num_frames: + axz1.axvline(x=start / FPS, linestyle=":", alpha=0.6) + axz1.axvline(x=end / FPS, linestyle=":", alpha=0.6) + +axz1.set_xlabel("Time (s)") +axz1.set_ylabel("Anomaly score (z-score, ↑ = more degraded)") +axy1.set_ylabel("Missing points (%)") +axz1.set_title(f"{EXPERIMENT_NAME}\nDegradation vs. Missing Points") + +# Build a combined legend +lines1, labels1 = axz1.get_legend_handles_labels() +lines2, labels2 = axy1.get_legend_handles_labels() +axz1.legend(lines1 + lines2, labels1 + labels2, loc="upper right") + +axz1.grid(True, alpha=0.3) +fig1.savefig( + output_datetime_path / f"{EXPERIMENT_NAME}_zscores_vs_missing_points.png", dpi=150 +) +plt.close(fig1) + +# ========================= +# Plot 2: Near-sensor (%) vs. method z-scores +# ========================= +fig2, axz2 = plt.subplots(figsize=(14, 6), constrained_layout=True) +axy2 = axz2.twinx() + +for m in methods: + axz2.plot(t, method_zscores[m], label=f"{m} (z)", alpha=0.9) + +axy2.plot(t, near_sensor_pct, linestyle="--", alpha=0.7, label="Near-sensor <0.5m (%)") + +start, end = anomaly_window +if start is not None and end is not None and 0 <= start < end <= num_frames: + axz2.axvline(x=start / FPS, linestyle=":", alpha=0.6) + axz2.axvline(x=end / FPS, linestyle=":", alpha=0.6) + +axz2.set_xlabel("Time (s)") +axz2.set_ylabel("Anomaly score (z-score, ↑ = more degraded)") +axy2.set_ylabel("Near-sensor points (%)") +axz2.set_title(f"{EXPERIMENT_NAME}\nDegradation vs. Near-Sensor Points (<0.5 m)") + +lines1, labels1 = axz2.get_legend_handles_labels() +lines2, labels2 = axy2.get_legend_handles_labels() +axz2.legend(lines1 + lines2, labels1 + labels2, loc="upper right") + +axz2.grid(True, alpha=0.3) +fig2.savefig( + output_datetime_path / f"{EXPERIMENT_NAME}_zscores_vs_near_sensor.png", dpi=150 +) +plt.close(fig2) + +# ========================= +# Preserve latest/, archive/, copy script +# ========================= + +# delete current latest folder +shutil.rmtree(latest_folder_path, ignore_errors=True) + +# create new latest folder +latest_folder_path.mkdir(exist_ok=True, parents=True) + +# copy contents of output folder to the latest folder +for file in output_datetime_path.iterdir(): + shutil.copy2(file, latest_folder_path) + +# copy this python script to preserve the code used +shutil.copy2(__file__, output_datetime_path) +shutil.copy2(__file__, latest_folder_path) + +# move output date folder to archive +shutil.move(output_datetime_path, archive_folder_path) + +print("Done. Plots saved and archived.")