full upload so as not to lose anything important

Deep-SAD-PyTorch/.gitignore (vendored, 1 line changed)
@@ -4,3 +4,4 @@
**/__pycache__/
data
log
infer
@@ -26,6 +26,17 @@
      config.allowUnfree = true;
      config.cudaSupport = true;
    };
    thundersvm = import ./nix/thundersvm.nix {
      inherit pkgs;
      inherit (pkgs) fetchFromGitHub cmake gcc12Stdenv;
      cudaPackages = pkgs.cudaPackages;
    };

    thundersvm-python = import ./nix/thundersvm-python.nix {
      inherit pkgs;
      pythonPackages = pkgs.python311Packages;
      thundersvm = thundersvm;
    };
    inherit (poetry2nix.lib.mkPoetry2Nix { inherit pkgs; }) mkPoetryApplication;
  in
  {
@@ -43,6 +54,7 @@
    buildInputs = with pkgs.python311Packages; [
      torch-bin
      torchvision-bin
      thundersvm-python
    ];
    #LD_LIBRARY_PATH = with pkgs; lib.makeLibraryPath [
    #pkgs.stdenv.cc.cc
Deep-SAD-PyTorch/nix/thundersvm-python.nix (new file, 48 lines)
@@ -0,0 +1,48 @@
{ pkgs
, pythonPackages
, thundersvm
, ...
}:

pythonPackages.buildPythonPackage rec {
  pname = "thundersvm-python";
  version = "0.3.4";

  src = pkgs.fetchFromGitHub {
    owner = "Xtra-Computing";
    repo = "thundersvm";
    rev = "5c6a056ac7f474b085d5415c81c5d48a1419642a";
    sha256 = "sha256-58YtOC8FK1d3ATETmwrEBc+/SQx0Pzlx5L1MgxXPQDE=";
  };

  #sourceRoot = "${src.name}/python";

  buildInputs = [
    pkgs.cudaPackages.cudatoolkit
    thundersvm
  ];

  dependencies = [
    pythonPackages.scikit-learn
  ];

  preBuild = ''
    ln -s ${thundersvm} build
    cd python
  '';

  doCheck = false;

  nativeBuildInputs = [
    pkgs.autoPatchelfHook
  ];

  meta = with pkgs.lib; {
    description = "Python bindings for ThunderSVM, with CUDA support";
    homepage = "https://github.com/Xtra-Computing/thundersvm";
    license = licenses.asl20;
    maintainers = [];
    platforms = platforms.linux;
  };
}
Deep-SAD-PyTorch/nix/thundersvm.nix (new file, 35 lines)
@@ -0,0 +1,35 @@
{ pkgs
, fetchFromGitHub
, cmake
, gcc12Stdenv ? pkgs.gcc12Stdenv
, cudaPackages ? pkgs.cudaPackages
}:

gcc12Stdenv.mkDerivation rec {
  pname = "thundersvm";
  version = "git";

  src = fetchFromGitHub {
    owner = "Xtra-Computing";
    repo = "thundersvm";
    rev = "5c6a056ac7f474b085d5415c81c5d48a1419642a";
    sha256 = "sha256-58YtOC8FK1d3ATETmwrEBc+/SQx0Pzlx5L1MgxXPQDE=";
  };

  nativeBuildInputs = [
    cmake
  ];

  buildInputs = [
    cudaPackages.cudatoolkit
  ];

  meta = with pkgs.lib; {
    description = "ThunderSVM CLI and library (compiled with CUDA)";
    homepage = "https://github.com/Xtra-Computing/thundersvm";
    license = licenses.asl20;
    maintainers = [];
    platforms = platforms.linux;
  };
}
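The two derivations above are what the flake hunk earlier wires into buildInputs. As a quick sanity check of the resulting environment (not part of the commit), the bindings can be exercised from Python; this is a minimal sketch that assumes the thundersvm module follows its scikit-learn-style estimator API:

# Minimal smoke test for the thundersvm-python package built above (illustrative, not from the commit).
import numpy as np
from thundersvm import OneClassSVM  # provided by the thundersvm-python derivation

X = np.random.randn(256, 16).astype(np.float32)
model = OneClassSVM(kernel="rbf", nu=0.1, verbose=True, max_mem_size=4048)  # same kwargs the OC-SVM baseline uses below
model.fit(X)              # runs on the GPU when the CUDA build is picked up
preds = model.predict(X)  # +1 for inliers, -1 for outliers, scikit-learn convention
print("inlier fraction:", float((preds == 1).mean()))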
@@ -1,10 +1,12 @@
import json
import pickle

import torch

from base.base_dataset import BaseADDataset
from networks.main import build_network, build_autoencoder
from optim.DeepSAD_trainer import DeepSADTrainer
from networks.main import build_autoencoder, build_network
from optim.ae_trainer import AETrainer
from optim.DeepSAD_trainer import DeepSADTrainer


class DeepSAD(object):
@@ -65,6 +67,7 @@ class DeepSAD(object):
weight_decay: float = 1e-6,
device: str = "cuda",
n_jobs_dataloader: int = 0,
k_fold_idx: int = None,
):
"""Trains the Deep SAD model on the training data."""

@@ -82,7 +85,7 @@ class DeepSAD(object):
n_jobs_dataloader=n_jobs_dataloader,
)
# Get the model
self.net = self.trainer.train(dataset, self.net)
self.net = self.trainer.train(dataset, self.net, k_fold_idx=k_fold_idx)
self.results["train_time"] = self.trainer.train_time
self.c = self.trainer.c.cpu().data.numpy().tolist()  # get as list

@@ -99,7 +102,11 @@ class DeepSAD(object):
return self.trainer.infer(dataset, self.net)

def test(
self, dataset: BaseADDataset, device: str = "cuda", n_jobs_dataloader: int = 0
self,
dataset: BaseADDataset,
device: str = "cuda",
n_jobs_dataloader: int = 0,
k_fold_idx: int = None,
):
"""Tests the Deep SAD model on the test data."""

@@ -108,10 +115,13 @@ class DeepSAD(object):
self.c, self.eta, device=device, n_jobs_dataloader=n_jobs_dataloader
)

self.trainer.test(dataset, self.net)
self.trainer.test(dataset, self.net, k_fold_idx=k_fold_idx)

# Get results
self.results["test_auc"] = self.trainer.test_auc
self.results["test_roc"] = self.trainer.test_roc
self.results["test_prc"] = self.trainer.test_prc
self.results["test_ap"] = self.trainer.test_ap
self.results["test_time"] = self.trainer.test_time
self.results["test_scores"] = self.trainer.test_scores

@@ -126,6 +136,7 @@ class DeepSAD(object):
weight_decay: float = 1e-6,
device: str = "cuda",
n_jobs_dataloader: int = 0,
k_fold_idx: int = None,
):
"""Pretrains the weights for the Deep SAD network phi via autoencoder."""

@@ -144,13 +155,13 @@ class DeepSAD(object):
device=device,
n_jobs_dataloader=n_jobs_dataloader,
)
self.ae_net = self.ae_trainer.train(dataset, self.ae_net)
self.ae_net = self.ae_trainer.train(dataset, self.ae_net, k_fold_idx=k_fold_idx)

# Get train results
self.ae_results["train_time"] = self.ae_trainer.train_time

# Test
self.ae_trainer.test(dataset, self.ae_net)
self.ae_trainer.test(dataset, self.ae_net, k_fold_idx=k_fold_idx)

# Get test results
self.ae_results["test_auc"] = self.ae_trainer.test_auc
@@ -197,10 +208,11 @@ class DeepSAD(object):
self.ae_net = build_autoencoder(self.net_name)
self.ae_net.load_state_dict(model_dict["ae_net_dict"])

def save_results(self, export_json):
def save_results(self, export_pkl):
"""Save results dict to a JSON-file."""
with open(export_json, "w") as fp:
json.dump(self.results, fp)
with open(export_pkl, "wb") as fp:
# json.dump(self.results, fp)
pickle.dump(self.results, fp)

def save_ae_results(self, export_json):
"""Save autoencoder results dict to a JSON-file."""
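save_results now pickles the results dict instead of writing JSON, presumably because the dict now carries the raw ROC and precision-recall curve arrays, which json.dump cannot serialize. A short sketch of reading such a file back (the path is illustrative; the curve layout assumes the trainer stores the plain sklearn tuples, as the shallow baselines in this commit do):

# Sketch: loading a results file written by DeepSAD.save_results (now pickle, not JSON).
import pickle

with open("results.pkl", "rb") as fp:  # illustrative path
    results = pickle.load(fp)

fpr, tpr, roc_thresholds = results["test_roc"]            # tuple from sklearn.metrics.roc_curve
precision, recall, prc_thresholds = results["test_prc"]   # tuple from sklearn.metrics.precision_recall_curve
print("AUC:", results["test_auc"], "AP:", results["test_ap"])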
@@ -1,12 +1,16 @@
from sklearn.model_selection import KFold
from torch.utils.data import ConcatDataset, DataLoader, Subset

from .base_dataset import BaseADDataset
from torch.utils.data import DataLoader


class TorchvisionDataset(BaseADDataset):
"""TorchvisionDataset class for datasets already implemented in torchvision.datasets."""

def __init__(self, root: str):
def __init__(self, root: str, k_fold_number: int = 5):
super().__init__(root)
self.k_fold_number = k_fold_number
self.fold_indices = None

def loaders(
self,
@@ -50,3 +54,43 @@ class TorchvisionDataset(BaseADDataset):
else None
)
return train_loader, test_loader, inference_loader

def loaders_k_fold(
self,
fold_idx: int,
batch_size: int,
shuffle_train=True,
shuffle_test=False,
num_workers: int = 0,
) -> (DataLoader, DataLoader):
if self.fold_indices is None:
# Define the K-fold Cross Validator
kfold = KFold(n_splits=self.k_fold_number, shuffle=False)
self.fold_indices = []
# Generate indices for each fold and store them in a list
for train_indices, val_indices in kfold.split(self.data_set):
self.fold_indices.append((train_indices, val_indices))

train_loader = (
DataLoader(
dataset=Subset(self.data_set, self.fold_indices[fold_idx][0]),
batch_size=batch_size,
shuffle=shuffle_train,
num_workers=num_workers,
drop_last=True,
)
if self.data_set is not None
else None
)
test_loader = (
DataLoader(
dataset=Subset(self.data_set, self.fold_indices[fold_idx][1]),
batch_size=batch_size,
shuffle=shuffle_test,
num_workers=num_workers,
drop_last=False,
)
if self.data_set is not None
else None
)
return train_loader, test_loader
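loaders_k_fold computes the KFold split once, caches it in fold_indices, and hands out Subset-backed loaders for the requested fold, so every model trained against the same dataset object sees identical folds. A minimal usage sketch (the dataset object is assumed to be a subclass built with k_fold=True, e.g. SubTer_Dataset, so data_set is populated):

# Sketch: iterating the folds exposed by TorchvisionDataset.loaders_k_fold.
for fold_idx in range(dataset.k_fold_number):
    train_loader, val_loader = dataset.loaders_k_fold(
        fold_idx=fold_idx, batch_size=128, num_workers=0
    )
    # train on train_loader, evaluate on val_loader; both index into the shared
    # dataset.data_set via torch.utils.data.Subset, so folds stay consistent across
    # the Deep SAD, OC-SVM and Isolation Forest runs driven from main.py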
@@ -1,12 +1,17 @@
import json
import logging
import pickle
import time
import torch
import numpy as np

from torch.utils.data import DataLoader
import numpy as np
import torch
from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_auc_score
from sklearn.metrics import (
average_precision_score,
precision_recall_curve,
roc_auc_score,
roc_curve,
)

from base.base_dataset import BaseADDataset
from networks.main import build_autoencoder
@@ -22,7 +27,7 @@ class IsoForest(object):
contamination=0.1,
n_jobs=-1,
seed=None,
**kwargs
**kwargs,
):
"""Init Isolation Forest instance."""
self.n_estimators = n_estimators
@@ -37,7 +42,7 @@ class IsoForest(object):
contamination=contamination,
n_jobs=n_jobs,
random_state=seed,
**kwargs
**kwargs,
)

self.hybrid = hybrid
@@ -47,28 +52,44 @@ class IsoForest(object):
|
||||
"train_time": None,
|
||||
"test_time": None,
|
||||
"test_auc": None,
|
||||
"test_roc": None,
|
||||
"test_scores": None,
|
||||
}
|
||||
|
||||
def train(
|
||||
self, dataset: BaseADDataset, device: str = "cpu", n_jobs_dataloader: int = 0
|
||||
self,
|
||||
dataset: BaseADDataset,
|
||||
device: str = "cpu",
|
||||
n_jobs_dataloader: int = 0,
|
||||
k_fold_idx: int = None,
|
||||
):
|
||||
"""Trains the Isolation Forest model on the training data."""
|
||||
logger = logging.getLogger()
|
||||
|
||||
# do not drop last batch for non-SGD optimization shallow_ssad
|
||||
train_loader = DataLoader(
|
||||
dataset=dataset.train_set,
|
||||
batch_size=128,
|
||||
shuffle=True,
|
||||
num_workers=n_jobs_dataloader,
|
||||
drop_last=False,
|
||||
)
|
||||
# drop_last necessary?
|
||||
# train_loader = DataLoader(
|
||||
# dataset=dataset.train_set,
|
||||
# batch_size=128,
|
||||
# shuffle=True,
|
||||
# num_workers=n_jobs_dataloader,
|
||||
# drop_last=False,
|
||||
# )
|
||||
|
||||
if k_fold_idx is not None:
|
||||
train_loader, _ = dataset.loaders_k_fold(
|
||||
fold_idx=k_fold_idx,
|
||||
batch_size=128,
|
||||
num_workers=n_jobs_dataloader,
|
||||
)
|
||||
else:
|
||||
train_loader, _, _ = dataset.loaders(
|
||||
batch_size=128, num_workers=n_jobs_dataloader
|
||||
)
|
||||
# Get data from loader
|
||||
X = ()
|
||||
for data in train_loader:
|
||||
inputs, _, _, _ = data
|
||||
inputs, _, _, _, _ = data
|
||||
inputs = inputs.to(device)
|
||||
if self.hybrid:
|
||||
inputs = self.ae_net.encoder(
|
||||
@@ -91,14 +112,25 @@ class IsoForest(object):
|
||||
logger.info("Finished training.")
|
||||
|
||||
def test(
|
||||
self, dataset: BaseADDataset, device: str = "cpu", n_jobs_dataloader: int = 0
|
||||
self,
|
||||
dataset: BaseADDataset,
|
||||
device: str = "cpu",
|
||||
n_jobs_dataloader: int = 0,
|
||||
k_fold_idx: int = None,
|
||||
):
|
||||
"""Tests the Isolation Forest model on the test data."""
|
||||
logger = logging.getLogger()
|
||||
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=128, num_workers=n_jobs_dataloader
|
||||
)
|
||||
if k_fold_idx is not None:
|
||||
_, test_loader = dataset.loaders_k_fold(
|
||||
fold_idx=k_fold_idx,
|
||||
batch_size=128,
|
||||
num_workers=n_jobs_dataloader,
|
||||
)
|
||||
else:
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=128, num_workers=n_jobs_dataloader
|
||||
)
|
||||
|
||||
# Get data from loader
|
||||
idx_label_score = []
|
||||
@@ -106,7 +138,7 @@ class IsoForest(object):
|
||||
idxs = []
|
||||
labels = []
|
||||
for data in test_loader:
|
||||
inputs, label_batch, _, idx = data
|
||||
inputs, label_batch, _, idx, _ = data
|
||||
inputs, label_batch, idx = (
|
||||
inputs.to(device),
|
||||
label_batch.to(device),
|
||||
@@ -140,6 +172,9 @@ class IsoForest(object):
|
||||
labels = np.array(labels)
|
||||
scores = np.array(scores)
|
||||
self.results["test_auc"] = roc_auc_score(labels, scores)
|
||||
self.results["test_roc"] = roc_curve(labels, scores)
|
||||
self.results["test_prc"] = precision_recall_curve(labels, scores)
|
||||
self.results["test_ap"] = average_precision_score(labels, scores)
|
||||
|
||||
# Log results
|
||||
logger.info("Test AUC: {:.2f}%".format(100.0 * self.results["test_auc"]))
|
||||
@@ -178,7 +213,8 @@ class IsoForest(object):
|
||||
"""Load Isolation Forest model from import_path."""
|
||||
pass
|
||||
|
||||
def save_results(self, export_json):
|
||||
def save_results(self, export_pkl):
|
||||
"""Save results dict to a JSON-file."""
|
||||
with open(export_json, "w") as fp:
|
||||
json.dump(self.results, fp)
|
||||
with open(export_pkl, "wb") as fp:
|
||||
# json.dump(self.results, fp)
|
||||
pickle.dump(self.results, fp)
|
||||
|
||||
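The baseline test methods now keep the full ROC and precision-recall curves alongside the scalar AUC. For reference, a tiny self-contained example of exactly what the added lines store (dummy labels and scores):

# What ends up in results["test_auc"] / ["test_roc"] / ["test_prc"] / ["test_ap"].
import numpy as np
from sklearn.metrics import (
    average_precision_score,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)

labels = np.array([0, 0, 1, 1])           # 1 = anomalous
scores = np.array([0.1, 0.4, 0.35, 0.8])  # higher = more anomalous

print(roc_auc_score(labels, scores))             # 0.75, stored as "test_auc"
fpr, tpr, thresholds = roc_curve(labels, scores)                           # arrays, stored as "test_roc"
precision, recall, pr_thresholds = precision_recall_curve(labels, scores)  # stored as "test_prc"
print(average_precision_score(labels, scores))   # stored as "test_ap"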
@@ -1,12 +1,18 @@
import json
import logging
import pickle
import time
import torch
import numpy as np

from torch.utils.data import DataLoader
from sklearn.svm import OneClassSVM
from sklearn.metrics import roc_auc_score
import numpy as np
import torch
from sklearn.metrics import (
average_precision_score,
precision_recall_curve,
roc_auc_score,
roc_curve,
)
from thundersvm import OneClassSVM

from base.base_dataset import BaseADDataset
from networks.main import build_autoencoder
@@ -21,7 +27,7 @@ class OCSVM(object):
self.rho = None
self.gamma = None

self.model = OneClassSVM(kernel=kernel, nu=nu)
self.model = OneClassSVM(kernel=kernel, nu=nu, verbose=True, max_mem_size=4048)

self.hybrid = hybrid
self.ae_net = None  # autoencoder network for the case of a hybrid model
@@ -40,24 +46,31 @@ class OCSVM(object):
|
||||
}
|
||||
|
||||
def train(
|
||||
self, dataset: BaseADDataset, device: str = "cpu", n_jobs_dataloader: int = 0
|
||||
self,
|
||||
dataset: BaseADDataset,
|
||||
device: str = "cpu",
|
||||
n_jobs_dataloader: int = 0,
|
||||
k_fold_idx: int = None,
|
||||
batch_size: int = 32,
|
||||
):
|
||||
"""Trains the OC-SVM model on the training data."""
|
||||
logger = logging.getLogger()
|
||||
|
||||
# do not drop last batch for non-SGD optimization shallow_ssad
|
||||
train_loader = DataLoader(
|
||||
dataset=dataset.train_set,
|
||||
batch_size=128,
|
||||
shuffle=True,
|
||||
num_workers=n_jobs_dataloader,
|
||||
drop_last=False,
|
||||
)
|
||||
if k_fold_idx is not None:
|
||||
train_loader, _ = dataset.loaders_k_fold(
|
||||
fold_idx=k_fold_idx,
|
||||
batch_size=batch_size,
|
||||
num_workers=n_jobs_dataloader,
|
||||
)
|
||||
else:
|
||||
train_loader, _, _ = dataset.loaders(
|
||||
batch_size=batch_size, num_workers=n_jobs_dataloader
|
||||
)
|
||||
|
||||
# Get data from loader
|
||||
X = ()
|
||||
for data in train_loader:
|
||||
inputs, _, _, _ = data
|
||||
inputs, _, _, _, _ = data
|
||||
inputs = inputs.to(device)
|
||||
if self.hybrid:
|
||||
inputs = self.ae_net.encoder(
|
||||
@@ -77,14 +90,21 @@ class OCSVM(object):
|
||||
best_auc = 0.0
|
||||
|
||||
# Sample hold-out set from test set
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=128, num_workers=n_jobs_dataloader
|
||||
)
|
||||
if k_fold_idx is not None:
|
||||
_, test_loader = dataset.loaders_k_fold(
|
||||
fold_idx=k_fold_idx,
|
||||
batch_size=batch_size,
|
||||
num_workers=n_jobs_dataloader,
|
||||
)
|
||||
else:
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=batch_size, num_workers=n_jobs_dataloader
|
||||
)
|
||||
|
||||
X_test = ()
|
||||
labels = []
|
||||
for data in test_loader:
|
||||
inputs, label_batch, _, _ = data
|
||||
inputs, label_batch, _, _, _ = data
|
||||
inputs, label_batch = inputs.to(device), label_batch.to(device)
|
||||
if self.hybrid:
|
||||
inputs = self.ae_net.encoder(
|
||||
@@ -102,8 +122,9 @@ class OCSVM(object):
|
||||
np.sum(labels == 1),
|
||||
)
|
||||
n_val = int(0.1 * n_test)
|
||||
n_val_normal, n_val_outlier = int(n_val * (n_normal / n_test)), int(
|
||||
n_val * (n_outlier / n_test)
|
||||
n_val_normal, n_val_outlier = (
|
||||
int(n_val * (n_normal / n_test)),
|
||||
int(n_val * (n_outlier / n_test)),
|
||||
)
|
||||
perm = np.random.permutation(n_test)
|
||||
X_val = np.concatenate(
|
||||
@@ -116,9 +137,14 @@ class OCSVM(object):
|
||||
|
||||
i = 1
|
||||
for gamma in gammas:
|
||||
|
||||
# Model candidate
|
||||
model = OneClassSVM(kernel=self.kernel, nu=self.nu, gamma=gamma)
|
||||
model = OneClassSVM(
|
||||
kernel=self.kernel,
|
||||
nu=self.nu,
|
||||
gamma=gamma,
|
||||
verbose=True,
|
||||
max_mem_size=4048,
|
||||
)
|
||||
|
||||
# Train
|
||||
start_time = time.time()
|
||||
@@ -147,7 +173,9 @@ class OCSVM(object):
|
||||
|
||||
# If hybrid, also train a model with linear kernel
|
||||
if self.hybrid:
|
||||
self.linear_model = OneClassSVM(kernel="linear", nu=self.nu)
|
||||
self.linear_model = OneClassSVM(
|
||||
kernel="linear", nu=self.nu, max_mem_size=4048
|
||||
)
|
||||
start_time = time.time()
|
||||
self.linear_model.fit(X)
|
||||
train_time = time.time() - start_time
|
||||
@@ -160,14 +188,26 @@ class OCSVM(object):
|
||||
logger.info("Finished training.")
|
||||
|
||||
def test(
|
||||
self, dataset: BaseADDataset, device: str = "cpu", n_jobs_dataloader: int = 0
|
||||
self,
|
||||
dataset: BaseADDataset,
|
||||
device: str = "cpu",
|
||||
n_jobs_dataloader: int = 0,
|
||||
k_fold_idx: int = None,
|
||||
batch_size: int = 32,
|
||||
):
|
||||
"""Tests the OC-SVM model on the test data."""
|
||||
logger = logging.getLogger()
|
||||
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=128, num_workers=n_jobs_dataloader
|
||||
)
|
||||
if k_fold_idx is not None:
|
||||
_, test_loader = dataset.loaders_k_fold(
|
||||
fold_idx=k_fold_idx,
|
||||
batch_size=batch_size,
|
||||
num_workers=n_jobs_dataloader,
|
||||
)
|
||||
else:
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=batch_size, num_workers=n_jobs_dataloader
|
||||
)
|
||||
|
||||
# Get data from loader
|
||||
idx_label_score = []
|
||||
@@ -175,7 +215,7 @@ class OCSVM(object):
|
||||
idxs = []
|
||||
labels = []
|
||||
for data in test_loader:
|
||||
inputs, label_batch, _, idx = data
|
||||
inputs, label_batch, _, idx, _ = data
|
||||
inputs, label_batch, idx = (
|
||||
inputs.to(device),
|
||||
label_batch.to(device),
|
||||
@@ -212,6 +252,9 @@ class OCSVM(object):
|
||||
labels = np.array(labels)
|
||||
scores = np.array(scores)
|
||||
self.results["test_auc"] = roc_auc_score(labels, scores)
|
||||
self.results["test_roc"] = roc_curve(labels, scores)
|
||||
self.results["test_prc"] = precision_recall_curve(labels, scores)
|
||||
self.results["test_ap"] = average_precision_score(labels, scores)
|
||||
|
||||
# If hybrid, also test model with linear kernel
|
||||
if self.hybrid:
|
||||
@@ -268,7 +311,7 @@ class OCSVM(object):
|
||||
"""Load OC-SVM model from import_path."""
|
||||
pass
|
||||
|
||||
def save_results(self, export_json):
|
||||
"""Save results dict to a JSON-file."""
|
||||
with open(export_json, "w") as fp:
|
||||
json.dump(self.results, fp)
|
||||
def save_results(self, export_pkl):
|
||||
with open(export_pkl, "wb") as fp:
|
||||
# json.dump(self.results, fp)
|
||||
pickle.dump(self.results, fp)
|
||||
|
||||
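For the gamma grid search, the OC-SVM baseline samples a small hold-out set from the test loader in proportion to the normal/outlier mix before fitting each candidate model. A worked example of the sizing arithmetic in that hunk (the counts are invented):

# Worked example of the hold-out sizing used for the OC-SVM gamma search.
n_test, n_normal, n_outlier = 1000, 900, 100        # illustrative counts

n_val = int(0.1 * n_test)                           # 100 hold-out samples
n_val_normal = int(n_val * (n_normal / n_test))     # 90 normal
n_val_outlier = int(n_val * (n_outlier / n_test))   # 10 outlier
print(n_val, n_val_normal, n_val_outlier)           # 100 90 10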
Deep-SAD-PyTorch/src/datasets/esmerasplit.py (new file, 264 lines)
@@ -0,0 +1,264 @@
import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, Optional
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torchvision.transforms as transforms
|
||||
from PIL import Image
|
||||
from torch.utils.data import Subset
|
||||
from torchvision.datasets import VisionDataset
|
||||
|
||||
from base.torchvision_dataset import TorchvisionDataset
|
||||
|
||||
from .preprocessing import create_semisupervised_setting
|
||||
|
||||
|
||||
class EsmeraSplit_Dataset(TorchvisionDataset):
|
||||
def __init__(
|
||||
self,
|
||||
root: str,
|
||||
ratio_known_normal: float = 0.0,
|
||||
ratio_known_outlier: float = 0.0,
|
||||
ratio_pollution: float = 0.0,
|
||||
inference: bool = False,
|
||||
):
|
||||
super().__init__(root)
|
||||
|
||||
# Define normal and outlier classes
|
||||
self.n_classes = 2 # 0: normal, 1: outlier
|
||||
self.normal_classes = tuple([0])
|
||||
self.outlier_classes = tuple([1])
|
||||
self.inference_set = None
|
||||
|
||||
# MNIST preprocessing: feature scaling to [0, 1]
|
||||
# FIXME understand mnist feature scaling and check if it or other preprocessing is necessary for elpv
|
||||
transform = transforms.ToTensor()
|
||||
target_transform = transforms.Lambda(lambda x: int(x in self.outlier_classes))
|
||||
|
||||
if inference:
|
||||
self.inference_set = EsmeraSplitInference(
|
||||
root=self.root,
|
||||
transform=transform,
|
||||
)
|
||||
else:
|
||||
# Get train set
|
||||
train_set = EsmeraSplitTraining(
|
||||
root=self.root,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
train=True,
|
||||
)
|
||||
|
||||
# Create semi-supervised setting
|
||||
idx, _, semi_targets = create_semisupervised_setting(
|
||||
train_set.targets.cpu().data.numpy(),
|
||||
self.normal_classes,
|
||||
self.outlier_classes,
|
||||
self.outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
)
|
||||
train_set.semi_targets[idx] = torch.tensor(
|
||||
np.array(semi_targets, dtype=np.int8)
|
||||
) # set respective semi-supervised labels
|
||||
|
||||
# Subset train_set to semi-supervised setup
|
||||
self.train_set = Subset(train_set, idx)
|
||||
|
||||
# Get test set
|
||||
self.test_set = EsmeraSplitTraining(
|
||||
root=self.root,
|
||||
train=False,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
)
|
||||
|
||||
|
||||
def split_array_into_subarrays(array, split_height, split_width):
    original_shape = array.shape
    height, width = original_shape[-2], original_shape[-1]
    assert height % split_height == 0, "The height is not divisible by the split_height"
    assert width % split_width == 0, "The width is not divisible by the split_width"
    num_splits_height = height // split_height
    num_splits_width = width // split_width
    reshaped_array = array.reshape(
        -1, num_splits_height, split_height, num_splits_width, split_width
    )
    transposed_array = reshaped_array.transpose(0, 1, 3, 2, 4)
    final_array = transposed_array.reshape(-1, split_height, split_width)
    return final_array
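split_array_into_subarrays tiles every 2D frame into non-overlapping split_height x split_width patches and flattens them along the first axis. A worked example with the 16x256 patch size used elsewhere in this file:

# Worked example: 3 frames of 64x512 yield 3 * (64/16) * (512/256) = 24 patches of 16x256.
import numpy as np

frames = np.zeros((3, 64, 512), dtype=np.float32)
patches = split_array_into_subarrays(frames, split_height=16, split_width=256)
print(patches.shape)  # (24, 16, 256)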
|
||||
|
||||
class EsmeraSplitTraining(VisionDataset):
|
||||
def __init__(
|
||||
self,
|
||||
root: str,
|
||||
transforms: Optional[Callable] = None,
|
||||
transform: Optional[Callable] = None,
|
||||
target_transform: Optional[Callable] = None,
|
||||
train=False,
|
||||
split=0.7,
|
||||
seed=0,
|
||||
height=16,
|
||||
width=256,
|
||||
):
|
||||
super(EsmeraSplitTraining, self).__init__(
|
||||
root, transforms, transform, target_transform
|
||||
)
|
||||
|
||||
experiments_data = []
|
||||
experiments_targets = []
|
||||
validation_files = []
|
||||
experiment_files = []
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
for experiment_file in Path(root).iterdir():
|
||||
if experiment_file.is_dir() and experiment_file.name == "validation":
|
||||
for validation_file in experiment_file.iterdir():
|
||||
if validation_file.suffix != ".npy":
|
||||
continue
|
||||
validation_files.append(experiment_file)
|
||||
if experiment_file.suffix != ".npy":
|
||||
continue
|
||||
experiment_files.append(experiment_file)
|
||||
experiment_data = np.load(experiment_file)
|
||||
|
||||
if (
|
||||
experiment_data.shape[1] % height != 0
|
||||
or experiment_data.shape[2] % width != 0
|
||||
):
|
||||
logger.error(
|
||||
f"Experiment {experiment_file.name} has shape {experiment_data.shape} which is not divisible by {height}x{width}"
|
||||
)
|
||||
experiment_data = split_array_into_subarrays(experiment_data, height, width)
|
||||
|
||||
# experiment_data = np.lib.format.open_memmap(experiment_file, mode='r+')
|
||||
experiment_targets = (
|
||||
np.ones(experiment_data.shape[0], dtype=np.int8)
|
||||
if "smoke" in experiment_file.name
|
||||
else np.zeros(experiment_data.shape[0], dtype=np.int8)
|
||||
)
|
||||
experiments_data.append(experiment_data)
|
||||
experiments_targets.append(experiment_targets)
|
||||
|
||||
filtered_validation_files = []
|
||||
for validation_file in validation_files:
|
||||
validation_file_name = validation_file.name
|
||||
file_exists_in_experiments = any(
|
||||
experiment_file.name == validation_file_name
|
||||
for experiment_file in experiment_files
|
||||
)
|
||||
if not file_exists_in_experiments:
|
||||
filtered_validation_files.append(validation_file)
|
||||
validation_files = filtered_validation_files
|
||||
|
||||
logger.info(
|
||||
f"Train/Test experiments: {[experiment_file.name for experiment_file in experiment_files]}"
|
||||
)
|
||||
logger.info(
|
||||
f"Validation experiments: {[validation_file.name for validation_file in validation_files]}"
|
||||
)
|
||||
|
||||
lidar_projections = np.concatenate(experiments_data)
|
||||
smoke_presence = np.concatenate(experiments_targets)
|
||||
|
||||
np.random.seed(seed)
|
||||
|
||||
shuffled_indices = np.random.permutation(lidar_projections.shape[0])
|
||||
shuffled_lidar_projections = lidar_projections[shuffled_indices]
|
||||
shuffled_smoke_presence = smoke_presence[shuffled_indices]
|
||||
|
||||
split_idx = int(split * shuffled_lidar_projections.shape[0])
|
||||
|
||||
if train:
|
||||
self.data = shuffled_lidar_projections[:split_idx]
|
||||
self.targets = shuffled_smoke_presence[:split_idx]
|
||||
|
||||
else:
|
||||
self.data = shuffled_lidar_projections[split_idx:]
|
||||
self.targets = shuffled_smoke_presence[split_idx:]
|
||||
|
||||
self.data = np.nan_to_num(self.data)
|
||||
|
||||
self.data = torch.tensor(self.data)
|
||||
self.targets = torch.tensor(self.targets, dtype=torch.int8)
|
||||
|
||||
self.semi_targets = torch.zeros_like(self.targets, dtype=torch.int8)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data)
|
||||
|
||||
def __getitem__(self, index):
|
||||
"""Override the original method of the MNIST class.
|
||||
Args:
|
||||
index (int): Index
|
||||
|
||||
Returns:
|
||||
tuple: (image, target, semi_target, index)
|
||||
"""
|
||||
img, target, semi_target = (
|
||||
self.data[index],
|
||||
int(self.targets[index]),
|
||||
int(self.semi_targets[index]),
|
||||
)
|
||||
|
||||
# doing this so that it is consistent with all other datasets
|
||||
# to return a PIL Image
|
||||
img = Image.fromarray(img.numpy(), mode="F")
|
||||
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
|
||||
if self.target_transform is not None:
|
||||
target = self.target_transform(target)
|
||||
|
||||
return img, target, semi_target, index
|
||||
|
||||
|
||||
class EsmeraSplitInference(VisionDataset):
|
||||
def __init__(
|
||||
self,
|
||||
root: str,
|
||||
transforms: Optional[Callable] = None,
|
||||
transform: Optional[Callable] = None,
|
||||
):
|
||||
super(EsmeraSplitInference, self).__init__(root, transforms, transform)
|
||||
logger = logging.getLogger()
|
||||
|
||||
self.experiment_file_path = Path(root)
|
||||
|
||||
if not self.experiment_file_path.is_file():
|
||||
logger.error(
|
||||
"For inference the data path has to be a single experiment file!"
|
||||
)
|
||||
raise Exception("Inference data is not a loadable file!")
|
||||
|
||||
self.data = np.load(self.experiment_file_path)
|
||||
self.data = split_array_into_subarrays(self.data, 16, 256)
|
||||
self.data = np.nan_to_num(self.data)
|
||||
self.data = torch.tensor(self.data)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data)
|
||||
|
||||
def __getitem__(self, index):
|
||||
"""Override the original method of the MNIST class.
|
||||
Args:
|
||||
index (int): Index
|
||||
|
||||
Returns:
|
||||
tuple: (image, index)
|
||||
"""
|
||||
img = self.data[index]
|
||||
|
||||
# doing this so that it is consistent with all other datasets
|
||||
# to return a PIL Image
|
||||
img = Image.fromarray(img.numpy(), mode="F")
|
||||
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
|
||||
return img, index
|
||||
@@ -18,6 +18,9 @@ def load_dataset(
ratio_pollution: float = 0.0,
random_state=None,
inference: bool = False,
k_fold: bool = False,
num_known_normal: int = 0,
num_known_outlier: int = 0,
):
"""Loads the dataset."""

@@ -46,6 +49,9 @@
ratio_known_outlier=ratio_known_outlier,
ratio_pollution=ratio_pollution,
inference=inference,
k_fold=k_fold,
num_known_normal=num_known_normal,
num_known_outlier=num_known_outlier,
)

if dataset_name == "subtersplit":
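A sketch of how the extended load_dataset signature is meant to be called for a k-fold run. Only the keyword arguments added in this hunk are certain; the remaining parameter names are assumed to match the upstream Deep-SAD-PyTorch signature, and all values are illustrative:

# Sketch: requesting a dataset prepared for k-fold training (argument values are examples).
from datasets.main import load_dataset

dataset = load_dataset(
    "subter",                 # dataset_name
    "data/subter",            # data_path, illustrative
    ratio_known_normal=0.0,
    ratio_known_outlier=0.0,
    ratio_pollution=0.0,
    k_fold=True,              # build the shared data_set consumed by loaders_k_fold
    num_known_normal=100,     # cap on labeled normal frames, illustrative
    num_known_outlier=20,     # cap on labeled anomalous frames, illustrative
)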
@@ -1,3 +1,4 @@
import json
import logging
import random
from pathlib import Path
@@ -6,12 +7,13 @@ from typing import Callable, Optional
import numpy as np
import torch
import torchvision.transforms as transforms
from base.torchvision_dataset import TorchvisionDataset
from PIL import Image
from torch.utils.data import Subset
from torch.utils.data.dataset import ConcatDataset
from torchvision.datasets import VisionDataset

from base.torchvision_dataset import TorchvisionDataset

from .preprocessing import create_semisupervised_setting

@@ -23,8 +25,22 @@ class SubTer_Dataset(TorchvisionDataset):
|
||||
ratio_known_outlier: float = 0.0,
|
||||
ratio_pollution: float = 0.0,
|
||||
inference: bool = False,
|
||||
k_fold: bool = False,
|
||||
num_known_normal: int = 0,
|
||||
num_known_outlier: int = 0,
|
||||
only_use_given_semi_targets_for_evaluation: bool = True,
|
||||
):
|
||||
super().__init__(root)
|
||||
if Path(root).is_dir():
|
||||
with open(Path(root) / "semi_targets.json", "r") as f:
|
||||
data = json.load(f)
|
||||
semi_targets_given = {
|
||||
item["filename"]: (
|
||||
item["semi_target_begin_frame"],
|
||||
item["semi_target_end_frame"],
|
||||
)
|
||||
for item in data["files"]
|
||||
}
|
||||
|
||||
# Define normal and outlier classes
|
||||
self.n_classes = 2 # 0: normal, 1: outlier
|
||||
@@ -43,38 +59,146 @@ class SubTer_Dataset(TorchvisionDataset):
|
||||
transform=transform,
|
||||
)
|
||||
else:
|
||||
# Get train set
|
||||
train_set = SubTerTraining(
|
||||
root=self.root,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
train=True,
|
||||
)
|
||||
if k_fold:
|
||||
# Get train set
|
||||
data_set = SubTerTraining(
|
||||
root=self.root,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
train=True,
|
||||
split=1,
|
||||
semi_targets_given=semi_targets_given,
|
||||
)
|
||||
|
||||
# Create semi-supervised setting
|
||||
idx, _, semi_targets = create_semisupervised_setting(
|
||||
train_set.targets.cpu().data.numpy(),
|
||||
self.normal_classes,
|
||||
self.outlier_classes,
|
||||
self.outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
)
|
||||
train_set.semi_targets[idx] = torch.tensor(
|
||||
np.array(semi_targets, dtype=np.int8)
|
||||
) # set respective semi-supervised labels
|
||||
np.random.seed(0)
|
||||
semi_targets = data_set.semi_targets.numpy()
|
||||
|
||||
# Subset train_set to semi-supervised setup
|
||||
self.train_set = Subset(train_set, idx)
|
||||
# Find indices where semi_targets is -1 (abnormal) or 1 (normal)
|
||||
normal_indices = np.where(semi_targets == 1)[0]
|
||||
abnormal_indices = np.where(semi_targets == -1)[0]
|
||||
|
||||
# Get test set
|
||||
self.test_set = SubTerTraining(
|
||||
root=self.root,
|
||||
train=False,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
)
|
||||
# Randomly select the specified number of indices to keep for each category
|
||||
if len(normal_indices) > num_known_normal:
|
||||
keep_normal_indices = np.random.choice(
|
||||
normal_indices, size=num_known_normal, replace=False
|
||||
)
|
||||
else:
|
||||
keep_normal_indices = (
|
||||
normal_indices # Keep all if there are fewer than required
|
||||
)
|
||||
|
||||
if len(abnormal_indices) > num_known_outlier:
|
||||
keep_abnormal_indices = np.random.choice(
|
||||
abnormal_indices, size=num_known_outlier, replace=False
|
||||
)
|
||||
else:
|
||||
keep_abnormal_indices = (
|
||||
abnormal_indices # Keep all if there are fewer than required
|
||||
)
|
||||
|
||||
# Set all values to 0, then restore only the selected -1 and 1 values
|
||||
semi_targets[(semi_targets == 1) | (semi_targets == -1)] = 0
|
||||
semi_targets[keep_normal_indices] = 1
|
||||
semi_targets[keep_abnormal_indices] = -1
|
||||
data_set.semi_targets = torch.tensor(semi_targets, dtype=torch.int8)
|
||||
|
||||
self.data_set = data_set
|
||||
|
||||
# # Create semi-supervised setting
|
||||
# idx, _, semi_targets = create_semisupervised_setting(
|
||||
# data_set.targets.cpu().data.numpy(),
|
||||
# self.normal_classes,
|
||||
# self.outlier_classes,
|
||||
# self.outlier_classes,
|
||||
# ratio_known_normal,
|
||||
# ratio_known_outlier,
|
||||
# ratio_pollution,
|
||||
# )
|
||||
# data_set.semi_targets[idx] = torch.tensor(
|
||||
# np.array(semi_targets, dtype=np.int8)
|
||||
# ) # set respective semi-supervised labels
|
||||
|
||||
# # Subset data_set to semi-supervised setup
|
||||
# self.data_set = Subset(data_set, idx)
|
||||
else:
|
||||
# Get train set
|
||||
if only_use_given_semi_targets_for_evaluation:
|
||||
pass
|
||||
train_set = SubTerTrainingSelective(
|
||||
root=self.root,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
train=True,
|
||||
num_known_outlier=num_known_outlier,
|
||||
semi_targets_given=semi_targets_given,
|
||||
)
|
||||
|
||||
np.random.seed(0)
|
||||
semi_targets = train_set.semi_targets.numpy()
|
||||
|
||||
# Find indices where semi_targets is -1 (abnormal) or 1 (normal)
|
||||
normal_indices = np.where(semi_targets == 1)[0]
|
||||
|
||||
# Randomly select the specified number of indices to keep for each category
|
||||
if len(normal_indices) > num_known_normal:
|
||||
keep_normal_indices = np.random.choice(
|
||||
normal_indices, size=num_known_normal, replace=False
|
||||
)
|
||||
else:
|
||||
keep_normal_indices = (
|
||||
normal_indices # Keep all if there are fewer than required
|
||||
)
|
||||
|
||||
# Set all values to 0, then restore only the selected -1 and 1 values
|
||||
semi_targets[semi_targets == 1] = 0
|
||||
semi_targets[keep_normal_indices] = 1
|
||||
train_set.semi_targets = torch.tensor(
|
||||
semi_targets, dtype=torch.int8
|
||||
)
|
||||
|
||||
self.train_set = train_set
|
||||
self.test_set = SubTerTrainingSelective(
|
||||
root=self.root,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
num_known_outlier=num_known_outlier,
|
||||
train=False,
|
||||
semi_targets_given=semi_targets_given,
|
||||
)
|
||||
else:
|
||||
train_set = SubTerTraining(
|
||||
root=self.root,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
train=True,
|
||||
semi_targets_given=semi_targets_given,
|
||||
)
|
||||
|
||||
# Create semi-supervised setting
|
||||
idx, _, semi_targets = create_semisupervised_setting(
|
||||
train_set.targets.cpu().data.numpy(),
|
||||
self.normal_classes,
|
||||
self.outlier_classes,
|
||||
self.outlier_classes,
|
||||
ratio_known_normal,
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
)
|
||||
train_set.semi_targets[idx] = torch.tensor(
|
||||
np.array(semi_targets, dtype=np.int8)
|
||||
) # set respective semi-supervised labels
|
||||
|
||||
# Subset train_set to semi-supervised setup
|
||||
self.train_set = Subset(train_set, idx)
|
||||
|
||||
# Get test set
|
||||
self.test_set = SubTerTraining(
|
||||
root=self.root,
|
||||
train=False,
|
||||
transform=transform,
|
||||
target_transform=target_transform,
|
||||
semi_targets_given=semi_targets_given,
|
||||
)
|
||||
|
||||
|
||||
class SubTerTraining(VisionDataset):
|
||||
@@ -87,6 +211,8 @@ class SubTerTraining(VisionDataset):
|
||||
train=False,
|
||||
split=0.7,
|
||||
seed=0,
|
||||
semi_targets_given=None,
|
||||
only_use_given_semi_targets_for_evaluation=False,
|
||||
):
|
||||
super(SubTerTraining, self).__init__(
|
||||
root, transforms, transform, target_transform
|
||||
@@ -94,73 +220,120 @@ class SubTerTraining(VisionDataset):
|
||||
|
||||
experiments_data = []
|
||||
experiments_targets = []
|
||||
validation_files = []
|
||||
experiments_semi_targets = []
|
||||
# validation_files = []
|
||||
experiment_files = []
|
||||
experiment_frame_ids = []
|
||||
experiment_file_ids = []
|
||||
file_names = {}
|
||||
|
||||
for experiment_file in Path(root).iterdir():
|
||||
if experiment_file.is_dir() and experiment_file.name == "validation":
|
||||
for validation_file in experiment_file.iterdir():
|
||||
if validation_file.suffix != ".npy":
|
||||
continue
|
||||
validation_files.append(experiment_file)
|
||||
for file_idx, experiment_file in enumerate(sorted(Path(root).iterdir())):
|
||||
# if experiment_file.is_dir() and experiment_file.name == "validation":
|
||||
# for validation_file in experiment_file.iterdir():
|
||||
# if validation_file.suffix != ".npy":
|
||||
# continue
|
||||
# validation_files.append(experiment_file)
|
||||
if experiment_file.suffix != ".npy":
|
||||
continue
|
||||
file_names[file_idx] = experiment_file.name
|
||||
experiment_files.append(experiment_file)
|
||||
experiment_data = np.load(experiment_file)
|
||||
# experiment_data = np.lib.format.open_memmap(experiment_file, mode='r+')
|
||||
experiment_targets = (
|
||||
np.ones(experiment_data.shape[0], dtype=np.int8)
|
||||
if "smoke" in experiment_file.name
|
||||
else np.zeros(experiment_data.shape[0], dtype=np.int8)
|
||||
)
|
||||
# experiment_data = np.lib.format.open_memmap(experiment_file, mode='r+')
|
||||
experiment_semi_targets = np.zeros(experiment_data.shape[0], dtype=np.int8)
|
||||
if "smoke" not in experiment_file.name:
|
||||
experiment_semi_targets = np.ones(
|
||||
experiment_data.shape[0], dtype=np.int8
|
||||
)
|
||||
else:
|
||||
if semi_targets_given:
|
||||
if experiment_file.name in semi_targets_given:
|
||||
semi_target_begin_frame, semi_target_end_frame = (
|
||||
semi_targets_given[experiment_file.name]
|
||||
)
|
||||
experiment_semi_targets[
|
||||
semi_target_begin_frame:semi_target_end_frame
|
||||
] = -1
|
||||
else:
|
||||
experiment_semi_targets = (
|
||||
np.ones(experiment_data.shape[0], dtype=np.int8) * -1
|
||||
)
|
||||
|
||||
experiment_file_ids.append(
|
||||
np.full(experiment_data.shape[0], file_idx, dtype=np.int8)
|
||||
)
|
||||
experiment_frame_ids.append(
|
||||
np.arange(experiment_data.shape[0], dtype=np.int32)
|
||||
)
|
||||
experiments_data.append(experiment_data)
|
||||
experiments_targets.append(experiment_targets)
|
||||
experiments_semi_targets.append(experiment_semi_targets)
|
||||
|
||||
filtered_validation_files = []
|
||||
for validation_file in validation_files:
|
||||
validation_file_name = validation_file.name
|
||||
file_exists_in_experiments = any(
|
||||
experiment_file.name == validation_file_name
|
||||
for experiment_file in experiment_files
|
||||
)
|
||||
if not file_exists_in_experiments:
|
||||
filtered_validation_files.append(validation_file)
|
||||
validation_files = filtered_validation_files
|
||||
# filtered_validation_files = []
|
||||
# for validation_file in validation_files:
|
||||
# validation_file_name = validation_file.name
|
||||
# file_exists_in_experiments = any(
|
||||
# experiment_file.name == validation_file_name
|
||||
# for experiment_file in experiment_files
|
||||
# )
|
||||
# if not file_exists_in_experiments:
|
||||
# filtered_validation_files.append(validation_file)
|
||||
# validation_files = filtered_validation_files
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
logger.info(
|
||||
f"Train/Test experiments: {[experiment_file.name for experiment_file in experiment_files]}"
|
||||
)
|
||||
logger.info(
|
||||
f"Validation experiments: {[validation_file.name for validation_file in validation_files]}"
|
||||
)
|
||||
# logger.info(
|
||||
# f"Validation experiments: {[validation_file.name for validation_file in validation_files]}"
|
||||
# )
|
||||
|
||||
lidar_projections = np.concatenate(experiments_data)
|
||||
smoke_presence = np.concatenate(experiments_targets)
|
||||
semi_targets = np.concatenate(experiments_semi_targets)
|
||||
file_ids = np.concatenate(experiment_file_ids)
|
||||
frame_ids = np.concatenate(experiment_frame_ids)
|
||||
self.file_names = file_names
|
||||
|
||||
np.random.seed(seed)
|
||||
|
||||
shuffled_indices = np.random.permutation(lidar_projections.shape[0])
|
||||
shuffled_lidar_projections = lidar_projections[shuffled_indices]
|
||||
shuffled_smoke_presence = smoke_presence[shuffled_indices]
|
||||
shuffled_file_ids = file_ids[shuffled_indices]
|
||||
shuffled_frame_ids = frame_ids[shuffled_indices]
|
||||
shuffled_semis = semi_targets[shuffled_indices]
|
||||
|
||||
split_idx = int(split * shuffled_lidar_projections.shape[0])
|
||||
|
||||
if train:
|
||||
self.data = shuffled_lidar_projections[:split_idx]
|
||||
self.targets = shuffled_smoke_presence[:split_idx]
|
||||
semi_targets = shuffled_semis[:split_idx]
|
||||
self.shuffled_file_ids = shuffled_file_ids[:split_idx]
|
||||
self.shuffled_frame_ids = shuffled_frame_ids[:split_idx]
|
||||
|
||||
else:
|
||||
self.data = shuffled_lidar_projections[split_idx:]
|
||||
self.targets = shuffled_smoke_presence[split_idx:]
|
||||
semi_targets = shuffled_semis[split_idx:]
|
||||
self.shuffled_file_ids = shuffled_file_ids[split_idx:]
|
||||
self.shuffled_frame_ids = shuffled_frame_ids[split_idx:]
|
||||
|
||||
self.data = np.nan_to_num(self.data)
|
||||
|
||||
self.data = torch.tensor(self.data)
|
||||
self.targets = torch.tensor(self.targets, dtype=torch.int8)
|
||||
|
||||
self.semi_targets = torch.zeros_like(self.targets, dtype=torch.int8)
|
||||
if semi_targets_given is not None:
|
||||
self.semi_targets = torch.tensor(semi_targets, dtype=torch.int8)
|
||||
else:
|
||||
self.semi_targets = torch.zeros_like(self.targets, dtype=torch.int8)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data)
|
||||
@@ -173,10 +346,12 @@ class SubTerTraining(VisionDataset):
|
||||
Returns:
|
||||
tuple: (image, target, semi_target, index)
|
||||
"""
|
||||
img, target, semi_target = (
|
||||
img, target, semi_target, file_id, frame_id = (
|
||||
self.data[index],
|
||||
int(self.targets[index]),
|
||||
int(self.semi_targets[index]),
|
||||
int(self.shuffled_file_ids[index]),
|
||||
int(self.shuffled_frame_ids[index]),
|
||||
)
|
||||
|
||||
# doing this so that it is consistent with all other datasets
|
||||
@@ -189,7 +364,10 @@ class SubTerTraining(VisionDataset):
|
||||
if self.target_transform is not None:
|
||||
target = self.target_transform(target)
|
||||
|
||||
return img, target, semi_target, index
|
||||
return img, target, semi_target, index, (file_id, frame_id)
|
||||
|
||||
def get_file_name_from_idx(self, idx: int):
|
||||
return self.file_names[idx]
|
||||
|
||||
|
||||
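Because __getitem__ now returns (image, target, semi_target, index, (file_id, frame_id)), every consumer that unpacked four values gained an extra slot (the "inputs, _, _, _, _ = data" edits in the baselines). A sketch of consuming the new format and mapping a batch entry back to its source recording (the loader and dataset names are placeholders):

# Sketch: unpacking the extended SubTer sample tuple from a DataLoader batch.
for inputs, targets, semi_targets, idx, (file_ids, frame_ids) in train_loader:
    # file_ids / frame_ids identify the originating .npy experiment and frame, so an
    # anomaly score can be traced back to a specific moment in a specific recording
    name = train_set.get_file_name_from_idx(int(file_ids[0]))
    break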
class SubTerInference(VisionDataset):
|
||||
@@ -235,3 +413,191 @@ class SubTerInference(VisionDataset):
|
||||
img = self.transform(img)
|
||||
|
||||
return img, index
|
||||
|
||||
|
||||
class SubTerTrainingSelective(VisionDataset):
|
||||
def __init__(
|
||||
self,
|
||||
root: str,
|
||||
transforms: Optional[Callable] = None,
|
||||
transform: Optional[Callable] = None,
|
||||
target_transform: Optional[Callable] = None,
|
||||
train=False,
|
||||
num_known_outlier=0,
|
||||
seed=0,
|
||||
semi_targets_given=None,
|
||||
ratio_test_normal_to_anomalous=3,
|
||||
):
|
||||
super(SubTerTrainingSelective, self).__init__(
|
||||
root, transforms, transform, target_transform
|
||||
)
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
if semi_targets_given is None:
|
||||
raise ValueError(
|
||||
"semi_targets_given must be provided for selective training"
|
||||
)
|
||||
|
||||
experiments_data = []
|
||||
experiments_targets = []
|
||||
experiments_semi_targets = []
|
||||
# validation_files = []
|
||||
experiment_files = []
|
||||
experiment_frame_ids = []
|
||||
experiment_file_ids = []
|
||||
file_names = {}
|
||||
|
||||
for file_idx, experiment_file in enumerate(sorted(Path(root).iterdir())):
|
||||
if experiment_file.suffix != ".npy":
|
||||
continue
|
||||
|
||||
file_names[file_idx] = experiment_file.name
|
||||
experiment_files.append(experiment_file)
|
||||
experiment_data = np.load(experiment_file)
|
||||
|
||||
experiment_targets = (
|
||||
np.ones(experiment_data.shape[0], dtype=np.int8)
|
||||
if "smoke" in experiment_file.name
|
||||
else np.zeros(experiment_data.shape[0], dtype=np.int8)
|
||||
)
|
||||
|
||||
experiment_semi_targets = np.zeros(experiment_data.shape[0], dtype=np.int8)
|
||||
if "smoke" not in experiment_file.name:
|
||||
experiment_semi_targets = np.ones(
|
||||
experiment_data.shape[0], dtype=np.int8
|
||||
)
|
||||
elif experiment_file.name in semi_targets_given:
|
||||
semi_target_begin_frame, semi_target_end_frame = semi_targets_given[
|
||||
experiment_file.name
|
||||
]
|
||||
experiment_semi_targets[
|
||||
semi_target_begin_frame:semi_target_end_frame
|
||||
] = -1
|
||||
else:
|
||||
raise ValueError(
|
||||
"smoke experiment not in given semi_targets. required for selective training"
|
||||
)
|
||||
|
||||
experiment_file_ids.append(
|
||||
np.full(experiment_data.shape[0], file_idx, dtype=np.int8)
|
||||
)
|
||||
experiment_frame_ids.append(
|
||||
np.arange(experiment_data.shape[0], dtype=np.int32)
|
||||
)
|
||||
experiments_data.append(experiment_data)
|
||||
experiments_targets.append(experiment_targets)
|
||||
experiments_semi_targets.append(experiment_semi_targets)
|
||||
|
||||
logger.info(
|
||||
f"Train/Test experiments: {[experiment_file.name for experiment_file in experiment_files]}"
|
||||
)
|
||||
|
||||
lidar_projections = np.concatenate(experiments_data)
|
||||
smoke_presence = np.concatenate(experiments_targets)
|
||||
semi_targets = np.concatenate(experiments_semi_targets)
|
||||
file_ids = np.concatenate(experiment_file_ids)
|
||||
frame_ids = np.concatenate(experiment_frame_ids)
|
||||
self.file_names = file_names
|
||||
|
||||
np.random.seed(seed)
|
||||
|
||||
shuffled_indices = np.random.permutation(lidar_projections.shape[0])
|
||||
shuffled_lidar_projections = lidar_projections[shuffled_indices]
|
||||
shuffled_smoke_presence = smoke_presence[shuffled_indices]
|
||||
shuffled_file_ids = file_ids[shuffled_indices]
|
||||
shuffled_frame_ids = frame_ids[shuffled_indices]
|
||||
shuffled_semis = semi_targets[shuffled_indices]
|
||||
|
||||
# check if there are enough known normal and known outlier samples
|
||||
outlier_indices = np.where(shuffled_semis == -1)[0]
|
||||
normal_indices = np.where(shuffled_semis == 1)[0]
|
||||
|
||||
if len(outlier_indices) < num_known_outlier:
|
||||
raise ValueError(
|
||||
f"Not enough known outliers in dataset. Required: {num_known_outlier}, Found: {len(outlier_indices)}"
|
||||
)
|
||||
|
||||
# randomly select known normal and outlier samples
|
||||
keep_outlier_indices = np.random.choice(
|
||||
outlier_indices, size=num_known_outlier, replace=False
|
||||
)
|
||||
|
||||
# put outliers that are not kept into test set and the same number of normal samples aside for testing
|
||||
test_outlier_indices = np.setdiff1d(outlier_indices, keep_outlier_indices)
|
||||
num_test_outliers = len(test_outlier_indices)
|
||||
test_normal_indices = np.random.choice(
|
||||
normal_indices,
|
||||
size=num_test_outliers * ratio_test_normal_to_anomalous,
|
||||
replace=False,
|
||||
)
|
||||
|
||||
# combine test indices
|
||||
test_indices = np.concatenate([test_outlier_indices, test_normal_indices])
|
||||
|
||||
# training indices are the rest
|
||||
train_indices = np.setdiff1d(np.arange(len(shuffled_semis)), test_indices)
|
||||
|
||||
if train:
|
||||
self.data = shuffled_lidar_projections[train_indices]
|
||||
self.targets = shuffled_smoke_presence[train_indices]
|
||||
semi_targets = shuffled_semis[train_indices]
|
||||
self.shuffled_file_ids = shuffled_file_ids[train_indices]
|
||||
self.shuffled_frame_ids = shuffled_frame_ids[train_indices]
|
||||
|
||||
else:
|
||||
self.data = shuffled_lidar_projections[test_indices]
|
||||
self.targets = shuffled_smoke_presence[test_indices]
|
||||
semi_targets = shuffled_semis[test_indices]
|
||||
self.shuffled_file_ids = shuffled_file_ids[test_indices]
|
||||
self.shuffled_frame_ids = shuffled_frame_ids[test_indices]
|
||||
|
||||
self.data = np.nan_to_num(self.data)
|
||||
|
||||
self.data = torch.tensor(self.data)
|
||||
self.targets = torch.tensor(self.targets, dtype=torch.int8)
|
||||
self.semi_targets = torch.tensor(semi_targets, dtype=torch.int8)
|
||||
|
||||
# log some stats to ensure the data is loaded correctly
|
||||
if train:
|
||||
logger.info(
|
||||
f"Training set: {len(self.data)} samples, {sum(self.semi_targets == -1)} semi-supervised samples"
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Test set: {len(self.data)} samples, {sum(self.semi_targets == -1)} semi-supervised samples"
|
||||
)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.data)
|
||||
|
||||
def __getitem__(self, index):
|
||||
"""Override the original method of the MNIST class.
|
||||
Args:
|
||||
index (int): Index
|
||||
|
||||
Returns:
|
||||
tuple: (image, target, semi_target, index)
|
||||
"""
|
||||
img, target, semi_target, file_id, frame_id = (
|
||||
self.data[index],
|
||||
int(self.targets[index]),
|
||||
int(self.semi_targets[index]),
|
||||
int(self.shuffled_file_ids[index]),
|
||||
int(self.shuffled_frame_ids[index]),
|
||||
)
|
||||
|
||||
# doing this so that it is consistent with all other datasets
|
||||
# to return a PIL Image
|
||||
img = Image.fromarray(img.numpy(), mode="F")
|
||||
|
||||
if self.transform is not None:
|
||||
img = self.transform(img)
|
||||
|
||||
if self.target_transform is not None:
|
||||
target = self.target_transform(target)
|
||||
|
||||
return img, target, semi_target, index, (file_id, frame_id)
|
||||
|
||||
def get_file_name_from_idx(self, idx: int):
|
||||
return self.file_names[idx]
|
||||
|
||||
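SubTer_Dataset now expects a semi_targets.json next to the .npy experiment files and marks the listed frame range of each smoke recording as known-anomalous (-1). A sketch of the expected layout, using the field names read in the diff; the concrete filename and frame numbers are invented:

# Sketch of <data_root>/semi_targets.json as consumed by SubTer_Dataset.
import json

semi_targets = {
    "files": [
        {
            "filename": "experiment_smoke_01.npy",  # hypothetical recording name
            "semi_target_begin_frame": 120,         # first frame labeled as smoke
            "semi_target_end_frame": 480,           # slice end (exclusive): frames 120..479 get -1
        },
    ]
}

with open("semi_targets.json", "w") as fp:
    json.dump(semi_targets, fp, indent=2)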
@@ -5,6 +5,9 @@ from pathlib import Path
import click
import numpy as np
import torch

from baselines.isoforest import IsoForest
from baselines.ocsvm import OCSVM
from datasets.main import load_dataset
from DeepSAD import DeepSAD
from utils.config import Config
@@ -64,6 +67,30 @@ from utils.visualization.plot_images_grid import plot_images_grid
|
||||
)
|
||||
@click.argument("xp_path", type=click.Path(exists=True))
|
||||
@click.argument("data_path", type=click.Path(exists=True))
|
||||
@click.option(
|
||||
"--k_fold",
|
||||
type=bool,
|
||||
default=False,
|
||||
help="Use k-fold cross-validation for training (default: False).",
|
||||
)
|
||||
@click.option(
|
||||
"--k_fold_num",
|
||||
type=int,
|
||||
default=5,
|
||||
help="Number of folds for k-fold cross-validation (default: 5).",
|
||||
)
|
||||
@click.option(
|
||||
"--num_known_normal",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Number of max known normal samples (semi-supervised-setting) (default: 0).",
|
||||
)
|
||||
@click.option(
|
||||
"--num_known_outlier",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Number of max known outlier samples (semi-supervised-setting) (default: 0).",
|
||||
)
|
||||
@click.option(
|
||||
"--load_config",
|
||||
type=click.Path(exists=True),
|
||||
@@ -214,12 +241,52 @@ from utils.visualization.plot_images_grid import plot_images_grid
|
||||
"If 1, outlier class as specified in --known_outlier_class option."
|
||||
"If > 1, the specified number of outlier classes will be sampled at random.",
|
||||
)
|
||||
@click.option(
|
||||
"--ocsvm_kernel",
|
||||
type=click.Choice(["rbf", "linear", "poly"]),
|
||||
default="rbf",
|
||||
help="Kernel for the OC-SVM",
|
||||
)
|
||||
@click.option(
|
||||
"--ocsvm_nu",
|
||||
type=float,
|
||||
default=0.1,
|
||||
help="OC-SVM hyperparameter nu (must be 0 < nu <= 1).",
|
||||
)
|
||||
@click.option(
|
||||
"--isoforest_n_estimators",
|
||||
type=int,
|
||||
default=100,
|
||||
help="Set the number of base estimators in the ensemble (default: 100).",
|
||||
)
|
||||
@click.option(
|
||||
"--isoforest_max_samples",
|
||||
type=int,
|
||||
default=256,
|
||||
help="Set the number of samples drawn to train each base estimator (default: 256).",
|
||||
)
|
||||
@click.option(
|
||||
"--isoforest_contamination",
|
||||
type=float,
|
||||
default=0.1,
|
||||
help="Expected fraction of anomalies in the training set. (default: 0.1).",
|
||||
)
|
||||
@click.option(
|
||||
"--isoforest_n_jobs_model",
|
||||
type=int,
|
||||
default=-1,
|
||||
help="Number of jobs for model training.",
|
||||
)
|
||||
def main(
|
||||
action,
|
||||
dataset_name,
|
||||
net_name,
|
||||
xp_path,
|
||||
data_path,
|
||||
k_fold,
|
||||
k_fold_num,
|
||||
num_known_normal,
|
||||
num_known_outlier,
|
||||
load_config,
|
||||
load_model,
|
||||
eta,
|
||||
@@ -246,6 +313,12 @@ def main(
|
||||
normal_class,
|
||||
known_outlier_class,
|
||||
n_known_outlier_classes,
|
||||
ocsvm_kernel,
|
||||
ocsvm_nu,
|
||||
isoforest_n_estimators,
|
||||
isoforest_max_samples,
|
||||
isoforest_contamination,
|
||||
isoforest_n_jobs_model,
|
||||
):
|
||||
"""
|
||||
Deep SAD, a method for deep semi-supervised anomaly detection.
|
||||
@@ -318,6 +391,7 @@ def main(
|
||||
|
||||
if action == "train":
|
||||
# Load data
|
||||
# TODO: pass num of folds
|
||||
dataset = load_dataset(
|
||||
dataset_name,
|
||||
data_path,
|
||||
@@ -328,135 +402,297 @@ def main(
|
||||
ratio_known_outlier,
|
||||
ratio_pollution,
|
||||
random_state=np.random.RandomState(cfg.settings["seed"]),
|
||||
k_fold=k_fold,
|
||||
num_known_normal=num_known_normal,
|
||||
num_known_outlier=num_known_outlier,
|
||||
)
|
||||
# Log random sample of known anomaly classes if more than 1 class
|
||||
if n_known_outlier_classes > 1:
|
||||
logger.info("Known anomaly classes: %s" % (dataset.known_outlier_classes,))
|
||||
|
||||
# Initialize DeepSAD model and set neural network phi
|
||||
deepSAD = DeepSAD(cfg.settings["eta"])
|
||||
deepSAD.set_network(net_name)
|
||||
train_passes = range(k_fold_num) if k_fold else [None]
|
||||
|
||||
# If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
|
||||
if load_model:
|
||||
deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device)
|
||||
logger.info("Loading model from %s." % load_model)
|
||||
train_isoforest = True
|
||||
train_ocsvm = False
|
||||
train_deepsad = True
|
||||
|
||||
logger.info("Pretraining: %s" % pretrain)
|
||||
if pretrain:
|
||||
# Log pretraining details
|
||||
logger.info("Pretraining optimizer: %s" % cfg.settings["ae_optimizer_name"])
|
||||
logger.info("Pretraining learning rate: %g" % cfg.settings["ae_lr"])
|
||||
logger.info("Pretraining epochs: %d" % cfg.settings["ae_n_epochs"])
|
||||
for fold_idx in train_passes:
|
||||
if fold_idx is None:
|
||||
logger.info("Single training without k-fold")
|
||||
else:
|
||||
logger.info(f"Fold {fold_idx + 1}/{k_fold_num}")
|
||||
|
||||
# Initialize OC-SVM model
|
||||
if train_ocsvm:
|
||||
ocsvm = OCSVM(kernel=ocsvm_kernel, nu=ocsvm_nu, hybrid=False)
|
||||
|
||||
# Initialize Isolation Forest model
|
||||
if train_isoforest:
|
||||
Isoforest = IsoForest(
|
||||
hybrid=False,
|
||||
n_estimators=isoforest_n_estimators,
|
||||
max_samples=isoforest_max_samples,
|
||||
contamination=isoforest_contamination,
|
||||
n_jobs=isoforest_n_jobs_model,
|
||||
seed=seed,
|
||||
)
|
||||
|
||||
# Initialize DeepSAD model and set neural network phi
|
||||
if train_deepsad:
|
||||
deepSAD = DeepSAD(cfg.settings["eta"])
|
||||
deepSAD.set_network(net_name)
|
||||
|
||||
# If specified, load Deep SAD model (center c, network weights, and possibly autoencoder weights)
|
||||
if train_deepsad and load_model:
|
||||
deepSAD.load_model(
|
||||
model_path=load_model, load_ae=True, map_location=device
|
||||
)
|
||||
logger.info("Loading model from %s." % load_model)
|
||||
|
||||
logger.info("Pretraining: %s" % pretrain)
|
||||
if train_deepsad and pretrain:
|
||||
# Log pretraining details
|
||||
logger.info(
|
||||
"Pretraining optimizer: %s" % cfg.settings["ae_optimizer_name"]
|
||||
)
|
||||
logger.info("Pretraining learning rate: %g" % cfg.settings["ae_lr"])
|
||||
logger.info("Pretraining epochs: %d" % cfg.settings["ae_n_epochs"])
|
||||
logger.info(
|
||||
"Pretraining learning rate scheduler milestones: %s"
|
||||
% (cfg.settings["ae_lr_milestone"],)
|
||||
)
|
||||
logger.info(
|
||||
"Pretraining batch size: %d" % cfg.settings["ae_batch_size"]
|
||||
)
|
||||
logger.info(
|
||||
"Pretraining weight decay: %g" % cfg.settings["ae_weight_decay"]
|
||||
)
|
||||
|
||||
# Pretrain model on dataset (via autoencoder)
|
||||
deepSAD.pretrain(
|
||||
dataset,
|
||||
optimizer_name=cfg.settings["ae_optimizer_name"],
|
||||
lr=cfg.settings["ae_lr"],
|
||||
n_epochs=cfg.settings["ae_n_epochs"],
|
||||
lr_milestones=cfg.settings["ae_lr_milestone"],
|
||||
batch_size=cfg.settings["ae_batch_size"],
|
||||
weight_decay=cfg.settings["ae_weight_decay"],
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
k_fold_idx=fold_idx,
|
||||
)
|
||||
|
||||
# Save pretraining results
|
||||
if fold_idx is None:
|
||||
deepSAD.save_ae_results(export_json=xp_path + "/ae_results.json")
|
||||
else:
|
||||
deepSAD.save_ae_results(
|
||||
export_json=xp_path + f"/ae_results_{fold_idx}.json"
|
||||
)
|
||||
|
||||
# Log training details
|
||||
logger.info("Training optimizer: %s" % cfg.settings["optimizer_name"])
|
||||
logger.info("Training learning rate: %g" % cfg.settings["lr"])
|
||||
logger.info("Training epochs: %d" % cfg.settings["n_epochs"])
|
||||
logger.info(
|
||||
"Pretraining learning rate scheduler milestones: %s"
|
||||
% (cfg.settings["ae_lr_milestone"],)
|
||||
)
|
||||
logger.info("Pretraining batch size: %d" % cfg.settings["ae_batch_size"])
|
||||
logger.info(
|
||||
"Pretraining weight decay: %g" % cfg.settings["ae_weight_decay"]
|
||||
"Training learning rate scheduler milestones: %s"
|
||||
% (cfg.settings["lr_milestone"],)
|
||||
)
|
||||
logger.info("Training batch size: %d" % cfg.settings["batch_size"])
|
||||
logger.info("Training weight decay: %g" % cfg.settings["weight_decay"])
|
||||
|
||||
# Pretrain model on dataset (via autoencoder)
|
||||
deepSAD.pretrain(
|
||||
dataset,
|
||||
optimizer_name=cfg.settings["ae_optimizer_name"],
|
||||
lr=cfg.settings["ae_lr"],
|
||||
n_epochs=cfg.settings["ae_n_epochs"],
|
||||
lr_milestones=cfg.settings["ae_lr_milestone"],
|
||||
batch_size=cfg.settings["ae_batch_size"],
|
||||
weight_decay=cfg.settings["ae_weight_decay"],
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
)
|
||||
|
||||
# Save pretraining results
|
||||
deepSAD.save_ae_results(export_json=xp_path + "/ae_results.json")
|
||||
|
||||
# Log training details
|
||||
logger.info("Training optimizer: %s" % cfg.settings["optimizer_name"])
|
||||
logger.info("Training learning rate: %g" % cfg.settings["lr"])
|
||||
logger.info("Training epochs: %d" % cfg.settings["n_epochs"])
|
||||
logger.info(
|
||||
"Training learning rate scheduler milestones: %s"
|
||||
% (cfg.settings["lr_milestone"],)
|
||||
)
|
||||
logger.info("Training batch size: %d" % cfg.settings["batch_size"])
|
||||
logger.info("Training weight decay: %g" % cfg.settings["weight_decay"])
|
||||
|
||||
# Train model on dataset
|
||||
deepSAD.train(
|
||||
dataset,
|
||||
optimizer_name=cfg.settings["optimizer_name"],
|
||||
lr=cfg.settings["lr"],
|
||||
n_epochs=cfg.settings["n_epochs"],
|
||||
lr_milestones=cfg.settings["lr_milestone"],
|
||||
batch_size=cfg.settings["batch_size"],
|
||||
weight_decay=cfg.settings["weight_decay"],
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
)
|
||||
|
||||
# Test model
|
||||
deepSAD.test(dataset, device=device, n_jobs_dataloader=n_jobs_dataloader)
|
||||
|
||||
# Save results, model, and configuration
|
||||
deepSAD.save_results(export_json=xp_path + "/results.json")
|
||||
deepSAD.save_model(export_model=xp_path + "/model.tar")
|
||||
cfg.save_config(export_json=xp_path + "/config.json")
|
||||
|
||||
# Plot most anomalous and most normal test samples
|
||||
indices, labels, scores = zip(*deepSAD.results["test_scores"])
|
||||
indices, labels, scores = np.array(indices), np.array(labels), np.array(scores)
|
||||
idx_all_sorted = indices[np.argsort(scores)] # from lowest to highest score
|
||||
idx_normal_sorted = indices[labels == 0][
|
||||
np.argsort(scores[labels == 0])
|
||||
] # from lowest to highest score
|
||||
|
||||
if dataset_name in ("mnist", "fmnist", "cifar10", "elpv"):
|
||||
if dataset_name in ("mnist", "fmnist", "elpv"):
|
||||
X_all_low = dataset.test_set.data[idx_all_sorted[:32], ...].unsqueeze(1)
|
||||
X_all_high = dataset.test_set.data[idx_all_sorted[-32:], ...].unsqueeze(
|
||||
1
|
||||
# Train model on dataset
|
||||
if train_deepsad:
|
||||
deepSAD.train(
|
||||
dataset,
|
||||
optimizer_name=cfg.settings["optimizer_name"],
|
||||
lr=cfg.settings["lr"],
|
||||
n_epochs=cfg.settings["n_epochs"],
|
||||
lr_milestones=cfg.settings["lr_milestone"],
|
||||
batch_size=cfg.settings["batch_size"],
|
||||
weight_decay=cfg.settings["weight_decay"],
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
k_fold_idx=fold_idx,
|
||||
)
|
||||
X_normal_low = dataset.test_set.data[
|
||||
idx_normal_sorted[:32], ...
|
||||
].unsqueeze(1)
|
||||
X_normal_high = dataset.test_set.data[
|
||||
idx_normal_sorted[-32:], ...
|
||||
].unsqueeze(1)
|
||||
|
||||
if dataset_name == "cifar10":
|
||||
X_all_low = torch.tensor(
|
||||
np.transpose(
|
||||
dataset.test_set.data[idx_all_sorted[:32], ...], (0, 3, 1, 2)
|
||||
# Train model on dataset
|
||||
if train_ocsvm:
|
||||
ocsvm.train(
|
||||
dataset,
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
k_fold_idx=fold_idx,
|
||||
batch_size=8,
|
||||
)
|
||||
|
||||
# Train model on dataset
|
||||
if train_isoforest:
|
||||
Isoforest.train(
|
||||
dataset,
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
k_fold_idx=fold_idx,
|
||||
)
|
||||
|
||||
# Test model
|
||||
if train_deepsad:
|
||||
deepSAD.test(
|
||||
dataset,
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
k_fold_idx=fold_idx,
|
||||
)
|
||||
|
||||
# Test model
|
||||
if train_ocsvm:
|
||||
ocsvm.test(
|
||||
dataset,
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
k_fold_idx=fold_idx,
|
||||
batch_size=8,
|
||||
)
|
||||
|
||||
# Test model
|
||||
if train_isoforest:
|
||||
Isoforest.test(
|
||||
dataset,
|
||||
device=device,
|
||||
n_jobs_dataloader=n_jobs_dataloader,
|
||||
k_fold_idx=fold_idx,
|
||||
)
|
||||
|
||||
# Save results, model, and configuration
|
||||
if fold_idx is None:
|
||||
if train_deepsad:
|
||||
deepSAD.save_results(export_pkl=xp_path + "/results.pkl")
|
||||
deepSAD.save_model(export_model=xp_path + "/model.tar")
|
||||
if train_ocsvm:
|
||||
ocsvm.save_results(export_pkl=xp_path + "/results_ocsvm.pkl")
|
||||
if train_isoforest:
|
||||
Isoforest.save_results(
|
||||
export_pkl=xp_path + "/results_isoforest.pkl"
|
||||
)
|
||||
)
|
||||
X_all_high = torch.tensor(
|
||||
np.transpose(
|
||||
dataset.test_set.data[idx_all_sorted[-32:], ...], (0, 3, 1, 2)
|
||||
else:
|
||||
if train_deepsad:
|
||||
deepSAD.save_results(
|
||||
export_pkl=xp_path + f"/results_{fold_idx}.pkl"
|
||||
)
|
||||
)
|
||||
X_normal_low = torch.tensor(
|
||||
np.transpose(
|
||||
dataset.test_set.data[idx_normal_sorted[:32], ...], (0, 3, 1, 2)
|
||||
deepSAD.save_model(export_model=xp_path + f"/model_{fold_idx}.tar")
|
||||
if train_ocsvm:
|
||||
ocsvm.save_results(
|
||||
export_pkl=xp_path + f"/results_ocsvm_{fold_idx}.pkl"
|
||||
)
|
||||
)
|
||||
X_normal_high = torch.tensor(
|
||||
np.transpose(
|
||||
dataset.test_set.data[idx_normal_sorted[-32:], ...],
|
||||
(0, 3, 1, 2),
|
||||
if train_isoforest:
|
||||
Isoforest.save_results(
|
||||
export_pkl=xp_path + f"/results_isoforest_{fold_idx}.pkl"
|
||||
)
|
||||
)
|
||||
|
||||
plot_images_grid(X_all_low, export_img=xp_path + "/all_low", padding=2)
|
||||
plot_images_grid(X_all_high, export_img=xp_path + "/all_high", padding=2)
|
||||
plot_images_grid(
|
||||
X_normal_low, export_img=xp_path + "/normals_low", padding=2
|
||||
)
|
||||
plot_images_grid(
|
||||
X_normal_high, export_img=xp_path + "/normals_high", padding=2
|
||||
)
|
||||
cfg.save_config(export_json=xp_path + "/config.json")
|
||||
|
||||
# Plot most anomalous and most normal test samples
|
||||
if train_deepsad:
|
||||
indices, labels, scores = zip(*deepSAD.results["test_scores"])
|
||||
indices, labels, scores = (
|
||||
np.array(indices),
|
||||
np.array(labels),
|
||||
np.array(scores),
|
||||
)
|
||||
idx_all_sorted = indices[
|
||||
np.argsort(scores)
|
||||
] # from lowest to highest score
|
||||
idx_normal_sorted = indices[labels == 0][
|
||||
np.argsort(scores[labels == 0])
|
||||
] # from lowest to highest score
|
||||
|
||||
if dataset_name in (
|
||||
"mnist",
|
||||
"fmnist",
|
||||
"cifar10",
|
||||
"elpv",
|
||||
):
|
||||
if dataset_name in (
|
||||
"mnist",
|
||||
"fmnist",
|
||||
"elpv",
|
||||
):
|
||||
X_all_low = dataset.test_set.data[
|
||||
idx_all_sorted[:32], ...
|
||||
].unsqueeze(1)
|
||||
X_all_high = dataset.test_set.data[
|
||||
idx_all_sorted[-32:], ...
|
||||
].unsqueeze(1)
|
||||
X_normal_low = dataset.test_set.data[
|
||||
idx_normal_sorted[:32], ...
|
||||
].unsqueeze(1)
|
||||
X_normal_high = dataset.test_set.data[
|
||||
idx_normal_sorted[-32:], ...
|
||||
].unsqueeze(1)
|
||||
|
||||
if dataset_name == "cifar10":
|
||||
X_all_low = torch.tensor(
|
||||
np.transpose(
|
||||
dataset.test_set.data[idx_all_sorted[:32], ...],
|
||||
(0, 3, 1, 2),
|
||||
)
|
||||
)
|
||||
X_all_high = torch.tensor(
|
||||
np.transpose(
|
||||
dataset.test_set.data[idx_all_sorted[-32:], ...],
|
||||
(0, 3, 1, 2),
|
||||
)
|
||||
)
|
||||
X_normal_low = torch.tensor(
|
||||
np.transpose(
|
||||
dataset.test_set.data[idx_normal_sorted[:32], ...],
|
||||
(0, 3, 1, 2),
|
||||
)
|
||||
)
|
||||
X_normal_high = torch.tensor(
|
||||
np.transpose(
|
||||
dataset.test_set.data[idx_normal_sorted[-32:], ...],
|
||||
(0, 3, 1, 2),
|
||||
)
|
||||
)
|
||||
|
||||
if fold_idx is None:
|
||||
plot_images_grid(
|
||||
X_all_low, export_img=xp_path + "/all_low", padding=2
|
||||
)
|
||||
plot_images_grid(
|
||||
X_all_high, export_img=xp_path + "/all_high", padding=2
|
||||
)
|
||||
plot_images_grid(
|
||||
X_normal_low, export_img=xp_path + "/normals_low", padding=2
|
||||
)
|
||||
plot_images_grid(
|
||||
X_normal_high,
|
||||
export_img=xp_path + "/normals_high",
|
||||
padding=2,
|
||||
)
|
||||
else:
|
||||
plot_images_grid(
|
||||
X_all_low,
|
||||
export_img=xp_path + f"/all_low_{fold_idx}",
|
||||
padding=2,
|
||||
)
|
||||
plot_images_grid(
|
||||
X_all_high,
|
||||
export_img=xp_path + f"/all_high_{fold_idx}",
|
||||
padding=2,
|
||||
)
|
||||
plot_images_grid(
|
||||
X_normal_low,
|
||||
export_img=xp_path + f"/normals_low_{fold_idx}",
|
||||
padding=2,
|
||||
)
|
||||
plot_images_grid(
|
||||
X_normal_high,
|
||||
export_img=xp_path + f"/normals_high_{fold_idx}",
|
||||
padding=2,
|
||||
)
|
||||
|
||||
elif action == "infer":
|
||||
dataset = load_dataset(
|
||||
dataset_name,
|
||||
@@ -488,14 +724,23 @@ def main(
|
||||
deepSAD.load_model(model_path=load_model, load_ae=True, map_location=device)
|
||||
logger.info("Loading model from %s." % load_model)
|
||||
|
||||
inference_results = deepSAD.inference(
|
||||
inference_results, all_outputs = deepSAD.inference(
|
||||
dataset, device=device, n_jobs_dataloader=n_jobs_dataloader
|
||||
)
|
||||
inference_results_path = (
|
||||
Path(xp_path) / "inference" / Path(dataset.root).with_suffix(".npy").stem
|
||||
Path(xp_path)
|
||||
/ "inference"
|
||||
/ Path(Path(dataset.root).stem).with_suffix(".npy")
|
||||
)
|
||||
inference_outputs_path = (
|
||||
Path(xp_path)
|
||||
/ "inference"
|
||||
/ Path(Path(dataset.root).stem + "_outputs").with_suffix(".npy")
|
||||
)
|
||||
|
||||
inference_results_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
np.save(inference_results_path, inference_results, fix_imports=False)
|
||||
np.save(inference_outputs_path, all_outputs, fix_imports=False)
|
||||
|
||||
logger.info(
|
||||
f"Inference: median={np.median(inference_results)} mean={np.mean(inference_results)} min={inference_results.min()} max={inference_results.max()}"
|
||||
|
||||
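# Example of the output layout produced above (paths illustrative): with
# xp_path="infer/DeepSAD/subter_test" and dataset.root=".../3_smoke_human_walking_2023-01-23.bag",
# the scores are written to infer/DeepSAD/subter_test/inference/3_smoke_human_walking_2023-01-23.npy
# and the per-frame embeddings to .../3_smoke_human_walking_2023-01-23_outputs.npy; the
# plotting tools further down load score files from exactly this kind of path.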
@@ -1,18 +1,23 @@
|
||||
from base.base_trainer import BaseTrainer
|
||||
from base.base_dataset import BaseADDataset
|
||||
from base.base_net import BaseNet
|
||||
from torch.utils.data.dataloader import DataLoader
|
||||
from sklearn.metrics import roc_auc_score
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
import numpy as np
|
||||
from sklearn.metrics import (
|
||||
average_precision_score,
|
||||
precision_recall_curve,
|
||||
roc_auc_score,
|
||||
roc_curve,
|
||||
)
|
||||
from torch.utils.data.dataloader import DataLoader
|
||||
|
||||
from base.base_dataset import BaseADDataset
|
||||
from base.base_net import BaseNet
|
||||
from base.base_trainer import BaseTrainer
|
||||
|
||||
|
||||
class DeepSADTrainer(BaseTrainer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
c,
|
||||
@@ -50,13 +55,22 @@ class DeepSADTrainer(BaseTrainer):
|
||||
self.test_time = None
|
||||
self.test_scores = None
|
||||
|
||||
def train(self, dataset: BaseADDataset, net: BaseNet):
|
||||
def train(
|
||||
self, dataset: BaseADDataset, net: BaseNet, k_fold_idx: int = None
|
||||
) -> BaseNet:
|
||||
logger = logging.getLogger()
|
||||
|
||||
# Get train data loader
|
||||
train_loader, _, _ = dataset.loaders(
|
||||
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
|
||||
)
|
||||
if k_fold_idx is not None:
|
||||
train_loader, _ = dataset.loaders_k_fold(
|
||||
fold_idx=k_fold_idx,
|
||||
batch_size=self.batch_size,
|
||||
num_workers=self.n_jobs_dataloader,
|
||||
)
|
||||
else:
|
||||
train_loader, _, _ = dataset.loaders(
|
||||
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
|
||||
)
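# loaders_k_fold itself is not part of this diff; a minimal sketch of one possible
# implementation (assumptions: folds taken over the train set with sklearn's KFold,
# a fixed number of splits and a fixed seed):
#
#   import numpy as np
#   from sklearn.model_selection import KFold
#   from torch.utils.data import DataLoader, Subset
#
#   def loaders_k_fold(train_set, fold_idx, batch_size, num_workers=0, n_splits=5, seed=0):
#       folds = list(KFold(n_splits=n_splits, shuffle=True, random_state=seed)
#                    .split(np.arange(len(train_set))))
#       train_idx, val_idx = folds[fold_idx]
#       train_loader = DataLoader(Subset(train_set, train_idx), batch_size=batch_size,
#                                 shuffle=True, num_workers=num_workers)
#       val_loader = DataLoader(Subset(train_set, val_idx), batch_size=batch_size,
#                               shuffle=False, num_workers=num_workers)
#       return train_loader, val_loader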
|
||||
|
||||
# Set device for network
|
||||
net = net.to(self.device)
|
||||
@@ -82,14 +96,14 @@ class DeepSADTrainer(BaseTrainer):
|
||||
start_time = time.time()
|
||||
net.train()
|
||||
for epoch in range(self.n_epochs):
|
||||
|
||||
epoch_loss = 0.0
|
||||
n_batches = 0
|
||||
epoch_start_time = time.time()
|
||||
for data in train_loader:
|
||||
inputs, _, semi_targets, _ = data
|
||||
inputs, semi_targets = inputs.to(self.device), semi_targets.to(
|
||||
self.device
|
||||
inputs, _, semi_targets, _, _ = data
|
||||
inputs, semi_targets = (
|
||||
inputs.to(self.device),
|
||||
semi_targets.to(self.device),
|
||||
)
|
||||
|
||||
# Zero the network parameter gradients
|
||||
@@ -145,6 +159,7 @@ class DeepSADTrainer(BaseTrainer):
|
||||
logger.info("Starting inference...")
|
||||
n_batches = 0
|
||||
start_time = time.time()
|
||||
all_outputs = np.zeros((len(inference_loader.dataset), 1024), dtype=np.float32)
|
||||
scores = []
|
||||
net.eval()
|
||||
with torch.no_grad():
|
||||
@@ -155,6 +170,10 @@ class DeepSADTrainer(BaseTrainer):
|
||||
idx = idx.to(self.device)
|
||||
|
||||
outputs = net(inputs)
|
||||
all_idx = n_batches * self.batch_size
|
||||
all_outputs[all_idx : all_idx + len(inputs)] = (
|
||||
outputs.cpu().data.numpy()
|
||||
)
|
||||
dist = torch.sum((outputs - self.c) ** 2, dim=1)
|
||||
scores += dist.cpu().data.numpy().tolist()
|
||||
|
||||
@@ -166,15 +185,22 @@ class DeepSADTrainer(BaseTrainer):
|
||||
logger.info("Inference Time: {:.3f}s".format(self.inference_time))
|
||||
logger.info("Finished inference.")
|
||||
|
||||
return np.array(scores)
|
||||
return np.array(scores), all_outputs
|
||||
|
||||
def test(self, dataset: BaseADDataset, net: BaseNet):
|
||||
def test(self, dataset: BaseADDataset, net: BaseNet, k_fold_idx: int = None):
|
||||
logger = logging.getLogger()
|
||||
|
||||
# Get test data loader
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
|
||||
)
|
||||
if k_fold_idx is not None:
|
||||
_, test_loader = dataset.loaders_k_fold(
|
||||
fold_idx=k_fold_idx,
|
||||
batch_size=self.batch_size,
|
||||
num_workers=self.n_jobs_dataloader,
|
||||
)
|
||||
else:
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
|
||||
)
|
||||
|
||||
# Set device for network
|
||||
net = net.to(self.device)
|
||||
@@ -188,7 +214,7 @@ class DeepSADTrainer(BaseTrainer):
|
||||
net.eval()
|
||||
with torch.no_grad():
|
||||
for data in test_loader:
|
||||
inputs, labels, semi_targets, idx = data
|
||||
inputs, labels, semi_targets, idx, _ = data
|
||||
|
||||
inputs = inputs.to(self.device)
|
||||
labels = labels.to(self.device)
|
||||
@@ -225,6 +251,9 @@ class DeepSADTrainer(BaseTrainer):
|
||||
labels = np.array(labels)
|
||||
scores = np.array(scores)
|
||||
self.test_auc = roc_auc_score(labels, scores)
|
||||
self.test_roc = roc_curve(labels, scores)
|
||||
self.test_prc = precision_recall_curve(labels, scores)
|
||||
self.test_ap = average_precision_score(labels, scores)
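# roc_curve and precision_recall_curve return (fpr, tpr, thresholds) and
# (precision, recall, thresholds) tuples; these are what the evaluate_* tools
# later unpack from the pickled results.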
|
||||
|
||||
# Log results
|
||||
logger.info("Test Loss: {:.6f}".format(epoch_loss / n_batches))
|
||||
@@ -241,7 +270,7 @@ class DeepSADTrainer(BaseTrainer):
|
||||
with torch.no_grad():
|
||||
for data in train_loader:
|
||||
# get the inputs of the batch
|
||||
inputs, _, _, _ = data
|
||||
inputs, _, _, _, _ = data
|
||||
inputs = inputs.to(self.device)
|
||||
outputs = net(inputs)
|
||||
n_samples += outputs.shape[0]
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
from base.base_trainer import BaseTrainer
|
||||
from base.base_dataset import BaseADDataset
|
||||
from base.base_net import BaseNet
|
||||
from sklearn.metrics import roc_auc_score
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
import numpy as np
|
||||
from sklearn.metrics import roc_auc_score
|
||||
|
||||
from base.base_dataset import BaseADDataset
|
||||
from base.base_net import BaseNet
|
||||
from base.base_trainer import BaseTrainer
|
||||
|
||||
|
||||
class AETrainer(BaseTrainer):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
optimizer_name: str = "adam",
|
||||
@@ -40,13 +40,20 @@ class AETrainer(BaseTrainer):
|
||||
self.test_auc = None
|
||||
self.test_time = None
|
||||
|
||||
def train(self, dataset: BaseADDataset, ae_net: BaseNet):
|
||||
def train(self, dataset: BaseADDataset, ae_net: BaseNet, k_fold_idx: int = None):
|
||||
logger = logging.getLogger()
|
||||
|
||||
# Get train data loader
|
||||
train_loader, _, _ = dataset.loaders(
|
||||
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
|
||||
)
|
||||
if k_fold_idx is not None:
|
||||
train_loader, _ = dataset.loaders_k_fold(
|
||||
fold_idx=k_fold_idx,
|
||||
batch_size=self.batch_size,
|
||||
num_workers=self.n_jobs_dataloader,
|
||||
)
|
||||
else:
|
||||
train_loader, _, _ = dataset.loaders(
|
||||
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
|
||||
)
|
||||
|
||||
# Set loss
|
||||
criterion = nn.MSELoss(reduction="none")
|
||||
@@ -69,14 +76,23 @@ class AETrainer(BaseTrainer):
|
||||
logger.info("Starting pretraining...")
|
||||
start_time = time.time()
|
||||
ae_net.train()
|
||||
for epoch in range(self.n_epochs):
|
||||
|
||||
all_training_data = []
|
||||
for epoch in range(self.n_epochs):
|
||||
epoch_loss = 0.0
|
||||
n_batches = 0
|
||||
epoch_start_time = time.time()
|
||||
for data in train_loader:
|
||||
inputs, _, _, _ = data
|
||||
inputs, _, _, _, file_frame_ids = data
|
||||
inputs = inputs.to(self.device)
|
||||
all_training_data.append(
|
||||
np.dstack(
|
||||
(
|
||||
file_frame_ids[0].detach().cpu().numpy(),
|
||||
file_frame_ids[1].detach().cpu().numpy(),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
# Zero the network parameter gradients
|
||||
optimizer.zero_grad()
|
||||
@@ -107,17 +123,31 @@ class AETrainer(BaseTrainer):
|
||||
|
||||
self.train_time = time.time() - start_time
|
||||
logger.info("Pretraining Time: {:.3f}s".format(self.train_time))
|
||||
|
||||
all_training_data = np.concatenate([x.squeeze() for x in all_training_data])
|
||||
|
||||
sorted_training_data = all_training_data[
|
||||
np.lexsort((all_training_data[:, 1], all_training_data[:, 0]))
|
||||
]
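# Illustration of the lexsort key order above (the last key is the primary one):
#   pairs = np.array([[2, 5], [1, 9], [1, 3]])        # columns: file id, frame id
#   pairs[np.lexsort((pairs[:, 1], pairs[:, 0]))]     # -> [[1, 3], [1, 9], [2, 5]]
# i.e. rows end up ordered by file id first, then frame id.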
|
||||
|
||||
logger.info("Finished pretraining.")
|
||||
|
||||
return ae_net
|
||||
|
||||
def test(self, dataset: BaseADDataset, ae_net: BaseNet):
|
||||
def test(self, dataset: BaseADDataset, ae_net: BaseNet, k_fold_idx: int = None):
|
||||
logger = logging.getLogger()
|
||||
|
||||
# Get test data loader
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
|
||||
)
|
||||
if k_fold_idx is not None:
|
||||
_, test_loader = dataset.loaders_k_fold(
|
||||
fold_idx=k_fold_idx,
|
||||
batch_size=self.batch_size,
|
||||
num_workers=self.n_jobs_dataloader,
|
||||
)
|
||||
else:
|
||||
_, test_loader, _ = dataset.loaders(
|
||||
batch_size=self.batch_size, num_workers=self.n_jobs_dataloader
|
||||
)
|
||||
|
||||
# Set loss
|
||||
criterion = nn.MSELoss(reduction="none")
|
||||
@@ -133,15 +163,25 @@ class AETrainer(BaseTrainer):
|
||||
start_time = time.time()
|
||||
idx_label_score = []
|
||||
ae_net.eval()
|
||||
all_training_data = []
|
||||
with torch.no_grad():
|
||||
for data in test_loader:
|
||||
inputs, labels, _, idx = data
|
||||
inputs, labels, _, idx, file_frame_ids = data
|
||||
inputs, labels, idx = (
|
||||
inputs.to(self.device),
|
||||
labels.to(self.device),
|
||||
idx.to(self.device),
|
||||
)
|
||||
|
||||
all_training_data.append(
|
||||
np.dstack(
|
||||
(
|
||||
file_frame_ids[0].detach().cpu().numpy(),
|
||||
file_frame_ids[1].detach().cpu().numpy(),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
rec = ae_net(inputs)
|
||||
rec_loss = criterion(rec, inputs)
|
||||
scores = torch.mean(rec_loss, dim=tuple(range(1, rec.dim())))
|
||||
@@ -161,6 +201,12 @@ class AETrainer(BaseTrainer):
|
||||
|
||||
self.test_time = time.time() - start_time
|
||||
|
||||
all_training_data = np.concatenate([x.squeeze() for x in all_training_data])
|
||||
|
||||
sorted_training_data = all_training_data[
|
||||
np.lexsort((all_training_data[:, 1], all_training_data[:, 0]))
|
||||
]
|
||||
|
||||
# Compute AUC
|
||||
_, labels, scores = zip(*idx_label_score)
|
||||
labels = np.array(labels)
|
||||
|
||||
151
tools/animate_score.py
Normal file
@@ -0,0 +1,151 @@
|
||||
from functools import partial
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from matplotlib import animation
|
||||
|
||||
# from util import calculate_average_frame_rate, load_dataset
|
||||
from rich.progress import Progress, track
|
||||
|
||||
scores_path = Path(
|
||||
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/subter_selective_test/inference/3_smoke_human_walking_2023-01-23.npy"
|
||||
)
|
||||
|
||||
# scores_path = Path(
|
||||
# "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/subter_split_test/inference/3_smoke_human_walking_2023-01-23.npy"
|
||||
# )
|
||||
# scores_path = Path(
|
||||
# "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/subter_split_test/inference/1_loop_closure_illuminated_two_LiDARs_2023-01-23.npy"
|
||||
# )
|
||||
# scores_path = Path(
|
||||
# "/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/esmera_split_test/inference/experiment_3.npy"
|
||||
# )
|
||||
|
||||
# dataset_path = Path(
|
||||
# "/home/fedex/mt/data/subter/1_loop_closure_illuminated_two_LiDARs_2023-01-23.bag"
|
||||
# )
|
||||
# dataset_path = Path("/home/fedex/mt/data/subter/3_smoke_human_walking_2023-01-23.bag")
|
||||
|
||||
# dataset = load_dataset(dataset_path)
|
||||
fps = 10
|
||||
|
||||
all_scores = np.load(scores_path)
|
||||
y_limit = 1.1 * np.max(all_scores[np.isfinite(all_scores)])
|
||||
# y_limit = 10
|
||||
# all_scores = np.where(all_scores > 10, 10, all_scores)
|
||||
# all_scores = all_scores.reshape(-1, 16).T #SUBTER
|
||||
# all_scores = all_scores.reshape(-1, 8).T # ESMERA
|
||||
all_scores = all_scores.reshape(-1, 1).T # ESMERA
|
||||
|
||||
print(all_scores.shape, y_limit)
|
||||
|
||||
fig, axes = plt.subplots(
|
||||
1,
|
||||
1,
|
||||
figsize=(7.68, 7.2),
|
||||
# 1, 1, figsize=(7.68, 7.2), gridspec_kw={"wspace": 0.10, "hspace": 0.10}
|
||||
)
|
||||
|
||||
axes = [axes]
|
||||
# fig, axes = plt.subplots(
|
||||
# 1, 8, figsize=(20.48, 7.2), gridspec_kw={"wspace": 0.05, "hspace": 0.05}
|
||||
# )
|
||||
|
||||
# Flatten the axes for easier indexing
|
||||
# axes = axes.flatten()
|
||||
|
||||
last_running_avg = [None] * len(all_scores)
|
||||
|
||||
|
||||
# Function to calculate the running average with a dynamic window size
|
||||
def running_average_dynamic(data, current_frame, max_window=20):
|
||||
"""Calculate the running average with dynamic window size up to max_window."""
|
||||
window = min(
|
||||
current_frame, max_window
|
||||
) # Use the minimum of current frame or the max window size
|
||||
return np.convolve(data, np.ones(window) / window, mode="valid")
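# Quick numeric check of the helper above (not part of the original script):
#   running_average_dynamic(np.array([1.0, 2.0, 3.0, 4.0, 5.0]), current_frame=3)
#   # -> array([2., 3., 4.])  ('valid' convolution with window 3 shortens the output)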
|
||||
|
||||
|
||||
# Function to animate each subplot
|
||||
def animate(i, progress_bar, progress_task):
|
||||
for score_id, ax in enumerate(axes):
|
||||
ax.cla() # Clear the axes for the current frame
|
||||
|
||||
ax.plot(
|
||||
all_scores[score_id][:i], label="Anomaly Score"
|
||||
) # Plot the scores up to frame `i`
|
||||
if i > 0:
|
||||
avg_data = running_average_dynamic(
|
||||
all_scores[score_id][:i], current_frame=i, max_window=20
|
||||
)
|
||||
ax.plot(range(len(avg_data)), avg_data, color="orange", label="Running Avg")
|
||||
|
||||
ax.set_xlim([0, all_scores[score_id].shape[0]]) # Set x limits
|
||||
ax.set_ylim([0, y_limit]) # Set y limits
|
||||
# ax.set_title(f"Score {score_id + 1}") # Add a title for each subplot
|
||||
|
||||
ax.set_ylabel("Score", fontsize=10) # Add y-axis label
|
||||
ax.set_xlabel("Frame", fontsize=10) # Add y-axis label
|
||||
ax.legend(loc="upper right", fontsize=10) # Add a legend
|
||||
# Only show y-axis tick labels for the leftmost subplots
|
||||
# if score_id % 8 == 0: # First column of subplots
|
||||
# ax.set_ylabel("Score", fontsize=8) # Add y-axis label
|
||||
# else:
|
||||
# ax.set_yticklabels([]) # Remove y-axis tick labels
|
||||
# if score_id < 8:
|
||||
# ax.set_xticklabels([]) # Remove x-axis tick labels
|
||||
|
||||
# ax.tick_params(labelsize=6)
|
||||
|
||||
# Update the running average text every 10 frames
|
||||
if i % fps == 0 and i > 0:
|
||||
# Calculate the current running average value (up to last 20 frames)
|
||||
current_window = min(i, 20)
|
||||
last_running_avg[score_id] = np.mean(
|
||||
all_scores[score_id][i - current_window : i]
|
||||
)
|
||||
|
||||
# Display the last updated running average value (if available)
|
||||
if last_running_avg[score_id] is not None:
|
||||
ax.text(
|
||||
0.05,
|
||||
0.95,
|
||||
f"Current Avg: {last_running_avg[score_id]:>2.1f}",
|
||||
transform=ax.transAxes,
|
||||
fontsize=10,
|
||||
verticalalignment="top",
|
||||
horizontalalignment="left",
|
||||
color="black",
|
||||
fontfamily="monospace",
|
||||
)
|
||||
progress_bar.update(progress_task, completed=i)
|
||||
|
||||
|
||||
# plt.subplots_adjust(
|
||||
# left=0.02, right=0.98, top=0.95, bottom=0.05, wspace=0.05, hspace=0.05
|
||||
# )
|
||||
|
||||
|
||||
with Progress() as progress:
|
||||
total = all_scores[0].shape[0] + 1
|
||||
progress_task = progress.add_task("[cyan]Animating...", total=total)
|
||||
progress.update(progress_task, completed=0)
|
||||
|
||||
animate_partial = partial(
|
||||
animate, progress_bar=progress, progress_task=progress_task
|
||||
)
|
||||
|
||||
# anim = animation.FuncAnimation(fig, animate_partial, frames=50, interval=1, blit=False)
|
||||
anim = animation.FuncAnimation(
|
||||
fig, animate_partial, frames=total, interval=1, blit=False
|
||||
)
|
||||
|
||||
# Save the animation as a single video
|
||||
animated_score_filename = f"{scores_path.stem}_selective.mp4"
|
||||
anim.save(animated_score_filename, writer=animation.FFMpegWriter(fps=fps))
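# Note: FFMpegWriter requires an ffmpeg binary to be available on PATH.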
|
||||
|
||||
progress.update(progress_task, completed=all_scores[0].shape[0] + 1)
|
||||
|
||||
# Clear the figure after saving
|
||||
fig.clear()
|
||||
93
tools/anomaly_scatter_plot.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
# Fix the random seed for reproducibility
|
||||
np.random.seed(0)
|
||||
|
||||
# 1. Generate NORMAL DATA (e.g. points roughly around the origin)
|
||||
# We'll keep them relatively close together so that a circle can enclose them easily.
|
||||
normal_data = np.random.randn(50, 2) * 0.75
|
||||
|
||||
# 2. Generate ANOMALOUS DATA
|
||||
# - Cluster 1: 3 points close together
|
||||
anomaly_cluster_1 = np.array([[3.0, 3.0], [3.2, 3.1], [2.8, 2.9], [0.4, 4.0]])
|
||||
|
||||
# - Cluster 2: a single point (not generated in this version; the labeling code
#   that references anomaly_cluster_2 below is left commented out)
|
||||
|
||||
# 3. Compute the center and radius for a boundary circle around normal data
|
||||
center = normal_data.mean(axis=0)
|
||||
distances = np.linalg.norm(normal_data - center, axis=1)
|
||||
radius = (
|
||||
np.max(distances) + 0.2
|
||||
) # Add a small margin to ensure all normal points are inside
|
||||
|
||||
# Create coordinates for plotting the circular boundary
|
||||
theta = np.linspace(0, 2 * np.pi, 200)
|
||||
circle_x = center[0] + radius * np.cos(theta)
|
||||
circle_y = center[1] + radius * np.sin(theta)
|
||||
|
||||
# 4. Plot the data
|
||||
plt.figure(figsize=(7, 7))
|
||||
|
||||
# Scatter normal points with 'o'
|
||||
plt.scatter(
|
||||
normal_data[:, 0], normal_data[:, 1], marker="o", color="blue", label="Normal Data"
|
||||
)
|
||||
|
||||
# Scatter anomalous points with 'x', but separate them by cluster for clarity
|
||||
plt.scatter(
|
||||
anomaly_cluster_1[:, 0],
|
||||
anomaly_cluster_1[:, 1],
|
||||
marker="x",
|
||||
color="red",
|
||||
label="Anomalies",
|
||||
)
|
||||
|
||||
# Plot the boundary (circle) around the normal data
|
||||
plt.plot(circle_x, circle_y, linestyle="--", color="black", label="Boundary")
|
||||
|
||||
# 5. Annotate/label the clusters
|
||||
# Label the normal cluster near its center
|
||||
# plt.text(center[0], center[1],
|
||||
# 'Normal Cluster',
|
||||
# horizontalalignment='center',
|
||||
# verticalalignment='center',
|
||||
# fontsize=9,
|
||||
# bbox=dict(facecolor='white', alpha=0.7))
|
||||
#
|
||||
# # Label anomaly cluster 1 near its centroid
|
||||
# ac1_center = anomaly_cluster_1.mean(axis=0)
|
||||
# plt.text(ac1_center[0], ac1_center[1],
|
||||
# 'Anomaly Cluster 1',
|
||||
# horizontalalignment='center',
|
||||
# verticalalignment='center',
|
||||
# fontsize=9,
|
||||
# bbox=dict(facecolor='white', alpha=0.7))
|
||||
#
|
||||
# # Label anomaly cluster 2
|
||||
# ac2_point = anomaly_cluster_2[0]
|
||||
# plt.text(ac2_point[0]+0.2, ac2_point[1],
|
||||
# 'Anomaly Cluster 2',
|
||||
# horizontalalignment='left',
|
||||
# verticalalignment='center',
|
||||
# fontsize=9,
|
||||
# bbox=dict(facecolor='white', alpha=0.7))
|
||||
|
||||
# Add legend and make plot look nice
|
||||
plt.legend(loc="upper left")
|
||||
# plt.title('2D Scatter Plot Showing Normal and Anomalous Clusters')
|
||||
plt.xlabel("x")
|
||||
plt.ylabel("y")
|
||||
plt.tick_params(
|
||||
axis="both", # changes apply to the x-axis
|
||||
which="both", # both major and minor ticks are affected
|
||||
bottom=False, # ticks along the bottom edge are off
|
||||
top=False, # ticks along the top edge are off
|
||||
left=False, # ticks along the top edge are off
|
||||
right=False, # ticks along the top edge are off
|
||||
labelbottom=False,
|
||||
labelleft=False,
|
||||
)
|
||||
# plt.grid(True)
|
||||
plt.axis("equal") # Makes circles look circular rather than elliptical
|
||||
plt.savefig("scatter_plot.png")
|
||||
@@ -22,21 +22,6 @@ from rosbags.typesys.stores.ros1_noetic import (
|
||||
|
||||
from util import existing_path
|
||||
|
||||
|
||||
def get_o3d_pointcloud(points: np.ndarray) -> o3d.geometry.PointCloud:
|
||||
xyz_array = np.stack(
|
||||
[
|
||||
points["x"].astype(np.float64),
|
||||
points["y"].astype(np.float64),
|
||||
points["z"].astype(np.float64),
|
||||
],
|
||||
axis=-1,
|
||||
)
|
||||
filtered_xyz = xyz_array[~np.all(xyz_array == 0, axis=1)]
|
||||
o3d_vector = o3d.utility.Vector3dVector(filtered_xyz)
|
||||
return o3d.geometry.PointCloud(o3d_vector)
|
||||
|
||||
|
||||
# Mapping of PointField datatypes to NumPy dtypes
|
||||
POINTFIELD_DATATYPES = {
|
||||
1: np.int8, # INT8
|
||||
@@ -178,9 +163,9 @@ def main() -> int:
|
||||
with AnyReader([args.input_experiment_path]) as reader:
|
||||
connections = reader.connections
|
||||
topics = dict(sorted({conn.topic: conn for conn in connections}.items()))
|
||||
assert (
|
||||
args.pointcloud_topic in topics
|
||||
), f"Topic {args.pointcloud_topic} not found"
|
||||
assert args.pointcloud_topic in topics, (
|
||||
f"Topic {args.pointcloud_topic} not found"
|
||||
)
|
||||
|
||||
original_types = {}
|
||||
typestore = get_typestore(Stores.ROS1_NOETIC)
|
||||
@@ -207,16 +192,78 @@ def main() -> int:
|
||||
"Processing all messages", total=reader.message_count
|
||||
)
|
||||
|
||||
i = 0
|
||||
for connection, timestamp, rawdata in reader.messages():
|
||||
if connection.topic == args.pointcloud_topic:
|
||||
i += 1
|
||||
# For the pointcloud topic, we need to modify the data
|
||||
msg = reader.deserialize(rawdata, connection.msgtype)
|
||||
original_pointcloud = read_pointcloud(msg)
|
||||
cleaned_pointcloud = clean_pointcloud(original_pointcloud)
|
||||
|
||||
o3d_pcd = get_o3d_pointcloud(cleaned_pointcloud)
|
||||
# o3d_pointcloud = o3d.geometry.PointCloud()
|
||||
|
||||
# xyz_points = np.zeros((cleaned_pointcloud.shape[0], 3))
|
||||
# xyz_points[:, 0] = cleaned_pointcloud["x"]
|
||||
# xyz_points[:, 1] = cleaned_pointcloud["y"]
|
||||
# xyz_points[:, 2] = cleaned_pointcloud["z"]
|
||||
|
||||
# o3d_pointcloud.points = o3d.utility.Vector3dVector(xyz_points)
|
||||
|
||||
range_dtypes = {
|
||||
500: ("range_smaller_500", "u1"),
|
||||
800: ("range_smaller_800", "u1"),
|
||||
1000: ("range_smaller_1000", "u1"),
|
||||
1200: ("range_smaller_1200", "u1"),
|
||||
}
|
||||
|
||||
# radius_outlier_dtypes = {
|
||||
# # 10: ("radius_outlier_10", "u1"),
|
||||
# 50: ("radius_outlier_50", "u1"),
|
||||
# # 100: ("radius_outlier_100", "u1"),
|
||||
# }
|
||||
|
||||
# statistical_outlier_dtypes = {
|
||||
# # (20, 1.0): ("statisstical_outlier_20_1", "u1"),
|
||||
# # (20, 2.0): ("statisstical_outlier_20_2", "u1"),
|
||||
# (20, 4.0): ("statisstical_outlier_20_4", "u1"),
|
||||
# }
|
||||
|
||||
edited_dtype = np.dtype(
|
||||
cleaned_pointcloud.dtype.descr + list(range_dtypes.values())
|
||||
# + list(radius_outlier_dtypes.values())
|
||||
# + list(statistical_outlier_dtypes.values())
|
||||
)
|
||||
|
||||
edited_pointcloud = np.zeros(
|
||||
cleaned_pointcloud.shape, dtype=edited_dtype
|
||||
)
|
||||
for name in cleaned_pointcloud.dtype.names:
|
||||
edited_pointcloud[name] = cleaned_pointcloud[name]
|
||||
|
||||
for key, val in range_dtypes.items():
|
||||
edited_pointcloud[val[0]][
|
||||
edited_pointcloud["range"] < key
|
||||
] = 255
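# Small self-contained example of the structured-array pattern used above (extend the
# dtype with a flag column, copy the original fields, then set the flag by condition);
# the field names here are illustrative only:
#   pts = np.zeros(3, dtype=[("x", "f4"), ("range", "f4")])
#   pts["range"] = [200.0, 600.0, 1500.0]
#   flagged = np.zeros(pts.shape, dtype=np.dtype(pts.dtype.descr + [("range_smaller_800", "u1")]))
#   for name in pts.dtype.names:
#       flagged[name] = pts[name]
#   flagged["range_smaller_800"][flagged["range"] < 800] = 255   # -> [255, 255, 0]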
|
||||
|
||||
# for key, val in radius_outlier_dtypes.items():
|
||||
# _, mask_indices = o3d_pointcloud.remove_radius_outlier(
|
||||
# nb_points=2, radius=key
|
||||
# )
|
||||
# mask = np.zeros(edited_pointcloud.shape[0], dtype=bool)
|
||||
# mask[mask_indices] = True
|
||||
# edited_pointcloud[val[0]][mask] = 255
|
||||
|
||||
# for key, val in statistical_outlier_dtypes.items():
|
||||
# _, mask_indices = o3d_pointcloud.remove_statistical_outlier(
|
||||
# nb_neighbors=key[0], std_ratio=key[1]
|
||||
# )
|
||||
# mask = np.zeros(edited_pointcloud.shape[0], dtype=bool)
|
||||
# mask[mask_indices] = True
|
||||
# edited_pointcloud[val[0]][mask] = 255
|
||||
|
||||
msg = create_pointcloud2_msg(msg, edited_pointcloud)
|
||||
|
||||
msg = create_pointcloud2_msg(msg, cleaned_pointcloud)
|
||||
writer.write(
|
||||
connection=new_connections[connection.id],
|
||||
timestamp=timestamp,
|
||||
@@ -224,6 +271,16 @@ def main() -> int:
|
||||
message=msg, typename=msg.__msgtype__
|
||||
),
|
||||
)
|
||||
|
||||
# Create a boolean mask where the condition is met
|
||||
# mask = cleaned_pointcloud["range"] > 1
|
||||
|
||||
# Use the mask to set fields to zero
|
||||
# cleaned_pointcloud["x"][mask] = 0
|
||||
# cleaned_pointcloud["y"][mask] = 0
|
||||
# cleaned_pointcloud["z"][mask] = 0
|
||||
# cleaned_pointcloud["range"][mask] = 0
|
||||
|
||||
else:
|
||||
# For all other topics, we can write rawdata directly, no need to deserialize
|
||||
writer.write(new_connections[connection.id], timestamp, rawdata)
|
||||
|
||||
127
tools/evaluate_prc.py
Normal file
@@ -0,0 +1,127 @@
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from scipy.stats import sem, t
|
||||
from sklearn.metrics import PrecisionRecallDisplay, auc
|
||||
|
||||
|
||||
def confidence_interval(data, confidence=0.95):
|
||||
"""Compute mean and margin of error for a given list of scores."""
|
||||
n = len(data)
|
||||
mean = np.mean(data)
|
||||
# Standard error of the mean:
|
||||
std_err = sem(data)
|
||||
# Confidence interval radius
|
||||
h = std_err * t.ppf((1 + confidence) / 2.0, n - 1)
|
||||
return mean, h
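# Example (values approximate): confidence_interval([0.81, 0.79, 0.84, 0.80, 0.82])
# returns roughly (0.812, 0.024), i.e. mean AP with a 95% half-width of ~0.024.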
|
||||
|
||||
|
||||
# 1) LOAD PRECISION-RECALL DATA
|
||||
prc_data = [] # Stores (precision, recall) for each DeepSAD fold
|
||||
ap_scores = [] # Average Precision for each DeepSAD fold
|
||||
|
||||
isoforest_prc_data = [] # Stores (precision, recall) for each IsoForest fold
|
||||
isoforest_ap_scores = [] # Average Precision for each IsoForest fold
|
||||
|
||||
results_path = Path(
|
||||
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_kfold_800_3000_new"
|
||||
)
|
||||
|
||||
# We assume we have 5 folds (adjust if you have a different number)
|
||||
for i in range(5):
|
||||
with (results_path / f"results_{i}.pkl").open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
precision, recall, _ = data["test_prc"] # (precision, recall, thresholds)
|
||||
prc_data.append((precision, recall))
|
||||
# Compute Average Precision (AP) via AUC of the (recall, precision) curve
|
||||
ap_scores.append(auc(recall, precision))
|
||||
|
||||
with (results_path / f"results_isoforest_{i}.pkl").open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
precision, recall, _ = data["test_prc"]
|
||||
isoforest_prc_data.append((precision, recall))
|
||||
isoforest_ap_scores.append(auc(recall, precision))
|
||||
|
||||
# 2) CALCULATE PER-FOLD STATISTICS
|
||||
mean_ap, ap_ci = confidence_interval(ap_scores)
|
||||
isoforest_mean_ap, isoforest_ap_ci = confidence_interval(isoforest_ap_scores)
|
||||
|
||||
# 3) INTERPOLATE EACH FOLD'S PRC ON A COMMON RECALL GRID FOR MEAN CURVE
|
||||
mean_recall = np.linspace(0, 1, 100)
|
||||
|
||||
# -- DeepSAD
|
||||
deep_sad_precisions_interp = []
|
||||
for precision, recall in prc_data:
|
||||
# Interpolate precision values at mean_recall
|
||||
# np.interp needs ascending xp; sklearn returns recall in descending order, so flip both
interp_precision = np.interp(mean_recall, np.flip(recall), np.flip(precision))
|
||||
deep_sad_precisions_interp.append(interp_precision)
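# Why the flip matters: np.interp expects xp in ascending order, while sklearn's
# precision_recall_curve returns recall in descending order. A minimal check:
#   recall    = np.array([1.0, 0.5, 0.0])
#   precision = np.array([0.4, 0.6, 1.0])
#   np.interp(np.linspace(0, 1, 5), np.flip(recall), np.flip(precision))
#   # -> array([1. , 0.8, 0.6, 0.5, 0.4])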
|
||||
|
||||
mean_precision = np.mean(deep_sad_precisions_interp, axis=0)
|
||||
std_precision = np.std(deep_sad_precisions_interp, axis=0)
|
||||
|
||||
# -- IsoForest
|
||||
isoforest_precisions_interp = []
|
||||
for precision, recall in isoforest_prc_data:
|
||||
interp_precision = np.interp(mean_recall, np.flip(recall), np.flip(precision))
|
||||
isoforest_precisions_interp.append(interp_precision)
|
||||
|
||||
isoforest_mean_precision = np.mean(isoforest_precisions_interp, axis=0)
|
||||
isoforest_std_precision = np.std(isoforest_precisions_interp, axis=0)
|
||||
|
||||
# 4) CREATE PLOT USING PrecisionRecallDisplay
|
||||
fig, ax = plt.subplots(figsize=(8, 6))
|
||||
|
||||
# (A) Plot each fold (optional) for DeepSAD
|
||||
for i, (precision, recall) in enumerate(prc_data):
|
||||
disp = PrecisionRecallDisplay(precision=precision, recall=recall)
|
||||
# Label only the first fold to avoid legend clutter
|
||||
disp.plot(
|
||||
ax=ax, color="b", alpha=0.3, label=f"DeepSAD Fold {i+1}" if i == 0 else None
|
||||
)
|
||||
|
||||
# (B) Plot each fold (optional) for IsoForest
|
||||
for i, (precision, recall) in enumerate(isoforest_prc_data):
|
||||
disp = PrecisionRecallDisplay(precision=precision, recall=recall)
|
||||
disp.plot(
|
||||
ax=ax, color="r", alpha=0.3, label=f"IsoForest Fold {i+1}" if i == 0 else None
|
||||
)
|
||||
|
||||
# (C) Plot mean curve for DeepSAD
|
||||
mean_disp_deepsad = PrecisionRecallDisplay(precision=mean_precision, recall=mean_recall)
|
||||
mean_disp_deepsad.plot(
|
||||
ax=ax, color="b", label=f"DeepSAD Mean PR (AP={mean_ap:.2f} ± {ap_ci:.2f})"
|
||||
)
|
||||
ax.fill_between(
|
||||
mean_recall,
|
||||
mean_precision - std_precision,
|
||||
mean_precision + std_precision,
|
||||
color="b",
|
||||
alpha=0.2,
|
||||
)
|
||||
|
||||
# (D) Plot mean curve for IsoForest
|
||||
mean_disp_isoforest = PrecisionRecallDisplay(
|
||||
precision=isoforest_mean_precision, recall=mean_recall
|
||||
)
|
||||
mean_disp_isoforest.plot(
|
||||
ax=ax,
|
||||
color="r",
|
||||
label=f"IsoForest Mean PR (AP={isoforest_mean_ap:.2f} ± {isoforest_ap_ci:.2f})",
|
||||
)
|
||||
ax.fill_between(
|
||||
mean_recall,
|
||||
isoforest_mean_precision - isoforest_std_precision,
|
||||
isoforest_mean_precision + isoforest_std_precision,
|
||||
color="r",
|
||||
alpha=0.2,
|
||||
)
|
||||
|
||||
# 5) FINAL PLOT ADJUSTMENTS
|
||||
ax.set_xlabel("Recall")
|
||||
ax.set_ylabel("Precision")
|
||||
ax.set_title("Precision-Recall Curve with 5-Fold Cross-Validation")
|
||||
ax.legend(loc="upper right")
|
||||
|
||||
plt.savefig("pr_curve_800_3000_2.png")
|
||||
135
tools/evaluate_prc_2.py
Normal file
@@ -0,0 +1,135 @@
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from scipy.stats import sem, t
|
||||
from sklearn.metrics import auc
|
||||
|
||||
|
||||
# Confidence interval function
|
||||
def confidence_interval(data, confidence=0.95):
|
||||
n = len(data)
|
||||
mean = np.mean(data)
|
||||
std_err = sem(data)
|
||||
h = std_err * t.ppf((1 + confidence) / 2.0, n - 1)
|
||||
return mean, h
|
||||
|
||||
|
||||
# Load PRC (precision-recall) data and compute AP (average precision)
|
||||
prc_data = []
|
||||
ap_scores = []
|
||||
isoforest_prc_data = []
|
||||
isoforest_ap_scores = []
|
||||
|
||||
results_path = Path(
|
||||
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_kfold_3000_800_2"
|
||||
)
|
||||
|
||||
for i in range(5):
|
||||
with (results_path / f"results_{i}.pkl").open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
# data["test_prc"] should be (precision, recall, thresholds)
|
||||
precision, recall, _ = data["test_prc"]
|
||||
prc_data.append((precision, recall))
|
||||
|
||||
# Compute AP using area under the precision-recall curve
|
||||
ap_scores.append(auc(recall, precision))
|
||||
|
||||
with (results_path / f"results_isoforest_{i}.pkl").open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
precision, recall, _ = data["test_prc"]
|
||||
isoforest_prc_data.append((precision, recall))
|
||||
|
||||
isoforest_ap_scores.append(auc(recall, precision))
|
||||
|
||||
# Calculate mean and confidence interval for DeepSAD AP scores
|
||||
mean_ap, ap_ci = confidence_interval(ap_scores)
|
||||
|
||||
# Interpolate precision over a common recall range for DeepSAD
|
||||
mean_recall = np.linspace(0, 1, 100)
|
||||
precisions = []
|
||||
|
||||
for precision, recall in prc_data:
|
||||
# Make sure recall is sorted (usually is from sklearn)
|
||||
# Interpolate precision at the points in mean_recall
|
||||
interp_prec = np.interp(mean_recall, np.flip(recall), np.flip(precision))
|
||||
precisions.append(interp_prec)
|
||||
|
||||
mean_precision = np.mean(precisions, axis=0)
|
||||
std_precision = np.std(precisions, axis=0)
|
||||
|
||||
# Calculate mean and confidence interval for IsoForest AP scores
|
||||
isoforest_mean_ap, isoforest_ap_ci = confidence_interval(isoforest_ap_scores)
|
||||
|
||||
# Interpolate precision over a common recall range for IsoForest
|
||||
isoforest_precisions = []
|
||||
for precision, recall in isoforest_prc_data:
|
||||
interp_prec = np.interp(mean_recall, np.flip(recall), np.flip(precision))
|
||||
isoforest_precisions.append(interp_prec)
|
||||
|
||||
isoforest_mean_precision = np.mean(isoforest_precisions, axis=0)
|
||||
isoforest_std_precision = np.std(isoforest_precisions, axis=0)
|
||||
|
||||
# Plot Precision-Recall curves with confidence margins
|
||||
plt.figure(figsize=(8, 6))
|
||||
|
||||
# DeepSAD curve
|
||||
plt.plot(
|
||||
mean_recall,
|
||||
mean_precision,
|
||||
color="b",
|
||||
label=f"DeepSAD Mean PR (AP = {mean_ap:.2f} ± {ap_ci:.2f})",
|
||||
)
|
||||
plt.fill_between(
|
||||
mean_recall,
|
||||
mean_precision - std_precision,
|
||||
mean_precision + std_precision,
|
||||
color="b",
|
||||
alpha=0.2,
|
||||
label="DeepSAD ± 1 std. dev.",
|
||||
)
|
||||
|
||||
# IsoForest curve
|
||||
plt.plot(
|
||||
mean_recall,
|
||||
isoforest_mean_precision,
|
||||
color="r",
|
||||
label=f"IsoForest Mean PR (AP = {isoforest_mean_ap:.2f} ± {isoforest_ap_ci:.2f})",
|
||||
)
|
||||
plt.fill_between(
|
||||
mean_recall,
|
||||
isoforest_mean_precision - isoforest_std_precision,
|
||||
isoforest_mean_precision + isoforest_std_precision,
|
||||
color="r",
|
||||
alpha=0.2,
|
||||
label="IsoForest ± 1 std. dev.",
|
||||
)
|
||||
|
||||
# Optional: plot each fold's curve for DeepSAD
|
||||
for i, (precision, recall) in enumerate(prc_data):
|
||||
plt.plot(
|
||||
recall,
|
||||
precision,
|
||||
lw=1,
|
||||
alpha=0.3,
|
||||
color="b",
|
||||
label=f"DeepSAD Fold {i + 1} PR" if i == 0 else "",
|
||||
)
|
||||
|
||||
# Optional: plot each fold's curve for IsoForest
|
||||
for i, (precision, recall) in enumerate(isoforest_prc_data):
|
||||
plt.plot(
|
||||
recall,
|
||||
precision,
|
||||
lw=1,
|
||||
alpha=0.3,
|
||||
color="r",
|
||||
label=f"IsoForest Fold {i + 1} PR" if i == 0 else "",
|
||||
)
|
||||
|
||||
plt.xlabel("Recall")
|
||||
plt.ylabel("Precision")
|
||||
plt.title("Precision-Recall Curve with 5-Fold Cross-Validation")
|
||||
plt.legend(loc="upper right")
|
||||
plt.savefig("pr_curve_800_3000_4.png")
|
||||
50
tools/evaluate_prc_singlerun.py
Normal file
@@ -0,0 +1,50 @@
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.metrics import auc
|
||||
|
||||
results_path = Path(
|
||||
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_selective"
|
||||
)
|
||||
|
||||
# Paths to your result files
|
||||
deepsad_results_file = results_path / "results.pkl"
|
||||
isoforest_results_file = results_path / "results_isoforest.pkl"
|
||||
|
||||
# Load DeepSAD precision-recall data
|
||||
with deepsad_results_file.open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
deep_precision, deep_recall, _ = data["test_prc"]
|
||||
|
||||
# Compute AP for DeepSAD
|
||||
deep_ap = auc(deep_recall, deep_precision)
|
||||
|
||||
# Load IsoForest precision-recall data
|
||||
with isoforest_results_file.open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
iso_precision, iso_recall, _ = data["test_prc"]
|
||||
|
||||
# Compute AP for IsoForest
|
||||
iso_ap = auc(iso_recall, iso_precision)
|
||||
|
||||
# Create plot
|
||||
plt.figure(figsize=(8, 6))
|
||||
|
||||
# Plot DeepSAD PR curve
|
||||
plt.plot(deep_recall, deep_precision, color="b", label=f"DeepSAD (AP = {deep_ap:.2f})")
|
||||
|
||||
# Plot IsoForest PR curve
|
||||
plt.plot(iso_recall, iso_precision, color="r", label=f"IsoForest (AP = {iso_ap:.2f})")
|
||||
|
||||
# Labels
|
||||
plt.xlabel("Recall")
|
||||
plt.ylabel("Precision")
|
||||
plt.title("Precision-Recall Curve (Single Run)")
|
||||
|
||||
# Add legend
|
||||
plt.legend(loc="upper right")
|
||||
|
||||
# Save and/or show plot
|
||||
plt.savefig("pr_curve_single_run.png")
|
||||
plt.show()
|
||||
82
tools/evaluate_roc.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from scipy.stats import sem, t
|
||||
from sklearn.metrics import auc
|
||||
|
||||
|
||||
# Confidence interval function
|
||||
def confidence_interval(data, confidence=0.95):
|
||||
n = len(data)
|
||||
mean = np.mean(data)
|
||||
std_err = sem(data)
|
||||
h = std_err * t.ppf((1 + confidence) / 2.0, n - 1)
|
||||
return mean, h
|
||||
|
||||
|
||||
# Load ROC and AUC values from pickle files
|
||||
roc_data = []
|
||||
auc_scores = []
|
||||
isoforest_roc_data = []
|
||||
isoforest_auc_scores = []
|
||||
|
||||
results_path = Path(
|
||||
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_kfold_0_0"
|
||||
)
|
||||
|
||||
for i in range(5):
|
||||
with (results_path / f"results_{i}.pkl").open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
roc_data.append(data["test_roc"])
|
||||
auc_scores.append(data["test_auc"])
|
||||
with (results_path / f"results.isoforest_{i}.pkl").open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
isoforest_roc_data.append(data["test_roc"])
|
||||
isoforest_auc_scores.append(data["test_auc"])
|
||||
|
||||
# Calculate mean and confidence interval for AUC scores
|
||||
mean_auc, auc_ci = confidence_interval(auc_scores)
|
||||
|
||||
# Combine ROC curves
|
||||
mean_fpr = np.linspace(0, 1, 100)
|
||||
tprs = []
|
||||
|
||||
for fpr, tpr, _ in roc_data:
|
||||
interp_tpr = np.interp(mean_fpr, fpr, tpr)
|
||||
interp_tpr[0] = 0.0
|
||||
tprs.append(interp_tpr)
|
||||
|
||||
mean_tpr = np.mean(tprs, axis=0)
|
||||
mean_tpr[-1] = 1.0
|
||||
std_tpr = np.std(tprs, axis=0)
|
||||
|
||||
# Plot ROC curves with confidence margins
|
||||
plt.figure()
|
||||
plt.plot(
|
||||
mean_fpr,
|
||||
mean_tpr,
|
||||
color="b",
|
||||
label=f"Mean ROC (AUC = {mean_auc:.2f} ± {auc_ci:.2f})",
|
||||
)
|
||||
plt.fill_between(
|
||||
mean_fpr,
|
||||
mean_tpr - std_tpr,
|
||||
mean_tpr + std_tpr,
|
||||
color="b",
|
||||
alpha=0.2,
|
||||
label="± 1 std. dev.",
|
||||
)
|
||||
|
||||
# Plot each fold's ROC curve (optional)
|
||||
for i, (fpr, tpr, _) in enumerate(roc_data):
|
||||
plt.plot(fpr, tpr, lw=1, alpha=0.3, label=f"Fold {i + 1} ROC")
|
||||
|
||||
# Labels and legend
|
||||
plt.plot([0, 1], [0, 1], "k--", label="Chance")
|
||||
plt.xlabel("False Positive Rate")
|
||||
plt.ylabel("True Positive Rate")
|
||||
plt.title("ROC Curve with 5-Fold Cross-Validation")
|
||||
plt.legend(loc="lower right")
|
||||
plt.savefig("roc_curve_0_0.png")
|
||||
133
tools/evaluate_roc_all.py
Normal file
@@ -0,0 +1,133 @@
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from scipy.stats import sem, t
|
||||
from sklearn.metrics import auc
|
||||
|
||||
|
||||
# Confidence interval function
|
||||
def confidence_interval(data, confidence=0.95):
|
||||
n = len(data)
|
||||
mean = np.mean(data)
|
||||
std_err = sem(data)
|
||||
h = std_err * t.ppf((1 + confidence) / 2.0, n - 1)
|
||||
return mean, h
|
||||
|
||||
|
||||
# Load ROC and AUC values from pickle files
|
||||
roc_data = []
|
||||
auc_scores = []
|
||||
isoforest_roc_data = []
|
||||
isoforest_auc_scores = []
|
||||
|
||||
results_path = Path(
|
||||
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/log/DeepSAD/subter_kfold_800_3000_new"
|
||||
)
|
||||
|
||||
for i in range(5):
|
||||
with (results_path / f"results_{i}.pkl").open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
roc_data.append(data["test_roc"])
|
||||
auc_scores.append(data["test_auc"])
|
||||
with (results_path / f"results_isoforest_{i}.pkl").open("rb") as f:
|
||||
data = pickle.load(f)
|
||||
isoforest_roc_data.append(data["test_roc"])
|
||||
isoforest_auc_scores.append(data["test_auc"])
|
||||
|
||||
# Calculate mean and confidence interval for DeepSAD AUC scores
|
||||
mean_auc, auc_ci = confidence_interval(auc_scores)
|
||||
|
||||
# Combine ROC curves for DeepSAD
|
||||
mean_fpr = np.linspace(0, 1, 100)
|
||||
tprs = []
|
||||
|
||||
for fpr, tpr, _ in roc_data:
|
||||
interp_tpr = np.interp(mean_fpr, fpr, tpr)
|
||||
interp_tpr[0] = 0.0
|
||||
tprs.append(interp_tpr)
|
||||
|
||||
mean_tpr = np.mean(tprs, axis=0)
|
||||
mean_tpr[-1] = 1.0
|
||||
std_tpr = np.std(tprs, axis=0)
|
||||
|
||||
# -- ADDED: Calculate mean and confidence interval for IsoForest AUC scores
|
||||
isoforest_mean_auc, isoforest_auc_ci = confidence_interval(isoforest_auc_scores)
|
||||
|
||||
# -- ADDED: Combine ROC curves for IsoForest
|
||||
isoforest_mean_fpr = np.linspace(0, 1, 100)
|
||||
isoforest_tprs = []
|
||||
|
||||
for fpr, tpr, _ in isoforest_roc_data:
|
||||
interp_tpr = np.interp(isoforest_mean_fpr, fpr, tpr)
|
||||
interp_tpr[0] = 0.0
|
||||
isoforest_tprs.append(interp_tpr)
|
||||
|
||||
isoforest_mean_tpr = np.mean(isoforest_tprs, axis=0)
|
||||
isoforest_mean_tpr[-1] = 1.0
|
||||
isoforest_std_tpr = np.std(isoforest_tprs, axis=0)
|
||||
|
||||
# Plot ROC curves with confidence margins for DeepSAD
|
||||
plt.figure(figsize=(8, 6))
|
||||
plt.plot(
|
||||
mean_fpr,
|
||||
mean_tpr,
|
||||
color="b",
|
||||
label=f"DeepSAD Mean ROC (AUC = {mean_auc:.2f} ± {auc_ci:.2f})",
|
||||
)
|
||||
plt.fill_between(
|
||||
mean_fpr,
|
||||
mean_tpr - std_tpr,
|
||||
mean_tpr + std_tpr,
|
||||
color="b",
|
||||
alpha=0.2,
|
||||
label="DeepSAD ± 1 std. dev.",
|
||||
)
|
||||
|
||||
# -- ADDED: Plot ROC curves with confidence margins for IsoForest
|
||||
plt.plot(
|
||||
isoforest_mean_fpr,
|
||||
isoforest_mean_tpr,
|
||||
color="r",
|
||||
label=f"IsoForest Mean ROC (AUC = {isoforest_mean_auc:.2f} ± {isoforest_auc_ci:.2f})",
|
||||
)
|
||||
plt.fill_between(
|
||||
isoforest_mean_fpr,
|
||||
isoforest_mean_tpr - isoforest_std_tpr,
|
||||
isoforest_mean_tpr + isoforest_std_tpr,
|
||||
color="r",
|
||||
alpha=0.2,
|
||||
label="IsoForest ± 1 std. dev.",
|
||||
)
|
||||
|
||||
# Plot each fold's ROC curve (optional) for DeepSAD
|
||||
for i, (fpr, tpr, _) in enumerate(roc_data):
|
||||
plt.plot(
|
||||
fpr,
|
||||
tpr,
|
||||
lw=1,
|
||||
alpha=0.3,
|
||||
color="b",
|
||||
label=f"DeepSAD Fold {i+1} ROC" if i == 0 else "",
|
||||
)
|
||||
|
||||
# -- ADDED: Plot each fold's ROC curve (optional) for IsoForest
|
||||
for i, (fpr, tpr, _) in enumerate(isoforest_roc_data):
|
||||
plt.plot(
|
||||
fpr,
|
||||
tpr,
|
||||
lw=1,
|
||||
alpha=0.3,
|
||||
color="r",
|
||||
label=f"IsoForest Fold {i+1} ROC" if i == 0 else "",
|
||||
)
|
||||
|
||||
# Labels and legend
|
||||
plt.plot([0, 1], [0, 1], "k--", label="Chance")
|
||||
plt.xlabel("False Positive Rate")
|
||||
plt.ylabel("True Positive Rate")
|
||||
plt.title("ROC Curve with 5-Fold Cross-Validation")
|
||||
plt.legend(loc="lower right")
|
||||
plt.savefig("roc_curve_800_3000_isoforest.png")
|
||||
plt.show()
|
||||
37
tools/list_topics.py
Normal file
@@ -0,0 +1,37 @@
from pathlib import Path

import numpy as np
from PIL import Image
from rosbags.highlevel import AnyReader


def list_topics(bag_file):
    frame_count = 0
    output_folder = Path("./test") / bag_file.stem
    output_folder.mkdir(exist_ok=True, parents=True)

    with AnyReader([bag_file]) as reader:
        connections = reader.connections
        topics = dict(sorted({conn.topic: conn for conn in connections}.items()))

        topic = topics["/stereo_inertial_publisher/left/image_rect"]

        for connection, timestamp, rawdata in reader.messages(connections=[topic]):
            img_msg = reader.deserialize(rawdata, connection.msgtype)

            img_data = np.frombuffer(img_msg.data, dtype=np.uint8).reshape(
                (img_msg.height, img_msg.width)
            )
            img = Image.fromarray(img_data, mode="L")
            frame_count += 1
            img.save(output_folder / f"frame_{frame_count:04d}.png")

    print(f"Saved {frame_count} frames to {output_folder}")


if __name__ == "__main__":
    bag_path = Path(
        "/home/fedex/mt/data/subter/3_smoke_robot_stationary_static_excess_smoke_2023-01-23.bag"
    )
    print(f"{bag_path.exists()=}")
    list_topics(bag_path)
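# The helper above extracts camera frames from a single topic despite its name.
# A small companion sketch (assuming the same rosbags API) that actually lists
# every topic and message type in a bag, which is what the file name suggests:
from pathlib import Path

from rosbags.highlevel import AnyReader


def print_topics(bag_file: Path) -> None:
    with AnyReader([bag_file]) as reader:
        for connection in sorted(reader.connections, key=lambda c: c.topic):
            print(f"{connection.topic}  ({connection.msgtype})")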
100
tools/plot_score.py
Normal file
@@ -0,0 +1,100 @@
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Path to your .npy file with anomaly scores
|
||||
scores_path = Path(
|
||||
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/"
|
||||
"DeepSAD/subter_test/inference/3_smoke_human_walking_2023-01-23.npy"
|
||||
)
|
||||
|
||||
# Frames per second
|
||||
fps = 10
|
||||
|
||||
# Load all scores
|
||||
all_scores = np.load(scores_path)
|
||||
|
||||
# (Optional) If your scores are a 1D array, remove or adjust this reshape
|
||||
# The final shape is expected to be (1, total_frames) or (N,) if you want only one line.
|
||||
all_scores = all_scores.reshape(1, -1) # shape becomes (1, total_frames)
|
||||
|
||||
# We only want frames [0..299] => 300 frames
|
||||
max_frames = 300
|
||||
scores = all_scores[0, :max_frames] # Take the first row's first 300 frames
|
||||
|
||||
# Convert frames to time in seconds: time[i] = i / fps
|
||||
time_values = np.arange(len(scores)) / fps # shape is (max_frames,)
|
||||
|
||||
# Recalculate y-limit based on the selected slice
|
||||
y_limit = 1.1 * np.max(scores[np.isfinite(scores)])
|
||||
# If you want to ensure y_limit >= 10, uncomment below:
|
||||
# y_limit = max(y_limit, 10)
|
||||
|
||||
print("Selected Scores Shape:", scores.shape)
|
||||
print("Time Range:", time_values[0], "->", time_values[-1])
|
||||
print("Y-limit:", y_limit)
|
||||
|
||||
# --------------------------
|
||||
# Improve default font sizes
|
||||
# --------------------------
|
||||
plt.rcParams.update(
|
||||
{
|
||||
"figure.dpi": 150,
|
||||
"font.size": 14,
|
||||
"axes.labelsize": 16,
|
||||
"axes.titlesize": 16,
|
||||
"xtick.labelsize": 14,
|
||||
"ytick.labelsize": 14,
|
||||
"legend.fontsize": 14,
|
||||
}
|
||||
)
|
||||
|
||||
# --------------------------------------------------
|
||||
# Create the figure (half the previous width = 3.84)
|
||||
# --------------------------------------------------
|
||||
fig, ax = plt.subplots(figsize=(3.84, 7))
|
||||
|
||||
# Plot the scores vs. time
|
||||
ax.plot(time_values, scores, label="Anomaly Score", color="blue")
|
||||
|
||||
# Set axis labels
|
||||
ax.set_xlabel("Time [s]", fontsize=16)
|
||||
ax.set_ylabel("Score", fontsize=16)
|
||||
|
||||
# Restrict X-axis from 0 to last time value (or 30 s if exactly 300 frames)
|
||||
ax.set_xlim([0, time_values[-1]])
|
||||
ax.set_ylim([0, y_limit])
|
||||
|
||||
# -------------------------
|
||||
# Customize the ticks
|
||||
# -------------------------
|
||||
# Example: only 4 ticks on the x-axis (0, 10, 20, 30) if 300 frames => 30 s
|
||||
ax.set_xticks([0, 10, 20, 30])
|
||||
|
||||
# Guarantee at least one tick at y=10.
|
||||
# Adjust other ticks as you like; here we use 0 and the y_limit.
|
||||
# If y_limit <= 10, you might want to override it.
|
||||
if y_limit > 10:
|
||||
ax.set_yticks([0, 10, round(y_limit, 1)]) # round for cleaner display
|
||||
else:
|
||||
# If your data doesn't go that high, just keep 0 and y_limit
|
||||
ax.set_yticks([0, round(y_limit, 1)])
|
||||
|
||||
# Mark the frame shown in the accompanying image, then add the legend
# (the legend is created after the marker line so its label is included)
plt.axvline(x=22, color="r", label="shown image")

ax.legend(loc="upper right")
|
||||
|
||||
# Tight layout for small figure
|
||||
plt.tight_layout()
|
||||
|
||||
# ----------------------
|
||||
# Save to disk, no show
|
||||
# ----------------------
|
||||
output_filename = f"{scores_path.stem}_static_time_plot.png"
|
||||
plt.savefig(output_filename, dpi=150)
|
||||
|
||||
# Clear the figure if you continue using matplotlib
|
||||
plt.clf()
|
||||
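# The scores path and frame rate above are hard coded. A hedged sketch of how the
# script could take them from the command line instead (mirrors the argparse usage
# in data_spherical_projection.py; the flag names are assumptions):
import argparse
from pathlib import Path

parser = argparse.ArgumentParser(description="Plot anomaly scores over time")
parser.add_argument("--scores-path", type=Path, required=True, help="path to the .npy score file")
parser.add_argument("--fps", type=int, default=10, help="frames per second of the recording")
args = parser.parse_args()
# scores_path, fps = args.scores_path, args.fps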
179
tools/plot_scripts/data_count_lidar_frames.py
Normal file
@@ -0,0 +1,179 @@
|
||||
# this script loads the numpy array files containing the lidar frames and counts the number of frames in each file
|
||||
# the number of frames is then printed to the console per file as well as an aggregated sum of all frames in files
|
||||
# containing the word smoke and ones that do not contain that word, as well as an overall sum of all frames
|
||||
# We also plot a pie chart of the distribution of data points in normal and anomalous experiments
|
||||
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from tabulate import tabulate
|
||||
|
||||
# define data path containing the numpy array files and output path for the plots
|
||||
data_path = Path("/home/fedex/mt/data/subter")
|
||||
|
||||
output_path = Path("/home/fedex/mt/plots/data_count_lidar_frames")
|
||||
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
latest_folder_path = output_path / "latest"
|
||||
archive_folder_path = output_path / "archive"
|
||||
output_datetime_path = output_path / datetime_folder_name
|
||||
|
||||
# if output does not exist, create it
|
||||
output_path.mkdir(exist_ok=True, parents=True)
|
||||
output_datetime_path.mkdir(exist_ok=True, parents=True)
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
archive_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
|
||||
# find all numpy array files and sort them correctly by name
|
||||
normal_experiment_paths, anomaly_experiment_paths = [], []
|
||||
for npy_file_path in data_path.iterdir():
|
||||
if npy_file_path.suffix != ".npy":
|
||||
continue
|
||||
if "smoke" in npy_file_path.name:
|
||||
anomaly_experiment_paths.append(npy_file_path)
|
||||
else:
|
||||
normal_experiment_paths.append(npy_file_path)
|
||||
|
||||
|
||||
# function that counts the number of frames in one experiment
|
||||
def count_frames(npy_file_path):
|
||||
frames = np.load(npy_file_path).shape[0]
|
||||
return frames
|
||||
|
||||
|
||||
# we want to print the numbers of frames in a table so we first gather all the data in two maps
|
||||
normal_experiment_frames = {
|
||||
npy_file_path.stem: count_frames(npy_file_path)
|
||||
for npy_file_path in normal_experiment_paths
|
||||
}
|
||||
anomaly_experiment_frames = {
|
||||
npy_file_path.stem: count_frames(npy_file_path)
|
||||
for npy_file_path in anomaly_experiment_paths
|
||||
}
|
||||
|
||||
# prepare data for tabulate
|
||||
normal_experiment_table = [
|
||||
(experiment, frames) for experiment, frames in normal_experiment_frames.items()
|
||||
]
|
||||
anomaly_experiment_table = [
|
||||
(experiment, frames) for experiment, frames in anomaly_experiment_frames.items()
|
||||
]
|
||||
|
||||
# sort the tables by experiment name
|
||||
normal_experiment_table.sort(key=lambda x: x[0])
|
||||
anomaly_experiment_table.sort(key=lambda x: x[0])
|
||||
|
||||
# add the sum of all frames to the tables
|
||||
normal_experiment_table.append(("Sum", sum(normal_experiment_frames.values())))
|
||||
anomaly_experiment_table.append(("Sum", sum(anomaly_experiment_frames.values())))
|
||||
|
||||
# print the number of frames in each file using tabulate
|
||||
print("Normal experiments:")
|
||||
print(
|
||||
tabulate(normal_experiment_table, headers=["Experiment", "Frames"], tablefmt="grid")
|
||||
)
|
||||
|
||||
# print the smallest, largest, mean and median time of the normal experiments assuming 10 frames per second
|
||||
normal_experiment_frames_values = list(normal_experiment_frames.values())
|
||||
print(
|
||||
f"Smallest time: {min(normal_experiment_frames_values) / 10} seconds, Largest time: {max(normal_experiment_frames_values) / 10} seconds, Mean time: {np.mean(normal_experiment_frames_values) / 10} seconds, Median time: {np.median(normal_experiment_frames_values) / 10} seconds"
|
||||
)
|
||||
|
||||
print("Anomaly experiments:")
|
||||
print(
|
||||
tabulate(
|
||||
anomaly_experiment_table, headers=["Experiment", "Frames"], tablefmt="grid"
|
||||
)
|
||||
)
|
||||
|
||||
# print the smallest, largest, mean and median time of the anomalous experiments assuming 10 frames per second
|
||||
anomaly_experiment_frames_values = list(anomaly_experiment_frames.values())
|
||||
print(
|
||||
f"Smallest time: {min(anomaly_experiment_frames_values) / 10} seconds, Largest time: {max(anomaly_experiment_frames_values) / 10} seconds, Mean time: {np.mean(anomaly_experiment_frames_values) / 10} seconds, Median time: {np.median(anomaly_experiment_frames_values) / 10} seconds"
|
||||
)
|
||||
|
||||
# print the sum of all frames in all experiments
|
||||
total_frames = sum(normal_experiment_frames.values()) + sum(
|
||||
anomaly_experiment_frames.values()
|
||||
)
|
||||
print(f"Total frames in all (normal and anmoaly) experiments: {total_frames} frames")
|
||||
|
||||
# print the sum of normal and anomalous experiments as percentage of the total frames
|
||||
print(
|
||||
f"Percentage of normal experiments: {sum(normal_experiment_frames.values()) / total_frames * 100}%"
|
||||
)
|
||||
print(
|
||||
f"Percentage of anomaly experiments: {sum(anomaly_experiment_frames.values()) / total_frames * 100}%"
|
||||
)
|
||||
sum(normal_experiment_frames.values()) + sum(anomaly_experiment_frames.values())
|
||||
|
||||
|
||||
# define function to plot pie chart of the distribution of data points in normal and anomalous experiments
|
||||
def plot_data_points_pie(normal_experiment_frames, anomaly_experiment_frames):
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# we want to plot the sum of all frames in normal and anomaly experiments as total values and also percentages
|
||||
total_normal_frames = sum(normal_experiment_frames.values())
|
||||
total_anomaly_frames = sum(anomaly_experiment_frames.values())
|
||||
total_frames = total_normal_frames + total_anomaly_frames
|
||||
|
||||
# prepare data for pie chart
|
||||
labels = [
|
||||
"Normal Lidar Frames\nNon-Degraded Pointclouds",
|
||||
"Anomalous Lidar Frames\nDegraded Pointclouds",
|
||||
]
|
||||
sizes = [total_normal_frames, total_anomaly_frames]
|
||||
explode = (0.1, 0) # explode the normal slice
|
||||
|
||||
# define an autopct function that shows percentage and total number of frames per slice
|
||||
def make_autopct(pct):
|
||||
return f"{pct:.1f}%\n({int(pct * total_frames / 100)} frames)"
|
||||
|
||||
# plot pie chart without shadow and with custom autopct
|
||||
fig1, ax1 = plt.subplots()
|
||||
# set a figure size of 10x5
|
||||
fig1.set_size_inches(10, 5)
|
||||
ax1.pie(sizes, explode=explode, labels=labels, autopct=make_autopct, shadow=False)
|
||||
# for labels use center alignment
|
||||
|
||||
ax1.axis("equal") # Equal aspect ratio ensures that pie is drawn as a circle.
|
||||
# display the total number of frames in the center of the pie chart (adjusted vertically)
|
||||
plt.text(
|
||||
0,
|
||||
0.2,
|
||||
f"Total:\n{total_frames} frames",
|
||||
fontsize=12,
|
||||
ha="center",
|
||||
va="center",
|
||||
color="black",
|
||||
)
|
||||
plt.title(
|
||||
"Distribution of Normal and Anomalous\nPointclouds in all Experiments (Lidar Frames)"
|
||||
)
|
||||
plt.tight_layout()
|
||||
|
||||
# save the plot
|
||||
plt.savefig(output_datetime_path / "data_points_pie.png")
|
||||
|
||||
|
||||
plot_data_points_pie(normal_experiment_frames, anomaly_experiment_frames)
|
||||
|
||||
# delete current latest folder
|
||||
shutil.rmtree(latest_folder_path, ignore_errors=True)
|
||||
|
||||
# create new latest folder
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# copy contents of output folder to the latest folder
|
||||
for file in output_datetime_path.iterdir():
|
||||
shutil.copy2(file, latest_folder_path)
|
||||
|
||||
# copy this python script to the output datetime folder to preserve the code used to generate the plots
|
||||
shutil.copy2(__file__, output_datetime_path)
|
||||
shutil.copy2(__file__, latest_folder_path)
|
||||
|
||||
# move output date folder to archive
|
||||
shutil.move(output_datetime_path, archive_folder_path)
|
||||
243
tools/plot_scripts/data_missing_points.py
Normal file
@@ -0,0 +1,243 @@
|
||||
import pickle
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from pointcloudset import Dataset
|
||||
|
||||
# define data path containing the bag files
|
||||
all_data_path = Path("/home/fedex/mt/data/subter")
|
||||
|
||||
output_path = Path("/home/fedex/mt/plots/data_missing_points")
|
||||
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
latest_folder_path = output_path / "latest"
|
||||
archive_folder_path = output_path / "archive"
|
||||
output_datetime_path = output_path / datetime_folder_name
|
||||
|
||||
# if output does not exist, create it
|
||||
output_path.mkdir(exist_ok=True, parents=True)
|
||||
output_datetime_path.mkdir(exist_ok=True, parents=True)
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
archive_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
data_resolution = 32 * 2048
|
||||
|
||||
normal_experiment_paths, anomaly_experiment_paths = [], []
|
||||
|
||||
# find all bag files and sort them correctly by name (experiments with smoke in the name are anomalies)
|
||||
for bag_file_path in all_data_path.iterdir():
|
||||
if bag_file_path.suffix != ".bag":
|
||||
continue
|
||||
if "smoke" in bag_file_path.name:
|
||||
anomaly_experiment_paths.append(bag_file_path)
|
||||
else:
|
||||
normal_experiment_paths.append(bag_file_path)
|
||||
|
||||
# sort anomaly and normal experiments by filesize, ascending
|
||||
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
|
||||
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
|
||||
|
||||
|
||||
# function that plots histogram of how many points are missing in pointclouds for both normal and anomaly experiments
|
||||
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||
# function that finds the number of missing points in list of experiments (used for both normal and anomalous)
|
||||
def find_missing_points(experiment_paths):
|
||||
missing_points = []
|
||||
for dataset in (
|
||||
Dataset.from_file(experiment_path, topic="/ouster/points")
|
||||
for experiment_path in experiment_paths
|
||||
):
|
||||
missing_points_per_pc = []
|
||||
for pc in dataset:
|
||||
missing_points_per_pc.append(data_resolution - pc.data.shape[0])
|
||||
missing_points.append(missing_points_per_pc)
|
||||
# FIXME temporary break to test code on only one experiment
|
||||
# break
|
||||
return missing_points
|
||||
|
||||
# check if the data has already been calculated and saved to a pickle file
|
||||
if (output_path / "missing_points.pkl").exists():
|
||||
with open(output_path / "missing_points.pkl", "rb") as file:
|
||||
missing_points_normal, missing_points_anomaly = pickle.load(file)
|
||||
else:
|
||||
missing_points_normal = find_missing_points(normal_experiment_paths)
|
||||
missing_points_anomaly = find_missing_points(anomaly_experiment_paths)
|
||||
|
||||
# for faster subsequent runs, save the data to a pickle file
|
||||
with open(output_path / "missing_points.pkl", "wb") as file:
|
||||
pickle.dump(
|
||||
(missing_points_normal, missing_points_anomaly),
|
||||
file,
|
||||
protocol=pickle.HIGHEST_PROTOCOL,
|
||||
)
|
||||
|
||||
# combine all missing points into one list for each type of experiment
|
||||
missing_points_normal = np.concatenate(missing_points_normal)
|
||||
missing_points_anomaly = np.concatenate(missing_points_anomaly)
|
||||
|
||||
# create histogram of missing points for normal and anomaly experiments
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.hist(missing_points_normal, bins=100, alpha=0.5, label="Normal Experiments")
|
||||
plt.hist(missing_points_anomaly, bins=100, alpha=0.5, label="Anomaly Experiments")
|
||||
plt.title(title)
|
||||
plt.xlabel("Number of Missing Points")
|
||||
plt.ylabel("Number of Pointclouds")
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_datetime_path / "missing_points.png")
|
||||
|
||||
plt.clf()
|
||||
|
||||
# alternatively plot curves representing the data
|
||||
|
||||
# create alternative version with missing points on y axis and number of pointclouds on x axis
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.hist(
|
||||
missing_points_normal,
|
||||
bins=100,
|
||||
alpha=0.5,
|
||||
label="Normal Experiments",
|
||||
orientation="horizontal",
|
||||
)
|
||||
plt.hist(
|
||||
missing_points_anomaly,
|
||||
bins=100,
|
||||
alpha=0.5,
|
||||
label="Anomaly Experiments",
|
||||
orientation="horizontal",
|
||||
)
|
||||
plt.title(title)
|
||||
plt.xlabel("Number of Pointclouds")
|
||||
plt.ylabel("Number of Missing Points")
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_datetime_path / "missing_points_alternative.png")
|
||||
|
||||
# find min and max of both categories so we can set the same limits for both plots
|
||||
min_missing = np.min([np.min(missing_points_normal), np.min(missing_points_anomaly)])
|
||||
max_missing = np.max([np.max(missing_points_normal), np.max(missing_points_anomaly)])
|
||||
|
||||
# create bins array with min and max values
|
||||
bins = np.linspace(min_missing, max_missing, 100)
|
||||
|
||||
# since the two histograms (normal and anomalous) have different scales, normalize their amplitude and plot a density version as well
|
||||
plt.clf()
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.hist(
|
||||
missing_points_normal,
|
||||
bins=bins,
|
||||
alpha=0.5,
|
||||
label="Normal Experiments",
|
||||
color="blue",
|
||||
density=True,
|
||||
)
|
||||
plt.hist(
|
||||
missing_points_anomaly,
|
||||
bins=bins,
|
||||
alpha=0.5,
|
||||
color="red",
|
||||
label="Anomaly Experiments",
|
||||
density=True,
|
||||
)
|
||||
plt.title(title)
|
||||
plt.xlabel("Number of Missing Points")
|
||||
plt.ylabel("Density")
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_datetime_path / "missing_points_density.png")
|
||||
|
||||
# create another density version which does not plot number of missing points but percentage of measurements that are missing (total number of points is 32*2048)
|
||||
bins = np.linspace(0, 1, 100)
|
||||
plt.clf()
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.hist(
|
||||
missing_points_normal / data_resolution,
|
||||
bins=bins,
|
||||
alpha=0.5,
|
||||
label="Normal Experiments (No Artifical Smoke)",
|
||||
color="blue",
|
||||
density=True,
|
||||
)
|
||||
plt.hist(
|
||||
missing_points_anomaly / data_resolution,
|
||||
bins=bins,
|
||||
alpha=0.5,
|
||||
color="red",
|
||||
label="Anomaly Experiments (With Artifical Smoke)",
|
||||
density=True,
|
||||
)
|
||||
plt.title(title)
|
||||
plt.xlabel("Percentage of Missing Lidar Measurements")
|
||||
plt.ylabel("Density")
|
||||
# display the x axis as percentages
|
||||
plt.gca().set_xticklabels(
|
||||
["{:.0f}%".format(x * 100) for x in plt.gca().get_xticks()]
|
||||
)
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_datetime_path / "missing_points_density_percentage.png")
|
||||
|
||||
# matplotlib does not support normalizing the histograms to the same scale, so we do it manually using numpy
|
||||
num_bins = 100
|
||||
bin_lims = np.linspace(0, 40000, num_bins + 1)
|
||||
bin_centers = 0.5 * (bin_lims[:-1] + bin_lims[1:])
|
||||
bin_widths = bin_lims[1:] - bin_lims[:-1]
|
||||
# calculate the histogram for normal and anomaly experiments
|
||||
normal_hist, _ = np.histogram(missing_points_normal, bins=bin_lims)
|
||||
anomaly_hist, _ = np.histogram(missing_points_anomaly, bins=bin_lims)
|
||||
# normalize the histograms to the same scale
|
||||
normal_hist_normalized = np.array(normal_hist, dtype=float) / np.max(normal_hist)
|
||||
anomaly_hist_normalized = np.array(anomaly_hist, dtype=float) / np.max(anomaly_hist)
|
||||
# plot the normalized histograms
|
||||
plt.clf()
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.bar(
|
||||
bin_centers,
|
||||
normal_hist_normalized,
|
||||
width=bin_widths,
|
||||
align="center",
|
||||
alpha=0.5,
|
||||
label="Normal Experiments",
|
||||
)
|
||||
plt.bar(
|
||||
bin_centers,
|
||||
anomaly_hist_normalized,
|
||||
width=bin_widths,
|
||||
align="center",
|
||||
alpha=0.5,
|
||||
label="Anomaly Experiments",
|
||||
)
|
||||
plt.title(title)
|
||||
plt.xlabel("Number of Missing Points")
|
||||
plt.ylabel("Normalized Density")
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_datetime_path / "missing_points_normalized.png")
|
||||
|
||||
|
||||
# plot histogram of missing points for normal and anomaly experiments
|
||||
plot_data_points(
|
||||
normal_experiment_paths,
|
||||
anomaly_experiment_paths,
|
||||
"Missing Lidar Measurements per Scan",
|
||||
)
|
||||
|
||||
# delete current latest folder
|
||||
shutil.rmtree(latest_folder_path, ignore_errors=True)
|
||||
|
||||
# create new latest folder
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# copy contents of output folder to the latest folder
|
||||
for file in output_datetime_path.iterdir():
|
||||
shutil.copy2(file, latest_folder_path)
|
||||
|
||||
# copy this python script to the output datetime folder to preserve the code used to generate the plots
|
||||
shutil.copy2(__file__, output_datetime_path)
|
||||
shutil.copy2(__file__, latest_folder_path)
|
||||
|
||||
# move output date folder to archive
|
||||
shutil.move(output_datetime_path, archive_folder_path)
|
||||
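# The percentage axis above rewrites the labels returned by get_xticks(), which can
# drift if the axis limits change after the call. A hedged alternative using
# matplotlib's PercentFormatter (same data, purely a formatting choice):
from matplotlib.ticker import PercentFormatter

plt.gca().xaxis.set_major_formatter(PercentFormatter(xmax=1.0, decimals=0))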
220
tools/plot_scripts/data_particles_near_sensor.py
Normal file
@@ -0,0 +1,220 @@
|
||||
import pickle
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from pointcloudset import Dataset
|
||||
|
||||
# define data path containing the bag files
|
||||
all_data_path = Path("/home/fedex/mt/data/subter")
|
||||
|
||||
output_path = Path("/home/fedex/mt/plots/data_particles_near_sensor")
|
||||
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
latest_folder_path = output_path / "latest"
|
||||
archive_folder_path = output_path / "archive"
|
||||
output_datetime_path = output_path / datetime_folder_name
|
||||
|
||||
# if output does not exist, create it
|
||||
output_path.mkdir(exist_ok=True, parents=True)
|
||||
output_datetime_path.mkdir(exist_ok=True, parents=True)
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
archive_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
normal_experiment_paths, anomaly_experiment_paths = [], []
|
||||
|
||||
# find all bag files and sort them correctly by name (experiments with smoke in the name are anomalies)
|
||||
for bag_file_path in all_data_path.iterdir():
|
||||
if bag_file_path.suffix != ".bag":
|
||||
continue
|
||||
if "smoke" in bag_file_path.name:
|
||||
anomaly_experiment_paths.append(bag_file_path)
|
||||
else:
|
||||
normal_experiment_paths.append(bag_file_path)
|
||||
|
||||
# print out the names of the normal and anomaly experiments that we found
|
||||
print("Normal experiments:")
|
||||
for path in normal_experiment_paths:
|
||||
print(path.name)
|
||||
print("\nAnomaly experiments:")
|
||||
for path in anomaly_experiment_paths:
|
||||
print(path.name)
|
||||
|
||||
# sort anomaly and normal experiments by filesize, ascending
|
||||
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
|
||||
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
|
||||
|
||||
|
||||
# function that plots, for each range threshold, the share of lidar measurements closer than that range for both normal and anomaly experiments
|
||||
def plot_data_points(normal_experiment_paths, anomaly_experiment_paths, title):
|
||||
# function that counts the measurements with a range below the given threshold in a list of experiments (used for both normal and anomalous)
|
||||
def find_particles_near_sensor(experiment_paths, range_threshold):
|
||||
particles_near_sensor = []
|
||||
for dataset in (
|
||||
Dataset.from_file(experiment_path, topic="/ouster/points")
|
||||
for experiment_path in experiment_paths
|
||||
):
|
||||
particles_near_sensor_per_pc = []
|
||||
for pc in dataset:
|
||||
particles_near_sensor_per_pc.append(
|
||||
pc.data[pc.data["range"] < range_threshold].shape[0]
|
||||
)
|
||||
particles_near_sensor.append(particles_near_sensor_per_pc)
|
||||
return particles_near_sensor
|
||||
|
||||
range_thresholds = [500, 750, 1000, 1250, 1500]  # range values are in millimetres (0.5 m to 1.5 m)
|
||||
|
||||
for rt in range_thresholds:
|
||||
print(f"Processing range threshold {rt}...")
|
||||
|
||||
# check if the data has already been calculated and saved to a pickle file
|
||||
if (output_path / f"particles_near_sensor_counts_{rt}.pkl").exists():
|
||||
with open(
|
||||
output_path / f"particles_near_sensor_counts_{rt}.pkl", "rb"
|
||||
) as file:
|
||||
particles_near_sensor_normal, particles_near_sensor_anomaly = (
|
||||
pickle.load(file)
|
||||
)
|
||||
else:
|
||||
particles_near_sensor_normal = find_particles_near_sensor(
|
||||
normal_experiment_paths,
|
||||
rt,
|
||||
)
|
||||
particles_near_sensor_anomaly = find_particles_near_sensor(
|
||||
anomaly_experiment_paths,
|
||||
rt,
|
||||
)
|
||||
|
||||
# for faster subsequent runs, save the data to a pickle file
|
||||
with open(output_path / f"particles_near_sensor_counts_{rt}.pkl", "wb") as file:
|
||||
pickle.dump(
|
||||
(particles_near_sensor_normal, particles_near_sensor_anomaly),
|
||||
file,
|
||||
protocol=pickle.HIGHEST_PROTOCOL,
|
||||
)
|
||||
|
||||
# combine all counts of how many particles are close to the sensor into one list for each type of experiment
|
||||
particles_near_sensor_normal = np.concatenate(particles_near_sensor_normal)
|
||||
particles_near_sensor_anomaly = np.concatenate(particles_near_sensor_anomaly)
|
||||
|
||||
# find min and max of both categories so we can set the same limits for both plots
|
||||
min = np.min(
|
||||
[
|
||||
np.min(particles_near_sensor_normal),
|
||||
np.min(particles_near_sensor_anomaly),
|
||||
]
|
||||
)
|
||||
max = np.max(
|
||||
[
|
||||
np.max(particles_near_sensor_normal),
|
||||
np.max(particles_near_sensor_anomaly),
|
||||
]
|
||||
)
|
||||
|
||||
# create bins array with min and max values
|
||||
bins = np.linspace(min, max, 100)
|
||||
|
||||
# since the two histograms (normal and anomalous) have different scales, normalize their amplitude and plot a density version as well
|
||||
# commented out since boxplot is more informative
|
||||
# plt.clf()
|
||||
# plt.figure(figsize=(10, 5))
|
||||
# plt.hist(
|
||||
# particles_near_sensor_normal,
|
||||
# bins=bins,
|
||||
# alpha=0.5,
|
||||
# label="Normal Experiments (No Artifical Smoke)",
|
||||
# color="blue",
|
||||
# density=True,
|
||||
# )
|
||||
# plt.hist(
|
||||
# particles_near_sensor_anomaly,
|
||||
# bins=bins,
|
||||
# alpha=0.5,
|
||||
# color="red",
|
||||
# label="Anomaly Experiments (With Artifical Smoke)",
|
||||
# density=True,
|
||||
# )
|
||||
# plt.title(title)
|
||||
# plt.xlabel("Number of Particles Near Sensor")
|
||||
# plt.ylabel("Density")
|
||||
# plt.legend()
|
||||
# plt.tight_layout()
|
||||
# plt.savefig(output_datetime_path / f"particles_near_sensor_density_{rt}.png")
|
||||
|
||||
# alternatively create a box plot to show the distribution of the data
|
||||
# instead of plotting the frequency of particles near sensor we'll plot the percentage of points (compared to the total number of points in the pointcloud)
|
||||
|
||||
data_resolution = 32 * 2048
|
||||
|
||||
plt.clf()
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.boxplot(
|
||||
[
|
||||
particles_near_sensor_normal / data_resolution,
|
||||
particles_near_sensor_anomaly / data_resolution,
|
||||
],
|
||||
tick_labels=[
|
||||
"Normal Experiments (No Artifical Smoke)",
|
||||
"Anomaly Experiments (With Artifical Smoke)",
|
||||
],
|
||||
)
|
||||
# format the y axis labels as percentages
|
||||
plt.gca().set_yticklabels(
|
||||
["{:.0f}%".format(y * 100) for y in plt.gca().get_yticks()]
|
||||
)
|
||||
plt.title("Particles Closer than 0.5m to the Sensor")
|
||||
plt.ylabel("Percentage of measurements closer than 0.5m")
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_datetime_path / f"particles_near_sensor_boxplot_{rt}.png")
|
||||
|
||||
# we create the same boxplot but limit the y-axis to 5% to better see the distribution of the data
|
||||
plt.clf()
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.boxplot(
|
||||
[
|
||||
particles_near_sensor_normal / data_resolution,
|
||||
particles_near_sensor_anomaly / data_resolution,
|
||||
],
|
||||
tick_labels=[
|
||||
"Normal Experiments (No Artifical Smoke)",
|
||||
"Anomaly Experiments (With Artifical Smoke)",
|
||||
],
|
||||
)
|
||||
# format the y axis labels as percentages
|
||||
plt.gca().set_yticklabels(
|
||||
["{:.0f}%".format(y * 100) for y in plt.gca().get_yticks()]
|
||||
)
|
||||
plt.title("Particles Closer than 0.5m to the Sensor")
|
||||
plt.ylabel("Percentage of measurements closer than 0.5m")
|
||||
plt.ylim(0, 0.05)
|
||||
plt.tight_layout()
|
||||
plt.savefig(
|
||||
output_datetime_path / f"particles_near_sensor_boxplot_zoomed_{rt}.png"
|
||||
)
|
||||
|
||||
|
||||
# plot, for each range threshold, boxplots of the share of measurements closer than that range for both normal and anomaly experiments
|
||||
plot_data_points(
|
||||
normal_experiment_paths,
|
||||
anomaly_experiment_paths,
|
||||
"Density of Number of Particles Near Sensor",
|
||||
)
|
||||
|
||||
# delete current latest folder
|
||||
shutil.rmtree(latest_folder_path, ignore_errors=True)
|
||||
|
||||
# create new latest folder
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# copy contents of output folder to the latest folder
|
||||
for file in output_datetime_path.iterdir():
|
||||
shutil.copy2(file, latest_folder_path)
|
||||
|
||||
# copy this python script to the output datetime folder to preserve the code used to generate the plots
|
||||
shutil.copy2(__file__, output_datetime_path)
|
||||
shutil.copy2(__file__, latest_folder_path)
|
||||
|
||||
# move output date folder to archive
|
||||
shutil.move(output_datetime_path, archive_folder_path)
|
||||
188
tools/plot_scripts/data_spherical_projection.py
Normal file
@@ -0,0 +1,188 @@
|
||||
import argparse
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from matplotlib import colormaps
|
||||
from matplotlib.cm import ScalarMappable
|
||||
from matplotlib.colors import Colormap, ListedColormap
|
||||
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
|
||||
from PIL import Image # new import
|
||||
|
||||
|
||||
def get_colormap_with_special_missing_color(
|
||||
colormap_name: str, missing_data_color: str = "black", reverse: bool = False
|
||||
) -> Colormap:
|
||||
colormap = (
|
||||
colormaps[colormap_name] if not reverse else colormaps[f"{colormap_name}_r"]
|
||||
)
|
||||
colormap.set_bad(missing_data_color)
|
||||
colormap.set_over("white")
|
||||
return colormap
|
||||
|
||||
|
||||
# --- Setup output folders (similar to data_missing_points.py) ---
|
||||
# Change the output path as needed
|
||||
output_path = Path("/home/fedex/mt/plots/data_2d_projections")
|
||||
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
output_datetime_path = output_path / datetime_folder_name
|
||||
latest_folder_path = output_path / "latest"
|
||||
archive_folder_path = output_path / "archive"
|
||||
|
||||
for folder in (
|
||||
output_path,
|
||||
output_datetime_path,
|
||||
latest_folder_path,
|
||||
archive_folder_path,
|
||||
):
|
||||
folder.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# --- Parse command-line arguments ---
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Plot two 2D projections (from .npy files) in vertical subplots"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--input1",
|
||||
type=Path,
|
||||
default=Path(
|
||||
"/home/fedex/mt/data/subter/new_projection/1_loop_closure_illuminated_2023-01-23.npy"
|
||||
),
|
||||
help="Path to first .npy file containing 2D projection data",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--input2",
|
||||
type=Path,
|
||||
default=Path(
|
||||
"/home/fedex/mt/data/subter/new_projection/3_smoke_human_walking_2023-01-23.npy"
|
||||
),
|
||||
help="Path to second .npy file containing 2D projection data",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--frame1",
|
||||
type=int,
|
||||
default=955,
|
||||
help="Frame index to plot from first file (0-indexed)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--frame2",
|
||||
type=int,
|
||||
default=242,
|
||||
help="Frame index to plot from second file (0-indexed)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--colormap",
|
||||
default="viridis",
|
||||
type=str,
|
||||
help="Name of matplotlib colormap to use",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--missing-data-color",
|
||||
default="black",
|
||||
type=str,
|
||||
help="Color to use for missing data in projection",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--reverse-colormap",
|
||||
action="store_true",
|
||||
help="Reverse the colormap if specified",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
# --- Load the numpy projection data from the provided files ---
|
||||
# Each file is assumed to be a 3D array: (num_frames, height, width)
|
||||
proj_data1 = np.load(args.input1)
|
||||
proj_data2 = np.load(args.input2)
|
||||
|
||||
# Choose the desired frames
|
||||
try:
|
||||
frame1 = proj_data1[args.frame1]
|
||||
except IndexError:
|
||||
raise ValueError(f"Frame index {args.frame1} out of range for file: {args.input1}")
|
||||
try:
|
||||
frame2 = proj_data2[args.frame2]
|
||||
except IndexError:
|
||||
raise ValueError(f"Frame index {args.frame2} out of range for file: {args.input2}")
|
||||
|
||||
# Use a fixed shared colour range for both frames (the commented-out expressions derive it from the data instead)
|
||||
global_vmin = 0 # min(np.nanmin(frame1), np.nanmin(frame2))
|
||||
global_vmax = 0.8 # max(np.nanmax(frame1), np.nanmax(frame2))
|
||||
|
||||
# Create colormap using the utility (to mimic create_2d_projection)
|
||||
cmap = get_colormap_with_special_missing_color(
|
||||
args.colormap, args.missing_data_color, args.reverse_colormap
|
||||
)
|
||||
|
||||
# --- Create a figure with 2 vertical subplots ---
|
||||
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(10, 5))
|
||||
for ax, frame, title in zip(
|
||||
(ax1, ax2),
|
||||
(frame1, frame2),
|
||||
(
|
||||
"Projection of Lidar Frame without Degradation",
|
||||
"Projection of Lidar Frame with Degradation (Artifical Smoke)",
|
||||
),
|
||||
):
|
||||
im = ax.imshow(frame, cmap=cmap, aspect="auto", vmin=global_vmin, vmax=global_vmax)
|
||||
ax.set_title(title)
|
||||
ax.axis("off")
|
||||
|
||||
# Adjust layout to fit margins for a paper
|
||||
plt.tight_layout(rect=[0, 0.05, 1, 1])
|
||||
# Add a colorbar with the colormap below the subplots
|
||||
cbar = fig.colorbar(im, ax=[ax1, ax2], orientation="vertical", fraction=0.05)
|
||||
cbar.set_label("Normalized Range")
|
||||
|
||||
# Add a separate colorbar for NaN values
|
||||
sm = ScalarMappable(cmap=ListedColormap([cmap.get_bad(), cmap.get_over()]))
|
||||
divider = make_axes_locatable(cbar.ax)
|
||||
nan_ax = divider.append_axes(
|
||||
"bottom", size="15%", pad="3%", aspect=3, anchor=cbar.ax.get_anchor()
|
||||
)
|
||||
nan_ax.grid(visible=False, which="both", axis="both")
|
||||
nan_cbar = fig.colorbar(sm, cax=nan_ax, orientation="vertical")
|
||||
nan_cbar.set_ticks([0.3, 0.7])
|
||||
nan_cbar.set_ticklabels(["NaN", f"> {global_vmax}"])
|
||||
nan_cbar.ax.tick_params(length=0)
|
||||
|
||||
# Save the combined plot
|
||||
output_file = output_datetime_path / "data_2d_projections.png"
|
||||
plt.savefig(output_file, dpi=300, bbox_inches="tight", pad_inches=0.1)
|
||||
plt.close()
|
||||
|
||||
print(f"Plot saved to: {output_file}")
|
||||
|
||||
# --- Create grayscale images (high precision) from the numpy frames using Pillow ---
|
||||
# Convert NaN values to 0 and ensure the array is in float32 for 32-bit precision.
|
||||
for degradation_status, frame_number, frame in (
|
||||
("normal", args.frame1, frame1),
|
||||
("smoke", args.frame2, frame2),
|
||||
):
|
||||
frame_gray = np.nan_to_num(frame, nan=0).astype(np.float32)
|
||||
gray_image = Image.fromarray(frame_gray, mode="F")
|
||||
gray_output_file = (
|
||||
output_datetime_path
|
||||
/ f"frame_{frame_number}_grayscale_{degradation_status}.tiff"
|
||||
)
|
||||
gray_image.save(gray_output_file)
|
||||
print(f"Grayscale image saved to: {gray_output_file}")
|
||||
|
||||
# --- Handle folder structure: update latest folder and archive the output folder ---
|
||||
|
||||
# Delete current latest folder and recreate it
|
||||
shutil.rmtree(latest_folder_path, ignore_errors=True)
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# Copy contents of the current output datetime folder to latest folder
|
||||
for file in output_datetime_path.iterdir():
|
||||
shutil.copy2(file, latest_folder_path)
|
||||
|
||||
# Copy this python script to both output datetime folder and latest folder for preservation
|
||||
script_path = Path(__file__)
|
||||
shutil.copy2(script_path, output_datetime_path)
|
||||
shutil.copy2(script_path, latest_folder_path)
|
||||
|
||||
# Move the output datetime folder to the archive folder
|
||||
shutil.move(output_datetime_path, archive_folder_path)
|
||||
print(f"Output archived to: {archive_folder_path}")
|
||||
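# The frames above are written as 32-bit float TIFFs (Pillow mode "F"). For reference,
# a short sketch of reading such a file back into a float32 array; the file name is
# only an example matching the default frame indices:
import numpy as np
from PIL import Image

frame_back = np.asarray(Image.open("frame_955_grayscale_normal.tiff"), dtype=np.float32)
print(frame_back.shape, frame_back.dtype, frame_back.max())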
160
tools/plot_scripts/deepsad_tsne_latent_space.py
Normal file
@@ -0,0 +1,160 @@
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from sklearn.decomposition import PCA
|
||||
from sklearn.manifold import TSNE
|
||||
|
||||
# define data path containing the npy output files from your method
|
||||
all_data_path = Path(
|
||||
"/home/fedex/mt/projects/thesis-kowalczyk-jan/Deep-SAD-PyTorch/infer/DeepSAD/all_infer/inference"
|
||||
)
|
||||
|
||||
output_path = Path("/home/fedex/mt/plots/deepsad_reduced_latent_space")
|
||||
datetime_folder_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
|
||||
|
||||
latest_folder_path = output_path / "latest"
|
||||
archive_folder_path = output_path / "archive"
|
||||
output_datetime_path = output_path / datetime_folder_name
|
||||
|
||||
# create required output directories if they do not exist
|
||||
output_path.mkdir(exist_ok=True, parents=True)
|
||||
output_datetime_path.mkdir(exist_ok=True, parents=True)
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
archive_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
normal_experiment_paths = []
|
||||
anomaly_experiment_paths = []
|
||||
|
||||
# locate and sort the npy files (experiment outputs) based on file size
|
||||
for file_path in all_data_path.iterdir():
|
||||
if file_path.suffix != ".npy":
|
||||
continue
|
||||
# check if the file name contains "output" to ensure it's an experiment output file
|
||||
if "output" not in file_path.stem:
|
||||
continue
|
||||
if "smoke" in file_path.name:
|
||||
anomaly_experiment_paths.append(file_path)
|
||||
else:
|
||||
normal_experiment_paths.append(file_path)
|
||||
|
||||
print("Normal experiments:")
|
||||
for path in normal_experiment_paths:
|
||||
print(path.name)
|
||||
|
||||
print("\nAnomaly experiments:")
|
||||
for path in anomaly_experiment_paths:
|
||||
print(path.name)
|
||||
|
||||
normal_experiment_paths.sort(key=lambda path: path.stat().st_size)
|
||||
anomaly_experiment_paths.sort(key=lambda path: path.stat().st_size)
|
||||
|
||||
|
||||
def load_latent_space_data(experiment_paths):
|
||||
"""
|
||||
Load latent space data from npy files and return a single numpy array.
|
||||
Modify this function if your file structure is different.
|
||||
"""
|
||||
data_list = []
|
||||
for path in experiment_paths:
|
||||
latent_data = np.load(path)
|
||||
data_list.append(latent_data)
|
||||
return np.vstack(data_list)
|
||||
|
||||
|
||||
def reduce_dimensionality(data, n_components=50):
|
||||
"""
|
||||
Reduce the dimensionality of the data using PCA.
|
||||
This function can be re-used by TSNE or other methods for an initial reduction.
|
||||
"""
|
||||
pca = PCA(n_components=n_components, random_state=42)
|
||||
return pca.fit_transform(data)
|
||||
|
||||
|
||||
def plot_tsne_latent_space(normal_data, anomaly_data, title="TSNE of Latent Space"):
|
||||
"""
|
||||
Plot the TSNE representation of the latent space.
|
||||
This function first applies a PCA-based dimensionality reduction for efficiency.
|
||||
"""
|
||||
# Combine normal and anomaly data
|
||||
combined_data = np.vstack((normal_data, anomaly_data))
|
||||
|
||||
# Initial dimensionality reduction with PCA
|
||||
reduced_data = reduce_dimensionality(combined_data, n_components=50)
|
||||
|
||||
# Apply TSNE transformation on the PCA-reduced data
|
||||
tsne = TSNE(n_components=2, random_state=42)
|
||||
tsne_results = tsne.fit_transform(reduced_data)
|
||||
|
||||
# Split the TSNE results back into normal and anomaly arrays
|
||||
tsne_normal = tsne_results[: len(normal_data)]
|
||||
tsne_anomaly = tsne_results[len(normal_data) :]
|
||||
|
||||
# Plotting TSNE results
|
||||
plt.clf()
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.scatter(
|
||||
tsne_anomaly[:, 0], tsne_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
|
||||
)
|
||||
plt.scatter(
|
||||
tsne_normal[:, 0], tsne_normal[:, 1], label="Normal", alpha=0.6, marker="o"
|
||||
)
|
||||
plt.title(title)
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_datetime_path / "tsne_latent_space_plot.png")
|
||||
|
||||
|
||||
def plot_pca_scatter(normal_data, anomaly_data, title="PCA Scatter Plot"):
|
||||
"""
|
||||
Plot a 2-dimensional scatterplot of the latent space using PCA.
|
||||
This is useful for visualization and can be easily extended.
|
||||
"""
|
||||
# Combine normal and anomaly data
|
||||
combined_data = np.vstack((normal_data, anomaly_data))
|
||||
pca = PCA(n_components=2, random_state=42)
|
||||
pca_results = pca.fit_transform(combined_data)
|
||||
|
||||
# Split the PCA results back into normal and anomaly arrays
|
||||
pca_normal = pca_results[: len(normal_data)]
|
||||
pca_anomaly = pca_results[len(normal_data) :]
|
||||
|
||||
# Plotting PCA scatter results
|
||||
plt.clf()
|
||||
plt.figure(figsize=(10, 5))
|
||||
plt.scatter(
|
||||
pca_anomaly[:, 0], pca_anomaly[:, 1], label="Anomaly", alpha=0.6, marker="x"
|
||||
)
|
||||
plt.scatter(
|
||||
pca_normal[:, 0], pca_normal[:, 1], label="Normal", alpha=0.6, marker="o"
|
||||
)
|
||||
plt.title(title)
|
||||
plt.legend()
|
||||
plt.tight_layout()
|
||||
plt.savefig(output_datetime_path / "pca_latent_space_plot.png")
|
||||
|
||||
|
||||
# load latent space data for both normal and anomalous experiments
|
||||
normal_data = load_latent_space_data(normal_experiment_paths)
|
||||
anomaly_data = load_latent_space_data(anomaly_experiment_paths)
|
||||
|
||||
# call your plotting functions
|
||||
plot_tsne_latent_space(normal_data, anomaly_data)
|
||||
plot_pca_scatter(normal_data, anomaly_data)
|
||||
|
||||
# update the 'latest' results folder: delete previous and copy current outputs
|
||||
shutil.rmtree(latest_folder_path, ignore_errors=True)
|
||||
latest_folder_path.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
for file in output_datetime_path.iterdir():
|
||||
shutil.copy2(file, latest_folder_path)
|
||||
|
||||
# copy this script to the output folder and to the latest folder to preserve the used code
|
||||
script_path = Path(__file__)
|
||||
shutil.copy2(script_path, output_datetime_path)
|
||||
shutil.copy2(script_path, latest_folder_path)
|
||||
|
||||
# move the output date folder to the archive folder for record keeping
|
||||
shutil.move(output_datetime_path, archive_folder_path)
|
||||
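# t-SNE cost grows quickly with the number of stacked latent vectors, and the script
# above embeds every frame. A hedged sketch of an optional subsampling step that could
# be applied before the PCA/t-SNE call (an addition, not part of the original script):
import numpy as np


def subsample(data: np.ndarray, max_points: int = 5000, seed: int = 42) -> np.ndarray:
    # Randomly keep at most max_points rows to bound the t-SNE runtime.
    if len(data) <= max_points:
        return data
    rng = np.random.default_rng(seed)
    idx = rng.choice(len(data), size=max_points, replace=False)
    return data[idx]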
@@ -20,6 +20,7 @@ dask = "^2024.4.2"
dask-expr = "^1.1.3"
pandas = "^2.2.2"
pathvalidate = "^3.2.0"
tabulate = "^0.9.0"

[build-system]
requires = ["poetry-core"]
@@ -137,47 +137,47 @@ def process_frame(args) -> Tuple[int, np.ndarray, Optional[Path]]:
|
||||
lidar_data["horizontal_position"] = (
|
||||
lidar_data["original_id"] % horizontal_resolution
|
||||
)
|
||||
lidar_data["horizontal_position_yaw_f"] = (
|
||||
0.5
|
||||
* horizontal_resolution
|
||||
* (np.arctan2(lidar_data["y"], lidar_data["x"]) / pi + 1.0)
|
||||
)
|
||||
lidar_data["horizontal_position_yaw"] = np.floor(
|
||||
lidar_data["horizontal_position_yaw_f"]
|
||||
)
|
||||
# lidar_data["horizontal_position_yaw_f"] = (
|
||||
# 0.5
|
||||
# * horizontal_resolution
|
||||
# * (np.arctan2(lidar_data["y"], lidar_data["x"]) / pi + 1.0)
|
||||
# )
|
||||
# lidar_data["horizontal_position_yaw"] = np.floor(
|
||||
# lidar_data["horizontal_position_yaw_f"]
|
||||
# )
|
||||
lidar_data["vertical_position"] = np.floor(
|
||||
lidar_data["original_id"] / horizontal_resolution
|
||||
)
|
||||
# fov = 32 * pi / 180
|
||||
# fov_down = 17 * pi / 180
|
||||
fov = 31.76 * pi / 180
|
||||
fov_down = 17.3 * pi / 180
|
||||
lidar_data["vertical_angle"] = np.arcsin(
|
||||
lidar_data["z"]
|
||||
/ np.sqrt(lidar_data["x"] ** 2 + lidar_data["y"] ** 2 + lidar_data["z"] ** 2)
|
||||
)
|
||||
lidar_data["vertical_angle_degree"] = lidar_data["vertical_angle"] * 180 / pi
|
||||
# fov = 31.76 * pi / 180
|
||||
# fov_down = 17.3 * pi / 180
|
||||
# lidar_data["vertical_angle"] = np.arcsin(
|
||||
# lidar_data["z"]
|
||||
# / np.sqrt(lidar_data["x"] ** 2 + lidar_data["y"] ** 2 + lidar_data["z"] ** 2)
|
||||
# )
|
||||
# lidar_data["vertical_angle_degree"] = lidar_data["vertical_angle"] * 180 / pi
|
||||
|
||||
lidar_data["vertical_position_pitch_f"] = vertical_resolution * (
|
||||
1 - ((lidar_data["vertical_angle"] + fov_down) / fov)
|
||||
)
|
||||
lidar_data["vertical_position_pitch"] = np.floor(
|
||||
lidar_data["vertical_position_pitch_f"]
|
||||
)
|
||||
# lidar_data["vertical_position_pitch_f"] = vertical_resolution * (
|
||||
# 1 - ((lidar_data["vertical_angle"] + fov_down) / fov)
|
||||
# )
|
||||
# lidar_data["vertical_position_pitch"] = np.floor(
|
||||
# lidar_data["vertical_position_pitch_f"]
|
||||
# )
|
||||
|
||||
duplicates = lidar_data[
|
||||
lidar_data.duplicated(
|
||||
subset=["vertical_position_pitch", "horizontal_position_yaw"],
|
||||
keep=False,
|
||||
)
|
||||
].sort_values(by=["vertical_position_pitch", "horizontal_position_yaw"])
|
||||
# duplicates = lidar_data[
|
||||
# lidar_data.duplicated(
|
||||
# subset=["vertical_position_pitch", "horizontal_position_yaw"],
|
||||
# keep=False,
|
||||
# )
|
||||
# ].sort_values(by=["vertical_position_pitch", "horizontal_position_yaw"])
|
||||
|
||||
lidar_data["normalized_range"] = 1 / np.sqrt(
|
||||
lidar_data["x"] ** 2 + lidar_data["y"] ** 2 + lidar_data["z"] ** 2
|
||||
)
|
||||
projection_data = lidar_data.pivot(
|
||||
index="vertical_position_pitch",
|
||||
columns="horizontal_position_yaw",
|
||||
index="vertical_position",
|
||||
columns="horizontal_position",
|
||||
values="normalized_range",
|
||||
)
|
||||
projection_data = projection_data.reindex(
|
||||
@@ -208,8 +208,8 @@ def process_frame(args) -> Tuple[int, np.ndarray, Optional[Path]]:
|
||||
i,
|
||||
projection_data.to_numpy(),
|
||||
image_path,
|
||||
lidar_data["vertical_position_pitch_f"].min(),
|
||||
lidar_data["vertical_position_pitch_f"].max(),
|
||||
# lidar_data["vertical_position_pitch_f"].min(),
|
||||
# lidar_data["vertical_position_pitch_f"].max(),
|
||||
)
|
||||
|
||||
|
||||
@@ -304,7 +304,7 @@ def create_projection_data(
|
||||
if render_images:
|
||||
return projection_data, rendered_images
|
||||
else:
|
||||
return (projection_data,)
|
||||
return projection_data, None
|
||||
|
||||
|
||||
def main() -> int:
|
||||
|
||||
@@ -1,21 +1,24 @@
|
||||
from configargparse import ArgParser, YAMLConfigFileParser, ArgumentDefaultsRawHelpFormatter
|
||||
from pathlib import Path
|
||||
from sys import exit
|
||||
from open3d.visualization.rendering import OffscreenRenderer, MaterialRecord
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from configargparse import (
|
||||
ArgParser,
|
||||
ArgumentDefaultsRawHelpFormatter,
|
||||
YAMLConfigFileParser,
|
||||
)
|
||||
from open3d.io import read_pinhole_camera_parameters, write_image
|
||||
from open3d.utility import Vector3dVector
|
||||
from pathlib import Path
|
||||
from open3d.visualization.rendering import MaterialRecord, OffscreenRenderer
|
||||
from pointcloudset import Dataset
|
||||
from rich.progress import track
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import numpy as np
|
||||
|
||||
from util import (
|
||||
load_dataset,
|
||||
existing_file,
|
||||
create_video_from_images,
|
||||
calculate_average_frame_rate,
|
||||
create_video_from_images,
|
||||
existing_file,
|
||||
load_dataset,
|
||||
)
|
||||
|
||||
|
||||
@@ -42,14 +45,18 @@ def render_3d_images(
|
||||
distances = np.linalg.norm(points, axis=1)
|
||||
max_distance = distances.max()
|
||||
min_distance = distances.min()
|
||||
normalized_distances = (distances - min_distance) / (max_distance - min_distance)
|
||||
normalized_distances = (distances - min_distance) / (
|
||||
max_distance - min_distance
|
||||
)
|
||||
colors = plt.get_cmap("jet")(normalized_distances)[:, :3]
|
||||
pcd.colors = Vector3dVector(colors)
|
||||
return pcd
|
||||
|
||||
rendered_images = []
|
||||
|
||||
for i, pc in track(enumerate(dataset, 1), description="Rendering images...", total=len(dataset)):
|
||||
for i, pc in track(
|
||||
enumerate(dataset, 1), description="Rendering images...", total=len(dataset)
|
||||
):
|
||||
o3d_pc = pc.to_instance("open3d")
|
||||
o3d_pc = color_points_by_range(o3d_pc)
|
||||
renderer.scene.add_geometry("point_cloud", o3d_pc, MaterialRecord())
|
||||
@@ -68,19 +75,35 @@ def main() -> int:
|
||||
formatter_class=ArgumentDefaultsRawHelpFormatter,
|
||||
description="Render a 3d representation of a point cloud",
|
||||
)
|
||||
parser.add_argument("--render-config-file", is_config_file=True, help="yaml config file path")
|
||||
parser.add_argument("--input-bag-path", required=True, type=existing_file, help="path to bag file")
|
||||
parser.add_argument(
|
||||
"--tmp-files-path", default=Path("./tmp"), type=Path, help="path temporary files will be written to"
|
||||
"--render-config-file", is_config_file=True, help="yaml config file path"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-images", type=bool, default=True, help="if rendered frames should be outputted as images"
|
||||
"--input-bag-path", required=True, type=existing_file, help="path to bag file"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-images-path", default=Path("./output"), type=Path, help="path rendered frames should be written to"
|
||||
"--tmp-files-path",
|
||||
default=Path("./tmp"),
|
||||
type=Path,
|
||||
help="path temporary files will be written to",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-video", type=bool, default=True, help="if rendered frames should be outputted as a video"
|
||||
"--output-images",
|
||||
type=bool,
|
||||
default=True,
|
||||
help="if rendered frames should be outputted as images",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-images-path",
|
||||
default=Path("./output"),
|
||||
type=Path,
|
||||
help="path rendered frames should be written to",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-video",
|
||||
type=bool,
|
||||
default=True,
|
||||
help="if rendered frames should be outputted as a video",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-video-path",
|
||||
@@ -88,7 +111,12 @@ def main() -> int:
|
||||
type=Path,
|
||||
help="path rendered video should be written to",
|
||||
)
|
||||
parser.add_argument("--output-images-prefix", default="2d_render", type=str, help="filename prefix for output")
|
||||
parser.add_argument(
|
||||
"--output-images-prefix",
|
||||
default="2d_render",
|
||||
type=str,
|
||||
help="filename prefix for output",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--camera-config-input-json-path",
|
||||
default="./saved_camera_settings.json",
|
||||
@@ -110,12 +138,21 @@ def main() -> int:
|
||||
dataset = load_dataset(args.input_bag_path)
|
||||
|
||||
images = render_3d_images(
|
||||
dataset, args.camera_config_input_json_path, args.tmp_files_path, args.output_images_prefix
|
||||
dataset,
|
||||
args.camera_config_input_json_path,
|
||||
args.tmp_files_path,
|
||||
args.output_images_prefix,
|
||||
)
|
||||
|
||||
if args.output_video:
|
||||
input_images_pattern = f"{args.tmp_files_path / args.output_images_prefix}_%04d.png"
|
||||
create_video_from_images(input_images_pattern, args.output_video_path, calculate_average_frame_rate(dataset))
|
||||
input_images_pattern = (
|
||||
f"{args.tmp_files_path / args.output_images_prefix}_%04d.png"
|
||||
)
|
||||
create_video_from_images(
|
||||
input_images_pattern,
|
||||
args.output_video_path,
|
||||
calculate_average_frame_rate(dataset),
|
||||
)
|
||||
|
||||
if not args.output_images:
|
||||
for image in images:
|
||||
|
||||
@@ -14,6 +14,12 @@ def load_dataset(
    return Dataset.from_file(bag_file_path, topic=pointcloud_topic)


def save_dataset(dataset: Dataset, output_file_path: Path):
    if not output_file_path.is_dir():
        raise ArgumentTypeError(f"{output_file_path} has to be a valid folder!")
    dataset.to_file(output_file_path)


def calculate_average_frame_rate(dataset: Dataset):
    timestamps = dataset.timestamps
    time_deltas = [
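# The diff is truncated inside calculate_average_frame_rate. A minimal sketch of how an
# average frame rate could be derived from the dataset timestamps (an assumption, not
# the author's exact code):
import numpy as np


def average_frame_rate_sketch(timestamps) -> float:
    seconds = np.asarray([t.timestamp() for t in timestamps], dtype=float)
    deltas = np.diff(seconds)
    return float(1.0 / deltas.mean()) if deltas.size else 0.0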