| | """The DataProvider class serve as a helper module for retriving subject model data""" |
| | from abc import ABC, abstractmethod |
| |
|
| | import os |
| | import gc |
| | import time |
| |
|
| | from singleVis.utils import * |
| | from singleVis.eval.evaluate import evaluate_inv_accu |
| |
|
| | """ |
| | DataContainder module |
| | 1. prepare data |
| | 2. estimate boundary |
| | 3. provide data |
| | """ |
| | class DataProviderAbstractClass(ABC): |
| | |
| | def __init__(self, content_path, model, epoch_start, epoch_end, epoch_period): |
| | self.mode = "abstract" |
| | self.content_path = content_path |
| | self.model = model |
| | self.s = epoch_start |
| | self.e = epoch_end |
| | self.p = epoch_period |
| | |
| | @property |
| | @abstractmethod |
| | def train_num(self): |
| | pass |
| |
|
| | @property |
| | @abstractmethod |
| | def test_num(self): |
| | pass |
| |
|
| | @abstractmethod |
| | def _meta_data(self): |
| | pass |
| |
|
| | @abstractmethod |
| | def _estimate_boundary(self): |
| | pass |
| | |
| | def update_interval(self, epoch_s, epoch_e): |
| | self.s = epoch_s |
| | self.e = epoch_e |
| |
|
| | class DataProvider(DataProviderAbstractClass): |
| | def __init__(self, content_path, model, epoch_start, epoch_end, epoch_period, device, classes, epoch_name, verbose=1): |
| | self.content_path = content_path |
| | self.model = model |
| | self.s = epoch_start |
| | self.e = epoch_end |
| | self.p = epoch_period |
| | self.DEVICE = device |
| | self.classes = classes |
| | self.verbose = verbose |
| | self.epoch_name = epoch_name |
| | self.model_path = os.path.join(self.content_path, "Model") |
| | if verbose: |
| | print("Finish initialization...") |
| |
|
| | @property |
| | def train_num(self): |
| | with open(os.path.join(self.content_path, "Model", "{}_{}".format(self.epoch_name, self.s), "index.json"), "r") as f: |
| | idxs = json.load(f) |
| | return len(idxs) |
| |
|
| | @property |
| | def test_num(self): |
| | testing_data_path = os.path.join(self.content_path, "Testing_data") |
| | testing_data = torch.load(os.path.join(testing_data_path, "testing_dataset_data.pth"), |
| | map_location="cpu") |
| | test_num = len(testing_data) |
| | del testing_data |
| | gc.collect() |
| | return test_num |
| | |
| | def _meta_data(self): |
| | raise NotImplementedError |
| |
|
| | def _estimate_boundary(self): |
| | raise NotImplementedError |
| |
|
| |
|
| | class NormalDataProvider(DataProvider): |
| | def __init__(self, content_path, model, epoch_start, epoch_end, epoch_period, device, classes, epoch_name, verbose=1): |
| | super().__init__(content_path, model, epoch_start, epoch_end, epoch_period, device, classes, epoch_name, verbose) |
| | self.mode = "normal" |
| | |
| | @property |
| | def representation_dim(self): |
| | train_data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, self.s), "train_data.npy") |
| | try: |
| | train_data = np.load(train_data_loc) |
| | repr_dim = np.prod(train_data.shape[1:]) |
| | return repr_dim |
| | except Exception as e: |
| | return None |
| |
|
| | def _meta_data(self): |
| | time_inference = list() |
| | training_data_path = os.path.join(self.content_path, "Training_data") |
| | training_data = torch.load(os.path.join(training_data_path, "training_dataset_data.pth"), |
| | map_location="cpu") |
| | training_data = training_data.to(self.DEVICE) |
| | testing_data_path = os.path.join(self.content_path, "Testing_data") |
| | testing_data = torch.load(os.path.join(testing_data_path, "testing_dataset_data.pth"), |
| | map_location="cpu") |
| | testing_data = testing_data.to(self.DEVICE) |
| |
|
| | for n_epoch in range(self.s, self.e + 1, self.p): |
| | t_s = time.time() |
| |
|
| | |
| | test_index_file = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, n_epoch), "test_index.json") |
| | if os.path.exists(test_index_file): |
| | test_index = load_labelled_data_index(test_index_file) |
| | else: |
| | test_index = range(len(testing_data)) |
| | testing_data = testing_data[test_index] |
| |
|
| | model_location = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, n_epoch), "subject_model.pth") |
| | self.model.load_state_dict(torch.load(model_location, map_location=torch.device("cpu"))) |
| | self.model = self.model.to(self.DEVICE) |
| | self.model.eval() |
| |
|
| | repr_model = self.feature_function(n_epoch) |
| | |
| |
|
| | |
| | data_pool_representation = batch_run(repr_model, training_data) |
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, n_epoch), "train_data.npy") |
| | np.save(location, data_pool_representation) |
| |
|
| | |
| | test_data_representation = batch_run(repr_model, testing_data) |
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, n_epoch), "test_data.npy") |
| | np.save(location, test_data_representation) |
| |
|
| | t_e = time.time() |
| | time_inference.append(t_e-t_s) |
| | if self.verbose > 0: |
| | print("Finish inferencing data for Epoch {:d}...".format(n_epoch)) |
| | print( |
| | "Average time for inferencing data: {:.4f}".format(sum(time_inference) / len(time_inference))) |
| |
|
| | |
| | save_dir = os.path.join(self.model_path, "time.json") |
| | if not os.path.exists(save_dir): |
| | evaluation = dict() |
| | else: |
| | f = open(save_dir, "r") |
| | evaluation = json.load(f) |
| | f.close() |
| | evaluation["data_inference"] = round(sum(time_inference) / len(time_inference), 3) |
| | with open(save_dir, 'w') as f: |
| | json.dump(evaluation, f) |
| |
|
| | del training_data |
| | del testing_data |
| | gc.collect() |
| |
|
| | def _estimate_boundary(self, num, l_bound): |
| | ''' |
| | Preprocessing data. This process includes find_border_points and find_border_centers |
| | save data for later training |
| | ''' |
| |
|
| | time_borders_gen = list() |
| | training_data_path = os.path.join(self.content_path, "Training_data") |
| | training_data = torch.load(os.path.join(training_data_path, "training_dataset_data.pth"), |
| | map_location="cpu") |
| | training_data = training_data.to(self.DEVICE) |
| | for n_epoch in range(self.s, self.e + 1, self.p): |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, n_epoch), "index.json") |
| | index = load_labelled_data_index(index_file) |
| | training_data = training_data[index] |
| |
|
| | |
| | |
| | |
| | |
| | |
| | repr_model = self.feature_function(n_epoch) |
| |
|
| | t0 = time.time() |
| | confs = batch_run(self.model, training_data) |
| | preds = np.argmax(confs, axis=1).squeeze() |
| | |
| | num_adv_eg = num |
| | |
| | border_points, _, _ = get_border_points(model=self.model, input_x=training_data, confs=confs, predictions=preds, device=self.DEVICE, l_bound=l_bound, num_adv_eg=num_adv_eg, lambd=0.05, verbose=0) |
| | t1 = time.time() |
| | time_borders_gen.append(round(t1 - t0, 4)) |
| |
|
| | |
| | border_points = border_points.to(self.DEVICE) |
| | border_centers = batch_run(repr_model, border_points) |
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, n_epoch), "border_centers.npy") |
| | np.save(location, border_centers) |
| |
|
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, n_epoch), "ori_border_centers.npy") |
| | np.save(location, border_points.cpu().numpy()) |
| |
|
| | num_adv_eg = num |
| | border_points, _, _ = get_border_points(model=self.model, input_x=training_data, confs=confs, predictions=preds, device=self.DEVICE, l_bound=l_bound, num_adv_eg=num_adv_eg, lambd=0.05, verbose=0) |
| |
|
| | |
| | border_points = border_points.to(self.DEVICE) |
| | border_centers = batch_run(repr_model, border_points) |
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, n_epoch), "test_border_centers.npy") |
| | np.save(location, border_centers) |
| |
|
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, n_epoch), "test_ori_border_centers.npy") |
| | np.save(location, border_points.cpu().numpy()) |
| |
|
| | if self.verbose > 0: |
| | print("Finish generating borders for Epoch {:d}...".format(n_epoch)) |
| | print( |
| | "Average time for generate border points: {:.4f}".format(sum(time_borders_gen) / len(time_borders_gen))) |
| |
|
| | |
| | save_dir = os.path.join(self.model_path, "time.json") |
| | if not os.path.exists(save_dir): |
| | evaluation = dict() |
| | else: |
| | f = open(save_dir, "r") |
| | evaluation = json.load(f) |
| | f.close() |
| | evaluation["data_B_gene"] = round(sum(time_borders_gen) / len(time_borders_gen), 3) |
| | with open(save_dir, 'w') as f: |
| | json.dump(evaluation, f) |
| |
|
| | def initialize(self, num, l_bound): |
| | self._meta_data() |
| | self._estimate_boundary(num, l_bound) |
| |
|
| | def train_representation(self, epoch): |
| | |
| | train_data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "train_data.npy") |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "index.json") |
| | index = load_labelled_data_index(index_file) |
| | try: |
| | train_data = np.load(train_data_loc) |
| | train_data = train_data[index] |
| | except Exception as e: |
| | print("no train data saved for Epoch {}".format(epoch)) |
| | train_data = None |
| | return train_data |
| | |
| | def train_labels(self, epoch): |
| | |
| | training_data_loc = os.path.join(self.content_path, "Training_data", "training_dataset_label.pth") |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "index.json") |
| | index = load_labelled_data_index(index_file) |
| | try: |
| | training_labels = torch.load(training_data_loc, map_location="cpu") |
| | |
| | |
| | |
| | training_labels = np.array(training_labels) |
| | except Exception as e: |
| | print("no train labels saved for Epoch {}".format(epoch)) |
| | training_labels = None |
| | return training_labels |
| |
|
| | def test_representation(self, epoch): |
| | data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "test_data.npy") |
| | try: |
| | test_data = np.load(data_loc) |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "test_index.json") |
| | if os.path.exists(index_file): |
| | index = load_labelled_data_index(index_file) |
| | test_data = test_data[index] |
| | except Exception as e: |
| | print("no test data saved for Epoch {}".format(epoch)) |
| | test_data = None |
| | |
| | return test_data |
| | |
| | def test_labels(self, epoch): |
| | |
| | testing_data_loc = os.path.join(self.content_path, "Testing_data", "testing_dataset_label.pth") |
| | try: |
| | testing_labels = torch.load(testing_data_loc).to(device="cpu") |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "test_index.json") |
| | print(index_file) |
| | if os.path.exists(index_file): |
| | idxs = load_labelled_data_index(index_file) |
| | |
| | testing_labels = torch.zeros(len(idxs)) |
| | except Exception as e: |
| | print("no test labels saved for Epoch {}".format(epoch)) |
| | testing_labels = None |
| | return testing_labels.cpu().numpy() |
| |
|
| | def border_representation(self, epoch): |
| | border_centers_loc = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), |
| | "border_centers.npy") |
| | try: |
| | border_centers = np.load(border_centers_loc) |
| | except Exception as e: |
| | print("no border points saved for Epoch {}".format(epoch)) |
| | border_centers = np.array([]) |
| | return border_centers |
| | |
| | def test_border_representation(self, epoch): |
| | border_centers_loc = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), |
| | "test_border_centers.npy") |
| | try: |
| | border_centers = np.load(border_centers_loc) |
| | except Exception as e: |
| | print("no border points saved for Epoch {}".format(epoch)) |
| | border_centers = np.array([]) |
| | return border_centers |
| | |
| | def max_norm(self, epoch): |
| | train_data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "train_data.npy") |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "index.json") |
| | index = load_labelled_data_index(index_file) |
| | try: |
| | train_data = np.load(train_data_loc) |
| | train_data = train_data[index] |
| | max_x = np.linalg.norm(train_data, axis=1).max() |
| | except Exception as e: |
| | print("no train data saved for Epoch {}".format(epoch)) |
| | max_x = None |
| | return max_x |
| |
|
| | def prediction_function(self, epoch): |
| | model_location = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "subject_model.pth") |
| |
|
| | self.model.load_state_dict(torch.load(model_location, map_location=torch.device("cpu")),strict=False) |
| | self.model.to(self.DEVICE) |
| | self.model.eval() |
| |
|
| | pred_fn = self.model.prediction |
| | return pred_fn |
| |
|
| |
|
| | def feature_function(self, epoch): |
| | model_location = os.path.join(self.model_path, "{}_{:d}".format(self.epoch_name, epoch), "subject_model.pth") |
| | self.model.load_state_dict(torch.load(model_location, map_location=torch.device("cpu")),strict=False) |
| | self.model = self.model.to(self.DEVICE) |
| | self.model.eval() |
| |
|
| | fea_fn = self.model.feature |
| | return fea_fn |
| |
|
| | def get_pred(self, epoch, data): |
| | ''' |
| | get the prediction score for data in epoch_id |
| | :param data: numpy.ndarray |
| | :param epoch_id: |
| | :return: pred, numpy.ndarray |
| | ''' |
| | prediction_func = self.prediction_function(epoch) |
| |
|
| | data = torch.from_numpy(data) |
| | data = data.to(self.DEVICE) |
| | pred = batch_run(prediction_func, data) |
| | return pred |
| |
|
| | def training_accu(self, epoch): |
| | data = self.train_representation(epoch) |
| | labels = self.train_labels(epoch) |
| | pred = self.get_pred(epoch, data).argmax(-1) |
| | val = evaluate_inv_accu(labels, pred) |
| | return val |
| |
|
| | def testing_accu(self, epoch): |
| | data = self.test_representation(epoch) |
| | labels = self.test_labels(epoch) |
| | test_index_file = os.path.join(self.model_path, "{}_{}".format(self.epoch_name, epoch), "test_index.json") |
| | if os.path.exists(test_index_file): |
| | index = load_labelled_data_index(test_index_file) |
| | labels = labels[index] |
| | pred = self.get_pred(epoch, data).argmax(-1) |
| | val = evaluate_inv_accu(labels, pred) |
| | return val |
| | |
| | def is_deltaB(self, epoch, data): |
| | """ |
| | check wheter input vectors are lying on delta-boundary or not |
| | :param epoch_id: |
| | :param data: numpy.ndarray |
| | :return: numpy.ndarray, boolean, True stands for is_delta_boundary |
| | """ |
| | preds = self.get_pred(epoch, data) |
| | border = is_B(preds) |
| | return border |
| | |
| | def checkpoint_path(self, epoch): |
| | path = os.path.join(self.model_path, "{}_{}".format(self.epoch_name, epoch)) |
| | return path |
| |
|
| | |
| | class ActiveLearningDataProvider(DataProvider): |
| | def __init__(self, content_path, model, base_epoch_start, device, classes, iteration_name="Iteration",verbose=1): |
| | |
| | super().__init__(content_path, model, base_epoch_start, base_epoch_start, 1, device, classes, iteration_name, verbose) |
| | self.mode = "al" |
| | self.iteration_name = iteration_name |
| | |
| | @property |
| | def pool_num(self): |
| | return len(self.train_labels_all()) |
| | |
| | def label_num(self, iteration): |
| | return len(self.get_labeled_idx(iteration)) |
| |
|
| | def representation_dim(self, iteration): |
| | train_data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "train_data.npy") |
| | try: |
| | train_data = np.load(train_data_loc) |
| | repr_dim = np.prod(train_data.shape[1:]) |
| | return repr_dim |
| | except Exception as e: |
| | return None |
| | |
| | def get_labeled_idx(self, iteration): |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "index.json") |
| | lb_idxs = np.array(load_labelled_data_index(index_file)) |
| | return lb_idxs |
| |
|
| | def get_unlabeled_idx(self, pool_num, lb_idx): |
| | tot_idx = np.arange(pool_num) |
| | |
| | ulb_idx = np.setdiff1d(tot_idx, lb_idx) |
| | return ulb_idx |
| |
|
| | def _meta_data(self, iteration): |
| | training_data_path = os.path.join(self.content_path, "Training_data") |
| | training_data = torch.load(os.path.join(training_data_path, "training_dataset_data.pth"), |
| | map_location="cpu") |
| | training_data = training_data.to(self.DEVICE) |
| | testing_data_path = os.path.join(self.content_path, "Testing_data") |
| | testing_data = torch.load(os.path.join(testing_data_path, "testing_dataset_data.pth"), |
| | map_location="cpu") |
| | testing_data = testing_data.to(self.DEVICE) |
| |
|
| | t_s = time.time() |
| | repr_model = self.feature_function(iteration) |
| |
|
| | |
| | data_pool_representation = batch_run(repr_model, training_data) |
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "train_data.npy") |
| | np.save(location, data_pool_representation) |
| |
|
| | |
| | test_data_representation = batch_run(repr_model, testing_data) |
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "test_data.npy") |
| | np.save(location, test_data_representation) |
| |
|
| | t_e = time.time() |
| |
|
| | if self.verbose > 0: |
| | print("Finish inferencing data for Iteration {:d} in {:.2f} seconds...".format(iteration, t_e-t_s)) |
| |
|
| | |
| | save_dir = os.path.join(self.model_path, "time_al.json") |
| | if not os.path.exists(save_dir): |
| | evaluation = dict() |
| | |
| | else: |
| | f = open(save_dir, "r") |
| | evaluation = json.load(f) |
| | f.close() |
| | if "data_inference" not in evaluation.keys(): |
| | evaluation["data_inference"] = dict() |
| | evaluation["data_inference"][str(iteration)] = round(t_e - t_s, 3) |
| | with open(save_dir, 'w') as f: |
| | json.dump(evaluation, f) |
| |
|
| | del training_data |
| | del testing_data |
| | gc.collect() |
| |
|
| | def _estimate_boundary(self, iteration, num, l_bound): |
| | ''' |
| | Preprocessing data. This process includes find_border_points and find_border_centers |
| | save data for later training |
| | ''' |
| |
|
| | training_data_path = os.path.join(self.content_path, "Training_data") |
| | training_data = torch.load(os.path.join(training_data_path, "training_dataset_data.pth"), |
| | map_location="cpu") |
| | training_data = training_data.to(self.DEVICE) |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "index.json") |
| | index = load_labelled_data_index(index_file) |
| | training_data = training_data[index] |
| |
|
| | repr_model = self.feature_function(iteration) |
| |
|
| | t0 = time.time() |
| | confs = batch_run(self.model, training_data) |
| | preds = np.argmax(confs, axis=1).squeeze() |
| | |
| | num_adv_eg = num |
| | border_points, _, _ = get_border_points(model=self.model, input_x=training_data, confs=confs, predictions=preds, device=self.DEVICE, l_bound=l_bound, num_adv_eg=num_adv_eg, lambd=0.05, verbose=0) |
| | t1 = time.time() |
| |
|
| | |
| | border_points = border_points.to(self.DEVICE) |
| | border_centers = batch_run(repr_model, border_points) |
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "border_centers.npy") |
| | np.save(location, border_centers) |
| |
|
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "ori_border_centers.npy") |
| | np.save(location, border_points.cpu().numpy()) |
| |
|
| | num_adv_eg = num |
| | border_points, _, _ = get_border_points(model=self.model, input_x=training_data, confs=confs, predictions=preds, device=self.DEVICE, l_bound=l_bound, num_adv_eg=num_adv_eg, lambd=0.05, verbose=0) |
| |
|
| | |
| | border_points = border_points.to(self.DEVICE) |
| | border_centers = batch_run(repr_model, border_points) |
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "test_border_centers.npy") |
| | np.save(location, border_centers) |
| |
|
| | location = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_nameiteration), "test_ori_border_centers.npy") |
| | np.save(location, border_points.cpu().numpy()) |
| |
|
| | if self.verbose > 0: |
| | print("Finish generating borders for Iteration {:d} in {:.2f} seconds ...".format(iteration, t1-t0)) |
| |
|
| | |
| | save_dir = os.path.join(self.model_path, "time_al.json") |
| | if not os.path.exists(save_dir): |
| | evaluation = dict() |
| | else: |
| | f = open(save_dir, "r") |
| | evaluation = json.load(f) |
| | f.close() |
| | if "data_B_gene" not in evaluation.keys(): |
| | evaluation["data_B_gene"] = dict() |
| | evaluation["data_B_gene"][str(iteration)] = round(t1-t0, 3) |
| | with open(save_dir, 'w') as f: |
| | json.dump(evaluation, f) |
| |
|
| | def initialize_iteration(self, iteration, num, l_bound): |
| | self._meta_data(iteration) |
| | self._estimate_boundary(iteration, num, l_bound) |
| |
|
| | def train_representation(self, iteration): |
| | |
| | train_data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "train_data.npy") |
| | try: |
| | idxs = self.get_labeled_idx(iteration) |
| | train_data = np.load(train_data_loc)[idxs] |
| | except Exception as e: |
| | print("no train data saved for Iteration {}".format(iteration)) |
| | train_data = None |
| | return train_data |
| | |
| | def train_labels(self, epoch): |
| | |
| | training_data_loc = os.path.join(self.content_path, "Training_data", "training_dataset_label.pth") |
| | try: |
| | idxs = self.get_labeled_idx(epoch) |
| | training_labels = torch.load(training_data_loc, map_location="cpu")[idxs] |
| | except Exception as e: |
| | print("no train labels saved for Iteration {}".format(epoch)) |
| | training_labels = None |
| | return training_labels.numpy() |
| | |
| | def train_representation_ulb(self, iteration): |
| | |
| | train_data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "train_data.npy") |
| | lb_idxs = self.get_labeled_idx(iteration) |
| | try: |
| | train_data = np.load(train_data_loc) |
| | ulb_idxs = self.get_unlabeled_idx(len(train_data), lb_idxs) |
| | train_data = train_data[ulb_idxs] |
| | except Exception as e: |
| | print("no train data saved for Iteration {}".format(iteration)) |
| | train_data = None |
| | return train_data |
| | |
| | def train_labels_ulb(self, epoch): |
| | |
| | training_data_loc = os.path.join(self.content_path, "Training_data", "training_dataset_label.pth") |
| | lb_idxs = self.get_labeled_idx(epoch) |
| | try: |
| | training_labels = torch.load(training_data_loc, map_location="cpu") |
| | ulb_idxs = self.get_unlabeled_idx(len(training_labels), lb_idxs) |
| | training_labels = training_labels[ulb_idxs] |
| | except Exception as e: |
| | print("no train labels saved for Iteration {}".format(epoch)) |
| | training_labels = None |
| | return training_labels.numpy() |
| | |
| | def train_representation_all(self, iteration): |
| | |
| | train_data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "train_data.npy") |
| | try: |
| | train_data = np.load(train_data_loc) |
| | except Exception as e: |
| | print("no train data saved for Iteration {}".format(iteration)) |
| | train_data = None |
| | return train_data |
| | |
| | def train_labels_all(self): |
| | |
| | training_data_loc = os.path.join(self.content_path, "Training_data", "training_dataset_label.pth") |
| | try: |
| | training_labels = torch.load(training_data_loc, map_location="cpu") |
| | except Exception as e: |
| | print("no train labels saved") |
| | training_labels = None |
| | return training_labels.numpy() |
| |
|
| | def test_representation(self, epoch): |
| | data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, epoch), "test_data.npy") |
| | try: |
| | test_data = np.load(data_loc) |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, epoch), "test_index.json") |
| | if os.path.exists(index_file): |
| | index = load_labelled_data_index(index_file) |
| | test_data = test_data[index] |
| | except Exception as e: |
| | print("no test data saved for Iteration {}".format(epoch)) |
| | test_data = None |
| | |
| | return test_data |
| | |
| | def test_labels(self, epoch): |
| | |
| | testing_data_loc = os.path.join(self.content_path, "Testing_data", "testing_dataset_label.pth") |
| | try: |
| | testing_labels = torch.load(testing_data_loc, map_location="cpu").numpy() |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, epoch), "test_index.json") |
| | if os.path.exists(index_file): |
| | idxs = load_labelled_data_index(index_file) |
| | testing_labels = testing_labels[idxs] |
| | except Exception as e: |
| | print("no test labels saved for Iteration {}".format(epoch)) |
| | testing_labels = None |
| | return testing_labels |
| |
|
| | def border_representation(self, epoch): |
| | border_centers_loc = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, epoch), |
| | "border_centers.npy") |
| | try: |
| | border_centers = np.load(border_centers_loc) |
| | except Exception as e: |
| | print("no border points saved for Iteration {}".format(epoch)) |
| | border_centers = np.array([]) |
| | return border_centers |
| | |
| | def test_border_representation(self, epoch): |
| | border_centers_loc = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, epoch), |
| | "test_border_centers.npy") |
| | try: |
| | border_centers = np.load(border_centers_loc) |
| | except Exception as e: |
| | print("no border points saved for Epoch {}".format(epoch)) |
| | border_centers = np.array([]) |
| | return border_centers |
| | |
| | def max_norm(self, epoch): |
| | train_data = self.train_representation(epoch) |
| | max_x = np.linalg.norm(train_data, axis=1).max() |
| | return max_x |
| |
|
| | def prediction_function(self, iteration): |
| | model_location = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "subject_model.pth") |
| | self.model.load_state_dict(torch.load(model_location, map_location=torch.device("cpu"))) |
| | self.model.to(self.DEVICE) |
| | self.model.eval() |
| |
|
| | pred_fn = self.model.prediction |
| | return pred_fn |
| |
|
| | def feature_function(self, epoch): |
| | model_location = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, epoch), "subject_model.pth") |
| | self.model.load_state_dict(torch.load(model_location, map_location=torch.device("cpu"))) |
| | self.model.to(self.DEVICE) |
| | self.model.eval() |
| |
|
| | fea_fn = self.model.feature |
| | return fea_fn |
| |
|
| | def get_pred(self, iteration, data): |
| | ''' |
| | get the prediction score for data in epoch_id |
| | :param data: numpy.ndarray |
| | :param epoch_id: |
| | :return: pred, numpy.ndarray |
| | ''' |
| | prediction_func = self.prediction_function(iteration) |
| |
|
| | data = torch.from_numpy(data) |
| | data = data.to(self.DEVICE) |
| | pred = batch_run(prediction_func, data) |
| | return pred |
| |
|
| | def training_accu(self, epoch): |
| | data = self.train_representation_lb(epoch) |
| | labels = self.train_labels_lb(epoch) |
| | pred = self.get_pred(epoch, data).argmax(1) |
| | val = evaluate_inv_accu(labels, pred) |
| | return val |
| |
|
| | def testing_accu(self, epoch): |
| | data = self.test_representation(epoch) |
| | labels = self.test_labels(epoch) |
| | pred = self.get_pred(epoch, data).argmax(1) |
| | val = evaluate_inv_accu(labels, pred) |
| | return val |
| | |
| | def is_deltaB(self, epoch, data): |
| | """ |
| | check wheter input vectors are lying on delta-boundary or not |
| | :param epoch_id: |
| | :param data: numpy.ndarray |
| | :return: numpy.ndarray, boolean, True stands for is_delta_boundary |
| | """ |
| | preds = self.get_pred(epoch, data) |
| | border = is_B(preds) |
| | return border |
| | |
| | def checkpoint_path(self, epoch): |
| | path = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, epoch)) |
| | return path |
| | |
| | class DenseActiveLearningDataProvider(ActiveLearningDataProvider): |
| | def __init__(self, content_path, model, base_epoch_start, epoch_num, device, classes, iteration_name="Iteration", epoch_name="Epoch", verbose=1): |
| | super().__init__(content_path, model, base_epoch_start, device, classes, iteration_name, verbose) |
| | self.mode = "dense_al" |
| | self.epoch_num = epoch_num |
| | self.s = 1 |
| | self.p = 1 |
| | self.e = epoch_num |
| | self.epoch_name = epoch_name |
| |
|
| | def representation_dim(self): |
| | train_data_loc = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, self.s), "{}_{:d}".format(self.epoch_name, self.epoch_num), "train_data.npy") |
| | try: |
| | train_data = np.load(train_data_loc) |
| | repr_dim = np.prod(train_data.shape[1:]) |
| | return repr_dim |
| | except Exception as e: |
| | return None |
| |
|
| | def _meta_data(self, iteration): |
| | time_inference = list() |
| | training_data_path = os.path.join(self.content_path, "Training_data") |
| | training_data = torch.load(os.path.join(training_data_path, "training_dataset_data.pth"), |
| | map_location="cpu") |
| | training_data = training_data.to(self.DEVICE) |
| | testing_data_path = os.path.join(self.content_path, "Testing_data") |
| | testing_data = torch.load(os.path.join(testing_data_path, "testing_dataset_data.pth"), |
| | map_location="cpu") |
| | testing_data = testing_data.to(self.DEVICE) |
| |
|
| | t_s = time.time() |
| |
|
| | |
| | test_index_file = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "test_index.json") |
| | if os.path.exists(test_index_file): |
| | test_index = load_labelled_data_index(test_index_file) |
| | else: |
| | test_index = range(len(testing_data)) |
| | testing_data = testing_data[test_index] |
| |
|
| | for n_epoch in range(1, self.epoch_num+1, 1): |
| | repr_model = self.feature_function(iteration, n_epoch) |
| |
|
| | |
| | data_pool_representation = batch_run(repr_model, training_data) |
| | location = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, n_epoch), "train_data.npy") |
| | np.save(location, data_pool_representation) |
| |
|
| | |
| | test_data_representation = batch_run(repr_model, testing_data) |
| | location = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, n_epoch), "test_data.npy") |
| | np.save(location, test_data_representation) |
| |
|
| | t_e = time.time() |
| | time_inference.append(t_e-t_s) |
| | if self.verbose > 0: |
| | print("Finish inferencing data for Iteration {:d}...".format(iteration)) |
| | print( |
| | "Average time for inferencing data: {:.4f}...".format(sum(time_inference) / len(time_inference))) |
| |
|
| | |
| | save_dir = os.path.join(self.model_path, "SV_time.json") |
| | if not os.path.exists(save_dir): |
| | evaluation = dict() |
| | else: |
| | f = open(save_dir, "r") |
| | evaluation = json.load(f) |
| | f.close() |
| | evaluation["data_inference"] = round(sum(time_inference) / len(time_inference), 3) |
| | with open(save_dir, 'w') as f: |
| | json.dump(evaluation, f) |
| |
|
| | del training_data |
| | del testing_data |
| | gc.collect() |
| |
|
| | def _estimate_boundary(self, iteration, num, l_bound): |
| | ''' |
| | Preprocessing data. This process includes find_border_points and find_border_centers |
| | save data for later training |
| | ''' |
| |
|
| | time_borders_gen = list() |
| | training_data_path = os.path.join(self.content_path, "Training_data") |
| | training_data = torch.load(os.path.join(training_data_path, "training_dataset_data.pth"), |
| | map_location="cpu") |
| | training_data = training_data.to(self.DEVICE) |
| |
|
| | for n_epoch in range(1, self.epoch_num+1, 1): |
| | index_file = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "index.json") |
| | index = load_labelled_data_index(index_file) |
| | training_data = training_data[index] |
| |
|
| | repr_model = self.feature_function(iteration, n_epoch) |
| |
|
| | t0 = time.time() |
| | confs = batch_run(self.model, training_data) |
| | preds = np.argmax(confs, axis=1).squeeze() |
| | |
| | num_adv_eg = num |
| | border_points, _, _ = get_border_points(model=self.model, input_x=training_data, confs=confs, predictions=preds, device=self.DEVICE, l_bound=l_bound, num_adv_eg=num_adv_eg, lambd=0.05, verbose=0) |
| | t1 = time.time() |
| | time_borders_gen.append(round(t1 - t0, 4)) |
| |
|
| | |
| | border_points = border_points.to(self.DEVICE) |
| | border_centers = batch_run(repr_model, border_points) |
| | location = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, n_epoch), "border_centers.npy") |
| | np.save(location, border_centers) |
| |
|
| | location = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, n_epoch), "ori_border_centers.npy") |
| | np.save(location, border_points.cpu().numpy()) |
| |
|
| | num_adv_eg = num |
| | border_points, _, _ = get_border_points(model=self.model, input_x=training_data, confs=confs, predictions=preds, device=self.DEVICE, l_bound=l_bound, num_adv_eg=num_adv_eg, lambd=0.05, verbose=0) |
| |
|
| | |
| | border_points = border_points.to(self.DEVICE) |
| | border_centers = batch_run(repr_model, border_points) |
| | location = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_nanme, n_epoch), "test_border_centers.npy") |
| | np.save(location, border_centers) |
| |
|
| | location = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, n_epoch), "test_ori_border_centers.npy") |
| | np.save(location, border_points.cpu().numpy()) |
| |
|
| | if self.verbose > 0: |
| | print("Finish generating borders for Epoch {:d}...".format(n_epoch)) |
| | print( |
| | "Average time for generate border points for each iteration: {:.4f}".format(sum(time_borders_gen) / len(time_borders_gen))) |
| |
|
| | |
| | save_dir = os.path.join(self.model_path, "SV_time.json") |
| | if not os.path.exists(save_dir): |
| | evaluation = dict() |
| | else: |
| | f = open(save_dir, "r") |
| | evaluation = json.load(f) |
| | f.close() |
| | evaluation["data_B_gene"] = round(sum(time_borders_gen) / len(time_borders_gen), 3) |
| | with open(save_dir, 'w') as f: |
| | json.dump(evaluation, f) |
| |
|
| | def train_representation(self, iteration, epoch): |
| | |
| | train_data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "{}_{}".format(self.epoch_name, epoch), "train_data.npy") |
| | lb_idxs = self.get_labeled_idx(iteration) |
| | try: |
| | train_data = np.load(train_data_loc)[lb_idxs] |
| | except Exception as e: |
| | print("no train data saved for Iteration {}".format(iteration)) |
| | train_data = None |
| | return train_data |
| |
|
| | def train_representation_all(self, iteration, epoch): |
| | |
| | train_data_loc = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, epoch), "train_data.npy") |
| | try: |
| | train_data = np.load(train_data_loc) |
| | except Exception as e: |
| | print("no train data saved for Iteration {} Epoch {}".format(iteration, epoch)) |
| | train_data = None |
| | return train_data |
| | |
| | def train_representation_ulb(self, iteration, epoch): |
| | |
| | train_data_loc = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "{}_{}".format(self.epoch_name, epoch), "train_data.npy") |
| | lb_idxs = self.get_labeled_idx(iteration) |
| | try: |
| | train_data = np.load(train_data_loc) |
| | pool_num = len(train_data) |
| | ulb_idx = self.get_unlabeled_idx(pool_num=pool_num, lb_idx=lb_idxs) |
| | train_data = train_data[ulb_idx] |
| | except Exception as e: |
| | print("no train data saved for Iteration {}".format(iteration)) |
| | train_data = None |
| | return train_data |
| | |
| | def train_labels_ulb(self, iteration): |
| | |
| | training_data_loc = os.path.join(self.content_path, "Training_data", "training_dataset_label.pth") |
| | lb_idxs = self.get_labeled_idx(iteration) |
| | try: |
| | training_labels = torch.load(training_data_loc, map_location="cpu") |
| | ulb_idxs = self.get_unlabeled_idx(len(training_labels), lb_idxs) |
| | training_labels = training_labels[ulb_idxs] |
| | except Exception as e: |
| | print("no train labels saved for Iteration {}".format(iteration)) |
| | training_labels = None |
| | return training_labels.numpy() |
| | |
| | def train_labels(self, iteration): |
| | |
| | training_data_loc = os.path.join(self.content_path, "Training_data", "training_dataset_label.pth") |
| | index_file = os.path.join(self.model_path, "{}_{:d}".format(self.iteration_name, iteration), "index.json") |
| | lb_idxs = np.array(load_labelled_data_index(index_file)) |
| | try: |
| | training_labels = torch.load(training_data_loc, map_location="cpu") |
| | training_labels = training_labels[lb_idxs] |
| | except Exception as e: |
| | print("no train labels saved for Iteration {}".format(iteration)) |
| | training_labels = None |
| | return training_labels.numpy() |
| |
|
| | def train_labels_all(self): |
| | |
| | training_data_loc = os.path.join(self.content_path, "Training_data", "training_dataset_label.pth") |
| | try: |
| | training_labels = torch.load(training_data_loc, map_location="cpu") |
| | except Exception as e: |
| | print("no train labels saved") |
| | training_labels = None |
| | return training_labels.numpy() |
| |
|
| | def test_representation(self, iteration, epoch): |
| | data_loc = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{}".format(self.epoch_name, epoch), "test_data.npy") |
| | try: |
| | test_data = np.load(data_loc) |
| | index_file = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "test_index.json") |
| | if os.path.exists(index_file): |
| | index = load_labelled_data_index(index_file) |
| | test_data = test_data[index] |
| | except Exception as e: |
| | print("no test data saved for Iteration {} Epoch {}".format(iteration, epoch)) |
| | test_data = None |
| | return test_data |
| |
|
| | def border_representation(self, iteration, epoch): |
| | border_centers_loc = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, epoch), |
| | "border_centers.npy") |
| | try: |
| | border_centers = np.load(border_centers_loc) |
| | except Exception as e: |
| | print("no border points saved for Epoch {}".format(epoch)) |
| | border_centers = np.array([]) |
| | return border_centers |
| | |
| | def test_border_representation(self, iteration, epoch): |
| | border_centers_loc = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, epoch), |
| | "test_border_centers.npy") |
| | try: |
| | border_centers = np.load(border_centers_loc) |
| | except Exception as e: |
| | print("no border points saved for Iteration {} Epoch {}".format(iteration, epoch)) |
| | border_centers = np.array([]) |
| | return border_centers |
| | |
| | def max_norm(self, iteration, epoch): |
| | train_data_loc = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, epoch), "train_data.npy") |
| | index_file = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "index.json") |
| | index = load_labelled_data_index(index_file) |
| | try: |
| | train_data = np.load(train_data_loc) |
| | train_data = train_data[index] |
| | max_x = np.linalg.norm(train_data, axis=1).max() |
| | except Exception as e: |
| | print("no train data saved for Iteration {} Epoch {}".format(iteration, epoch)) |
| | max_x = None |
| | return max_x |
| |
|
| | def prediction_function(self, iteration, epoch): |
| | model_location = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, epoch), "subject_model.pth") |
| | self.model.load_state_dict(torch.load(model_location, map_location=torch.device("cpu"))) |
| | self.model.to(self.DEVICE) |
| | self.model.eval() |
| |
|
| | pred_fn = self.model.prediction |
| | return pred_fn |
| |
|
| | def feature_function(self, iteration, epoch): |
| | model_location = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{:d}".format(self.epoch_name, epoch), "subject_model.pth") |
| | self.model.load_state_dict(torch.load(model_location, map_location=torch.device("cpu"))) |
| | self.model = self.model.to(self.DEVICE) |
| | self.model.eval() |
| |
|
| | fea_fn = self.model.feature |
| | return fea_fn |
| |
|
| | def get_pred(self, iteration, epoch, data): |
| | ''' |
| | get the prediction score for data in epoch_id |
| | :param data: numpy.ndarray |
| | :param epoch_id: |
| | :return: pred, numpy.ndarray |
| | ''' |
| | prediction_func = self.prediction_function(iteration, epoch) |
| |
|
| | data = torch.from_numpy(data) |
| | data = data.to(self.DEVICE) |
| | pred = batch_run(prediction_func, data) |
| | return pred |
| |
|
| | def training_accu(self, iteration, epoch): |
| | data = self.train_representation_lb(iteration, epoch) |
| | labels = self.train_labels_lb(iteration) |
| | pred = self.get_pred(iteration, epoch, data).argmax(-1) |
| | val = evaluate_inv_accu(labels, pred) |
| | return val |
| |
|
| | def testing_accu(self, iteration, epoch): |
| | data = self.test_representation(epoch) |
| | labels = self.test_labels(epoch) |
| | test_index_file = os.path.join(self.model_path,"{}_{}".format(self.iteration_name, iteration), "{}_{}".format(self.epoch_name, epoch), "test_index.json") |
| | if os.path.exists(test_index_file): |
| | index = load_labelled_data_index(test_index_file) |
| | labels = labels[index] |
| | pred = self.get_pred(epoch, data).argmax(-1) |
| | val = evaluate_inv_accu(labels, pred) |
| | return val |
| | |
| | def is_deltaB(self, iteration, epoch, data): |
| | """ |
| | check wheter input vectors are lying on delta-boundary or not |
| | :param epoch_id: |
| | :param data: numpy.ndarray |
| | :return: numpy.ndarray, boolean, True stands for is_delta_boundary |
| | """ |
| | preds = self.get_pred(iteration, epoch, data) |
| | border = is_B(preds) |
| | return border |
| | |
| | def single_checkpoint_path(self, iteration, epoch): |
| | path = os.path.join(self.model_path, "{}_{}".format(self.iteration_name, iteration), "{}_{}".format(self.epoch_name, epoch)) |
| | return path |
| |
|