| |
|
| | import numpy as np
|
| | import re
|
| |
|
| |
|
def get_most_common_features(target, all_features, max=3, min=3):
    """Return the features that share enough name tokens with ``target``.

    Names are split on ``'_'`` and compared as *sets* of tokens, so token
    order and repeated tokens are ignored.

    Args:
        target: Feature name to compare against.
        all_features: Iterable of candidate feature names.
        max: Upper bound (inclusive) on the number of shared tokens.
        min: Lower bound (inclusive) on the number of shared tokens.

    Returns:
        A list, in the order of ``all_features``, of every feature other
        than ``target`` whose shared-token count lies in ``[min, max]``.
    """
    # NOTE: ``max`` / ``min`` shadow the builtins inside this function; the
    # names are kept because callers may pass them as keyword arguments.
    target_keys = set(target.split('_'))  # loop-invariant, build it once

    res = []
    for feature in all_features:
        if feature == target:
            continue

        common_key_num = len(target_keys & set(feature.split('_')))
        if min <= common_key_num <= max:
            res.append(feature)

    return res
|
| |
|
def build_net(target, all_features):
    """Build a two-level edge list around ``target`` from name similarity.

    Starting from ``target``, neighbours are discovered with
    ``get_most_common_features`` and each (child, parent) pair is recorded
    as an edge. Features that already act as a parent (i.e. are keys of the
    internal ``graph_map``) are never re-added as children.

    Args:
        target: Name of the root feature.
        all_features: Iterable of all candidate feature names.

    Returns:
        A tuple ``(edge_indexes, index_feature_map)`` where
        ``edge_indexes`` is ``[child_indices, parent_indices]`` (two
        parallel lists of ints) and ``index_feature_map`` is the list of
        feature names such that ``index_feature_map[i]`` is the name of
        node ``i``. ``target`` is always node 0.
    """
    edge_indexes = [
        [],
        []
    ]
    index_feature_map = [target]
    # Name -> position in index_feature_map, so lookups do not rescan the
    # list on every edge.
    position = {target: 0}

    def index_of(name):
        # Register ``name`` as a new node on first sight, then return its id.
        if name not in position:
            position[name] = len(index_feature_map)
            index_feature_map.append(name)
        return position[name]

    parent_list = [target]
    graph_map = {}
    depth = 2

    for _ in range(depth):
        for feature in parent_list:
            children = get_most_common_features(feature, all_features)

            # Mark the parent as visited *before* filtering so that it can
            # never appear among its own children.
            if feature not in graph_map:
                graph_map[feature] = []

            # Drop candidates that have already been expanded as parents.
            pure_children = [
                child for child in children if child not in graph_map
            ]
            graph_map[feature] = pure_children

            p_index = index_of(feature)
            for child in pure_children:
                c_index = index_of(child)
                edge_indexes[1].append(p_index)
                edge_indexes[0].append(c_index)

        # NOTE: only the children of the *last* parent processed become the
        # next frontier. With depth == 2 the first level has a single
        # parent (``target``), so nothing is lost here.
        parent_list = pure_children

    return edge_indexes, index_feature_map
|
| |
|
| |
|
def construct_data(data, feature_map, labels=0):
    """Collect the requested feature columns, followed by a label row.

    Args:
        data: Tabular object exposing ``.columns`` and ``.loc`` (used the
            way a pandas ``DataFrame`` is used).
        feature_map: Iterable of column names to extract, in order.
            Names missing from ``data.columns`` are reported on stdout
            and skipped.
        labels: Either a single ``int`` that is repeated for every sample,
            or a sequence with one label per sample.

    Returns:
        A list of lists: one entry per extracted column (its values as a
        Python list), followed by the label row. When ``labels`` is a
        sequence whose length does not match the sample count, a warning is
        printed and no label row is appended.
    """
    res = []

    for feature in feature_map:
        if feature in data.columns:
            res.append(data.loc[:, feature].values.tolist())
        else:
            print(feature, 'not exist in data')

    # Labels go last. If no column was extracted, fall back to the number
    # of rows in ``data`` instead of failing on ``res[0]``.
    sample_n = len(res[0]) if res else len(data)

    if isinstance(labels, int):
        res.append([labels] * sample_n)
    elif len(labels) == sample_n:
        res.append(labels)
    else:
        # Surface the mismatch instead of silently dropping the labels.
        print('labels length', len(labels),
              'does not match sample count', sample_n)

    return res
|
| |
|
def build_loc_net(struc, all_features, feature_map=None):
    """Translate a name-based adjacency mapping into index-based edges.

    Args:
        struc: Mapping ``{node_name: [child_name, ...]}``.
        all_features: Collection of valid feature names; nodes and
            children not contained in it are ignored.
        feature_map: Optional list of feature names defining the node
            numbering (``feature_map[i]`` is node ``i``). Node names from
            ``struc`` that are missing are appended to this list in place.
            When omitted, a fresh list is used for each call.

    Returns:
        ``[child_indices, parent_indices]``: two parallel lists of ints,
        one entry per edge.

    Raises:
        ValueError: If a child is a valid feature but has no index yet
            (children are never registered automatically).
    """
    # A ``None`` default avoids the shared-mutable-default pitfall: this
    # list is appended to below, so a ``[]`` default would leak nodes from
    # one call into the next.
    index_feature_map = [] if feature_map is None else feature_map

    known_features = set(all_features)
    # Name -> first position, mirroring what ``list.index`` would return.
    position = {}
    for idx, name in enumerate(index_feature_map):
        position.setdefault(name, idx)

    edge_indexes = [
        [],
        []
    ]
    for node_name, node_list in struc.items():
        if node_name not in known_features:
            continue

        if node_name not in position:
            position[node_name] = len(index_feature_map)
            index_feature_map.append(node_name)

        p_index = position[node_name]
        for child in node_list:
            if child not in known_features:
                continue

            if child not in position:
                print(f'error: {child} not in index_feature_map')
                raise ValueError(f'{child!r} is not in index_feature_map')

            edge_indexes[0].append(position[child])
            edge_indexes[1].append(p_index)

    return edge_indexes