ravimohan19 committed on
Commit
d70a716
·
verified ·
1 Parent(s): eb315ba

Upload priors/data_prior.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. priors/data_prior.py +123 -0
priors/data_prior.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data-based prior: incorporate initial experimental data to warm-start BO."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Dict, List, Optional, Tuple
5
+
6
+ import torch
7
+ from torch import Tensor
8
+ import pandas as pd
9
+ import numpy as np
10
+
11
+
12
@dataclass
class DataPrior:
    """Manages initial experimental data as a prior for Bayesian optimization.

    Supports loading from:
    - Tensors directly
    - Pandas DataFrames
    - CSV files
    - Dictionary format

    The data prior can be used to:
    - Warm-start the GP model
    - Estimate initial hyperparameters
    - Define the feasible region based on past experiments

    The alternate constructors (``from_dataframe``, ``from_csv``,
    ``from_dict``) and ``add_observations`` store ``y`` with a trailing
    singleton dimension, i.e. one row per observation.

    NOTE(review): the dataclass-generated ``__eq__`` compares the tensor
    fields with ``==``; comparing two instances that hold multi-element
    tensors is therefore likely to raise rather than return a bool. Left
    unchanged here to keep the public interface identical.
    """

    # Observed inputs, one row per observation (None until data is supplied).
    X: Optional[Tensor] = None
    # Observed objective values, one row per observation.
    y: Optional[Tensor] = None
    # Names of the columns of ``X``, in column order.
    feature_names: List[str] = field(default_factory=list)
    # Name of the quantity stored in ``y``.
    objective_name: str = "objective"
    # Free-form bookkeeping; the constructors record "source" and "n_samples".
    metadata: Dict = field(default_factory=dict)

    @classmethod
    def from_dataframe(
        cls,
        df: pd.DataFrame,
        feature_columns: List[str],
        objective_column: str,
        dtype: torch.dtype = torch.float64,
    ) -> "DataPrior":
        """Create a DataPrior from a pandas DataFrame.

        Args:
            df: Table holding one observation per row.
            feature_columns: Columns of ``df`` that become the columns of ``X``.
            objective_column: Column of ``df`` that becomes ``y``.
            dtype: Torch dtype used for both tensors.

        Returns:
            A DataPrior whose ``X`` has one column per feature and whose ``y``
            has a trailing singleton dimension.
        """
        # Copy so that later mutation of the caller's list cannot silently
        # change the names stored on the prior.
        names = list(feature_columns)
        X = torch.tensor(df[names].to_numpy(), dtype=dtype)
        y = torch.tensor(df[objective_column].to_numpy(), dtype=dtype).unsqueeze(-1)
        return cls(
            X=X,
            y=y,
            feature_names=names,
            objective_name=objective_column,
            metadata={"source": "dataframe", "n_samples": len(df)},
        )

    @classmethod
    def from_csv(
        cls,
        filepath: str,
        feature_columns: List[str],
        objective_column: str,
        dtype: torch.dtype = torch.float64,
    ) -> "DataPrior":
        """Create a DataPrior from a CSV file.

        The file is read with ``pandas.read_csv`` using its defaults and then
        handed to ``from_dataframe``; see that method for the arguments.
        """
        df = pd.read_csv(filepath)
        return cls.from_dataframe(df, feature_columns, objective_column, dtype)

    @classmethod
    def from_dict(
        cls,
        data: Dict[str, List[float]],
        feature_keys: List[str],
        objective_key: str,
        dtype: torch.dtype = torch.float64,
    ) -> "DataPrior":
        """Create a DataPrior from a dictionary of equally long columns.

        Args:
            data: Mapping from column name to the list of values.
            feature_keys: Keys of ``data`` that become the columns of ``X``.
            objective_key: Key of ``data`` that becomes ``y``.
            dtype: Torch dtype used for both tensors.

        Returns:
            A DataPrior with ``X`` of shape ``(n, len(feature_keys))`` and
            ``y`` of shape ``(n, 1)``.

        Raises:
            KeyError: If a requested key is missing from ``data``.
            ValueError: If ``feature_keys`` is empty or the selected columns
                do not all have the same length.
        """
        names = list(feature_keys)
        if not names:
            raise ValueError("feature_keys must contain at least one key.")

        columns = [data[key] for key in names]
        targets = data[objective_key]

        # Every column must describe the same observations; without this
        # check longer columns would be silently truncated to the length of
        # the first feature and X / y could end up with different row counts.
        n_rows = len(columns[0])
        lengths = {key: len(col) for key, col in zip(names, columns)}
        lengths[objective_key] = len(targets)
        if any(length != n_rows for length in lengths.values()):
            raise ValueError(
                f"All columns must have the same length, got {lengths}."
            )

        if n_rows:
            X = torch.tensor([list(row) for row in zip(*columns)], dtype=dtype)
        else:
            # Keep the feature dimension even when there are no rows yet.
            X = torch.empty((0, len(names)), dtype=dtype)
        y = torch.tensor(targets, dtype=dtype).unsqueeze(-1)
        return cls(
            X=X,
            y=y,
            feature_names=names,
            objective_name=objective_key,
            metadata={"source": "dict", "n_samples": n_rows},
        )

    def add_observations(self, X_new: Tensor, y_new: Tensor) -> None:
        """Add new observations to the prior data.

        Args:
            X_new: New inputs, one row per observation.
            y_new: New objective values; a 1-D tensor is given a trailing
                singleton dimension before being stored.

        Raises:
            ValueError: If ``X_new`` and ``y_new`` do not contain the same
                number of observations.
        """
        if y_new.dim() == 1:
            y_new = y_new.unsqueeze(-1)

        if X_new.shape[0] != y_new.shape[0]:
            raise ValueError(
                "X_new and y_new must have the same number of rows, got "
                f"{X_new.shape[0]} and {y_new.shape[0]}."
            )

        if self.X is None:
            self.X = X_new
            self.y = y_new
        else:
            y_old = self.y
            # An instance built directly may carry a 1-D y; align it with the
            # column layout used for y_new so the concatenation is valid.
            if y_old is not None and y_old.dim() == 1:
                y_old = y_old.unsqueeze(-1)
            # Build both results before assigning so that a failure on either
            # concatenation leaves X and y untouched and still in sync.
            merged_X = torch.cat([self.X, X_new], dim=0)
            merged_y = torch.cat([y_old, y_new], dim=0)
            self.X = merged_X
            self.y = merged_y

        self.metadata["n_samples"] = len(self.X)

    def get_bounds(self) -> Tuple[Tensor, Tensor]:
        """Get the observed bounds of the data.

        Returns:
            ``(lower, upper)``: the per-column minimum and maximum of ``X``.

        Raises:
            ValueError: If no observations are stored.
        """
        if self.X is None or self.X.shape[0] == 0:
            raise ValueError("No data available.")
        return self.X.min(dim=0).values, self.X.max(dim=0).values

    def get_best(self, maximize: bool = True) -> Tuple[Tensor, Tensor]:
        """Get the best observation so far.

        Args:
            maximize: If True return the observation with the largest ``y``,
                otherwise the one with the smallest.

        Returns:
            ``(x_best, y_best)``: the matching rows of ``X`` and ``y``.

        Raises:
            ValueError: If no observations are stored.
        """
        # X is indexed below as well, so it must be present too.
        if self.X is None or self.y is None or self.y.numel() == 0:
            raise ValueError("No data available.")
        # argmax/argmin work on the flattened tensor; this equals the row
        # index when y has a single column.
        idx = self.y.argmax() if maximize else self.y.argmin()
        return self.X[idx], self.y[idx]

    @property
    def n_observations(self) -> int:
        """Number of stored observations (0 when ``X`` is unset)."""
        return 0 if self.X is None else len(self.X)

    @property
    def n_features(self) -> int:
        """Size of the last dimension of ``X`` (0 when ``X`` is unset)."""
        return 0 if self.X is None else self.X.shape[-1]