# surrogate_model_tools.py
import aerosandbox.numpy as _np
import casadi as _cas
from typing import Tuple, Union


def softmax(
*args: Union[float, _np.ndarray],
softness: float = None,
hardness: float = None,
) -> Union[float, _np.ndarray]:
"""
An element-wise softmax between two or more arrays. Also referred to as the logsumexp() function.
Useful for optimization because it's differentiable and preserves convexity!
Great writeup by John D Cook here:
https://www.johndcook.com/soft_maximum.pdf
Notes: Can provide either `hardness` or `softness`, not both. These are the inverse of each other. If neither is
provided, `hardness` is set to 1.
Args:
*args: Provide any number of arguments as values to take the softmax of.
hardness: Hardness parameter. Higher values make this closer to max(x1, x2).
softness: Softness parameter. (Inverse of hardness.) Lower values make this closer to max(x1, x2).
- Setting `softness` is particularly useful, because it has the same units as each of the function's
inputs. For example, if you're taking the softmax of two values that are lengths in units of meters,
then `softness` is also in units of meters. In this case, `softness` has the rough meaning of "an amount
of discrepancy between the input values that would be considered physically significant".
Returns:
Soft maximum of the supplied values.
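
    Example:
        An illustrative usage sketch; the values noted in the comments are approximate:

        >>> import aerosandbox.numpy as np
        >>> a = softmax(1.0, 2.0, softness=1.0)   # roughly 2.31: a very "soft" maximum of 1 and 2
        >>> b = softmax(1.0, 2.0, hardness=10)    # roughly 2.0000045: very close to the true maximum
        >>> c = softmax(np.array([1, 3]), np.array([2, 2]), softness=0.1)   # roughly [2, 3], elementwise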
"""
### Set defaults for hardness/softness
n_specified_arguments = (hardness is not None) + (softness is not None)
if n_specified_arguments == 0:
softness = 1
elif n_specified_arguments == 2:
raise ValueError("You must provide exactly one of `hardness` or `softness`.")
    if hardness is not None:
        if _np.any(hardness <= 0):
            raise ValueError("The value of `hardness` must be positive.")
        softness = 1 / hardness
    if _np.any(softness <= 0):
        raise ValueError("The value of `softness` must be positive.")
    if len(args) <= 1:
        raise ValueError(
            "You must call softmax() with two or more values (or arrays) to take the element-wise softmax of."
        )
### Scale the args by softness
args = [arg / softness for arg in args]
    ### Find the element-wise max of the arrays, used to keep the exponentials from overflowing:
    max_arg = args[0]
    for arg in args[1:]:
        max_arg = _np.fmax(max_arg, arg)
    out = max_arg + _np.log(
        sum([_np.exp(_np.maximum(arg - max_arg, -500)) for arg in args])
    )
out = out * softness
    return out


def softmin(
*args: Union[float, _np.ndarray],
softness: float = None,
hardness: float = None,
) -> Union[float, _np.ndarray]:
"""
An element-wise softmin between two or more arrays. Related to the logsumexp() function.
Useful for optimization because it's differentiable and preserves convexity!
Great writeup by John D Cook here:
https://www.johndcook.com/soft_maximum.pdf
Notes: Can provide either `hardness` or `softness`, not both. These are the inverse of each other. If neither is
provided, `hardness` is set to 1.
Args:
*args: Provide any number of arguments as values to take the softmin of.
hardness: Hardness parameter. Higher values make this closer to min(x1, x2).
softness: Softness parameter. (Inverse of hardness.) Lower values make this closer to min(x1, x2).
- Setting `softness` is particularly useful, because it has the same units as each of the function's
inputs. For example, if you're taking the softmin of two values that are lengths in units of meters,
then `softness` is also in units of meters. In this case, `softness` has the rough meaning of "an amount
of discrepancy between the input values that would be considered physically significant".
Returns:
Soft minimum of the supplied values.
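
    Example:
        An illustrative usage sketch; the values noted in the comments are approximate:

        >>> a = softmin(1.0, 2.0, softness=1.0)   # roughly 0.69: a very "soft" minimum of 1 and 2
        >>> b = softmin(1.0, 2.0, hardness=10)    # roughly 0.9999955: very close to the true minimum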
"""
return -softmax(
*[-arg for arg in args],
softness=softness,
hardness=hardness,
    )


def softmax_scalefree(
*args: Union[float, _np.ndarray],
relative_softness: float = None,
relative_hardness: float = None,
) -> Union[float, _np.ndarray]:
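    """
    An element-wise softmax whose softness is specified relative to the scale of the inputs: the effective
    softness passed to softmax() is `relative_softness * norm(args)`, so the function behaves consistently
    regardless of the overall magnitude of the inputs.

    Notes: Can provide either `relative_hardness` or `relative_softness`, not both. These are the inverse of
    each other. If neither is provided, `relative_softness` is set to 0.01.

    Args:
        *args: Provide any number of arguments as values to take the softmax of.
        relative_softness: Softness, expressed as a fraction of the norm of the inputs. Lower values make
            this closer to a true max().
        relative_hardness: Hardness; the inverse of `relative_softness`.

    Returns:
        Soft maximum of the supplied values, computed with scale-relative softness.
    """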
n_specified_arguments = (relative_hardness is not None) + (
relative_softness is not None
)
if n_specified_arguments == 0:
relative_softness = 0.01
elif n_specified_arguments == 2:
raise ValueError(
"You must provide exactly one of `relative_softness` or `relative_hardness."
)
if relative_hardness is not None:
relative_softness = 1 / relative_hardness
    return softmax(*args, softness=relative_softness * _np.linalg.norm(_np.array(args)))


def softmin_scalefree(
*args: Union[float, _np.ndarray],
relative_softness: float = None,
relative_hardness: float = None,
) -> Union[float, _np.ndarray]:
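    """
    An element-wise softmin whose softness is specified relative to the scale of the inputs; see
    softmax_scalefree() for details on `relative_softness` and `relative_hardness`.
    """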
return -softmax_scalefree(
*[-arg for arg in args],
relative_softness=relative_softness,
relative_hardness=relative_hardness,
    )


def softplus(
x: Union[float, _np.ndarray],
beta=1,
threshold=40,
):
"""
A smooth approximation of the ReLU function, applied elementwise to an array `x`.
Softplus(x) = 1/beta * log(1 + exp(beta * x))
Often used as an activation function in neural networks.
Args:
x: The input
beta: A parameter that controls the "softness" of the function. Higher values of beta make the function
approach ReLU.
        threshold: Values of `beta * x` above this threshold are treated as linear, to avoid overflow in
            the exponential.
Returns: The value of the softplus function.
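
    Example:
        An illustrative usage sketch; the values noted in the comments are approximate:

        >>> a = softplus(0.0)             # roughly 0.693 (= ln(2))
        >>> b = softplus(10.0)            # roughly 10.0000454: nearly linear for large positive x
        >>> c = softplus(1.0, beta=10)    # roughly 1.0000045: higher beta hugs ReLU more closely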
"""
if _np.is_casadi_type(x, recursive=False):
return _np.where(
beta * x > threshold, x, 1 / beta * _cas.log1p(_cas.exp(beta * x))
)
else:
        return 1 / beta * _np.logaddexp(0, beta * x)


def sigmoid(
x,
sigmoid_type: str = "tanh",
normalization_range: Tuple[Union[float, int], Union[float, int]] = (0, 1),
):
"""
A sigmoid function. From Wikipedia (https://en.wikipedia.org/wiki/Sigmoid_function):
A sigmoid function is a mathematical function having a characteristic "S"-shaped curve
or sigmoid curve.
Args:
x: The input
sigmoid_type: Type of sigmoid function to use [str]. Can be one of:
* "tanh" or "logistic" (same thing)
* "arctan"
* "polynomial"
        normalization_range: Range in which to normalize the sigmoid, shorthanded here in the
            documentation as "N". This parameter is given as a two-element tuple (min, max).
After normalization:
>>> sigmoid(-Inf) == normalization_range[0]
>>> sigmoid(Inf) == normalization_range[1]
* In the special case of N = (0, 1):
>>> sigmoid(-Inf) == 0
>>> sigmoid(Inf) == 1
>>> sigmoid(0) == 0.5
>>> d(sigmoid)/dx at x=0 == 0.5
* In the special case of N = (-1, 1):
>>> sigmoid(-Inf) == -1
>>> sigmoid(Inf) == 1
>>> sigmoid(0) == 0
>>> d(sigmoid)/dx at x=0 == 1
Returns: The value of the sigmoid.
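
    Example:
        An illustrative usage sketch; the values noted in the comments are approximate:

        >>> a = sigmoid(0.0)                                   # 0.5, with the default (0, 1) normalization
        >>> b = sigmoid(1.0)                                   # roughly 0.88
        >>> c = sigmoid(0.0, normalization_range=(-1, 1))      # 0.0
        >>> d = sigmoid(1.0, sigmoid_type="arctan")            # roughly 0.82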
"""
### Sigmoid equations given here under the (-1, 1) normalization:
if sigmoid_type == ("tanh" or "logistic"):
# Note: tanh(x) is simply a scaled and shifted version of a logistic curve.
s = _np.tanh(x)
elif sigmoid_type == "arctan":
s = 2 / _np.pi * _np.arctan(_np.pi / 2 * x)
elif sigmoid_type == "polynomial":
s = x / (1 + x**2) ** 0.5
else:
raise ValueError("Bad value of parameter 'type'!")
### Normalize
    out_min, out_max = normalization_range
    s_normalized = s * (out_max - out_min) / 2 + (out_max + out_min) / 2
    return s_normalized


def swish(
x: Union[float, _np.ndarray],
beta: float = 1.0,
):
"""
A smooth approximation of the ReLU function, applied elementwise to an array `x`.
    Swish(x) = x / (1 + exp(-beta * x)) = x * logistic(beta * x); for beta = 1, this equals x * (0.5 + 0.5 * tanh(x / 2)).
Often used as an activation function in neural networks.
Args:
x: The input
beta: A parameter that controls the "softness" of the function. Higher values of beta make the function
approach ReLU.
Returns: The value of the swish function.
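
    Example:
        An illustrative usage sketch; the values noted in the comments are approximate:

        >>> a = swish(1.0)             # roughly 0.73
        >>> b = swish(-1.0)            # roughly -0.27
        >>> c = swish(1.0, beta=10)    # roughly 0.99995: higher beta hugs ReLU more closely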
"""
    return x / (1 + _np.exp(-beta * x))


def blend(
switch: float,
value_switch_high,
value_switch_low,
):
"""
Smoothly blends between two values on the basis of some switch function.
This function is similar in usage to numpy.where (documented here:
    https://numpy.org/doc/stable/reference/generated/numpy.where.html), except that
instead of using a boolean as to switch between the two values, a float is used to
smoothly transition between the two in a differentiable manner.
Before using this function, be sure to understand the difference between this and
    softmax(), and choose the correct one.
Args:
switch: A value that acts as a "switch" between the two values [float].
If switch is -Inf, value_switch_low is returned.
If switch is Inf, value_switch_high is returned.
If switch is 0, the mean of value_switch_low and value_switch_high is returned.
If switch is 1, the return value is roughly (0.88 * value_switch_high + 0.12 * value_switch_low).
If switch is -1, the return value is roughly (0.88 * value_switch_low + 0.12 * value_switch_high).
value_switch_high: Value to be returned when switch is high. Can be a float or an array.
value_switch_low: Value to be returned when switch is low. Can be a float or an array.
Returns: A value that is a blend between value_switch_low and value_switch_high, with the weighting dependent
on the value of the 'switch' parameter.
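
    Example:
        An illustrative usage sketch; the values noted in the comments are approximate:

        >>> a = blend(0, value_switch_high=10, value_switch_low=0)     # 5.0: the midpoint
        >>> b = blend(1, value_switch_high=10, value_switch_low=0)     # roughly 8.8: biased toward the high value
        >>> c = blend(-10, value_switch_high=10, value_switch_low=0)   # roughly 0.0: essentially the low value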
"""
def blend_function(x):
return sigmoid(x, normalization_range=(0, 1))
weight_to_value_switch_high = blend_function(switch)
blend_value = value_switch_high * weight_to_value_switch_high + value_switch_low * (
1 - weight_to_value_switch_high
)
return blend_value