-
Notifications
You must be signed in to change notification settings - Fork 2
/
utils.py
372 lines (298 loc) · 17.8 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
"""Has utility functions for reading and saving configuration files, transforming coordinates, and plotting predictions."""
import geopandas as gpd
from shapely.geometry import LineString, box
import numpy as np
import pandas as pd
import pyproj
import yaml
import os
import matplotlib.pyplot as plt
import pytorch_lightning as pl
from model import PL_PINN
from dataset_new import PL_GlacierDataset
from typing import Any, Dict, Union, Tuple
from matplotlib.ticker import FormatStrFormatter, MaxNLocator
# Function to load yaml configuration file
def read_config(config_path: str):
with open(config_path) as file:
config = yaml.safe_load(file)
return config
# Function to save yaml configuration file
def save_config(config: Dict[str, Any], path: str):
with open(path, 'w') as file:
yaml.dump(config, file)
def transform_projection(coordinates: Union[Tuple[Union[pd.Series, np.ndarray], Union[pd.Series, np.ndarray]], pd.DataFrame], to_epsg: int=4326, from_epsg: int=3049):
"""
Transforms the coordinates in the given DataFrame from one coordinate reference system (CRS) to another.
Args:
coordinates_df (pandas.DataFrame): A DataFrame containing the coordinates to be transformed.
to_crs (int): The EPSG code of the target CRS. Default is 4326, which represents the lonlat projection with underlying WGS84 lon lat.
from_crs (int): The EPSG code of the source CRS. Default is 3049, which represents the geodetic CRS ETRS89.
Returns:
pandas.DataFrame: A DataFrame containing the transformed coordinates.
"""
crs_to = pyproj.crs.CRS.from_epsg(to_epsg)
crs_from = pyproj.crs.CRS.from_epsg(from_epsg)
transformer_latlon_to_3049 = pyproj.Transformer.from_crs(crs_from, crs_to, always_xy=True)
if isinstance(coordinates, pd.DataFrame):
x, y = transformer_latlon_to_3049.transform(coordinates.iloc[:,0].values, coordinates.iloc[:,1].values)
position_transf = pd.DataFrame()
position_transf['x'] = x
position_transf['y'] = y
return position_transf
elif isinstance(coordinates, tuple):
x, y = transformer_latlon_to_3049.transform(coordinates[0], coordinates[1])
return x, y
# --------------------------------------------------------
# Functions for prediction and plotting
# --------------------------------------------------------
def predictions_on_data_with_groundtruth(trainer: pl.Trainer, model: PL_PINN, config: Dict[str, Any], path: str=None, gt_dataset: PL_GlacierDataset=None):
"""Predict for the data at path, where ground truth data is known.
The config file determines how the dataset is loaded as PL_ItsLiveDataset.
Config should also provide scaling mean and variance, that was used for the scaling of the training data.
Alternatively the dataset can be provided directly.
The coordinates are transformed to lonlat coordinates.
Args:
trainer (Trainer): The PyTorch Lightning trainer object.
model (nn.Module): The PyTorch Lightning model object.
config (dict): The configuration dictionary.
path (str): The path to the data.
gt_dataset (PL_GlacierDataset, optional): The ground truth dataset. Defaults to None.
Returns:
tuple: A tuple containing the transformed coordinates, thicknesses, velocities, and residuals.
"""
if gt_dataset is None:
config["ds"]["data_dir_labeled"]=path
config["ds"]["unlabeled_sample_size"]=0.
config["ds"]["labeled_sample_size"]=1.
gt_dataset = PL_GlacierDataset(config)
predictions = trainer.predict(model, datamodule = gt_dataset)
thicknesses = np.concatenate([thick[0] for thick in predictions]).ravel()
velocities = np.concatenate([pred[1] for pred in predictions])
residuals = gt_dataset.dataset.target[config["ds"]["target"][0]]-thicknesses
coordinates = gt_dataset.dataset.inputs[config["ds"]["input_features"][:2]]
# transform coordinates to epsg4326 aka longitude and latitude
if 'epsg_crs' in config['ds']:
coordinates = transform_projection(coordinates, from_epsg=config['ds']['epsg_crs'])
return coordinates, thicknesses, velocities, residuals
def predictions_on_data_without_groundtruth(trainer: pl.Trainer, model: PL_PINN, config: Dict[str, Any], path: str=None, grid_dataset: PL_GlacierDataset=None):
"""
Generate predictions on unlabeled data without ground truth. The config file determines how the dataset is loaded as PL_GlacierDataset and
the scaling of the inputs and targets. Alternatively the dataset can be provided directly.
The coordinates are transformed to lonlat coordinates.
Args:
trainer (pl.Trainer): The PyTorch Lightning trainer object.
model (PL_PINN): The PyTorch Lightning model object.
config (dict): Configuration parameters for the dataset.
path (str): The path to the unlabeled data.
grid_dataset (PL_GlacierDataset, optional): The PyTorch Lightning dataset object. Defaults to None.
Returns:
tuple: A tuple containing the transformed coordinates, thicknesses, and velocities.
"""
if grid_dataset is None:
config["ds"]["data_dir_unlabeled"]=path
#config["ds"]["data_dir_labeled"]=path # does not matter, just pick any, as the sample size is set to 0 here
config["ds"]["unlabeled_sample_size"]=1.
config["ds"]["labeled_sample_size"]=0.
grid_dataset = PL_GlacierDataset(config)
predictions = trainer.predict(model, datamodule = grid_dataset)
thicknesses = np.concatenate([thick[0] for thick in predictions]).ravel()
velocities = np.concatenate([pred[1] for pred in predictions])
coordinates = grid_dataset.dataset.inputs[config["ds"]["input_features"][:2]]
# transform coordinates to epsg4326 aka longitude and latitude
if 'epsg_crs' in config['ds']:
coordinates = transform_projection(coordinates, from_epsg=config['ds']['epsg_crs'])
return coordinates, thicknesses, velocities
class SpitsbergenPlotFormatter():
"""A class that helps formatting all the plots of Spitsbergen, Edgeoya, and Barentsoya. """
def __init__(self, fontsize_dict: Dict[str, int]=None):
self.fontsize_small = fontsize_dict['fontsize_small']
self.fontsize_big = fontsize_dict['fontsize_big']
self.fontsize_medium = fontsize_dict['fontsize_medium']
self.coastline = None
def format_spitsbergen_plot(self, ax, title: str=None, xlabel: str='Longitude', ylabel: str='Latitude'):
ax.set_xlabel(xlabel, fontsize=self.fontsize_small)
if ylabel is not None:
ax.set_ylabel(ylabel, fontsize=self.fontsize_small)
ax.xaxis.set_major_formatter(FormatStrFormatter('%d° E'))
ax.yaxis.set_major_formatter(FormatStrFormatter('%d° N'))
ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
ax.set_title(title, fontsize=self.fontsize_big)
ax.tick_params(axis='both', which='both', labelsize=self.fontsize_small)
def format_spitsbergen_cbar(self, ax, plot, label, extend='neither'):
cbar = plt.colorbar(plot, ax=ax, extend=extend,fraction=0.07, pad=0.04)
cbar.set_label(label=label,size=self.fontsize_small)
cbar.ax.tick_params(labelsize=self.fontsize_small)
def add_coastline(self, ax):
if self.coastline is None:
self._get_coastline(ax, self.coastline)
self.coastline.plot(ax=ax, edgecolor='black', facecolor='none', lw=1, label='Coastline')
def _get_coastline(self, ax, coastline: pd.DataFrame):
# Read the coastline data
coastlines = gpd.read_file('data/svalbardcoastline/Coast2015.shp')
# Transform the coordinate system to EPSG:4326
coastlines = coastlines.to_crs(epsg=4326)
# Define the bounding box
minx, miny, maxx, maxy = 11, 76.5, 24, 80 # Adjust these values as needed
bounding_box = box(minx, miny, maxx, maxy)
# Clip the coastline data to the bounding box
coastlines_clipped = gpd.clip(coastlines, bounding_box)
# Create a line that connects the two points [20°E, 80°N] and [24°E, 78°N]
line = LineString([(20, 80), (24, 78)])
# Exclude all the coastlines that are east of this line
coastlines_clipped = coastlines_clipped[coastlines_clipped.geometry.apply(lambda x: not x.intersects(line))]
self.coastline = coastlines_clipped
def plot_thickness_pred(coordinate_grid: pd.DataFrame, thicknesses_grid: np.ndarray, eval: list, config: Dict[str, Any], title: str='thickness_prediction.png', vmax: float=None):
"""
Plots the predicted ice thickness and saves it as png file with title.
Args:
position_grid (pandas.DataFrame): DataFrame containing the position grid.
thicknesses_grid (numpy.ndarray): Array containing the predicted ice thicknesses.
eval (list): List of evaluation metrics.
config (dict): Configuration settings.
title (str, optional): Title of the plot. Defaults to 'thickness_prediction.png'.
vmax (float, optional): Maximum value for the colorbar. Defaults to None.
Returns:
None
"""
ccmap = plt.cm.viridis
ccmap.set_under('magenta')
f, ax1 = plt.subplots(1,1, figsize=(10,10))
p1=ax1.scatter(coordinate_grid.iloc[:,0], coordinate_grid.iloc[:,1], c=thicknesses_grid, marker='.', cmap=ccmap,vmax=vmax, vmin=0,s=1)
plt.colorbar(p1, ax=ax1,label='Ice thickness [m]', extend='min')
ax1.set_title('Predicted ice thickness')
plt.figtext(0.1, 0.05, s=('PINN estimate: Val RMSE', eval[0]['val Thickness RMSE'], ' Val MAPE ' , eval[0]['val MAPE']))
plt.savefig(os.path.join(config["experiment"]["save_dir"], title))
plt.close()
def plot_difference_to_measurement(residuals_coords: pd.DataFrame, grid_coords: pd.DataFrame, residuals: pd.Series, thicknesses_grid: np.ndarray, config: Dict[str, Any], eval=None, title: str='thickness_predictionvstruth.png', vmax: float=700, s: int=None):
"""
Plots the predicted ice thickness and the difference to measurements.
Args:
residuals_coords (pd.DataFrame): DataFrame containing the coordinates of the residuals.
grid_coords (pd.DataFrame): DataFrame containing the coordinates of the grid.
residuals (pd.Series): Series containing the difference between the true and predicted values.
thicknesses_grid (np.ndarray): Array containing the predicted ice thickness values.
config (dict): Dictionary containing the configuration settings.
eval (optional): Evaluation metrics for the predictions. Defaults to None.
title (str, optional): Title of the plot. Defaults to 'thickness_predictionvstruth.png'.
vmax (float, optional): Maximum value for the colorbar. Defaults to 700.
s (int, optional): Size of the markers. Defaults to None.
"""
ccmap = plt.cm.viridis
ccmap.set_under('magenta')
f, ax1 = plt.subplots(1,1, figsize=(10,10))
p1=ax1.scatter(grid_coords.iloc[:,0], grid_coords.iloc[:,1], c=thicknesses_grid, marker='.', cmap=ccmap,vmax=vmax, vmin=0,s=s)
plt.colorbar(p1, ax=ax1,label='Ice thickness [m]', extend='min')
ax1.set_title('Predicted ice thickness')
p2 = ax1.scatter(residuals_coords.iloc[:,0], residuals_coords.iloc[:,1], c=residuals, marker='.',cmap='RdBu',vmax=50, vmin=-50 , s=s)
plt.colorbar(p2, ax=ax1, label='Difference to measurements (True-Pred)')
if eval is not None:
plt.figtext(0.1, 0.05, s=('PINN estimate: Val RMSE', eval[0]['val Thickness RMSE'], ' Val MAPE ' , eval[0]['val MAPE']))
plt.savefig(os.path.join(config["experiment"]["save_dir"], title))
plt.close()
def plot_velocity_predictions(coordinate_grid: pd.DataFrame, velocities: np.ndarray, config: Dict[str, Any]):
"""
Plots the velocity predictions on a coordinate grid. Maximum and minimum values are given by the mean and the 3 sigma standard deviation of the velocities.
Args:
coordinate_grid (pd.DataFrame): The coordinate grid data.
velocities (np.ndarray): The velocity data.
config (dict): The configuration settings.
Returns:
None
"""
max_value_x = abs(np.mean(velocities[:,0]))+3*np.std(velocities[:,0])
max_value_y = abs(np.mean(velocities[:,1])) +3*np.std(velocities[:,1])
f, (ax1,ax2) = plt.subplots(1,2, figsize=(20,10))
p1=ax1.scatter(coordinate_grid.iloc[:,0], coordinate_grid.iloc[:,1], c=velocities[:,0], marker='.', cmap='coolwarm',s=1,vmax=max_value_x, vmin=-max_value_x,)
plt.colorbar(p1, ax=ax1,label='Velocity [m/a]')
ax1.set_title('Depth-averaged Vel_x [m/a]')
p2 = ax2.scatter(coordinate_grid.iloc[:,0], coordinate_grid.iloc[:,1], c=velocities[:,1], marker='.',cmap='coolwarm', s=1,vmax=max_value_y, vmin=-max_value_y)
ax2.set_title('Depth-averaged Vel_y [m/a]')
plt.colorbar(p2, ax=ax2, label='Velocity [m/a]')
plt.savefig(os.path.join(config["experiment"]["save_dir"],'velocity_prediction.png'))
plt.close()
def plot_glacier(rgi_id: str, trainer: pl.Trainer, model: PL_PINN, config: Dict[str, Any], title: str='thickness_predVStruth_leftoutglacier.png'):
"""
Plots the glacier thickness predictions and the residual to the measurements for the glacier with rgi_id.
Calculates evaluation metrics for the given glacier. This function is used to evaluate the model on a left out glacier in the LOGO_CV.py script.
Args:
rgi_id (str): The ID of the glacier.
trainer (pl.Trainer): The PyTorch Lightning trainer object.
model (PL_PINN): The PyTorch Lightning model object.
config (dict): The configuration dictionary.
title (str, optional): The title of the plot. Defaults to 'thickness_predVStruth_leftoutglacier.png'.
Returns:
dict: The evaluation metrics for the left out glacier.
"""
if "num_points" in config["ds"]:
del config["ds"]["num_points"]
config["ds"]["glacier_ids"] = []
config["ds"]["only_glacier"] = rgi_id
# Grid dataset
grid_config = config.copy()
grid_config["ds"]["unlabeled_sample_size"] = 1.0
grid_config["ds"]["labeled_sample_size"] = 0.0
print(grid_config["ds"]["unlabeled_sample_size"], grid_config["ds"]["labeled_sample_size"])
grid_dataset = PL_GlacierDataset(grid_config)
grid_coords, grid_thick, _ = predictions_on_data_without_groundtruth(trainer, model, grid_config, path=grid_config["ds"]["data_dir_unlabeled"], grid_dataset=grid_dataset)
# Groundtruth dataset
gt_config = config.copy()
gt_config["ds"]["unlabeled_sample_size"] = 0.0
gt_config["ds"]["labeled_sample_size"] = 1.0
print(gt_config["ds"]["unlabeled_sample_size"], gt_config["ds"]["labeled_sample_size"])
gt_dataset = PL_GlacierDataset(gt_config)
eval1 = trainer.validate(model, datamodule=gt_dataset)
print('Evaluation on left out glacier: ', eval1)
gt_coords, _, _, residuals = predictions_on_data_with_groundtruth(trainer, model, gt_config, path=gt_config["ds"]["data_dir_labeled"], gt_dataset=gt_dataset)
plot_difference_to_measurement(gt_coords, grid_coords, residuals, grid_thick, config, eval=eval1, title=title, vmax=None)
return eval1
def comparison_to_millan_full_grid(config: Dict[str, Any], path: str, trainer: pl.Trainer, model: PL_PINN):
"""
Compare the predicted ice thickness and velocities to the target values from the Millan dataset.
Args:
config (dict): Configuration parameters for the experiment.
path (str): Path to the data directory.
trainer: Trainer object for training and evaluation.
model: Model object for prediction.
Returns:
None
"""
config["ds"]["target"][0] = 'millan_ice_thickness'
config["ds"]["data_dir_labeled"] = path
config["ds"]["unlabeled_sample_size"] = 0.
config["ds"]["labeled_sample_size"] = 1.
test_dataset = PL_GlacierDataset(config)
evaluation = trainer.validate(model, datamodule=test_dataset)
predictions = trainer.predict(model, datamodule=test_dataset)
coords = test_dataset.dataset.inputs[config["ds"]["input_features"][:2]]
thicknesses = np.concatenate([thick[0] for thick in predictions]).ravel()
residual = test_dataset.dataset.target['millan_ice_thickness'] - thicknesses
f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 10))
p1 = ax1.scatter(coords.iloc[:, 0], coords.iloc[:, 1], c=thicknesses, marker='.', cmap='viridis', vmax=700, vmin=0, s=1)
plt.colorbar(p1, ax=ax1)
ax1.set_title('Predicted ice thickness [m]')
p2 = ax2.scatter(coords.iloc[:, 0], coords.iloc[:, 1], c=test_dataset.dataset.target['millan_ice_thickness'], marker='.', vmax=700, s=1)
plt.colorbar(p2, ax=ax2)
ax2.set_title('Target ice thickness [m]')
p3 = ax3.scatter(coords.iloc[:, 0], coords.iloc[:, 1], c=residual, cmap='RdBu', marker='.', vmin=-50, vmax=50, s=1)
plt.colorbar(p3, ax=ax3)
ax3.set_title('Residual: Millan-Predicted')
plt.figtext(0.6, 0.05, s=evaluation)
plt.savefig(os.path.join(config["experiment"]["save_dir"], "comparison_thickness_prediction_to_millan.png"))
plt.close()
velocities = np.concatenate([pred[1] for pred in predictions])
f, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(30, 10))
p1 = ax1.scatter(coords.iloc[:, 0], coords.iloc[:, 1], c=velocities[:, 0], vmin=-50, vmax=50, s=0.5)
plt.colorbar(p1, ax=ax1)
p2 = ax2.scatter(coords.iloc[:, 0], coords.iloc[:, 1], c=test_dataset.dataset.inputs['millan_vx_smoothed'], vmin=-50, vmax=50, s=0.5)
plt.colorbar(p2, ax=ax2)
p3 = ax3.scatter(coords.iloc[:, 0], coords.iloc[:, 1], c=velocities[:, 1], vmin=-100, vmax=100, s=0.5)
plt.colorbar(p3, ax=ax3)
p4 = ax4.scatter(coords.iloc[:, 0], coords.iloc[:, 1], c=test_dataset.dataset.inputs['millan_vy_smoothed'], vmin=-100, vmax=100, s=0.5)
plt.colorbar(p4, ax=ax4)
ax1.title.set_text('depth_avg x')
ax2.title.set_text('surface vel x')
ax3.title.set_text('depth_avg y')
ax4.title.set_text('surface_vel y')
plt.savefig(os.path.join(config["experiment"]["save_dir"], 'velocity_prediction.png'))
plt.close()