WSL/SLF GitLab Repository

ResamplingAlgorithms.cc 18.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/***********************************************************************************/
/*  Copyright 2009 WSL Institute for Snow and Avalanche Research    SLF-DAVOS      */
/***********************************************************************************/
/* This file is part of MeteoIO.
    MeteoIO is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    MeteoIO is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with MeteoIO.  If not, see <http://www.gnu.org/licenses/>.
*/
#include <meteoio/ResamplingAlgorithms.h>
19
#include <cmath>
20
#include <algorithm>
21
22
23
24
25
26

using namespace std;

namespace mio {
 /**
 * @page resampling Resampling overview
27
 * The resampling infrastructure is described in ResamplingAlgorithms (for its API).
28
29
 * The goal of this page is to give an overview of the available resampling algorithms and their usage.
 *
30
31
32
33
34
35
36
37
 * @section resampling_section Resampling section
 * The resampling is specified for each parameter in the [Interpol1D] section. This section contains
 * a list of the various meteo parameters with their associated choice of resampling algorithm and
 * optional parameters. If a meteo parameter is not listed in this section, a linear resampling would be
 * assumed. An example of such section is given below:
 * @code
 * [Interpolations1D]
 * TA::resample    = linear
38
 *
39
 * RH::resample    = linear
40
 *
41
42
 * VW::resample    = nearest_neighbour
 * VW::args        = extrapolate
43
 *
44
45
46
 * HNW::resample   = linear
 * @endcode
 *
47
48
49
50
51
52
53
54
 * The size (in seconds) of the biggest gap that can be interpolated is given with the key WINDOW_SIZE. Therefore if two valid points are less than
 * WINDOW_SIZE seconds apart, points in between will be interpolated. If they are further apart, all points in between will remain IOUtils::nodata.
 * If using the "extrapolate" optional argument, points at WINDOW_SIZE distance of only one valid point will be extrapolated, otherwise they will remain
 * IOUtils::nodata. Please keep in mind that allowing extrapolated values can lead to grossly out of range data: using the slope
 * between two hourly measurements to extrapolate a point 10 days ahead is obviously risky!
 *
 * By default, WINDOW_SIZE is set to 10 days. This key has a <b>dramatic impact on run time/performance</b>:
 * reducing WINDOW_SIZE from 10 days down to one day makes geting meteo data 8 times faster while increasing it to 20 days makes it twice slower.
55
 *
56
57
 * @section algorithms_available Available Resampling Algorithms
 * Two algorithms for the resampling are implemented:
58
 * - none: do not perform resampling, see ResamplingAlgorithms::NoResampling
59
60
 * - linear: linear data resampling, see ResamplingAlgorithms::LinearResampling
 * - nearest_neighbour:  data resampling, see ResamplingAlgorithms::NearestNeighbour
61
 * - accumulate: data re-accumulation as suitable for precipitations, see ResamplingAlgorithms::Accumulate
62
63
 */

64
std::map<std::string, ResamplingAlgorithms::resamplingptr> ResamplingAlgorithms::algorithmMap;
65
66
67
68
const bool ResamplingAlgorithms::__init = ResamplingAlgorithms::initStaticData();

bool ResamplingAlgorithms::initStaticData()
{
69
	algorithmMap["none"]              = &ResamplingAlgorithms::NoResampling;
70
71
	algorithmMap["linear"]            = &ResamplingAlgorithms::LinearResampling;
	algorithmMap["nearest_neighbour"] = &ResamplingAlgorithms::NearestNeighbour;
72
	algorithmMap["accumulate"]        = &ResamplingAlgorithms::Accumulate;
73
74
75
76

	return true;
}

77
const ResamplingAlgorithms::resamplingptr& ResamplingAlgorithms::getAlgorithm(const std::string& algoname)
78
79
80
81
82
{
	std::map<std::string, resamplingptr>::const_iterator it;
	it = algorithmMap.find(algoname);

	if (it==algorithmMap.end())
83
		throw UnknownValueException("Unknown resampling algorithm called: " + algoname, AT);
84
85
86
87

	return it->second;
}

88
/**********************************************************************************
89
 * The following functions are implementations of different resampling algorithms *
90
 **********************************************************************************/
91

92
93
94
95
96
97
98
99
/**
 * @brief No resampling: do not resample parameter but keep original sampling rate
 * @code
 * [Interpolations1D]
 * TA::resample = none
 * @endcode
 */

100
101
void ResamplingAlgorithms::NoResampling(const size_t& index, const ResamplingPosition& position, const size_t& paramindex,
                                        const std::vector<std::string>& /*taskargs*/, const double& /*window_size*/, const std::vector<MeteoData>& vecM, MeteoData& md)
102
{
103
104
105
106
107
108
109
110
111
112
113
	if (index >= vecM.size())
		throw IOException("The index of the element to be resampled is out of bounds", AT);

	if (position == ResamplingAlgorithms::exact_match) {
		const double& value = vecM[index](paramindex);
		if (value != IOUtils::nodata) {
			md(paramindex) = value; //propagate value
			return;
		}
	}

114
115
116
117
	return;
}


118
/**
119
120
 * @brief Nearest Neighbour data resampling
 * Find the nearest neighbour of a desired data point that is not IOUtils::nodata and copy that value into the desired data point
121
122
 *        - If the data point itself is not IOUtils::nodata, nothing needs to be done
 *        - If two points have the same distance from the data point to be resampled, calculate mean and return it
123
 *        - if the argument extrapolate is provided, points within WINDOW_SIZE seconds of only one valid point will receive the value of this point
124
125
126
127
128
129
 * @code
 * [Interpolations1D]
 * TA::resample = nearest_neighbour
 * @endcode
 */

130
void ResamplingAlgorithms::NearestNeighbour(const size_t& index, const ResamplingPosition& position, const size_t& paramindex,
131
                                            const std::vector<std::string>& taskargs, const double& window_size, const std::vector<MeteoData>& vecM, MeteoData& md)
132
{
133
134
	if (index >= vecM.size())
		throw IOException("The index of the element to be resampled is out of bounds", AT);
135

136
137
138
139
	const Date& resampling_date = md.date;

	if (position == ResamplingAlgorithms::exact_match) {
		const double& value = vecM[index](paramindex);
140
		if (value != IOUtils::nodata) {
141
142
143
144
			md(paramindex) = value; //propagate value
			return;
		}
	}
145

146
147
148
149
	//Check whether extrapolation is activated
	bool extrapolate = false;
	if ((taskargs.size()==1) && (taskargs[0]=="extrapolate"))
		extrapolate = true;
150

151
152
	//if we are at the very beginning or end of vecM and !extrapolate, then there's nothing to do
	if (((!extrapolate) && (position == ResamplingAlgorithms::end))
153
	    || ((!extrapolate) && (position == ResamplingAlgorithms::begin)))
154
		return;
155

156
	size_t indexP1=IOUtils::npos, indexP2=IOUtils::npos;
157
	getNearestValidPts(index, paramindex, vecM, resampling_date, window_size, indexP1, indexP2);
158
	const bool foundP1=(indexP1!=IOUtils::npos), foundP2=(indexP2!=IOUtils::npos);
159

160
	//Try to find the nearest neighbour, if there are two equally distant, then return the arithmetic mean
161
	if (foundP1 && foundP2) { //standard behavior
162
163
		const Duration diff1 = resampling_date - vecM[indexP1].date; //calculate time interval to element at index
		const Duration diff2 = vecM[indexP2].date - resampling_date; //calculate time interval to element at index
164
165
		const double& val1 = vecM[indexP1](paramindex);
		const double& val2 = vecM[indexP2](paramindex);
166

167
		if (IOUtils::checkEpsilonEquality(diff1.getJulianDate(true), diff2.getJulianDate(true), 0.1/1440)){ //within 6 seconds
168
			md(paramindex) = Interpol1D::weightedMean(val1, val2, 0.5);
169
		} else if (diff1 < diff2){
170
			md(paramindex) = val1;
171
		} else if (diff1 > diff2){
172
			md(paramindex) = val2;
173
		}
174
	} else if (extrapolate) {
175
176
177
178
		if(foundP1 && !foundP2){ //nearest neighbour on found after index 'index'
			md(paramindex) = vecM[indexP1](paramindex);
		} else if (!foundP1 && foundP2){ //nearest neighbour on found before index 'index'
			md(paramindex) = vecM[indexP2](paramindex);
179
180
		} else { // no nearest neighbour with a value different from IOUtils::nodata
			return;
181
182
183
184
185
		}
	}
}

/**
186
187
 * @brief Linear data resampling: If a point is requested that is in between two input data points,
 *        the requested value is automatically calculated using a linear interpolation. Furthermore
188
 *        if the argument extrapolate is provided there will be an attempt made to extrapolate the
189
 *        point if the interpolation fails, by solving the line equation y = kx + d
190
191
192
193
194
195
 * @code
 * [Interpolations1D]
 * TA::resample = linear
 * TA::args     = extrapolate
 * @endcode
 */
196
197
void ResamplingAlgorithms::LinearResampling(const size_t& index, const ResamplingPosition& position, const size_t& paramindex,
                                            const std::vector<std::string>& taskargs, const double& window_size, const std::vector<MeteoData>& vecM, MeteoData& md)
198
{
199
200
	if (index >= vecM.size())
		throw IOException("The index of the element to be resampled is out of bounds", AT);
201

202
203
204
205
	const Date& resampling_date = md.date;

	if (position == ResamplingAlgorithms::exact_match) {
		const double& value = vecM[index](paramindex);
206
		if (value != IOUtils::nodata) {
207
208
209
210
			md(paramindex) = value; //propagate value
			return;
		}
	}
211
212
213
214
215
216

	//Check whether extrapolation is activated
	bool extrapolate = false;
	if ((taskargs.size()==1) && (taskargs[0]=="extrapolate"))
		extrapolate = true;

217
218
	//if we are at the very beginning or end of vecM and !extrapolate, then there's nothing to do
	if (((!extrapolate) && (position == ResamplingAlgorithms::end))
219
	    || ((!extrapolate) && (position == ResamplingAlgorithms::begin)))
220
		return;
221

222
	size_t indexP1=IOUtils::npos, indexP2=IOUtils::npos;
223
224

	getNearestValidPts(index, paramindex, vecM, resampling_date, window_size, indexP1, indexP2);
225
	bool foundP1=(indexP1!=IOUtils::npos), foundP2=(indexP2!=IOUtils::npos);
226
227

	//do nothing if we can't interpolate, and extrapolation is not explicitly activated
228
	if ((!extrapolate) && ((!foundP1) || (!foundP2)))
229
230
231
232
233
234
		return;
	//do nothing if not at least one value different from IOUtils::nodata has been found
	if (!foundP1 && !foundP2)
		return;

	//At this point we either have a valid indexP1 or indexP2 and we can at least try to extrapolate
235
	if (!foundP1 && foundP2){ //only nodata values found before index, try looking after indexP2
236
		for (size_t ii=indexP2+1; ii<vecM.size(); ii++){
237
			if (vecM[ii](paramindex) != IOUtils::nodata){
238
239
240
241
				indexP1 = ii;
				foundP1 = true;
				break;
			}
242
		}
243
	} else if (foundP1 && !foundP2){ //only nodata found after index, try looking before indexP1
244
		for (size_t ii=indexP1; (ii--) > 0; ){
245
			if (vecM[ii](paramindex) != IOUtils::nodata){
246
247
248
249
				indexP2=ii;
				foundP2 = true;
				break;
			}
250
		}
251
252
253
254
255
256
	}

	if (!foundP1 || !foundP2) //now at least two points need to be present
		return;

	//At this point indexP1 and indexP2 point to values that are different from IOUtils::nodata
257
	const double& val1 = vecM[indexP1](paramindex);
258
	const double jul1 = vecM[indexP1].date.getJulianDate(true);
259
	const double& val2 = vecM[indexP2](paramindex);
260
261
	const double jul2 = vecM[indexP2].date.getJulianDate(true);

262
	md(paramindex) = linearInterpolation(jul1, val1, jul2, val2, resampling_date.getJulianDate(true));
263
264
}

265
266
/**
 * @brief Accumulation over a user given period.
267
 * The input data is accumulated over a given time interval (given as filter argument, in seconds).
268
269
270
 * This is for example needed for converting rain gauges measurements read every 10 minutes to
 * hourly precipitation measurements. Remarks:
 * - the accumulation period has to be provided as an argument (in seconds)
271
 * - if giving as a second argument "strict", nodatas will propagate (ie. a single nodata in the input will force the re-accumulated value to be nodata). By default, all valid values are aggregated and only pure nodata intervals produce a nodata in the output.
272
273
274
275
276
 * @code
 * HNW::filter1 = accumulate
 * HNW::arg1	 = 3600
 * @endcode
 */
277
void ResamplingAlgorithms::Accumulate(const size_t& index, const ResamplingPosition& position, const size_t& paramindex,
278
279
                                      const std::vector<std::string>& taskargs, const double& /*window_size*/,
                                      const std::vector<MeteoData>& vecM, MeteoData& md)
280
{
281
282
	if (index >= vecM.size())
		throw IOException("The index of the element to be resampled is out of bounds", AT);
283
284
	if(position==ResamplingAlgorithms::begin || position==ResamplingAlgorithms::end)
		return;
285
286
287

	const Date& resampling_date = md.date;
	md(paramindex) = IOUtils::nodata;
288

289
	//Read accumulation period and potential "strict" option
290
	double accumulate_period;
291
292
	bool strict = false;
	if (taskargs.size()==1 || taskargs.size()==2) {
293
294
295
296
		IOUtils::convertString(accumulate_period, taskargs[0]);
		if(accumulate_period<=0.) {
			std::stringstream tmp;
			tmp << "Invalid accumulation period (" << accumulate_period << ") ";
297
			tmp << "for parameter " << vecM.at(0).getNameForParameter(paramindex);
298
299
			throw InvalidArgumentException(tmp.str(), AT);
		}
300
301
302
303
304
305
306
307
		if(taskargs.size()==2) {
			if(taskargs[1]=="strict") strict=true;
			else {
				std::stringstream ss;
				ss << "Invalid argument \"" << taskargs[1] << "\" for param " << vecM.at(0).getNameForParameter(paramindex);
				throw InvalidArgumentException(ss.str(), AT);
			}
		}
308
309
	} else {
		std::stringstream tmp;
310
		tmp << "Please provide accumulation period (in seconds) for param " << vecM.at(0).getNameForParameter(paramindex);
311
312
		throw InvalidArgumentException(tmp.str(), AT);
	}
313

314
	//find start of accumulation period and initialize the sum
315
	double sum = IOUtils::nodata;
316
	const Date dateStart(resampling_date.getJulianDate() - accumulate_period/(24.*3600.), resampling_date.getTimeZone());
317
	bool found_start=false;
318
	size_t start_idx; //this is the index of the first point of the window that contains dateStart
319
	for (start_idx=index+1; (start_idx--) > 0; ) {
320
321
322
323
		const Date& date = vecM[start_idx].date;
		if(date <= dateStart) {
			if(date<dateStart) {
				const double start_value = funcval(start_idx, paramindex, vecM, dateStart, true); //resampling the starting point
324
325
326
				if(start_value!=IOUtils::nodata)
					sum=start_value;
				else if(strict) return;
327
			}
328
329
330
331
			found_start=true;
			break;
		}
	}
332
333
	if (!found_start) {
		cerr << "[W] Could not accumulate " << vecM.at(0).getNameForParameter(paramindex) << ": ";
334
335
		cerr << "not enough data for accumulation period at date " << resampling_date.toString(Date::ISO) << "\n";
		md(paramindex) = IOUtils::nodata;
336
		return;
337
	}
338
339
340
341
342
343
344
345
346
	if(vecM[start_idx].date != dateStart) start_idx++; //we need to skip the first point that was already used in the interpolation

	//if up-sampling, take a quicker path (for example, generate 15min values from hourly data)
	if(start_idx==index) {
		const double start_val = funcval(start_idx, paramindex, vecM, dateStart, false);
		const double end_val = funcval(index, paramindex, vecM, resampling_date, false);
		if(start_val!=IOUtils::nodata && end_val!=IOUtils::nodata) md(paramindex) = end_val - start_val;
		return;
	}
347

348
349
350
351
352
353
	 //sum all whole periods
	for(size_t idx=(start_idx+1); idx<index; idx++) {
		const double curr_value = vecM[idx](paramindex);
		if(curr_value!=IOUtils::nodata) {
			if(sum!=IOUtils::nodata) sum += curr_value;
			else sum = curr_value;
354
		} else if(strict) return;
355
	}
356

357
358
359
360
361
	//resample end point
	const double end_val = funcval(index, paramindex, vecM, resampling_date, false);
	if(end_val!=IOUtils::nodata) {
		if(sum!=IOUtils::nodata) sum += end_val;
		else sum = end_val;
362
	} else if(strict) return;
363

364
	//write out sum
365
	md(paramindex) = sum;
366
367
}

368
369
double ResamplingAlgorithms::funcval(size_t pos, const size_t& paramindex, const std::vector<MeteoData>& vecM,
                                     const Date& date, const bool& start_pt)
370
{
371
372
373
	if(!start_pt) pos--;
	const double valstart = vecM[pos](paramindex);
	if (vecM[pos].date == date) return valstart;
374

375
	const size_t end = pos+1;
376
	if(end>=vecM.size()) return IOUtils::nodata; //reaching the end of the input vector
377

378
379
	const double valend = vecM[end](paramindex);
	if (valend == IOUtils::nodata) return IOUtils::nodata;
380

381
382
	const double jul1 = vecM[pos].date.getJulianDate(true);
	const double jul2 = vecM[end].date.getJulianDate(true);
383

384
385
386
	if(start_pt)
		return valend - linearInterpolation(jul1, 0., jul2, valend, date.getJulianDate(true));
	else
387
		return linearInterpolation(jul1, 0., jul2, valend, date.getJulianDate(true));
388
}
389

390
/**
391
392
393
394
395
396
397
398
 * @brief This function returns the last and next valid points around a given position
 * @param pos current position (index)
 * @param paramindex meteo parameter to use
 * @param vecM vector of MeteoData
 * @param window_size size of the search window
 * @param indexP1 index of point before the current position (IOUtils::npos if none could be found)
 * @param indexP2 index of point after the current position (IOUtils::npos if none could be found)
 */
399
void ResamplingAlgorithms::getNearestValidPts(const size_t& pos, const size_t& paramindex, const std::vector<MeteoData>& vecM, const Date& resampling_date,
400
401
402
403
404
                                              const double& window_size, size_t& indexP1, size_t& indexP2)
{
	indexP1=IOUtils::npos;
	indexP2=IOUtils::npos;

405
	const Date dateStart = resampling_date - window_size;
406
407
408
409
410
411
412
	if(vecM[0].date<=dateStart) { //we try to find a valid point only if it's even worth it...
		for (size_t ii=pos; (ii--) > 0; ) {
			if (vecM[ii].date < dateStart) break;
			if (vecM[ii](paramindex) != IOUtils::nodata){
				indexP1 = ii;
				break;
			}
413
414
415
		}
	}

416
417
	//make sure the search window remains window_size
	const Date dateEnd = (indexP1 != IOUtils::npos)? vecM[indexP1].date+window_size : resampling_date+window_size;
418

419
420
421
422
423
424
425
	if(vecM[vecM.size()].date>=dateEnd) { //we try to find a valid point only if it's even worth it...
		for (size_t ii=pos; ii<vecM.size(); ii++) {
			if (vecM[ii].date > dateEnd) break;
			if (vecM[ii](paramindex) != IOUtils::nodata) {
				indexP2 = ii;
				break;
			}
426
427
428
429
430
431
		}
	}
}

/**
 * @brief This function solves the equation y = ax + b for two given points and returns y for a given x
432
433
434
435
436
437
438
439
440
441
442
 * @param x1 x-coordinate of first point
 * @param y1 y-coordinate of first point
 * @param x2 x-coordinate of second point
 * @param y2 y-coordinate of second point
 * @param x3 x-coordinate of desired point
 * @return y-coordinate of desired point
 */
double ResamplingAlgorithms::linearInterpolation(const double& x1, const double& y1,
                                       const double& x2, const double& y2, const double& x3)
{
	if (x1 == x2)
443
		throw IOException("Attempted division by zero", AT);
444

445
446
447
	//Solving y = ax + b
	const double a = (y2 - y1) / (x2 - x1);
	const double b = y2 - a*x2;
448

449
	return (a*x3 + b);
450
451
}

452
453
} //namespace