/***********************************************************************************/
/*  Copyright 2014 WSL Institute for Snow and Avalanche Research    SLF-DAVOS      */
/***********************************************************************************/
/* This file is part of MeteoIO.
    MeteoIO is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    MeteoIO is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with MeteoIO.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <meteoio/TimeSeriesManager.h>

#include <algorithm>

using namespace std;

namespace mio {

/**
 * @brief Build a manager working on top of the given IOHandler and configuration.
 * @details The caches and buffers are default-constructed and the processing level is
 * initialized to the combination of all stages (raw, filtered, resampled and generated).
 * The processor is then asked for its window size (stored in proc_properties) and the
 * buffering properties are read from the configuration by setDfltBufferProperties().
 * @param in_iohandler handler used for every read and write of meteorological data
 * @param in_cfg configuration; also handed over to the processor and to the data generator
 */
TimeSeriesManager::TimeSeriesManager(IOHandler& in_iohandler, const Config& in_cfg)
                  : cfg(in_cfg),
                    iohandler(in_iohandler),
                    meteoprocessor(in_cfg),
                    dataGenerator(in_cfg),
                    proc_properties(),
                    point_cache(),
                    raw_buffer(),
                    filtered_cache(),
                    chunk_size(),
                    buff_before(),
                    processing_level(IOUtils::raw | IOUtils::filtered | IOUtils::resampled | IOUtils::generated)
{
	meteoprocessor.getWindowSize(proc_properties);
	setDfltBufferProperties();
}

void TimeSeriesManager::setDfltBufferProperties()
{
39
40
	double chunk_size_days = 370.; //default chunk size value
	cfg.getValue("BUFFER_SIZE", "General", chunk_size_days, IOUtils::nothrow); //in days
41
42
43
44
45
46
47
48
49
50
	chunk_size = Duration(chunk_size_days, 0);

	//get buffer centering options
	double buff_centering = -1.;
	double buff_start = -1.;
	cfg.getValue("BUFF_CENTERING", "General", buff_centering, IOUtils::nothrow);
	cfg.getValue("BUFF_BEFORE", "General", buff_start, IOUtils::nothrow);
	if ((buff_centering != -1.) && (buff_start != -1.))
		throw InvalidArgumentException("Please do NOT provide both BUFF_CENTERING and BUFF_BEFORE!!", AT);

51
	if (buff_start != -1.) {
52
53
		buff_before = Duration(buff_start, 0);
	} else {
54
		if (buff_centering != -1.) {
55
56
57
58
59
			if ((buff_centering < 0.) || (buff_centering > 1.))
				throw InvalidArgumentException("BUFF_CENTERING must be between 0 and 1", AT);

			buff_before = chunk_size * buff_centering;
		} else {
60
			buff_before = chunk_size * 0.05; //5% centering by default
61
62
63
64
		}
	}

	//if buff_before>chunk_size, we will have a problem (ie: we won't ever read the whole data we need)
65
	if (buff_before>chunk_size) chunk_size = buff_before;
66
67
68
69
70
71
	//BUG: if we do this, we still have the meteo1d window in the way
	//-> we end up not reading enough data and rebuffering...
}

void TimeSeriesManager::setMinBufferRequirements(const double& i_chunk_size, const double& i_buff_before)
{
72
	if (i_buff_before!=IOUtils::nodata) {
73
		const Duration app_buff_before(i_buff_before, 0);
74
		if (app_buff_before>buff_before) buff_before = app_buff_before;
75
	}
76
	if (i_chunk_size!=IOUtils::nodata) {
77
		const Duration app_chunk_size(i_chunk_size, 0);
78
		if (app_chunk_size>chunk_size) chunk_size = app_chunk_size;
79
80
81
	}

	//if buff_before>chunk_size, we will have a problem (ie: we won't ever read the whole data we need)
82
	if (buff_before>chunk_size) chunk_size = buff_before;
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
}

/**
 * @brief Select which processing stages are applied to the data returned by getMeteoData().
 * @param i_level combination of IOUtils processing level flags
 * @throws InvalidArgumentException if i_level is not strictly below IOUtils::num_of_levels
 */
void TimeSeriesManager::setProcessingLevel(const unsigned int& i_level)
{
	const bool is_valid = (i_level < IOUtils::num_of_levels);
	if (!is_valid)
		throw InvalidArgumentException("The processing level is invalid", AT);

	processing_level = i_level;
}

/**
 * @brief Inject externally provided time series into the raw buffer or into the filtered cache.
 * @details Pushing raw data clears the filtered cache first. In every case the point cache
 * is cleared afterwards, so that no value computed from outdated data is returned later on.
 * @param level target of the data, either IOUtils::raw or IOUtils::filtered
 * @param date_start start of the period covered by the data
 * @param date_end end of the period covered by the data (must not be before date_start)
 * @param vecMeteo the data to push
 * @throws InvalidArgumentException if date_end is before date_start or if level is neither
 * IOUtils::raw nor IOUtils::filtered
 */
void TimeSeriesManager::push_meteo_data(const IOUtils::ProcessingLevel& level, const Date& date_start, const Date& date_end,
                                const std::vector< METEO_SET >& vecMeteo)
{
	//the period must be given in chronological order
	if (date_end < date_start) {
		std::ostringstream msg;
		msg << "Trying to push data set from " << date_start.toString(Date::ISO) << " to " << date_end.toString(Date::ISO) << ". "
		    << " Obviously, date_start should be less than date_end!";
		throw InvalidArgumentException(msg.str(), AT);
	}

	const bool to_filtered = (level == IOUtils::filtered);
	const bool to_raw = (level == IOUtils::raw);
	if (!to_filtered && !to_raw)
		throw InvalidArgumentException("The processing level is invalid (should be raw OR filtered)", AT);

	if (to_filtered) {
		filtered_cache.push(date_start, date_end, vecMeteo);
	} else {
		filtered_cache.clear(); //the filtered data is discarded when new raw data comes in
		raw_buffer.push(date_start, date_end, vecMeteo);
	}

	point_cache.clear(); //clear point cache, so that we don't return resampled values of deprecated data
}

/**
 * @brief Inject an externally provided vector of MeteoData into the raw buffer or into the filtered cache.
 * @details Pushing raw data clears the filtered cache first. The point cache is only cleared
 * when invalidate_cache is true.
 * @param level target of the data, either IOUtils::raw or IOUtils::filtered
 * @param date_start start of the period covered by the data
 * @param date_end end of the period covered by the data (must not be before date_start)
 * @param vecMeteo the data to push
 * @param invalidate_cache if true, the point cache is cleared after the push
 * @throws InvalidArgumentException if date_end is before date_start or if level is neither
 * IOUtils::raw nor IOUtils::filtered
 */
void TimeSeriesManager::push_meteo_data(const IOUtils::ProcessingLevel& level, const Date& date_start, const Date& date_end,
		                     const std::vector< MeteoData >& vecMeteo, const bool& invalidate_cache)
{
	//the period must be given in chronological order
	if (date_end < date_start) {
		std::ostringstream msg;
		msg << "Trying to push data set from " << date_start.toString(Date::ISO) << " to " << date_end.toString(Date::ISO) << ". "
		    << " Obviously, date_start should be less than date_end!";
		throw InvalidArgumentException(msg.str(), AT);
	}

	const bool to_filtered = (level == IOUtils::filtered);
	const bool to_raw = (level == IOUtils::raw);
	if (!to_filtered && !to_raw)
		throw InvalidArgumentException("The processing level is invalid (should be raw OR filtered)", AT);

	if (to_filtered) {
		filtered_cache.push(date_start, date_end, vecMeteo);
	} else {
		filtered_cache.clear(); //the filtered data is discarded when new raw data comes in
		raw_buffer.push(date_start, date_end, vecMeteo);
	}

	//clear point cache, so that we don't return resampled values of deprecated data
	if (invalidate_cache) point_cache.clear();
}

139
//should we implement a cache for stationData?
140
size_t TimeSeriesManager::getStationData(const Date& date, STATIONS_SET& vecStation)
141
142
{
	vecStation.clear();
143
	iohandler.readStationData(date, vecStation);
144
145
146
147
148
149
150
151

	return vecStation.size();
}

//for an interval of data: decide whether data should be filtered or raw
size_t TimeSeriesManager::getMeteoData(const Date& dateStart, const Date& dateEnd, std::vector< METEO_SET >& vecVecMeteo)
{
	vecVecMeteo.clear();
152
	if (processing_level == IOUtils::raw) {
153
154
		iohandler.readMeteoData(dateStart, dateEnd, vecVecMeteo);
	} else {
155
		const bool success = filtered_cache.get(dateStart, dateEnd, vecVecMeteo);
156

157
		if (!success) {
158
			std::vector< std::vector<MeteoData> > tmp_meteo;
159
			if ((IOUtils::raw & processing_level) == IOUtils::raw) fillRawBuffer(dateStart, dateEnd);
160
			raw_buffer.get(dateStart, dateEnd, tmp_meteo);
161
162

			//now it needs to be secured that the data is actually filtered, if configured
163
			if ((IOUtils::filtered & processing_level) == IOUtils::filtered) {
164
				fill_filtered_cache();
165
				filtered_cache.get(dateStart, dateEnd, vecVecMeteo);
166
167
168
169
170
			} else {
				vecVecMeteo = tmp_meteo;
			}
		}

171
		if ((IOUtils::generated & processing_level) == IOUtils::generated)
172
173
174
175
176
177
178
179
180
181
182
183
184
			dataGenerator.fillMissing(vecVecMeteo);
	}

	return vecVecMeteo.size(); //equivalent with the number of stations that have data
}

size_t TimeSeriesManager::getMeteoData(const Date& i_date, METEO_SET& vecMeteo)
{
	vecMeteo.clear();

	//1. Check whether user wants raw data or processed data
	//The first case: we are looking at raw data directly, only unresampled values are considered, exact date match
	if (processing_level == IOUtils::raw) {
185
		std::vector< std::vector<MeteoData> > vec_cache;
186
187
		const Duration eps(1./(24.*3600.), 0.);
		iohandler.readMeteoData(i_date-eps, i_date+eps, vec_cache);
188
		for (size_t ii=0; ii<vec_cache.size(); ii++) { //for every station
189
190
			const size_t index = IOUtils::seek(i_date, vec_cache[ii], true);
			if (index != IOUtils::npos)
191
				vecMeteo.push_back( vec_cache[ii][index] ); //Insert station into vecMeteo
192
193
194
195
196
197
		}
		return vecMeteo.size();
	}

	//2.  Check which data point is available, buffered locally
	const map<Date, vector<MeteoData> >::const_iterator it = point_cache.find(i_date);
198
	if (it != point_cache.end()) {
199
200
201
202
203
204
		vecMeteo = it->second;
		return vecMeteo.size();
	}

	//Let's make sure we have the data we need, in the filtered_cache or in vec_cache
	const Date buffer_start( i_date-proc_properties.time_before ), buffer_end( i_date+proc_properties.time_after );
205
	std::vector< vector<MeteoData> >* data = NULL; //reference to either filtered_cache or raw_buffer
206
	if ((IOUtils::filtered & processing_level) == IOUtils::filtered) {
207
		const bool cached = (!filtered_cache.empty()) && (filtered_cache.getBufferStart() <= buffer_start) && (filtered_cache.getBufferEnd() >= buffer_end);
208
209
		if (!cached) {
			//explicit caching, rebuffer if necessary
210
			if ((IOUtils::raw & processing_level) == IOUtils::raw) fillRawBuffer(buffer_start, buffer_end);
211
212
			fill_filtered_cache();
		}
213
		data = &filtered_cache.getBuffer();
214
	} else { //data to be resampled should be IOUtils::raw
215
		if ((IOUtils::raw & processing_level) == IOUtils::raw) fillRawBuffer(buffer_start, buffer_end);
216
		data = &raw_buffer.getBuffer();
217
218
	}

219
	if ((IOUtils::resampled & processing_level) == IOUtils::resampled) { //resampling required
220
		for (size_t ii=0; ii<(*data).size(); ii++) { //for every station
221
			MeteoData md;
222
			const bool success = meteoprocessor.resample(i_date, (*data)[ii], md);
223
			if (success) vecMeteo.push_back( md );
224
		}
225
226
227
228
	} else { //no resampling required
		for (size_t ii=0; ii<(*data).size(); ii++) { //for every station
			const size_t index = IOUtils::seek(i_date, (*data)[ii], true); //needs to be an exact match
			if (index != IOUtils::npos)
229
				vecMeteo.push_back( (*data)[ii][index] ); //Insert station into vecMeteo
230
		}
231
232
	}

233
	if ((IOUtils::generated & processing_level) == IOUtils::generated)
234
235
236
237
238
239
240
241
242
		dataGenerator.fillMissing(vecMeteo);

	add_to_points_cache(i_date, vecMeteo); //Store result in the local cache

	return vecMeteo.size();
}

void TimeSeriesManager::writeMeteoData(const std::vector< METEO_SET >& vecMeteo, const std::string& name)
{
243
	iohandler.writeMeteoData(vecMeteo, name);
244
245
246
247
}

double TimeSeriesManager::getAvgSamplingRate() const
{
248
	return raw_buffer.getAvgSamplingRate();
249
250
251
252
253
254
255
}

/**
 * @brief Filter the whole raw meteo data buffer
 */
void TimeSeriesManager::fill_filtered_cache()
{
256
	if ((IOUtils::filtered & processing_level) == IOUtils::filtered) {
257
258
		filtered_cache.clear(); //HACK until we get true ringbuffers, to prevent eating up all memory
		meteoprocessor.process(raw_buffer.getBuffer(), filtered_cache.getBuffer());
259
260
		filtered_cache.setBufferStart( raw_buffer.getBufferStart() );
		filtered_cache.setBufferEnd( raw_buffer.getBufferEnd() );
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
	}
}

/**
 * @brief Store the data computed for one date in the point cache.
 * @details Once the cache holds more than 2000 dates, it is emptied completely before the
 * new entry is stored. An existing entry for the same date is overwritten.
 * @param i_date date the data belongs to
 * @param vecMeteo data of all stations for this date
 */
void TimeSeriesManager::add_to_points_cache(const Date& i_date, const METEO_SET& vecMeteo)
{
	const size_t max_cached_dates = 2000;

	//Check cache size, drop everything if necessary
	if (point_cache.size() > max_cached_dates)
		point_cache.clear(); //HACK: implement a true ring buffer!

	point_cache[i_date] = vecMeteo;
}

/**
 * @brief Empty the point cache, the filtered cache and the raw buffer.
 */
void TimeSeriesManager::clear_cache()
{
	point_cache.clear();
	filtered_cache.clear();
	raw_buffer.clear();
}

void TimeSeriesManager::fillRawBuffer(const Date& date_start, const Date& date_end)
{
283
284
	const Date new_start( date_start-buff_before ); //taking centering into account
	const Date new_end( max(new_start + chunk_size, date_end) );
285

286
	raw_buffer.clear(); //HACK until we have a proper ring buffer to avoid eating up all memory...
287
288

	if (raw_buffer.empty()) {
289
		std::vector< METEO_SET > vecMeteo;
290
291
292
		iohandler.readMeteoData(new_start, new_end, vecMeteo);
		raw_buffer.push(new_start, new_end, vecMeteo);
		return;
293
294
	}

295
296
297
	const Date buffer_start( raw_buffer.getBufferStart() );
	const Date buffer_end( raw_buffer.getBufferEnd() );
	if (new_start>buffer_end || new_end<buffer_start) { //easy: full rebuffer
298
		std::vector< METEO_SET > vecMeteo;
299
300
301
302
		iohandler.readMeteoData(new_start, new_end, vecMeteo);
		raw_buffer.push(new_start, new_end, vecMeteo);
		return;
	}
303

304
	if (new_start<buffer_start) { //some data must be inserted before
305
		std::vector< METEO_SET > vecMeteo;
306
307
308
		iohandler.readMeteoData(new_start, buffer_start, vecMeteo);
		raw_buffer.push(new_start, buffer_start, vecMeteo);
	}
309

310
	if (new_end>buffer_end) { //some data must be inserted after. Keep in mind both before and after could happen simultaneously!
311
		std::vector< METEO_SET > vecMeteo;
312
313
		iohandler.readMeteoData(buffer_end, new_end, vecMeteo);
		raw_buffer.push(buffer_end, new_end, vecMeteo);
314
	}
315

316
317
318
319
320
321
322
323
324
325
326
}

const std::string TimeSeriesManager::toString() const {
	ostringstream os;
	os << "<TimeSeriesManager>\n";
	os << "Config& cfg = " << hex << &cfg << dec << "\n";
	os << "IOHandler& iohandler = " << hex << &iohandler << dec << "\n";
	os << meteoprocessor.toString();
	os << "Processing level = " << processing_level << "\n";
	os << dataGenerator.toString();

327
	os << "RawBuffer:\n" << raw_buffer.toString();
328
	os << "Filteredcache:\n" << filtered_cache.toString();
329

330
	//display point_cache
331
332
333
334
335
336
	size_t count=0;
	size_t min_stations=std::numeric_limits<size_t>::max();
	size_t max_stations=0;
	std::map<Date, std::vector<MeteoData> >::const_iterator iter = point_cache.begin();
	for (; iter != point_cache.end(); ++iter) {
		const size_t nb_stations = iter->second.size();
337
338
		if (nb_stations>max_stations) max_stations=nb_stations;
		if (nb_stations<min_stations) min_stations=nb_stations;
339
340
341
		count++;
	}

342
	if (count==0) {
343
344
		os << "Resampled cache is empty\n";
	}
345
	if (count==1) {
346
		os << "Resampled cache content (";
347
		if (max_stations==min_stations)
348
349
350
351
352
353
			os << min_stations;
		else
			os << min_stations << " to " << max_stations;
		os << " station(s))\n";
		os << std::setw(22) << point_cache.begin()->first.toString(Date::ISO) << " - 1 timestep\n";
	}
354
	if (count>1) {
355
356
357
		const double avg_sampling = ( (point_cache.rbegin()->first.getJulian()) - (point_cache.begin()->first.getJulian()) ) / (double)(count-1);

		os << "Resampled cache content (";
358
		if (max_stations==min_stations)
359
360
361
362
363
364
365
366
367
368
369
370
371
372
			os << min_stations;
		else
			os << min_stations << " to " << max_stations;
		os << " station(s))\n";
		os << std::setw(22) << point_cache.begin()->first.toString(Date::ISO);
		os << " - " << point_cache.rbegin()->first.toString(Date::ISO);
		os << " - " << count << " timesteps (" << setprecision(3) << fixed << avg_sampling*24.*3600. << " s sampling rate)";
	}

	os << "</TimeSeriesManager>\n";
	return os.str();
}

} //namespace