WSL/SLF GitLab Repository

Commit 3667cd5b authored by Mathias Bavay's avatar Mathias Bavay
Browse files

A new "raw data editing" method has been implemented: merge_by_name. Stations...

A new "raw data editing" method has been implemented: merge_by_name. Stations sharing a common name can be merged together with the MERGE_BY_NAME key in the [Input] section. The documentation has been edited to bring visibility to the "raw data editing" features.
parent c1ec3336
......@@ -47,6 +47,8 @@ namespace mio {
* @note it is generally not advised to use data generators in combination with spatial interpolations as this would
* potentially mix measured and generated values in the resulting grid. It is therefore advised to turn the data generators
* off and let the spatial interpolations algorithms adjust to the amount of measured data.
* @note it is also possible to make a copy of a given parameter under a different name. This is explained in section
* \ref data_manipulations "Raw data editing".
*
* @section generators_section Data generators section
* The data generators are defined per meteorological parameter. They are applied to all stations
......
......@@ -17,6 +17,7 @@
*/
#include <meteoio/IOUtils.h>
#include <meteoio/MathOptim.h>
#include <meteoio/IOHandler.h>
#cmakedefine PLUGIN_ALPUG
......@@ -138,7 +139,7 @@ namespace mio {
* <tr><td>\subpage snowpack "SNOWPACK"</td><td>meteo</td><td>original SNOWPACK meteo files</td><td></td></tr>
* </table></center>
*
* @section data_manipulations Data generators & exclusion
* @section data_manipulations Raw data editing
* @subsection data_generators Data generators
* It is also possible to duplicate a meteorological parameter as another meteorological parameter. This is done by specifying a COPY key, following the syntax
* new_name::COPY = existing_parameter. For example:
......@@ -160,7 +161,7 @@ namespace mio {
*
* @code
* WFJ2::EXCLUDE = HS PSUM ;inline declaration of parameters exclusion
* KLO3::KEEP = TA RH VW DW ;inline declaration of parameters to keep
* KLO3::KEEP = TA RH VW DW ;inline declaration of parameters to keep
*
* EXCLUDE_FILE = ../input/meteo/excludes.csv ;parameters exclusions defined in a separate file
* KEEP_FILE = ../input/meteo/keeps.csv ;parameters to keep defined in a separate file
......@@ -172,6 +173,25 @@ namespace mio {
* KLO3 HS PSUM
* @endcode
*
* @subsection data_merging Data merging
* It is possible to merge different data sets together, based on a common station name. This is enabled with the key \em MERGE_BY_NAME in the [Input] section.
* If set to \em true, all stations that have the same station name will be merged together, in the order they have been declared/read by the plugin
* (ie the first station that has a value for a given parameter has priority). This is useful, for example, to provide measurements from different
* stations that actually share the same measurement location or to build a "composite" station from multiple real stations.
* @code
* STATION1 = *WFJ
* STATION2 = WFJ2
* STATION3 = WFJ1
*
* *WFJ::KEEP = ILWR PSUM
* WFJ2::EXCLUDE = PSUM ILWR RSWR
* WFJ1::KEEP = ISWR VW DW
*
* MERGE_BY_NAME = true
* @endcode
* In the above example, if the same station name had been given to "*WFJ", "WFJ2" and "WFJ1", then a composite station with station ID
* "*WFJ" (ie the first one of the list) would be built with ILWR and PSUM coming from the original *WFJ station, every field of WFJ2 except PSUM, ILWR and RSWR
* and only ISWR, VW, DW from WFJ1 <b>when</b> WFJ2 does not have them.
*/
IOInterface* IOHandler::getPlugin(const std::string& plugin_name) const
......@@ -257,13 +277,16 @@ IOInterface* IOHandler::getPlugin(const std::string& cfgkey, const std::string&
//Copy constructor
IOHandler::IOHandler(const IOHandler& aio)
: IOInterface(), cfg(aio.cfg), mapPlugins(aio.mapPlugins), excluded_params(aio.excluded_params), kept_params(aio.kept_params),
copy_parameter(aio.copy_parameter), copy_name(aio.copy_name), enable_copying(aio.enable_copying), excludes_ready(aio.excludes_ready), keeps_ready(aio.keeps_ready)
copy_parameter(aio.copy_parameter), copy_name(aio.copy_name), enable_copying(aio.enable_copying),
excludes_ready(aio.excludes_ready), keeps_ready(aio.keeps_ready), mergeByName(aio.mergeByName)
{}
IOHandler::IOHandler(const Config& cfgreader)
: IOInterface(), cfg(cfgreader), mapPlugins(), excluded_params(), kept_params(), copy_parameter(), copy_name(), enable_copying(false), excludes_ready(false), keeps_ready(false)
: IOInterface(), cfg(cfgreader), mapPlugins(), excluded_params(), kept_params(), copy_parameter(), copy_name(),
enable_copying(false), excludes_ready(false), keeps_ready(false), mergeByName(false)
{
parse_copy_config();
cfg.getValue("MERGE_BY_NAME", "Input", mergeByName, IOUtils::nothrow);
}
IOHandler::~IOHandler() throw()
......@@ -285,6 +308,7 @@ IOHandler& IOHandler::operator=(const IOHandler& source) {
enable_copying = source.enable_copying;
excludes_ready = source.excludes_ready;
keeps_ready = source.keeps_ready;
mergeByName = source.mergeByName;
}
return *this;
}
......@@ -318,6 +342,7 @@ void IOHandler::readStationData(const Date& date, STATIONS_SET& vecStation)
{
IOInterface *plugin = getPlugin("METEO", "Input");
plugin->readStationData(date, vecStation);
if (mergeByName) merge_by_name(vecStation);
}
void IOHandler::readMeteoData(const Date& dateStart, const Date& dateEnd,
......@@ -335,6 +360,8 @@ void IOHandler::readMeteoData(const Date& dateStart, const Date& dateEnd,
if (!keeps_ready) create_keep_map();
keep_params(vecMeteo);
if (mergeByName) merge_by_name(vecMeteo);
copy_parameters(stationindex, vecMeteo);
}
......@@ -389,6 +416,42 @@ void IOHandler::checkTimestamps(const std::vector<METEO_SET>& vecVecMeteo) const
}
}
/**
 * @brief Merge stations that have identical names.
 * For every station, all the following stations of the vector that have the same
 * stationName are merged into it (by calling its merge() method) and then removed
 * from the vector. The first occurence of a given name is therefore the one that is kept.
 * @note Every station is used as a merge target (not only the first half of the vector),
 * otherwise duplicates that are all located at the end of the vector would never be compared.
 * The merged stations are erased in place (instead of being swapped with the last element) so
 * the stations are always merged in the order in which they appear in the vector.
 * @param vecStation vector of stations, modified in place (it can get shorter)
 */
void IOHandler::merge_by_name(STATIONS_SET& vecStation) const
{
	for (size_t ii=0; ii<vecStation.size(); ii++) {
		size_t jj = ii+1;
		while (jj<vecStation.size()) {
			if (vecStation[ii].stationName==vecStation[jj].stationName) {
				vecStation[ii].merge( vecStation[jj] );
				//the next candidate now sits at index jj, so jj must not be incremented
				vecStation.erase( vecStation.begin() + static_cast<STATIONS_SET::difference_type>(jj) );
			} else {
				jj++;
			}
		}
	}
}
//in this implementation, we consider that the station name does NOT change over time
void IOHandler::merge_by_name(std::vector<METEO_SET>& vecVecMeteo) const
{
const size_t idxMiddle = Optim::ceil( static_cast<double>(vecVecMeteo.size()) / 2. );
for (size_t ii=0; (ii<=idxMiddle) && (ii<vecVecMeteo.size()); ii++) {
if (vecVecMeteo[ii].empty()) continue;
for (size_t jj=ii+1; jj<vecVecMeteo.size(); jj++) {
if (vecVecMeteo[jj].empty()) continue;
if (vecVecMeteo[ii][0].meta.stationName==vecVecMeteo[jj][0].meta.stationName) {
MeteoData::mergeTimeSeries(vecVecMeteo[ii], vecVecMeteo[jj]);
std::swap( vecVecMeteo[jj], vecVecMeteo.back() );
vecVecMeteo.pop_back();
jj--; //we need to re-compare the current station since it has been swapped
}
}
}
}
void IOHandler::create_exclude_map()
{
excludes_ready = true;
......@@ -668,6 +731,9 @@ const std::string IOHandler::toString() const
}
os << "</excluded_params>\n";
}
if (mergeByName)
os << "Merge stations by stationName\n";
os << "</IOHandler>\n";
return os.str();
......
......@@ -73,13 +73,15 @@ class IOHandler : public IOInterface {
void exclude_params(std::vector<METEO_SET>& vecVecMeteo) const;
void keep_params(std::vector<METEO_SET>& vecVecMeteo) const;
void copy_parameters(const size_t& stationindex, std::vector< METEO_SET >& vecMeteo) const;
void merge_by_name(std::vector<METEO_SET>& vecVecMeteo) const;
void merge_by_name(STATIONS_SET& vecStation) const;
const Config& cfg;
std::map<std::string, IOInterface*> mapPlugins;
std::map< std::string, std::set<std::string> > excluded_params;
std::map< std::string, std::set<std::string> > kept_params;
std::vector<std::string> copy_parameter, copy_name;
bool enable_copying, excludes_ready, keeps_ready;
bool enable_copying, excludes_ready, keeps_ready, mergeByName;
};
} //namespace
......
......@@ -360,6 +360,25 @@ std::iostream& operator>>(std::iostream& is, MeteoData& data) {
return is;
}
/*
 * Merge vec2 into vec1, timestamp by timestamp: for each element of vec1, if vec2 contains an
 * element with exactly the same date, it is merged into the vec1 element by calling merge().
 * Timestamps that are only present in one of the two vectors are left untouched and vec1 never
 * gets new elements. Nothing is done if one of the vectors is empty or if the two date ranges
 * do not overlap.
 * The single forward pass over vec2 relies on both vectors being sorted by increasing date
 * (NOTE(review): this is not checked here, to be confirmed against the callers).
 */
void MeteoData::mergeTimeSeries(std::vector<MeteoData>& vec1, const std::vector<MeteoData>& vec2)
{
	if (vec1.empty() || vec2.empty()) return;
	if (vec1.back().date<vec2.front().date) return; //vec1 is before vec2
	if (vec1.front().date>vec2.back().date) return; //vec1 is after vec2

	size_t idx2 = 0;
	for (size_t ii=0; ii<vec1.size(); ii++) {
		//the bound check must come first so vec2[idx2] is never read past the end of vec2
		while ((idx2<vec2.size()) && (vec1[ii].date>vec2[idx2].date))
			idx2++;

		if (idx2==vec2.size()) return; //no more chances of common timestamps

		if (vec1[ii].date==vec2[idx2].date) { //we found a common timestamp
			vec1[ii].merge( vec2[idx2] );
		}
	}
}
void MeteoData::merge(std::vector<MeteoData>& vec1, const std::vector<MeteoData>& vec2, const bool& simple_merge)
{
if (vec2.empty()) return;
......
......@@ -182,6 +182,17 @@ class MeteoData {
const std::string& getNameForParameter(const size_t& parindex) const;
size_t getParameterIndex(const std::string& parname) const;
size_t getNrOfParameters() const;
/**
* @brief Simple merge strategy for two vectors containing meteodata time series for two stations.
* If some fields of the MeteoData objects given in the first vector are nodata, they will be
* filled by the matching field from the MeteoData objects given in the second vector (if the
* same timestamp exists).
* @note Only timestamps common to both data sets will be merged!
* @param vec1 reference vector, highest priority
* @param vec2 extra vector to merge, lowest priority
*/
static void mergeTimeSeries(std::vector<MeteoData>& vec1, const std::vector<MeteoData>& vec2);
/**
* @brief Simple merge strategy for vectors containing meteodata for a given timestamp.
......
......@@ -51,6 +51,7 @@ namespace mio {
/**
* @page processing Processing overview
* The pre-processing infrastructure is described in ProcessingBlock (for its API). The goal of this page is to give an overview of the available filters and processing elements and their usage.
* @note it is possible to remove some parameters on a per-station basis, even before entering the filters. See in section \ref data_manipulations "Raw data editing".
*
* @section processing_modes Modes of operation
* It should be noted that filters often have two modes of operation: soft or hard. In soft mode, any value that is rejected is replaced by the filter parameter's value. This means that for a soft min filter set at 0.0, all values less than 0.0 will be replaced by 0.0. In hard mode, all rejected values are replaced by nodata.
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment