WSL/SLF GitLab Repository

Commit 8e696b51 authored by Mathias Bavay's avatar Mathias Bavay
Browse files

The warnings have been fixed. The ALPUG plugin now supports metadata (a...

The warnings have been fixed. The ALPUG plugin now supports metadata (a specific file format has been designed for it) and supports duplicates in the data files (since this seems to happen quite often): a limited number of duplicates is skipped without reporting, while a warning is reported at the beginning of a large duplicated block of data. The plugin also now handles missing files better: if some files have already been found for this station, a warning is printed out but the execution continues. If not a single file was found for a given station, an exception is thrown. The documentation has also been written.
parent 6d90256d
......@@ -177,11 +177,11 @@ size_t ResamplingAlgorithms::getDailyValue(const std::vector<MeteoData>& vecM, c
//if pos was not properly pre-positioned, do it
if (vecM[pos].date<intervalStart) {
for (pos; pos<vecM.size(); pos++)
for (; pos<vecM.size(); pos++)
if (vecM[pos].date>=intervalStart) break;
}
if (vecM[pos].date>intervalEnd) {
for (pos; pos-- >0;)
for (; pos-- >0;)
if (vecM[pos].date<=intervalEnd) break;
}
......@@ -805,8 +805,8 @@ void DailyAverage::resample(const size_t& index, const ResamplingPosition& /*pos
//HACK this means that this should be implemented as a data creator so it could be filtered afterward
//HACK or we could one more pass of filtering *after* the resampling
if (paramindex==MeteoData::RH) {
if (md(paramindex)<0.01) md(paramindex)==0.01;
if (md(paramindex)>1.) md(paramindex)==1.;
if (md(paramindex)<0.01) md(paramindex)=0.01;
if (md(paramindex)>1.) md(paramindex)=1.;
} else if (paramindex==MeteoData::TA || paramindex==MeteoData::VW || paramindex==MeteoData::VW_MAX || paramindex==MeteoData::ISWR || paramindex==MeteoData::RSWR || paramindex==MeteoData::ILWR || paramindex==MeteoData::TSG || paramindex==MeteoData::TSS || paramindex==MeteoData::TAU_CLD) {
if (md(paramindex)<0.)
md(paramindex)=0.;
......
......@@ -39,6 +39,14 @@ namespace mio {
* @code
* cod area,cod,id_AWS,date hour,Mean Wind,MaxWind,WD,AT (C),HR %, SWOR,HS (cm),empty,HTS0 (cm),empty,6697,empty,GST (C),empty,????,empty,TSS (C),ISWR,P(hpa)
* @endcode
*
* The metadata are provided in a separate file. This comma delimited file can contain comments (same syntax as for the configuration files) and must contain
* first the station ID (as used in the meteo data file name and in the configuration file), a full name, the decimal latitude, decimal longitude and the altitude.
* @code
* #ID,name,lat,lon,alt
* DAV3,Davos::Baerentalli,46.078243,9.272790,2400
* SLF5,Davos::SLF,46.,9.2728,1550 ;test station
* @endcode
*
* @section alpug_units Units
* Temperatures are in Celsius, relative humidity between 0 and 100%, snow heights in cm.
......@@ -50,27 +58,35 @@ namespace mio {
* - METEOPATH: where to find/write the meteo data; [Input] and [Output] section
* - STATION#: input filename (in METEOPATH). As many meteofiles as needed may be specified
* - ALPUG_FIELDS: comma delimited list of fields. The fields <b>MUST</b> use the \ref meteoparam "MeteoData" naming scheme. Unknown or ignored fields are replaced by "%".
* - WRAP_MONTH: which month (numerical) triggers the start of a new file (belonging to the next year. Default: 10); [Input] section
* - WRAP_MONTH: which month (numerical) triggers the start of a new file (belonging to the next year). Default: 10; [Input] section
* - METAFILE: file within METEOPATH that contains the stations' metadata; [Input] section
*
* @code
* METEO = ALPUG
* METEOPATH = ./Met_files
* METAFILE = meta.txt
* STATION1 = CAND5
* ALPUG_FIELDS = %,%,ID,timestamp,VW,VW_MAX,DW,TA,RH, RSWR,HS,%,%,%,%,%,TSG,%,%,%,TSS,ISWR,P
* WRAP_MONTH = 10
* WRAP_MONTH = 10
* @endcode
*/
const std::string ALPUG::dflt_extension = ".met";
const double ALPUG::plugin_nodata = -999.; //plugin specific nodata value. It can also be read by the plugin (depending on what is appropriate)
const size_t ALPUG::max_buffered_lines = 4; //how many lines to keep in buffer in order to detect and skip duplicates
const size_t ALPUG::max_buffered_lines = 4; //how many lines to keep in buffer in order to detect and silently skip duplicates
ALPUG::ALPUG(const std::string& configfile) : cfg(configfile), wrap_month(10)
ALPUG::ALPUG(const std::string& configfile)
: cfg(configfile), vecMeta(), LinesBuffer(), vecIDs(), vecFields(),
coordin(), coordinparam(), coordout(), coordoutparam(), inpath(), outpath(),
in_dflt_TZ(0.), out_dflt_TZ(0.), wrap_month(10)
{
parseInputOutputSection();
}
ALPUG::ALPUG(const Config& cfgreader) : cfg(cfgreader), wrap_month(10)
ALPUG::ALPUG(const Config& cfgreader)
: cfg(cfgreader), vecMeta(), LinesBuffer(), vecIDs(), vecFields(),
coordin(), coordinparam(), coordout(), coordoutparam(), inpath(), outpath(),
in_dflt_TZ(0.), out_dflt_TZ(0.), wrap_month(10)
{
parseInputOutputSection();
}
......@@ -92,7 +108,7 @@ void ALPUG::parseInputOutputSection()
cfg.getValue("METEOPATH", "Input", inpath);
vecIDs.clear();
cfg.getValues("STATION", "Input", vecIDs);
readMetaData();
cfg.getValue("WRAP_MONTH", "Input", wrap_month, IOUtils::nothrow);
string fields;
......@@ -109,6 +125,85 @@ void ALPUG::parseInputOutputSection()
cfg.getValue("METEOPATH", "Output", outpath, IOUtils::nothrow);
}
void ALPUG::readMetaData()
{
const size_t nr_ids = vecIDs.size();
vecMeta.clear();
vecMeta.resize( nr_ids );
vector<bool> foundID(nr_ids, false);
const string filename = cfg.get("METAFILE", "Input");
const string metafile = inpath + "/" + filename;
std::ifstream fin;
fin.open (metafile.c_str(), std::ifstream::in);
if (fin.fail()) {
ostringstream ss;
ss << "File \'" << metafile << "\' could not be opened. Possible reason: " << strerror(errno) << "\n";
throw FileAccessException(ss.str(), AT);
}
try {
const char eoln = IOUtils::getEoln(fin); //get the end of line character for the file
size_t linenr = 0;
vector<string> vecLine;
while (!fin.eof()) {
string line;
linenr++;
getline(fin, line, eoln); //read complete line of data
//strip comments, white spaces and skip empty lines
IOUtils::stripComments(line);
IOUtils::trim(line);
if (line.empty())
continue;
const size_t ncols = IOUtils::readLineToVec(line, vecLine, ',');
if (ncols!=5) { //invalid line
ostringstream ss;
ss << "Error in file \'" << metafile << "\' at line" <<linenr << ": invalid number of columns";
throw InvalidFormatException(ss.str(), AT);
}
const string line_id = vecLine[0];
for(size_t ii=0; ii<nr_ids; ++ii) {
if (line_id==vecIDs[ii]) { //station ID found in the input list
if (foundID[ii])
throw InvalidFormatException("Error: station "+line_id+" appears multiple times in metafile \'"+metafile+"\'", AT);
vector<double> tmpdata = vector<double>(vecLine.size());
for (size_t jj=2; jj<5; jj++) {
if (!IOUtils::convertString(tmpdata[jj], vecLine[jj], std::dec))
throw ConversionFailedException("While reading meta data for station " + vecLine[0], AT);
}
Coords stationcoord(coordin, coordinparam);
stationcoord.setLatLon(tmpdata[2], tmpdata[3], tmpdata[4]);
vecMeta[ii].setStationData(stationcoord, vecLine[0], vecLine[1]);
foundID[ii] = true;
}
}
}
fin.close();
} catch(const std::exception&){
fin.close();
throw;
}
string msg;
for (size_t ii=0; ii<nr_ids; ++ii) {
if (!foundID[ii]) {
if (msg.empty())
msg = "Station(s) " + vecIDs[ii];
else
msg.append( ","+vecIDs[ii] );
}
}
if (!msg.empty())
throw NoAvailableDataException(msg+" do(es) not have metadata in \'"+metafile+"\'", AT);
}
void ALPUG::read2DGrid(Grid2DObject& /*grid_out*/, const std::string& /*name_in*/)
{
//Nothing so far
......@@ -139,10 +234,9 @@ void ALPUG::readAssimilationData(const Date& /*date_in*/, Grid2DObject& /*da_out
throw IOException("Nothing implemented here", AT);
}
void ALPUG::readStationData(const Date&, std::vector<StationData>& /*vecStation*/)
void ALPUG::readStationData(const Date&, std::vector<StationData>& vecStation)
{
//Nothing so far
throw IOException("Nothing implemented here", AT);
vecStation = vecMeta;
}
Date ALPUG::parseDINDate(const std::string& datum) const
......@@ -163,7 +257,7 @@ Date ALPUG::parseDINDate(const std::string& datum) const
//return TRUE if we should keep reading lines
//if isValid==false, don't store the MeteoData object
bool ALPUG::parseLine(const std::string& filename, const char& eoln, const size_t& nr_of_data_fields, const Date& dateStart, const Date& dateEnd, const std::string& line, MeteoData &md, bool &isValid) const
bool ALPUG::parseLine(const std::string& filename, const size_t& nr_of_data_fields, const Date& dateStart, const Date& dateEnd, const std::string& line, MeteoData &md, bool &isValid) const
{
md.reset();
......@@ -218,7 +312,7 @@ bool ALPUG::isDuplicate(const std::string& line)
return false;
}
void ALPUG::readMetoFile(const std::string& station_id, const Date& dateStart, const Date& dateEnd,
void ALPUG::readMetoFile(const size_t& station_index, const Date& dateStart, const Date& dateEnd,
std::vector<MeteoData>& vecM)
{
vecM.clear();
......@@ -227,11 +321,12 @@ void ALPUG::readMetoFile(const std::string& station_id, const Date& dateStart, c
int end_year, end_month, end_day;
dateStart.getDate(start_year, start_month, start_day);
dateEnd.getDate(end_year, end_month, end_day);
if (start_month>=wrap_month) start_year++;
if (end_month>=wrap_month) end_year++;
std::cout << "start_year=" << start_year << " end_year=" << end_year << "\n";
const string station_id = vecIDs[station_index];
Date prev_date(0., 0.);
bool file_found = false; //this will turn to true if at least one file could be opened for reading data
for(int year=start_year; year<=end_year; ++year) {
stringstream ss;
ss << year;
......@@ -242,34 +337,51 @@ void ALPUG::readMetoFile(const std::string& station_id, const Date& dateStart, c
fin.clear();
fin.open (filename.c_str(), ios::in|ios::binary); //ascii does end of line translation, which messes up the pointer code
if (fin.fail()) {
ostringstream ss;
ss << "Error opening file \"" << filename << "\" for reading, possible reason: " << strerror(errno);
ss << " Please check file existence and permissions!";
throw FileAccessException(ss.str(), AT);
std::cerr << "[W] file \'" << filename << "\' could not be opened. Possible reason: " << strerror(errno) << "\n";
continue;
}
file_found = true;
const char eoln = smet::SMETCommon::getEoln(fin); //get the end of line character for the file
const size_t nr_of_data_fields = vecFields.size();
string line;
while (!fin.eof()){
unsigned int nr_line = 0.;
bool print_warning = true; //to only print 1 warning when a block of multiple lines is duplicated
while (!fin.eof()) {
string line;
getline(fin, line, eoln);
nr_line++;
if (line.empty())
continue; //Pure comment lines and empty lines are ignored
if (isDuplicate(line))
continue;
Coords pos;
MeteoData md(Date(), StationData(pos, station_id, station_id));
MeteoData md(Date(), vecMeta[station_index]);
bool isValid;
if (!parseLine(filename, eoln, nr_of_data_fields, dateStart, dateEnd, line, md, isValid))
if (!parseLine(filename, nr_of_data_fields, dateStart, dateEnd, line, md, isValid))
break;
if(isValid)
if(isValid) {
if (md.date<=prev_date) { //this happens when large blocks of data are duplicated
if (print_warning)
std::cerr << "[W] timstamps not in order in file \'" << filename << "\' starting at line " << nr_line << "; please check your data!\n";
print_warning = false;
continue;
}
vecM.push_back( md );
prev_date = md.date;
print_warning = true;
}
}
fin.close();
}
if (!file_found) {
ostringstream ss;
ss << "Error opening files for station " << station_id << " in directory \'" << inpath << "\' for reading.";
ss << " Please check file existence and permissions!";
throw FileAccessException(ss.str(), AT);
}
}
void ALPUG::readMeteoData(const Date& dateStart, const Date& dateEnd,
......@@ -279,7 +391,7 @@ void ALPUG::readMeteoData(const Date& dateStart, const Date& dateEnd,
vecMeteo.clear();
for (size_t ii=0; ii<vecIDs.size(); ++ii) {
std::vector<MeteoData> vecM;
readMetoFile(vecIDs[ii], dateStart, dateEnd, vecM);
readMetoFile(ii, dateStart, dateEnd, vecM);
vecMeteo.push_back( vecM );
}
}
......
......@@ -61,23 +61,25 @@ class ALPUG : public IOInterface {
private:
void parseInputOutputSection();
void readMetaData();
bool isDuplicate(const std::string& line) ;
Date parseDINDate(const std::string& datum) const;
bool parseLine(const std::string& filename, const char& eoln, const size_t& nr_of_data_fields, const Date& dateStart, const Date& dateEnd, const std::string& line, MeteoData &md, bool &isValid) const;
void readMetoFile(const std::string& station_name, const Date& dateStart, const Date& dateEnd,
bool parseLine(const std::string& filename, const size_t& nr_of_data_fields, const Date& dateStart, const Date& dateEnd, const std::string& line, MeteoData &md, bool &isValid) const;
void readMetoFile(const size_t& station_index, const Date& dateStart, const Date& dateEnd,
std::vector<MeteoData>& vecM);
const Config cfg;
static const double plugin_nodata; //plugin specific nodata value, e.g. -999
static const size_t max_buffered_lines; //how many lines to keep in buffer to check for duplicates?
static const std::string dflt_extension;
std::deque<std::string> LinesBuffer;
std::vector<StationData> vecMeta;
std::deque<std::string> LinesBuffer;
std::vector<std::string> vecIDs, vecFields;
std::string coordin, coordinparam, coordout, coordoutparam; //projection parameters
std::vector<std::string> vecIDs, vecFields; //read from the Config [Input] section
std::string inpath, outpath; //read from the Config [Output] section
std::string inpath, outpath;
double in_dflt_TZ, out_dflt_TZ;
unsigned short wrap_month;
static const double plugin_nodata; //plugin specific nodata value, e.g. -999
static const size_t max_buffered_lines; //how many lines to keep in buffer to check for duplicates?
static const std::string dflt_extension;
};
} //namespace
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment