WSL/SLF GitLab Repository

Commit 8d756f3e authored by Rebecca Kurup Buchholz's avatar Rebecca Kurup Buchholz
Browse files

Made NEAD import functional

parent 05f94034
......@@ -5,7 +5,7 @@
# python manage.py nead_import -i https://os.zhdk.cloud.switch.ch/envidat4lwf/p1_meteo/historical/1.csv -s web -t lwf/data -a lwf -m test41
# python manage.py nead_import -i https://os.zhdk.cloud.switch.ch/envidat4lwf/p1_meteo/1.csv -s web -t lwf/data -a lwf -m test41
# python manage.py nead_import -i gcnet/data/01-SwissCamp.csv -s local -t gcnet/data -a gcnet -m test
import csv
import os
from pathlib import Path
import requests
......@@ -15,6 +15,8 @@ from django.utils.timezone import make_aware
from configparser import ConfigParser
from django.apps import apps
from gcnet.management.commands.importers.helpers.import_date_helpers import dict_from_csv_line
from gcnet.util.cleaners import get_gcnet_line_clean
from generic.util.views_helpers import get_model_cl
from generic.util.nead import write_nead_config
from lwf.util.cleaners import get_lwf_meteo_line_clean, get_lwf_station_line_clean
......@@ -109,38 +111,144 @@ class Command(BaseCommand):
parent_class_name = model_class.__base__.__name__
# Get line cleaner function
# try:
# line_cleaner = self.get_line_cleaner(parent_class_name)
# except Exception as e:
# logger.error(e)
# return
# Get line cleaner function that corresponds to parent class
try:
line_cleaner = self.get_line_cleaner(parent_class_name)
except Exception as e:
logger.error(e)
return
# Assign other variables used to write csv_temporary
csv_temporary = Path(f'{tempdir}/{model}_temporary.csv')
# input_fields = model_class.input_fields
database_fields = [field.name for field in model_class._meta.fields if field.name != 'id']
# print(database_fields)
model_fields = [field.name for field in model_class._meta.fields if field.name != 'id']
# date_format = model_class.date_format
written_timestamps = []
rows_before = 24
rows_after = 0
rows_buffer = []
nead_header = []
# written_timestamps = []
# rows_before = 24
# rows_after = 0
# rows_buffer = []
# nead_header = []
records_written = 0
line_number = 0
# Write data in input_file into csv_temporary with additional computed fields
try:
with open(csv_temporary, 'w', newline='') as sink, open(input_file, 'r') as source:
# Get NEAD database fields
with open(input_file) as source:
nead_database_fields = self.get_nead_database_fields(source)
# Write data in input_file into csv_temporary with additional computed fields
with open(csv_temporary, 'w', newline='') as sink, open(input_file) as source:
# Reading the source here causes the csv reader below to fail!
# nead_database_fields = self.get_nead_database_fields(source)
sink.write(','.join(model_fields) + '\n')
while True:
# Increment line_number
line_number += 1
line = source.readline()
if not line:
break
# Skip header comment lines that start with '#' or are empty
if line.startswith('#') or (len(line.strip()) == 0):
continue
# Transform the line in a dictionary
row = dict_from_csv_line(line, nead_database_fields, sep=',')
# Raise ValueError if row does not have as many columns as nead_database_fields
if not row:
raise ValueError(f'ERROR: Line {line_number} should have the same number of columns as the '
f'nead_database_fields: {len(nead_database_fields)}')
# Process row and add new calculated fields
# TODO add date format and null value as optional arguments with these default values
line_clean = line_cleaner(row, '%Y-%m-%d %H:%M:%S+00:00', '-999.0')
# Make timestamp_iso value a UTC timezone aware datetime object
dt_obj = line_clean['timestamp_iso']
aware_dt = make_aware(dt_obj)
line_clean['timestamp_iso'] = aware_dt
if line_clean['timestamp']:
try:
timestamp_dict = {'timestamp': line_clean['timestamp']}
obj, created = model_class.objects.update_or_create(**timestamp_dict, defaults=line_clean)
if created:
records_written += 1
# created = True
# if force:
# key_dict = {Columns.TIMESTAMP.value: line_clean[Columns.TIMESTAMP.value],
# Columns.TIMESTAMP_ISO.value: line_clean[Columns.TIMESTAMP_ISO.value]}
# obj, created = model_class.objects.get_or_create(**key_dict, defaults=line_clean)
# else:
# model_class.objects.create(**line_clean)
# if created:
# records_written += 1
except Exception as e:
raise e
# defaults = {'first_name': 'Bob'}
# try:
# obj = model_class.objects.get(timestamp=line_clean['timestamp'])
# for key, value in line_clean.items():
# setattr(obj, key, value)
# obj.save()
# except model_class.DoesNotExist:
# # new_values = {'first_name': 'John', 'last_name': 'Lennon'}
# # new_values.update(defaults)
# obj = model_class(**line_clean)
# print(obj)
# obj.save()
# obj, created = model_class.objects.update_or_create(**line_clean)
# print(obj)
# print(created)
# Make timestamp_iso value a UTC timezone aware datetime object
# dt_obj = line_clean['timestamp_iso']
# aware_dt = make_aware(dt_obj)
# Check if record with identical timestamp already exists in table
# try:
# # obj = model_class.objects.get(timestamp_iso=aware_dt)
# obj = model_class.objects.get(timestamp=line_clean['timestamp'])
# print(obj)
# except model_class.DoesNotExist:
# print(obj)
# pass
# , otherwise write record to
# temporary csv file after checking for record with duplicate timestamp
# try:
# model_class.objects.get(timestamp_iso=aware_dt)
# except model_class.DoesNotExist:
# if line_clean['timestamp_iso'] not in written_timestamps:
# # keep timestamps length small
# written_timestamps = written_timestamps[(-1) * min(len(written_timestamps), 1000):]
# written_timestamps += [line_clean['timestamp_iso']]
#
# # slide the row buffer window
# rows_buffer = rows_buffer[(-1) * min(len(rows_buffer), rows_before + rows_after):] + [
# line_clean]
#
# # check values before and after
# if len(rows_buffer) > rows_after:
# sink.write(
# ','.join(["{0}".format(v) for v in rows_buffer[-(1 + rows_after)].values()]) + '\n')
# records_written += 1
fields = self.get_nead_fields(source)
print(fields)
# nead_config = self.get_nead_config(source)
# print({section: dict(nead_config[section]) for section in nead_config.sections()})
sink.write(','.join(database_fields) + '\n')
records_written = 0
# # Skip number of header lines designated in parent class header line count
# for i in range(model_class.header_line_count):
......@@ -148,14 +256,18 @@ class Command(BaseCommand):
# nead_header.append(first_lines)
# next(source, None)
#
# while True:
# while True:
#
# # Skip comment lines
#
# line = source.readline()
#
# if not line:
# break
# line_array = [v for v in line.strip().split(model_class.delimiter) if len(v) > 0]
#
# line = source.readline()
#
# if not line:
# break
# line_array = [v for v in line.strip().split(model_class.delimiter) if len(v) > 0]
# # Skip header lines that start with designated parent class header symbol
# # For example: the '#' character
# if line.startswith(model_class.header_symbol):
......@@ -223,9 +335,9 @@ class Command(BaseCommand):
# # Then save it.
# c.save()
#
# # Log import message
# logger.info(f'FINISHED importing {input_file}, {records_written} new records written in {model}')
#
# Log import message
logger.info(f'FINISHED importing {input_file}, {records_written} new records written in {model}')
# Delete csv_temporary
# os.remove(csv_temporary)
......@@ -243,15 +355,18 @@ class Command(BaseCommand):
elif parent_class_name == 'LWFStation':
return get_lwf_station_line_clean
elif parent_class_name == 'Station':
return get_gcnet_line_clean
else:
raise Exception(f'ERROR parent class {parent_class_name} does not exist '
f'or is not specified in nead_import.py')
# Read NEAD header and return fields as list
# Read NEAD header and return database_fields as list
@staticmethod
def get_nead_fields(source):
def get_nead_database_fields(source):
fields_starting_strings = ['# fields', '#fields']
fields_starting_strings = ['# database_fields =', '#database_fields=', '# database_fields=']
fields_lines = []
for line in source:
......@@ -261,8 +376,8 @@ class Command(BaseCommand):
if len(fields_lines) == 1:
fields_string = fields_lines.pop()
fields_values = ((fields_string.split('=', 1))[1]).strip().rstrip('\n')
fields = fields_values.split(',')
return fields
database_fields = fields_values.split(',')
return database_fields
else:
raise Exception(f'ERROR input NEAD file does not have exactly one line starting with one of these values:'
f' {fields_starting_strings}')
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment