# SPDX-License-Identifier: AGPL-3.0-or-later
import re
from pathlib import Path
from typing import List, Optional, Iterable, Dict
import datetime
from pydantic import BaseModel
from dateutil.parser import parse as dateutil_parse



class ExtractedDataQARowDTO(BaseModel):
    """
    A Pydantic model representing a Data quality monitoring log line.

    Every field is optional and defaults to None, so a row can be built even
    when some pieces of the log line are missing.

    Full log lines example:

        [DATA_QA] Filtering LAR_met-onDemand::RH_THY::MIN 2024-04-30T09:00:00+01:00 [2024-W18-02]
        [DATA_QA] Filtering LAR_met-onDemand::RH_THY::MIN 2024-04-30T09:10:00+01:00 [2024-W18-02]
    """

    kind: Optional[str] = None  # Eg.: Filtering, Resampling, Missing
    station: Optional[str] = None  # Eg.: SIM2, ALI2, PAR2, DAV5
    parameter: Optional[str] = None  # Eg.: HS, RH, RSWR, PSUM, TA
    operator: Optional[str] = None  # Eg.: MAD, MIN, LINEAR... may be absent
    data_timestamp: Optional[datetime.datetime] = None  # parsed from the iso-format timestamp of the log line
    # Eg.: 2024-W18-02 -- the parsing regex in this module captures the value
    # found between the square brackets, without the brackets themselves.
    data_isoweek: Optional[str] = None


class LogDataQARule(BaseModel):
    """
    A Pydantic model representing a data quality log parsing rule.

    A rule wraps a compiled regular expression. When a line matches, the named
    groups ``kind``, ``station``, ``parameter``, ``operator``, ``timestamp`` and
    ``isoweek`` are read from the match; a group the pattern does not define
    (or that did not participate in the match) yields ``None``.
    """
    regex: re.Pattern

    def match_line(self, line: str) -> Optional[ExtractedDataQARowDTO]:
        """
        Checks if the given line matches the rule's regex and creates a data QA row if matched.

        The pattern is applied with ``Pattern.match``, i.e. anchored at the start of ``line``.
        The timestamp is parsed on a best-effort basis: strict ISO format first, then the
        more lenient dateutil parser. If both fail, the row is still returned with
        ``data_timestamp`` set to ``None``.

        :param line: The log line to check.
        :return: An ExtractedDataQARowDTO if the line matches, otherwise None.
        """
        match = self.regex.match(line)
        if not match:
            return None

        groups = match.groupdict()

        raw_timestamp = groups.get("timestamp")
        data_timestamp: Optional[datetime.datetime] = None
        if raw_timestamp is not None:
            try:
                data_timestamp = datetime.datetime.fromisoformat(raw_timestamp)
            except ValueError:
                try:
                    data_timestamp = dateutil_parse(raw_timestamp)
                except (ValueError, OverflowError):
                    # dateutil documents OverflowError (in addition to its
                    # ValueError-derived ParserError) for out-of-range values;
                    # an unparsable timestamp must not abort the whole log parse.
                    data_timestamp = None

        return ExtractedDataQARowDTO(
            kind=groups.get("kind"),
            station=groups.get("station"),
            parameter=groups.get("parameter"),
            operator=groups.get("operator"),
            data_timestamp=data_timestamp,
            data_isoweek=groups.get("isoweek"),
        )


# Parsing rules tried in order against every log line by parse_log_for_data_qa;
# the first rule that matches a line wins.
_DATA_QA_RULES: List[LogDataQARule] = [
    LogDataQARule(
        regex=re.compile(
            # Literal marker; rules are applied with Pattern.match, so it must
            # be at the very start of the (stripped) line.
            r"\[DATA_QA\] "
            # Event kind: a single word.
            r"(?P<kind>\w+) "
            # "station::parameter" with an optional third "::operator" segment.
            r"(?P<station>[^\s:]+)::(?P<parameter>[^\s:]+)(?:::(?P<operator>\S+))? "
            # Any whitespace-free token; converted to a datetime in match_line.
            r"(?P<timestamp>\S+) "
            # Bracketed token shaped like 2024-W18-02; brackets are not captured.
            r"\[(?P<isoweek>\d{4}-W\d{2}-\d{2})\]"
        )
    )
]


def parse_log_for_data_qa(log_file_path: Path) -> Iterable[ExtractedDataQARowDTO]:
    """
    Lazily parses a log file and yields one Data QA row per matching line.

    This is a generator: the file is opened when iteration starts and is read
    line by line. Each stripped line is tested against the rules of
    ``_DATA_QA_RULES`` in order; the first rule that matches produces the row
    and the remaining rules are skipped for that line. Lines matching no rule
    are ignored.

    :param log_file_path: Path to the log file to parse.
    :return: An iterable yielding ExtractedDataQARowDTO objects in file order.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # and replace undecodable bytes so that a single stray byte somewhere in
    # the log does not abort the whole parse with UnicodeDecodeError.
    with log_file_path.open("r", encoding="utf-8", errors="replace") as f:
        for line in f:
            line = line.strip()
            for rule in _DATA_QA_RULES:
                data_qa_row = rule.match_line(line)
                if data_qa_row is not None:
                    yield data_qa_row
                    break  # Stop processing this line after the first match


class DataQAAggregatedParameter(BaseModel):
    """
    Aggregated data for a single parameter within a station.

    Instances start empty (zero counts, no timestamps) and are filled in
    incrementally by aggregate_qa_data.
    """
    parameter: str
    # NOTE(review): the mutable `{}` default relies on pydantic giving each
    # instance its own copy of field defaults -- confirm for the pydantic
    # version in use.
    kind_count: Dict[str, int] = {}  # Counter for each type of event (Filtering, Resampling, etc.)
    total_count: int = 0  # Number of rows aggregated for this parameter
    first_timestamp: Optional[datetime.datetime] = None  # Earliest data timestamp seen, if any
    last_timestamp: Optional[datetime.datetime] = None  # Latest data timestamp seen, if any


class DataQAAggregatedStation(BaseModel):
    """
    Aggregated data for a station with nested parameters.

    Instances start with no parameters and a zero count; aggregate_qa_data
    fills them in row by row.
    """
    station: str
    # NOTE(review): the mutable `{}` default relies on pydantic giving each
    # instance its own copy of field defaults -- confirm for the pydantic
    # version in use.
    parameters: Dict[str, DataQAAggregatedParameter] = {}  # key by parameter
    total_count: int = 0  # Number of rows aggregated for this station, all parameters combined


def aggregate_qa_data(data_rows: Iterable[ExtractedDataQARowDTO]) -> Dict[str, DataQAAggregatedStation]:
    """
    Builds a two-level summary of QA rows: one entry per station, each holding
    one entry per parameter.

    Rows with no station or no parameter are skipped. The operator of a row is
    not used. For every kept row the per-kind counter and both total counters
    are incremented, and the parameter's first/last timestamps are widened
    when the row carries a timestamp.

    :param data_rows: Iterable of ExtractedDataQARowDTO objects to aggregate
    :return: Dictionary with station as key and DataQAAggregatedStation as value
    """
    stations_by_name: Dict[str, DataQAAggregatedStation] = {}

    for entry in data_rows:
        # Both levels of the hierarchy need a key; otherwise drop the row.
        if not (entry.station and entry.parameter):
            continue

        # Get-or-create the station bucket.
        station_agg = stations_by_name.get(entry.station)
        if station_agg is None:
            station_agg = DataQAAggregatedStation(station=entry.station)
            stations_by_name[entry.station] = station_agg

        # Get-or-create the parameter bucket inside that station.
        param_agg = station_agg.parameters.get(entry.parameter)
        if param_agg is None:
            param_agg = DataQAAggregatedParameter(parameter=entry.parameter)
            station_agg.parameters[entry.parameter] = param_agg

        # Counters: rows without a kind are filed under "unknown".
        event_kind = entry.kind if entry.kind else "unknown"
        counts = param_agg.kind_count
        counts[event_kind] = counts.get(event_kind, 0) + 1
        param_agg.total_count += 1
        station_agg.total_count += 1

        # Timestamp range: nothing to update when the row has no timestamp.
        stamp = entry.data_timestamp
        if not stamp:
            continue

        # Comparison goes through POSIX timestamps, as in the stored values.
        earliest = param_agg.first_timestamp
        if earliest is None or stamp.timestamp() < earliest.timestamp():
            param_agg.first_timestamp = stamp

        latest = param_agg.last_timestamp
        if latest is None or stamp.timestamp() > latest.timestamp():
            param_agg.last_timestamp = stamp

    return stations_by_name


class DataQAAnalysisView(BaseModel):
    # Aggregated stations, in the order they come out of aggregate_qa_data.
    stations: List[DataQAAggregatedStation]

    @staticmethod
    def make_from_log_file_path(log_path: Path) -> 'DataQAAnalysisView':
        """
        Builds the view from a log file: parse the file, aggregate the rows
        per station, and keep the aggregated stations as a list.

        :param log_path: Path to the log file to analyse.
        :return: A DataQAAnalysisView holding one entry per station found.
        """
        parsed_rows = parse_log_for_data_qa(log_path)
        per_station = aggregate_qa_data(parsed_rows)
        return DataQAAnalysisView(stations=[*per_station.values()])
