# SPDX-License-Identifier: AGPL-3.0-or-later
import re
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Union

from .models import NcML, Variable, Attribute, Dimension

_fix_attr_quotes_re = re.compile(r'(\w+)=(""([^"]*?)"")')
_fix2_value_quotes_re = re.compile(r'^"(.*?)"$')

def _fix_attr_quotes(text):
    # This is to work around an old syntax error in some the XML sources
    # e.g. value=""Test"" → value="Test"
    return _fix_attr_quotes_re.sub(
        lambda m: f'{m.group(1)}="{m.group(3)}"',
        text
    )

def _fix2_value_quotes(value):
    # This is to work around the possible presence of escaped quotes in the value
    # e.g. "&quot;Test&quot;" ("\"Test\"") → "Test" instead of "\"Test\""
    # Note that this function is applied to XML decoded value, so escapes in the source are already unescaped.
    return _fix2_value_quotes_re.sub(
        lambda m: f'{m.group(1)}',
        value
    )

def _local_tag(elem: ET.Element) -> str:
    """Return the element's lower-cased tag with any ``{namespace}`` prefix removed."""
    return elem.tag.lower().split('}')[-1]

def _parse_attribute(elem: ET.Element) -> Attribute:
    """Build an Attribute model from an ``<attribute>`` element."""
    return Attribute(
        name=elem.attrib['name'],
        # ``type`` is treated as optional, as for top-level attributes.
        type=elem.attrib.get('type'),
        value=_fix2_value_quotes(elem.attrib['value']),
    )

def parse_ncml(path: Union[str, Path]) -> NcML:
    """
    Parse an NcML file and return the corresponding NcML model.

    Only the direct children of the root element are processed: dimensions,
    global attributes and variables (with their nested attributes). Tags are
    matched case-insensitively and regardless of XML namespace.

    Args:
        path: Location of the NcML (XML) document; read as UTF-8.

    Returns:
        An NcML instance populated with the parsed dimensions, attributes
        and variables.

    Raises:
        xml.etree.ElementTree.ParseError: If the document is still not
            well-formed XML after the doubled-quote workaround.
        KeyError: If a required XML attribute is missing (``name`` on any
            element, ``value`` on attributes, ``type`` on variables).
    """
    # XML defaults to UTF-8; do not depend on the platform's locale encoding.
    xml_source = Path(path).read_text(encoding="utf-8")
    xml_source = _fix_attr_quotes(xml_source)
    root = ET.fromstring(xml_source)

    # Pass `location` only when present so the model's own default applies otherwise.
    nc_kwargs = {}
    if 'location' in root.attrib:
        nc_kwargs['location'] = root.attrib['location']

    ncml = NcML(**nc_kwargs)

    # Iterate over child elements
    for elem in root:
        tag = _local_tag(elem)

        if tag == 'dimension':
            dim = Dimension(
                name=elem.attrib['name'],
                length=elem.attrib.get('length', None),
                isUnlimited=elem.attrib.get('isUnlimited', 'false').lower() == 'true'
            )
            ncml.dimensions.append(dim)

        elif tag == 'attribute':
            ncml.attributes.append(_parse_attribute(elem))

        elif tag == 'variable':
            # Match nested attributes by local tag name: a plain
            # findall('attribute') finds nothing in a namespaced document.
            var_attrs = [
                _parse_attribute(child)
                for child in elem
                if _local_tag(child) == 'attribute'
            ]
            var = Variable(
                name=elem.attrib['name'],
                type=elem.attrib['type'],
                shape=elem.attrib.get('shape'),
                # `shape` takes precedence; `dimensions` is only a fallback.
                dimensions=elem.attrib.get('shape') or elem.attrib.get('dimensions'),
                attributes=var_attrs
            )
            ncml.variables.append(var)

        # You could add more clauses here for <aggregation>, <group>, etc.

    return ncml
