WSL/SLF GitLab Repository

Commit ee1c46e0 authored by Rebecca Kurup Buchholz
Browse files

Implemented logic to convert multiple packages

parent cf6c5456
......@@ -26,27 +26,106 @@ def envidat_to_opendataswiss_converter(package_list_url):
# Assign package_list API JSON data to Python dictionary
with request.urlopen(package_list_url) as metadata:
package_list = json.load(metadata)
packages = json.load(metadata)
# Assign metadata_dict to store ordered dictionaries for all packages in package_list
package_list_dict = collections.OrderedDict()
# package_list_dict = collections.OrderedDict()
# TEST
# package_list_dict = {}
converted_packages = []
# Iterate through packages
for package in package_list['result']:
# Convert each package (metadata record) in package_list to XML format
package_dict = get_opendataswiss_ordered_dict(package)
try:
# Iterate through packages
for package in packages['result']:
# Convert each package (metadata record) in package_list to XML format
package_dict = get_opendataswiss_ordered_dict(package)
# print(package_dict)
if package_dict:
# converted_packages += [{'dcat:dataset': package_dict}]
converted_packages += [package_dict]
# print(len(converted_packages))
except Exception as e:
log.error(f'ERROR: Cannot convert to OpenDataSwiss format, Exeption: {e}')
catalog_dict = collections.OrderedDict()
# header
catalog_dict['@xmlns:dct'] = "http://purl.org/dc/terms/"
catalog_dict['@xmlns:dc'] = "http://purl.org/dc/elements/1.1/"
catalog_dict['@xmlns:dcat'] = "http://www.w3.org/ns/dcat#"
catalog_dict['@xmlns:foaf'] = "http://xmlns.com/foaf/0.1/"
catalog_dict['@xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema#"
catalog_dict['@xmlns:rdfs'] = "http://www.w3.org/2000/01/rdf-schema#"
catalog_dict['@xmlns:rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
catalog_dict['@xmlns:vcard'] = "http://www.w3.org/2006/vcard/ns#"
catalog_dict['@xmlns:odrs'] = "http://schema.theodi.org/odrs#"
catalog_dict['@xmlns:schema'] = "http://schema.org/"
# Update package_list_dict with package (metadata record) in OpenDataSwiss format ordered dictionary
package_list_dict.update(package_dict)
catalog_dict['dcat:Catalog'] = {'dcat:dataset': converted_packages}
dcat_catalog_dict = collections.OrderedDict()
dcat_catalog_dict['rdf:RDF'] = catalog_dict
catalog_converted = unparse(dcat_catalog_dict, short_empty_elements=True, pretty=True)
# return make_response(catalog_converted, 200, headers)
return catalog_converted
# Iterate through packages
# for package in packages['result']:
# # Convert each package (metadata record) in package_list to XML format
# package_dict = get_opendataswiss_ordered_dict(package)
#
# print(package_dict)
#
# # Update package_list_dict with package (metadata record) in OpenDataSwiss format ordered dictionary
# package_list_dict.update(package_dict)
#
# print(package_list_dict)
# root element
opendata_metadata_dict = collections.OrderedDict()
opendata_metadata_dict['rdf:RDF'] = package_list_dict
# opendata_metadata_dict = collections.OrderedDict()
# opendata_metadata_dict['rdf:RDF'] = converted_packages
# Convert metadata to xml
metadata_xml = unparse(opendata_metadata_dict, short_empty_elements=True, pretty=True)
return metadata_xml
# metadata_xml = unparse(opendata_metadata_dict, short_empty_elements=True, pretty=True)
#
# return metadata_xml
# # Assign metadata_dict to store ordered dictionaries for all packages in package_list
# # package_list_dict = collections.OrderedDict()
# # TEST
# # package_list_dict = {}
# package_list = []
#
# # Iterate through packages
# for package in packages['result']:
#
# # Convert each package (metadata record) in package_list to XML format
# package_dict = get_opendataswiss_ordered_dict(package)
#
# print(package_dict)
#
# # Update package_list_dict with package (metadata record) in OpenDataSwiss format ordered dictionary
# package_list_dict.update(package_dict)
#
# print(package_list_dict)
#
# # root element
# opendata_metadata_dict = collections.OrderedDict()
# opendata_metadata_dict['rdf:RDF'] = package_list_dict
#
# # Convert metadata to xml
# metadata_xml = unparse(opendata_metadata_dict, short_empty_elements=True, pretty=True)
#
# return metadata_xml
# ======================================= Format Converter Function ==================================================
......@@ -54,19 +133,20 @@ def envidat_to_opendataswiss_converter(package_list_url):
# TODO check which tags are mandatory
# Returns OpenDataSwiss format OrderedDict created from EnviDat format metadata JSON package
def get_opendataswiss_ordered_dict(package):
md_metadata_dict = collections.OrderedDict()
# Header
md_metadata_dict['@xmlns:dct'] = "http://purl.org/dc/terms/"
md_metadata_dict['@xmlns:dc'] = "http://purl.org/dc/elements/1.1/"
md_metadata_dict['@xmlns:dcat'] = "http://www.w3.org/ns/dcat#"
md_metadata_dict['@xmlns:foaf'] = "http://xmlns.com/foaf/0.1/"
md_metadata_dict['@xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema#"
md_metadata_dict['@xmlns:rdfs'] = "http://www.w3.org/2000/01/rdf-schema#"
md_metadata_dict['@xmlns:rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
md_metadata_dict['@xmlns:vcard'] = "http://www.w3.org/2006/vcard/ns#"
md_metadata_dict['@xmlns:odrs'] = "http://schema.theodi.org/odrs#"
md_metadata_dict['@xmlns:schema'] = "http://schema.org/"
# md_metadata_dict['@xmlns:dct'] = "http://purl.org/dc/terms/"
# md_metadata_dict['@xmlns:dc'] = "http://purl.org/dc/elements/1.1/"
# md_metadata_dict['@xmlns:dcat'] = "http://www.w3.org/ns/dcat#"
# md_metadata_dict['@xmlns:foaf'] = "http://xmlns.com/foaf/0.1/"
# md_metadata_dict['@xmlns:xsd'] = "http://www.w3.org/2001/XMLSchema#"
# md_metadata_dict['@xmlns:rdfs'] = "http://www.w3.org/2000/01/rdf-schema#"
# md_metadata_dict['@xmlns:rdf'] = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
# md_metadata_dict['@xmlns:vcard'] = "http://www.w3.org/2006/vcard/ns#"
# md_metadata_dict['@xmlns:odrs'] = "http://schema.theodi.org/odrs#"
# md_metadata_dict['@xmlns:schema'] = "http://schema.org/"
# Dataset URL
package_name = package['name']
......@@ -129,7 +209,6 @@ def get_opendataswiss_ordered_dict(package):
# theme (MANDATORY)
md_metadata_dict['dcat:Dataset']['dcat:theme'] = {'@rdf:resource': "http://opendata.swiss/themes/education"}
# TODO check are records really always in English?
# language
md_metadata_dict['dcat:Dataset']['dct:language'] = {'#text': 'en'}
......@@ -272,4 +351,6 @@ def get_keywords(package):
# ========================================== TESTING ===========================================================
# envidat_to_opendataswiss_converter("https://www.envidat.ch/api/action/package_show?id=d6939be3-ed78-4714-890d-d974ae2e58be")
# print(envidat_to_opendataswiss_converter("https://www.envidat.ch/api/action/current_package_list_with_resources?limit=2"))
print(envidat_to_opendataswiss_converter("https://www.envidat.ch/api/action/current_package_list_with_resources?limit=1"))
# envidat_to_opendataswiss_converter("https://www.envidat.ch/api/action/current_package_list_with_resources?limit=1")
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment