WSL/SLF GitLab Repository
Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
EnviDat
opendataswiss
Commits
e4a01630
Commit
e4a01630
authored
May 10, 2022
by
Rebecca Kurup Buchholz
Browse files
Implemented error handling for package conversion
parent
7f0d3943
Changes
1
Hide whitespace changes
Inline
Side-by-side
converters/package_json_xml_converter.py
View file @
e4a01630
...
...
@@ -12,6 +12,9 @@ log = getLogger(__name__)
# ============================================ MAIN CONVERTER FUNCTION ================================================
# TODO output XML currently returns data in order from most recently modified
# TODO check if this acceptable as current ckan output outputs data in alphabetical order by dataset name
# WARNING: Converter valid only for metadata schema of EnviDat!
def
envidat_to_opendataswiss_converter
(
package_list_url
):
"""
...
...
@@ -32,14 +35,28 @@ def envidat_to_opendataswiss_converter(package_list_url):
# Assign converted_packages list to store ordered dictionaries for all packages in packages
converted_packages
=
[]
# TODO remove package counter code
# TEST initalize package counter
package_count
=
0
# Try to convert packages to dictionaries compatible with OpenDataSwiss format
try
:
# Iterate though packages
for
package
in
packages
[
'result'
]:
# TEST increment package counter
package_count
+=
1
# Convert each package (metadata record) in package_list to XML format
package_dict
=
get_opendataswiss_ordered_dict
(
package
)
if
package_dict
:
converted_packages
+=
[
package_dict
]
else
:
log
.
error
(
f
'ERROR: Failed to convert
{
package
}
'
)
# TEST print package count
print
(
package_count
)
except
Exception
as
e
:
log
.
error
(
f
'ERROR: Cannot convert to OpenDataSwiss format, Exeption:
{
e
}
'
)
...
...
@@ -47,99 +64,110 @@ def envidat_to_opendataswiss_converter(package_list_url):
wrapper_dict
=
get_wrapper_dict
(
converted_packages
)
# Convert wrapper_dict to XML format
c
atalog_c
onverted
=
unparse
(
wrapper_dict
,
short_empty_elements
=
True
,
pretty
=
True
)
converted
_data_xml
=
unparse
(
wrapper_dict
,
short_empty_elements
=
True
,
pretty
=
True
)
# return make_response(catalog_converted, 200, headers)
# TODO block that writes XML data
# TEST
with
open
(
'test.xml'
,
'w'
,
encoding
=
"utf-8"
)
as
xml_file
:
xml_file
.
write
(
converted_data_xml
)
return
c
atalog_c
onverted
return
converted
_data_xml
# ======================================= Format Converter Function ==================================================
# TODO check which tags are mandatory
# Returns OpenDataSwiss format OrderedDict created from EnviDat format metadata JSON package
# If conversion is not successful returns None
def
get_opendataswiss_ordered_dict
(
package
):
md_metadata_dict
=
collections
.
OrderedDict
()
# Dataset URL
package_name
=
package
[
'name'
]
package_url
=
f
'https://www.envidat.ch/#/metadata/
{
package_name
}
'
md_metadata_dict
[
'dcat:Dataset'
]
=
{
'@rdf:about'
:
package_url
}
# identifier
package_id
=
package
[
'id'
]
md_metadata_dict
[
'dcat:Dataset'
][
'dct:identifier'
]
=
f
'
{
package_id
}
@envidat'
# title
title
=
package
[
'title'
]
md_metadata_dict
[
'dcat:Dataset'
][
'dct:title'
]
=
{
'@xml:lang'
:
"en"
,
'#text'
:
title
}
# description
description
=
clean_text
(
package
.
get
(
'notes'
,
''
))
md_metadata_dict
[
'dcat:Dataset'
][
'dct:description'
]
=
{
'@xml:lang'
:
"en"
,
'#text'
:
description
}
# issued
creation_date
=
package
.
get
(
'metadata_created'
)
if
creation_date
:
md_metadata_dict
[
'dcat:Dataset'
][
'dct:issued'
]
=
{
'@rdf:datatype'
:
"http://www.w3.org/2001/XMLSchema#dateTime"
,
'#text'
:
parse
(
creation_date
).
strftime
(
"%Y-%m-%dT%H:%M:%SZ"
)
}
# modified
modification_date
=
package
.
get
(
'metadata_modified'
,
creation_date
)
if
modification_date
:
md_metadata_dict
[
'dcat:Dataset'
][
'dct:modified'
]
=
{
'@rdf:datatype'
:
"http://www.w3.org/2001/XMLSchema#dateTime"
,
'#text'
:
parse
(
modification_date
).
strftime
(
"%Y-%m-%dT%H:%M:%SZ"
)
}
# publication (MANDATORY)
publisher_name
=
json
.
loads
(
package
.
get
(
'publication'
,
'{}'
)).
get
(
'publisher'
,
''
)
md_metadata_dict
[
'dcat:Dataset'
][
'dct:publisher'
]
=
{
'rdf:Description'
:
{
'rdfs:label'
:
publisher_name
}}
# contact point (MANDATORY)
maintainer
=
json
.
loads
(
package
.
get
(
'maintainer'
,
'{}'
))
maintainer_name
=
""
if
maintainer
.
get
(
'given_name'
):
maintainer_name
+=
maintainer
[
'given_name'
].
strip
()
+
' '
maintainer_name
+=
maintainer
[
'name'
]
maintainer_email
=
"mailto:"
+
maintainer
[
'email'
]
individual_contact_point
=
{
'vcard:Individual'
:
{
'vcard:fn'
:
maintainer_name
,
'vcard:hasEmail'
:
{
'@rdf:resource'
:
maintainer_email
}}}
if
maintainer_email
==
'mailto:envidat@wsl.ch'
:
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:contactPoint'
]
=
[
individual_contact_point
]
else
:
organization_contact_point
=
{
'vcard:Organization'
:
{
'vcard:fn'
:
'EnviDat Support'
,
'vcard:hasEmail'
:
{
'@rdf:resource'
:
'mailto:envidat@wsl.ch'
}}}
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:contactPoint'
]
=
[
individual_contact_point
,
organization_contact_point
]
# theme (MANDATORY)
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:theme'
]
=
{
'@rdf:resource'
:
"http://opendata.swiss/themes/education"
}
# language
md_metadata_dict
[
'dcat:Dataset'
][
'dct:language'
]
=
{
'#text'
:
'en'
}
# keyword
keywords_list
=
[]
keywords
=
get_keywords
(
package
)
for
keyword
in
keywords
:
keywords_list
+=
[{
'@xml:lang'
:
"en"
,
'#text'
:
keyword
}]
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:keyword'
]
=
keywords_list
# landing page
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:landingPage'
]
=
package_url
# Distribution - iterate through package resources (MANDATORY) and obtain package license
# Call get_distribution_list(package) to get distibution list
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:distribution'
]
=
get_distribution_list
(
package
,
package_name
)
return
md_metadata_dict
try
:
md_metadata_dict
=
collections
.
OrderedDict
()
# Dataset URL
package_name
=
package
[
'name'
]
package_url
=
f
'https://www.envidat.ch/#/metadata/
{
package_name
}
'
md_metadata_dict
[
'dcat:Dataset'
]
=
{
'@rdf:about'
:
package_url
}
# identifier
package_id
=
package
[
'id'
]
md_metadata_dict
[
'dcat:Dataset'
][
'dct:identifier'
]
=
f
'
{
package_id
}
@envidat'
# title
title
=
package
[
'title'
]
md_metadata_dict
[
'dcat:Dataset'
][
'dct:title'
]
=
{
'@xml:lang'
:
"en"
,
'#text'
:
title
}
# description
description
=
clean_text
(
package
.
get
(
'notes'
,
''
))
md_metadata_dict
[
'dcat:Dataset'
][
'dct:description'
]
=
{
'@xml:lang'
:
"en"
,
'#text'
:
description
}
# issued
creation_date
=
package
.
get
(
'metadata_created'
)
if
creation_date
:
md_metadata_dict
[
'dcat:Dataset'
][
'dct:issued'
]
=
{
'@rdf:datatype'
:
"http://www.w3.org/2001/XMLSchema#dateTime"
,
'#text'
:
parse
(
creation_date
).
strftime
(
"%Y-%m-%dT%H:%M:%SZ"
)
}
# modified
modification_date
=
package
.
get
(
'metadata_modified'
,
creation_date
)
if
modification_date
:
md_metadata_dict
[
'dcat:Dataset'
][
'dct:modified'
]
=
{
'@rdf:datatype'
:
"http://www.w3.org/2001/XMLSchema#dateTime"
,
'#text'
:
parse
(
modification_date
).
strftime
(
"%Y-%m-%dT%H:%M:%SZ"
)
}
# publication (MANDATORY)
publisher_name
=
json
.
loads
(
package
.
get
(
'publication'
,
'{}'
)).
get
(
'publisher'
,
''
)
md_metadata_dict
[
'dcat:Dataset'
][
'dct:publisher'
]
=
{
'rdf:Description'
:
{
'rdfs:label'
:
publisher_name
}}
# contact point (MANDATORY)
maintainer
=
json
.
loads
(
package
.
get
(
'maintainer'
,
'{}'
))
maintainer_name
=
""
if
maintainer
.
get
(
'given_name'
):
maintainer_name
+=
maintainer
[
'given_name'
].
strip
()
+
' '
maintainer_name
+=
maintainer
[
'name'
]
maintainer_email
=
"mailto:"
+
maintainer
[
'email'
]
individual_contact_point
=
{
'vcard:Individual'
:
{
'vcard:fn'
:
maintainer_name
,
'vcard:hasEmail'
:
{
'@rdf:resource'
:
maintainer_email
}}}
if
maintainer_email
==
'mailto:envidat@wsl.ch'
:
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:contactPoint'
]
=
[
individual_contact_point
]
else
:
organization_contact_point
=
{
'vcard:Organization'
:
{
'vcard:fn'
:
'EnviDat Support'
,
'vcard:hasEmail'
:
{
'@rdf:resource'
:
'mailto:envidat@wsl.ch'
}}}
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:contactPoint'
]
=
[
individual_contact_point
,
organization_contact_point
]
# theme (MANDATORY)
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:theme'
]
=
{
'@rdf:resource'
:
"http://opendata.swiss/themes/education"
}
# language
md_metadata_dict
[
'dcat:Dataset'
][
'dct:language'
]
=
{
'#text'
:
'en'
}
# keyword
keywords_list
=
[]
keywords
=
get_keywords
(
package
)
for
keyword
in
keywords
:
keywords_list
+=
[{
'@xml:lang'
:
"en"
,
'#text'
:
keyword
}]
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:keyword'
]
=
keywords_list
# landing page
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:landingPage'
]
=
package_url
# Distribution - iterate through package resources (MANDATORY) and obtain package license
# Call get_distribution_list(package) to get distibution list
md_metadata_dict
[
'dcat:Dataset'
][
'dcat:distribution'
]
=
get_distribution_list
(
package
,
package_name
)
return
md_metadata_dict
except
Exception
as
e
:
log
.
error
(
f
'ERROR: Cannot convert
{
package
}
to OpenDataSwiss format, Exeption:
{
e
}
'
)
return
None
# ======================================= Distribution List Function ==================================================
...
...
@@ -266,8 +294,9 @@ def clean_text(text):
.
replace
(
'# '
,
' '
)
\
.
replace
(
'__'
,
''
)
\
.
replace
(
' '
,
' '
)
\
.
replace
(
'
\r
'
,
'
\n
'
).
\
replace
(
'
\n\n
'
,
'
\n
'
)
.
replace
(
'
\r
'
,
'
\n
'
)
\
.
replace
(
'
\r\n
'
,
'
\n
'
)
\
.
replace
(
'
\n\n
'
,
'
\n
'
)
return
cleaned_text
...
...
@@ -281,7 +310,4 @@ def get_keywords(package):
# ========================================== TESTING ===========================================================
# envidat_to_opendataswiss_converter("https://www.envidat.ch/api/action/package_show?id=d6939be3-ed78-4714-890d-d974ae2e58be")
# print(
# envidat_to_opendataswiss_converter("https://www.envidat.ch/api/action/current_package_list_with_resources?limit=3"))
# envidat_to_opendataswiss_converter("https://www.envidat.ch/api/action/current_package_list_with_resources?limit=2")
# envidat_to_opendataswiss_converter("https://www.envidat.ch/api/action/current_package_list_with_resources?limit=100000")
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment