WSL/SLF GitLab Repository
Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
EnviDat
opendataswiss
Commits
2ee3bd57
Commit
2ee3bd57
authored
May 11, 2022
by
Sam
Browse files
update main to use s3 functions
parent
b6add0a8
Changes
4
Hide whitespace changes
Inline
Side-by-side
README.md
View file @
2ee3bd57
...
...
@@ -12,12 +12,12 @@ Code to make EnviDat data accessible via OpenDataSwiss scrapers.
```
env
LOG_LEVEL=DEBUG
API_URL=xxx
AWS_ENDPOINT=xxx
AWS_REGION=xxx
AWS_ACCESS_KEY=xxx
AWS_SECRET_KEY=xxx
AWS_BUCKET_NAME=xxx
BACKEND_PACKAGE_URL=xxx
```
2a. Local Debug
...
...
main.py
View file @
2ee3bd57
...
...
@@ -4,20 +4,19 @@ import logging
import
requests
import
json
from
typing
import
Optional
from
io
import
BytesIO
from
textwrap
import
dedent
from
typing
import
Optional
,
NoReturn
from
pathlib
import
Path
from
collections
import
OrderedDict
from
dateutil.parser
import
parse
from
xmltodict
import
unparse
# from utils.s3 import (
# get_s3_connection,
# create_s3_bucket,
# set_s3_static_config,
# upload_to_s3_from_memory,
# )
from
utils.s3
import
(
get_s3_connection
,
create_s3_bucket
,
set_s3_static_config
,
generate_index_html
,
upload_to_s3_from_memory
,
)
log
=
logging
.
getLogger
(
__name__
)
...
...
@@ -30,7 +29,7 @@ def _debugger_is_active() -> bool:
return
gettrace
()
is
not
None
def
_load_debug_dotenv
()
->
Non
e
:
def
_load_debug_dotenv
()
->
No
Retur
n
:
"Load .env.secret variables from repo for debugging."
from
dotenv
import
load_dotenv
...
...
@@ -444,56 +443,6 @@ def get_opendataswiss_ordered_dict(package: dict) -> Optional[OrderedDict]:
return
None
def generate_index_html(package_names: list) -> BytesIO:
    """Build an index.html page with one S3 download link per package.

    The page is only assembled in an in-memory buffer and returned; this
    function performs no upload itself.

    Args:
        package_names: Names of the packages to list. Each name is used as
            the stem of the linked ``<name>.xml`` object.

    Returns:
        A BytesIO containing the UTF-8 encoded HTML, with the read pointer
        reset to the start so it can be read immediately.
    """
    # Function-scope imports: only needed for escaping the links below.
    from html import escape
    from urllib.parse import quote

    buf = BytesIO()

    # Start HTML
    html_block = dedent(
        """
        <html>
        <head>
        <meta charset="utf-8">
        <title>EnviDat Metadata List</title>
        </head>
        <body>
        """
    ).strip()
    log.debug(f"Writing start HTML block to buffer: {html_block}")
    buf.write(html_block.encode("utf_8"))

    # Packages
    log.info("Iterating package list to write S3 links to index.")
    for package_name in package_names:
        log.debug(f"Package name: {package_name}")
        # Percent-encode the name for use in the URL path, then HTML-escape
        # the whole URL (including single quotes, since the href attribute
        # is single-quoted) so an unusual name cannot break the markup.
        url = (
            "https://opendataswiss.s3-zh.os.switch.ch/"
            f"{quote(str(package_name))}.xml"
        )
        safe_url = escape(url, quote=True)
        html_block = dedent(
            f"""
            <div class='flex py-2 xs6'>
                <a href='{safe_url}'>
                    {safe_url}
                </a>
            </div>"""
        )
        log.debug(f"Writing package link HTML to buffer: {html_block}")
        buf.write(html_block.encode("utf_8"))

    # Close
    html_block = dedent(
        """
        </body>
        </html>"""
    )
    log.debug(f"Writing end HTML block to buffer: {html_block}")
    buf.write(html_block.encode("utf_8"))

    # Reset read pointer.
    # DO NOT FORGET THIS, for reading afterwards!
    buf.seek(0)

    return buf
def
envidat_to_opendataswiss_converter
()
->
str
:
"""
Main converter function for OpenDataSwiss. JSON --> XML.
...
...
@@ -539,23 +488,19 @@ def main():
_load_debug_dotenv
()
_get_logger
()
os
.
environ
[
"API_HOST"
]
=
"https://www.envidat.ch"
xml
=
envidat_to_opendataswiss_converter
()
log
.
info
(
xml
)
xml_data
=
envidat_to_opendataswiss_converter
()
xml_name
=
"envidat_export_opendataswiss.xml"
# s3_client = get_s3_connection()
# bucket = create_s3_bucket(s3_client, public=True)
# set_s3_static_config(s3_client)
s3_client
=
get_s3_connection
()
bucket
=
create_s3_bucket
(
s3_client
,
public
=
True
)
#
log.debug(f"Attempting upload of {
package
_name}
.xml
to S3 bucket.")
# upload_status =
upload_to_s3_from_memory(bucket,
f"{package
_name
}.
xml
",
data)
log
.
debug
(
f
"Attempting upload of
{
xml
_name
}
to S3 bucket."
)
upload_to_s3_from_memory
(
bucket
,
xml
_name
,
xml
_
data
)
# # Create index.html
# index_html = generate_index_html(packages_in_ckan)
# log.info("Uploading generated index.html to S3 bucket.")
# bucket.upload_fileobj(
# index_html, "index.html", ExtraArgs={"ContentType": "text/html"}
# )
set_s3_static_config
(
s3_client
)
index_html
=
generate_index_html
(
"OpenDataSwiss XML"
,
xml_name
)
log
.
debug
(
"Attempting upload of index.html to S3 bucket."
)
upload_to_s3_from_memory
(
bucket
,
"index.html"
,
index_html
,
content_type
=
"text/html"
)
log
.
info
(
"Done."
)
...
...
pdm.lock
View file @
2ee3bd57
[[package]]
name = "boto3"
version = "1.22.1
0
"
version = "1.22.1
2
"
requires_python = ">= 3.6"
summary = "The AWS SDK for Python"
dependencies = [
"botocore<1.26.0,>=1.25.1
0
",
"botocore<1.26.0,>=1.25.1
2
",
"jmespath<2.0.0,>=0.7.1",
"s3transfer<0.6.0,>=0.5.0",
]
[[package]]
name = "botocore"
version = "1.25.1
0
"
version = "1.25.1
2
"
requires_python = ">= 3.6"
summary = "Low-level, data-driven core of boto 3."
dependencies = [
...
...
@@ -70,15 +70,6 @@ dependencies = [
"urllib3<1.27,>=1.21.1",
]
[[package]]
name = "s3-bucket"
version = "1.4.0"
requires_python = ">=3.6"
summary = "An easy to use client for S3"
dependencies = [
"boto3",
]
[[package]]
name = "s3transfer"
version = "0.5.2"
...
...
@@ -108,16 +99,15 @@ summary = "Makes working with XML feel like you are working with JSON"
[metadata]
lock_version = "3.1"
content_hash = "sha256:
9504fea2839696939e9b436f5b858d7a037349dbc8307ab519ef7436d98ca837
"
content_hash = "sha256:
30fb3c469b3a614aeaf3250b26ec0ed7ee4661c0e389d62ed676a477ec97429b
"
[metadata.files]
"boto3 1.22.1
0
" = [
{file = "boto3-1.22.1
0
-py3-none-any.whl", hash = "sha256:
f32f35ea86b0d9ff408279f8e9903e4d0711475bbbd07003ccdf01fac7788e0a
"},
{file = "boto3-1.22.1
0
.tar.gz", hash = "sha256:
3edf68bdd1b95576c04cb7f69b9759a75b169fa13bb567271cb4d742f60accad
"},
"boto3 1.22.1
2
" = [
{file = "boto3-1.22.1
2
-py3-none-any.whl", hash = "sha256:
9830d7f8748c164a3f0929d8a0c5bb313cc62d7cf69ce55617108bed451a8520
"},
{file = "boto3-1.22.1
2
.tar.gz", hash = "sha256:
4b3a49abf7a5f7cdd82714a3ae356a9a8ce12a668e014c5fc68454aa1e2fc0cb
"},
]
"botocore 1.25.10" = [
{file = "botocore-1.25.10-py3-none-any.whl", hash = "sha256:a4e9dd2b7ce4bf41200108af6b9d3f21ff53a95324f8a66638c4afdf27da44d3"},
{file = "botocore-1.25.10.tar.gz", hash = "sha256:d4738b1c6ae3a905be39422e7a5ca99e4028efc9017c2694cbebbe12c899fc78"},
"botocore 1.25.12" = [
{file = "botocore-1.25.12-py3-none-any.whl", hash = "sha256:53e19890124be45e47ec4f7ffdaf587343d375dbd7c7a501e55aeff80680fec0"},
]
"certifi 2021.10.8" = [
{file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"},
...
...
@@ -147,10 +137,6 @@ content_hash = "sha256:9504fea2839696939e9b436f5b858d7a037349dbc8307ab519ef7436d
{file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"},
{file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"},
]
"s3-bucket 1.4.0" = [
{file = "s3_bucket-1.4.0-py3-none-any.whl", hash = "sha256:2cc9929df28bc398e16328af19b9f0e976d2a9563e5d7321b5e7c1d65b58cbbc"},
{file = "s3-bucket-1.4.0.tar.gz", hash = "sha256:31cc4d3c85182e03107e5998be814a33e847963cb7d311e8753f3d1c31720b58"},
]
"s3transfer 0.5.2" = [
{file = "s3transfer-0.5.2-py3-none-any.whl", hash = "sha256:7a6f4c4d1fdb9a2b640244008e142cbc2cd3ae34b386584ef044dd0f27101971"},
{file = "s3transfer-0.5.2.tar.gz", hash = "sha256:95c58c194ce657a5f4fb0b9e60a84968c808888aed628cd98ab8771fe1db98ed"},
...
...
pyproject.toml
View file @
2ee3bd57
...
...
@@ -7,9 +7,11 @@ authors = [
{name
=
"Rebecca Kurup Buchholz"
,
email
=
"None"
}
]
dependencies
=
[
"s3-bucket>
=
1.4
.
0
",
"requests=
=
2.27
.
1
",
"xmltodict>
=
0.13
.
0
"]
"xmltodict>
=
0.13
.
0
",
"python-dateutil>
=
2.8
.
2
",
"boto3>
=
1.22
.
12
",
]
requires-python
=
">
=
3.9
"
license
=
{
text
=
"MIT"
}
[project.optional-dependencies]
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment