Skip to content

Commit

Permalink
Basic S3 upload (#71)
Browse files Browse the repository at this point in the history
* HEXA-261 Users should be able to upload files from the catalog (s3)

WIP

* HEXA-261 Users should be able to upload files from the catalog (s3)

Working POC

* HEXA-261 Users should be able to upload files from the catalog (s3)

Working POC

* HEXA-261 Users should be able to upload files from the catalog (s3)

Simpler UI

* HEXA-261 Users should be able to upload files from the catalog (s3)

Upload in folders too

* Lint

* Cleanup

* HEXA-261 Users should be able to upload files from the catalog (s3)

Basic api tests

* HEXA-261 Users should be able to upload files from the catalog (s3)

Basic api tests
  • Loading branch information
pvanliefland authored Sep 20, 2021
1 parent 0061210 commit 7a7c25f
Show file tree
Hide file tree
Showing 19 changed files with 306 additions and 49 deletions.
1 change: 1 addition & 0 deletions hexa/catalog/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def search(request: HttpRequest) -> HttpResponse:
)


# TODO: post-only?
def datasource_sync(
request: HttpRequest, datasource_contenttype_id: int, datasource_id: uuid.UUID
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{% block page_content %}
{% embed "ui/section/section.html" %}
{% slot header %}
{% include "ui/section/section_heading_with_label.html" with title=section_title label=section_label %}
{% include "ui/section/heading.html" with title=section_title label=section_label %}
{% endslot %}
{% slot content %}
{{ data_element_grid }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{% block page_content %}
{% embed "ui/section/section.html" %}
{% slot header %}
{% include "ui/section/section_heading_with_label.html" with title=section_title label=section_label %}
{% include "ui/section/heading.html" with title=section_title label=section_label %}
{% endslot %}
{% slot content %}
{{ dataset_grid }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
{% block page_content %}
{% embed "ui/section/section.html" %}
{% slot header %}
{% include "ui/section/section_heading_with_label.html" with title=section_title label=section_label %}
{% include "ui/section/heading.html" with title=section_title label=section_label %}
{% endslot %}
{% slot content %}
{{ indicator_grid }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
{% block page_content %}
{% embed "ui/section/section.html" %}
{% slot header %}
{% include "ui/section/section_heading_with_label.html" with title=section_title label=section_label %}
{% include "ui/section/heading.html" with title=section_title label=section_label %}
{% endslot %}
{% slot content %}
{{ table_grid }}
Expand Down
69 changes: 69 additions & 0 deletions hexa/plugins/connector_s3/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import boto3
import stringcase
from botocore.config import Config

import hexa.plugins.connector_s3.models
import hexa.user_management.models
Expand Down Expand Up @@ -70,3 +71,71 @@ def generate_sts_buckets_credentials(
)

return response["Credentials"]


def _build_s3_client(
*,
principal_credentials: hexa.plugins.connector_s3.models.Credentials,
bucket: hexa.plugins.connector_s3.models.Bucket,
user: hexa.user_management.models.User | None = None,
):
sts_credentials = generate_sts_buckets_credentials(
user=user,
principal_credentials=principal_credentials,
buckets=[bucket],
duration=900,
)
return boto3.client(
"s3",
principal_credentials.default_region,
aws_access_key_id=sts_credentials["AccessKeyId"],
aws_secret_access_key=sts_credentials["SecretAccessKey"],
aws_session_token=sts_credentials["SessionToken"],
config=Config(signature_version="s3v4"),
)


def head_bucket(
*,
principal_credentials: hexa.plugins.connector_s3.models.Credentials,
bucket: hexa.plugins.connector_s3.models.Bucket,
):
s3_client = _build_s3_client(
principal_credentials=principal_credentials, bucket=bucket
)

return s3_client.head_bucket(bucket.name)


def generate_download_url(
*,
principal_credentials: hexa.plugins.connector_s3.models.Credentials,
bucket: hexa.plugins.connector_s3.models.Bucket,
target_object: hexa.plugins.connector_s3.models.Object,
):
s3_client = _build_s3_client(
principal_credentials=principal_credentials, bucket=bucket
)

return s3_client.generate_presigned_url(
"get_object",
Params={"Bucket": bucket.name, "Key": target_object.key},
ExpiresIn=60 * 10,
)


def generate_upload_url(
*,
principal_credentials: hexa.plugins.connector_s3.models.Credentials,
bucket: hexa.plugins.connector_s3.models.Bucket,
target_key: str,
):
s3_client = _build_s3_client(
principal_credentials=principal_credentials, bucket=bucket
)

return s3_client.generate_presigned_url(
"put_object",
Params={"Bucket": bucket.name, "Key": target_key},
ExpiresIn=60 * 60,
)
35 changes: 7 additions & 28 deletions hexa/plugins/connector_s3/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from hexa.catalog.sync import DatasourceSyncResult
from hexa.core.models import Permission
from hexa.core.models.cryptography import EncryptedTextField
from hexa.plugins.connector_s3.api import generate_sts_buckets_credentials
from hexa.plugins.connector_s3.api import generate_sts_buckets_credentials, head_bucket


class Credentials(Base):
Expand Down Expand Up @@ -74,48 +74,27 @@ class Meta:

objects = BucketQuerySet.as_manager()

def get_boto_client(self):
@property
def principal_credentials(self):
try:
principal_s3_credentials = Credentials.objects.get()
return Credentials.objects.get()
except (Credentials.DoesNotExist, Credentials.MultipleObjectsReturned):
raise ValidationError(
"Ensure the S3 connector plugin first has a single credentials entry"
"The S3 connector plugin should be configured with a single Credentials entry"
)

sts_credentials = generate_sts_buckets_credentials(
user=None,
principal_credentials=principal_s3_credentials,
buckets=[self],
duration=900,
)
return boto3.client(
"s3",
principal_s3_credentials.default_region,
aws_access_key_id=sts_credentials["AccessKeyId"],
aws_secret_access_key=sts_credentials["SecretAccessKey"],
aws_session_token=sts_credentials["SessionToken"],
config=Config(signature_version="s3v4"),
)

def clean(self):
try:
self.get_boto_client().head_bucket(Bucket=self.name)
head_bucket(self.principal_credentials, self)
except ClientError as e:
raise ValidationError(e)

def sync(self): # TODO: move in api/sync module
"""Sync the bucket by querying the S3 API"""

try:
principal_s3_credentials = Credentials.objects.get()
except (Credentials.DoesNotExist, Credentials.MultipleObjectsReturned):
raise ValueError(
"Your s3 connector plugin should have a single credentials entry"
)

sts_credentials = generate_sts_buckets_credentials(
user=None,
principal_credentials=principal_s3_credentials,
principal_credentials=self.principal_credentials,
buckets=[self],
)
fs = S3FileSystem(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{% load i18n %}

<div
class="flex justify-end items-center"
>
<form action="#" method="POST" x-ref="form">
<label
for="file-upload"
class="inline-flex items-center px-3 py-2 border border-gray-300 shadow-sm text-sm leading-4 font-medium rounded-md text-gray-700 bg-white hover:bg-gray-50 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 cursor-pointer"
>
{% trans "Upload a file" %}
<span x-show="!uploading">{% include "ui/icons/upload.html" with ml=2 %}</span>
<span x-show="uploading">{% include "ui/icons/refresh.html" with ml=2 spin="true" %}</span>
<input x-ref="input" id="file-upload" name="file-upload" type="file" class="sr-only" @change="onChange">
</label>
</form>
</div>
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
{{ bucket_card }}
{% include "comments/components/page_section_comments.html" with object=datasource %}
{% embed "ui/section/section.html" with title=_("Objects") %}
{% slot extra_wrapper_attrs %}
x-data="S3Upload('{% url "connector_s3:object_upload" datasource.id %}', '{{ sync_url }}')"
x-init="init($el)"
x-swap="objects_section"
x-html="refreshedHtml"
{% endslot %}
{% slot actions %}
{% include "connector_s3/components/upload.html" with bucket=datasource sync_url=sync_url %}
{% endslot %}
{% slot content %}
{{ datagrid }}
{% endslot %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,15 @@
{% include "comments/components/page_section_comments.html" with object=object %}
{% if object.type == 'directory' %}
{% embed "ui/section/section.html" with title=_("Objects") %}
{% slot extra_wrapper_attrs %}
x-data="S3Upload('{% url "connector_s3:object_upload" datasource.id %}', '{{ sync_url }}', '{{ object.key }}')"
x-init="init($el)"
x-swap="objects_section"
x-html="refreshedHtml"
{% endslot %}
{% slot actions %}
{% include "connector_s3/components/upload.html" with bucket=datasource sync_url=sync_url %}
{% endslot %}
{% slot content %}
{{ datagrid }}
{% endslot %}
Expand Down
52 changes: 52 additions & 0 deletions hexa/plugins/connector_s3/tests/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import boto3
from django import test
from moto import mock_s3, mock_sts

from hexa.plugins.connector_s3.api import generate_download_url, generate_upload_url
from hexa.plugins.connector_s3.models import Bucket, Credentials, Object


class ApiTest(test.TestCase):
bucket_name = "test-bucket"

def setUp(self):
self.credentials = Credentials.objects.create(
username="test-username",
role_arn="test-arn-arn-arn-arn",
default_region="eu-central-1",
)
self.bucket = Bucket.objects.create(name=self.bucket_name)

@mock_s3
@mock_sts
def test_generate_download_url(self):
s3_client = boto3.client("s3")
s3_client.create_bucket(Bucket="test-bucket")
s3_client.put_object(Bucket="test-bucket", Key="test.csv", Body="test")

target_object = Object.objects.create(
bucket=self.bucket, key="test.csv", size=100
)

self.assertIsInstance(
generate_download_url(
principal_credentials=self.credentials,
bucket=self.bucket,
target_object=target_object,
),
str,
)

@mock_s3
@mock_sts
def test_generate_upload_url(self):
s3_client = boto3.client("s3")
s3_client.create_bucket(Bucket="test-bucket")
self.assertIsInstance(
generate_upload_url(
principal_credentials=self.credentials,
bucket=self.bucket,
target_key="test.csv",
),
str,
)
5 changes: 5 additions & 0 deletions hexa/plugins/connector_s3/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,9 @@
views.object_download,
name="object_download",
),
path(
"<str:bucket_id>/object_upload/",
views.object_upload,
name="object_upload",
),
]
50 changes: 44 additions & 6 deletions hexa/plugins/connector_s3/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

from django.http import HttpRequest, HttpResponse
from django.shortcuts import get_object_or_404, redirect, render
from django.urls import reverse
from django.utils.translation import ugettext_lazy as _

from .api import generate_download_url, generate_upload_url
from .datacards import BucketCard, ObjectCard
from .datagrids import ObjectGrid
from .models import Bucket, Object
Expand All @@ -28,11 +30,24 @@ def datasource_detail(request: HttpRequest, datasource_id: uuid.UUID) -> HttpRes
page=int(request.GET.get("page", "1")),
)

# TODO: discuss
# Shouldn't we place that on datasource / entry models? Or at least a helper function
# alternative: sync by index_id? weird but practical
# alternative2: upload as template tag
sync_url = reverse(
"catalog:datasource_sync",
kwargs={
"datasource_id": bucket.id,
"datasource_contenttype": ContentType.objects.get_for_model(Bucket).id,
},
)

return render(
request,
"connector_s3/datasource_detail.html",
{
"datasource": bucket,
"sync_url": sync_url,
"breadcrumbs": breadcrumbs,
"bucket_card": bucket_card,
"datagrid": datagrid,
Expand Down Expand Up @@ -72,12 +87,22 @@ def object_detail(
page=int(request.GET.get("page", "1")),
)

# TODO: duplicated with above block
sync_url = reverse(
"catalog:datasource_sync",
kwargs={
"datasource_id": bucket.id,
"datasource_contenttype": ContentType.objects.get_for_model(Bucket).id,
},
)

return render(
request,
"connector_s3/object_detail.html",
{
"datasource": bucket,
"object": s3_object,
"sync_url": sync_url,
"object_card": object_card,
"breadcrumbs": breadcrumbs,
"datagrid": datagrid,
Expand All @@ -91,12 +116,25 @@ def object_download(
bucket = get_object_or_404(
Bucket.objects.filter_for_user(request.user), pk=bucket_id
)
s3_object = get_object_or_404(bucket.object_set.all(), key=path)
target_object = get_object_or_404(bucket.object_set.all(), key=path)

response = bucket.get_boto_client().generate_presigned_url(
"get_object",
Params={"Bucket": s3_object.bucket.name, "Key": s3_object.key},
ExpiresIn=60 * 10,
download_url = generate_download_url(
principal_credentials=bucket.principal_credentials,
bucket=bucket,
target_object=target_object,
)

return redirect(download_url)


def object_upload(request, bucket_id):
bucket = get_object_or_404(
Bucket.objects.filter_for_user(request.user), pk=bucket_id
)
upload_url = generate_upload_url(
principal_credentials=bucket.principal_credentials,
bucket=bucket,
target_key=request.GET["object_key"],
)

return redirect(response)
return HttpResponse(upload_url, status=201)
Loading

0 comments on commit 7a7c25f

Please sign in to comment.