diff --git a/import-io/Makefile b/import-io/Makefile index f846de54..c9b9f93c 100644 --- a/import-io/Makefile +++ b/import-io/Makefile @@ -1,4 +1,4 @@ -PLUGIN_VERSION=1.0.1 +PLUGIN_VERSION=1.0.2 PLUGIN_ID=import-io plugin: diff --git a/import-io/code-env/python/desc.json b/import-io/code-env/python/desc.json new file mode 100644 index 00000000..64f1a1e9 --- /dev/null +++ b/import-io/code-env/python/desc.json @@ -0,0 +1,6 @@ +{ + "acceptedPythonInterpreters": ["PYTHON27","PYTHON35","PYTHON36"], + "forceConda": false, + "installCorePackages": true, + "installJupyterSupport": false +} \ No newline at end of file diff --git a/import-io/code-env/python/spec/requirements.txt b/import-io/code-env/python/spec/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/import-io/custom-recipes/import-io-enrich-connector/recipe.py b/import-io/custom-recipes/import-io-enrich-connector/recipe.py index 98aa5eec..1cf42de5 100644 --- a/import-io/custom-recipes/import-io-enrich-connector/recipe.py +++ b/import-io/custom-recipes/import-io-enrich-connector/recipe.py @@ -1,4 +1,5 @@ -import urllib, json +import urllib +import json from dataiku.customrecipe import * import importio_utils diff --git a/import-io/custom-recipes/import-io-enrich-extractor/recipe.py b/import-io/custom-recipes/import-io-enrich-extractor/recipe.py index 7288d822..ba1269ab 100644 --- a/import-io/custom-recipes/import-io-enrich-extractor/recipe.py +++ b/import-io/custom-recipes/import-io-enrich-extractor/recipe.py @@ -1,4 +1,4 @@ -import dataiku, urllib, urlparse, requests, sys +import urllib from dataiku.customrecipe import * import importio_utils @@ -7,6 +7,5 @@ def build_query(in_row, apikey): url = in_row[url_field] return 'input=webpage/url:' + urllib.quote(safe='',s=url) + '&_apikey=' + apikey - # input/webpage/url= importio_utils.run(build_query) diff --git a/import-io/plugin.json b/import-io/plugin.json index 31456a8d..83d4e21f 100644 --- a/import-io/plugin.json +++ 
b/import-io/plugin.json @@ -1,6 +1,6 @@ { "id": "import-io", - "version": "1.0.1", + "version": "1.0.2", "meta": { "label": "import.io", "description": "Downloads data from import.io's API & enriches datasets", diff --git a/import-io/python-connectors/import-io-simple-api/connector.py b/import-io/python-connectors/import-io-simple-api/connector.py index 360d03fd..4f86c2c6 100644 --- a/import-io/python-connectors/import-io-simple-api/connector.py +++ b/import-io/python-connectors/import-io-simple-api/connector.py @@ -1,7 +1,12 @@ -import requests, json +import requests +import json import pandas as pd from dataiku.connector import Connector import importio_utils +import logging + +logger = logging.getLogger(__name__) + class ImportIOConnector(Connector): @@ -13,24 +18,23 @@ def __init__(self, config): elif self.config['api_url'].startswith('https://extraction.import.io/'): self.api_version = 'extraction' else: - raise Exception( - 'It looks like this URL is not an API URL. URLs to call the API (and get a json response) start with "https://api.import.io" .') - print '[import.io connector] calling API...' + raise Exception('It looks like this URL is not an API URL. 
URLs to call the API (and get a json response) start with "https://api.import.io" .') + logger.info('[import.io connector] calling API...') response = requests.get(self.config['api_url']) - print '[import.io connector] got response' + logger.info('[import.io connector] got response') try: self.json = response.json() except Exception as e: - print e - print 'response was:\n', response.text - raise + logger.error(e) + logger.error('response was:{}'.format(response.text)) + raise def get_read_schema(self): if self.api_version == 'api': columns = importio_utils.convert_schema(self.json['outputProperties']) - return {"columns":columns} + return {"columns": columns} else: - return None + return None def generate_rows(self, dataset_schema=None, dataset_partitioning=None, partition_id=None, records_limit = -1): if self.api_version == 'api': diff --git a/import-io/python-lib/importio_utils.py b/import-io/python-lib/importio_utils.py index e11f93f0..3a700fa0 100644 --- a/import-io/python-lib/importio_utils.py +++ b/import-io/python-lib/importio_utils.py @@ -1,6 +1,13 @@ # coding: UTF8 -import urlparse, requests, sys, dataiku, time +import urlparse +import requests +import sys +import dataiku +import time from dataiku.customrecipe import * +import logging + +logger = logging.getLogger(__name__) # See http://api.docs.import.io/#DataTypes importIO_subfields = { @@ -24,9 +31,7 @@ def convert_type(importIO_type): def convert_schema(import_io_schema): result = [] for col in import_io_schema: - result.append({ - 'name':col['name'], - 'type':convert_type(col['type'])}) + result.append({'name':col['name'],'type':convert_type(col['type'])}) for subfield in importIO_subfields[col['type']]: result.append({'name':col['name']+'/'+subfield, 'type':'string'}) return result @@ -54,21 +59,24 @@ def run(build_query): response = requests.get(request_url) json = response.json() except Exception as e: - print 'request to import.io failed' - print e - print 'response was:\n',response - 
raise + logger.error('request to import.io failed') + logger.error(e) + logger.error('response was: {}'.format(response)) + raise if 'error' in json: - print "response: ", json + logger.error("response: {}".format(json)) raise Exception(json['error']) for result_line in json['results']: if not output_schema: - print "Setting schema" + logger.info("Setting schema") input_schema_names = frozenset([e['name'] for e in input.read_schema()]) output_schema = input.read_schema() + logger.debug('building output schema from API response') + logger.debug(json) + logger.debug(json.keys()) for col in convert_schema(json['outputProperties']): if col['name'] in input_schema_names: - print "Warning: input col "+col['name']+" will be overwritten by output col with same name." + logger.warning("Warning: input col "+col['name']+" will be overwritten by output col with same name.") input_cols_to_drop.append(col['name']) else: output_schema.append(col)