From b03c6c3687c5efd4e4628bb3de8f18f6d9c5fd92 Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Mon, 25 Nov 2019 11:28:04 +0100 Subject: [PATCH 01/78] add lastUpdated field in engagements schema --- tap_hubspot/schemas/engagements.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tap_hubspot/schemas/engagements.json b/tap_hubspot/schemas/engagements.json index 76607494..7cc93f7e 100644 --- a/tap_hubspot/schemas/engagements.json +++ b/tap_hubspot/schemas/engagements.json @@ -4,6 +4,9 @@ "engagement_id": { "type": "integer" }, + "lastUpdated": { + "type": ["null", "string"] + }, "engagement": { "type": "object", "properties": { From b8cfaa7bd59482df8313838d04c17342e1f622cc Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Mon, 25 Nov 2019 20:15:52 +0100 Subject: [PATCH 02/78] add custom values in .gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 675377ad..110b8e55 100644 --- a/.gitignore +++ b/.gitignore @@ -98,3 +98,8 @@ config.json .autoenv.zsh *~ env-vars* +bq_config.json +catalog.json +Pipfile +stream.ndjson +settings.json \ No newline at end of file From 5212a6895e0493d22d8d4fedb0bc9e12b1229ffa Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Mon, 25 Nov 2019 20:16:42 +0100 Subject: [PATCH 03/78] change schema to accept values with underscore for contacts_to_company --- tap_hubspot/schemas/contacts_by_company.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_hubspot/schemas/contacts_by_company.json b/tap_hubspot/schemas/contacts_by_company.json index dafd30b2..22e7ffbb 100644 --- a/tap_hubspot/schemas/contacts_by_company.json +++ b/tap_hubspot/schemas/contacts_by_company.json @@ -1,10 +1,10 @@ { "type": "object", "properties": { - "contact-id": { + "contact_id": { "type": ["integer"] }, - "company-id": { + "company_id": { "type": ["integer"] } }, From 0249ad6c5eea83fe993675ec56847bf287e8a3e7 Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Mon, 25 Nov 2019 20:17:04 +0100 Subject: [PATCH 04/78] write contacts_to_company values with underscore --- tap_hubspot/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 86d0c7fa..96a2e22a 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -396,8 +396,8 @@ def _sync_contacts_by_company(STATE, ctx, company_id): data = request(url, default_contacts_by_company_params).json() for row in data[path]: counter.increment() - record = {'company-id' : company_id, - 'contact-id' : row} + record = {'company_id' : company_id, + 'contact_id' : row} record = bumble_bee.transform(record, schema, mdata) singer.write_record("contacts_by_company", record, time_extracted=utils.now()) From d25eb163f44f0aba32b7e87e0ac2b703ed69124c Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Mon, 25 Nov 2019 20:38:10 +0100 Subject: [PATCH 05/78] if contacts_by_company in ctx.selected_stream_ids --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 96a2e22a..fb6180fb 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -431,7 +431,7 @@ def sync_companies(STATE, ctx): max_bk_value = start if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: contacts_by_company_schema = load_schema(CONTACTS_BY_COMPANY) - singer.write_schema("contacts_by_company", contacts_by_company_schema, ["company-id", "contact-id"]) + singer.write_schema("contacts_by_company", 
contacts_by_company_schema, ["company_id", "contact_id"]) with bumble_bee: for row in gen_request(STATE, 'companies', url, default_company_params, 'companies', 'has-more', ['offset'], ['offset']): From 3ceb026e528bc3cd69a9367846b72e59ebd7e5f5 Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Tue, 26 Nov 2019 08:07:55 +0100 Subject: [PATCH 06/78] add Pipfile.lock in .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 110b8e55..b7f64394 100644 --- a/.gitignore +++ b/.gitignore @@ -101,5 +101,6 @@ env-vars* bq_config.json catalog.json Pipfile +Pipfile.lock stream.ndjson settings.json \ No newline at end of file From 61145a7557752f173c29b3a34406840b14d068ee Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Thu, 28 Nov 2019 12:46:12 +0100 Subject: [PATCH 07/78] get companies by batch --- tap_hubspot/__init__.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index fb6180fb..e6492f00 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -449,13 +449,17 @@ def sync_companies(STATE, ctx): if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time - if not modified_time or modified_time >= start: - record = request(get_url("companies_detail", company_id=row['companyId'])).json() - record = bumble_bee.transform(record, schema, mdata) - singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) - if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - STATE = _sync_contacts_by_company(STATE, ctx, record['companyId']) + + + # if not modified_time or modified_time >= start: + + + record = row + record = bumble_bee.transform(record, schema, mdata) + singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) + # if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: + # STATE = _sync_contacts_by_company(STATE, ctx, record['companyId']) # Don't bookmark past the start of this sync to account for updated records during the sync. new_bookmark = min(max_bk_value, current_sync_start) STATE = singer.write_bookmark(STATE, 'companies', bookmark_key, utils.strftime(new_bookmark)) From 435dbffb35f9d67e6e861245f2980ad252b8ef4e Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Thu, 28 Nov 2019 13:18:55 +0100 Subject: [PATCH 08/78] add contacts by company --- tap_hubspot/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index e6492f00..7718ed68 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -458,8 +458,8 @@ def sync_companies(STATE, ctx): record = row record = bumble_bee.transform(record, schema, mdata) singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) - # if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - # STATE = _sync_contacts_by_company(STATE, ctx, record['companyId']) + if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: + STATE = _sync_contacts_by_company(STATE, ctx, record['companyId']) # Don't bookmark past the start of this sync to account for updated records during the sync. 
new_bookmark = min(max_bk_value, current_sync_start) STATE = singer.write_bookmark(STATE, 'companies', bookmark_key, utils.strftime(new_bookmark)) From 5372cb9f73c297ec55a6b95c4f57347fdb730af1 Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Thu, 28 Nov 2019 13:55:33 +0100 Subject: [PATCH 09/78] set count to 250 --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 7718ed68..a930b353 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -382,7 +382,7 @@ class ValidationPredFailed(Exception): def use_recent_companies_endpoint(response): return response["total"] < 10000 -default_contacts_by_company_params = {'count' : 100} +default_contacts_by_company_params = {'count' : 250} # NB> to do: support stream aliasing and field selection def _sync_contacts_by_company(STATE, ctx, company_id): From 2d419bf5fb6a94e892a6c0f93d7df6e79a5aa081 Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Thu, 28 Nov 2019 14:40:40 +0100 Subject: [PATCH 10/78] request website, name and country for company --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index a930b353..8c14ca8c 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -404,7 +404,7 @@ def _sync_contacts_by_company(STATE, ctx, company_id): return STATE default_company_params = { - 'limit': 250, 'properties': ["createdate", "hs_lastmodifieddate"] + 'limit': 250, 'properties': ["website", "name","country", "createdate", "hs_lastmodifieddate"] } def sync_companies(STATE, ctx): From 99de19132e0afef54b908ae3ee3d704739be1577 Mon Sep 17 00:00:00 2001 From: Nikolaos Veneti Date: Thu, 28 Nov 2019 15:03:30 +0100 Subject: [PATCH 11/78] support domain in batch get for companies --- tap_hubspot/__init__.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 8c14ca8c..881375db 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -404,7 +404,7 @@ def _sync_contacts_by_company(STATE, ctx, company_id): return STATE default_company_params = { - 'limit': 250, 'properties': ["website", "name","country", "createdate", "hs_lastmodifieddate"] + 'limit': 250, 'properties': ["website", "name","country", "domain", "createdate", "hs_lastmodifieddate"] } def sync_companies(STATE, ctx): @@ -449,14 +449,7 @@ def sync_companies(STATE, ctx): if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time - - - # if not modified_time or modified_time >= start: - - - - record = row - record = bumble_bee.transform(record, schema, mdata) + record = bumble_bee.transform(row, schema, mdata) singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: STATE = _sync_contacts_by_company(STATE, ctx, record['companyId']) From 1cc06bb1ea20df98e8440ba1c2c7e34f8bc75afb Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. 
Madsen" Date: Mon, 6 Jan 2020 14:44:04 +0100 Subject: [PATCH 12/78] black commit --- tap_hubspot/__init__.py | 843 ++++++++++++++++++++++++++-------------- 1 file changed, 554 insertions(+), 289 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 881375db..09f8bb8e 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -15,28 +15,37 @@ import singer.metrics as metrics from singer import metadata from singer import utils -from singer import (transform, - UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING, - Transformer, _transform_datetime) +from singer import ( + transform, + UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING, + Transformer, + _transform_datetime, +) LOGGER = singer.get_logger() SESSION = requests.Session() + class InvalidAuthException(Exception): pass + class SourceUnavailableException(Exception): pass + class DependencyException(Exception): pass + class DataFields: - offset = 'offset' + offset = "offset" + class StateFields: - offset = 'offset' - this_stream = 'this_stream' + offset = "offset" + this_stream = "this_stream" + BASE_URL = "https://api.hubapi.com" @@ -49,7 +58,6 @@ class StateFields: "token_expires": None, "email_chunk_size": DEFAULT_CHUNK_SIZE, "subscription_chunk_size": DEFAULT_CHUNK_SIZE, - # in config.json "redirect_uri": None, "client_id": None, @@ -61,55 +69,55 @@ class StateFields: } ENDPOINTS = { - "contacts_properties": "/properties/v1/contacts/properties", - "contacts_all": "/contacts/v1/lists/all/contacts/all", - "contacts_recent": "/contacts/v1/lists/recently_updated/contacts/recent", - "contacts_detail": "/contacts/v1/contact/vids/batch/", - + "contacts_properties": "/properties/v1/contacts/properties", + "contacts_all": "/contacts/v1/lists/all/contacts/all", + "contacts_recent": "/contacts/v1/lists/recently_updated/contacts/recent", + "contacts_detail": "/contacts/v1/contact/vids/batch/", "companies_properties": "/companies/v2/properties", - "companies_all": "/companies/v2/companies/paged", - "companies_recent": "/companies/v2/companies/recent/modified", - "companies_detail": "/companies/v2/companies/{company_id}", - "contacts_by_company": "/companies/v2/companies/{company_id}/vids", - - "deals_properties": "/properties/v1/deals/properties", - "deals_all": "/deals/v1/deal/paged", - "deals_recent": "/deals/v1/deal/recent/modified", - "deals_detail": "/deals/v1/deal/{deal_id}", - - "deal_pipelines": "/deals/v1/pipelines", - - "campaigns_all": "/email/public/v1/campaigns/by-id", - "campaigns_detail": "/email/public/v1/campaigns/{campaign_id}", - - "engagements_all": "/engagements/v1/engagements/paged", - + "companies_all": "/companies/v2/companies/paged", + "companies_recent": "/companies/v2/companies/recent/modified", + "companies_detail": "/companies/v2/companies/{company_id}", + "contacts_by_company": "/companies/v2/companies/{company_id}/vids", + "deals_properties": "/properties/v1/deals/properties", + "deals_all": "/deals/v1/deal/paged", + "deals_recent": "/deals/v1/deal/recent/modified", + "deals_detail": "/deals/v1/deal/{deal_id}", + "deal_pipelines": "/deals/v1/pipelines", + "campaigns_all": "/email/public/v1/campaigns/by-id", + "campaigns_detail": "/email/public/v1/campaigns/{campaign_id}", + "engagements_all": "/engagements/v1/engagements/paged", "subscription_changes": "/email/public/v1/subscriptions/timeline", - "email_events": "/email/public/v1/events", - "contact_lists": "/contacts/v1/lists", - "forms": "/forms/v2/forms", - "workflows": "/automation/v3/workflows", - "owners": "/owners/v2/owners", + 
"email_events": "/email/public/v1/events", + "contact_lists": "/contacts/v1/lists", + "forms": "/forms/v2/forms", + "workflows": "/automation/v3/workflows", + "owners": "/owners/v2/owners", } + def get_start(state, tap_stream_id, bookmark_key): current_bookmark = singer.get_bookmark(state, tap_stream_id, bookmark_key) if current_bookmark is None: - return CONFIG['start_date'] + return CONFIG["start_date"] return current_bookmark + def get_current_sync_start(state, tap_stream_id): - current_sync_start_value = singer.get_bookmark(state, tap_stream_id, "current_sync_start") + current_sync_start_value = singer.get_bookmark( + state, tap_stream_id, "current_sync_start" + ) if current_sync_start_value is None: return current_sync_start_value return utils.strptime_to_utc(current_sync_start_value) + def write_current_sync_start(state, tap_stream_id, start): value = start if start is not None: value = utils.strftime(start) return singer.write_bookmark(state, tap_stream_id, "current_sync_start", value) + def clean_state(state): """ Clear deprecated keys out of state. """ for stream, bookmark_map in state.get("bookmarks", {}).items(): @@ -117,6 +125,7 @@ def clean_state(state): LOGGER.info("{} - Removing last_sync_duration from state.".format(stream)) state["bookmarks"][stream].pop("last_sync_duration", None) + def get_url(endpoint, **kwargs): if endpoint not in ENDPOINTS: raise ValueError("Invalid endpoint {}".format(endpoint)) @@ -129,8 +138,7 @@ def get_field_type_schema(field_type): return {"type": ["null", "boolean"]} elif field_type == "datetime": - return {"type": ["null", "string"], - "format": "date-time"} + return {"type": ["null", "string"], "format": "date-time"} elif field_type == "number": # A value like 'N/A' can be returned for this type, @@ -140,6 +148,7 @@ def get_field_type_schema(field_type): else: return {"type": ["null", "string"]} + def get_field_schema(field_type, extras=False): if extras: return { @@ -149,93 +158,107 @@ def get_field_schema(field_type, extras=False): "timestamp": get_field_type_schema("datetime"), "source": get_field_type_schema("string"), "sourceId": get_field_type_schema("string"), - } + }, } else: return { "type": "object", - "properties": { - "value": get_field_type_schema(field_type), - } + "properties": {"value": get_field_type_schema(field_type)}, } + def parse_custom_schema(entity_name, data): return { - field['name']: get_field_schema( - field['type'], entity_name != "contacts") + field["name"]: get_field_schema(field["type"], entity_name != "contacts") for field in data } def get_custom_schema(entity_name): - return parse_custom_schema(entity_name, request(get_url(entity_name + "_properties")).json()) + return parse_custom_schema( + entity_name, request(get_url(entity_name + "_properties")).json() + ) def get_abs_path(path): return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) + def load_associated_company_schema(): associated_company_schema = load_schema("companies") - #pylint: disable=line-too-long - associated_company_schema['properties']['company-id'] = associated_company_schema['properties'].pop('companyId') - associated_company_schema['properties']['portal-id'] = associated_company_schema['properties'].pop('portalId') + # pylint: disable=line-too-long + associated_company_schema["properties"]["company-id"] = associated_company_schema[ + "properties" + ].pop("companyId") + associated_company_schema["properties"]["portal-id"] = associated_company_schema[ + "properties" + ].pop("portalId") return associated_company_schema + def 
load_schema(entity_name): - schema = utils.load_json(get_abs_path('schemas/{}.json'.format(entity_name))) + schema = utils.load_json(get_abs_path("schemas/{}.json".format(entity_name))) if entity_name in ["contacts", "companies", "deals"]: custom_schema = get_custom_schema(entity_name) - schema['properties']['properties'] = { + schema["properties"]["properties"] = { "type": "object", "properties": custom_schema, } if entity_name == "contacts": - schema['properties']['associated-company'] = load_associated_company_schema() + schema["properties"]["associated-company"] = load_associated_company_schema() return schema -#pylint: disable=invalid-name + +# pylint: disable=invalid-name def acquire_access_token_from_refresh_token(): payload = { "grant_type": "refresh_token", - "redirect_uri": CONFIG['redirect_uri'], - "refresh_token": CONFIG['refresh_token'], - "client_id": CONFIG['client_id'], - "client_secret": CONFIG['client_secret'], + "redirect_uri": CONFIG["redirect_uri"], + "refresh_token": CONFIG["refresh_token"], + "client_id": CONFIG["client_id"], + "client_secret": CONFIG["client_secret"], } - resp = requests.post(BASE_URL + "/oauth/v1/token", data=payload) if resp.status_code == 403: raise InvalidAuthException(resp.content) resp.raise_for_status() auth = resp.json() - CONFIG['access_token'] = auth['access_token'] - CONFIG['refresh_token'] = auth['refresh_token'] - CONFIG['token_expires'] = ( - datetime.datetime.utcnow() + - datetime.timedelta(seconds=auth['expires_in'] - 600)) - LOGGER.info("Token refreshed. Expires at %s", CONFIG['token_expires']) + CONFIG["access_token"] = auth["access_token"] + CONFIG["refresh_token"] = auth["refresh_token"] + CONFIG["token_expires"] = datetime.datetime.utcnow() + datetime.timedelta( + seconds=auth["expires_in"] - 600 + ) + LOGGER.info("Token refreshed. 
Expires at %s", CONFIG["token_expires"]) def giveup(exc): - return exc.response is not None \ - and 400 <= exc.response.status_code < 500 \ + return ( + exc.response is not None + and 400 <= exc.response.status_code < 500 and exc.response.status_code != 429 + ) + def on_giveup(details): - if len(details['args']) == 2: - url, params = details['args'] + if len(details["args"]) == 2: + url, params = details["args"] else: - url = details['args'] + url = details["args"] params = {} - raise Exception("Giving up on request after {} tries with url {} and params {}" \ - .format(details['tries'], url, params)) + raise Exception( + "Giving up on request after {} tries with url {} and params {}".format( + details["tries"], url, params + ) + ) + + +URL_SOURCE_RE = re.compile(BASE_URL + r"/(\w+)/") -URL_SOURCE_RE = re.compile(BASE_URL + r'/(\w+)/') def parse_source_from_url(url): match = URL_SOURCE_RE.match(url) @@ -244,30 +267,34 @@ def parse_source_from_url(url): return None -@backoff.on_exception(backoff.constant, - (requests.exceptions.RequestException, - requests.exceptions.HTTPError), - max_tries=5, - jitter=None, - giveup=giveup, - on_giveup=on_giveup, - interval=10) +@backoff.on_exception( + backoff.constant, + (requests.exceptions.RequestException, requests.exceptions.HTTPError), + max_tries=5, + jitter=None, + giveup=giveup, + on_giveup=on_giveup, + interval=10, +) def request(url, params=None): params = params or {} - hapikey = CONFIG['hapikey'] + hapikey = CONFIG["hapikey"] if hapikey is None: - if CONFIG['token_expires'] is None or CONFIG['token_expires'] < datetime.datetime.utcnow(): + if ( + CONFIG["token_expires"] is None + or CONFIG["token_expires"] < datetime.datetime.utcnow() + ): acquire_access_token_from_refresh_token() - headers = {'Authorization': 'Bearer {}'.format(CONFIG['access_token'])} + headers = {"Authorization": "Bearer {}".format(CONFIG["access_token"])} else: - params['hapikey'] = hapikey + params["hapikey"] = hapikey headers = {} - if 'user_agent' in CONFIG: - headers['User-Agent'] = CONFIG['user_agent'] + if "user_agent" in CONFIG: + headers["User-Agent"] = CONFIG["user_agent"] - req = requests.Request('GET', url, params=params, headers=headers).prepare() + req = requests.Request("GET", url, params=params, headers=headers).prepare() LOGGER.info("GET %s", req.url) with metrics.http_request_timer(parse_source_from_url(url)) as timer: resp = SESSION.send(req) @@ -278,6 +305,8 @@ def request(url, params=None): resp.raise_for_status() return resp + + # {"bookmarks" : {"contacts" : { "lastmodifieddate" : "2001-01-01" # "offset" : {"vidOffset": 1234 # "timeOffset": "3434434 }} @@ -286,8 +315,10 @@ def request(url, params=None): # } # } -#pylint: disable=line-too-long -def gen_request(STATE, tap_stream_id, url, params, path, more_key, offset_keys, offset_targets): +# pylint: disable=line-too-long +def gen_request( + STATE, tap_stream_id, url, params, path, more_key, offset_keys, offset_targets +): if len(offset_keys) != len(offset_targets): raise ValueError("Number of offset_keys must match number of offset_targets") @@ -321,45 +352,67 @@ def _sync_contact_vids(catalog, vids, schema, bumble_bee): if len(vids) == 0: return - data = request(get_url("contacts_detail"), params={'vid': vids, 'showListMemberships' : True, "formSubmissionMode" : "all"}).json() + data = request( + get_url("contacts_detail"), + params={"vid": vids, "showListMemberships": True, "formSubmissionMode": "all"}, + ).json() time_extracted = utils.now() - mdata = metadata.to_map(catalog.get('metadata')) + 
mdata = metadata.to_map(catalog.get("metadata")) for record in data.values(): record = bumble_bee.transform(record, schema, mdata) - singer.write_record("contacts", record, catalog.get('stream_alias'), time_extracted=time_extracted) + singer.write_record( + "contacts", + record, + catalog.get("stream_alias"), + time_extracted=time_extracted, + ) + default_contact_params = { - 'showListMemberships': True, - 'includeVersion': True, - 'count': 100, + "showListMemberships": True, + "includeVersion": True, + "count": 100, } + def sync_contacts(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - bookmark_key = 'versionTimestamp' + bookmark_key = "versionTimestamp" start = utils.strptime_with_tz(get_start(STATE, "contacts", bookmark_key)) LOGGER.info("sync_contacts from %s", start) max_bk_value = start schema = load_schema("contacts") - singer.write_schema("contacts", schema, ["vid"], [bookmark_key], catalog.get('stream_alias')) + singer.write_schema( + "contacts", schema, ["vid"], [bookmark_key], catalog.get("stream_alias") + ) url = get_url("contacts_all") vids = [] with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in gen_request(STATE, 'contacts', url, default_contact_params, 'contacts', 'has-more', ['vid-offset'], ['vidOffset']): + for row in gen_request( + STATE, + "contacts", + url, + default_contact_params, + "contacts", + "has-more", + ["vid-offset"], + ["vidOffset"], + ): modified_time = None if bookmark_key in row: modified_time = utils.strptime_with_tz( - _transform_datetime( # pylint: disable=protected-access - row[bookmark_key], - UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING)) + _transform_datetime( # pylint: disable=protected-access + row[bookmark_key], UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING + ) + ) if not modified_time or modified_time >= start: - vids.append(row['vid']) + vids.append(row["vid"]) if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time @@ -370,52 +423,70 @@ def sync_contacts(STATE, ctx): _sync_contact_vids(catalog, vids, schema, bumble_bee) - STATE = singer.write_bookmark(STATE, 'contacts', bookmark_key, utils.strftime(max_bk_value)) + STATE = singer.write_bookmark( + STATE, "contacts", bookmark_key, utils.strftime(max_bk_value) + ) singer.write_state(STATE) return STATE + class ValidationPredFailed(Exception): pass + # companies_recent only supports 10,000 results. 
If there are more than this, # we'll need to use the companies_all endpoint def use_recent_companies_endpoint(response): return response["total"] < 10000 -default_contacts_by_company_params = {'count' : 250} + +default_contacts_by_company_params = {"count": 250} # NB> to do: support stream aliasing and field selection def _sync_contacts_by_company(STATE, ctx, company_id): schema = load_schema(CONTACTS_BY_COMPANY) catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) + mdata = metadata.to_map(catalog.get("metadata")) url = get_url("contacts_by_company", company_id=company_id) - path = 'vids' + path = "vids" with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: with metrics.record_counter(CONTACTS_BY_COMPANY) as counter: data = request(url, default_contacts_by_company_params).json() for row in data[path]: counter.increment() - record = {'company_id' : company_id, - 'contact_id' : row} + record = {"company_id": company_id, "contact_id": row} record = bumble_bee.transform(record, schema, mdata) - singer.write_record("contacts_by_company", record, time_extracted=utils.now()) + singer.write_record( + "contacts_by_company", record, time_extracted=utils.now() + ) return STATE + default_company_params = { - 'limit': 250, 'properties': ["website", "name","country", "domain", "createdate", "hs_lastmodifieddate"] + "limit": 250, + "properties": [ + "website", + "name", + "country", + "domain", + "createdate", + "hs_lastmodifieddate", + ], } + def sync_companies(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) + mdata = metadata.to_map(catalog.get("metadata")) bumble_bee = Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) - bookmark_key = 'hs_lastmodifieddate' + bookmark_key = "hs_lastmodifieddate" start = utils.strptime_to_utc(get_start(STATE, "companies", bookmark_key)) LOGGER.info("sync_companies from %s", start) - schema = load_schema('companies') - singer.write_schema("companies", schema, ["companyId"], [bookmark_key], catalog.get('stream_alias')) + schema = load_schema("companies") + singer.write_schema( + "companies", schema, ["companyId"], [bookmark_key], catalog.get("stream_alias") + ) # Because this stream doesn't query by `lastUpdated`, it cycles # through the data set every time. 
The issue with this is that there @@ -431,112 +502,169 @@ def sync_companies(STATE, ctx): max_bk_value = start if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: contacts_by_company_schema = load_schema(CONTACTS_BY_COMPANY) - singer.write_schema("contacts_by_company", contacts_by_company_schema, ["company_id", "contact_id"]) + singer.write_schema( + "contacts_by_company", + contacts_by_company_schema, + ["company_id", "contact_id"], + ) with bumble_bee: - for row in gen_request(STATE, 'companies', url, default_company_params, 'companies', 'has-more', ['offset'], ['offset']): - row_properties = row['properties'] + for row in gen_request( + STATE, + "companies", + url, + default_company_params, + "companies", + "has-more", + ["offset"], + ["offset"], + ): + row_properties = row["properties"] modified_time = None if bookmark_key in row_properties: # Hubspot returns timestamps in millis - timestamp_millis = row_properties[bookmark_key]['timestamp'] / 1000.0 - modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc) - elif 'createdate' in row_properties: + timestamp_millis = row_properties[bookmark_key]["timestamp"] / 1000.0 + modified_time = datetime.datetime.fromtimestamp( + timestamp_millis, datetime.timezone.utc + ) + elif "createdate" in row_properties: # Hubspot returns timestamps in millis - timestamp_millis = row_properties['createdate']['timestamp'] / 1000.0 - modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc) + timestamp_millis = row_properties["createdate"]["timestamp"] / 1000.0 + modified_time = datetime.datetime.fromtimestamp( + timestamp_millis, datetime.timezone.utc + ) if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time - record = bumble_bee.transform(row, schema, mdata) - singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) + record = bumble_bee.transform(row, schema, mdata) + singer.write_record( + "companies", + record, + catalog.get("stream_alias"), + time_extracted=utils.now(), + ) if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - STATE = _sync_contacts_by_company(STATE, ctx, record['companyId']) + STATE = _sync_contacts_by_company(STATE, ctx, record["companyId"]) # Don't bookmark past the start of this sync to account for updated records during the sync. 
new_bookmark = min(max_bk_value, current_sync_start) - STATE = singer.write_bookmark(STATE, 'companies', bookmark_key, utils.strftime(new_bookmark)) - STATE = write_current_sync_start(STATE, 'companies', None) + STATE = singer.write_bookmark( + STATE, "companies", bookmark_key, utils.strftime(new_bookmark) + ) + STATE = write_current_sync_start(STATE, "companies", None) singer.write_state(STATE) return STATE + def sync_deals(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) - bookmark_key = 'hs_lastmodifieddate' + mdata = metadata.to_map(catalog.get("metadata")) + bookmark_key = "hs_lastmodifieddate" start = utils.strptime_with_tz(get_start(STATE, "deals", bookmark_key)) max_bk_value = start LOGGER.info("sync_deals from %s", start) most_recent_modified_time = start - params = {'count': 250, - 'includeAssociations': False, - 'properties' : []} + params = {"count": 250, "includeAssociations": False, "properties": []} schema = load_schema("deals") - singer.write_schema("deals", schema, ["dealId"], [bookmark_key], catalog.get('stream_alias')) + singer.write_schema( + "deals", schema, ["dealId"], [bookmark_key], catalog.get("stream_alias") + ) # Check if we should include associations for key in mdata.keys(): - if 'associations' in key: + if "associations" in key: assoc_mdata = mdata.get(key) - if (assoc_mdata.get('selected') and assoc_mdata.get('selected') == True): - params['includeAssociations'] = True + if assoc_mdata.get("selected") and assoc_mdata.get("selected") == True: + params["includeAssociations"] = True # Append all the properties fields for deals to the request if # properties is selectedOB - if mdata.get(('properties', 'properties'), {}).get('selected'): - additional_properties = schema.get("properties").get("properties").get("properties") + if mdata.get(("properties", "properties"), {}).get("selected"): + additional_properties = ( + schema.get("properties").get("properties").get("properties") + ) for key in additional_properties.keys(): - params['properties'].append(key) + params["properties"].append(key) - url = get_url('deals_all') + url = get_url("deals_all") with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in gen_request(STATE, 'deals', url, params, 'deals', "hasMore", ["offset"], ["offset"]): - row_properties = row['properties'] + for row in gen_request( + STATE, "deals", url, params, "deals", "hasMore", ["offset"], ["offset"] + ): + row_properties = row["properties"] modified_time = None if bookmark_key in row_properties: # Hubspot returns timestamps in millis - timestamp_millis = row_properties[bookmark_key]['timestamp'] / 1000.0 - modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc) - elif 'createdate' in row_properties: + timestamp_millis = row_properties[bookmark_key]["timestamp"] / 1000.0 + modified_time = datetime.datetime.fromtimestamp( + timestamp_millis, datetime.timezone.utc + ) + elif "createdate" in row_properties: # Hubspot returns timestamps in millis - timestamp_millis = row_properties['createdate']['timestamp'] / 1000.0 - modified_time = datetime.datetime.fromtimestamp(timestamp_millis, datetime.timezone.utc) + timestamp_millis = row_properties["createdate"]["timestamp"] / 1000.0 + modified_time = datetime.datetime.fromtimestamp( + timestamp_millis, datetime.timezone.utc + ) if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time if not modified_time or modified_time >= start: 
record = bumble_bee.transform(row, schema, mdata) - singer.write_record("deals", record, catalog.get('stream_alias'), time_extracted=utils.now()) - - STATE = singer.write_bookmark(STATE, 'deals', bookmark_key, utils.strftime(max_bk_value)) + singer.write_record( + "deals", + record, + catalog.get("stream_alias"), + time_extracted=utils.now(), + ) + + STATE = singer.write_bookmark( + STATE, "deals", bookmark_key, utils.strftime(max_bk_value) + ) singer.write_state(STATE) return STATE -#NB> no suitable bookmark is available: https://developers.hubspot.com/docs/methods/email/get_campaigns_by_id + +# NB> no suitable bookmark is available: https://developers.hubspot.com/docs/methods/email/get_campaigns_by_id def sync_campaigns(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) + mdata = metadata.to_map(catalog.get("metadata")) schema = load_schema("campaigns") - singer.write_schema("campaigns", schema, ["id"], catalog.get('stream_alias')) + singer.write_schema("campaigns", schema, ["id"], catalog.get("stream_alias")) LOGGER.info("sync_campaigns(NO bookmarks)") url = get_url("campaigns_all") - params = {'limit': 500} + params = {"limit": 500} with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in gen_request(STATE, 'campaigns', url, params, "campaigns", "hasMore", ["offset"], ["offset"]): - record = request(get_url("campaigns_detail", campaign_id=row['id'])).json() + for row in gen_request( + STATE, + "campaigns", + url, + params, + "campaigns", + "hasMore", + ["offset"], + ["offset"], + ): + record = request(get_url("campaigns_detail", campaign_id=row["id"])).json() record = bumble_bee.transform(record, schema, mdata) - singer.write_record("campaigns", record, catalog.get('stream_alias'), time_extracted=utils.now()) + singer.write_record( + "campaigns", + record, + catalog.get("stream_alias"), + time_extracted=utils.now(), + ) return STATE def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path): schema = load_schema(entity_name) - bookmark_key = 'startTimestamp' + bookmark_key = "startTimestamp" - singer.write_schema(entity_name, schema, key_properties, [bookmark_key], catalog.get('stream_alias')) + singer.write_schema( + entity_name, schema, key_properties, [bookmark_key], catalog.get("stream_alias") + ) start = get_start(STATE, entity_name, bookmark_key) LOGGER.info("sync_%s from %s", entity_name, start) @@ -547,26 +675,22 @@ def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path): start_ts = int(utils.strptime_with_tz(start).timestamp() * 1000) url = get_url(entity_name) - mdata = metadata.to_map(catalog.get('metadata')) + mdata = metadata.to_map(catalog.get("metadata")) - if entity_name == 'email_events': - window_size = int(CONFIG['email_chunk_size']) - elif entity_name == 'subscription_changes': - window_size = int(CONFIG['subscription_chunk_size']) + if entity_name == "email_events": + window_size = int(CONFIG["email_chunk_size"]) + elif entity_name == "subscription_changes": + window_size = int(CONFIG["subscription_chunk_size"]) with metrics.record_counter(entity_name) as counter: while start_ts < now_ts: end_ts = start_ts + window_size - params = { - 'startTimestamp': start_ts, - 'endTimestamp': end_ts, - 'limit': 1000, - } + params = {"startTimestamp": start_ts, "endTimestamp": end_ts, "limit": 1000} with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: while True: our_offset = singer.get_offset(STATE, 
entity_name) - if bool(our_offset) and our_offset.get('offset') != None: - params[StateFields.offset] = our_offset.get('offset') + if bool(our_offset) and our_offset.get("offset") != None: + params[StateFields.offset] = our_offset.get("offset") data = request(url, params).json() time_extracted = utils.now() @@ -574,18 +698,31 @@ def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path): for row in data[path]: counter.increment() record = bumble_bee.transform(row, schema, mdata) - singer.write_record(entity_name, - record, - catalog.get('stream_alias'), - time_extracted=time_extracted) - if data.get('hasMore'): - STATE = singer.set_offset(STATE, entity_name, 'offset', data['offset']) + singer.write_record( + entity_name, + record, + catalog.get("stream_alias"), + time_extracted=time_extracted, + ) + if data.get("hasMore"): + STATE = singer.set_offset( + STATE, entity_name, "offset", data["offset"] + ) singer.write_state(STATE) else: STATE = singer.clear_offset(STATE, entity_name) singer.write_state(STATE) break - STATE = singer.write_bookmark(STATE, entity_name, 'startTimestamp', utils.strftime(datetime.datetime.fromtimestamp((start_ts / 1000), datetime.timezone.utc ))) # pylint: disable=line-too-long + STATE = singer.write_bookmark( + STATE, + entity_name, + "startTimestamp", + utils.strftime( + datetime.datetime.fromtimestamp( + (start_ts / 1000), datetime.timezone.utc + ) + ), + ) # pylint: disable=line-too-long singer.write_state(STATE) start_ts = end_ts @@ -593,23 +730,33 @@ def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path): singer.write_state(STATE) return STATE + def sync_subscription_changes(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - STATE = sync_entity_chunked(STATE, catalog, "subscription_changes", ["timestamp", "portalId", "recipient"], - "timeline") + STATE = sync_entity_chunked( + STATE, + catalog, + "subscription_changes", + ["timestamp", "portalId", "recipient"], + "timeline", + ) return STATE + def sync_email_events(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) STATE = sync_entity_chunked(STATE, catalog, "email_events", ["id"], "events") return STATE + def sync_contact_lists(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) + mdata = metadata.to_map(catalog.get("metadata")) schema = load_schema("contact_lists") - bookmark_key = 'updatedAt' - singer.write_schema("contact_lists", schema, ["listId"], [bookmark_key], catalog.get('stream_alias')) + bookmark_key = "updatedAt" + singer.write_schema( + "contact_lists", schema, ["listId"], [bookmark_key], catalog.get("stream_alias") + ) start = get_start(STATE, "contact_lists", bookmark_key) max_bk_value = start @@ -617,28 +764,45 @@ def sync_contact_lists(STATE, ctx): LOGGER.info("sync_contact_lists from %s", start) url = get_url("contact_lists") - params = {'count': 250} + params = {"count": 250} with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in gen_request(STATE, 'contact_lists', url, params, "lists", "has-more", ["offset"], ["offset"]): + for row in gen_request( + STATE, + "contact_lists", + url, + params, + "lists", + "has-more", + ["offset"], + ["offset"], + ): record = bumble_bee.transform(row, schema, mdata) if record[bookmark_key] >= start: - singer.write_record("contact_lists", record, catalog.get('stream_alias'), time_extracted=utils.now()) + singer.write_record( + 
"contact_lists", + record, + catalog.get("stream_alias"), + time_extracted=utils.now(), + ) if record[bookmark_key] >= max_bk_value: max_bk_value = record[bookmark_key] - STATE = singer.write_bookmark(STATE, 'contact_lists', bookmark_key, max_bk_value) + STATE = singer.write_bookmark(STATE, "contact_lists", bookmark_key, max_bk_value) singer.write_state(STATE) return STATE + def sync_forms(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) + mdata = metadata.to_map(catalog.get("metadata")) schema = load_schema("forms") - bookmark_key = 'updatedAt' + bookmark_key = "updatedAt" - singer.write_schema("forms", schema, ["guid"], [bookmark_key], catalog.get('stream_alias')) + singer.write_schema( + "forms", schema, ["guid"], [bookmark_key], catalog.get("stream_alias") + ) start = get_start(STATE, "forms", bookmark_key) max_bk_value = start @@ -652,25 +816,33 @@ def sync_forms(STATE, ctx): record = bumble_bee.transform(row, schema, mdata) if record[bookmark_key] >= start: - singer.write_record("forms", record, catalog.get('stream_alias'), time_extracted=time_extracted) + singer.write_record( + "forms", + record, + catalog.get("stream_alias"), + time_extracted=time_extracted, + ) if record[bookmark_key] >= max_bk_value: max_bk_value = record[bookmark_key] - STATE = singer.write_bookmark(STATE, 'forms', bookmark_key, max_bk_value) + STATE = singer.write_bookmark(STATE, "forms", bookmark_key, max_bk_value) singer.write_state(STATE) return STATE + def sync_workflows(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) + mdata = metadata.to_map(catalog.get("metadata")) schema = load_schema("workflows") - bookmark_key = 'updatedAt' - singer.write_schema("workflows", schema, ["id"], [bookmark_key], catalog.get('stream_alias')) + bookmark_key = "updatedAt" + singer.write_schema( + "workflows", schema, ["id"], [bookmark_key], catalog.get("stream_alias") + ) start = get_start(STATE, "workflows", bookmark_key) max_bk_value = start - STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, max_bk_value) + STATE = singer.write_bookmark(STATE, "workflows", bookmark_key, max_bk_value) singer.write_state(STATE) LOGGER.info("sync_workflows from %s", start) @@ -679,32 +851,40 @@ def sync_workflows(STATE, ctx): time_extracted = utils.now() with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in data['workflows']: + for row in data["workflows"]: record = bumble_bee.transform(row, schema, mdata) if record[bookmark_key] >= start: - singer.write_record("workflows", record, catalog.get('stream_alias'), time_extracted=time_extracted) + singer.write_record( + "workflows", + record, + catalog.get("stream_alias"), + time_extracted=time_extracted, + ) if record[bookmark_key] >= max_bk_value: max_bk_value = record[bookmark_key] - STATE = singer.write_bookmark(STATE, 'workflows', bookmark_key, max_bk_value) + STATE = singer.write_bookmark(STATE, "workflows", bookmark_key, max_bk_value) singer.write_state(STATE) return STATE + def sync_owners(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) + mdata = metadata.to_map(catalog.get("metadata")) schema = load_schema("owners") - bookmark_key = 'updatedAt' + bookmark_key = "updatedAt" - singer.write_schema("owners", schema, ["ownerId"], [bookmark_key], catalog.get('stream_alias')) + 
singer.write_schema( + "owners", schema, ["ownerId"], [bookmark_key], catalog.get("stream_alias") + ) start = get_start(STATE, "owners", bookmark_key) max_bk_value = start LOGGER.info("sync_owners from %s", start) params = {} - if CONFIG.get('include_inactives'): - params['includeInactives'] = "true" + if CONFIG.get("include_inactives"): + params["includeInactives"] = "true" data = request(get_url("owners"), params).json() time_extracted = utils.now() @@ -716,18 +896,30 @@ def sync_owners(STATE, ctx): max_bk_value = record[bookmark_key] if record[bookmark_key] >= start: - singer.write_record("owners", record, catalog.get('stream_alias'), time_extracted=time_extracted) - - STATE = singer.write_bookmark(STATE, 'owners', bookmark_key, max_bk_value) + singer.write_record( + "owners", + record, + catalog.get("stream_alias"), + time_extracted=time_extracted, + ) + + STATE = singer.write_bookmark(STATE, "owners", bookmark_key, max_bk_value) singer.write_state(STATE) return STATE + def sync_engagements(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) + mdata = metadata.to_map(catalog.get("metadata")) schema = load_schema("engagements") - bookmark_key = 'lastUpdated' - singer.write_schema("engagements", schema, ["engagement_id"], [bookmark_key], catalog.get('stream_alias')) + bookmark_key = "lastUpdated" + singer.write_schema( + "engagements", + schema, + ["engagement_id"], + [bookmark_key], + catalog.get("stream_alias"), + ) start = get_start(STATE, "engagements", bookmark_key) # Because this stream doesn't query by `lastUpdated`, it cycles @@ -743,48 +935,73 @@ def sync_engagements(STATE, ctx): max_bk_value = start LOGGER.info("sync_engagements from %s", start) - STATE = singer.write_bookmark(STATE, 'engagements', bookmark_key, start) + STATE = singer.write_bookmark(STATE, "engagements", bookmark_key, start) singer.write_state(STATE) url = get_url("engagements_all") - params = {'limit': 250} + params = {"limit": 250} top_level_key = "results" - engagements = gen_request(STATE, 'engagements', url, params, top_level_key, "hasMore", ["offset"], ["offset"]) + engagements = gen_request( + STATE, + "engagements", + url, + params, + top_level_key, + "hasMore", + ["offset"], + ["offset"], + ) time_extracted = utils.now() with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: for engagement in engagements: record = bumble_bee.transform(engagement, schema, mdata) - if record['engagement'][bookmark_key] >= start: + if record["engagement"][bookmark_key] >= start: # hoist PK and bookmark field to top-level record - record['engagement_id'] = record['engagement']['id'] - record[bookmark_key] = record['engagement'][bookmark_key] - singer.write_record("engagements", record, catalog.get('stream_alias'), time_extracted=time_extracted) - if record['engagement'][bookmark_key] >= max_bk_value: - max_bk_value = record['engagement'][bookmark_key] + record["engagement_id"] = record["engagement"]["id"] + record[bookmark_key] = record["engagement"][bookmark_key] + singer.write_record( + "engagements", + record, + catalog.get("stream_alias"), + time_extracted=time_extracted, + ) + if record["engagement"][bookmark_key] >= max_bk_value: + max_bk_value = record["engagement"][bookmark_key] # Don't bookmark past the start of this sync to account for updated records during the sync. 
new_bookmark = min(utils.strptime_to_utc(max_bk_value), current_sync_start) - STATE = singer.write_bookmark(STATE, 'engagements', bookmark_key, utils.strftime(new_bookmark)) - STATE = write_current_sync_start(STATE, 'engagements', None) + STATE = singer.write_bookmark( + STATE, "engagements", bookmark_key, utils.strftime(new_bookmark) + ) + STATE = write_current_sync_start(STATE, "engagements", None) singer.write_state(STATE) return STATE + def sync_deal_pipelines(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get('metadata')) - schema = load_schema('deal_pipelines') - singer.write_schema('deal_pipelines', schema, ['pipelineId'], catalog.get('stream_alias')) - LOGGER.info('sync_deal_pipelines') - data = request(get_url('deal_pipelines')).json() + mdata = metadata.to_map(catalog.get("metadata")) + schema = load_schema("deal_pipelines") + singer.write_schema( + "deal_pipelines", schema, ["pipelineId"], catalog.get("stream_alias") + ) + LOGGER.info("sync_deal_pipelines") + data = request(get_url("deal_pipelines")).json() with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: for row in data: record = bumble_bee.transform(row, schema, mdata) - singer.write_record("deal_pipelines", record, catalog.get('stream_alias'), time_extracted=utils.now()) + singer.write_record( + "deal_pipelines", + record, + catalog.get("stream_alias"), + time_extracted=utils.now(), + ) singer.write_state(STATE) return STATE + @attr.s class Stream(object): tap_stream_id = attr.ib() @@ -793,37 +1010,51 @@ class Stream(object): replication_key = attr.ib() replication_method = attr.ib() + STREAMS = [ # Do these first as they are incremental - Stream('subscription_changes', sync_subscription_changes, ['timestamp', 'portalId', 'recipient'], 'startTimestamp', 'INCREMENTAL'), - Stream('email_events', sync_email_events, ['id'], 'startTimestamp', 'INCREMENTAL'), - + Stream( + "subscription_changes", + sync_subscription_changes, + ["timestamp", "portalId", "recipient"], + "startTimestamp", + "INCREMENTAL", + ), + Stream("email_events", sync_email_events, ["id"], "startTimestamp", "INCREMENTAL"), # Do these last as they are full table - Stream('forms', sync_forms, ['guid'], 'updatedAt', 'FULL_TABLE'), - Stream('workflows', sync_workflows, ['id'], 'updatedAt', 'FULL_TABLE'), - Stream('owners', sync_owners, ["ownerId"], 'updatedAt', 'FULL_TABLE'), - Stream('campaigns', sync_campaigns, ["id"], None, 'FULL_TABLE'), - Stream('contact_lists', sync_contact_lists, ["listId"], 'updatedAt', 'FULL_TABLE'), - Stream('contacts', sync_contacts, ["vid"], 'versionTimestamp', 'FULL_TABLE'), - Stream('companies', sync_companies, ["companyId"], 'hs_lastmodifieddate', 'FULL_TABLE'), - Stream('deals', sync_deals, ["dealId"], 'hs_lastmodifieddate', 'FULL_TABLE'), - Stream('deal_pipelines', sync_deal_pipelines, ['pipelineId'], None, 'FULL_TABLE'), - Stream('engagements', sync_engagements, ["engagement_id"], 'lastUpdated', 'FULL_TABLE') + Stream("forms", sync_forms, ["guid"], "updatedAt", "FULL_TABLE"), + Stream("workflows", sync_workflows, ["id"], "updatedAt", "FULL_TABLE"), + Stream("owners", sync_owners, ["ownerId"], "updatedAt", "FULL_TABLE"), + Stream("campaigns", sync_campaigns, ["id"], None, "FULL_TABLE"), + Stream("contact_lists", sync_contact_lists, ["listId"], "updatedAt", "FULL_TABLE"), + Stream("contacts", sync_contacts, ["vid"], "versionTimestamp", "FULL_TABLE"), + Stream( + "companies", sync_companies, ["companyId"], "hs_lastmodifieddate", 
"FULL_TABLE" + ), + Stream("deals", sync_deals, ["dealId"], "hs_lastmodifieddate", "FULL_TABLE"), + Stream("deal_pipelines", sync_deal_pipelines, ["pipelineId"], None, "FULL_TABLE"), + Stream( + "engagements", sync_engagements, ["engagement_id"], "lastUpdated", "FULL_TABLE" + ), ] + def get_streams_to_sync(streams, state): target_stream = singer.get_currently_syncing(state) result = streams if target_stream: - skipped = list(itertools.takewhile( - lambda x: x.tap_stream_id != target_stream, streams)) - rest = list(itertools.dropwhile( - lambda x: x.tap_stream_id != target_stream, streams)) - result = rest + skipped # Move skipped streams to end + skipped = list( + itertools.takewhile(lambda x: x.tap_stream_id != target_stream, streams) + ) + rest = list( + itertools.dropwhile(lambda x: x.tap_stream_id != target_stream, streams) + ) + result = rest + skipped # Move skipped streams to end if not result: - raise Exception('Unknown stream {} in state'.format(target_stream)) + raise Exception("Unknown stream {} in state".format(target_stream)) return result + def get_selected_streams(remaining_streams, ctx): selected_streams = [] for stream in remaining_streams: @@ -831,6 +1062,7 @@ def get_selected_streams(remaining_streams, ctx): selected_streams.append(stream) return selected_streams + def do_sync(STATE, catalog): # Clear out keys that are no longer used clean_state(STATE) @@ -840,17 +1072,19 @@ def do_sync(STATE, catalog): remaining_streams = get_streams_to_sync(STREAMS, STATE) selected_streams = get_selected_streams(remaining_streams, ctx) - LOGGER.info('Starting sync. Will sync these streams: %s', - [stream.tap_stream_id for stream in selected_streams]) + LOGGER.info( + "Starting sync. Will sync these streams: %s", + [stream.tap_stream_id for stream in selected_streams], + ) for stream in selected_streams: - LOGGER.info('Syncing %s', stream.tap_stream_id) + LOGGER.info("Syncing %s", stream.tap_stream_id) STATE = singer.set_currently_syncing(STATE, stream.tap_stream_id) singer.write_state(STATE) try: - STATE = stream.sync(STATE, ctx) # pylint: disable=not-callable + STATE = stream.sync(STATE, ctx) # pylint: disable=not-callable except SourceUnavailableException as ex: - error_message = str(ex).replace(CONFIG['access_token'], 10 * '*') + error_message = str(ex).replace(CONFIG["access_token"], 10 * "*") LOGGER.error(error_message) pass @@ -858,91 +1092,120 @@ def do_sync(STATE, catalog): singer.write_state(STATE) LOGGER.info("Sync completed") + class Context(object): def __init__(self, catalog): self.selected_stream_ids = set() - for stream in catalog.get('streams'): - mdata = metadata.to_map(stream['metadata']) - if metadata.get(mdata, (), 'selected'): - self.selected_stream_ids.add(stream['tap_stream_id']) + for stream in catalog.get("streams"): + mdata = metadata.to_map(stream["metadata"]) + if metadata.get(mdata, (), "selected"): + self.selected_stream_ids.add(stream["tap_stream_id"]) self.catalog = catalog - def get_catalog_from_id(self,tap_stream_id): - return [c for c in self.catalog.get('streams') - if c.get('stream') == tap_stream_id][0] + def get_catalog_from_id(self, tap_stream_id): + return [ + c for c in self.catalog.get("streams") if c.get("stream") == tap_stream_id + ][0] + # stream a is dependent on stream STREAM_DEPENDENCIES[a] -STREAM_DEPENDENCIES = { - CONTACTS_BY_COMPANY: 'companies' -} +STREAM_DEPENDENCIES = {CONTACTS_BY_COMPANY: "companies"} + def validate_dependencies(ctx): errs = [] - msg_tmpl = ("Unable to extract {0} data. 
" - "To receive {0} data, you also need to select {1}.") + msg_tmpl = ( + "Unable to extract {0} data. " + "To receive {0} data, you also need to select {1}." + ) - for k,v in STREAM_DEPENDENCIES.items(): + for k, v in STREAM_DEPENDENCIES.items(): if k in ctx.selected_stream_ids and v not in ctx.selected_stream_ids: errs.append(msg_tmpl.format(k, v)) if errs: raise DependencyException(" ".join(errs)) + def load_discovered_schema(stream): schema = load_schema(stream.tap_stream_id) mdata = metadata.new() - mdata = metadata.write(mdata, (), 'table-key-properties', stream.key_properties) - mdata = metadata.write(mdata, (), 'forced-replication-method', stream.replication_method) + mdata = metadata.write(mdata, (), "table-key-properties", stream.key_properties) + mdata = metadata.write( + mdata, (), "forced-replication-method", stream.replication_method + ) if stream.replication_key: - mdata = metadata.write(mdata, (), 'valid-replication-keys', [stream.replication_key]) + mdata = metadata.write( + mdata, (), "valid-replication-keys", [stream.replication_key] + ) - for field_name, props in schema['properties'].items(): + for field_name, props in schema["properties"].items(): if field_name in stream.key_properties or field_name == stream.replication_key: - mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'automatic') + mdata = metadata.write( + mdata, ("properties", field_name), "inclusion", "automatic" + ) else: - mdata = metadata.write(mdata, ('properties', field_name), 'inclusion', 'available') + mdata = metadata.write( + mdata, ("properties", field_name), "inclusion", "available" + ) # The engagements stream has nested data that we synthesize; The engagement field needs to be automatic if stream.tap_stream_id == "engagements": - mdata = metadata.write(mdata, ('properties', 'engagement'), 'inclusion', 'automatic') + mdata = metadata.write( + mdata, ("properties", "engagement"), "inclusion", "automatic" + ) return schema, metadata.to_list(mdata) + def discover_schemas(): - result = {'streams': []} + result = {"streams": []} for stream in STREAMS: - LOGGER.info('Loading schema for %s', stream.tap_stream_id) + LOGGER.info("Loading schema for %s", stream.tap_stream_id) schema, mdata = load_discovered_schema(stream) - result['streams'].append({'stream': stream.tap_stream_id, - 'tap_stream_id': stream.tap_stream_id, - 'schema': schema, - 'metadata': mdata}) + result["streams"].append( + { + "stream": stream.tap_stream_id, + "tap_stream_id": stream.tap_stream_id, + "schema": schema, + "metadata": mdata, + } + ) # Load the contacts_by_company schema - LOGGER.info('Loading schema for contacts_by_company') - contacts_by_company = Stream('contacts_by_company', _sync_contacts_by_company, ['company-id', 'contact-id'], None, 'FULL_TABLE') + LOGGER.info("Loading schema for contacts_by_company") + contacts_by_company = Stream( + "contacts_by_company", + _sync_contacts_by_company, + ["company-id", "contact-id"], + None, + "FULL_TABLE", + ) schema, mdata = load_discovered_schema(contacts_by_company) - result['streams'].append({'stream': CONTACTS_BY_COMPANY, - 'tap_stream_id': CONTACTS_BY_COMPANY, - 'schema': schema, - 'metadata': mdata}) + result["streams"].append( + { + "stream": CONTACTS_BY_COMPANY, + "tap_stream_id": CONTACTS_BY_COMPANY, + "schema": schema, + "metadata": mdata, + } + ) return result + def do_discover(): - LOGGER.info('Loading schemas') + LOGGER.info("Loading schemas") json.dump(discover_schemas(), sys.stdout, indent=4) + def main_impl(): args = utils.parse_args( - 
["redirect_uri", - "client_id", - "client_secret", - "refresh_token", - "start_date"]) + ["redirect_uri", "client_id", "client_secret", "refresh_token", "start_date"] + ) CONFIG.update(args.config) STATE = {} @@ -957,6 +1220,7 @@ def main_impl(): else: LOGGER.info("No properties were selected") + def main(): try: main_impl() @@ -964,5 +1228,6 @@ def main(): LOGGER.critical(exc) raise exc -if __name__ == '__main__': + +if __name__ == "__main__": main() From 6e83a498710cfb645c1dee696259293ce10dfca8 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Mon, 6 Jan 2020 14:45:35 +0100 Subject: [PATCH 13/78] remove string as an option for numbers --- tap_hubspot/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 09f8bb8e..a0fb96f7 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -141,9 +141,7 @@ def get_field_type_schema(field_type): return {"type": ["null", "string"], "format": "date-time"} elif field_type == "number": - # A value like 'N/A' can be returned for this type, - # so we have to let this be a string sometimes - return {"type": ["null", "number", "string"]} + return {"type": ["null", "number"]} else: return {"type": ["null", "string"]} From 929f4b1e4f94ffaaac151f5a84bb4472add083d1 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Mon, 6 Jan 2020 14:56:51 +0100 Subject: [PATCH 14/78] add function to remove all N/A from an object --- tap_hubspot/__init__.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index a0fb96f7..f636e8cd 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -132,6 +132,42 @@ def get_url(endpoint, **kwargs): return BASE_URL + ENDPOINTS[endpoint].format(**kwargs) +def replace_na_with_none(obj): + '''Given a certain object, the function will replace any 'N/A' values with None. + E.g: object = { + "key1" : [{"subkey1": "value1"}, {"subkey2": "N/A"}], + "key2" : "n/a", + "key3" : { + "subkey3" : "n/a", + "subkey4" : "value2" + } + } + self.replace_na_with_none(object) will return: + { + "key1" : [{"subkey1": "value1"}, {"subkey2": None}], + "key2" : None, + "key3" : { + "subkey3" : None, + "subkey4" : "value2" + } + } + ''' + if isinstance(obj, dict): + new_dict = {} + for key, value in obj.items(): + new_dict[key] = replace_na_with_none(value) + return new_dict + + if isinstance(obj, list): + new_list = [] + for value in obj: + new_list.append(replace_na_with_none(value)) + return new_list + + if isinstance(obj, str): + if obj.lower() == 'n/a': + obj = None + return obj def get_field_type_schema(field_type): if field_type == "bool": From 63087ec382302405bc86e7fbed41309bbd9660e3 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. 
Madsen" Date: Mon, 6 Jan 2020 14:57:15 +0100 Subject: [PATCH 15/78] remove N/A from all records/rows when sync'ing --- tap_hubspot/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index f636e8cd..ac15b6a9 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -394,6 +394,7 @@ def _sync_contact_vids(catalog, vids, schema, bumble_bee): mdata = metadata.to_map(catalog.get("metadata")) for record in data.values(): + record = replace_na_with_none(record) record = bumble_bee.transform(record, schema, mdata) singer.write_record( "contacts", @@ -488,6 +489,7 @@ def _sync_contacts_by_company(STATE, ctx, company_id): data = request(url, default_contacts_by_company_params).json() for row in data[path]: counter.increment() + row = replace_na_with_none(row) record = {"company_id": company_id, "contact_id": row} record = bumble_bee.transform(record, schema, mdata) singer.write_record( @@ -681,6 +683,7 @@ def sync_campaigns(STATE, ctx): ["offset"], ): record = request(get_url("campaigns_detail", campaign_id=row["id"])).json() + record = replace_na_with_none(record) record = bumble_bee.transform(record, schema, mdata) singer.write_record( "campaigns", @@ -731,6 +734,7 @@ def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path): for row in data[path]: counter.increment() + row = replace_na_with_none(row) record = bumble_bee.transform(row, schema, mdata) singer.write_record( entity_name, @@ -810,6 +814,7 @@ def sync_contact_lists(STATE, ctx): ["offset"], ["offset"], ): + row = replace_na_with_none(row) record = bumble_bee.transform(row, schema, mdata) if record[bookmark_key] >= start: @@ -847,6 +852,7 @@ def sync_forms(STATE, ctx): with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: for row in data: + row = replace_na_with_none(row) record = bumble_bee.transform(row, schema, mdata) if record[bookmark_key] >= start: @@ -886,6 +892,7 @@ def sync_workflows(STATE, ctx): with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: for row in data["workflows"]: + row = replace_na_with_none(row) record = bumble_bee.transform(row, schema, mdata) if record[bookmark_key] >= start: singer.write_record( @@ -925,6 +932,7 @@ def sync_owners(STATE, ctx): with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: for row in data: + row = replace_na_with_none(row) record = bumble_bee.transform(row, schema, mdata) if record[bookmark_key] >= max_bk_value: max_bk_value = record[bookmark_key] @@ -990,6 +998,7 @@ def sync_engagements(STATE, ctx): with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: for engagement in engagements: + engagement = replace_na_with_none(engagement) record = bumble_bee.transform(engagement, schema, mdata) if record["engagement"][bookmark_key] >= start: # hoist PK and bookmark field to top-level record @@ -1025,6 +1034,7 @@ def sync_deal_pipelines(STATE, ctx): data = request(get_url("deal_pipelines")).json() with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: for row in data: + row = replace_na_with_none(row) record = bumble_bee.transform(row, schema, mdata) singer.write_record( "deal_pipelines", From 204de23f43a6c9e5a43d9f9ec5e2860c0f179ecd Mon Sep 17 00:00:00 2001 From: JingLinDaisy Date: Wed, 8 Jan 2020 10:12:46 +0100 Subject: [PATCH 16/78] black commit --- tap_hubspot/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tap_hubspot/__init__.py 
b/tap_hubspot/__init__.py index ac15b6a9..a9dc2289 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -132,8 +132,9 @@ def get_url(endpoint, **kwargs): return BASE_URL + ENDPOINTS[endpoint].format(**kwargs) + def replace_na_with_none(obj): - '''Given a certain object, the function will replace any 'N/A' values with None. + """Given a certain object, the function will replace any 'N/A' values with None. E.g: object = { "key1" : [{"subkey1": "value1"}, {"subkey2": "N/A"}], "key2" : "n/a", @@ -151,7 +152,7 @@ def replace_na_with_none(obj): "subkey4" : "value2" } } - ''' + """ if isinstance(obj, dict): new_dict = {} for key, value in obj.items(): @@ -165,10 +166,11 @@ def replace_na_with_none(obj): return new_list if isinstance(obj, str): - if obj.lower() == 'n/a': + if obj.lower() == "n/a": obj = None return obj + def get_field_type_schema(field_type): if field_type == "bool": return {"type": ["null", "boolean"]} From 6804118bcc219dda1d66b4c9ffb71dd42bd538a3 Mon Sep 17 00:00:00 2001 From: JingLinDaisy Date: Wed, 8 Jan 2020 10:20:59 +0100 Subject: [PATCH 17/78] change schema dash to underscore --- tap_hubspot/__init__.py | 45 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index a9dc2289..73c2d914 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -244,7 +244,50 @@ def load_schema(entity_name): if entity_name == "contacts": schema["properties"]["associated-company"] = load_associated_company_schema() - return schema + return schema_nodash(schema) + + +def schema_nodash(obj): + type_field = obj.get("type") + type = get_type(type_field) + if not type: + return obj + if not type in ["array", "object"]: + return obj + if "object" == type: + props = obj.get("properties", {}) + new_props = replace_props(props) + obj["properties"] = new_props + if "array" == type: + items = obj.get("items", {}) + obj["items"] = schema_nodash(items) + return obj + + +def get_type(type_field): + if isinstance(type_field, str): + return type_field + if isinstance(type_field, list): + types = set(type_field) + if "null" in types: + types.remove("null") + return types.pop() + return None + + +def replace_props(props): + if not props: + return props + keys = list(props.keys()) + for k in keys: + if not "-" in k: + props[k] = schema_nodash(props[k]) + else: + v = props.pop(k) + new_key = k.replace("-", "_") + new_value = schema_nodash(v) + props[new_key] = new_value + return props # pylint: disable=invalid-name From 359eed1613e97bbf6e81ba90233e9fedbe82231f Mon Sep 17 00:00:00 2001 From: JingLinDaisy Date: Wed, 8 Jan 2020 10:26:12 +0100 Subject: [PATCH 18/78] change record dash to underscore --- tap_hubspot/__init__.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 73c2d914..c42ffc68 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -441,6 +441,7 @@ def _sync_contact_vids(catalog, vids, schema, bumble_bee): for record in data.values(): record = replace_na_with_none(record) record = bumble_bee.transform(record, schema, mdata) + record = record_nodash(record) singer.write_record( "contacts", record, @@ -449,6 +450,23 @@ def _sync_contact_vids(catalog, vids, schema, bumble_bee): ) +def record_nodash(obj): + if not isinstance(obj, dict): # stopplesing criteria + return obj + + for k in obj.keys(): + value = record_nodash(obj[k]) + if not "-" in k: + key = k + else: + obj.pop(k) + key = 
k.replace("-", "_") + + obj[key] = value # recursion + + return obj + + default_contact_params = { "showListMemberships": True, "includeVersion": True, From 7e6d588f64765b040bf99e13ed6b81067615f5fb Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Wed, 8 Jan 2020 14:47:37 +0100 Subject: [PATCH 19/78] black commit --- setup.py | 69 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/setup.py b/setup.py index 60c48295..541abe12 100644 --- a/setup.py +++ b/setup.py @@ -2,40 +2,41 @@ from setuptools import setup -setup(name='tap-hubspot', - version='2.6.4', - description='Singer.io tap for extracting data from the HubSpot API', - author='Stitch', - url='http://singer.io', - classifiers=['Programming Language :: Python :: 3 :: Only'], - py_modules=['tap_hubspot'], - install_requires=[ - 'attrs==16.3.0', - 'singer-python==5.1.1', - 'requests==2.20.0', - 'backoff==1.3.2', - 'requests_mock==1.3.0', - 'nose' - ], - entry_points=''' +setup( + name="tap-hubspot", + version="2.6.4", + description="Singer.io tap for extracting data from the HubSpot API", + author="Stitch", + url="http://singer.io", + classifiers=["Programming Language :: Python :: 3 :: Only"], + py_modules=["tap_hubspot"], + install_requires=[ + "attrs==16.3.0", + "singer-python==5.1.1", + "requests==2.20.0", + "backoff==1.3.2", + "requests_mock==1.3.0", + "nose", + ], + entry_points=""" [console_scripts] tap-hubspot=tap_hubspot:main - ''', - packages=['tap_hubspot'], - package_data = { - 'tap_hubspot/schemas': [ - "campaigns.json", - "companies.json", - "contact_lists.json", - "contacts.json", - "deals.json", - "email_events.json", - "forms.json", - "keywords.json", - "owners.json", - "subscription_changes.json", - "workflows.json", - ], - }, - include_package_data=True, + """, + packages=["tap_hubspot"], + package_data={ + "tap_hubspot/schemas": [ + "campaigns.json", + "companies.json", + "contact_lists.json", + "contacts.json", + "deals.json", + "email_events.json", + "forms.json", + "keywords.json", + "owners.json", + "subscription_changes.json", + "workflows.json", + ], + }, + include_package_data=True, ) From ddf0bc32b583c7b3ca9b78865cee3f639657bbf3 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Wed, 8 Jan 2020 14:48:56 +0100 Subject: [PATCH 20/78] bump dependencies --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 541abe12..851f6708 100644 --- a/setup.py +++ b/setup.py @@ -11,10 +11,10 @@ classifiers=["Programming Language :: Python :: 3 :: Only"], py_modules=["tap_hubspot"], install_requires=[ - "attrs==16.3.0", - "singer-python==5.1.1", - "requests==2.20.0", - "backoff==1.3.2", + "attrs>=16.3.0, <19", + "singer-python>=5.1.1, <5.9", + "requests==2.22.0", + "backoff>=1.3.2, <2", "requests_mock==1.3.0", "nose", ], From c70b97f6c12d8ed3b54519685e0386f68cae47eb Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. 
Madsen" Date: Thu, 16 Jan 2020 10:35:51 +0100 Subject: [PATCH 21/78] lift schema from catalog in contacts --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index c42ffc68..a67febe2 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -481,7 +481,7 @@ def sync_contacts(STATE, ctx): LOGGER.info("sync_contacts from %s", start) max_bk_value = start - schema = load_schema("contacts") + schema = catalog["schema"] singer.write_schema( "contacts", schema, ["vid"], [bookmark_key], catalog.get("stream_alias") From 1ba0193671e9c817cd8fc2575054b2bb5b3e21b3 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Thu, 16 Jan 2020 10:37:22 +0100 Subject: [PATCH 22/78] use catalog schema in contacts_by_companies --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index a67febe2..63e75bea 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -542,8 +542,8 @@ def use_recent_companies_endpoint(response): # NB> to do: support stream aliasing and field selection def _sync_contacts_by_company(STATE, ctx, company_id): - schema = load_schema(CONTACTS_BY_COMPANY) catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) + schema = catalog.get["schema"] mdata = metadata.to_map(catalog.get("metadata")) url = get_url("contacts_by_company", company_id=company_id) path = "vids" From 9881146f33dcfeae512e78713a35d71ec5451ec8 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Thu, 16 Jan 2020 10:38:00 +0100 Subject: [PATCH 23/78] catalog.. schemas.. you get the gist --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 63e75bea..7a948b39 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -582,7 +582,7 @@ def sync_companies(STATE, ctx): bookmark_key = "hs_lastmodifieddate" start = utils.strptime_to_utc(get_start(STATE, "companies", bookmark_key)) LOGGER.info("sync_companies from %s", start) - schema = load_schema("companies") + schema = catalog["schema"] singer.write_schema( "companies", schema, ["companyId"], [bookmark_key], catalog.get("stream_alias") ) From 51ca8b5efd373f7c7f3ed621260fcf112f641cc5 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Thu, 16 Jan 2020 10:40:33 +0100 Subject: [PATCH 24/78] remove unused variables --- tap_hubspot/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 7a948b39..729e1de7 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -662,7 +662,6 @@ def sync_deals(STATE, ctx): start = utils.strptime_with_tz(get_start(STATE, "deals", bookmark_key)) max_bk_value = start LOGGER.info("sync_deals from %s", start) - most_recent_modified_time = start params = {"count": 250, "includeAssociations": False, "properties": []} schema = load_schema("deals") @@ -1249,7 +1248,7 @@ def load_discovered_schema(stream): mdata, (), "valid-replication-keys", [stream.replication_key] ) - for field_name, props in schema["properties"].items(): + for field_name in schema["properties"]: if field_name in stream.key_properties or field_name == stream.replication_key: mdata = metadata.write( mdata, ("properties", field_name), "inclusion", "automatic" From 0c68b3df0ee80ab79ec64775d6cf562159630810 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. 
Madsen" Date: Thu, 16 Jan 2020 10:44:50 +0100 Subject: [PATCH 25/78] campaigns catalog schema --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 729e1de7..5315d5cd 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -727,7 +727,7 @@ def sync_deals(STATE, ctx): def sync_campaigns(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get("metadata")) - schema = load_schema("campaigns") + schema = catalog["schema"] singer.write_schema("campaigns", schema, ["id"], catalog.get("stream_alias")) LOGGER.info("sync_campaigns(NO bookmarks)") url = get_url("campaigns_all") From 107af7dc3dd6647ea10a5773e65b58a31a154705 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Thu, 16 Jan 2020 10:45:44 +0100 Subject: [PATCH 26/78] sync_entity_chunked schema --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 5315d5cd..cb820f70 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -758,7 +758,7 @@ def sync_campaigns(STATE, ctx): def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path): - schema = load_schema(entity_name) + schema = catalog["schema"] bookmark_key = "startTimestamp" singer.write_schema( From 21799738be2b70352da0a503e69ebe76c2cd5172 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Thu, 16 Jan 2020 10:46:19 +0100 Subject: [PATCH 27/78] workflows schema from catalog --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index cb820f70..e73e7e86 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -936,7 +936,7 @@ def sync_forms(STATE, ctx): def sync_workflows(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get("metadata")) - schema = load_schema("workflows") + schema = catalog["schema"] bookmark_key = "updatedAt" singer.write_schema( "workflows", schema, ["id"], [bookmark_key], catalog.get("stream_alias") From 86e85954da94e737d8f863d116b4a913852d9496 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Thu, 16 Jan 2020 10:46:36 +0100 Subject: [PATCH 28/78] owners schema change --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index e73e7e86..3ba85802 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -974,7 +974,7 @@ def sync_workflows(STATE, ctx): def sync_owners(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get("metadata")) - schema = load_schema("owners") + schema = catalog["schema"] bookmark_key = "updatedAt" singer.write_schema( From 416c2d52480a19f383ce9e5b206285a5bb6b0df6 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Thu, 16 Jan 2020 10:46:55 +0100 Subject: [PATCH 29/78] engagements... 
schema --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 3ba85802..b6554e42 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -1015,7 +1015,7 @@ def sync_owners(STATE, ctx): def sync_engagements(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get("metadata")) - schema = load_schema("engagements") + schema = catalog["schema"] bookmark_key = "lastUpdated" singer.write_schema( "engagements", From b5561f350cb36243826cde4029c801363a15ae8b Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Thu, 16 Jan 2020 10:47:10 +0100 Subject: [PATCH 30/78] deal pipeline --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index b6554e42..581a243f 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -1088,7 +1088,7 @@ def sync_engagements(STATE, ctx): def sync_deal_pipelines(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get("metadata")) - schema = load_schema("deal_pipelines") + schema = catalog["schema"] singer.write_schema( "deal_pipelines", schema, ["pipelineId"], catalog.get("stream_alias") ) From a4cc5c277b09004fadfa40183d05629496fba909 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 18 Feb 2020 22:54:28 +0100 Subject: [PATCH 31/78] prettier commit --- tap_hubspot/schemas/contacts.json | 108 +++++++++++++++--------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/tap_hubspot/schemas/contacts.json b/tap_hubspot/schemas/contacts.json index 1c41b8cf..f61a97b5 100644 --- a/tap_hubspot/schemas/contacts.json +++ b/tap_hubspot/schemas/contacts.json @@ -25,70 +25,70 @@ "profile-url": { "type": ["null", "string"] }, - "associated-company" : { - "type": ["null", "object"], - "properties" : {} + "associated-company": { + "type": ["null", "object"], + "properties": {} }, "identity-profiles": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "deleted-changed-timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "saved-at-timestamp": { - "type": ["null", "string"], - "format": "date-time" + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "deleted-changed-timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "saved-at-timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "vid": { + "type": ["null", "integer"] + }, + "identities": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "timestamp": { + "type": ["null", "string"], + "format": "date-time" }, - "vid": { - "type": ["null", "integer"] + "type": { + "type": ["null", "string"] }, - "identities": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "type": { - "type": ["null", "string"] - }, - "value": { - "type": ["null", "string"] - } - } - } + "value": { + "type": ["null", "string"] } + } } + } } + } }, "list-memberships": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "internal-list-id": { - "type": ["null", "integer"] - }, - "is-member": { - "type": ["null", "boolean"] - }, - "static-list-id": { - "type": ["null", 
"integer"] - }, - "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "vid": { - "type": ["null", "integer"] - } - } + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "internal-list-id": { + "type": ["null", "integer"] + }, + "is-member": { + "type": ["null", "boolean"] + }, + "static-list-id": { + "type": ["null", "integer"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "vid": { + "type": ["null", "integer"] + } } + } }, "form-submissions": { "type": ["null", "array"], From 23cf922bf3e4f9bd936b4e85fbe649eea5510379 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 18 Feb 2020 23:49:11 +0100 Subject: [PATCH 32/78] only include fields that are needed --- tap_hubspot/schemas/companies.json | 118 ++++++++++++++- tap_hubspot/schemas/contacts.json | 210 ++++---------------------- tap_hubspot/schemas/deals.json | 97 ++++++++++++ tap_hubspot/schemas/email_events.json | 91 ----------- 4 files changed, 242 insertions(+), 274 deletions(-) diff --git a/tap_hubspot/schemas/companies.json b/tap_hubspot/schemas/companies.json index 286f249d..06982bee 100644 --- a/tap_hubspot/schemas/companies.json +++ b/tap_hubspot/schemas/companies.json @@ -1,11 +1,123 @@ { "type": "object", "properties": { - "portalId": { - "type": ["null", "integer"] - }, "companyId": { "type": ["null", "integer"] + }, + "properties": { + "type": "object", + "properties": { + "name": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, + "country": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, + "domain": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, + "website": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, + "createdate": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"], + "format": "date-time" + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, + "hs_lastmodifieddate": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"], + "format": "date-time" + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + } + } } } } diff --git a/tap_hubspot/schemas/contacts.json b/tap_hubspot/schemas/contacts.json index f61a97b5..54e4d3d2 100644 --- a/tap_hubspot/schemas/contacts.json +++ b/tap_hubspot/schemas/contacts.json @@ -4,190 +4,40 @@ "vid": { "type": ["null", "integer"] }, - "canonical-vid": { - "type": ["null", "integer"] - }, - "merged-vids": { - 
"type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - }, - "portal-id": { - "type": ["null", "integer"] - }, - "is-contact": { - "type": ["null", "boolean"] - }, - "profile-token": { - "type": ["null", "string"] - }, - "profile-url": { - "type": ["null", "string"] - }, - "associated-company": { - "type": ["null", "object"], - "properties": {} - }, - "identity-profiles": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "deleted-changed-timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "saved-at-timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "vid": { - "type": ["null", "integer"] - }, - "identities": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "type": { - "type": ["null", "string"] - }, - "value": { - "type": ["null", "string"] - } - } + "properties": { + "type": "object", + "properties": { + "email": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] } } - } - } - }, - "list-memberships": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "internal-list-id": { - "type": ["null", "integer"] - }, - "is-member": { - "type": ["null", "boolean"] - }, - "static-list-id": { - "type": ["null", "integer"] - }, - "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "vid": { - "type": ["null", "integer"] - } - } - } - }, - "form-submissions": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "conversion-id": { - "type": ["null", "string"] - }, - "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "form-id": { - "type": ["null", "string"] - }, - "portal-id": { - "type": ["null", "integer"] - }, - "page-url": { - "type": ["null", "string"] - }, - "title": { - "type": ["null", "string"] + }, + "createdate": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"], + "format": "date-time" + } } - } - } - }, - "merge-audits": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "canonical-vid": { - "type": ["null", "integer"] - }, - "vid-to-merge": { - "type": ["null", "integer"] - }, - "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "user-id": { - "type": ["null", "integer"] - }, - "num-properties-moved": { - "type": ["null", "integer"] - }, - "merged_from_email": { - "type": ["null", "object"], - "properties": { - "value": { - "type": ["null", "string"] - }, - "source-type": { - "type": ["null", "string"] - }, - "source-id": { - "type": ["null", "string"] - }, - "source-label": { - "type": ["null", "string"] - }, - "source-vids": { - "type": ["null", "array"], - "items": { - "type": ["null", "integer"] - } - }, - "timestamp": { - "type": ["null", "integer"] - }, - "selected": { - "type": ["null", "boolean"] - } + }, + "lastmodifieddate": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"], + "format": "date-time" } - }, - "merged_to_email": { - "type": ["null", "object"], - "properties": { - "value": { - "type": ["null", "string"] - }, - "source-type": { - "type": ["null", "string"] - }, - "source-id": { - "type": ["null", "string"] - }, - "source-label": { - "type": ["null", "string"] - }, - "timestamp": { - "type": ["null", "integer"] - }, - "selected": { - "type": ["null", 
"boolean"] - } + } + }, + "associatedcompanyid": { + "type": "object", + "properties": { + "value": { + "type": ["null", "number"] } } } diff --git a/tap_hubspot/schemas/deals.json b/tap_hubspot/schemas/deals.json index 60d3cc9c..1696df9c 100644 --- a/tap_hubspot/schemas/deals.json +++ b/tap_hubspot/schemas/deals.json @@ -29,6 +29,103 @@ } } } + }, + "properties": { + "type": "object", + "properties": { + "closedate": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"], + "format": "date-time" + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, + "createdate": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"], + "format": "date-time" + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, + "dealtype": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, + "amount_in_home_currency": { + "type": "object", + "properties": { + "value": { + "type": ["null", "number"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, + "dealstage": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + } + } } } } diff --git a/tap_hubspot/schemas/email_events.json b/tap_hubspot/schemas/email_events.json index e74aa07d..06da548c 100644 --- a/tap_hubspot/schemas/email_events.json +++ b/tap_hubspot/schemas/email_events.json @@ -1,110 +1,19 @@ { "type": "object", "properties": { - "appId": { - "type": ["null", "integer"] - }, - "appName": { - "type": ["null", "string"] - }, - "browser": { - "type": ["null", "object"], - "properties": { - "family": { - "type": ["null", "string"] - }, - "name": { - "type": ["null", "string"] - }, - "producer": { - "type": ["null", "string"] - }, - "producerUrl": { - "type": ["null", "string"] - }, - "type": { - "type": ["null", "string"] - }, - "url": { - "type": ["null", "string"] - } - } - }, "created": { "type": ["null", "string"], "format": "date-time" }, - "deviceType": { - "type": ["null", "string"] - }, - "duration": { - "type": ["null", "integer"] - }, - "emailCampaignId": { - "type": ["null", "integer"] - }, - "emailCampaignGroupId": { - "type": ["null", "integer"] - }, - "filteredEvent": { - "type": ["null", "boolean"] - }, - "from": { - "type": ["null", "string"] - }, - "hmid": { - "type": ["null", "string"] - }, "id": { "type": ["null", "string"] }, "ipAddress": { "type": ["null", "string"] }, - "linkId": { - "type": ["null", "integer"] - }, - "location": { - "type": ["null", "object"], - "properties": { - "city": { - "type": ["null", "string"] - }, - "country": { - "type": ["null", "string"] - }, - "state": { - "type": ["null", "string"] - } - } - }, - "portalId": { - "type": ["null", "integer"] - }, "recipient": { "type": ["null", "string"] }, - "response": { - "type": ["null", "string"] - }, - 
"sentBy": { - "type": ["null", "object"], - "properties": { - "created": { - "type": ["null", "string"], - "format": "date-time" - }, - "id": { - "type": ["null", "string"] - } - } - }, - "smtpId": { - "type": ["null", "string"] - }, - "subject": { - "type": ["null", "string"] - }, "type": { "type": ["null", "string"] }, From 69241e060dff0c5c19bf9d7a9af0cb38f6852893 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Wed, 19 Feb 2020 00:16:07 +0100 Subject: [PATCH 33/78] delete unused catalogs --- tap_hubspot/schemas/campaigns.json | 91 ----------------- tap_hubspot/schemas/contact_lists.json | 97 ------------------- tap_hubspot/schemas/contacts_by_company.json | 12 --- tap_hubspot/schemas/owners.json | 72 -------------- tap_hubspot/schemas/subscription_changes.json | 54 ----------- tap_hubspot/schemas/workflows.json | 48 --------- 6 files changed, 374 deletions(-) delete mode 100644 tap_hubspot/schemas/campaigns.json delete mode 100644 tap_hubspot/schemas/contact_lists.json delete mode 100644 tap_hubspot/schemas/contacts_by_company.json delete mode 100644 tap_hubspot/schemas/owners.json delete mode 100644 tap_hubspot/schemas/subscription_changes.json delete mode 100644 tap_hubspot/schemas/workflows.json diff --git a/tap_hubspot/schemas/campaigns.json b/tap_hubspot/schemas/campaigns.json deleted file mode 100644 index 29797daa..00000000 --- a/tap_hubspot/schemas/campaigns.json +++ /dev/null @@ -1,91 +0,0 @@ -{ - "type": "object", - "properties": { - "appId": { - "type": ["null", "integer"] - }, - "appName": { - "type": ["null", "string"] - }, - "contentId": { - "type": ["null", "integer"] - }, - "counters": { - "type": ["null", "object"], - "properties": { - "delievered": { - "type": ["null", "integer"] - }, - "open": { - "type": ["null", "integer"] - }, - "processed": { - "type": ["null", "integer"] - }, - "sent": { - "type": ["null", "integer"] - }, - "deferred": { - "type": ["null", "integer"] - }, - "unsubscribed": { - "type": ["null", "integer"] - }, - "statuschange": { - "type": ["null", "integer"] - }, - "bounce": { - "type": ["null", "integer"] - }, - "mta_dropped": { - "type": ["null", "integer"] - }, - "dropped": { - "type": ["null", "integer"] - }, - "suppressed": { - "type": ["null", "integer"] - }, - "click": { - "type": ["null", "integer"] - }, - "delivered": { - "type": ["null", "integer"] - }, - "forward": { - "type": ["null", "integer"] - }, - "print": { - "type": ["null", "integer"] - }, - "reply": { - "type": ["null", "integer"] - }, - "spamreport": { - "type": ["null", "integer"] - } - } - }, - "id": { - "type": ["null", "integer"] - }, - "name": { - "type": ["null", "string"] - }, - "numIncluded": { - "type": ["null", "integer"] - }, - "numQueued": { - "type": ["null", "integer"] - }, - "subType": { - "type": ["null", "string"] - }, - "subject": { - "type": ["null", "string"] - }, - "type": { - "type": ["null", "string"] - } - } -} diff --git a/tap_hubspot/schemas/contact_lists.json b/tap_hubspot/schemas/contact_lists.json deleted file mode 100644 index d3ad2ae6..00000000 --- a/tap_hubspot/schemas/contact_lists.json +++ /dev/null @@ -1,97 +0,0 @@ -{ - "type": "object", - "properties": { - "parentId": { - "type": ["null", "integer"] - }, - "metaData": { - "type": "object", - "properties": { - "processing": { - "type": ["null", "string"] - }, - "size": { - "type": ["null", "integer"] - }, - "error": { - "type": ["null", "string"] - }, - "lastProcessingStateChangeAt": { - "type": ["null", "string"], - "format": "date-time" - }, - "lastSizeChangeAt": { - "type": ["null", 
"string"], - "format": "date-time" - } - } - }, - "dynamic": { - "type": ["null", "boolean"] - }, - "name": { - "type": ["null", "string"] - }, - "filters": { - "type": "array", - "items": { - "type": "array", - "items": { - "type": "object", - "properties": { - "filterFamily": { - "type": ["null", "string"] - }, - "withinTimeMode": { - "type": ["null", "string"] - }, - "checkPastVersions": { - "type": ["null", "boolean"] - }, - "type": { - "type": ["null", "string"] - }, - "property": { - "type": ["null", "string"] - }, - "value": { - "type": ["null", "string"] - }, - "operator": { - "type": ["null", "string"] - } - } - } - } - }, - "portalId": { - "type": ["null", "integer"] - }, - "createdAt": { - "type": ["null", "string"], - "format": "date-time" - }, - "listId": { - "type": ["null", "integer"] - }, - "updatedAt": { - "type": ["null", "string"], - "format": "date-time" - }, - "internalListId": { - "type": ["null", "integer"] - }, - "readOnly": { - "type": ["null", "boolean"] - }, - "deleteable": { - "type": ["null", "boolean"] - }, - "listType": { - "type": ["null", "string"] - }, - "archived": { - "type": ["null", "boolean"] - } - } -} diff --git a/tap_hubspot/schemas/contacts_by_company.json b/tap_hubspot/schemas/contacts_by_company.json deleted file mode 100644 index 22e7ffbb..00000000 --- a/tap_hubspot/schemas/contacts_by_company.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "type": "object", - "properties": { - "contact_id": { - "type": ["integer"] - }, - "company_id": { - "type": ["integer"] - } - }, - "additionalProperties": false -} diff --git a/tap_hubspot/schemas/owners.json b/tap_hubspot/schemas/owners.json deleted file mode 100644 index 2e3d61d0..00000000 --- a/tap_hubspot/schemas/owners.json +++ /dev/null @@ -1,72 +0,0 @@ -{ - "type": "object", - "properties": { - "portalId": { - "type": ["null", "integer"] - }, - "ownerId": { - "type": ["null", "integer"] - }, - "type": { - "type": ["null", "string"] - }, - "firstName": { - "type": ["null", "string"] - }, - "lastName": { - "type": ["null", "string"] - }, - "email": { - "type": ["null", "string"] - }, - "createdAt": { - "type": ["null", "string"], - "format": "date-time" - }, - "signature": { - "type": ["null", "string"] - }, - "updatedAt": { - "type": ["null", "string"], - "format": "date-time" - }, - "hasContactsAccess" : { - "type": ["null", "boolean"] - }, - "isActive": { - "type": ["null", "boolean"] - }, - "activeUserId" : { - "type": ["null", "integer"] - }, - "userIdIncludingInactive" : { - "type": ["null", "integer"] - }, - "remoteList": { - "type": "array", - "items": { - "type": "object", - "properties": { - "id": { - "type": ["null", "integer"] - }, - "portalId": { - "type": ["null", "integer"] - }, - "ownerId": { - "type": ["null", "integer"] - }, - "remoteId": { - "type": ["null", "string"] - }, - "remoteType": { - "type": ["null", "string"] - }, - "active": { - "type": ["null", "boolean"] - } - } - } - } - } -} diff --git a/tap_hubspot/schemas/subscription_changes.json b/tap_hubspot/schemas/subscription_changes.json deleted file mode 100644 index 1db687d2..00000000 --- a/tap_hubspot/schemas/subscription_changes.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "type": "object", - "properties": { - "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "portalId": { - "type": ["null", "integer"] - }, - "recipient": { - "type": ["null", "string"] - }, - "changes": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "change": { - "type": ["null", "string"] - }, 
- "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "source": { - "type": ["null", "string"] - }, - "portalId": { - "type": ["null", "integer"] - }, - "subscriptionId": { - "type": ["null", "integer"] - }, - "changeType": { - "type": ["null", "string"] - }, - "causedByEvent": { - "type": ["null", "object"], - "properties": { - "id": { - "type": ["null", "string"] - }, - "created": { - "type": ["null", "string"], - "format": "date-time" - } - } - } - } - } - } - } -} diff --git a/tap_hubspot/schemas/workflows.json b/tap_hubspot/schemas/workflows.json deleted file mode 100644 index a72491ae..00000000 --- a/tap_hubspot/schemas/workflows.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "type": "object", - "properties": { - "name": { - "type": ["null", "string"] - }, - "id": { - "type": ["null", "integer"] - }, - "type": { - "type": ["null", "string"] - }, - "enabled": { - "type": ["null", "boolean"] - }, - "insertedAt": { - "type": ["null", "string"], - "format": "date-time" - }, - "updatedAt": { - "type": ["null", "string"], - "format": "date-time" - }, - "personaTagIds": { - "type": "array", - "items": { - "type": "integer" - } - }, - "contactListIds": { - "type": "object", - "properties": { - "enrolled": { - "type": ["null", "integer"] - }, - "active": { - "type": ["null", "integer"] - }, - "steps": { - "type": ["null", "array"], - "items": { - "type": ["null", "string"] - } - } - } - } - } -} From 721b64b090dfc1a315155870311cf8c1a18f3a99 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Wed, 19 Feb 2020 00:52:27 +0100 Subject: [PATCH 34/78] delete custom schema update --- tap_hubspot/__init__.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 581a243f..9c5b9706 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -210,40 +210,12 @@ def parse_custom_schema(entity_name, data): } -def get_custom_schema(entity_name): - return parse_custom_schema( - entity_name, request(get_url(entity_name + "_properties")).json() - ) - - def get_abs_path(path): return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) -def load_associated_company_schema(): - associated_company_schema = load_schema("companies") - # pylint: disable=line-too-long - associated_company_schema["properties"]["company-id"] = associated_company_schema[ - "properties" - ].pop("companyId") - associated_company_schema["properties"]["portal-id"] = associated_company_schema[ - "properties" - ].pop("portalId") - return associated_company_schema - - def load_schema(entity_name): schema = utils.load_json(get_abs_path("schemas/{}.json".format(entity_name))) - if entity_name in ["contacts", "companies", "deals"]: - custom_schema = get_custom_schema(entity_name) - schema["properties"]["properties"] = { - "type": "object", - "properties": custom_schema, - } - - if entity_name == "contacts": - schema["properties"]["associated-company"] = load_associated_company_schema() - return schema_nodash(schema) From 677810f7e94441147b55010e3e5f6523dbae3834 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Wed, 19 Feb 2020 01:03:35 +0100 Subject: [PATCH 35/78] delete unused functions --- tap_hubspot/__init__.py | 201 ---------------------------------------- 1 file changed, 201 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 9c5b9706..84982042 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -510,30 +510,6 @@ def use_recent_companies_endpoint(response): return response["total"] < 
10000 -default_contacts_by_company_params = {"count": 250} - -# NB> to do: support stream aliasing and field selection -def _sync_contacts_by_company(STATE, ctx, company_id): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - schema = catalog.get["schema"] - mdata = metadata.to_map(catalog.get("metadata")) - url = get_url("contacts_by_company", company_id=company_id) - path = "vids" - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - with metrics.record_counter(CONTACTS_BY_COMPANY) as counter: - data = request(url, default_contacts_by_company_params).json() - for row in data[path]: - counter.increment() - row = replace_na_with_none(row) - record = {"company_id": company_id, "contact_id": row} - record = bumble_bee.transform(record, schema, mdata) - singer.write_record( - "contacts_by_company", record, time_extracted=utils.now() - ) - - return STATE - - default_company_params = { "limit": 250, "properties": [ @@ -571,13 +547,6 @@ def sync_companies(STATE, ctx): url = get_url("companies_all") max_bk_value = start - if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - contacts_by_company_schema = load_schema(CONTACTS_BY_COMPANY) - singer.write_schema( - "contacts_by_company", - contacts_by_company_schema, - ["company_id", "contact_id"], - ) with bumble_bee: for row in gen_request( @@ -615,8 +584,6 @@ def sync_companies(STATE, ctx): catalog.get("stream_alias"), time_extracted=utils.now(), ) - if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - STATE = _sync_contacts_by_company(STATE, ctx, record["companyId"]) # Don't bookmark past the start of this sync to account for updated records during the sync. new_bookmark = min(max_bk_value, current_sync_start) STATE = singer.write_bookmark( @@ -803,70 +770,12 @@ def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path): return STATE -def sync_subscription_changes(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - STATE = sync_entity_chunked( - STATE, - catalog, - "subscription_changes", - ["timestamp", "portalId", "recipient"], - "timeline", - ) - return STATE - - def sync_email_events(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) STATE = sync_entity_chunked(STATE, catalog, "email_events", ["id"], "events") return STATE -def sync_contact_lists(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get("metadata")) - schema = load_schema("contact_lists") - bookmark_key = "updatedAt" - singer.write_schema( - "contact_lists", schema, ["listId"], [bookmark_key], catalog.get("stream_alias") - ) - - start = get_start(STATE, "contact_lists", bookmark_key) - max_bk_value = start - - LOGGER.info("sync_contact_lists from %s", start) - - url = get_url("contact_lists") - params = {"count": 250} - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in gen_request( - STATE, - "contact_lists", - url, - params, - "lists", - "has-more", - ["offset"], - ["offset"], - ): - row = replace_na_with_none(row) - record = bumble_bee.transform(row, schema, mdata) - - if record[bookmark_key] >= start: - singer.write_record( - "contact_lists", - record, - catalog.get("stream_alias"), - time_extracted=utils.now(), - ) - if record[bookmark_key] >= max_bk_value: - max_bk_value = record[bookmark_key] - - STATE = singer.write_bookmark(STATE, "contact_lists", bookmark_key, max_bk_value) - singer.write_state(STATE) - - return STATE - - def 
sync_forms(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get("metadata")) @@ -905,85 +814,6 @@ def sync_forms(STATE, ctx): return STATE -def sync_workflows(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get("metadata")) - schema = catalog["schema"] - bookmark_key = "updatedAt" - singer.write_schema( - "workflows", schema, ["id"], [bookmark_key], catalog.get("stream_alias") - ) - start = get_start(STATE, "workflows", bookmark_key) - max_bk_value = start - - STATE = singer.write_bookmark(STATE, "workflows", bookmark_key, max_bk_value) - singer.write_state(STATE) - - LOGGER.info("sync_workflows from %s", start) - - data = request(get_url("workflows")).json() - time_extracted = utils.now() - - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in data["workflows"]: - row = replace_na_with_none(row) - record = bumble_bee.transform(row, schema, mdata) - if record[bookmark_key] >= start: - singer.write_record( - "workflows", - record, - catalog.get("stream_alias"), - time_extracted=time_extracted, - ) - if record[bookmark_key] >= max_bk_value: - max_bk_value = record[bookmark_key] - - STATE = singer.write_bookmark(STATE, "workflows", bookmark_key, max_bk_value) - singer.write_state(STATE) - return STATE - - -def sync_owners(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get("metadata")) - schema = catalog["schema"] - bookmark_key = "updatedAt" - - singer.write_schema( - "owners", schema, ["ownerId"], [bookmark_key], catalog.get("stream_alias") - ) - start = get_start(STATE, "owners", bookmark_key) - max_bk_value = start - - LOGGER.info("sync_owners from %s", start) - - params = {} - if CONFIG.get("include_inactives"): - params["includeInactives"] = "true" - data = request(get_url("owners"), params).json() - - time_extracted = utils.now() - - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in data: - row = replace_na_with_none(row) - record = bumble_bee.transform(row, schema, mdata) - if record[bookmark_key] >= max_bk_value: - max_bk_value = record[bookmark_key] - - if record[bookmark_key] >= start: - singer.write_record( - "owners", - record, - catalog.get("stream_alias"), - time_extracted=time_extracted, - ) - - STATE = singer.write_bookmark(STATE, "owners", bookmark_key, max_bk_value) - singer.write_state(STATE) - return STATE - - def sync_engagements(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get("metadata")) @@ -1090,21 +920,9 @@ class Stream(object): STREAMS = [ - # Do these first as they are incremental - Stream( - "subscription_changes", - sync_subscription_changes, - ["timestamp", "portalId", "recipient"], - "startTimestamp", - "INCREMENTAL", - ), Stream("email_events", sync_email_events, ["id"], "startTimestamp", "INCREMENTAL"), # Do these last as they are full table Stream("forms", sync_forms, ["guid"], "updatedAt", "FULL_TABLE"), - Stream("workflows", sync_workflows, ["id"], "updatedAt", "FULL_TABLE"), - Stream("owners", sync_owners, ["ownerId"], "updatedAt", "FULL_TABLE"), - Stream("campaigns", sync_campaigns, ["id"], None, "FULL_TABLE"), - Stream("contact_lists", sync_contact_lists, ["listId"], "updatedAt", "FULL_TABLE"), Stream("contacts", sync_contacts, ["vid"], "versionTimestamp", "FULL_TABLE"), Stream( "companies", 
sync_companies, ["companyId"], "hs_lastmodifieddate", "FULL_TABLE" @@ -1252,25 +1070,6 @@ def discover_schemas(): "metadata": mdata, } ) - # Load the contacts_by_company schema - LOGGER.info("Loading schema for contacts_by_company") - contacts_by_company = Stream( - "contacts_by_company", - _sync_contacts_by_company, - ["company-id", "contact-id"], - None, - "FULL_TABLE", - ) - schema, mdata = load_discovered_schema(contacts_by_company) - - result["streams"].append( - { - "stream": CONTACTS_BY_COMPANY, - "tap_stream_id": CONTACTS_BY_COMPANY, - "schema": schema, - "metadata": mdata, - } - ) return result From 6a75d2596e7db3fa3b9d9333156cb730972bb9a9 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Wed, 19 Feb 2020 09:54:56 +0100 Subject: [PATCH 36/78] add missed field --- tap_hubspot/schemas/deals.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tap_hubspot/schemas/deals.json b/tap_hubspot/schemas/deals.json index 1696df9c..597eeb4f 100644 --- a/tap_hubspot/schemas/deals.json +++ b/tap_hubspot/schemas/deals.json @@ -33,6 +33,24 @@ "properties": { "type": "object", "properties": { + "pipeline": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, "closedate": { "type": "object", "properties": { From 87ccbe14c80d4f6c1c6ecc38a16147b782195a90 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Fri, 28 Feb 2020 12:30:12 +0100 Subject: [PATCH 37/78] do not show get request --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 84982042..125448ac 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -346,7 +346,7 @@ def request(url, params=None): headers["User-Agent"] = CONFIG["user_agent"] req = requests.Request("GET", url, params=params, headers=headers).prepare() - LOGGER.info("GET %s", req.url) + #LOGGER.info("GET %s", req.url) with metrics.http_request_timer(parse_source_from_url(url)) as timer: resp = SESSION.send(req) timer.tags[metrics.Tag.http_status_code] = resp.status_code From 332b59c157cc5bbf4e8b5aec8cfe3a088c75a700 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Sun, 15 Mar 2020 18:39:58 +0100 Subject: [PATCH 38/78] add hs_lastmodifieddate field --- tap_hubspot/schemas/deals.json | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tap_hubspot/schemas/deals.json b/tap_hubspot/schemas/deals.json index 597eeb4f..cd8e9be6 100644 --- a/tap_hubspot/schemas/deals.json +++ b/tap_hubspot/schemas/deals.json @@ -51,6 +51,25 @@ } } }, + "hs_lastmodifieddate": { + "type": "object", + "properties": { + "value": { + "type": ["null", "string"], + "format": "date-time" + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "source": { + "type": ["null", "string"] + }, + "sourceId": { + "type": ["null", "string"] + } + } + }, "closedate": { "type": "object", "properties": { From 24dc42b5f0874f1b27896abb956ec2a5e76566eb Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Sun, 15 Mar 2020 23:09:53 +0100 Subject: [PATCH 39/78] sync table companies incrementally --- tap_hubspot/__init__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 125448ac..8a72d4f2 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -576,14 +576,14 
@@ def sync_companies(STATE, ctx): if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time - - record = bumble_bee.transform(row, schema, mdata) - singer.write_record( - "companies", - record, - catalog.get("stream_alias"), - time_extracted=utils.now(), - ) + if not modified_time or modified_time >= start: + record = bumble_bee.transform(row, schema, mdata) + singer.write_record( + "companies", + record, + catalog.get("stream_alias"), + time_extracted=utils.now(), + ) # Don't bookmark past the start of this sync to account for updated records during the sync. new_bookmark = min(max_bk_value, current_sync_start) STATE = singer.write_bookmark( From edc57423e736a148fd6afd2cd0c9668a76bec9b0 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Mon, 16 Mar 2020 11:12:34 +0100 Subject: [PATCH 40/78] update deal-pipeline schema --- tap_hubspot/schemas/deal_pipelines.json | 36 ++++++++++++++++++------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/tap_hubspot/schemas/deal_pipelines.json b/tap_hubspot/schemas/deal_pipelines.json index e23a6445..423d42a0 100644 --- a/tap_hubspot/schemas/deal_pipelines.json +++ b/tap_hubspot/schemas/deal_pipelines.json @@ -4,6 +4,20 @@ "pipelineId": { "type": ["null", "string"] }, + "createdAt": { + "type": ["null", "string"], + "format": "date-time" + }, + "updatedAt": { + "type": ["null", "string"], + "format": "date-time" + }, + "objectType": { + "type": ["null", "string"] + }, + "objectTypeId": { + "type": ["null", "string"] + }, "stages": { "type": ["null", "array"], "items": { @@ -15,20 +29,22 @@ "label": { "type": ["null", "string"] }, - "probability": { - "type": ["null", "number"] - }, - "active": { - "type": ["null", "boolean"] - }, "displayOrder": { "type": ["null", "integer"] }, - "closedWon": { - "type": ["null", "boolean"] + "metadata": { + "type": "object", + "properties": { + "isClosed": { + "type": "string" + }, + "probability": { + "type": "string" + } + } } } - } + } }, "label": { "type": ["null", "string"] @@ -40,7 +56,7 @@ "type": ["null", "integer"] }, "staticDefault": { - "type": ["null", "boolean"] + "type": ["null", "boolean"] } } } From 2969fccb6a8d6ec31674f17b01f212b10eb050bb Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Mon, 16 Mar 2020 11:13:24 +0100 Subject: [PATCH 41/78] /deals/v1/pipelines endpoint is deprecated --- tap_hubspot/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 8a72d4f2..bb51f2a2 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -82,7 +82,7 @@ class StateFields: "deals_all": "/deals/v1/deal/paged", "deals_recent": "/deals/v1/deal/recent/modified", "deals_detail": "/deals/v1/deal/{deal_id}", - "deal_pipelines": "/deals/v1/pipelines", + "deal_pipelines": "/crm-pipelines/v1/pipelines/deals", "campaigns_all": "/email/public/v1/campaigns/by-id", "campaigns_detail": "/email/public/v1/campaigns/{campaign_id}", "engagements_all": "/engagements/v1/engagements/paged", From 2fd365762a44c7317d2d2f95a6dec8195df73823 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Mon, 16 Mar 2020 11:14:59 +0100 Subject: [PATCH 42/78] sync deal-pipeline incrementally with new replication key --- tap_hubspot/__init__.py | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index bb51f2a2..42bb4eea 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -888,24 +888,44 @@ def sync_engagements(STATE, 
ctx): def sync_deal_pipelines(STATE, ctx): + bookmark_key = "updatedAt" + start = utils.strptime_with_tz(get_start(STATE, "deal_pipelines", bookmark_key)) + max_bk_value = start catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get("metadata")) schema = catalog["schema"] singer.write_schema( "deal_pipelines", schema, ["pipelineId"], catalog.get("stream_alias") ) - LOGGER.info("sync_deal_pipelines") + LOGGER.info(f"sync deal_pipelines from {start}") + data = request(get_url("deal_pipelines")).json() with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in data: + for row in data["results"]: + modified_time = None row = replace_na_with_none(row) - record = bumble_bee.transform(row, schema, mdata) - singer.write_record( - "deal_pipelines", - record, - catalog.get("stream_alias"), - time_extracted=utils.now(), + if bookmark_key in row: + timestamp_millis = row[bookmark_key] + elif "createdAt" in row: + # Hubspot returns timestamps in millis + timestamp_millis = row["createdAt"] + modified_time = datetime.datetime.fromtimestamp( + timestamp_millis / 1000.0, datetime.timezone.utc ) + if modified_time and modified_time >= max_bk_value: + max_bk_value = modified_time + + if not modified_time or modified_time > start: + record = bumble_bee.transform(row, schema, mdata) + singer.write_record( + "deal_pipelines", + record, + catalog.get("stream_alias"), + time_extracted=utils.now(), + ) + STATE = singer.write_bookmark( + STATE, "deal_pipelines", bookmark_key, utils.strftime(max_bk_value) + ) singer.write_state(STATE) return STATE @@ -928,7 +948,9 @@ class Stream(object): "companies", sync_companies, ["companyId"], "hs_lastmodifieddate", "FULL_TABLE" ), Stream("deals", sync_deals, ["dealId"], "hs_lastmodifieddate", "FULL_TABLE"), - Stream("deal_pipelines", sync_deal_pipelines, ["pipelineId"], None, "FULL_TABLE"), + Stream( + "deal_pipelines", sync_deal_pipelines, ["pipelineId"], "updatedAt", "FULL_TABLE" + ), Stream( "engagements", sync_engagements, ["engagement_id"], "lastUpdated", "FULL_TABLE" ), From b96846552a324f093f7c4937c89d341cd387da51 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Mon, 16 Mar 2020 11:15:17 +0100 Subject: [PATCH 43/78] avoid repetition of data --- tap_hubspot/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 42bb4eea..392e637c 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -481,7 +481,7 @@ def sync_contacts(STATE, ctx): ) ) - if not modified_time or modified_time >= start: + if not modified_time or modified_time > start: vids.append(row["vid"]) if modified_time and modified_time >= max_bk_value: @@ -576,7 +576,7 @@ def sync_companies(STATE, ctx): if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time - if not modified_time or modified_time >= start: + if not modified_time or modified_time > start: record = bumble_bee.transform(row, schema, mdata) singer.write_record( "companies", @@ -646,7 +646,7 @@ def sync_deals(STATE, ctx): if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time - if not modified_time or modified_time >= start: + if not modified_time or modified_time > start: record = bumble_bee.transform(row, schema, mdata) singer.write_record( "deals", @@ -798,7 +798,7 @@ def sync_forms(STATE, ctx): row = replace_na_with_none(row) record = bumble_bee.transform(row, schema, mdata) - if record[bookmark_key] >= start: + if 
record[bookmark_key] > start: singer.write_record( "forms", record, @@ -864,7 +864,7 @@ def sync_engagements(STATE, ctx): for engagement in engagements: engagement = replace_na_with_none(engagement) record = bumble_bee.transform(engagement, schema, mdata) - if record["engagement"][bookmark_key] >= start: + if record["engagement"][bookmark_key] > start: # hoist PK and bookmark field to top-level record record["engagement_id"] = record["engagement"]["id"] record[bookmark_key] = record["engagement"][bookmark_key] From 3b3fb85861863ed1efb8326b2f340fdcbd94b1d9 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 00:45:45 +0200 Subject: [PATCH 44/78] delete test files --- tap_hubspot/tests/__init__.py | 0 tap_hubspot/tests/test_bookmarks.py | 68 ----------------- tap_hubspot/tests/test_deals.py | 47 ------------ tap_hubspot/tests/test_get_streams_to_sync.py | 50 ------------- tap_hubspot/tests/test_offsets.py | 61 --------------- tap_hubspot/tests/utils.py | 74 ------------------- 6 files changed, 300 deletions(-) delete mode 100644 tap_hubspot/tests/__init__.py delete mode 100644 tap_hubspot/tests/test_bookmarks.py delete mode 100644 tap_hubspot/tests/test_deals.py delete mode 100644 tap_hubspot/tests/test_get_streams_to_sync.py delete mode 100644 tap_hubspot/tests/test_offsets.py delete mode 100644 tap_hubspot/tests/utils.py diff --git a/tap_hubspot/tests/__init__.py b/tap_hubspot/tests/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tap_hubspot/tests/test_bookmarks.py b/tap_hubspot/tests/test_bookmarks.py deleted file mode 100644 index 387a0616..00000000 --- a/tap_hubspot/tests/test_bookmarks.py +++ /dev/null @@ -1,68 +0,0 @@ -import unittest -import singer.bookmarks -import singer.messages -import tap_hubspot -import pprint -import os -from tap_hubspot.tests import utils - -LOGGER = singer.get_logger() - -class Bookmarks(unittest.TestCase): - def setUp(self): - utils.verify_environment_vars() - utils.seed_tap_hubspot_config() - singer.write_bookmark = utils.our_write_bookmark - singer.write_state = utils.our_write_state - singer.write_record = utils.our_write_record - singer.write_schema = utils.our_write_schema - - #NB> test account must have > 2 contacts for this to work - def sync_contacts(self): - STATE = utils.get_clear_state() - catalog = {'stream_alias' : 'hubspot_contacts'} - - tap_hubspot.default_contact_params['count'] = 1 - - STATE = tap_hubspot.sync_contacts(STATE, catalog) - #offset has been cleared - self.assertEqual(utils.caught_state['bookmarks']['contacts']['offset'], {}) - - #some bookmark has been recorded in the state - self.assertNotEqual(utils.caught_state['bookmarks']['contacts']['lastmodifieddate'], None) - - #should sync some contacts - # LOGGER.info('A caught record: {}'.format(utils.caught_records['contacts'][0])) - self.assertGreater(len(utils.caught_records['contacts']),1) - self.assertEqual(set(utils.caught_records.keys()), {'contacts'}) - self.assertEqual(utils.caught_pks, {'contacts': ['vid']}) - - utils.caught_records = [] - STATE = tap_hubspot.sync_contacts(STATE, catalog) - - #no new records thanks to bookmark - self.assertEqual(len(utils.caught_records),0) - - def sync_companies(self): - STATE = utils.get_clear_state() - - catalog = {'stream_alias' : 'hubspot_companies'} - STATE = tap_hubspot.sync_companies(STATE, catalog) - - #offset has been cleared - self.assertEqual(utils.caught_state['bookmarks']['companies']['offset'], {}) - - #some bookmark has been recorded in the state - 
self.assertNotEqual(utils.caught_state['bookmarks']['companies']['hs_lastmodifieddate'], None) - - #should sync some contacts && some hubspot_contacts_by_company - self.assertGreater(len(utils.caught_records), 0) - self.assertEqual(set(utils.caught_records.keys()), {'companies', 'hubspot_contacts_by_company'}) - - self.assertEqual(utils.caught_pks, {'companies': ['companyId'], 'hubspot_contacts_by_company': ['company-id', 'contact-id']}) - - utils.caught_records = [] - STATE = tap_hubspot.sync_companies(STATE, catalog) - - #no new records thanks to bookmark - self.assertEqual(len(utils.caught_records),0) diff --git a/tap_hubspot/tests/test_deals.py b/tap_hubspot/tests/test_deals.py deleted file mode 100644 index 7dacf1c9..00000000 --- a/tap_hubspot/tests/test_deals.py +++ /dev/null @@ -1,47 +0,0 @@ -from tap_hubspot import sync_deals -import unittest -from unittest.mock import patch, ANY - - -class TestDealsToSync(unittest.TestCase): - - @patch('tap_hubspot.Context.get_catalog_from_id', return_value={"metadata":""}) - @patch('singer.metadata.to_map', return_value={}) - @patch('singer.utils.strptime_with_tz') - @patch('singer.utils.strftime') - @patch('tap_hubspot.load_schema') - @patch('tap_hubspot.gen_request', return_value=list()) - def test_associations_are_not_validated(self, - mocked_gen_request, - mocked_catalog_from_id, - mocked_metadata_map, - mocked_utils_strptime, - mocked_utils_strftime, - mocked_load_schema): - - sync_deals({}, mocked_catalog_from_id) - - expected_param = {'count': 250, 'includeAssociations': False, 'properties': []} - - mocked_gen_request.assert_called_once_with(ANY, ANY, ANY, expected_param, ANY, ANY, ANY, ANY) - - - @patch('tap_hubspot.Context.get_catalog_from_id', return_value={"metadata":""}) - @patch('singer.metadata.to_map', return_value={"associations" :{"selected" : True}}) - @patch('singer.utils.strptime_with_tz') - @patch('singer.utils.strftime') - @patch('tap_hubspot.load_schema') - @patch('tap_hubspot.gen_request', return_value=list()) - def test_associations_are_validated(self, - mocked_gen_request, - mocked_catalog_from_id, - mocked_metadata_map, - mocked_utils_strptime, - mocked_utils_strftime, - mocked_load_schema): - - sync_deals({}, mocked_catalog_from_id) - - expected_param = {'count': 250, 'includeAssociations': True, 'properties': []} - - mocked_gen_request.assert_called_once_with(ANY, ANY, ANY, expected_param, ANY, ANY, ANY, ANY) diff --git a/tap_hubspot/tests/test_get_streams_to_sync.py b/tap_hubspot/tests/test_get_streams_to_sync.py deleted file mode 100644 index 92c70c8f..00000000 --- a/tap_hubspot/tests/test_get_streams_to_sync.py +++ /dev/null @@ -1,50 +0,0 @@ -from contextlib import contextmanager -from io import StringIO -from singer import utils -from tap_hubspot import * -import time -import datetime -import json -import requests_mock -import unittest - -class TestGetStreamsToSync(unittest.TestCase): - - def setUp(self): - self.streams = [ - Stream('a', 'a', [], None, None), - Stream('b', 'b', [], None, None), - Stream('c', 'c', [], None, None), - ] - - def test_get_streams_to_sync_with_no_this_stream(self): - state = {'this_stream': None} - self.assertEqual(self.streams, get_streams_to_sync(self.streams, state)) - - def test_get_streams_to_sync_with_first_stream(self): - state = {'currently_syncing': 'a'} - - result = get_streams_to_sync(self.streams, state) - - parsed_result = [s.tap_stream_id for s in result] - self.assertEqual(parsed_result, ['a', 'b', 'c']) - - def test_get_streams_to_sync_with_middle_stream(self): - state = 
{'currently_syncing': 'b'} - - result = get_streams_to_sync(self.streams, state) - - parsed_result = [s.tap_stream_id for s in result] - self.assertEqual(parsed_result, ['b', 'c', 'a']) - - def test_get_streams_to_sync_with_last_stream(self): - state = {'currently_syncing': 'c'} - - result = get_streams_to_sync(self.streams, state) - - parsed_result = [s.tap_stream_id for s in result] - self.assertEqual(parsed_result, ['c', 'a', 'b']) - - def test_parse_source_from_url_succeeds(self): - url = "https://api.hubapi.com/companies/v2/companies/recent/modified" - self.assertEqual('companies', parse_source_from_url(url)) diff --git a/tap_hubspot/tests/test_offsets.py b/tap_hubspot/tests/test_offsets.py deleted file mode 100644 index 22208ff3..00000000 --- a/tap_hubspot/tests/test_offsets.py +++ /dev/null @@ -1,61 +0,0 @@ -import unittest -import logging -import singer -import tap_hubspot -import singer.bookmarks -from tap_hubspot.tests import utils - -LOGGER = singer.get_logger() - -def set_offset_with_exception(state, tap_stream_id, offset_key, offset_value): - LOGGER.info("set_offset_with_exception: {}".format(utils.caught_state)) - utils.caught_state = singer.bookmarks.set_offset(state, tap_stream_id, offset_key, offset_value) - raise Exception("simulated") - -class Offsets(unittest.TestCase): - def setUp(self): - utils.verify_environment_vars() - utils.seed_tap_hubspot_config() - singer.write_bookmark = utils.our_write_bookmark - singer.write_state = utils.our_write_state - singer.write_record = utils.our_write_record - singer.write_schema = utils.our_write_schema - singer.set_offset = set_offset_with_exception - - #NB> test accounts must have > 1 companies for this to work - def sync_companies(self): - simulated_exception = None - STATE = utils.get_clear_state() - catalog = {'stream_alias' : 'hubspot_companies'} - - #change count = 1 - tap_hubspot.default_company_params['limit'] = 1 - - try: - STATE = tap_hubspot.sync_companies(STATE, catalog) - except Exception as ex: - simulated_exception = ex - # logging.exception('strange') - - self.assertIsNot(simulated_exception, None) - - - self.assertEqual(set(utils.caught_records.keys()), {'companies', 'hubspot_contacts_by_company'}) - - #should only emit 1 company record because of the limit - self.assertEqual(len(utils.caught_records['companies']), 1) - self.assertGreater(len(utils.caught_records['hubspot_contacts_by_company']), 0) - - #offset should be set in state - LOGGER.info("utils.caught_state: {}".format(utils.caught_state)) - self.assertNotEqual(utils.caught_state['bookmarks']['companies']['offset'], {}) - - #no bookmark though - self.assertEqual(utils.caught_state['bookmarks']['companies']['hs_lastmodifieddate'], None) - - #change count back to 250 - tap_hubspot.default_company_params['limit'] = 250 - - #call do_sync and verify: - # 1)sync_companies is called first - # 2)previous retrieved record is not retrieved again diff --git a/tap_hubspot/tests/utils.py b/tap_hubspot/tests/utils.py deleted file mode 100644 index 1fe8a3c6..00000000 --- a/tap_hubspot/tests/utils.py +++ /dev/null @@ -1,74 +0,0 @@ -import singer -import singer.bookmarks -import os -import tap_hubspot - -LOGGER = singer.get_logger() - -caught_records = {} -caught_bookmarks = [] -caught_state = {} -caught_schema = {} -caught_pks = {} - - -def verify_environment_vars(): - missing_envs = [x for x in [os.getenv('TAP_HUBSPOT_REDIRECT_URI'), - os.getenv('TAP_HUBSPOT_CLIENT_ID'), - os.getenv('TAP_HUBSPOT_CLIENT_SECRET'), - os.getenv('TAP_HUBSPOT_REFRESH_TOKEN')] if x == None] - 
if len(missing_envs) != 0: - #pylint: disable=line-too-long - raise Exception("set TAP_HUBSPOT_REDIRECT_URI, TAP_HUBSPOT_CLIENT_ID, TAP_HUBSPOT_CLIENT_SECRET, TAP_HUBSPOT_REFRESH_TOKEN") - -def seed_tap_hubspot_config(): - tap_hubspot.CONFIG = { - "access_token": None, - "token_expires": None, - - "redirect_uri": os.environ['TAP_HUBSPOT_REDIRECT_URI'], - "client_id": os.environ['TAP_HUBSPOT_CLIENT_ID'], - "client_secret": os.environ['TAP_HUBSPOT_CLIENT_SECRET'], - "refresh_token": os.environ['TAP_HUBSPOT_REFRESH_TOKEN'], - "start_date": "2001-01-01T00:00:00Z" - } - -def get_clear_state(): - return { - "bookmarks": { - "contacts": { - "offset": {}, - "lastmodifieddate": None - }, - "companies": { - "offset": {}, - "hs_lastmodifieddate": None - } - - }, - "currently_syncing": None - } - - -#pylint: disable=line-too-long -def our_write_bookmark(state, table_name, bookmark_key, bookmark_value): - caught_bookmarks.append([bookmark_key, bookmark_value]) - state = singer.bookmarks.write_bookmark(state, table_name, bookmark_key, bookmark_value) - return state - -def our_write_schema(table_name, schema, pks, stream_alias=None): - global caught_pks - caught_pks[table_name] = pks - caught_schema[table_name] = schema - -def our_write_state(state): - LOGGER.info("our_write_state: {}".format(state)) - global caught_state - caught_state = state - return state - -def our_write_record(table_name, record, stream_alias=None): - if caught_records.get(table_name) == None: - caught_records[table_name] = [] - - caught_records[table_name].append(record) From 5eb3bbcb36554b8d44f078c636988c4b9e0ca6d2 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 00:49:13 +0200 Subject: [PATCH 45/78] include major cli functions --- tap_hubspot/__init__.py | 1180 +++------------------------------------ 1 file changed, 80 insertions(+), 1100 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 392e637c..ffb4a6ba 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -1,1131 +1,111 @@ #!/usr/bin/env python3 -import datetime -import pytz -import itertools import os -import re import sys import json - -import attr -import backoff -import requests import singer -import singer.messages -import singer.metrics as metrics -from singer import metadata -from singer import utils -from singer import ( - transform, - UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING, - Transformer, - _transform_datetime, -) - -LOGGER = singer.get_logger() -SESSION = requests.Session() - - -class InvalidAuthException(Exception): - pass - - -class SourceUnavailableException(Exception): - pass - - -class DependencyException(Exception): - pass - - -class DataFields: - offset = "offset" - - -class StateFields: - offset = "offset" - this_stream = "this_stream" - - -BASE_URL = "https://api.hubapi.com" - -CONTACTS_BY_COMPANY = "contacts_by_company" - -DEFAULT_CHUNK_SIZE = 1000 * 60 * 60 * 24 - -CONFIG = { - "access_token": None, - "token_expires": None, - "email_chunk_size": DEFAULT_CHUNK_SIZE, - "subscription_chunk_size": DEFAULT_CHUNK_SIZE, - # in config.json - "redirect_uri": None, - "client_id": None, - "client_secret": None, - "refresh_token": None, - "start_date": None, - "hapikey": None, - "include_inactives": None, +from singer import utils, metadata, Catalog, CatalogEntry, Schema +from tap_hubspot.stream import Stream + +KEY_PROPERTIES = "id" +STREAMS = { + "email_events": { + "valid_replication_keys": ["startTimestamp"], + "key_properties": "id", + }, + "forms": {"valid_replication_keys": ["updatedAt"], 
"key_properties": "guid",}, + "contacts": { + "valid_replication_keys": ["versionTimestamp"], + "key_properties": "vid", + }, + "companies": { + "valid_replication_keys": ["hs_lastmodifieddate"], + "key_properties": "companyId", + }, + "deals": { + "valid_replication_keys": ["hs_lastmodifieddate"], + "key_properties": "dealId", + }, + "deal_pipelines": { + "valid_replication_keys": ["updatedAt"], + "key_properties": "pipelineId", + }, + "engagements": { + "valid_replication_keys": ["lastUpdated"], + "key_properties": "engagement_id", + }, } - -ENDPOINTS = { - "contacts_properties": "/properties/v1/contacts/properties", - "contacts_all": "/contacts/v1/lists/all/contacts/all", - "contacts_recent": "/contacts/v1/lists/recently_updated/contacts/recent", - "contacts_detail": "/contacts/v1/contact/vids/batch/", - "companies_properties": "/companies/v2/properties", - "companies_all": "/companies/v2/companies/paged", - "companies_recent": "/companies/v2/companies/recent/modified", - "companies_detail": "/companies/v2/companies/{company_id}", - "contacts_by_company": "/companies/v2/companies/{company_id}/vids", - "deals_properties": "/properties/v1/deals/properties", - "deals_all": "/deals/v1/deal/paged", - "deals_recent": "/deals/v1/deal/recent/modified", - "deals_detail": "/deals/v1/deal/{deal_id}", - "deal_pipelines": "/crm-pipelines/v1/pipelines/deals", - "campaigns_all": "/email/public/v1/campaigns/by-id", - "campaigns_detail": "/email/public/v1/campaigns/{campaign_id}", - "engagements_all": "/engagements/v1/engagements/paged", - "subscription_changes": "/email/public/v1/subscriptions/timeline", - "email_events": "/email/public/v1/events", - "contact_lists": "/contacts/v1/lists", - "forms": "/forms/v2/forms", - "workflows": "/automation/v3/workflows", - "owners": "/owners/v2/owners", -} - - -def get_start(state, tap_stream_id, bookmark_key): - current_bookmark = singer.get_bookmark(state, tap_stream_id, bookmark_key) - if current_bookmark is None: - return CONFIG["start_date"] - return current_bookmark - - -def get_current_sync_start(state, tap_stream_id): - current_sync_start_value = singer.get_bookmark( - state, tap_stream_id, "current_sync_start" - ) - if current_sync_start_value is None: - return current_sync_start_value - return utils.strptime_to_utc(current_sync_start_value) - - -def write_current_sync_start(state, tap_stream_id, start): - value = start - if start is not None: - value = utils.strftime(start) - return singer.write_bookmark(state, tap_stream_id, "current_sync_start", value) - - -def clean_state(state): - """ Clear deprecated keys out of state. """ - for stream, bookmark_map in state.get("bookmarks", {}).items(): - if "last_sync_duration" in bookmark_map: - LOGGER.info("{} - Removing last_sync_duration from state.".format(stream)) - state["bookmarks"][stream].pop("last_sync_duration", None) - - -def get_url(endpoint, **kwargs): - if endpoint not in ENDPOINTS: - raise ValueError("Invalid endpoint {}".format(endpoint)) - - return BASE_URL + ENDPOINTS[endpoint].format(**kwargs) - - -def replace_na_with_none(obj): - """Given a certain object, the function will replace any 'N/A' values with None. 
- E.g: object = { - "key1" : [{"subkey1": "value1"}, {"subkey2": "N/A"}], - "key2" : "n/a", - "key3" : { - "subkey3" : "n/a", - "subkey4" : "value2" - } - } - self.replace_na_with_none(object) will return: - { - "key1" : [{"subkey1": "value1"}, {"subkey2": None}], - "key2" : None, - "key3" : { - "subkey3" : None, - "subkey4" : "value2" - } - } - """ - if isinstance(obj, dict): - new_dict = {} - for key, value in obj.items(): - new_dict[key] = replace_na_with_none(value) - return new_dict - - if isinstance(obj, list): - new_list = [] - for value in obj: - new_list.append(replace_na_with_none(value)) - return new_list - - if isinstance(obj, str): - if obj.lower() == "n/a": - obj = None - return obj - - -def get_field_type_schema(field_type): - if field_type == "bool": - return {"type": ["null", "boolean"]} - - elif field_type == "datetime": - return {"type": ["null", "string"], "format": "date-time"} - - elif field_type == "number": - return {"type": ["null", "number"]} - - else: - return {"type": ["null", "string"]} - - -def get_field_schema(field_type, extras=False): - if extras: - return { - "type": "object", - "properties": { - "value": get_field_type_schema(field_type), - "timestamp": get_field_type_schema("datetime"), - "source": get_field_type_schema("string"), - "sourceId": get_field_type_schema("string"), - }, - } - else: - return { - "type": "object", - "properties": {"value": get_field_type_schema(field_type)}, - } - - -def parse_custom_schema(entity_name, data): - return { - field["name"]: get_field_schema(field["type"], entity_name != "contacts") - for field in data - } +REQUIRED_CONFIG_KEYS = [ + "start_date", + "client_id", + "client_secret", + "refresh_token", + "redirect_uri", +] +LOGGER = singer.get_logger() def get_abs_path(path): return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) -def load_schema(entity_name): - schema = utils.load_json(get_abs_path("schemas/{}.json".format(entity_name))) - return schema_nodash(schema) - - -def schema_nodash(obj): - type_field = obj.get("type") - type = get_type(type_field) - if not type: - return obj - if not type in ["array", "object"]: - return obj - if "object" == type: - props = obj.get("properties", {}) - new_props = replace_props(props) - obj["properties"] = new_props - if "array" == type: - items = obj.get("items", {}) - obj["items"] = schema_nodash(items) - return obj - - -def get_type(type_field): - if isinstance(type_field, str): - return type_field - if isinstance(type_field, list): - types = set(type_field) - if "null" in types: - types.remove("null") - return types.pop() - return None - - -def replace_props(props): - if not props: - return props - keys = list(props.keys()) - for k in keys: - if not "-" in k: - props[k] = schema_nodash(props[k]) - else: - v = props.pop(k) - new_key = k.replace("-", "_") - new_value = schema_nodash(v) - props[new_key] = new_value - return props - - -# pylint: disable=invalid-name -def acquire_access_token_from_refresh_token(): - payload = { - "grant_type": "refresh_token", - "redirect_uri": CONFIG["redirect_uri"], - "refresh_token": CONFIG["refresh_token"], - "client_id": CONFIG["client_id"], - "client_secret": CONFIG["client_secret"], - } - - resp = requests.post(BASE_URL + "/oauth/v1/token", data=payload) - if resp.status_code == 403: - raise InvalidAuthException(resp.content) - - resp.raise_for_status() - auth = resp.json() - CONFIG["access_token"] = auth["access_token"] - CONFIG["refresh_token"] = auth["refresh_token"] - CONFIG["token_expires"] = 
datetime.datetime.utcnow() + datetime.timedelta( - seconds=auth["expires_in"] - 600 - ) - LOGGER.info("Token refreshed. Expires at %s", CONFIG["token_expires"]) - - -def giveup(exc): - return ( - exc.response is not None - and 400 <= exc.response.status_code < 500 - and exc.response.status_code != 429 - ) - - -def on_giveup(details): - if len(details["args"]) == 2: - url, params = details["args"] - else: - url = details["args"] - params = {} - - raise Exception( - "Giving up on request after {} tries with url {} and params {}".format( - details["tries"], url, params - ) - ) - - -URL_SOURCE_RE = re.compile(BASE_URL + r"/(\w+)/") - - -def parse_source_from_url(url): - match = URL_SOURCE_RE.match(url) - if match: - return match.group(1) - return None - - -@backoff.on_exception( - backoff.constant, - (requests.exceptions.RequestException, requests.exceptions.HTTPError), - max_tries=5, - jitter=None, - giveup=giveup, - on_giveup=on_giveup, - interval=10, -) -def request(url, params=None): - - params = params or {} - hapikey = CONFIG["hapikey"] - if hapikey is None: - if ( - CONFIG["token_expires"] is None - or CONFIG["token_expires"] < datetime.datetime.utcnow() - ): - acquire_access_token_from_refresh_token() - headers = {"Authorization": "Bearer {}".format(CONFIG["access_token"])} - else: - params["hapikey"] = hapikey - headers = {} - - if "user_agent" in CONFIG: - headers["User-Agent"] = CONFIG["user_agent"] - - req = requests.Request("GET", url, params=params, headers=headers).prepare() - #LOGGER.info("GET %s", req.url) - with metrics.http_request_timer(parse_source_from_url(url)) as timer: - resp = SESSION.send(req) - timer.tags[metrics.Tag.http_status_code] = resp.status_code - if resp.status_code == 403: - raise SourceUnavailableException(resp.content) - else: - resp.raise_for_status() - - return resp - - -# {"bookmarks" : {"contacts" : { "lastmodifieddate" : "2001-01-01" -# "offset" : {"vidOffset": 1234 -# "timeOffset": "3434434 }} -# "users" : { "timestamp" : "2001-01-01"}} -# "currently_syncing" : "contacts" -# } -# } - -# pylint: disable=line-too-long -def gen_request( - STATE, tap_stream_id, url, params, path, more_key, offset_keys, offset_targets -): - if len(offset_keys) != len(offset_targets): - raise ValueError("Number of offset_keys must match number of offset_targets") - - if singer.get_offset(STATE, tap_stream_id): - params.update(singer.get_offset(STATE, tap_stream_id)) - - with metrics.record_counter(tap_stream_id) as counter: - while True: - data = request(url, params).json() - - for row in data[path]: - counter.increment() - yield row - - if not data.get(more_key, False): - break - - STATE = singer.clear_offset(STATE, tap_stream_id) - for key, target in zip(offset_keys, offset_targets): - if key in data: - params[target] = data[key] - STATE = singer.set_offset(STATE, tap_stream_id, target, data[key]) - - singer.write_state(STATE) - - STATE = singer.clear_offset(STATE, tap_stream_id) - singer.write_state(STATE) - - -def _sync_contact_vids(catalog, vids, schema, bumble_bee): - if len(vids) == 0: - return - - data = request( - get_url("contacts_detail"), - params={"vid": vids, "showListMemberships": True, "formSubmissionMode": "all"}, - ).json() - time_extracted = utils.now() - mdata = metadata.to_map(catalog.get("metadata")) - - for record in data.values(): - record = replace_na_with_none(record) - record = bumble_bee.transform(record, schema, mdata) - record = record_nodash(record) - singer.write_record( - "contacts", - record, - catalog.get("stream_alias"), - 
time_extracted=time_extracted, - ) - - -def record_nodash(obj): - if not isinstance(obj, dict): # stopplesing criteria - return obj - - for k in obj.keys(): - value = record_nodash(obj[k]) - if not "-" in k: - key = k - else: - obj.pop(k) - key = k.replace("-", "_") - - obj[key] = value # recursion - - return obj - - -default_contact_params = { - "showListMemberships": True, - "includeVersion": True, - "count": 100, -} - - -def sync_contacts(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - bookmark_key = "versionTimestamp" - start = utils.strptime_with_tz(get_start(STATE, "contacts", bookmark_key)) - LOGGER.info("sync_contacts from %s", start) - - max_bk_value = start - schema = catalog["schema"] - - singer.write_schema( - "contacts", schema, ["vid"], [bookmark_key], catalog.get("stream_alias") - ) - - url = get_url("contacts_all") - - vids = [] - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in gen_request( - STATE, - "contacts", - url, - default_contact_params, - "contacts", - "has-more", - ["vid-offset"], - ["vidOffset"], - ): - modified_time = None - if bookmark_key in row: - modified_time = utils.strptime_with_tz( - _transform_datetime( # pylint: disable=protected-access - row[bookmark_key], UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING - ) - ) - - if not modified_time or modified_time > start: - vids.append(row["vid"]) +def load_schemas(): + schemas = {} - if modified_time and modified_time >= max_bk_value: - max_bk_value = modified_time + for filename in os.listdir(get_abs_path("schemas")): + path = get_abs_path("schemas") + "/" + filename + file_raw = filename.replace(".json", "") + with open(path) as file: + schemas[file_raw] = json.load(file) - if len(vids) == 100: - _sync_contact_vids(catalog, vids, schema, bumble_bee) - vids = [] + return schemas - _sync_contact_vids(catalog, vids, schema, bumble_bee) - STATE = singer.write_bookmark( - STATE, "contacts", bookmark_key, utils.strftime(max_bk_value) - ) - singer.write_state(STATE) - return STATE +def discover() -> Catalog: + schemas = load_schemas() + streams = [] - -class ValidationPredFailed(Exception): - pass - - -# companies_recent only supports 10,000 results. If there are more than this, -# we'll need to use the companies_all endpoint -def use_recent_companies_endpoint(response): - return response["total"] < 10000 - - -default_company_params = { - "limit": 250, - "properties": [ - "website", - "name", - "country", - "domain", - "createdate", - "hs_lastmodifieddate", - ], -} - - -def sync_companies(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get("metadata")) - bumble_bee = Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) - bookmark_key = "hs_lastmodifieddate" - start = utils.strptime_to_utc(get_start(STATE, "companies", bookmark_key)) - LOGGER.info("sync_companies from %s", start) - schema = catalog["schema"] - singer.write_schema( - "companies", schema, ["companyId"], [bookmark_key], catalog.get("stream_alias") - ) - - # Because this stream doesn't query by `lastUpdated`, it cycles - # through the data set every time. The issue with this is that there - # is a race condition by which records may be updated between the - # start of this table's sync and the end, causing some updates to not - # be captured, in order to combat this, we must store the current - # sync's start in the state and not move the bookmark past this value. 
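# A minimal sketch of the bookmark clamping described in the comment above, with
# hypothetical timestamps (using the singer.utils helpers already imported in this
# module): the bookmark written at the end of the sync is capped at the moment the
# sync began, so records updated while the full-table scan was running are picked
# up again on the next run.
#
#   current_sync_start = utils.strptime_to_utc("2020-03-15T00:00:00Z")  # when this sync began
#   max_bk_value       = utils.strptime_to_utc("2020-03-15T04:30:00Z")  # newest record seen
#   new_bookmark = min(max_bk_value, current_sync_start)
#   # -> 2020-03-15T00:00:00+00:00, not the newer value seen mid-sync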
- current_sync_start = get_current_sync_start(STATE, "companies") or utils.now() - STATE = write_current_sync_start(STATE, "companies", current_sync_start) - singer.write_state(STATE) - - url = get_url("companies_all") - max_bk_value = start - - with bumble_bee: - for row in gen_request( - STATE, - "companies", - url, - default_company_params, - "companies", - "has-more", - ["offset"], - ["offset"], - ): - row_properties = row["properties"] - modified_time = None - if bookmark_key in row_properties: - # Hubspot returns timestamps in millis - timestamp_millis = row_properties[bookmark_key]["timestamp"] / 1000.0 - modified_time = datetime.datetime.fromtimestamp( - timestamp_millis, datetime.timezone.utc - ) - elif "createdate" in row_properties: - # Hubspot returns timestamps in millis - timestamp_millis = row_properties["createdate"]["timestamp"] / 1000.0 - modified_time = datetime.datetime.fromtimestamp( - timestamp_millis, datetime.timezone.utc - ) - - if modified_time and modified_time >= max_bk_value: - max_bk_value = modified_time - if not modified_time or modified_time > start: - record = bumble_bee.transform(row, schema, mdata) - singer.write_record( - "companies", - record, - catalog.get("stream_alias"), - time_extracted=utils.now(), - ) - # Don't bookmark past the start of this sync to account for updated records during the sync. - new_bookmark = min(max_bk_value, current_sync_start) - STATE = singer.write_bookmark( - STATE, "companies", bookmark_key, utils.strftime(new_bookmark) - ) - STATE = write_current_sync_start(STATE, "companies", None) - singer.write_state(STATE) - return STATE - - -def sync_deals(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get("metadata")) - bookmark_key = "hs_lastmodifieddate" - start = utils.strptime_with_tz(get_start(STATE, "deals", bookmark_key)) - max_bk_value = start - LOGGER.info("sync_deals from %s", start) - params = {"count": 250, "includeAssociations": False, "properties": []} - - schema = load_schema("deals") - singer.write_schema( - "deals", schema, ["dealId"], [bookmark_key], catalog.get("stream_alias") - ) - - # Check if we should include associations - for key in mdata.keys(): - if "associations" in key: - assoc_mdata = mdata.get(key) - if assoc_mdata.get("selected") and assoc_mdata.get("selected") == True: - params["includeAssociations"] = True - - # Append all the properties fields for deals to the request if - # properties is selectedOB - if mdata.get(("properties", "properties"), {}).get("selected"): - additional_properties = ( - schema.get("properties").get("properties").get("properties") + for tap_stream_id, props in STREAMS.items(): + schema = schemas[tap_stream_id] + mdata = metadata.get_standard_metadata( + schema=schema, + key_properties=props.get("key_properties", None), + valid_replication_keys=props.get("valid_replication_keys", []), ) - for key in additional_properties.keys(): - params["properties"].append(key) - - url = get_url("deals_all") - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in gen_request( - STATE, "deals", url, params, "deals", "hasMore", ["offset"], ["offset"] - ): - row_properties = row["properties"] - modified_time = None - if bookmark_key in row_properties: - # Hubspot returns timestamps in millis - timestamp_millis = row_properties[bookmark_key]["timestamp"] / 1000.0 - modified_time = datetime.datetime.fromtimestamp( - timestamp_millis, datetime.timezone.utc - ) - elif "createdate" in 
row_properties: - # Hubspot returns timestamps in millis - timestamp_millis = row_properties["createdate"]["timestamp"] / 1000.0 - modified_time = datetime.datetime.fromtimestamp( - timestamp_millis, datetime.timezone.utc - ) - if modified_time and modified_time >= max_bk_value: - max_bk_value = modified_time - - if not modified_time or modified_time > start: - record = bumble_bee.transform(row, schema, mdata) - singer.write_record( - "deals", - record, - catalog.get("stream_alias"), - time_extracted=utils.now(), - ) - - STATE = singer.write_bookmark( - STATE, "deals", bookmark_key, utils.strftime(max_bk_value) - ) - singer.write_state(STATE) - return STATE - - -# NB> no suitable bookmark is available: https://developers.hubspot.com/docs/methods/email/get_campaigns_by_id -def sync_campaigns(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get("metadata")) - schema = catalog["schema"] - singer.write_schema("campaigns", schema, ["id"], catalog.get("stream_alias")) - LOGGER.info("sync_campaigns(NO bookmarks)") - url = get_url("campaigns_all") - params = {"limit": 500} - - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in gen_request( - STATE, - "campaigns", - url, - params, - "campaigns", - "hasMore", - ["offset"], - ["offset"], - ): - record = request(get_url("campaigns_detail", campaign_id=row["id"])).json() - record = replace_na_with_none(record) - record = bumble_bee.transform(record, schema, mdata) - singer.write_record( - "campaigns", - record, - catalog.get("stream_alias"), - time_extracted=utils.now(), + streams.append( + CatalogEntry( + stream=tap_stream_id, + tap_stream_id=tap_stream_id, + key_properties=KEY_PROPERTIES, + schema=Schema.from_dict(schema), + metadata=mdata, ) - - return STATE - - -def sync_entity_chunked(STATE, catalog, entity_name, key_properties, path): - schema = catalog["schema"] - bookmark_key = "startTimestamp" - - singer.write_schema( - entity_name, schema, key_properties, [bookmark_key], catalog.get("stream_alias") - ) - - start = get_start(STATE, entity_name, bookmark_key) - LOGGER.info("sync_%s from %s", entity_name, start) - - now = datetime.datetime.utcnow().replace(tzinfo=pytz.UTC) - now_ts = int(now.timestamp() * 1000) - - start_ts = int(utils.strptime_with_tz(start).timestamp() * 1000) - url = get_url(entity_name) - - mdata = metadata.to_map(catalog.get("metadata")) - - if entity_name == "email_events": - window_size = int(CONFIG["email_chunk_size"]) - elif entity_name == "subscription_changes": - window_size = int(CONFIG["subscription_chunk_size"]) - - with metrics.record_counter(entity_name) as counter: - while start_ts < now_ts: - end_ts = start_ts + window_size - params = {"startTimestamp": start_ts, "endTimestamp": end_ts, "limit": 1000} - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - while True: - our_offset = singer.get_offset(STATE, entity_name) - if bool(our_offset) and our_offset.get("offset") != None: - params[StateFields.offset] = our_offset.get("offset") - - data = request(url, params).json() - time_extracted = utils.now() - - for row in data[path]: - counter.increment() - row = replace_na_with_none(row) - record = bumble_bee.transform(row, schema, mdata) - singer.write_record( - entity_name, - record, - catalog.get("stream_alias"), - time_extracted=time_extracted, - ) - if data.get("hasMore"): - STATE = singer.set_offset( - STATE, entity_name, "offset", data["offset"] - ) - singer.write_state(STATE) 
- else: - STATE = singer.clear_offset(STATE, entity_name) - singer.write_state(STATE) - break - STATE = singer.write_bookmark( - STATE, - entity_name, - "startTimestamp", - utils.strftime( - datetime.datetime.fromtimestamp( - (start_ts / 1000), datetime.timezone.utc - ) - ), - ) # pylint: disable=line-too-long - singer.write_state(STATE) - start_ts = end_ts - - STATE = singer.clear_offset(STATE, entity_name) - singer.write_state(STATE) - return STATE - - -def sync_email_events(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - STATE = sync_entity_chunked(STATE, catalog, "email_events", ["id"], "events") - return STATE - - -def sync_forms(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get("metadata")) - schema = load_schema("forms") - bookmark_key = "updatedAt" - - singer.write_schema( - "forms", schema, ["guid"], [bookmark_key], catalog.get("stream_alias") - ) - start = get_start(STATE, "forms", bookmark_key) - max_bk_value = start - - LOGGER.info("sync_forms from %s", start) - - data = request(get_url("forms")).json() - time_extracted = utils.now() - - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in data: - row = replace_na_with_none(row) - record = bumble_bee.transform(row, schema, mdata) - - if record[bookmark_key] > start: - singer.write_record( - "forms", - record, - catalog.get("stream_alias"), - time_extracted=time_extracted, - ) - if record[bookmark_key] >= max_bk_value: - max_bk_value = record[bookmark_key] - - STATE = singer.write_bookmark(STATE, "forms", bookmark_key, max_bk_value) - singer.write_state(STATE) - - return STATE - - -def sync_engagements(STATE, ctx): - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get("metadata")) - schema = catalog["schema"] - bookmark_key = "lastUpdated" - singer.write_schema( - "engagements", - schema, - ["engagement_id"], - [bookmark_key], - catalog.get("stream_alias"), - ) - start = get_start(STATE, "engagements", bookmark_key) - - # Because this stream doesn't query by `lastUpdated`, it cycles - # through the data set every time. The issue with this is that there - # is a race condition by which records may be updated between the - # start of this table's sync and the end, causing some updates to not - # be captured, in order to combat this, we must store the current - # sync's start in the state and not move the bookmark past this value. 
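# A minimal sketch, with hypothetical values, of the record shape this function
# emits further down: the primary key and bookmark live inside the nested
# "engagement" object and are hoisted to the top level before the record is written.
#
#   record = {"engagement": {"id": 42, "lastUpdated": "2020-03-15T04:30:00Z"}}
#   record["engagement_id"] = record["engagement"]["id"]            # -> 42
#   record["lastUpdated"]   = record["engagement"]["lastUpdated"]   # bookmark field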
- current_sync_start = get_current_sync_start(STATE, "engagements") or utils.now() - STATE = write_current_sync_start(STATE, "engagements", current_sync_start) - singer.write_state(STATE) - - max_bk_value = start - LOGGER.info("sync_engagements from %s", start) - - STATE = singer.write_bookmark(STATE, "engagements", bookmark_key, start) - singer.write_state(STATE) - - url = get_url("engagements_all") - params = {"limit": 250} - top_level_key = "results" - engagements = gen_request( - STATE, - "engagements", - url, - params, - top_level_key, - "hasMore", - ["offset"], - ["offset"], - ) - - time_extracted = utils.now() - - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for engagement in engagements: - engagement = replace_na_with_none(engagement) - record = bumble_bee.transform(engagement, schema, mdata) - if record["engagement"][bookmark_key] > start: - # hoist PK and bookmark field to top-level record - record["engagement_id"] = record["engagement"]["id"] - record[bookmark_key] = record["engagement"][bookmark_key] - singer.write_record( - "engagements", - record, - catalog.get("stream_alias"), - time_extracted=time_extracted, - ) - if record["engagement"][bookmark_key] >= max_bk_value: - max_bk_value = record["engagement"][bookmark_key] - - # Don't bookmark past the start of this sync to account for updated records during the sync. - new_bookmark = min(utils.strptime_to_utc(max_bk_value), current_sync_start) - STATE = singer.write_bookmark( - STATE, "engagements", bookmark_key, utils.strftime(new_bookmark) - ) - STATE = write_current_sync_start(STATE, "engagements", None) - singer.write_state(STATE) - return STATE - - -def sync_deal_pipelines(STATE, ctx): - bookmark_key = "updatedAt" - start = utils.strptime_with_tz(get_start(STATE, "deal_pipelines", bookmark_key)) - max_bk_value = start - catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) - mdata = metadata.to_map(catalog.get("metadata")) - schema = catalog["schema"] - singer.write_schema( - "deal_pipelines", schema, ["pipelineId"], catalog.get("stream_alias") - ) - LOGGER.info(f"sync deal_pipelines from {start}") - - data = request(get_url("deal_pipelines")).json() - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: - for row in data["results"]: - modified_time = None - row = replace_na_with_none(row) - if bookmark_key in row: - timestamp_millis = row[bookmark_key] - elif "createdAt" in row: - # Hubspot returns timestamps in millis - timestamp_millis = row["createdAt"] - modified_time = datetime.datetime.fromtimestamp( - timestamp_millis / 1000.0, datetime.timezone.utc - ) - if modified_time and modified_time >= max_bk_value: - max_bk_value = modified_time - - if not modified_time or modified_time > start: - record = bumble_bee.transform(row, schema, mdata) - singer.write_record( - "deal_pipelines", - record, - catalog.get("stream_alias"), - time_extracted=utils.now(), - ) - STATE = singer.write_bookmark( - STATE, "deal_pipelines", bookmark_key, utils.strftime(max_bk_value) - ) - singer.write_state(STATE) - return STATE - - -@attr.s -class Stream(object): - tap_stream_id = attr.ib() - sync = attr.ib() - key_properties = attr.ib() - replication_key = attr.ib() - replication_method = attr.ib() - - -STREAMS = [ - Stream("email_events", sync_email_events, ["id"], "startTimestamp", "INCREMENTAL"), - # Do these last as they are full table - Stream("forms", sync_forms, ["guid"], "updatedAt", "FULL_TABLE"), - Stream("contacts", sync_contacts, ["vid"], 
"versionTimestamp", "FULL_TABLE"), - Stream( - "companies", sync_companies, ["companyId"], "hs_lastmodifieddate", "FULL_TABLE" - ), - Stream("deals", sync_deals, ["dealId"], "hs_lastmodifieddate", "FULL_TABLE"), - Stream( - "deal_pipelines", sync_deal_pipelines, ["pipelineId"], "updatedAt", "FULL_TABLE" - ), - Stream( - "engagements", sync_engagements, ["engagement_id"], "lastUpdated", "FULL_TABLE" - ), -] - - -def get_streams_to_sync(streams, state): - target_stream = singer.get_currently_syncing(state) - result = streams - if target_stream: - skipped = list( - itertools.takewhile(lambda x: x.tap_stream_id != target_stream, streams) - ) - rest = list( - itertools.dropwhile(lambda x: x.tap_stream_id != target_stream, streams) - ) - result = rest + skipped # Move skipped streams to end - if not result: - raise Exception("Unknown stream {} in state".format(target_stream)) - return result - - -def get_selected_streams(remaining_streams, ctx): - selected_streams = [] - for stream in remaining_streams: - if stream.tap_stream_id in ctx.selected_stream_ids: - selected_streams.append(stream) - return selected_streams - - -def do_sync(STATE, catalog): - # Clear out keys that are no longer used - clean_state(STATE) - - ctx = Context(catalog) - validate_dependencies(ctx) - - remaining_streams = get_streams_to_sync(STREAMS, STATE) - selected_streams = get_selected_streams(remaining_streams, ctx) - LOGGER.info( - "Starting sync. Will sync these streams: %s", - [stream.tap_stream_id for stream in selected_streams], - ) - for stream in selected_streams: - LOGGER.info("Syncing %s", stream.tap_stream_id) - STATE = singer.set_currently_syncing(STATE, stream.tap_stream_id) - singer.write_state(STATE) - - try: - STATE = stream.sync(STATE, ctx) # pylint: disable=not-callable - except SourceUnavailableException as ex: - error_message = str(ex).replace(CONFIG["access_token"], 10 * "*") - LOGGER.error(error_message) - pass - - STATE = singer.set_currently_syncing(STATE, None) - singer.write_state(STATE) - LOGGER.info("Sync completed") - - -class Context(object): - def __init__(self, catalog): - self.selected_stream_ids = set() - - for stream in catalog.get("streams"): - mdata = metadata.to_map(stream["metadata"]) - if metadata.get(mdata, (), "selected"): - self.selected_stream_ids.add(stream["tap_stream_id"]) - - self.catalog = catalog - - def get_catalog_from_id(self, tap_stream_id): - return [ - c for c in self.catalog.get("streams") if c.get("stream") == tap_stream_id - ][0] - - -# stream a is dependent on stream STREAM_DEPENDENCIES[a] -STREAM_DEPENDENCIES = {CONTACTS_BY_COMPANY: "companies"} - - -def validate_dependencies(ctx): - errs = [] - msg_tmpl = ( - "Unable to extract {0} data. " - "To receive {0} data, you also need to select {1}." 
- ) - - for k, v in STREAM_DEPENDENCIES.items(): - if k in ctx.selected_stream_ids and v not in ctx.selected_stream_ids: - errs.append(msg_tmpl.format(k, v)) - if errs: - raise DependencyException(" ".join(errs)) - - -def load_discovered_schema(stream): - schema = load_schema(stream.tap_stream_id) - mdata = metadata.new() - - mdata = metadata.write(mdata, (), "table-key-properties", stream.key_properties) - mdata = metadata.write( - mdata, (), "forced-replication-method", stream.replication_method - ) - - if stream.replication_key: - mdata = metadata.write( - mdata, (), "valid-replication-keys", [stream.replication_key] ) + return Catalog(streams) - for field_name in schema["properties"]: - if field_name in stream.key_properties or field_name == stream.replication_key: - mdata = metadata.write( - mdata, ("properties", field_name), "inclusion", "automatic" - ) - else: - mdata = metadata.write( - mdata, ("properties", field_name), "inclusion", "available" - ) - # The engagements stream has nested data that we synthesize; The engagement field needs to be automatic - if stream.tap_stream_id == "engagements": - mdata = metadata.write( - mdata, ("properties", "engagement"), "inclusion", "automatic" - ) +def sync(catalog, config, state=None): + for catalog_entry in catalog.streams: + if not catalog_entry.is_selected(): + continue + LOGGER.info(f"syncing {catalog_entry.tap_stream_id}") + stream = Stream(catalog_entry, config) + stream.do_sync(state) - return schema, metadata.to_list(mdata) +@utils.handle_top_exception(LOGGER) +def main(): -def discover_schemas(): - result = {"streams": []} - for stream in STREAMS: - LOGGER.info("Loading schema for %s", stream.tap_stream_id) - schema, mdata = load_discovered_schema(stream) - result["streams"].append( - { - "stream": stream.tap_stream_id, - "tap_stream_id": stream.tap_stream_id, - "schema": schema, - "metadata": mdata, - } - ) - - return result - - -def do_discover(): - LOGGER.info("Loading schemas") - json.dump(discover_schemas(), sys.stdout, indent=4) - - -def main_impl(): - args = utils.parse_args( - ["redirect_uri", "client_id", "client_secret", "refresh_token", "start_date"] - ) - - CONFIG.update(args.config) - STATE = {} - - if args.state: - STATE.update(args.state) + args = utils.parse_args(REQUIRED_CONFIG_KEYS) if args.discover: - do_discover() - elif args.properties: - do_sync(STATE, args.properties) + catalog = discover() + catalog.dump() else: - LOGGER.info("No properties were selected") - - -def main(): - try: - main_impl() - except Exception as exc: - LOGGER.critical(exc) - raise exc + if args.catalog: + catalog = args.catalog + else: + catalog = discover() + sync(catalog, args.config, args.state) if __name__ == "__main__": From 0d1613db6d4294f89446dd5ef8a9fedd4b13c410 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 00:49:52 +0200 Subject: [PATCH 46/78] handle all api calls and get replication values --- tap_hubspot/hubspot.py | 173 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 tap_hubspot/hubspot.py diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py new file mode 100644 index 00000000..09c38dfb --- /dev/null +++ b/tap_hubspot/hubspot.py @@ -0,0 +1,173 @@ +import requests +from dateutil import parser +import time +from ratelimit import limits +import ratelimit +import singer +import backoff +import sys +import datetime +from singer import utils + +LOGGER = singer.get_logger() + + +class Hubspot: + SESSION = requests.Session() + BASE_URL = "https://api.hubapi.com" 
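# A short illustration (hypothetical usage) of how the entries below are used:
# each path is appended to BASE_URL when the request URL is built in
# get_url_params, e.g. for the "deals" stream:
#
#   f"{BASE_URL}{ENDPOINTS['deals']}"
#   # -> "https://api.hubapi.com/deals/v1/deal/paged"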
+ ENDPOINTS = { + "companies": "/companies/v2/companies/paged", + "contacts": "/contacts/v1/lists/all/contacts/all", + "deal_pipelines": "/crm-pipelines/v1/pipelines/deals", + "deals": "/deals/v1/deal/paged", + "email_events": "/email/public/v1/events", + "engagements": "/engagements/v1/engagements/paged", + "forms": "/forms/v2/forms", + } + DATA_PATH = { + "companies": "companies", + "contacts": "contacts", + "deal_pipelines": "results", + "deals": "deals", + "email_events": "events", + "engagements": "results", + } + REPLICATION_PATH = { + "companies": ["properties", "hs_lastmodifieddate", "timestamp",], + "contacts": ["properties", "lastmodifieddate", "value"], + "deal_pipelines": ["updatedAt"], + "deals": ["properties", "hs_lastmodifieddate", "timestamp"], + "email_events": ["created"], + "engagements": ["engagement", "lastUpdated"], + "forms": ["updatedAt"], + } + LIMIT = 250 + + def __init__(self, config, tap_stream_id, properties): + self.access_token = None + self.tap_stream_id = tap_stream_id + self.config = config + self.refresh_access_token() + self.endpoint = self.ENDPOINTS[tap_stream_id] + self.offset_value = None + self.offset_key = None + self.hasmore = True + self.PARAMS = { + "companies": {"limit": self.LIMIT, "properties": properties,}, + "contacts": { + "showListMemberships": True, + "includeVersion": True, + "count": self.LIMIT, + }, + "engagements": {"limit": self.LIMIT}, + "deals": { + "count": self.LIMIT, + "includeAssociations": False, + "properties": properties, + "limit": self.LIMIT, + }, + } + + def get_url_params(self, start_date, end_date): + url = f"{self.BASE_URL}{self.endpoint}" + params = self.PARAMS.get(self.tap_stream_id, {}) + if self.tap_stream_id == "email_events": + params = {"startTimestamp": start_date, "endTimestamp": end_date} + if self.offset_value: + params[self.offset_key] = self.offset_value + return url, params + + def get_replication_value( + self, obj: dict, path_to_replication_key=None, default=None + ): + if not path_to_replication_key: + return default + for path_element in path_to_replication_key: + obj = obj.get(path_element) + if not obj: + return default + return self.milliseconds_to_datetime(obj) + + def milliseconds_to_datetime(self, ms): + return ( + datetime.datetime.fromtimestamp((int(ms) / 1000.0), datetime.timezone.utc) + if ms + else None + ) + + def datetime_to_milliseconds(self, d: datetime.datetime): + return int(d.timestamp() * 1000) if d else None + + def get_records(self, start_date, end_date): + while self.hasmore: + url, params = self.get_url_params(start_date, end_date) + records = self.call_api(url, params=params) + if records: + replication_value = map( + lambda record: self.get_replication_value( + obj=record, + path_to_replication_key=self.REPLICATION_PATH.get( + self.tap_stream_id + ), + ), + records, + ) + yield from zip(records, replication_value) + else: + break + + def streams(self, start_date, end_date): + start_date = self.datetime_to_milliseconds(start_date) + end_date = self.datetime_to_milliseconds(end_date) + yield from self.get_records(start_date, end_date) + + @backoff.on_exception( + backoff.expo, + ( + requests.exceptions.RequestException, + requests.exceptions.HTTPError, + ratelimit.exception.RateLimitException, + ), + ) + @limits(calls=100, period=10) + def call_api(self, url, params={}): + response = self.SESSION.get( + url, headers={"Authorization": f"Bearer {self.access_token}"}, params=params + ) + LOGGER.info(response.url) + response.raise_for_status() + data = 
self.get_offset(response.json()) + + return data + + def get_offset(self, data): + data_path = self.DATA_PATH.get(self.tap_stream_id) + if isinstance(data, list): + self.hasmore = False + return data + + if self.tap_stream_id == "deal_pipelines": + self.hasmore = False + + offset = [k for k in data.keys() if k.endswith("offset")] + if offset: + offset = offset[0] + self.offset_value = data.get(offset) + self.offset_key = "vidOffset" if offset == "vid-offset" else "offset" + data = data[data_path] if data_path else data + + return data + + def refresh_access_token(self): + payload = { + "grant_type": "refresh_token", + "refresh_token": self.config["refresh_token"], + "client_id": self.config["client_id"], + "client_secret": self.config["client_secret"], + } + + resp = requests.post(self.BASE_URL + "/oauth/v1/token", data=payload) + resp.raise_for_status() + if not resp: + raise Exception(resp.text) + self.access_token = resp.json()["access_token"] From d53acf23bbb14ad2b1b9066139a803967c32e0b4 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 00:50:20 +0200 Subject: [PATCH 47/78] write singer record, schema and state --- tap_hubspot/stream.py | 118 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 tap_hubspot/stream.py diff --git a/tap_hubspot/stream.py b/tap_hubspot/stream.py new file mode 100644 index 00000000..0cb8bb8e --- /dev/null +++ b/tap_hubspot/stream.py @@ -0,0 +1,118 @@ +import singer +from singer import ( + metadata, + CatalogEntry, + Transformer, + UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING, + _transform_datetime, +) +from typing import Union +from datetime import timedelta, datetime +from dateutil import parser +from tap_hubspot.hubspot import Hubspot +import pytz + +LOGGER = singer.get_logger() + + +class Stream: + def __init__(self, catalog: CatalogEntry, config): + self.tap_stream_id = catalog.tap_stream_id + self.schema = catalog.schema.to_dict() + self.key_properties = catalog.key_properties + self.mdata = metadata.to_map(catalog.metadata) + self.bookmark_key = self.mdata.get(()).get("valid-replication-keys")[0] + self.config = config + self.hubspot = Hubspot(config, self.tap_stream_id, self.get_properties()) + + def get_properties(self): + properties = [] + if self.mdata.get(("properties", "properties"), {}).get("selected"): + additional_properties = ( + self.schema.get("properties").get("properties").get("properties") + ) + properties = [key for key in additional_properties.keys()] + return properties + + def do_sync(self, state): + singer.write_schema( + self.tap_stream_id, self.schema, self.key_properties, + ) + prev_bookmark = None + start_date, end_date = self.__get_start_end(state) + with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as transformer: + try: + data = self.hubspot.streams(start_date, end_date) + for d, replication_value in data: + if not replication_value: + record = transformer.transform(d, self.schema, self.mdata) + singer.write_record(self.tap_stream_id, record) + + elif (start_date >= replication_value) or ( + end_date <= replication_value + ): + continue + + else: + record = transformer.transform(d, self.schema, self.mdata) + singer.write_record(self.tap_stream_id, record) + new_bookmark = replication_value + if not prev_bookmark: + prev_bookmark = new_bookmark + + if prev_bookmark < new_bookmark: + state = self.__advance_bookmark(state, prev_bookmark) + prev_bookmark = new_bookmark + return self.__advance_bookmark(state, prev_bookmark) + + except Exception: + 
self.__advance_bookmark(state, prev_bookmark) + raise + + def __get_start_end(self, state: dict): + end_date = pytz.utc.localize(datetime.utcnow()) + LOGGER.info(f"sync data until: {end_date}") + + config_start_date = self.config.get("start_date") + if config_start_date: + config_start_date = parser.isoparse(config_start_date) + else: + config_start_date = datetime.utcnow() + timedelta(weeks=4) + + if not state: + LOGGER.info(f"using 'start_date' from config: {config_start_date}") + return config_start_date, end_date + + account_record = state["bookmarks"].get(self.tap_stream_id, None) + if not account_record: + LOGGER.info(f"using 'start_date' from config: {config_start_date}") + return config_start_date, end_date + + current_bookmark = account_record.get(self.bookmark_key, None) + if not current_bookmark: + LOGGER.info(f"using 'start_date' from config: {config_start_date}") + return config_start_date, end_date + + start_date = parser.isoparse(current_bookmark) + LOGGER.info(f"using 'start_date' from previous state: {start_date}") + return start_date, end_date + + def __advance_bookmark(self, state: dict, bookmark: Union[str, datetime, None]): + if not bookmark: + singer.write_state(state) + return state + + if isinstance(bookmark, datetime): + bookmark_datetime = bookmark + elif isinstance(bookmark, str): + bookmark_datetime = parser.isoparse(bookmark) + else: + raise ValueError( + f"bookmark is of type {type(bookmark)} but must be either string or datetime" + ) + + state = singer.write_bookmark( + state, self.tap_stream_id, self.bookmark_key, bookmark_datetime.isoformat() + ) + singer.write_state(state) + return state From 7d4d2c38626dc96d2ac577cf8b403cbd8383cb96 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 00:54:05 +0200 Subject: [PATCH 48/78] delete unused package --- setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup.py b/setup.py index 851f6708..d6f80ef6 100644 --- a/setup.py +++ b/setup.py @@ -11,12 +11,9 @@ classifiers=["Programming Language :: Python :: 3 :: Only"], py_modules=["tap_hubspot"], install_requires=[ - "attrs>=16.3.0, <19", "singer-python>=5.1.1, <5.9", "requests==2.22.0", "backoff>=1.3.2, <2", - "requests_mock==1.3.0", - "nose", ], entry_points=""" [console_scripts] From 030f947a041236fccf2f09a81e0394b9f75648bc Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 00:56:17 +0200 Subject: [PATCH 49/78] add ratelimit package --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index d6f80ef6..12c5159d 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ "singer-python>=5.1.1, <5.9", "requests==2.22.0", "backoff>=1.3.2, <2", + "ratelimit==2.2.1", ], entry_points=""" [console_scripts] From ea481842f8da96a28d531fa343c511dabddb72ff Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 09:22:53 +0200 Subject: [PATCH 50/78] simplify package data --- setup.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/setup.py b/setup.py index 12c5159d..eba2df32 100644 --- a/setup.py +++ b/setup.py @@ -21,20 +21,6 @@ tap-hubspot=tap_hubspot:main """, packages=["tap_hubspot"], - package_data={ - "tap_hubspot/schemas": [ - "campaigns.json", - "companies.json", - "contact_lists.json", - "contacts.json", - "deals.json", - "email_events.json", - "forms.json", - "keywords.json", - "owners.json", - "subscription_changes.json", - "workflows.json", - ], - }, + package_data={"tap_hubspot/schemas": ["*.json"]}, include_package_data=True, ) From 
9a4a7c5983b014d5e8225d05681e3f5d6f5c0912 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 16:20:28 +0200 Subject: [PATCH 51/78] cr: make path adapt to all os by using pathlib --- tap_hubspot/__init__.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index ffb4a6ba..5f4b2e71 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -5,6 +5,7 @@ import singer from singer import utils, metadata, Catalog, CatalogEntry, Schema from tap_hubspot.stream import Stream +from pathlib import Path KEY_PROPERTIES = "id" STREAMS = { @@ -44,18 +45,12 @@ LOGGER = singer.get_logger() -def get_abs_path(path): - return os.path.join(os.path.dirname(os.path.realpath(__file__)), path) - - def load_schemas(): schemas = {} - - for filename in os.listdir(get_abs_path("schemas")): - path = get_abs_path("schemas") + "/" + filename - file_raw = filename.replace(".json", "") - with open(path) as file: - schemas[file_raw] = json.load(file) + schemas_path = Path(__file__).parent.absolute() / "schemas" + for schema_path in schemas_path.iterdir(): + stream_name = schema_path.stem + schemas[stream_name] = json.loads(schema_path.read_text()) return schemas From 5c65a622e48526a1628b348a058263ad6425b03d Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 16:36:26 +0200 Subject: [PATCH 52/78] cr: move session in init function --- tap_hubspot/hubspot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 09c38dfb..85724483 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -13,7 +13,6 @@ class Hubspot: - SESSION = requests.Session() BASE_URL = "https://api.hubapi.com" ENDPOINTS = { "companies": "/companies/v2/companies/paged", @@ -44,6 +43,7 @@ class Hubspot: LIMIT = 250 def __init__(self, config, tap_stream_id, properties): + self.SESSION = requests.Session() self.access_token = None self.tap_stream_id = tap_stream_id self.config = config From e75a9a0671797cb3f7ef9e02199e7698aa3d33df Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 16:40:33 +0200 Subject: [PATCH 53/78] cr: move limit to init --- tap_hubspot/hubspot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 85724483..aadf8e6c 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -40,10 +40,10 @@ class Hubspot: "engagements": ["engagement", "lastUpdated"], "forms": ["updatedAt"], } - LIMIT = 250 - def __init__(self, config, tap_stream_id, properties): + def __init__(self, config, tap_stream_id, properties, limit=250): self.SESSION = requests.Session() + self.limit = limit self.access_token = None self.tap_stream_id = tap_stream_id self.config = config @@ -53,18 +53,18 @@ def __init__(self, config, tap_stream_id, properties): self.offset_key = None self.hasmore = True self.PARAMS = { - "companies": {"limit": self.LIMIT, "properties": properties,}, + "companies": {"limit": self.limit, "properties": properties,}, "contacts": { "showListMemberships": True, "includeVersion": True, - "count": self.LIMIT, + "count": self.limit, }, - "engagements": {"limit": self.LIMIT}, + "engagements": {"limit": self.limit}, "deals": { - "count": self.LIMIT, + "count": self.limit, "includeAssociations": False, "properties": properties, - "limit": self.LIMIT, + "limit": self.limit, }, } From c17e39de20a59c1a54a69d062d734c5c81e9498c Mon Sep 17 00:00:00 2001 From: JingLin0 Date: 
Tue, 14 Apr 2020 16:43:27 +0200 Subject: [PATCH 54/78] cr: use int directly --- tap_hubspot/hubspot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index aadf8e6c..50ae0132 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -90,7 +90,7 @@ def get_replication_value( def milliseconds_to_datetime(self, ms): return ( - datetime.datetime.fromtimestamp((int(ms) / 1000.0), datetime.timezone.utc) + datetime.datetime.fromtimestamp((int(ms) / 1000), datetime.timezone.utc) if ms else None ) From 9a2104696296a9743726bf988d4be57957d6f70b Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 16:45:06 +0200 Subject: [PATCH 55/78] cr: typehint for ms --- tap_hubspot/hubspot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 50ae0132..80472e8e 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -88,7 +88,7 @@ def get_replication_value( return default return self.milliseconds_to_datetime(obj) - def milliseconds_to_datetime(self, ms): + def milliseconds_to_datetime(self, ms: str): return ( datetime.datetime.fromtimestamp((int(ms) / 1000), datetime.timezone.utc) if ms From 89c678c488b0e77f475f06dd04ba40c8936b2cc8 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 14 Apr 2020 17:00:31 +0200 Subject: [PATCH 56/78] cr: rewrite logic --- tap_hubspot/stream.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/tap_hubspot/stream.py b/tap_hubspot/stream.py index 0cb8bb8e..8ac20871 100644 --- a/tap_hubspot/stream.py +++ b/tap_hubspot/stream.py @@ -44,25 +44,24 @@ def do_sync(self, state): try: data = self.hubspot.streams(start_date, end_date) for d, replication_value in data: - if not replication_value: - record = transformer.transform(d, self.schema, self.mdata) - singer.write_record(self.tap_stream_id, record) - - elif (start_date >= replication_value) or ( - end_date <= replication_value + if replication_value and ( + start_date >= replication_value or end_date <= replication_value ): continue + + record = transformer.transform(d, self.schema, self.mdata) + singer.write_record(self.tap_stream_id, record) + if not replication_value: + continue + + new_bookmark = replication_value + if not prev_bookmark: + prev_bookmark = new_bookmark - else: - record = transformer.transform(d, self.schema, self.mdata) - singer.write_record(self.tap_stream_id, record) - new_bookmark = replication_value - if not prev_bookmark: - prev_bookmark = new_bookmark + if prev_bookmark < new_bookmark: + state = self.__advance_bookmark(state, prev_bookmark) + prev_bookmark = new_bookmark - if prev_bookmark < new_bookmark: - state = self.__advance_bookmark(state, prev_bookmark) - prev_bookmark = new_bookmark return self.__advance_bookmark(state, prev_bookmark) except Exception: From 84a5fd54e5b9bfdeb84b5433f6309bd4f0ce3190 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Wed, 15 Apr 2020 00:41:33 +0200 Subject: [PATCH 57/78] delete unused import --- tap_hubspot/__init__.py | 2 -- tap_hubspot/hubspot.py | 4 ---- tap_hubspot/stream.py | 1 - 3 files changed, 7 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 5f4b2e71..51037283 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -1,6 +1,4 @@ #!/usr/bin/env python3 -import os -import sys import json import singer from singer import utils, metadata, Catalog, CatalogEntry, Schema diff --git a/tap_hubspot/hubspot.py 
b/tap_hubspot/hubspot.py index 80472e8e..3068f261 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -1,13 +1,9 @@ import requests -from dateutil import parser -import time from ratelimit import limits import ratelimit import singer import backoff -import sys import datetime -from singer import utils LOGGER = singer.get_logger() diff --git a/tap_hubspot/stream.py b/tap_hubspot/stream.py index 8ac20871..154ee81d 100644 --- a/tap_hubspot/stream.py +++ b/tap_hubspot/stream.py @@ -4,7 +4,6 @@ CatalogEntry, Transformer, UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING, - _transform_datetime, ) from typing import Union from datetime import timedelta, datetime From 0d1c816ffac05689b4841e49d5ab7888046017eb Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Wed, 15 Apr 2020 00:44:02 +0200 Subject: [PATCH 58/78] cr: add a function for each endpoint and use pagination --- tap_hubspot/hubspot.py | 250 +++++++++++++++++++++++++---------------- tap_hubspot/stream.py | 6 +- 2 files changed, 157 insertions(+), 99 deletions(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 3068f261..cfebe6b3 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -4,74 +4,143 @@ import singer import backoff import datetime +from typing import Dict LOGGER = singer.get_logger() class Hubspot: BASE_URL = "https://api.hubapi.com" - ENDPOINTS = { - "companies": "/companies/v2/companies/paged", - "contacts": "/contacts/v1/lists/all/contacts/all", - "deal_pipelines": "/crm-pipelines/v1/pipelines/deals", - "deals": "/deals/v1/deal/paged", - "email_events": "/email/public/v1/events", - "engagements": "/engagements/v1/engagements/paged", - "forms": "/forms/v2/forms", - } - DATA_PATH = { - "companies": "companies", - "contacts": "contacts", - "deal_pipelines": "results", - "deals": "deals", - "email_events": "events", - "engagements": "results", - } - REPLICATION_PATH = { - "companies": ["properties", "hs_lastmodifieddate", "timestamp",], - "contacts": ["properties", "lastmodifieddate", "value"], - "deal_pipelines": ["updatedAt"], - "deals": ["properties", "hs_lastmodifieddate", "timestamp"], - "email_events": ["created"], - "engagements": ["engagement", "lastUpdated"], - "forms": ["updatedAt"], - } - - def __init__(self, config, tap_stream_id, properties, limit=250): + + def __init__(self, config, limit=250): self.SESSION = requests.Session() self.limit = limit self.access_token = None - self.tap_stream_id = tap_stream_id self.config = config self.refresh_access_token() - self.endpoint = self.ENDPOINTS[tap_stream_id] - self.offset_value = None - self.offset_key = None - self.hasmore = True - self.PARAMS = { - "companies": {"limit": self.limit, "properties": properties,}, - "contacts": { - "showListMemberships": True, - "includeVersion": True, - "count": self.limit, - }, - "engagements": {"limit": self.limit}, - "deals": { - "count": self.limit, - "includeAssociations": False, - "properties": properties, - "limit": self.limit, - }, + + def streams(self, tap_stream_id, start_date, end_date, properties): + if tap_stream_id == "companies": + yield from self.get_companies(properties) + elif tap_stream_id == "contacts": + yield from self.get_contacts() + elif tap_stream_id == "engagements": + yield from self.get_engagements() + elif tap_stream_id == "deal_pipelines": + yield from self.get_deal_pipelines() + elif tap_stream_id == "deals": + yield from self.get_deals(properties) + elif tap_stream_id == "email_events": + start_date = self.datetime_to_milliseconds(start_date) + end_date = 
self.datetime_to_milliseconds(end_date) + yield from self.get_email_events(start_date, end_date) + elif tap_stream_id == "forms": + yield from self.get_forms() + else: + return [] + + def get_companies(self, properties): + path = "/contacts/v1/lists/all/contacts/all" + data_field = "companies" + replication_path = ["properties", "hs_lastmodifieddate", "timestamp"] + params = { + "limit": self.limit, + "properties": properties, + } + offset_key = "offset" + yield from self.get_records( + path, + replication_path, + params=params, + data_field=data_field, + offset_key=offset_key, + ) + + def get_contacts(self): + path = "/contacts/v1/lists/all/contacts/all" + data_field = "contacts" + replication_path = ["properties", "lastmodifieddate", "value"] + params = { + "showListMemberships": True, + "includeVersion": True, + "count": self.limit, } + offset_key = "vid-offset" + yield from self.get_records( + path, + replication_path, + params=params, + data_field=data_field, + offset_key=offset_key, + ) + + def get_engagements(self): + path = "/engagements/v1/engagements/paged" + data_field = "results" + replication_path = ["engagement", "lastUpdated"] + params = {"limit": self.limit} + offset_key = "offset" + yield from self.get_records( + path, + replication_path, + params=params, + data_field=data_field, + offset_key=offset_key, + ) + + def get_deal_pipelines(self): + path = "/crm-pipelines/v1/pipelines/deals" + data_field = "results" + replication_path = ["updatedAt"] + yield from self.get_records(path, replication_path, data_field=data_field) + + def get_deals(self, properties): + path = "/deals/v1/deal/paged" + data_field = "deals" + replication_path = ["properties", "hs_lastmodifieddate", "timestamp"] + params = { + "count": self.limit, + "includeAssociations": False, + "properties": properties, + "limit": self.limit, + } + offset_key = "offset" + yield from self.get_records( + path, + replication_path, + params=params, + data_field=data_field, + offset_key=offset_key, + ) - def get_url_params(self, start_date, end_date): - url = f"{self.BASE_URL}{self.endpoint}" - params = self.PARAMS.get(self.tap_stream_id, {}) - if self.tap_stream_id == "email_events": - params = {"startTimestamp": start_date, "endTimestamp": end_date} - if self.offset_value: - params[self.offset_key] = self.offset_value - return url, params + def get_email_events(self, start_date, end_date): + path = "/email/public/v1/events" + data_field = "events" + replication_path = ["created"] + params = {"startTimestamp": start_date, "endTimestamp": end_date} + offset_key = "offset" + + yield from self.get_records( + path, + replication_path, + params=params, + data_field=data_field, + offset_key=offset_key, + ) + + def get_forms(self): + path = "/forms/v2/forms" + replication_path = ["updatedAt"] + yield from self.get_records(path, replication_path) + + def get_records( + self, path, replication_path, params={}, data_field=None, offset_key=None + ): + for record in self.paginate( + path, params=params, data_field=data_field, offset_key=offset_key, + ): + replication_value = self.get_replication_value(record, replication_path) + yield record, replication_value def get_replication_value( self, obj: dict, path_to_replication_key=None, default=None @@ -94,29 +163,34 @@ def milliseconds_to_datetime(self, ms: str): def datetime_to_milliseconds(self, d: datetime.datetime): return int(d.timestamp() * 1000) if d else None - def get_records(self, start_date, end_date): - while self.hasmore: - url, params = self.get_url_params(start_date, 
end_date) - records = self.call_api(url, params=params) - if records: - replication_value = map( - lambda record: self.get_replication_value( - obj=record, - path_to_replication_key=self.REPLICATION_PATH.get( - self.tap_stream_id - ), - ), - records, - ) - yield from zip(records, replication_value) + def paginate( + self, path: str, params: Dict = None, data_field: str = None, offset_key=None + ): + offset_value = None + while True: + if offset_value: + if offset_key == "vid-offset": + params["vidOffset"] = offset_value + else: + params[offset_key] = offset_value + + data = self.call_api(path, params=params) + + if not data_field: + # non paginated list + yield from data + return else: + d = data.get(data_field, []) + yield from d + if not d: + return + + if offset_key: + offset_value = data.get(offset_key) + if not offset_value: break - def streams(self, start_date, end_date): - start_date = self.datetime_to_milliseconds(start_date) - end_date = self.datetime_to_milliseconds(end_date) - yield from self.get_records(start_date, end_date) - @backoff.on_exception( backoff.expo, ( @@ -128,31 +202,13 @@ def streams(self, start_date, end_date): @limits(calls=100, period=10) def call_api(self, url, params={}): response = self.SESSION.get( - url, headers={"Authorization": f"Bearer {self.access_token}"}, params=params + f"{self.BASE_URL}{url}", + headers={"Authorization": f"Bearer {self.access_token}"}, + params=params, ) LOGGER.info(response.url) response.raise_for_status() - data = self.get_offset(response.json()) - - return data - - def get_offset(self, data): - data_path = self.DATA_PATH.get(self.tap_stream_id) - if isinstance(data, list): - self.hasmore = False - return data - - if self.tap_stream_id == "deal_pipelines": - self.hasmore = False - - offset = [k for k in data.keys() if k.endswith("offset")] - if offset: - offset = offset[0] - self.offset_value = data.get(offset) - self.offset_key = "vidOffset" if offset == "vid-offset" else "offset" - data = data[data_path] if data_path else data - - return data + return response.json() def refresh_access_token(self): payload = { diff --git a/tap_hubspot/stream.py b/tap_hubspot/stream.py index 154ee81d..3935c2d9 100644 --- a/tap_hubspot/stream.py +++ b/tap_hubspot/stream.py @@ -22,7 +22,7 @@ def __init__(self, catalog: CatalogEntry, config): self.mdata = metadata.to_map(catalog.metadata) self.bookmark_key = self.mdata.get(()).get("valid-replication-keys")[0] self.config = config - self.hubspot = Hubspot(config, self.tap_stream_id, self.get_properties()) + self.hubspot = Hubspot(config) def get_properties(self): properties = [] @@ -41,7 +41,9 @@ def do_sync(self, state): start_date, end_date = self.__get_start_end(state) with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as transformer: try: - data = self.hubspot.streams(start_date, end_date) + data = self.hubspot.streams( + self.tap_stream_id, start_date, end_date, self.get_properties() + ) for d, replication_value in data: if replication_value and ( start_date >= replication_value or end_date <= replication_value From f2809ba88a6aea452f458f210f4c8fe888fa4d38 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Wed, 15 Apr 2020 10:31:21 +0200 Subject: [PATCH 59/78] cr: raise error if the function is not implemented --- tap_hubspot/hubspot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index cfebe6b3..4e94944a 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -37,7 +37,7 @@ def streams(self, tap_stream_id, 
start_date, end_date, properties): elif tap_stream_id == "forms": yield from self.get_forms() else: - return [] + raise NotImplementedError(f"unknown stream_id: {tap_stream_id}") def get_companies(self, properties): path = "/contacts/v1/lists/all/contacts/all" From 7779e8c8af7cb26d23a56f483046746bb36f9933 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Wed, 15 Apr 2020 10:52:31 +0200 Subject: [PATCH 60/78] update replication key --- tap_hubspot/__init__.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 51037283..273ebf5e 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -7,13 +7,10 @@ KEY_PROPERTIES = "id" STREAMS = { - "email_events": { - "valid_replication_keys": ["startTimestamp"], - "key_properties": "id", - }, + "email_events": {"valid_replication_keys": ["created"], "key_properties": "id",}, "forms": {"valid_replication_keys": ["updatedAt"], "key_properties": "guid",}, "contacts": { - "valid_replication_keys": ["versionTimestamp"], + "valid_replication_keys": ["lastmodifieddate"], "key_properties": "vid", }, "companies": { From 0a8e9c94d42c54ecd5337b81b23e5ea087457606 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Wed, 15 Apr 2020 12:04:13 +0200 Subject: [PATCH 61/78] fix up companies endpoint --- tap_hubspot/hubspot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 4e94944a..813f7150 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -40,7 +40,7 @@ def streams(self, tap_stream_id, start_date, end_date, properties): raise NotImplementedError(f"unknown stream_id: {tap_stream_id}") def get_companies(self, properties): - path = "/contacts/v1/lists/all/contacts/all" + path = "/companies/v2/companies/paged" data_field = "companies" replication_path = ["properties", "hs_lastmodifieddate", "timestamp"] params = { From 4c33da442806e0a4cdfc2182946d450fc7750b0b Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Thu, 16 Apr 2020 09:25:50 +0200 Subject: [PATCH 62/78] include associations --- tap_hubspot/hubspot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 813f7150..48d18450 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -100,7 +100,7 @@ def get_deals(self, properties): replication_path = ["properties", "hs_lastmodifieddate", "timestamp"] params = { "count": self.limit, - "includeAssociations": False, + "includeAssociations": True, "properties": properties, "limit": self.limit, } From ff7f71f437c2e26a0490623ace900294d6fd199a Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Thu, 23 Apr 2020 12:15:20 +0200 Subject: [PATCH 63/78] fix wrong key properties in schema --- tap_hubspot/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 273ebf5e..3e721a65 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -5,7 +5,6 @@ from tap_hubspot.stream import Stream from pathlib import Path -KEY_PROPERTIES = "id" STREAMS = { "email_events": {"valid_replication_keys": ["created"], "key_properties": "id",}, "forms": {"valid_replication_keys": ["updatedAt"], "key_properties": "guid",}, @@ -55,17 +54,18 @@ def discover() -> Catalog: streams = [] for tap_stream_id, props in STREAMS.items(): + key_properties = props.get("key_properties", None) schema = schemas[tap_stream_id] mdata = metadata.get_standard_metadata( schema=schema, - 
key_properties=props.get("key_properties", None), + key_properties=key_properties, valid_replication_keys=props.get("valid_replication_keys", []), ) streams.append( CatalogEntry( stream=tap_stream_id, tap_stream_id=tap_stream_id, - key_properties=KEY_PROPERTIES, + key_properties=key_properties, schema=Schema.from_dict(schema), metadata=mdata, ) From 291b42ad9ba31955ffbbed3a50d4f3c910bc2ad6 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Thu, 23 Apr 2020 12:15:44 +0200 Subject: [PATCH 64/78] add submissions stream --- tap_hubspot/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 3e721a65..e8db14d1 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -28,6 +28,7 @@ "valid_replication_keys": ["lastUpdated"], "key_properties": "engagement_id", }, + "submissions": {"valid_replication_keys": ["submittedAt"], "key_properties": []}, } REQUIRED_CONFIG_KEYS = [ "start_date", From bf29a89318a31b428df580b9e60fe017d56a420d Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Thu, 23 Apr 2020 12:18:28 +0200 Subject: [PATCH 65/78] add submissions schema --- tap_hubspot/schemas/submissions.json | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 tap_hubspot/schemas/submissions.json diff --git a/tap_hubspot/schemas/submissions.json b/tap_hubspot/schemas/submissions.json new file mode 100644 index 00000000..8324e5bb --- /dev/null +++ b/tap_hubspot/schemas/submissions.json @@ -0,0 +1,26 @@ +{ + "type": "object", + "properties": { + "submittedAt": { + "type": ["null", "string"], + "format": "date-time" + }, + "values": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "name": { + "type": ["null", "string"] + }, + "value": { + "type": ["null", "string"] + } + } + } + }, + "pageUrl": { + "type": ["null", "string"] + } + } +} From 51a19267e0819cc5f8a17543679a9c72602c6398 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Thu, 23 Apr 2020 12:21:28 +0200 Subject: [PATCH 66/78] get submissions data --- tap_hubspot/hubspot.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 48d18450..152cfc7a 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -36,6 +36,8 @@ def streams(self, tap_stream_id, start_date, end_date, properties): yield from self.get_email_events(start_date, end_date) elif tap_stream_id == "forms": yield from self.get_forms() + elif tap_stream_id == "submissions": + yield from self.get_submissions() else: raise NotImplementedError(f"unknown stream_id: {tap_stream_id}") @@ -133,6 +135,24 @@ def get_forms(self): replication_path = ["updatedAt"] yield from self.get_records(path, replication_path) + def get_submissions(self): + # submission data is retrieved according to guid from forms + replication_path = ["submittedAt"] + data_field = "results" + offset_key = "after" + params = {"limit": 50} # maxmimum limit is 50 + forms = self.get_forms() + for form, _ in forms: + guid = form["guid"] + path = f"/form-integrations/v1/submissions/forms/{guid}" + yield from self.get_records( + path, + replication_path, + params=params, + data_field=data_field, + offset_key=offset_key, + ) + def get_records( self, path, replication_path, params={}, data_field=None, offset_key=None ): From 2dd374cbbaef806a969613ca500033e7861ed9e3 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Thu, 23 Apr 2020 12:22:02 +0200 Subject: [PATCH 67/78] submission pagination and reuse get_replication_value func --- 
tap_hubspot/hubspot.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 152cfc7a..65009bac 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -159,19 +159,19 @@ def get_records( for record in self.paginate( path, params=params, data_field=data_field, offset_key=offset_key, ): - replication_value = self.get_replication_value(record, replication_path) + replication_value = self.milliseconds_to_datetime( + self.get_value(record, replication_path) + ) yield record, replication_value - def get_replication_value( - self, obj: dict, path_to_replication_key=None, default=None - ): + def get_value(self, obj: dict, path_to_replication_key=None, default=None): if not path_to_replication_key: return default for path_element in path_to_replication_key: obj = obj.get(path_element) if not obj: return default - return self.milliseconds_to_datetime(obj) + return obj def milliseconds_to_datetime(self, ms: str): return ( @@ -207,7 +207,10 @@ def paginate( return if offset_key: - offset_value = data.get(offset_key) + if "paging" in data: + offset_value = self.get_value(data, ["paging", "next", "after"]) + else: + offset_value = data.get(offset_key) if not offset_value: break From 448baed836b665568a8ff88594d95494709e51cf Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Sat, 25 Apr 2020 20:26:18 +0200 Subject: [PATCH 68/78] transform dash to underscore for schema and record --- tap_hubspot/__init__.py | 3 +- tap_hubspot/hubspot.py | 2 ++ tap_hubspot/util.py | 62 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 tap_hubspot/util.py diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index e8db14d1..1b8ea9ea 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -4,6 +4,7 @@ from singer import utils, metadata, Catalog, CatalogEntry, Schema from tap_hubspot.stream import Stream from pathlib import Path +from tap_hubspot.util import schema_nodash STREAMS = { "email_events": {"valid_replication_keys": ["created"], "key_properties": "id",}, @@ -45,7 +46,7 @@ def load_schemas(): schemas_path = Path(__file__).parent.absolute() / "schemas" for schema_path in schemas_path.iterdir(): stream_name = schema_path.stem - schemas[stream_name] = json.loads(schema_path.read_text()) + schemas[stream_name] = schema_nodash(json.loads(schema_path.read_text())) return schemas diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 65009bac..1bb30f38 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -5,6 +5,7 @@ import backoff import datetime from typing import Dict +from tap_hubspot.util import record_nodash LOGGER = singer.get_logger() @@ -159,6 +160,7 @@ def get_records( for record in self.paginate( path, params=params, data_field=data_field, offset_key=offset_key, ): + record = record_nodash(record) replication_value = self.milliseconds_to_datetime( self.get_value(record, replication_path) ) diff --git a/tap_hubspot/util.py b/tap_hubspot/util.py new file mode 100644 index 00000000..2e3a2396 --- /dev/null +++ b/tap_hubspot/util.py @@ -0,0 +1,62 @@ +import copy + + +def record_nodash(obj): + transformed_obj = copy.deepcopy(obj) + + if not isinstance(obj, (dict, list)): + return obj + if isinstance(obj, dict): + for key in obj: + value = record_nodash(obj[key]) + transformed_obj.pop(key) + key = key.replace("-", "_") + transformed_obj[key] = value + if isinstance(obj, list): + for i in range(len(obj)): + value = 
record_nodash(obj[i]) + transformed_obj[i] = value + return transformed_obj + + +def schema_nodash(obj): + type_field = obj.get("type") + type = get_type(type_field) + if not type: + return obj + if not type in ["array", "object"]: + return obj + if "object" == type: + props = obj.get("properties", {}) + new_props = replace_props(props) + obj["properties"] = new_props + if "array" == type: + items = obj.get("items", {}) + obj["items"] = schema_nodash(items) + return obj + + +def get_type(type_field): + if isinstance(type_field, str): + return type_field + if isinstance(type_field, list): + types = set(type_field) + if "null" in types: + types.remove("null") + return types.pop() + return None + + +def replace_props(props): + if not props: + return props + keys = list(props.keys()) + for k in keys: + if not "-" in k: + props[k] = schema_nodash(props[k]) + else: + v = props.pop(k) + new_key = k.replace("-", "_") + new_value = schema_nodash(v) + props[new_key] = new_value + return props From 50e8e3797c0e6cf691c58e7b6d66e9c7fd0bc717 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Sat, 25 Apr 2020 20:26:44 +0200 Subject: [PATCH 69/78] only use record_nodash when necessary --- tap_hubspot/hubspot.py | 24 +++++++++++++----------- tap_hubspot/stream.py | 8 +++----- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 1bb30f38..d5fd4a14 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -13,34 +13,35 @@ class Hubspot: BASE_URL = "https://api.hubapi.com" - def __init__(self, config, limit=250): + def __init__(self, config, tap_stream_id, limit=250): self.SESSION = requests.Session() self.limit = limit self.access_token = None self.config = config self.refresh_access_token() + self.tap_stream_id = tap_stream_id - def streams(self, tap_stream_id, start_date, end_date, properties): - if tap_stream_id == "companies": + def streams(self, start_date, end_date, properties): + if self.tap_stream_id == "companies": yield from self.get_companies(properties) - elif tap_stream_id == "contacts": + elif self.tap_stream_id == "contacts": yield from self.get_contacts() - elif tap_stream_id == "engagements": + elif self.tap_stream_id == "engagements": yield from self.get_engagements() - elif tap_stream_id == "deal_pipelines": + elif self.tap_stream_id == "deal_pipelines": yield from self.get_deal_pipelines() - elif tap_stream_id == "deals": + elif self.tap_stream_id == "deals": yield from self.get_deals(properties) - elif tap_stream_id == "email_events": + elif self.tap_stream_id == "email_events": start_date = self.datetime_to_milliseconds(start_date) end_date = self.datetime_to_milliseconds(end_date) yield from self.get_email_events(start_date, end_date) - elif tap_stream_id == "forms": + elif self.tap_stream_id == "forms": yield from self.get_forms() - elif tap_stream_id == "submissions": + elif self.tap_stream_id == "submissions": yield from self.get_submissions() else: - raise NotImplementedError(f"unknown stream_id: {tap_stream_id}") + raise NotImplementedError(f"unknown stream_id: {self.tap_stream_id}") def get_companies(self, properties): path = "/companies/v2/companies/paged" @@ -160,6 +161,7 @@ def get_records( for record in self.paginate( path, params=params, data_field=data_field, offset_key=offset_key, ): + if self.tap_stream_id == "contacts": record = record_nodash(record) replication_value = self.milliseconds_to_datetime( self.get_value(record, replication_path) diff --git a/tap_hubspot/stream.py b/tap_hubspot/stream.py 
index 3935c2d9..176e4484 100644 --- a/tap_hubspot/stream.py +++ b/tap_hubspot/stream.py @@ -22,7 +22,7 @@ def __init__(self, catalog: CatalogEntry, config): self.mdata = metadata.to_map(catalog.metadata) self.bookmark_key = self.mdata.get(()).get("valid-replication-keys")[0] self.config = config - self.hubspot = Hubspot(config) + self.hubspot = Hubspot(config, self.tap_stream_id) def get_properties(self): properties = [] @@ -41,15 +41,13 @@ def do_sync(self, state): start_date, end_date = self.__get_start_end(state) with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as transformer: try: - data = self.hubspot.streams( - self.tap_stream_id, start_date, end_date, self.get_properties() - ) + data = self.hubspot.streams(start_date, end_date, self.get_properties()) for d, replication_value in data: if replication_value and ( start_date >= replication_value or end_date <= replication_value ): continue - + record = transformer.transform(d, self.schema, self.mdata) singer.write_record(self.tap_stream_id, record) if not replication_value: From be6a95e5d4a9fb91b210df53ee413e74f46f72a2 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Sat, 25 Apr 2020 20:26:58 +0200 Subject: [PATCH 70/78] update contacts schema --- tap_hubspot/schemas/contacts.json | 116 ++++++++++++++++++++++++++---- 1 file changed, 104 insertions(+), 12 deletions(-) diff --git a/tap_hubspot/schemas/contacts.json b/tap_hubspot/schemas/contacts.json index 54e4d3d2..0a6812c2 100644 --- a/tap_hubspot/schemas/contacts.json +++ b/tap_hubspot/schemas/contacts.json @@ -1,22 +1,41 @@ { "type": "object", "properties": { + "addedAt": { + "type": ["null", "string"], + "format": "date-time" + }, "vid": { "type": ["null", "integer"] }, + "canonical-vid": { + "type": ["null", "integer"] + }, + "portal-id": { + "type": ["null", "integer"] + }, + "is-contact": { + "type": ["null", "boolean"] + }, + "profile-token": { + "type": ["null", "string"] + }, + "profile-url": { + "type": ["null", "string"] + }, "properties": { - "type": "object", + "type": ["null", "object"], "properties": { - "email": { - "type": "object", + "firstname": { + "type": ["null", "object"], "properties": { "value": { "type": ["null", "string"] } } }, - "createdate": { - "type": "object", + "lastmodifieddate": { + "type": ["null", "object"], "properties": { "value": { "type": ["null", "string"], @@ -24,20 +43,93 @@ } } }, - "lastmodifieddate": { - "type": "object", + "company": { + "type": ["null", "object"], "properties": { "value": { - "type": ["null", "string"], - "format": "date-time" + "type": "string" } } }, - "associatedcompanyid": { - "type": "object", + "lastname": { + "type": ["null", "object"], "properties": { "value": { - "type": ["null", "number"] + "type": ["null", "string"] + } + } + } + } + }, + "form-submissions": { + "type": ["null", "array"], + + "items": { + "type": ["null", "object"], + "properties": { + "conversion-id": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "form-id": { + "type": ["null", "string"] + }, + "portal-id": { + "type": ["null", "integer"] + }, + "title": { + "type": ["null", "string"] + }, + "form-type": { + "type": ["null", "string"] + }, + "contact-associated-by": { + "type": "array", + "items": { + "type": ["null", "string"] + } + } + } + } + }, + "identity-profiles": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "vid": { + "type": ["null", "integer"] + }, + "saved-at-timestamp": { + "type": ["null", "string"], + 
"format": "date-time" + }, + "deleted-changed-timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "identities": { + "type": ["null", "array"], + "items": { + "type": ["null", "object"], + "properties": { + "type": { + "type": ["null", "string"] + }, + "value": { + "type": ["null", "string"] + }, + "timestamp": { + "type": ["null", "string"], + "format": "date-time" + }, + "is-primary": { + "type": ["null", "boolean"] + } + } } } } From c2ab88c40dbb657a6c979fe4205c688df8338aa1 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Sun, 26 Apr 2020 21:26:40 +0200 Subject: [PATCH 71/78] get companyId and dealId according to contact vids --- tap_hubspot/hubspot.py | 18 +++++++++++++++++- tap_hubspot/stream.py | 2 ++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index d5fd4a14..f90ec014 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -12,6 +12,7 @@ class Hubspot: BASE_URL = "https://api.hubapi.com" + CONTACT_DEFINITION_IDS = {"companyId": 1, "dealId": 4} def __init__(self, config, tap_stream_id, limit=250): self.SESSION = requests.Session() @@ -78,6 +79,21 @@ def get_contacts(self): offset_key=offset_key, ) + def get_association(self, vid, definition_id): + path = ( + f"/crm-associations/v1/associations/{vid}/HUBSPOT_DEFINED/{definition_id}" + ) + record = self.call_api(url=path)["results"] + if record: + return int(record[0]) + else: + return None + + def set_associations(self, record): + for association, definition_id in self.CONTACT_DEFINITION_IDS.items(): + record[association] = self.get_association(record["vid"], definition_id) + return record + def get_engagements(self): path = "/engagements/v1/engagements/paged" data_field = "results" @@ -156,7 +172,7 @@ def get_submissions(self): ) def get_records( - self, path, replication_path, params={}, data_field=None, offset_key=None + self, path, replication_path=None, params={}, data_field=None, offset_key=None ): for record in self.paginate( path, params=params, data_field=data_field, offset_key=offset_key, diff --git a/tap_hubspot/stream.py b/tap_hubspot/stream.py index 176e4484..51b19635 100644 --- a/tap_hubspot/stream.py +++ b/tap_hubspot/stream.py @@ -47,6 +47,8 @@ def do_sync(self, state): start_date >= replication_value or end_date <= replication_value ): continue + if self.tap_stream_id == "contacts": + d = self.hubspot.set_associations(d) record = transformer.transform(d, self.schema, self.mdata) singer.write_record(self.tap_stream_id, record) From 881441173291679ecd2cfc6daad6bdcef52bfbdd Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Sun, 26 Apr 2020 21:27:15 +0200 Subject: [PATCH 72/78] add companyId and dealId in contact schema --- tap_hubspot/schemas/contacts.json | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tap_hubspot/schemas/contacts.json b/tap_hubspot/schemas/contacts.json index 0a6812c2..64a95210 100644 --- a/tap_hubspot/schemas/contacts.json +++ b/tap_hubspot/schemas/contacts.json @@ -8,6 +8,12 @@ "vid": { "type": ["null", "integer"] }, + "companyId": { + "type": ["null", "integer"] + }, + "dealId": { + "type": ["null", "integer"] + }, "canonical-vid": { "type": ["null", "integer"] }, From 9a77e47247eccb65b5c03c4fecaecb7f7138ade6 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 28 Apr 2020 00:43:23 +0200 Subject: [PATCH 73/78] do not sync dealId --- tap_hubspot/hubspot.py | 2 +- tap_hubspot/schemas/contacts.json | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/tap_hubspot/hubspot.py 
b/tap_hubspot/hubspot.py index f90ec014..914fbddc 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -12,7 +12,7 @@ class Hubspot: BASE_URL = "https://api.hubapi.com" - CONTACT_DEFINITION_IDS = {"companyId": 1, "dealId": 4} + CONTACT_DEFINITION_IDS = {"companyId": 1} def __init__(self, config, tap_stream_id, limit=250): self.SESSION = requests.Session() diff --git a/tap_hubspot/schemas/contacts.json b/tap_hubspot/schemas/contacts.json index 64a95210..6720599f 100644 --- a/tap_hubspot/schemas/contacts.json +++ b/tap_hubspot/schemas/contacts.json @@ -11,9 +11,6 @@ "companyId": { "type": ["null", "integer"] }, - "dealId": { - "type": ["null", "integer"] - }, "canonical-vid": { "type": ["null", "integer"] }, From 64c3cfda4a73b711beb3e0067078fa560ec36b6a Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 28 Apr 2020 15:19:57 +0200 Subject: [PATCH 74/78] use v3 contacts api --- tap_hubspot/__init__.py | 5 +- tap_hubspot/hubspot.py | 36 +++---- tap_hubspot/schemas/contacts.json | 165 ++++++++---------------------- tap_hubspot/stream.py | 2 - 4 files changed, 59 insertions(+), 149 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 1b8ea9ea..cf939313 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -9,10 +9,7 @@ STREAMS = { "email_events": {"valid_replication_keys": ["created"], "key_properties": "id",}, "forms": {"valid_replication_keys": ["updatedAt"], "key_properties": "guid",}, - "contacts": { - "valid_replication_keys": ["lastmodifieddate"], - "key_properties": "vid", - }, + "contacts": {"valid_replication_keys": ["updatedAt"], "key_properties": "id",}, "companies": { "valid_replication_keys": ["hs_lastmodifieddate"], "key_properties": "companyId", diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 914fbddc..2eeea30e 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -6,6 +6,7 @@ import datetime from typing import Dict from tap_hubspot.util import record_nodash +from dateutil import parser LOGGER = singer.get_logger() @@ -26,7 +27,7 @@ def streams(self, start_date, end_date, properties): if self.tap_stream_id == "companies": yield from self.get_companies(properties) elif self.tap_stream_id == "contacts": - yield from self.get_contacts() + yield from self.get_contacts(properties) elif self.tap_stream_id == "engagements": yield from self.get_engagements() elif self.tap_stream_id == "deal_pipelines": @@ -61,16 +62,15 @@ def get_companies(self, properties): offset_key=offset_key, ) - def get_contacts(self): - path = "/contacts/v1/lists/all/contacts/all" - data_field = "contacts" - replication_path = ["properties", "lastmodifieddate", "value"] + def get_contacts(self, properties): + path = "/crm/v3/objects/contacts" + data_field = "results" + offset_key = "after" + replication_path = ["updatedAt"] params = { - "showListMemberships": True, - "includeVersion": True, - "count": self.limit, + "limit": 100, + "properties": properties, } - offset_key = "vid-offset" yield from self.get_records( path, replication_path, @@ -177,11 +177,14 @@ def get_records( for record in self.paginate( path, params=params, data_field=data_field, offset_key=offset_key, ): - if self.tap_stream_id == "contacts": - record = record_nodash(record) - replication_value = self.milliseconds_to_datetime( - self.get_value(record, replication_path) - ) + if self.tap_stream_id in ["contacts"]: + replication_value = parser.isoparse( + self.get_value(record, replication_path) + ) + else: + replication_value = 
self.milliseconds_to_datetime( + self.get_value(record, replication_path) + ) yield record, replication_value def get_value(self, obj: dict, path_to_replication_key=None, default=None): @@ -209,10 +212,7 @@ def paginate( offset_value = None while True: if offset_value: - if offset_key == "vid-offset": - params["vidOffset"] = offset_value - else: - params[offset_key] = offset_value + params[offset_key] = offset_value data = self.call_api(path, params=params) diff --git a/tap_hubspot/schemas/contacts.json b/tap_hubspot/schemas/contacts.json index 6720599f..55fd0b58 100644 --- a/tap_hubspot/schemas/contacts.json +++ b/tap_hubspot/schemas/contacts.json @@ -1,142 +1,57 @@ { "type": "object", "properties": { - "addedAt": { - "type": ["null", "string"], - "format": "date-time" - }, - "vid": { - "type": ["null", "integer"] - }, - "companyId": { - "type": ["null", "integer"] - }, - "canonical-vid": { - "type": ["null", "integer"] - }, - "portal-id": { - "type": ["null", "integer"] - }, - "is-contact": { - "type": ["null", "boolean"] - }, - "profile-token": { - "type": ["null", "string"] - }, - "profile-url": { + "id": { "type": ["null", "string"] }, "properties": { - "type": ["null", "object"], + "type": "object", "properties": { - "firstname": { - "type": ["null", "object"], - "properties": { - "value": { - "type": ["null", "string"] - } - } + "associatedcompanyid": { + "type": ["null", "string"] + }, + "country": { + "type": ["null", "string"] + }, + "createdate": { + "type": ["null", "string"] + }, + "email": { + "type": ["null", "string"] }, - "lastmodifieddate": { - "type": ["null", "object"], - "properties": { - "value": { - "type": ["null", "string"], - "format": "date-time" - } - } + "hs_email_domain": { + "type": ["null", "string"] }, - "company": { - "type": ["null", "object"], - "properties": { - "value": { - "type": "string" - } - } + "hs_object_id": { + "type": ["null", "string"] }, - "lastname": { - "type": ["null", "object"], - "properties": { - "value": { - "type": ["null", "string"] - } - } + "ip_country_code": { + "type": ["null", "string"] + }, + "ip_state": { + "type": ["null", "string"] + }, + "jobtitle": { + "type": ["null", "string"] + }, + "num_associated_deals": { + "type": ["null", "string"] + }, + "state": { + "type": ["null", "string"] } } }, - "form-submissions": { - "type": ["null", "array"], - - "items": { - "type": ["null", "object"], - "properties": { - "conversion-id": { - "type": ["null", "string"] - }, - "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "form-id": { - "type": ["null", "string"] - }, - "portal-id": { - "type": ["null", "integer"] - }, - "title": { - "type": ["null", "string"] - }, - "form-type": { - "type": ["null", "string"] - }, - "contact-associated-by": { - "type": "array", - "items": { - "type": ["null", "string"] - } - } - } - } + "createdAt": { + "type": ["null", "string"], + "format": ["null", "date-time"] }, - "identity-profiles": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "vid": { - "type": ["null", "integer"] - }, - "saved-at-timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "deleted-changed-timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - "identities": { - "type": ["null", "array"], - "items": { - "type": ["null", "object"], - "properties": { - "type": { - "type": ["null", "string"] - }, - "value": { - "type": ["null", "string"] - }, - "timestamp": { - "type": ["null", "string"], - "format": "date-time" - }, - 
"is-primary": { - "type": ["null", "boolean"] - } - } - } - } - } - } + "updatedAt": { + "type": ["null", "string"], + "format": ["null", "date-time"] + }, + "archived": { + "type": ["null", "boolean"] } } } diff --git a/tap_hubspot/stream.py b/tap_hubspot/stream.py index 51b19635..176e4484 100644 --- a/tap_hubspot/stream.py +++ b/tap_hubspot/stream.py @@ -47,8 +47,6 @@ def do_sync(self, state): start_date >= replication_value or end_date <= replication_value ): continue - if self.tap_stream_id == "contacts": - d = self.hubspot.set_associations(d) record = transformer.transform(d, self.schema, self.mdata) singer.write_record(self.tap_stream_id, record) From 9b92676a5d7dafe582b9e62149042ac15d76aad5 Mon Sep 17 00:00:00 2001 From: JingLin0 Date: Tue, 28 Apr 2020 19:45:41 +0200 Subject: [PATCH 75/78] update contacts schema --- tap_hubspot/schemas/contacts.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tap_hubspot/schemas/contacts.json b/tap_hubspot/schemas/contacts.json index 55fd0b58..97031c59 100644 --- a/tap_hubspot/schemas/contacts.json +++ b/tap_hubspot/schemas/contacts.json @@ -14,7 +14,8 @@ "type": ["null", "string"] }, "createdate": { - "type": ["null", "string"] + "type": ["null", "string"], + "format": "date-time" }, "email": { "type": ["null", "string"] @@ -44,11 +45,11 @@ }, "createdAt": { "type": ["null", "string"], - "format": ["null", "date-time"] + "format": "date-time" }, "updatedAt": { "type": ["null", "string"], - "format": ["null", "date-time"] + "format": "date-time" }, "archived": { "type": ["null", "boolean"] From 1de448aadb90f13164091942c2b84c0368711d24 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Tue, 5 May 2020 14:51:57 +0200 Subject: [PATCH 76/78] add ReadTimeout to handle normal timeouts --- tap_hubspot/hubspot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 2eeea30e..4ef63f74 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -238,6 +238,7 @@ def paginate( backoff.expo, ( requests.exceptions.RequestException, + requests.exceptions.ReadTimeout, requests.exceptions.HTTPError, ratelimit.exception.RateLimitException, ), From e34c31a3c6579386353a33851e999159ac0076ef Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. Madsen" Date: Tue, 5 May 2020 14:52:32 +0200 Subject: [PATCH 77/78] add max_tries=10, so we don't infinitely retry --- tap_hubspot/hubspot.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 4ef63f74..2787f751 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -242,6 +242,7 @@ def paginate( requests.exceptions.HTTPError, ratelimit.exception.RateLimitException, ), + max_tries=10, ) @limits(calls=100, period=10) def call_api(self, url, params={}): From 7721e6673a258efb61dac586afca27da4d9e9883 Mon Sep 17 00:00:00 2001 From: "Patrick-Ranjit D. 
Madsen" Date: Tue, 5 May 2020 14:55:48 +0200 Subject: [PATCH 78/78] refresh token after the first 401 --- tap_hubspot/hubspot.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tap_hubspot/hubspot.py b/tap_hubspot/hubspot.py index 2787f751..eaefb5e6 100644 --- a/tap_hubspot/hubspot.py +++ b/tap_hubspot/hubspot.py @@ -1,3 +1,4 @@ +import sys import requests from ratelimit import limits import ratelimit @@ -246,11 +247,20 @@ def paginate( ) @limits(calls=100, period=10) def call_api(self, url, params={}): - response = self.SESSION.get( - f"{self.BASE_URL}{url}", - headers={"Authorization": f"Bearer {self.access_token}"}, - params=params, - ) + url = f"{self.BASE_URL}{url}" + headers = {"Authorization": f"Bearer {self.access_token}"} + + try: + response = self.SESSION.get(url, headers=headers, params=params) + except requests.exceptions.HTTPError as err: + if not err.response.status_code == 401: + raise + + # attempt to refresh access token + self.refresh_access_token() + headers = {"Authorization": f"Bearer {self.access_token}"} + response = self.SESSION.get(url, headers=headers, params=params) + LOGGER.info(response.url) response.raise_for_status() return response.json()