From 8c249c625da7c67fa654f9ec0933df9e479ee91c Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Wed, 17 Jan 2024 15:06:25 +0000 Subject: [PATCH 01/11] add batch processing for contacts_by_company stream --- tap_hubspot/__init__.py | 64 +++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 17ae77e4..42ce021b 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -77,6 +77,7 @@ class StateFields: "companies_recent": "/companies/v2/companies/recent/modified", "companies_detail": "/companies/v2/companies/{company_id}", "contacts_by_company": "/companies/v2/companies/{company_id}/vids", + "contacts_by_company_v3_batch_read": "/crm/v3/associations/company/contact/batch/read", "deals_properties": "/properties/v1/deals/properties", "deals_all": "/deals/v1/deal/paged", @@ -560,32 +561,39 @@ def use_recent_companies_endpoint(response): default_contacts_by_company_params = {'count' : 100} # NB> to do: support stream aliasing and field selection -def _sync_contacts_by_company(STATE, ctx, company_id): +def _sync_contacts_by_company_batch_read(STATE, ctx, company_ids): schema = load_schema(CONTACTS_BY_COMPANY) catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get('metadata')) - url = get_url("contacts_by_company", company_id=company_id) - path = 'vids' + url = get_url("contacts_by_company_v3_batch_read") + with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: with metrics.record_counter(CONTACTS_BY_COMPANY) as counter: - data = request(url, default_contacts_by_company_params).json() - - if data.get(path) is None: - raise RuntimeError("Unexpected API response: {} not in {}".format(path, data.keys())) - - for row in data[path]: - counter.increment() - record = {'company-id' : company_id, - 'contact-id' : row} - record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata) - singer.write_record("contacts_by_company", record, time_extracted=utils.now()) - - return STATE + data = {'inputs': []} + [data['inputs'].append({'id': company_id}) for company_id in company_ids] + contacts_to_company_rows = post_search_endpoint(url, data).json() + + for row in contacts_to_company_rows['results']: + for contact in row['to']: + counter.increment() + record = {'company-id' : row['from']['id'], + 'contact-id' : contact['id']} + record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata) + singer.write_record("contacts_by_company", record, time_extracted=utils.now()) default_company_params = { 'limit': 250, 'properties': ["createdate", "hs_lastmodifieddate"] } +def sync_company_details(companies, catalog, mdata, schema, start): + with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: + for company in companies: + if not company['modified_time'] or company['modified_time'] >= start: + record = request(get_url("companies_detail", + company_id=company['id'])).json() + record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata) + singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) + def sync_companies(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get('metadata')) @@ -614,6 +622,9 @@ def sync_companies(STATE, ctx): contacts_by_company_schema = load_schema(CONTACTS_BY_COMPANY) singer.write_schema("contacts_by_company", 
contacts_by_company_schema, ["company-id", "contact-id"]) + # We will process the companies and contacts_by_cmpany records in batches of default limit size + # This list will store the company records until bucket size is reached and records finished + companies = [] with bumble_bee: for row in gen_request(STATE, 'companies', url, default_company_params, 'companies', 'has-more', ['offset'], ['offset']): row_properties = row['properties'] @@ -630,12 +641,23 @@ def sync_companies(STATE, ctx): if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time - if not modified_time or modified_time >= start: - record = request(get_url("companies_detail", company_id=row['companyId'])).json() - record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata) - singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) + companies.append({'id': row['companyId'], 'modified_time': modified_time}) + + # Process the company and contacts_by_ompany records once list size reaches default limit (=250) + if len(companies) >= default_company_params['limit']: + sync_company_details(companies, catalog, mdata, schema, start) + if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - STATE = _sync_contacts_by_company(STATE, ctx, record['companyId']) + _sync_contacts_by_company_batch_read(STATE, ctx, [company['id'] for company in companies]) + + companies = [] + + # Stream may have less the default limit (=250) records, also last batch may have less records than the limit set + # Following code will handle those remaining company records + sync_company_details(companies, catalog, mdata, schema, start) + + if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: + _sync_contacts_by_company_batch_read(STATE, ctx, [company['id'] for company in companies]) # Don't bookmark past the start of this sync to account for updated records during the sync. 
new_bookmark = min(max_bk_value, current_sync_start) From 2624a2ab6157083b7df8937e888531914a3da5fe Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Wed, 17 Jan 2024 15:31:33 +0000 Subject: [PATCH 02/11] fix lints --- tap_hubspot/__init__.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 42ce021b..418f796b 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -77,7 +77,7 @@ class StateFields: "companies_recent": "/companies/v2/companies/recent/modified", "companies_detail": "/companies/v2/companies/{company_id}", "contacts_by_company": "/companies/v2/companies/{company_id}/vids", - "contacts_by_company_v3_batch_read": "/crm/v3/associations/company/contact/batch/read", + "contacts_by_company_v3": "/crm/v3/associations/company/contact/batch/read", "deals_properties": "/properties/v1/deals/properties", "deals_all": "/deals/v1/deal/paged", @@ -565,14 +565,12 @@ def _sync_contacts_by_company_batch_read(STATE, ctx, company_ids): schema = load_schema(CONTACTS_BY_COMPANY) catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get('metadata')) - url = get_url("contacts_by_company_v3_batch_read") + url = get_url("contacts_by_company_v3") with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: with metrics.record_counter(CONTACTS_BY_COMPANY) as counter: - data = {'inputs': []} - [data['inputs'].append({'id': company_id}) for company_id in company_ids] - contacts_to_company_rows = post_search_endpoint(url, data).json() - + body = {'inputs': [{'id': company_id} for company_id in company_ids]} + contacts_to_company_rows = post_search_endpoint(url, body).json() for row in contacts_to_company_rows['results']: for contact in row['to']: counter.increment() @@ -589,8 +587,7 @@ def sync_company_details(companies, catalog, mdata, schema, start): with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: for company in companies: if not company['modified_time'] or company['modified_time'] >= start: - record = request(get_url("companies_detail", - company_id=company['id'])).json() + record = request(get_url("companies_detail", company_id=company['id'])).json() record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata) singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) @@ -648,7 +645,8 @@ def sync_companies(STATE, ctx): sync_company_details(companies, catalog, mdata, schema, start) if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - _sync_contacts_by_company_batch_read(STATE, ctx, [company['id'] for company in companies]) + company_ids = [company['id'] for company in companies] + _sync_contacts_by_company_batch_read(STATE, ctx, company_ids) companies = [] @@ -657,7 +655,8 @@ def sync_companies(STATE, ctx): sync_company_details(companies, catalog, mdata, schema, start) if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - _sync_contacts_by_company_batch_read(STATE, ctx, [company['id'] for company in companies]) + company_ids = [company['id'] for company in companies] + _sync_contacts_by_company_batch_read(STATE, ctx, company_ids) # Don't bookmark past the start of this sync to account for updated records during the sync. 
new_bookmark = min(max_bk_value, current_sync_start) @@ -1271,7 +1270,7 @@ def discover_schemas(): LOGGER.warning(warning_message) # Load the contacts_by_company schema LOGGER.info('Loading schema for contacts_by_company') - contacts_by_company = Stream('contacts_by_company', _sync_contacts_by_company, ['company-id', 'contact-id'], None, 'FULL_TABLE') + contacts_by_company = Stream('contacts_by_company', _sync_contacts_by_company_batch_read, ['company-id', 'contact-id'], None, 'FULL_TABLE') schema, mdata = load_discovered_schema(contacts_by_company) result['streams'].append({'stream': CONTACTS_BY_COMPANY, From 947772a9f1ef2b6d6db6a858100cf38c357567a0 Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Thu, 18 Jan 2024 04:46:13 +0000 Subject: [PATCH 03/11] batch only updated company records --- tap_hubspot/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 418f796b..bc1b2343 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -586,10 +586,9 @@ def _sync_contacts_by_company_batch_read(STATE, ctx, company_ids): def sync_company_details(companies, catalog, mdata, schema, start): with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: for company in companies: - if not company['modified_time'] or company['modified_time'] >= start: - record = request(get_url("companies_detail", company_id=company['id'])).json() - record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata) - singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) + record = request(get_url("companies_detail", company_id=company['id'])).json() + record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata) + singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) def sync_companies(STATE, ctx): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get('metadata')) @@ -638,7 +637,8 @@ def sync_companies(STATE, ctx): if modified_time and modified_time >= max_bk_value: max_bk_value = modified_time - companies.append({'id': row['companyId'], 'modified_time': modified_time}) + if not modified_time or modified_time >= start: + companies.append({'id': row['companyId'], 'modified_time': modified_time}) # Process the company and contacts_by_company records once the list size reaches the default limit (=250) if len(companies) >= default_company_params['limit']: From 539770ca2822e90c473a4c1a027f8cd7428e6f01 Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Thu, 18 Jan 2024 09:02:48 +0000 Subject: [PATCH 04/11] minor refactoring --- tap_hubspot/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 121ea878..0ae102e2 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -647,8 +647,8 @@ def sync_companies(STATE, ctx): company_ids.append(row['companyId']) # Process the company and contacts_by_company records once the list size reaches the default limit (=250) if len(company_ids) >= default_company_params['limit']: - _sync_contacts_by_company_batch_read(STATE, ctx, company_ids) - company_ids = [] + _sync_contacts_by_company_batch_read(STATE, ctx, company_ids) + company_ids = [] # Stream may have fewer than the default limit (=250) records, and the last batch may have fewer records than the limit set # The following code will handle those remaining company records From 2039a286a454c9849343aeb4d42e372d313267dd Mon Sep 17 00:00:00 
2001 From: RushiT0122 Date: Fri, 19 Jan 2024 07:41:30 +0000 Subject: [PATCH 05/11] add interrupted sync support to contacts_by_company --- tap_hubspot/__init__.py | 42 +++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 0ae102e2..58800715 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -587,6 +587,10 @@ def _sync_contacts_by_company_batch_read(STATE, ctx, company_ids): record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata) singer.write_record("contacts_by_company", record, time_extracted=utils.now()) + STATE = singer.set_offset(STATE, "contacts_by_company", 'offset', company_ids[-1]) + singer.write_state(STATE) + return STATE + default_company_params = { 'limit': 250, 'properties': ["createdate", "hs_lastmodifieddate"] } @@ -617,10 +621,25 @@ def sync_companies(STATE, ctx): max_bk_value = start if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: contacts_by_company_schema = load_schema(CONTACTS_BY_COMPANY) - singer.write_schema("contacts_by_company", contacts_by_company_schema, ["company-id", "contact-id"]) + singer.write_schema('contacts_by_company', contacts_by_company_schema, ["company-id", "contact-id"]) + + # This code handles the interrupted sync. When a sync is interrupted, + # the last batch of `contacts_by_company` extraction may get interrupted. + # So before resuming, we should check which of `companies` and `contacts_by_company` + # has the offset that is lagging behind and set that as the offset value for `companies`. + # Note that a few of the records may get duplicated. + if singer.get_offset(STATE, 'contacts_by_company', {}).get('offset'): + companies_offset = singer.get_offset(STATE, 'companies', {}).get('offset') + contacts_by_company_offset = singer.get_offset(STATE, 'contacts_by_company').get('offset') + if companies_offset: + offset = min(companies_offset, contacts_by_company_offset) + else: + offset = contacts_by_company_offset - # We will process the companies and contacts_by_company records in batches of the default limit size - # This list will store the company records until the batch size is reached or the records are exhausted + STATE = singer.set_offset(STATE, 'companies', 'offset', offset) + singer.write_state(STATE) + + # This list collects the recently modified company ids to extract `contacts_by_company` records in batches company_ids = [] with bumble_bee: for row in gen_request(STATE, 'companies', url, default_company_params, 'companies', 'has-more', ['offset'], ['offset']): @@ -644,16 +663,19 @@ def sync_companies(STATE, ctx): singer.write_record("companies", record, catalog.get('stream_alias'), time_extracted=utils.now()) if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - company_ids.append(row['companyId']) - # Process the company and contacts_by_company records once the list size reaches the default limit (=250) + # Collect the recently modified company id + if not modified_time or modified_time >= start: + company_ids.append(row['companyId']) + + # Once the batch size reaches the set limit, extract the `contacts_by_company` records for the collected company ids if len(company_ids) >= default_company_params['limit']: - _sync_contacts_by_company_batch_read(STATE, ctx, company_ids) - company_ids = [] + STATE = _sync_contacts_by_company_batch_read(STATE, ctx, company_ids) + company_ids = [] # reset the list - # Stream may have fewer than the default limit (=250) records, and the last batch may have fewer records than the limit set - # The following code will handle those 
remaining company records + # Extract the records for the last remaining company ids if CONTACTS_BY_COMPANY in ctx.selected_stream_ids: - _sync_contacts_by_company_batch_read(STATE, ctx, company_ids) + STATE = _sync_contacts_by_company_batch_read(STATE, ctx, company_ids) + STATE = singer.clear_offset(STATE, "contacts_by_company") # Don't bookmark past the start of this sync to account for updated records during the sync. new_bookmark = min(max_bk_value, current_sync_start) From 12e4102b022ed6aa3e41bac9f194b477bf54c5a0 Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Fri, 19 Jan 2024 08:03:07 +0000 Subject: [PATCH 06/11] update interrupted sync integration test --- tests/test_hubspot_interrupted_sync_offset.py | 79 +++++++++++++++---- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/tests/test_hubspot_interrupted_sync_offset.py b/tests/test_hubspot_interrupted_sync_offset.py index 891362b3..7d17554b 100644 --- a/tests/test_hubspot_interrupted_sync_offset.py +++ b/tests/test_hubspot_interrupted_sync_offset.py @@ -12,6 +12,8 @@ class TestHubspotInterruptedSyncOffsetContactLists(HubspotBaseTest): """Testing interrupted syncs for streams that implement unique bookmarking logic.""" + synced_records = None + @staticmethod def name(): return "tt_hubspot_interrupt_contact_lists" @@ -20,7 +22,6 @@ def streams_to_test(self): """expected streams minus the streams not under test""" untested = { # Streams tested elsewhere - 'companies', # covered in TestHubspotInterruptedSync1 'engagements', # covered in TestHubspotInterruptedSync1 # Feature Request | TDL-16095: [tap-hubspot] All incremental # streams should implement the interruptible sync feature @@ -31,7 +32,6 @@ def streams_to_test(self): 'deal_pipelines', # interruptible does not apply, child of deals 'campaigns', # unable to manually find a partial state with our test data 'email_events', # unable to manually find a partial state with our test data - 'contacts_by_company', # interruptible does not apply, child of 'companies' 'subscription_changes', # BUG_TDL-14938 'tickets' # covered in TestHubspotInterruptedSync1 } @@ -41,8 +41,9 @@ def streams_to_test(self): def stream_to_interrupt(self): return 'contact_lists' - def state_to_inject(self): - return {'offset': {'offset': 250}} + def state_to_inject(self, new_state): + new_state['bookmarks']['contact_lists'] = {'offset': {'offset': 250}} + return new_state def get_properties(self): return { @@ -79,14 +80,15 @@ def test_run(self): # Run sync 1 first_record_count_by_stream = self.run_and_verify_sync(conn_id) - synced_records = runner.get_records_from_target_output() + self.synced_records = runner.get_records_from_target_output() state_1 = menagerie.get_state(conn_id) # Update state to simulate a bookmark stream = self.stream_to_interrupt() new_state = copy.deepcopy(state_1) - new_state['bookmarks'][stream] = self.state_to_inject() + new_state = self.state_to_inject(new_state) new_state['currently_syncing'] = stream + menagerie.set_state(conn_id, new_state) # run second sync @@ -98,10 +100,21 @@ def test_run(self): # since newly created test records may get updated while stream is syncing replication_keys = self.expected_replication_keys() for stream in state_1.get('bookmarks'): - replication_key = list(replication_keys[stream])[0] - self.assertLessEqual(state_1["bookmarks"][stream].get(replication_key), - state_2["bookmarks"][stream].get(replication_key), - msg="First sync bookmark should not be greater than the second bookmark.") + + if self.stream_to_interrupt() == 'companies' and stream 
== 'companies': + replication_key = list(replication_keys[stream])[0] + self.assertLessEqual(new_state.get('bookmarks')[stream].get('current_sync_start'), + state_2["bookmarks"][stream].get(replication_key), + msg="First sync bookmark should not be greater than the second bookmark.") + elif stream == 'contacts_by_company': + self.assertEquals(state_1["bookmarks"][stream], {"offset": {}}) + self.assertEquals(state_2["bookmarks"][stream], {"offset": {}}) + + else: + replication_key = list(replication_keys[stream])[0] + self.assertLessEqual(state_1["bookmarks"][stream].get(replication_key), + state_2["bookmarks"][stream].get(replication_key), + msg="First sync bookmark should not be greater than the second bookmark.") class TestHubspotInterruptedSyncOffsetContacts(TestHubspotInterruptedSyncOffsetContactLists): @@ -119,8 +132,9 @@ def get_properties(self): def stream_to_interrupt(self): return 'contacts' - def state_to_inject(self): - return {'offset': {'vidOffset': 3502}} + def state_to_inject(self, new_state): + new_state['bookmarks']['contacts'] = {'offset': {'vidOffset': 3502}} + return new_state class TestHubspotInterruptedSyncOffsetDeals(TestHubspotInterruptedSyncOffsetContactLists): """Testing interrupted syncs for streams that implement unique bookmarking logic.""" @@ -136,6 +150,41 @@ def get_properties(self): def stream_to_interrupt(self): return 'deals' - def state_to_inject(self): - return {'property_hs_lastmodifieddate': '2021-10-13T08:32:08.383000Z', - 'offset': {'offset': 3442973342}} + def state_to_inject(self, new_state): + new_state['bookmarks']['deals'] = {'property_hs_lastmodifieddate': '2021-10-13T08:32:08.383000Z', + 'offset': {'offset': 3442973342}} + return new_state + + +class TestHubspotInterruptedSyncOffsetCompanies(TestHubspotInterruptedSyncOffsetContactLists): + """Testing interrupted syncs for streams that implement unique bookmarking logic.""" + @staticmethod + def name(): + return "tt_hubspot_interrupt_companies" + + def get_properties(self): + return { + 'start_date' : '2023-12-31T00:00:00Z' + } + + def stream_to_interrupt(self): + return 'companies' + + def state_to_inject(self, new_state): + companies_records = self.synced_records['companies']['messages'] + contacts_by_company_records = self.synced_records['contacts_by_company']['messages'] + + company_record_index = int(len(companies_records)/2) + contact_record_index = int(3*len(contacts_by_company_records)/4) + + last_modified_value = companies_records[-1]['data'][list(self.expected_replication_keys()['companies'])[0]]['value'] + current_sync_start = companies_records[company_record_index]['data'][list(self.expected_replication_keys()['companies'])[0]]['value'] + offset_1 = companies_records[company_record_index]['data']['companyId'] + offset_2 = contacts_by_company_records[contact_record_index]['data']['company-id'] + + new_state['bookmarks']['companies'] = {'property_hs_lastmodifieddate': last_modified_value, + 'current_sync_start': current_sync_start, + 'offset': {'offset': offset_1}} + new_state['bookmarks']['contacts_by_company'] = {'offset': {'offset': offset_2}} + + return new_state From 540fe540acd63444db382e45f10171d1dd3e18da Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Fri, 19 Jan 2024 08:22:19 +0000 Subject: [PATCH 07/11] fix the lint --- tap_hubspot/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 58800715..806c29fc 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -6,7 +6,7 @@ import 
re import sys import json -# pylint: disable=import-error +# pylint: disable=import-error,too-many-statements import attr import backoff import requests @@ -574,7 +574,6 @@ def _sync_contacts_by_company_batch_read(STATE, ctx, company_ids): catalog = ctx.get_catalog_from_id(singer.get_currently_syncing(STATE)) mdata = metadata.to_map(catalog.get('metadata')) url = get_url("contacts_by_company_v3") - with Transformer(UNIX_MILLISECONDS_INTEGER_DATETIME_PARSING) as bumble_bee: with metrics.record_counter(CONTACTS_BY_COMPANY) as counter: body = {'inputs': [{'id': company_id} for company_id in company_ids]} @@ -586,7 +585,6 @@ def _sync_contacts_by_company_batch_read(STATE, ctx, company_ids): 'contact-id' : contact['id']} record = bumble_bee.transform(lift_properties_and_versions(record), schema, mdata) singer.write_record("contacts_by_company", record, time_extracted=utils.now()) - STATE = singer.set_offset(STATE, "contacts_by_company", 'offset', company_ids[-1]) singer.write_state(STATE) return STATE From 5e9ca0656da483ddee324fb311a4395b1a1c7ee1 Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Mon, 22 Jan 2024 04:54:29 +0000 Subject: [PATCH 08/11] fix all fields integration test --- tests/test_hubspot_all_fields.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/test_hubspot_all_fields.py b/tests/test_hubspot_all_fields.py index 93914549..13e40d76 100644 --- a/tests/test_hubspot_all_fields.py +++ b/tests/test_hubspot_all_fields.py @@ -156,6 +156,12 @@ def get_matching_actual_record_by_pk(expected_primary_key_dict, actual_records): 'property_hs_analytics_latest_source_data_2', 'property_hs_analytics_latest_source_data_2_contact', 'property_hs_deal_score', + 'property_hs_is_active_shared_deal', + 'property_hs_v2_date_entered_appointmentscheduled', + 'property_hs_v2_date_exited_appointmentscheduled', + 'property_hs_v2_latest_time_in_appointmentscheduled', + 'property_hs_v2_cumulative_time_in_appointmentscheduled', + 'property_hs_v2_date_entered_qualifiedtobuy' }, 'subscription_changes':{ 'normalizedEmailId' From 5d450aaca079019bd9ef2c8e839da86cd5e02714 Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Mon, 22 Jan 2024 08:43:29 +0000 Subject: [PATCH 09/11] remove old endpoint --- tap_hubspot/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tap_hubspot/__init__.py b/tap_hubspot/__init__.py index 806c29fc..37118b7a 100644 --- a/tap_hubspot/__init__.py +++ b/tap_hubspot/__init__.py @@ -76,7 +76,6 @@ class StateFields: "companies_all": "/companies/v2/companies/paged", "companies_recent": "/companies/v2/companies/recent/modified", "companies_detail": "/companies/v2/companies/{company_id}", - "contacts_by_company": "/companies/v2/companies/{company_id}/vids", "contacts_by_company_v3": "/crm/v3/associations/company/contact/batch/read", "deals_properties": "/properties/v1/deals/properties", From de8705d0b7e8c56b0acc5301fdea15c00e1a9f99 Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Mon, 22 Jan 2024 12:35:01 +0000 Subject: [PATCH 10/11] bump version 2.13.1 --- CHANGELOG.md | 3 +++ setup.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 952a01f1..d9ed39a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 2.13.1 + * Optimise contacts_by_company implementation [#250](https://github.com/singer-io/tap-hubspot/pull/250) + ## 2.13.0 * HubSpot Custom CRM Objects Support [#242](https://github.com/singer-io/tap-hubspot/pull/242) diff --git a/setup.py b/setup.py index f99dbc6a..3f15e1f7 100644 --- a/setup.py +++ 
b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-hubspot', - version='2.13.0', + version='2.14.0', description='Singer.io tap for extracting data from the HubSpot API', author='Stitch', url='http://singer.io', From 00ff3bfac679c7257d70d088a39f2db0fcd48e27 Mon Sep 17 00:00:00 2001 From: RushiT0122 Date: Mon, 22 Jan 2024 12:49:21 +0000 Subject: [PATCH 11/11] update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3f15e1f7..849f20f2 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup setup(name='tap-hubspot', - version='2.14.0', + version='2.13.1', description='Singer.io tap for extracting data from the HubSpot API', author='Stitch', url='http://singer.io',
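
Reviewer reference: the following is a minimal standalone sketch of the v3 batch-read pattern that PATCH 01 introduces and PATCH 02 refines. The endpoint path, the request body shape ({'inputs': [{'id': ...}]}), and the response shape (results[].from.id / results[].to[].id) come from the diffs above; BASE_URL, the direct requests call, and the access_token handling are illustrative assumptions — the tap itself routes this request through its post_search_endpoint helper with its own auth and retry logic.

import requests

BASE_URL = "https://api.hubapi.com"  # assumed host; the tap builds URLs via get_url()
BATCH_READ_PATH = "/crm/v3/associations/company/contact/batch/read"
BATCH_SIZE = 250  # mirrors default_company_params['limit']

def batch_read_contacts_by_company(company_ids, access_token):
    # Resolve company -> contact associations, at most BATCH_SIZE ids per request,
    # instead of one GET per company as the old v2 vids endpoint required.
    records = []
    for start in range(0, len(company_ids), BATCH_SIZE):
        chunk = company_ids[start:start + BATCH_SIZE]
        body = {'inputs': [{'id': company_id} for company_id in chunk]}
        response = requests.post(BASE_URL + BATCH_READ_PATH, json=body,
                                 headers={'Authorization': 'Bearer {}'.format(access_token)})
        response.raise_for_status()
        for row in response.json()['results']:
            for contact in row['to']:
                records.append({'company-id': row['from']['id'],
                                'contact-id': contact['id']})
    return records

Each returned dict carries one company/contact pair, matching the ["company-id", "contact-id"] primary key the tap writes for the contacts_by_company stream.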
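
Similarly, the resume logic added in PATCH 05 can be summarized in isolation as follows. This sketch assumes the nested bookmark/offset dict shape visible in the diff; the function name resume_companies_offset is illustrative and not part of the tap, which uses singer.get_offset/set_offset instead.

def resume_companies_offset(state):
    # When a sync is interrupted mid-batch, restart `companies` from whichever
    # offset (`companies` vs. `contacts_by_company`) is lagging behind, accepting
    # that a few records may be re-emitted on the next run.
    bookmarks = state.get('bookmarks', {})
    companies_offset = bookmarks.get('companies', {}).get('offset', {}).get('offset')
    contacts_by_company_offset = bookmarks.get('contacts_by_company', {}).get('offset', {}).get('offset')
    if not contacts_by_company_offset:
        return companies_offset  # nothing to reconcile
    if not companies_offset:
        return contacts_by_company_offset
    return min(companies_offset, contacts_by_company_offset)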