fix: remove stream_state from interpolation contexts #325

Status: Open. Wants to merge 44 commits into base: main. The diff below shows changes from 39 of the 44 commits.

Commits (44)
42490f1  fix: remove stream_state from interpolation contexts (devin-ai-integration[bot], Feb 7, 2025)
3cbdb9b  fix: add validation to prevent stream_state usage in interpolation (devin-ai-integration[bot], Feb 7, 2025)
804019e  fix: remove stream_state from request options (devin-ai-integration[bot], Feb 7, 2025)
b601c3c  test: add test for stream_state validation (devin-ai-integration[bot], Feb 7, 2025)
245aeed  fix: update AirbyteTracedException import path (devin-ai-integration[bot], Feb 7, 2025)
d308e31  fix: update AirbyteTracedException import path (devin-ai-integration[bot], Feb 7, 2025)
9c2d7c9  style: fix import sorting (devin-ai-integration[bot], Feb 7, 2025)
0d7d462  test: fix JWT token comparison in test (devin-ai-integration[bot], Feb 7, 2025)
7e7e193  style: fix formatting (devin-ai-integration[bot], Feb 7, 2025)
826c132  style: fix import order in jinja.py (devin-ai-integration[bot], Feb 7, 2025)
efe206b  style: fix import sorting with ruff (devin-ai-integration[bot], Feb 7, 2025)
47530d7  test: update min_max_datetime test to use stream_interval (devin-ai-integration[bot], Feb 7, 2025)
c924a04  test: update remaining stream_state references to stream_interval (devin-ai-integration[bot], Feb 7, 2025)
d34318c  fix: remove stream_interval from aliases to allow direct usage (devin-ai-integration[bot], Feb 7, 2025)
158534a  test: update test_max_newer_time_from_parameters to use stream_interval (devin-ai-integration[bot], Feb 7, 2025)
5219447  test: update record filter test to use stream_interval (devin-ai-integration[bot], Feb 7, 2025)
6c7c103  test: update record filter test to pass stream_interval in kwargs (devin-ai-integration[bot], Feb 7, 2025)
b214d38  feat: add stream_interval support to record filter (devin-ai-integration[bot], Feb 7, 2025)
c6932a8  test: update record selector test to use stream_interval (devin-ai-integration[bot], Feb 7, 2025)
3a9a323  style: fix formatting in test_record_selector.py (devin-ai-integration[bot], Feb 7, 2025)
be05918  feat: add stream_interval support to record selector (devin-ai-integration[bot], Feb 7, 2025)
6948f80  style: fix formatting in record_selector.py (devin-ai-integration[bot], Feb 7, 2025)
71f8db2  feat: add stream_interval support to record selector transform method (devin-ai-integration[bot], Feb 7, 2025)
b9fe92a  feat: add stream_interval support to record transformation (devin-ai-integration[bot], Feb 7, 2025)
788ebaa  feat: add stream_interval support to all transformations (devin-ai-integration[bot], Feb 7, 2025)
46aad70  fix: update transformation base class to use Mapping type (devin-ai-integration[bot], Feb 7, 2025)
517faad  fix: update transformation implementations to match base class (devin-ai-integration[bot], Feb 7, 2025)
0657aeb  fix: update record selector to use Dict type for stream_interval (devin-ai-integration[bot], Feb 7, 2025)
ed17c96  fix: update transformation implementations to use Dict type for strea… (devin-ai-integration[bot], Feb 7, 2025)
e7a6989  fix: remove stream_state from add_fields kwargs (devin-ai-integration[bot], Feb 7, 2025)
74e37b9  fix: remove stream_state from request options interpolation (devin-ai-integration[bot], Feb 7, 2025)
f3ab630  fix: update transformation imports and fix record selector name (devin-ai-integration[bot], Feb 7, 2025)
19f397e  fix: check parent stream state interpolation in stream classification (devin-ai-integration[bot], Feb 7, 2025)
ccd9c26  style: fix formatting issues (devin-ai-integration[bot], Feb 7, 2025)
bcdafc7  fix: add missing name parameter to RecordSelector in test (devin-ai-integration[bot], Feb 7, 2025)
dde539e  fix: add missing name parameter to RecordSelector in test_record_sele… (devin-ai-integration[bot], Feb 7, 2025)
a9e1f3e  fix: update Record assertions with correct stream name in test_record… (devin-ai-integration[bot], Feb 7, 2025)
ebb8e54  style: fix formatting in test_record_selector (devin-ai-integration[bot], Feb 7, 2025)
a4be945  fix: update stream_interval fallback in request input provider (devin-ai-integration[bot], Feb 7, 2025)
11269f2  fix: restore stream_state aliasing in jinja.py and remove stream_inte… (devin-ai-integration[bot], Feb 10, 2025)
ca7eef2  style: fix formatting in record_filter.py and record_selector.py (devin-ai-integration[bot], Feb 10, 2025)
d643b99  test: update test_record_selector to use stream_interval consistently (devin-ai-integration[bot], Feb 10, 2025)
540cc44  test: update test_record_filter to use stream_interval consistently (devin-ai-integration[bot], Feb 10, 2025)
7607cf9  fix: add stream_interval support to RecordFilter.filter_records (devin-ai-integration[bot], Feb 10, 2025)
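
Taken together, these commits retire `stream_state` as an interpolation context: the Jinja engine now rejects templates that reference it, the component schema no longer advertises it, and a new `stream_interval` argument is threaded through record filtering, selection, and transformation. A minimal before/after sketch of the migration (the record field names are invented for illustration):

```python
# Illustrative only: stream_state values are mutated by the cursor as
# partitions complete, so reading them from parallel partitions is a race.
# stream_interval is scoped to the slice being processed and is thread-safe.

# Before (now rejected at interpolation time):
old_condition = "{{ record['updated_at'] >= stream_state['updated_at'] }}"

# After (the supported replacement for incremental sync values):
new_condition = "{{ record['updated_at'] >= stream_interval['start_time'] }}"
```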
68 changes: 45 additions & 23 deletions airbyte_cdk/sources/declarative/concurrent_declarative_source.py
@@ -35,7 +35,10 @@
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
-from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
+from airbyte_cdk.sources.declarative.partition_routers import (
+    AsyncJobPartitionRouter,
+    SubstreamPartitionRouter,
+)
 from airbyte_cdk.sources.declarative.requesters import HttpRequester
 from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
 from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
@@ -397,36 +400,25 @@ def _is_datetime_incremental_without_partition_routing(
             )
         )
 
-    def _stream_supports_concurrent_partition_processing(
+    def _stream_uses_stream_state_interpolation(
         self, declarative_stream: DeclarativeStream
     ) -> bool:
-        """
-        Many connectors make use of stream_state during interpolation on a per-partition basis under the assumption that
-        state is updated sequentially. Because the concurrent CDK engine processes different partitions in parallel,
-        stream_state is no longer a thread-safe interpolation context. It would be a race condition because a cursor's
-        stream_state can be updated in any order depending on which stream partitions finish first.
-
-        We should start to move away from depending on the value of stream_state for low-code components that operate
-        per-partition, but we need to gate this, otherwise some connectors will be blocked from publishing. See
-        cdk-migrations.md for the full list of connectors.
-        """
-
         if isinstance(declarative_stream.retriever, SimpleRetriever) and isinstance(
             declarative_stream.retriever.requester, HttpRequester
         ):
             http_requester = declarative_stream.retriever.requester
             if "stream_state" in http_requester._path.string:
                 self.logger.warning(
-                    f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe. Defaulting to synchronous processing"
+                    f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe."
                 )
-                return False
+                return True
 
             request_options_provider = http_requester._request_options_provider
             if request_options_provider.request_options_contain_stream_state():
                 self.logger.warning(
-                    f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe. Defaulting to synchronous processing"
+                    f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe."
                 )
-                return False
+                return True
 
             record_selector = declarative_stream.retriever.record_selector
             if isinstance(record_selector, RecordSelector):
@@ -438,9 +430,9 @@ def _stream_supports_concurrent_partition_processing(
                     and "stream_state" in record_selector.record_filter.condition
                 ):
                     self.logger.warning(
-                        f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the RecordFilter which is not thread-safe. Defaulting to synchronous processing"
+                        f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the RecordFilter which is not thread-safe."
                     )
-                    return False
+                    return True
 
                 for add_fields in [
                     transformation
@@ -450,17 +442,47 @@
                     for field in add_fields.fields:
                         if isinstance(field.value, str) and "stream_state" in field.value:
                             self.logger.warning(
-                                f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe. Defaulting to synchronous processing"
+                                f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe."
                             )
-                            return False
+                            return True
                         if (
                             isinstance(field.value, InterpolatedString)
                             and "stream_state" in field.value.string
                         ):
                             self.logger.warning(
-                                f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe. Defaulting to synchronous processing"
+                                f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the AddFields which is not thread-safe."
                             )
-                            return False
+                            return True
+        return False
+
+    def _stream_supports_concurrent_partition_processing(
+        self, declarative_stream: DeclarativeStream
+    ) -> bool:
+        """
+        Many connectors make use of stream_state during interpolation on a per-partition basis under the assumption that
+        state is updated sequentially. Because the concurrent CDK engine processes different partitions in parallel,
+        stream_state is no longer a thread-safe interpolation context. It would be a race condition because a cursor's
+        stream_state can be updated in any order depending on which stream partitions finish first.
+
+        We should start to move away from depending on the value of stream_state for low-code components that operate
+        per-partition, but we need to gate this, otherwise some connectors will be blocked from publishing. See
+        cdk-migrations.md for the full list of connectors.
+        """
+        # Check if the stream uses stream_state interpolation in any of its components
+        if self._stream_uses_stream_state_interpolation(declarative_stream):
+            return False
+
+        # Check if any parent stream uses stream_state interpolation
+        if isinstance(declarative_stream.retriever, SimpleRetriever) and isinstance(
+            declarative_stream.retriever.stream_slicer, SubstreamPartitionRouter
+        ):
+            for parent_config in declarative_stream.retriever.stream_slicer.parent_stream_configs:
+                if self._stream_uses_stream_state_interpolation(parent_config.stream):
+                    self.logger.warning(
+                        f"Low-code stream '{declarative_stream.name}' has a parent stream that uses stream_state interpolation which is not thread-safe. Defaulting to synchronous processing"
+                    )
+                    return False
+
+        return True
 
     @staticmethod
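
The refactor above splits the old all-in-one check in two: `_stream_uses_stream_state_interpolation` reports whether any component of a stream interpolates `stream_state`, and `_stream_supports_concurrent_partition_processing` applies that predicate to the stream itself and, new in this PR, to parents wired in through a `SubstreamPartitionRouter`. A minimal sketch of how the two compose (simplified stand-in signature, not the CDK implementation):

```python
from typing import Callable, Iterable

def supports_concurrent_partition_processing(
    stream: object,
    uses_stream_state: Callable[[object], bool],
    parent_streams: Iterable[object] = (),
) -> bool:
    # The stream itself must not interpolate stream_state...
    if uses_stream_state(stream):
        return False
    # ...and, per this PR, neither may any of its parent streams.
    return not any(uses_stream_state(parent) for parent in parent_streams)
```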
airbyte_cdk/sources/declarative/declarative_component_schema.yaml
@@ -80,7 +80,6 @@ definitions:
             - stream_interval
             - stream_partition
             - stream_slice
-            - stream_state
           examples:
             - "{{ record['updates'] }}"
             - "{{ record['MetaData']['LastUpdatedTime'] }}"
@@ -1611,7 +1610,6 @@ definitions:
             - stream_interval
             - stream_partition
             - stream_slice
-            - stream_state
           examples:
             - "/products"
             - "/quotes/{{ stream_partition['id'] }}/quote_line_groups"
@@ -1661,7 +1659,6 @@ definitions:
             - stream_interval
             - stream_partition
             - stream_slice
-            - stream_state
           examples:
             - |
               [{"clause": {"type": "timestamp", "operator": 10, "parameters":
@@ -1679,7 +1676,6 @@
             - stream_interval
             - stream_partition
             - stream_slice
-            - stream_state
           examples:
             - sort_order: "ASC"
               sort_field: "CREATED_AT"
@@ -1700,7 +1696,6 @@
             - stream_interval
             - stream_partition
             - stream_slice
-            - stream_state
           examples:
             - Output-Format: JSON
             - Version: "{{ config['version'] }}"
@@ -1717,7 +1712,6 @@
             - stream_interval
             - stream_partition
             - stream_slice
-            - stream_state
           examples:
             - unit: "day"
             - query: 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"'
@@ -2072,7 +2066,6 @@ definitions:
           interpolation_context:
             - config
             - record
-            - stream_state
             - stream_slice
         new:
           type: string
@@ -2086,7 +2079,6 @@
           interpolation_context:
             - config
             - record
-            - stream_state
             - stream_slice
       $parameters:
         type: object
@@ -2753,7 +2745,6 @@ definitions:
             - stream_interval
             - stream_partition
             - stream_slice
-            - stream_state
           examples:
             - "{{ record['created_at'] >= stream_interval['start_time'] }}"
             - "{{ record.status in ['active', 'expired'] }}"
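
The schema edits above drop `stream_state` from each component's advertised `interpolation_context`, leaving `stream_interval`, `stream_partition`, and `stream_slice` (plus `config` and `record` where applicable). A hedged sketch of what that means for a manifest's request parameters (the query and field names are invented for illustration):

```python
# No longer valid per the updated schema:
params_before = {
    "query": 'last_event_time > TIMESTAMP "{{ stream_state.last_event_time }}"',
}

# Valid replacement using the slice-scoped interval:
params_after = {
    "query": 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}"'
             ' AND TIMESTAMP "{{ stream_interval.end_time }}"',
}
```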
4 changes: 4 additions & 0 deletions airbyte_cdk/sources/declarative/extractors/record_filter.py
@@ -37,12 +37,14 @@ def filter_records(
         stream_state: StreamState,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
+        stream_interval: Optional[Mapping[str, Any]] = None,
     ) -> Iterable[Mapping[str, Any]]:
         kwargs = {
             "stream_state": stream_state,
             "stream_slice": stream_slice,
             "next_page_token": next_page_token,
             "stream_slice.extra_fields": stream_slice.extra_fields if stream_slice else {},
+            "stream_interval": stream_interval or {},
         }
         for record in records:
             if self._filter_interpolator.eval(self.config, record=record, **kwargs):
@@ -71,6 +73,7 @@ def filter_records(
         stream_state: StreamState,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
+        stream_interval: Optional[Mapping[str, Any]] = None,
     ) -> Iterable[Mapping[str, Any]]:
         records = (
             record
@@ -87,5 +90,6 @@
                 stream_state=stream_state,
                 stream_slice=stream_slice,
                 next_page_token=next_page_token,
+                stream_interval=stream_interval,
             )
         yield from records
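
A usage sketch against this branch, based on the dataclass fields visible in the diff (`config`, `parameters`, `condition`); the records and interval values are invented:

```python
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter

record_filter = RecordFilter(
    config={},
    parameters={},
    condition="{{ record['updated_at'] >= stream_interval['start_time'] }}",
)

records = [{"updated_at": "2025-02-01"}, {"updated_at": "2024-12-31"}]
kept = list(
    record_filter.filter_records(
        records,
        stream_state={},  # still accepted, but no longer referenced by the condition
        stream_slice=None,
        stream_interval={"start_time": "2025-01-01"},
    )
)
# kept should contain only the 2025-02-01 record.
```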
27 changes: 19 additions & 8 deletions airbyte_cdk/sources/declarative/extractors/record_selector.py
@@ -3,7 +3,7 @@
 #
 
 from dataclasses import InitVar, dataclass, field
-from typing import Any, Iterable, List, Mapping, Optional, Union
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
 
 import requests
 
@@ -50,8 +50,8 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
             else self._name
         )
 
-    @property  # type: ignore
-    def name(self) -> str:
+    @property
+    def stream_name(self) -> str:
         """
         :return: Stream name
         """
@@ -61,8 +61,8 @@ def name(self) -> str:
             else self._name
         )
 
-    @name.setter
-    def name(self, value: str) -> None:
+    @stream_name.setter
+    def stream_name(self, value: str) -> None:
         if not isinstance(value, property):
             self._name = value
 
@@ -73,6 +73,7 @@ def select_records(
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
+        stream_interval: Optional[Dict[str, Any]] = None,
     ) -> Iterable[Record]:
         """
         Selects records from the response
@@ -81,11 +82,12 @@
         :param records_schema: json schema of records to return
         :param stream_slice: The stream slice
        :param next_page_token: The paginator token
+        :param stream_interval: The stream interval for incremental sync values
         :return: List of Records selected from the response
         """
         all_data: Iterable[Mapping[str, Any]] = self.extractor.extract_records(response)
         yield from self.filter_and_transform(
-            all_data, stream_state, records_schema, stream_slice, next_page_token
+            all_data, stream_state, records_schema, stream_slice, next_page_token, stream_interval
         )
 
     def filter_and_transform(
@@ -95,6 +97,7 @@ def filter_and_transform(
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
+        stream_interval: Optional[Dict[str, Any]] = None,
     ) -> Iterable[Record]:
         """
         There is an issue with the selector as of 2024-08-30: it does technology-agnostic processing like filtering, transformation and
@@ -104,8 +107,12 @@ def filter_and_transform(
         Until we decide to move this logic away from the selector, we made this method public so that users like AsyncJobRetriever could
         share the logic of doing transformations on a set of records.
         """
-        filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
-        transformed_data = self._transform(filtered_data, stream_state, stream_slice)
+        filtered_data = self._filter(
+            all_data, stream_state, stream_slice, next_page_token, stream_interval
+        )
+        transformed_data = self._transform(
+            filtered_data, stream_state, stream_slice, stream_interval
+        )
         normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
         for data in normalized_data:
             yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
@@ -128,13 +135,15 @@ def _filter(
         stream_state: StreamState,
         stream_slice: Optional[StreamSlice],
         next_page_token: Optional[Mapping[str, Any]],
+        stream_interval: Optional[Dict[str, Any]] = None,
     ) -> Iterable[Mapping[str, Any]]:
         if self.record_filter:
             yield from self.record_filter.filter_records(
                 records,
                 stream_state=stream_state,
                 stream_slice=stream_slice,
                 next_page_token=next_page_token,
+                stream_interval=stream_interval,
             )
         else:
             yield from records
@@ -144,6 +153,7 @@ def _transform(
         records: Iterable[Mapping[str, Any]],
         stream_state: StreamState,
         stream_slice: Optional[StreamSlice] = None,
+        stream_interval: Optional[Dict[str, Any]] = None,
     ) -> Iterable[Mapping[str, Any]]:
         for record in records:
             for transformation in self.transformations:
@@ -152,5 +162,6 @@ def _transform(
                     config=self.config,
                     stream_state=stream_state,
                     stream_slice=stream_slice,
+                    stream_interval=stream_interval,
                 )
             yield record
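
Downstream of the selector, each transformation's `transform` method now receives the same `stream_interval` keyword. A hedged sketch of a transformation that consumes it (a standalone class whose signature mirrors the keyword pattern in this diff; the class and output field names are hypothetical):

```python
from typing import Any, Dict, Optional

class StampSyncWindow:
    """Writes the current slice's window onto each record (illustrative only)."""

    def transform(
        self,
        record: Dict[str, Any],
        config: Optional[Dict[str, Any]] = None,
        stream_state: Optional[Dict[str, Any]] = None,
        stream_slice: Optional[Dict[str, Any]] = None,
        stream_interval: Optional[Dict[str, Any]] = None,
    ) -> None:
        # Records are mutated in place, matching how RecordSelector._transform
        # applies each transformation to a record before yielding it.
        if stream_interval:
            record["_synced_window"] = dict(stream_interval)
```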
10 changes: 9 additions & 1 deletion airbyte_cdk/sources/declarative/interpolation/jinja.py
@@ -15,6 +15,12 @@
 from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation
 from airbyte_cdk.sources.declarative.interpolation.macros import macros
 from airbyte_cdk.sources.types import Config
+from airbyte_cdk.utils.traced_exception import AirbyteTracedException
+
+STREAM_STATE_DEPRECATION_MESSAGE = (
+    "Using 'stream_state' in interpolation is no longer supported as it is not thread-safe. "
+    "Please use 'stream_interval' for incremental sync values or 'stream_partition' for partition router values instead."
+)
 
 
 class StreamPartitionAccessEnvironment(SandboxedEnvironment):
@@ -32,7 +38,6 @@ def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
 
 # These aliases are used to deprecate existing keywords without breaking all existing connectors.
 _ALIASES = {
-    "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
     "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
 }
 
@@ -84,6 +89,9 @@ def eval(
         valid_types: Optional[Tuple[Type[Any]]] = None,
         **additional_parameters: Any,
     ) -> Any:
+        if isinstance(input_str, str) and "stream_state" in input_str:
+            raise AirbyteTracedException(STREAM_STATE_DEPRECATION_MESSAGE)
+
         context = {"config": config, **additional_parameters}
 
         for alias, equivalent in _ALIASES.items():
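
With this guard in place, any template containing the substring `stream_state` fails fast instead of being interpolated, and `stream_interval` stops being a mere alias of `stream_slice`. A small demonstration against this branch (the template is illustrative):

```python
from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation
from airbyte_cdk.utils.traced_exception import AirbyteTracedException

interpolation = JinjaInterpolation()
try:
    interpolation.eval("{{ stream_state['updated_at'] }}", config={})
except AirbyteTracedException as exc:
    # Surfaces STREAM_STATE_DEPRECATION_MESSAGE, which points users at
    # stream_interval and stream_partition.
    print(exc)
```

Note that the check is a plain substring match, so a template that merely mentions `stream_state` inside a literal string would also be rejected.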
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py
@@ -3,7 +3,7 @@
 #
 
 from dataclasses import InitVar, dataclass, field
-from typing import Any, Mapping, Optional, Union
+from typing import Any, Dict, Mapping, Optional, Union
 
 from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import (
     InterpolatedNestedMapping,
@@ -45,18 +45,20 @@ def eval_request_inputs(
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
+        stream_interval: Optional[Dict[str, Any]] = None,
     ) -> Mapping[str, Any]:
         """
         Returns the request inputs to set on an outgoing HTTP request
 
-        :param stream_state: The stream state
+        :param stream_state: The stream state (deprecated, use stream_interval instead)
         :param stream_slice: The stream slice
         :param next_page_token: The pagination token
+        :param stream_interval: The stream interval for incremental sync values
         :return: The request inputs to set on an outgoing HTTP request
         """
         kwargs = {
-            "stream_state": stream_state,
             "stream_slice": stream_slice,
+            "stream_interval": stream_state,  # Use stream_state as stream_interval for backward compatibility
            "next_page_token": next_page_token,
        }
        return self._interpolator.eval(self.config, **kwargs)  # type: ignore  # self._interpolator is always initialized with a value and will not be None
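
One detail worth noting in this last hunk: the provider accepts a new `stream_interval` argument, but the interpolation context it builds populates `stream_interval` from the caller-supplied `stream_state`, so request options written against the new name keep working even when callers still pass only `stream_state`. A sketch of the resulting context (a simplified stand-in for the kwargs above):

```python
from typing import Any, Dict, Optional

def build_interpolation_context(
    stream_state: Optional[Dict[str, Any]] = None,
    stream_slice: Optional[Dict[str, Any]] = None,
    next_page_token: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    return {
        "stream_slice": stream_slice,
        # Backward-compatible alias per this PR: old state values surface
        # under the new stream_interval name; stream_state itself is gone.
        "stream_interval": stream_state,
        "next_page_token": next_page_token,
    }
```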