Release/1.3.0 (#358)
* feat(mlop-2269): bump versions (#355)

* fix: bump versions adjust tests

* add checklist

* chore: bump python

* bump pyspark

* chore: java version all steps modified

* fix: sphinx version (#356)
ralphrass authored Jun 5, 2024
1 parent 788ea75 commit 99662f6
Showing 63 changed files with 579 additions and 236 deletions.
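The headline change across this diff is the CI toolchain bump: the workflows drop the old Python 3.7 container in favor of Python 3.9, Java 11 (Microsoft distribution), and Spark 3.5.1 with Hadoop 3. A minimal sketch for checking that a local environment matches the new CI matrix; the script is illustrative and not part of the commit:

```python
import subprocess
import sys

import pyspark  # assumes pyspark is already installed (CI installs it via `make ci-install`)

# Python: the workflows now pin 3.9.
assert sys.version_info[:2] == (3, 9), f"expected Python 3.9, got {sys.version_info[:2]}"

# Spark: the workflows pin 3.5.1.
assert pyspark.__version__.startswith("3.5"), f"expected Spark 3.5.x, got {pyspark.__version__}"

# Java: CI installs Java 11; note that `java -version` writes to stderr.
java_banner = subprocess.run(["java", "-version"], capture_output=True, text=True).stderr
print(java_banner.splitlines()[0])
```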
30 changes: 30 additions & 0 deletions .checklist.yaml
@@ -0,0 +1,30 @@
apiVersion: quintoandar.com.br/checklist/v2
kind: ServiceChecklist
metadata:
  name: butterfree
spec:
  description: >-
    A solution for Feature Stores.
  costCenter: C055
  department: engineering
  lifecycle: production
  docs: true

  ownership:
    team: data_products_mlops
    line: tech_platform
    owner: [email protected]

  libraries:
    - name: butterfree
      type: common-usage
      path: https://quintoandar.github.io/python-package-server/
      description: A lib to build Feature Stores.
      registries:
        - github-packages
      tier: T0

  channels:
    squad: 'mlops'
    alerts: 'data-products-reports'
13 changes: 13 additions & 0 deletions .github/workflows/publish.yml
@@ -14,6 +14,19 @@ jobs:

 steps:
   - uses: actions/checkout@v2
+  - uses: actions/setup-python@v5
+    with:
+      python-version: '3.9'
+
+  - uses: actions/setup-java@v4
+    with:
+      java-version: '11'
+      distribution: microsoft
+
+  - uses: vemonet/setup-spark@v1
+    with:
+      spark-version: '3.5.1'
+      hadoop-version: '3'
+
   - name: Install dependencies
     run: make ci-install
17 changes: 17 additions & 0 deletions .github/workflows/skip_lint.yml
@@ -0,0 +1,17 @@
# This job exists only because we want to mark the runner-linter check as required
# for PRs to develop, but not for the merge queue that merges into develop;
# GitHub does not offer this distinction yet.

name: 'Skip github-actions/runner-linter check at merge queue'

on:
  merge_group:

jobs:
  empty_job:
    name: 'github-actions/runner-linter'
    runs-on: github-actions-developers-runner
    steps:
      - name: Skip github-actions/runner-linter check at merge queue
        run: |
          echo "Done"
16 changes: 14 additions & 2 deletions .github/workflows/staging.yml
@@ -8,11 +8,23 @@ jobs:
 Pipeline:
   if: github.ref == 'refs/heads/staging'

-  runs-on: ubuntu-22.04
-  container: quintoandar/python-3-7-java
+  runs-on: ubuntu-latest

   steps:
     - uses: actions/checkout@v2
+    - uses: actions/setup-python@v5
+      with:
+        python-version: '3.9'
+
+    - uses: actions/setup-java@v4
+      with:
+        java-version: '11'
+        distribution: microsoft
+
+    - uses: vemonet/setup-spark@v1
+      with:
+        spark-version: '3.5.1'
+        hadoop-version: '3'
+
     - name: Install dependencies
       run: make ci-install
16 changes: 14 additions & 2 deletions .github/workflows/test.yml
@@ -9,11 +9,23 @@ on:

 jobs:
   Pipeline:
-    runs-on: ubuntu-22.04
-    container: quintoandar/python-3-7-java
+    runs-on: ubuntu-latest

     steps:
       - uses: actions/checkout@v2
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.9'
+
+      - uses: actions/setup-java@v4
+        with:
+          java-version: '11'
+          distribution: microsoft
+
+      - uses: vemonet/setup-spark@v1
+        with:
+          spark-version: '3.5.1'
+          hadoop-version: '3'
+
       - name: Install dependencies
         run: make ci-install
1 change: 1 addition & 0 deletions .gitignore
@@ -66,6 +66,7 @@ instance/

 # PyBuilder
 target/
+pip/

 # Jupyter Notebook
 .ipynb_checkpoints
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,10 @@ Preferably use **Added**, **Changed**, **Removed** and **Fixed** topics in each

 ## [Unreleased]

+## [1.3.0](https://github.com/quintoandar/butterfree/releases/tag/1.3.0)
+* Bump versions ([#355](https://github.com/quintoandar/butterfree/pull/355))
+* Sphinx version ([#356](https://github.com/quintoandar/butterfree/pull/356))
+
 ## [1.2.4](https://github.com/quintoandar/butterfree/releases/tag/1.2.4)
 * Auto create feature sets ([#351](https://github.com/quintoandar/butterfree/pull/351))

8 changes: 4 additions & 4 deletions Makefile
@@ -76,7 +76,7 @@ style-check:
@echo "Code Style"
@echo "=========="
@echo ""
-	@python -m black --check -t py36 --exclude="build/|buck-out/|dist/|_build/|pip/|\.pip/|\.git/|\.hg/|\.mypy_cache/|\.tox/|\.venv/" . && echo "\n\nSuccess" || (echo "\n\nFailure\n\nYou need to run \"make apply-style\" to apply style formatting to your code"; exit 1)
+	@python -m black --check -t py39 --exclude="build/|buck-out/|dist/|_build/|pip/|\.pip/|\.git/|\.hg/|\.mypy_cache/|\.tox/|\.venv/" . && echo "\n\nSuccess" || (echo "\n\nFailure\n\nYou need to run \"make apply-style\" to apply style formatting to your code"; exit 1)

.PHONY: quality-check
## run code quality checks with flake8
@@ -104,8 +104,8 @@ checks: style-check quality-check type-check
.PHONY: apply-style
## fix stylistic errors with black
apply-style:
-	@python -m black -t py36 --exclude="build/|buck-out/|dist/|_build/|pip/|\.pip/|\.git/|\.hg/|\.mypy_cache/|\.tox/|\.venv/" .
-	@python -m isort -rc --atomic butterfree/ tests/
+	@python -m black -t py39 --exclude="build/|buck-out/|dist/|_build/|pip/|\.pip/|\.git/|\.hg/|\.mypy_cache/|\.tox/|\.venv/" .
+	@python -m isort --atomic butterfree/ tests/

.PHONY: clean
## clean unused artifacts
@@ -152,7 +152,7 @@ package:
## update Butterfree API docs
update-docs:
cd ./docs; rm -rf source/butterfree.*
-	cd ./docs; sphinx-apidoc -T -E -o source/ ../butterfree
+	cd ./docs; sphinx-apidoc -o source/ ../butterfree
cd ./docs; make coverage

.PHONY: docs
12 changes: 8 additions & 4 deletions butterfree/_cli/migrate.py
@@ -46,13 +46,13 @@ def __fs_objects(path: str) -> Set[FeatureSetPipeline]:
logger.error(f"Path: {path} not found!")
return set()

logger.info(f"Importing modules...")
logger.info("Importing modules...")
package = ".".join(path.strip("/").split("/"))
imported = set(
importlib.import_module(f".{name}", package=package) for name in modules
)

logger.info(f"Scanning modules...")
logger.info("Scanning modules...")
content = {
module: set(
filter(
@@ -93,7 +93,8 @@ def __fs_objects(path: str) -> Set[FeatureSetPipeline]:


PATH = typer.Argument(
-    ..., help="Full or relative path to where feature set pipelines are being defined.",
+    ...,
+    help="Full or relative path to where feature set pipelines are being defined.",
)

GENERATE_LOGS = typer.Option(
@@ -113,7 +114,10 @@ class Migrate:
pipelines: list of Feature Set Pipelines to use to migration.
"""

-    def __init__(self, pipelines: Set[FeatureSetPipeline],) -> None:
+    def __init__(
+        self,
+        pipelines: Set[FeatureSetPipeline],
+    ) -> None:
self.pipelines = pipelines

def _send_logs_to_s3(self, file_local: bool, debug_mode: bool) -> None:
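`PATH` above follows typer's declarative style for CLI parameters. A minimal, self-contained sketch of the same pattern; the command below and the `GENERATE_LOGS` default and help text are illustrative assumptions, not butterfree's actual CLI surface:

```python
import typer

app = typer.Typer()

PATH = typer.Argument(
    ...,
    help="Full or relative path to where feature set pipelines are being defined.",
)
# The real GENERATE_LOGS definition is truncated in the hunk above;
# this default and help text are assumptions.
GENERATE_LOGS = typer.Option(False, help="Whether to upload migration logs.")


@app.command()
def migrate(path: str = PATH, generate_logs: bool = GENERATE_LOGS) -> None:
    """Hypothetical command showing how the declared parameters are consumed."""
    typer.echo(f"Scanning {path} (generate_logs={generate_logs})")


if __name__ == "__main__":
    app()
```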
4 changes: 3 additions & 1 deletion butterfree/clients/cassandra_client.py
@@ -129,7 +129,9 @@ def get_schema(self, table: str, database: str = None) -> List[Dict[str, str]]:
return response

def _get_create_table_query(
-        self, columns: List[CassandraColumn], table: str,
+        self,
+        columns: List[CassandraColumn],
+        table: str,
) -> str:
"""Creates CQL statement to create a table."""
parsed_columns = []
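`_get_create_table_query` assembles a CQL CREATE TABLE statement from parsed columns. A standalone sketch of the general idea; the helper below is hypothetical and simpler than butterfree's actual query builder:

```python
from typing import Dict


def create_table_cql(table: str, columns: Dict[str, str], primary_key: str = "id") -> str:
    """Build a CREATE TABLE statement; columns maps column name -> CQL type."""
    cols = ", ".join(f"{name} {ctype}" for name, ctype in columns.items())
    return f"CREATE TABLE IF NOT EXISTS {table} ({cols}, PRIMARY KEY ({primary_key}));"


print(create_table_cql("feature_set", {"id": "int", "timestamp": "timestamp", "feature": "float"}))
```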
6 changes: 3 additions & 3 deletions butterfree/clients/spark_client.py
@@ -61,9 +61,9 @@ def read(
if path and not isinstance(path, (str, list)):
raise ValueError("path needs to be a string or a list of string")

-        df_reader: Union[
-            DataStreamReader, DataFrameReader
-        ] = self.conn.readStream if stream else self.conn.read
+        df_reader: Union[DataStreamReader, DataFrameReader] = (
+            self.conn.readStream if stream else self.conn.read
+        )

df_reader = df_reader.schema(schema) if schema else df_reader

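The change above is formatting only (black's new line-split style), but the underlying pattern is worth noting: the client picks a streaming or batch reader off the same Spark session. A plain-PySpark sketch of that pattern, outside butterfree's wrapper:

```python
from pyspark.sql import DataFrame, SparkSession

spark = SparkSession.builder.master("local[*]").getOrCreate()


def load(path: str, fmt: str = "parquet", stream: bool = False) -> DataFrame:
    # spark.readStream yields a DataStreamReader, spark.read a DataFrameReader;
    # both expose the same format(...).load(...) chain used here.
    reader = spark.readStream if stream else spark.read
    return reader.format(fmt).load(path)
```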
5 changes: 4 additions & 1 deletion butterfree/extract/source.py
@@ -58,7 +58,10 @@ class Source(HookableComponent):
"""

def __init__(
-        self, readers: List[Reader], query: str, eager_evaluation: bool = True,
+        self,
+        readers: List[Reader],
+        query: str,
+        eager_evaluation: bool = True,
) -> None:
super().__init__()
self.enable_pre_hooks = False
5 changes: 4 additions & 1 deletion butterfree/load/writers/historical_feature_store_writer.py
@@ -130,7 +130,10 @@ def __init__(
self.check_schema_hook = check_schema_hook

def write(
-        self, feature_set: FeatureSet, dataframe: DataFrame, spark_client: SparkClient,
+        self,
+        feature_set: FeatureSet,
+        dataframe: DataFrame,
+        spark_client: SparkClient,
) -> None:
"""Loads the data from a feature set into the Historical Feature Store.
10 changes: 8 additions & 2 deletions butterfree/load/writers/online_feature_store_writer.py
@@ -116,7 +116,10 @@ def filter_latest(dataframe: DataFrame, id_columns: List[Any]) -> DataFrame:

window = Window.partitionBy(*id_columns).orderBy(col(TIMESTAMP_COLUMN).desc())
return (
-        dataframe.select(col("*"), row_number().over(window).alias("rn"),)
+        dataframe.select(
+            col("*"),
+            row_number().over(window).alias("rn"),
+        )
.filter(col("rn") == 1)
.drop("rn")
)
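Again the edit is cosmetic; the logic is the classic latest-row-per-key dedup with a window function. A runnable PySpark illustration of the same pattern, with made-up column names:

```python
from pyspark.sql import SparkSession, Window
from pyspark.sql.functions import col, row_number

spark = SparkSession.builder.master("local[*]").getOrCreate()
df = spark.createDataFrame(
    [(1, "2024-01-01", 10.0), (1, "2024-02-01", 12.0), (2, "2024-01-15", 7.0)],
    ["id", "timestamp", "feature"],
)

# Rank rows within each id by timestamp, newest first, and keep only rank 1.
window = Window.partitionBy("id").orderBy(col("timestamp").desc())
latest = (
    df.select(col("*"), row_number().over(window).alias("rn"))
    .filter(col("rn") == 1)
    .drop("rn")
)
latest.show()  # one row per id: (1, 2024-02-01, 12.0) and (2, 2024-01-15, 7.0)
```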
@@ -162,7 +165,10 @@ def _write_in_debug_mode(
)

def write(
-        self, feature_set: FeatureSet, dataframe: DataFrame, spark_client: SparkClient,
+        self,
+        feature_set: FeatureSet,
+        dataframe: DataFrame,
+        spark_client: SparkClient,
) -> Union[StreamingQuery, None]:
"""Loads the latest data from a feature set into the Feature Store.
5 changes: 4 additions & 1 deletion butterfree/load/writers/writer.py
@@ -72,7 +72,10 @@ def _apply_transformations(self, df: DataFrame) -> DataFrame:

@abstractmethod
def write(
-        self, feature_set: FeatureSet, dataframe: DataFrame, spark_client: SparkClient,
+        self,
+        feature_set: FeatureSet,
+        dataframe: DataFrame,
+        spark_client: SparkClient,
) -> Any:
"""Loads the data from a feature set into the Feature Store.
@@ -180,7 +180,8 @@ def create_query(

@staticmethod
def _get_diff(
-        fs_schema: List[Dict[str, Any]], db_schema: List[Dict[str, Any]],
+        fs_schema: List[Dict[str, Any]],
+        db_schema: List[Dict[str, Any]],
) -> Set[Diff]:
"""Gets schema difference between feature set and the table of a given db.
@@ -296,7 +297,7 @@ def apply_migration(
logger.info(f"Applying this query: {q} ...")
self._client.sql(q)

logger.info(f"Feature Set migration finished successfully.")
logger.info("Feature Set migration finished successfully.")

# inform in drone console which feature set was migrated
print(f"The {feature_set.name} feature set was migrated.")
@@ -30,7 +30,10 @@ class MetastoreMigration(DatabaseMigration):
data is being loaded into an entity table, then users can drop columns manually.
"""

-    def __init__(self, database: str = None,) -> None:
+    def __init__(
+        self,
+        database: str = None,
+    ) -> None:
self._db_config = MetastoreConfig()
self.database = database or environment.get_variable(
"FEATURE_STORE_HISTORICAL_DATABASE"
4 changes: 3 additions & 1 deletion butterfree/transform/aggregated_feature_set.py
@@ -412,7 +412,9 @@ def _aggregate(
# repartition to have all rows for each group at the same partition
# by doing that, we won't have to shuffle data on grouping by id
dataframe = repartition_df(
-            dataframe, partition_by=groupby, num_processors=num_processors,
+            dataframe,
+            partition_by=groupby,
+            num_processors=num_processors,
)
grouped_data = dataframe.groupby(*groupby)

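As the retained comment explains, repartitioning by the grouping keys co-locates each group's rows so the following `groupby` avoids an extra shuffle. In plain PySpark, butterfree's `repartition_df` helper boils down to something like this sketch (not the helper's actual implementation):

```python
from typing import List

from pyspark.sql import DataFrame


def repartition_by_keys(df: DataFrame, keys: List[str], num_partitions: int) -> DataFrame:
    # Put all rows sharing the same key values into the same partition,
    # so the groupby that follows does not trigger another shuffle.
    return df.repartition(num_partitions, *keys)


# grouped_data = repartition_by_keys(dataframe, ["id"], 200).groupby("id")
```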
6 changes: 5 additions & 1 deletion butterfree/transform/transformations/aggregated_transform.py
@@ -76,7 +76,11 @@ def aggregations(self) -> List[Tuple]:
Function = namedtuple("Function", ["function", "data_type"])

return [
-            Function(f.func(expression), f.data_type.spark,) for f in self.functions
+            Function(
+                f.func(expression),
+                f.data_type.spark,
+            )
+            for f in self.functions
]

def _get_output_name(self, function: object) -> str:
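Each `Function` pairs a Spark aggregation expression with its output type. A self-contained sketch of the idea using stock PySpark functions; the column name and the particular aggregations are illustrative:

```python
from collections import namedtuple

from pyspark.sql import functions as F
from pyspark.sql.types import DoubleType

Function = namedtuple("Function", ["function", "data_type"])

aggregations = [
    Function(F.avg("feature"), DoubleType()),
    Function(F.stddev_pop("feature"), DoubleType()),
]

# A grouped DataFrame can then apply them in a single pass:
# grouped_data.agg(*[agg.function for agg in aggregations])
```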
4 changes: 3 additions & 1 deletion butterfree/transform/transformations/custom_transform.py
@@ -89,6 +89,8 @@ def transform(self, dataframe: DataFrame) -> DataFrame:
"""
dataframe = self.transformer(
-            dataframe, self.parent, **self.transformer__kwargs,
+            dataframe,
+            self.parent,
+            **self.transformer__kwargs,
)
return dataframe
5 changes: 4 additions & 1 deletion butterfree/transform/transformations/h3_transform.py
@@ -84,7 +84,10 @@ class H3HashTransform(TransformComponent):
"""

def __init__(
-        self, h3_resolutions: List[int], lat_column: str, lng_column: str,
+        self,
+        h3_resolutions: List[int],
+        lat_column: str,
+        lng_column: str,
):
super().__init__()
self.h3_resolutions = h3_resolutions
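For reference, the H3 hashing this transform wraps maps a latitude/longitude pair to a hexagonal cell index at each requested resolution. With the h3 3.x API (docs/requirements.txt pins h3==3.7.0), that looks like the following; the coordinates are arbitrary:

```python
import h3  # h3 3.x API; h3 4.x renamed geo_to_h3 to latlng_to_cell

lat, lng = -23.5505, -46.6333  # arbitrary coordinates (São Paulo)
for resolution in (6, 7, 8):
    # Higher resolutions produce smaller hexagonal cells.
    print(resolution, h3.geo_to_h3(lat, lng, resolution))
```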
@@ -54,7 +54,8 @@ class SQLExpressionTransform(TransformComponent):
"""

def __init__(
-        self, expression: str,
+        self,
+        expression: str,
):
super().__init__()
self.expression = expression
3 changes: 1 addition & 2 deletions docs/requirements.txt
@@ -4,5 +4,4 @@ sphinxemoji==0.1.6
typing-extensions==3.7.4.2
cmake==3.18.4
h3==3.7.0
-pyarrow==0.15.1
+pyarrow==16.1.0
4 changes: 2 additions & 2 deletions examples/test_examples.py
@@ -36,9 +36,9 @@
_, error = p.communicate()
if p.returncode != 0:
errors.append({"notebook": path, "error": error})
print(f" >>> Error in execution!\n")
print(" >>> Error in execution!\n")
else:
print(f" >>> Successful execution\n")
print(" >>> Successful execution\n")

if errors:
print(">>> Errors in the following notebooks:")
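The surrounding loop shells out once per example notebook and collects failures. A self-contained sketch of that pattern; the hunk does not show the actual command, so the `jupyter nbconvert` invocation and the notebook path below are assumptions:

```python
import subprocess

errors = []
for path in ["examples/simple_feature_set.ipynb"]:  # hypothetical notebook list
    p = subprocess.Popen(
        ["jupyter", "nbconvert", "--to", "notebook", "--execute", path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    _, error = p.communicate()
    if p.returncode != 0:
        errors.append({"notebook": path, "error": error})
        print(" >>> Error in execution!\n")
    else:
        print(" >>> Successful execution\n")
```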