Less strict requirements (#333)
* bump a few requirements; increase the lower bound of the h3 version range; add pyarrow as a dev dependency

* fix type repr for Spark types; fix broken tests (PySpark 3.4)

---------

Co-authored-by: Ralph Rassweiler <[email protected]>
lecardozo and ralphrass authored Aug 16, 2023
1 parent 3a73ed8 commit 6b78a50
Showing 8 changed files with 29 additions and 26 deletions.
6 changes: 3 additions & 3 deletions Makefile
@@ -9,8 +9,8 @@ VERSION := $(shell grep __version__ setup.py | head -1 | cut -d \" -f2 | cut -d
 .PHONY: environment
 ## create virtual environment for butterfree
 environment:
-	@pyenv install -s 3.7.6
-	@pyenv virtualenv 3.7.6 butterfree
+	@pyenv install -s 3.7.13
+	@pyenv virtualenv 3.7.13 butterfree
 	@pyenv local butterfree
 	@PYTHONPATH=. python -m pip install --upgrade pip

@@ -221,4 +221,4 @@ help:
 	} \
 	printf "\n"; \
 	}' \
-	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
\ No newline at end of file
+	| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
2 changes: 1 addition & 1 deletion butterfree/configs/db/cassandra_config.py
@@ -246,7 +246,7 @@ def translate(self, schema: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         cassandra_schema.append(
             {
                 "column_name": features["column_name"],
-                "type": cassandra_mapping[str(features["type"])],
+                "type": cassandra_mapping[str(features["type"]).replace("()", "")],
                 "primary_key": features["primary_key"],
             }
         )
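For context on the one-line change above (and its twin edits in butterfree/reports/metadata.py below): PySpark 3.4 changed the string form of data type instances to an eval-able spelling, so str(LongType()) now yields "LongType()" where earlier versions produced "LongType", which broke lookups into the Cassandra type mapping. A minimal sketch of the normalization; the two-entry mapping is illustrative, not butterfree's actual cassandra_mapping:

from pyspark.sql.types import LongType, StringType

# Illustrative subset of a Spark-to-Cassandra type mapping, keyed on the
# parenthesis-free spelling used before PySpark 3.4.
cassandra_mapping = {"LongType": "bigint", "StringType": "text"}

for spark_type in (LongType(), StringType()):
    # str(LongType()) is "LongType()" on PySpark 3.4 and "LongType" on older
    # versions; stripping "()" gives one canonical key for both.
    key = str(spark_type).replace("()", "")
    print(key, "->", cassandra_mapping[key])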
4 changes: 2 additions & 2 deletions butterfree/reports/metadata.py
@@ -162,7 +162,7 @@ def to_json(self) -> Any:
             "features": [
                 {
                     "column_name": c["column_name"],
-                    "data_type": str(c["type"]),
+                    "data_type": str(c["type"]).replace("()", ""),
                     "description": desc,
                 }
                 for c, desc in params._features
@@ -208,7 +208,7 @@ def to_markdown(self) -> Any:

         features = ["Column name", "Data type", "Description"]
         for c, desc in params._features:
-            features.extend([c["column_name"], str(c["type"]), desc])
+            features.extend([c["column_name"], str(c["type"]).replace("()", ""), desc])

         count_rows = len(features) // 3

8 changes: 3 additions & 5 deletions requirements.dev.txt
@@ -1,11 +1,9 @@
-cmake==3.18.4
-h3==3.7.0
-pyarrow==0.15.1
+h3==3.7.4
 jupyter==1.0.0
 twine==3.1.1
 mypy==0.790
-pyspark-stubs==3.0.0
 sphinx==3.5.4
 sphinxemoji==0.1.8
 sphinx-rtd-theme==0.5.2
-recommonmark==0.7.1
\ No newline at end of file
+recommonmark==0.7.1
+pyarrow>=1.0.0
7 changes: 3 additions & 4 deletions requirements.txt
@@ -1,9 +1,8 @@
 cassandra-driver>=3.22.0,<4.0
 mdutils>=1.2.2,<2.0
-pandas>=0.24,<1.1
+pandas>=0.24,<2.0
 parameters-validation>=1.1.5,<2.0
 pyspark==3.*
 typer>=0.3,<0.4
-setuptools>=41,<42
-typing-extensions==3.7.4.3
-boto3==1.17.*
\ No newline at end of file
+typing-extensions>3.7.4,<5
+boto3==1.17.*
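These relaxed ranges can be spot-checked against an installed environment with importlib.metadata and packaging; a quick sketch (packaging is assumed available, so install it explicitly if it is not):

from importlib.metadata import version
from packaging.specifiers import SpecifierSet

# Mirror a few of the pins above and report whether the installed versions
# fall inside the allowed ranges.
requirements = {
    "pandas": SpecifierSet(">=0.24,<2.0"),
    "typing-extensions": SpecifierSet(">3.7.4,<5"),
    "cassandra-driver": SpecifierSet(">=3.22.0,<4.0"),
}

for name, spec in requirements.items():
    installed = version(name)
    status = "ok" if installed in spec else "outside range"
    print(f"{name} {installed}: {status}")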
1 change: 1 addition & 0 deletions setup.cfg
@@ -24,6 +24,7 @@ spark_options =
     spark.sql.session.timeZone: UTC
     spark.driver.bindAddress: 127.0.0.1
     spark.sql.legacy.timeParserPolicy: LEGACY
+    spark.sql.legacy.createHiveTableByDefault: false

 [mypy]
 # suppress errors about unsatisfied imports
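For context on the added option: with spark.sql.legacy.createHiveTableByDefault set to false, CREATE TABLE without a USING clause creates a native data source table (using spark.sql.sources.default, typically parquet) rather than a Hive SerDe table; presumably the integration tests hit the Hive path under PySpark 3.4 without it. A small sketch of the same settings applied to a standalone local session, using only the standard SparkSession builder:

from pyspark.sql import SparkSession

# Mirror the pytest-spark options above in a plain local session.
spark = (
    SparkSession.builder.master("local[1]")
    .config("spark.sql.session.timeZone", "UTC")
    .config("spark.sql.legacy.createHiveTableByDefault", "false")
    .getOrCreate()
)

# With the legacy flag off, a bare CREATE TABLE produces a native data source
# table instead of attempting a Hive SerDe table.
spark.sql("CREATE TABLE example_table (id INT, name STRING)")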
2 changes: 1 addition & 1 deletion setup.py
@@ -34,7 +34,7 @@
     license="Copyright",
     author="QuintoAndar",
     install_requires=requirements,
-    extras_require={"h3": ["cmake==3.16.3", "h3==3.4.2"]},
+    extras_require={"h3": ["h3>=3.7.4,<4"]},
     python_requires=">=3.7, <4",
     entry_points={"console_scripts": ["butterfree=butterfree._cli.main:app"]},
     include_package_data=True,
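With the relaxed extra, pip install "butterfree[h3]" now accepts any h3 release from 3.7.4 up to (but not including) 4.0, and no longer pins cmake; presumably the cmake pin became unnecessary once h3 versions in that range shipped prebuilt wheels.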
25 changes: 15 additions & 10 deletions tests/integration/butterfree/pipelines/test_feature_set_pipeline.py
@@ -77,9 +77,11 @@ def test_feature_set_pipeline(
         self, mocked_df, spark_session, fixed_windows_output_feature_set_dataframe,
     ):
         # arrange
+
         table_reader_id = "a_source"
         table_reader_table = "table"
         table_reader_db = environment.get_variable("FEATURE_STORE_HISTORICAL_DATABASE")
+
         create_temp_view(dataframe=mocked_df, name=table_reader_id)
         create_db_and_table(
             spark=spark_session,
@@ -88,14 +90,16 @@ def test_feature_set_pipeline(
             table_reader_table=table_reader_table,
         )

-        dbconfig = Mock()
-        dbconfig.mode = "overwrite"
-        dbconfig.format_ = "parquet"
+        path = "test_folder/historical/entity/feature_set"
+
+        dbconfig = MetastoreConfig()
         dbconfig.get_options = Mock(
-            return_value={"path": "test_folder/historical/entity/feature_set"}
+            return_value={"mode": "overwrite", "format_": "parquet", "path": path}
         )

-        historical_writer = HistoricalFeatureStoreWriter(db_config=dbconfig)
+        historical_writer = HistoricalFeatureStoreWriter(
+            db_config=dbconfig, debug_mode=True
+        )

         # act
         test_pipeline = FeatureSetPipeline(
@@ -151,9 +155,13 @@ def test_feature_set_pipeline(
         )
         test_pipeline.run()

+        # act and assert
+        dbconfig.get_path_with_partitions = Mock(
+            return_value=["historical/entity/feature_set"]
+        )
+
         # assert
-        path = dbconfig.get_options("historical/entity/feature_set").get("path")
-        df = spark_session.read.parquet(path).orderBy(TIMESTAMP_COLUMN)
+        df = spark_session.sql("select * from historical_feature_store__feature_set")

         target_df = fixed_windows_output_feature_set_dataframe.orderBy(
             test_pipeline.feature_set.timestamp_column
@@ -162,9 +170,6 @@ def test_feature_set_pipeline(
         # assert
         assert_dataframe_equality(df, target_df)

-        # tear down
-        shutil.rmtree("test_folder")
-
     def test_feature_set_pipeline_with_dates(
         self,
         mocked_date_df,
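The tear-down block can go because, with debug_mode=True, the writer no longer produces files under test_folder/; the pipeline output lands in the Spark temporary view the assertion queries (historical_feature_store__feature_set). A stand-in sketch of that mechanism, with a hand-built DataFrame in place of the real pipeline output:

from pyspark.sql import SparkSession

spark_session = SparkSession.builder.master("local[1]").getOrCreate()

# What a debug-mode write boils down to: register the output DataFrame as a
# temp view instead of writing parquet, so the test reads it back with SQL
# and leaves nothing on disk to clean up.
output_df = spark_session.createDataFrame([(1, "a")], ["id", "feature"])
output_df.createOrReplaceTempView("historical_feature_store__feature_set")

df = spark_session.sql("select * from historical_feature_store__feature_set")
df.show()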

1 comment on commit 6b78a50

@chip-n-dale (bot) commented on 6b78a50 Aug 16, 2023


Hi @ralphrass!

The GitLeaks SecTool reported some possibly exposed credentials/secrets; how about giving them a look?

GitLeaks Alert Sync
[
  {
    "line": "    webhook: REDACTED",
    "lineNumber": 141,
    "offender": "REDACTED",
    "offenderEntropy": -1,
    "commit": "b6a5daf28abc035f74b9685aab573d384680b9d1",
    "repo": "butterfree",
    "repoURL": "",
    "leakURL": "",
    "rule": "Slack Webhook",
    "commitMessage": "initial commit\n",
    "author": "Alvaro",
    "email": "[email protected]",
    "file": ".drone.yml",
    "date": "2020-01-03T14:21:51-03:00",
    "tags": "key, slack"
  },
  {
    "line": "    webhook: REDACTED",
    "lineNumber": 159,
    "offender": "REDACTED",
    "offenderEntropy": -1,
    "commit": "b6697aa708fec0c5a9e3af0b2713cee6f45ff675",
    "repo": "butterfree",
    "repoURL": "",
    "leakURL": "",
    "rule": "Slack Webhook",
    "commitMessage": "hail to the butterfree\n",
    "author": "Alvaro",
    "email": "[email protected]",
    "file": ".drone.yml",
    "date": "2020-01-03T11:07:44-03:00",
    "tags": "key, slack"
  }
]

In case of false positives, more information is available in the GitLeaks FAQ.
If you have any other problem or question during this process, be sure to contact us in the Security space on GChat!
