Skip to content

Commit

Permalink
Optimize DateToString converter (#40)
Browse files (browse the repository at this point in the history)
* Optimize DateToString converter

* Configurable converters (special flag for unsafe converters)

* More comprehensive testing for unsafe DateToString converter
  • Loading branch information
vitalyisaev2 authored Feb 1, 2024
1 parent 42f8c26 commit c7a847b
Show file tree
Hide file tree
Showing 37 changed files with 942 additions and 402 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ fq-connector-go-tests
coverage*
scripts/bench/postgresql
__pycache__
*.png
*.test
4 changes: 2 additions & 2 deletions api/common/data_source.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions api/common/endpoint.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions api/service/connector.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 7 additions & 14 deletions api/service/connector_grpc.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions api/service/protos/connector.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

68 changes: 48 additions & 20 deletions app/bench/analyze.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
#!/usr/bin/env python3

from typing import Dict
from pathlib import Path
import json

import matplotlib
import matplotlib.pyplot as plt
import pandas as pd

Expand All @@ -15,39 +16,66 @@ def read_report(path: Path) -> pd.Series:
"bytes_internal_rate": data["bytes_internal_rate"],
"bytes_arrow_rate": data["bytes_arrow_rate"],
"rows_rate": data["rows_rate"],
"cpu_utilization": data["cpu_utilization"],
}
)


def make_dataframe(result_dir: Path) -> pd.DataFrame:
series = [read_report(path) for path in result_dir.glob("*.json")]
return pd.DataFrame(series)
def make_dataframe(result_dirs: Dict[str, Path]) -> pd.DataFrame:
dfs = []
for key, result_dir in result_dirs.items():
series = [read_report(path) for path in result_dir.glob("*.json")]
df = pd.DataFrame(series).sort_values("columns")
df["key"] = key
dfs.append(df)

return pd.concat(dfs)


def draw_subplot(
df_: pd.DataFrame, label: str, y_column: str, ax: matplotlib.figure.Figure
) -> matplotlib.figure.Figure:
ax.set_ylabel(label)
ax.set_xlabel("Number of columns to SELECT")

def draw_plot(df: pd.DataFrame, result_dir: Path) -> pd.Series:
fig, ax1 = plt.subplots()
keys = {
"baseline": "red",
"optimized": "blue",
}

ax1.set_xlabel("Number of columns in SELECT")
ax1.set_ylabel("Throughput, MB/sec", color="red")
ax1.scatter(df["columns"], df["bytes_internal_rate"], color="red")
ax1.tick_params(axis="y", labelcolor="red")
for key, color in keys.items():
df = df_.loc[df_["key"] == key]
ax.plot(df["columns"], df[y_column], color=color, label=key)

ax2 = ax1.twinx()
return ax

ax2.set_ylabel("Throughput, rows/sec", color="blue")
ax2.scatter(df["columns"], df["rows_rate"], color="blue")
ax2.tick_params(axis="y", labelcolor="blue")

fig.savefig(result_dir.joinpath("report.png"))
def draw_plot(df: pd.DataFrame) -> pd.Series:
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(12, 4))
fig.subplots_adjust(bottom=0.25, wspace=0.5)
draw_subplot(df, "Throughput, MB/sec", "bytes_internal_rate", axes[0])
draw_subplot(df, "Throughput, rows/sec", "rows_rate", axes[1])
ax = draw_subplot(df, "CPU Utilization, %", "cpu_utilization", axes[2])

handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, loc="lower right")
fig.suptitle("Reading TPC-H S-10 Lineitem from PostgreSQL", fontsize=14)

fig.savefig("report.png")


def main():
result_dir = Path(
"/home/vitalyisaev/projects/fq-connector-go/scripts/bench/postgresql/results/columns/"
)
df = make_dataframe(result_dir)
result_dirs = {
"baseline": Path(
"/home/vitalyisaev/projects/fq-connector-go/scripts/bench/postgresql/results/columns_baseline/"
),
"optimized": Path(
"/home/vitalyisaev/projects/fq-connector-go/scripts/bench/postgresql/results/columns/"
),
}
df = make_dataframe(result_dirs)
print(df)
draw_plot(df, result_dir)
draw_plot(df)


if __name__ == "__main__":
Expand Down
5 changes: 5 additions & 0 deletions app/bench/test_case_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ func newTestCaseRunner(
server.WithPprofServerConfig(&config.TPprofServerConfig{
Endpoint: &api_common.TEndpoint{Host: "localhost", Port: 50052},
}),
server.WithConversionConfig(
&config.TConversionConfig{
UseUnsafeConverters: true,
},
),
)

if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions app/config/client.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit c7a847b

Please sign in to comment.