Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: md format fix for columns and sql engine #3664

Merged
merged 4 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions marimo/_cli/convert/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,18 +142,32 @@ def get_source_from_tag(tag: Element) -> str:
return ""
source = markdown_to_marimo(source)
elif tag.attrib.get("language") == "sql":
source = sql_to_marimo(source, tag.attrib.get("query", "_df"))
source = sql_to_marimo(
source,
tag.attrib.get("query", "_df"),
str(tag.attrib.get("hide_output", "false")).lower() == "true",
tag.attrib.get("engine", None),
)
else:
assert tag.tag == MARIMO_CODE, f"Unknown tag: {tag.tag}"
return source


def get_cell_config_from_tag(tag: Element, **defaults: bool) -> CellConfig:
boolean_attrs = {
# Known boolean attributes.
extracted_attrs: dict[str, bool | int] = {
**defaults,
**{k: v == "true" for k, v in tag.attrib.items()},
**{
k: v == "true"
for k, v in tag.attrib.items()
if k in ["hide_code", "disabled"]
},
}
return CellConfig.from_dict(boolean_attrs)
# "Column" is not a boolean attribute.
for int_attr in ["column"]:
if int_attr in tag.attrib:
extracted_attrs[int_attr] = int(tag.attrib[int_attr])
return CellConfig.from_dict(extracted_attrs)


# TODO: Consider upstreaming some logic such that this isn't such a terrible
Expand Down
15 changes: 13 additions & 2 deletions marimo/_convert/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,25 @@ def markdown_to_marimo(source: str) -> str:
)


def sql_to_marimo(source: str, table: str) -> str:
def sql_to_marimo(
source: str,
table: str,
hide_output: bool = False,
engine: str | None = None,
) -> str:
terminal_options = [codegen.indent_text('"""')]
if hide_output:
terminal_options.append(codegen.indent_text("output=False"))
if engine:
terminal_options.append(codegen.indent_text(f"engine={engine}"))

return "\n".join(
[
f"{table} = mo.sql(",
# f-string: expected for sql
codegen.indent_text('f"""'),
codegen.indent_text(source),
codegen.indent_text('"""'),
",\n".join(terminal_options),
")",
]
)
Expand Down
66 changes: 29 additions & 37 deletions marimo/_server/export/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
get_download_filename,
get_filename,
get_markdown_from_cell,
get_sql_options_from_cell,
)
from marimo._server.file_manager import AppFileManager
from marimo._server.models.export import ExportAsHTMLRequest
Expand Down Expand Up @@ -276,14 +277,19 @@ def _format_value(v: Optional[str | list[str]]) -> str | list[str]:
code = cell_data.code
# Config values are opt in, so only include if they are set.
attributes = cell_data.config.asdict()
attributes = {k: "true" for k, v in attributes.items() if v}
# Allow for attributes like column index.
attributes = {
k: repr(v).lower() for k, v in attributes.items() if v
}
if not is_internal_cell_name(cell_data.name):
attributes["name"] = cell_data.name
# No "cell" typically means not parseable. However newly added
# cells require compilation before cell is set.
# TODO: Refactor so it doesn't occur in export (codegen
# does this too)
if not cell:
# NB. Also need to recompile in the sql case since sql parsing is
# cached.
if not cell or cell._cell.language == "sql":
try:
cell_impl = compile_cell(
code, cell_id=str(cell_data.cell_id)
Expand All @@ -298,46 +304,32 @@ def _format_value(v: Optional[str | list[str]]) -> str | list[str]:
pass

if cell:
markdown = get_markdown_from_cell(cell, code)
# Unsanitized markdown is forced to code.
if markdown and is_sanitized_markdown(markdown):
# Use blank HTML comment to separate markdown codeblocks
if previous_was_markdown:
document.append("<!---->")
previous_was_markdown = True
document.append(markdown)
continue
# Markdown that starts a column is forced to code.
column = attributes.get("column", None)
if not column or column == "0":
markdown = get_markdown_from_cell(cell, code)
# Unsanitized markdown is forced to code.
if markdown and is_sanitized_markdown(markdown):
# Use blank HTML comment to separate markdown codeblocks
if previous_was_markdown:
document.append("<!---->")
previous_was_markdown = True
document.append(markdown)
continue
attributes["language"] = cell._cell.language
# Definitely a code cell, but need to determine if it can be
# formatted as non-python.
if attributes["language"] == "sql":
# Note frontend/src/core/codemirror/language/sql.ts
# Determines sql structure by regex, but having access to
# the AST gives us more flexibility.
query = None
valid_sql = True
for (
maybe_query,
def_vars,
) in cell._cell.variable_data.items():
if query:
# query has already been set, hence this breaks
# the expected format.
query = None
attributes.pop("language")
valid_sql = False
break
for var in def_vars:
# We are looking for the case where we assign a
# query output to python.
if var.language == "python":
query = maybe_query
break

if valid_sql:
sql_options = get_sql_options_from_cell(code)
if not sql_options:
# means not sql.
attributes.pop("language")
else:
# Ignore default query value.
if sql_options.get("query") == "_df":
sql_options.pop("query")
attributes.update(sql_options)
code = "\n".join(cell._cell.raw_sqls).strip()
if query:
attributes["query"] = query

# Definitely no "cell"; as such, treat as code, as everything in
# marimo is code.
Expand Down
31 changes: 31 additions & 0 deletions marimo/_server/export/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ def _const_string(args: list[ast.stmt]) -> str:
return f"{inner.value}" # type: ignore[attr-defined]


def _const_or_id(args: ast.expr) -> str:
    """Render a keyword-argument value node as plain text.

    Args:
        args: The AST node holding the value of a keyword argument.

    Returns:
        For a dotted name (`ast.Attribute`, e.g. `db.engine`), the dotted
        path; for any other node with a `value` attribute (e.g. a constant),
        the formatted `value`; otherwise the node's `id` (e.g. a bare name).

    Raises:
        AttributeError: If the node has neither a `value` nor an `id`
            attribute.
    """
    # An Attribute node also has a `value` attribute, but it holds another
    # AST node rather than a literal; formatting it directly would yield the
    # node's object repr. Rebuild the dotted path instead.
    if isinstance(args, ast.Attribute):
        return f"{_const_or_id(args.value)}.{args.attr}"
    if hasattr(args, "value"):
        return f"{args.value}"  # type: ignore[attr-defined]
    return f"{args.id}"  # type: ignore[attr-defined]


def get_markdown_from_cell(
cell: Cell, code: str, native_callout: bool = False
) -> Optional[str]:
Expand Down Expand Up @@ -103,3 +109,28 @@ def get_markdown_from_cell(
:::"""
)
return md


def get_sql_options_from_cell(code: str) -> Optional[dict[str, str]]:
    """Extract SQL code-block options from a `<name> = <obj>.sql(...)` cell.

    Args:
        code: Source of the cell; surrounding whitespace is ignored.

    Returns:
        A dict containing `"query"` (the name of the single assignment
        target) plus one entry per keyword argument of the call, rendered
        with `_const_or_id`. An `output` keyword is never returned as-is:
        when its text is "false" (case-insensitive) it is replaced by
        `hide_output="True"`, otherwise it is dropped.

        `None` when the code cannot be parsed, is not exactly one statement
        assigning a `.sql(...)` call to exactly one plain name, or passes
        `**kwargs` (which cannot be expressed as static options).
    """
    # Note frontend/src/core/codemirror/language/sql.ts
    # also extracts options via ast. Ideally, these should be synced.
    options: dict[str, str] = {}
    try:
        # Tuple unpacking enforces "exactly one statement" and "exactly one
        # target"; it raises ValueError otherwise.
        (body,) = ast.parse(code.strip()).body
        (target,) = body.targets  # type: ignore[attr-defined]
        call = body.value  # type: ignore[attr-defined]
        if call.func.attr != "sql":
            return None
        options["query"] = target.id
        for keyword in call.keywords:
            # `**kwargs` shows up as a keyword whose `arg` is None.
            if keyword.arg is None:
                return None
            options[keyword.arg] = _const_or_id(keyword.value)
    except (AssertionError, AttributeError, SyntaxError, ValueError):
        # SyntaxError is not a ValueError subclass, so it must be listed
        # explicitly for unparsable code to be reported as "not sql".
        return None

    if options.pop("output", "True").lower() == "false":
        options["hide_output"] = "True"
    return options
124 changes: 84 additions & 40 deletions marimo/_tutorials/markdown_format.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
title: Markdown
marimo-version: 0.10.9
marimo-version: 0.10.19
---

# Markdown file format
Expand Down Expand Up @@ -66,21 +66,40 @@ You can break up markdown into multiple cells by using an empty html tag `<!----
<!---->
View the source of this notebook to see how this cell was created.
<!---->
You can still hide and disable code cells in markdown notebooks:
You can still hide cell code in markdown notebooks:

````md
```python {.marimo hide_code="true"}
import pandas as pd
pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
form = (
# ...
# Just something a bit more complicated
# you might not want to see in the editor.
# ...
)
form
```
````

```python {.marimo hide_code="true"}
import pandas as pd
form = (
mo.md('''
**Just how great is markdown?.**

{markdown_is_awesome}

pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
{marimo_is_amazing}
''')
.batch(
markdown_is_awesome=mo.ui.text(label="How much do you like markdown?", placeholder="It is pretty swell 🌊"),
marimo_is_amazing=mo.ui.slider(label="How much do you like marimo?", start=0, stop=11, value=11),
)
.form(show_clear_button=True, bordered=False)
)
form
```

and disable cells too:

````md
```python {.marimo disabled="true"}
print("This code cell is disabled, there should be no output!")
Expand Down Expand Up @@ -120,7 +139,7 @@ supposed to be computed, and what values are static. To interpolate Python
values, just use a Python cell:

```python {.marimo}
"🍃" * 7
mo.md(f"""Like so: {"🍃" * 7}""")
```

### Limitations on conversion
Expand Down Expand Up @@ -151,6 +170,17 @@ It's not likely that you'll run into this issue, but rest assured that marimo
is working behind the scenes to keep your notebooks unambiguous and as clean as
possible.
<!---->
### Saving multicolumn mode

Multicolumn mode works, but the first cell in a column must be a Python cell in
order to specify the column start and to save correctly:

````md
```python {.marimo column="1"}
print("First cell in column 1")
```
````
<!---->
### Naming cells

Since the markdown notebook really is just markdown, you can't import from a
Expand All @@ -167,55 +197,69 @@ give your cells a name:
# But here's my `cell_id`, so call me, `maybe` 🎶
```

## Converting back to the Python file format
### SQL in markdown

The markdown format is supposed to lower the barrier for writing text heavy
documents, it's not meant to be a full replacement for the Python notebook
format. You can always convert back to a Python notebook if you need to:
You can also run SQL queries in markdown cells through marimo, using a `sql` code block. For instance:

```bash
$ marimo convert my_marimo.md > my_marimo.py
````md
```sql {.marimo}
SELECT GREATEST(x, y), SQRT(z) from uniformly_random_numbers
```
````

<!---->
The resultant distribution may be surprising! 🎲[^surprise]

[^surprise]: The general distributions should be the same

## SQL in markdown
```sql {.marimo}
SELECT GREATEST(a, b), SQRT(c) from uniformly_random_numbers
```

In this SQL format, Python variable interpolation in SQL queries occurs automatically. Additionally, query results can be assigned to a dataframe with the `query` attribute.
For instance, here's how to create a random uniform distribution and assign it to the dataframe `uniformly_random_numbers` used above:

````md
```sql {.marimo query="uniformly_random_numbers" hide_output="true"}
SELECT i.range::text AS id,
random() AS x,
random() AS y,
random() AS z
FROM
-- Note sample_count comes from the slider below!
range(1, {sample_count.value + 1}) i;
```
````

You can also run parameterized SQL queries in markdown cells through marimo.
You can learn more about using SQL in marimo in the SQL tutorial (`marimo tutorial sql`).

```python {.marimo hide_code="true"}
num = mo.ui.slider(1, 15, label="Fibonacci numbers")
num
sample_count = mo.ui.slider(1, 1000, value=1000, label="Sample Count")
sample_count
```

```python {.marimo}
_df = mo.sql(
f"""
WITH RECURSIVE fibonacci AS (
SELECT
1 as n,
1 as fib,
1 as prev
UNION ALL
SELECT
n + 1,
fib + prev,
fib
FROM fibonacci
WHERE n < {num.value}
)
SELECT n, fib
FROM fibonacci
ORDER BY n;
"""
)
```sql {.marimo query="uniformly_random_numbers" hide_output="True"}
SELECT i.range::text AS id,
random() AS a,
random() AS b,
random() AS c
FROM range(1, {sample_count.value + 1}) i;
```

## Converting back to the Python file format

The markdown format is meant to lower the barrier for writing text-heavy
documents; it's not meant to be a full replacement for the Python notebook
format. You can always convert back to a Python notebook if you need to:

```bash
$ marimo convert my_marimo.md > my_marimo.py
```
<!---->
## More on markdown

Be sure to check out the markdown.py tutorial (`marimo tutorial markdown`) for
more information on how to typeset and render markdown in marimo.

```python {.marimo hide_code="true"}
import marimo as mo
```
```
Loading
Loading