From 58721b86660a29d52c0e8cf0dd87366ba69bb7e5 Mon Sep 17 00:00:00 2001 From: dylan madisetti Date: Sun, 2 Feb 2025 19:26:28 -0500 Subject: [PATCH 1/3] fix: fix up markdown format for full sql egniens and columns compatibility --- marimo/_cli/convert/markdown.py | 22 +++- marimo/_convert/utils.py | 15 ++- marimo/_server/export/exporter.py | 66 ++++++------ marimo/_server/export/utils.py | 31 ++++++ marimo/_tutorials/markdown_format.md | 123 ++++++++++++++++------- tests/_cli/snapshots/sql-notebook.py.txt | 12 ++- tests/_cli/test_markdown_conversion.py | 19 +++- 7 files changed, 200 insertions(+), 88 deletions(-) diff --git a/marimo/_cli/convert/markdown.py b/marimo/_cli/convert/markdown.py index 0e2c97f9db9..d01b3d4dbdc 100644 --- a/marimo/_cli/convert/markdown.py +++ b/marimo/_cli/convert/markdown.py @@ -142,18 +142,32 @@ def get_source_from_tag(tag: Element) -> str: return "" source = markdown_to_marimo(source) elif tag.attrib.get("language") == "sql": - source = sql_to_marimo(source, tag.attrib.get("query", "_df")) + source = sql_to_marimo( + source, + tag.attrib.get("query", "_df"), + str(tag.attrib.get("hide_output", "false")).lower() == "true", + tag.attrib.get("engine", None), + ) else: assert tag.tag == MARIMO_CODE, f"Unknown tag: {tag.tag}" return source def get_cell_config_from_tag(tag: Element, **defaults: bool) -> CellConfig: - boolean_attrs = { + # Known boolean attributes. + extracted_attrs: dict[str, bool | int] = { **defaults, - **{k: v == "true" for k, v in tag.attrib.items()}, + **{ + k: v == "true" + for k, v in tag.attrib.items() + if k in ["hide_code", "disabled"] + }, } - return CellConfig.from_dict(boolean_attrs) + # "Column" is not a boolean attribute. + for int_attr in ["column"]: + if int_attr in tag.attrib: + extracted_attrs[int_attr] = int(tag.attrib[int_attr]) + return CellConfig.from_dict(extracted_attrs) # TODO: Consider upstreaming some logic such that this isn't such a terrible diff --git a/marimo/_convert/utils.py b/marimo/_convert/utils.py index 006ee51ccf6..57da16a0aed 100644 --- a/marimo/_convert/utils.py +++ b/marimo/_convert/utils.py @@ -23,14 +23,25 @@ def markdown_to_marimo(source: str) -> str: ) -def sql_to_marimo(source: str, table: str) -> str: +def sql_to_marimo( + source: str, + table: str, + hide_output: bool = False, + engine: str | None = None, +) -> str: + terminal_options = [codegen.indent_text('"""')] + if hide_output: + terminal_options.append(codegen.indent_text("output=False")) + if engine: + terminal_options.append(codegen.indent_text(f"engine={engine}")) + return "\n".join( [ f"{table} = mo.sql(", # f-string: expected for sql codegen.indent_text('f"""'), codegen.indent_text(source), - codegen.indent_text('"""'), + ",\n".join(terminal_options), ")", ] ) diff --git a/marimo/_server/export/exporter.py b/marimo/_server/export/exporter.py index 66bcd5be28c..b9ff4309994 100644 --- a/marimo/_server/export/exporter.py +++ b/marimo/_server/export/exporter.py @@ -35,6 +35,7 @@ get_download_filename, get_filename, get_markdown_from_cell, + get_sql_options_from_cell, ) from marimo._server.file_manager import AppFileManager from marimo._server.models.export import ExportAsHTMLRequest @@ -276,14 +277,19 @@ def _format_value(v: Optional[str | list[str]]) -> str | list[str]: code = cell_data.code # Config values are opt in, so only include if they are set. attributes = cell_data.config.asdict() - attributes = {k: "true" for k, v in attributes.items() if v} + # Allow for attributes like column index. + attributes = { + k: repr(v).lower() for k, v in attributes.items() if v + } if not is_internal_cell_name(cell_data.name): attributes["name"] = cell_data.name # No "cell" typically means not parseable. However newly added # cells require compilation before cell is set. # TODO: Refactor so it doesn't occur in export (codegen # does this too) - if not cell: + # NB. Also need to recompile in the sql case since sql parsing is + # cached. + if not cell or cell._cell.language == "sql": try: cell_impl = compile_cell( code, cell_id=str(cell_data.cell_id) @@ -298,46 +304,32 @@ def _format_value(v: Optional[str | list[str]]) -> str | list[str]: pass if cell: - markdown = get_markdown_from_cell(cell, code) - # Unsanitized markdown is forced to code. - if markdown and is_sanitized_markdown(markdown): - # Use blank HTML comment to separate markdown codeblocks - if previous_was_markdown: - document.append("") - previous_was_markdown = True - document.append(markdown) - continue + # Markdown that starts a column is forced to code. + column = attributes.get("column", None) + if not column or column == "0": + markdown = get_markdown_from_cell(cell, code) + # Unsanitized markdown is forced to code. + if markdown and is_sanitized_markdown(markdown): + # Use blank HTML comment to separate markdown codeblocks + if previous_was_markdown: + document.append("") + previous_was_markdown = True + document.append(markdown) + continue attributes["language"] = cell._cell.language # Definitely a code cell, but need to determine if it can be # formatted as non-python. if attributes["language"] == "sql": - # Note frontend/src/core/codemirror/language/sql.ts - # Determines sql structure by regex, but having access to - # the AST gives us more flexibility. - query = None - valid_sql = True - for ( - maybe_query, - def_vars, - ) in cell._cell.variable_data.items(): - if query: - # query has already been set, hence this breaks - # the expected format. - query = None - attributes.pop("language") - valid_sql = False - break - for var in def_vars: - # We are looking for the case where we assign a - # query output to python. - if var.language == "python": - query = maybe_query - break - - if valid_sql: + sql_options = get_sql_options_from_cell(code) + if not sql_options: + # means not sql. + attributes.pop("language") + else: + # Ignore default query value. + if sql_options.get("query") == "_df": + sql_options.pop("query") + attributes.update(sql_options) code = "\n".join(cell._cell.raw_sqls).strip() - if query: - attributes["query"] = query # Definitely no "cell"; as such, treat as code, as everything in # marimo is code. diff --git a/marimo/_server/export/utils.py b/marimo/_server/export/utils.py index 7c54e534589..74f657ea4fe 100644 --- a/marimo/_server/export/utils.py +++ b/marimo/_server/export/utils.py @@ -49,6 +49,12 @@ def _const_string(args: list[ast.stmt]) -> str: return f"{inner.value}" # type: ignore[attr-defined] +def _const_or_id(args: ast.stmt) -> str: + if hasattr(args, "value"): + return f"{args.value}" # type: ignore[attr-defined] + return f"{args.id}" # type: ignore[attr-defined] + + def get_markdown_from_cell( cell: Cell, code: str, native_callout: bool = False ) -> Optional[str]: @@ -103,3 +109,28 @@ def get_markdown_from_cell( :::""" ) return md + + +def get_sql_options_from_cell(code: str) -> Optional[dict[str, str]]: + # Note frontend/src/core/codemirror/language/sql.ts + # also extracts options via ast. Ideally, these should be synced. + options = {} + code = code.strip() + try: + (body,) = ast.parse(code).body + (target,) = body.targets # type: ignore[attr-defined] + options["query"] = target.id + if body.value.func.attr == "sql": # type: ignore[attr-defined] + value = body.value # type: ignore[attr-defined] + else: + return None + if value.keywords: + for keyword in value.keywords: # type: ignore[attr-defined] + options[keyword.arg] = _const_or_id(keyword.value) # type: ignore[attr-defined] + output = options.pop("output", "True").lower() + if output == "false": + options["hide_output"] = "True" + + return options + except (AssertionError, AttributeError, ValueError): + return None diff --git a/marimo/_tutorials/markdown_format.md b/marimo/_tutorials/markdown_format.md index 318221140f4..e975c29da50 100644 --- a/marimo/_tutorials/markdown_format.md +++ b/marimo/_tutorials/markdown_format.md @@ -1,6 +1,7 @@ --- title: Markdown -marimo-version: 0.10.9 +marimo-version: 0.10.19 +width: columns --- # Markdown file format @@ -66,21 +67,40 @@ You can break up markdown into multiple cells by using an empty html tag ` View the source of this notebook to see how this cell was created. -You can still hide and disable code cells in markdown notebooks: +You can still hide cell code in markdown notebooks: ````md ```python {.marimo hide_code="true"} -import pandas as pd -pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) +form = ( + # ... + # Just something a bit more complicated + # you might not want to see in the editor. + # ... +) +form ``` ```` ```python {.marimo hide_code="true"} -import pandas as pd +form = ( + mo.md(''' + **Just how great is markdown?.** + + {markdown_is_awesome} -pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + {marimo_is_amazing} +''') + .batch( + markdown_is_awesome=mo.ui.text(label="How much do you like markdown?", placeholder="It is pretty swell 🌊"), + marimo_is_amazing=mo.ui.slider(label="How much do you like marimo?", start=0, stop=11, value=11), + ) + .form(show_clear_button=True, bordered=False) +) +form ``` +and disable cells too: + ````md ```python {.marimo disabled="true"} print("This code cell is disabled, there should be no output!") @@ -120,7 +140,7 @@ supposed to be computed, and what values are static. To interpolate Python values, just use a Python cell: ```python {.marimo} -"🍃" * 7 +mo.md(f"""Like so: {"🍃" * 7}""") ``` ### Limitations on conversion @@ -151,6 +171,17 @@ It's not likely that you'll run into this issue, but rest assured that marimo is working behind the scenes to keep your notebooks unambiguous and clean as possible. +### Saving multicolumn mode + +Multicolumn mode works, but the first cell in a column must be a python cell in +order to specify column start and to save correctly: + +````md +```python {.marimo column="1"} +print("First cell in column 1") +``` +```` + ### Naming cells Since the markdown notebook really is just markdown, you can't import from a @@ -167,50 +198,64 @@ give your cells a name: # But here's my `cell_id`, so call me, `maybe` 🎶 ``` -## Converting back to the Python file format +### SQL in markdown -The markdown format is supposed to lower the barrier for writing text heavy -documents, it's not meant to be a full replacement for the Python notebook -format. You can always convert back to a Python notebook if you need to: +You can also run SQL queries in markdown cells through marimo, using a `sql` code block. For instance: -```bash -$ marimo convert my_marimo.md > my_marimo.py +````md +```sql {.marimo} +SELECT GREATEST(x, y), SQRT(z) from uniformly_random_numbers ``` +```` - +The resultant distribution may be surprising! 🎲[^surprise] -## SQL in markdown +[^surprise]: The general distributions should be the same -You can also run parameterized SQL queries in markdown cells through marimo. +```sql {.marimo} +SELECT GREATEST(a, b), SQRT(c) from uniformly_random_numbers +``` + +In this SQL format, Python variable interpolation in SQL queries occurs automatically. Additionally, query results can be assigned to a dataframe with the `query` attribute. +For instance, here's how to create a random uniform distribution and assign it to the dataframe `uniformly_random_numbers` used above: + +````md +```sql {.marimo query="uniformly_random_numbers" hide_output="true"} +SELECT i.range::text AS id, + random() AS x, + random() AS y, + random() AS z +FROM + -- Note sample_count comes from the slider below! + range(1, {sample_count.value + 1}) i; +``` +```` + +You can learn more about other SQL use in the SQL tutorial (`marimo tutorial sql`) ```python {.marimo hide_code="true"} -num = mo.ui.slider(1, 15, label="Fibonacci numbers") -num +sample_count = mo.ui.slider(1, 1000, value=1000, label="Sample Count") +sample_count ``` -```python {.marimo} -_df = mo.sql( - f""" - WITH RECURSIVE fibonacci AS ( - SELECT - 1 as n, - 1 as fib, - 1 as prev - UNION ALL - SELECT - n + 1, - fib + prev, - fib - FROM fibonacci - WHERE n < {num.value} - ) - SELECT n, fib - FROM fibonacci - ORDER BY n; - """ -) +```sql {.marimo query="uniformly_random_numbers" hide_output="True"} +SELECT i.range::text AS id, + random() AS a, + random() AS b, + random() AS c +FROM range(1, {sample_count.value + 1}) i; ``` +## Converting back to the Python file format + +The markdown format is supposed to lower the barrier for writing text heavy +documents, it's not meant to be a full replacement for the Python notebook +format. You can always convert back to a Python notebook if you need to: + +```bash +$ marimo convert my_marimo.md > my_marimo.py +``` + ## More on markdown Be sure to checkout the markdown.py tutorial (`marimo tutorial markdown`) for diff --git a/tests/_cli/snapshots/sql-notebook.py.txt b/tests/_cli/snapshots/sql-notebook.py.txt index 36460f2943e..91ff3c57caa 100644 --- a/tests/_cli/snapshots/sql-notebook.py.txt +++ b/tests/_cli/snapshots/sql-notebook.py.txt @@ -15,11 +15,19 @@ def _(mo): @app.cell -def _(mo, my_table): +def _(mo): + mem_engine = mo.create_engine("sqlite:///:memory:") + return (mem_engine,) + + +@app.cell +def _(mem_engine, mo, my_table): export = mo.sql( f""" SELECT * FROM my_table; - """ + """, + output=False, + engine=mem_engine ) return (export,) diff --git a/tests/_cli/test_markdown_conversion.py b/tests/_cli/test_markdown_conversion.py index 03ca9bfd14d..ef5f4da9304 100644 --- a/tests/_cli/test_markdown_conversion.py +++ b/tests/_cli/test_markdown_conversion.py @@ -193,7 +193,11 @@ def test_markdown_with_sql() -> None: # SQL notebook - ```sql {.marimo query="export"} + ```python {.marimo} + mem_engine = mo.create_engine("sqlite:///:memory:") + ``` + + ```sql {.marimo query="export" engine="mem_engine" hide_output="true"} SELECT * FROM my_table; ``` @@ -210,11 +214,18 @@ def test_markdown_with_sql() -> None: app = InternalApp(convert_from_md_to_app(script)) assert app.config.app_title == "My Title" ids = list(app.cell_manager.cell_ids()) - assert len(ids) == 3 - for i in range(3): + assert len(ids) == 4 + for i in range(4): # Unparsables are None assert app.cell_manager.cell_data_at(ids[i]).cell is not None - assert app.cell_manager.cell_data_at(ids[1]).cell.defs == {"export"} + assert app.cell_manager.cell_data_at(ids[2]).cell.defs == {"export"} + assert app.cell_manager.cell_data_at(ids[2]).cell.refs == { + "mem_engine", + "my_table", + "mo", + } + # hide_output=True => output=False + assert "False" in app.cell_manager.cell_data_at(ids[2]).cell._cell.code def test_markdown_empty() -> None: From 63136e91515cfca4281fdee521857010d317c614 Mon Sep 17 00:00:00 2001 From: dylan madisetti Date: Mon, 3 Feb 2025 10:17:37 -0500 Subject: [PATCH 2/3] tidy --- marimo/_tutorials/markdown_format.md | 3 +-- tests/_cli/test_markdown_conversion.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/marimo/_tutorials/markdown_format.md b/marimo/_tutorials/markdown_format.md index e975c29da50..48491606356 100644 --- a/marimo/_tutorials/markdown_format.md +++ b/marimo/_tutorials/markdown_format.md @@ -1,7 +1,6 @@ --- title: Markdown marimo-version: 0.10.19 -width: columns --- # Markdown file format @@ -263,4 +262,4 @@ more information on to type-set and render markdown in marimo. ```python {.marimo hide_code="true"} import marimo as mo -``` \ No newline at end of file +``` diff --git a/tests/_cli/test_markdown_conversion.py b/tests/_cli/test_markdown_conversion.py index ef5f4da9304..8ccb31757b4 100644 --- a/tests/_cli/test_markdown_conversion.py +++ b/tests/_cli/test_markdown_conversion.py @@ -194,7 +194,7 @@ def test_markdown_with_sql() -> None: # SQL notebook ```python {.marimo} - mem_engine = mo.create_engine("sqlite:///:memory:") + mem_engine = fn_that_creates_engine("sqlite:///:memory:") ``` ```sql {.marimo query="export" engine="mem_engine" hide_output="true"} From 529bbd858ac5900b821db758e89908210660fa49 Mon Sep 17 00:00:00 2001 From: dylan madisetti Date: Mon, 3 Feb 2025 11:32:29 -0500 Subject: [PATCH 3/3] git add snapshot --- tests/_cli/snapshots/sql-notebook.py.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/_cli/snapshots/sql-notebook.py.txt b/tests/_cli/snapshots/sql-notebook.py.txt index 91ff3c57caa..db02b584ed0 100644 --- a/tests/_cli/snapshots/sql-notebook.py.txt +++ b/tests/_cli/snapshots/sql-notebook.py.txt @@ -15,8 +15,8 @@ def _(mo): @app.cell -def _(mo): - mem_engine = mo.create_engine("sqlite:///:memory:") +def _(fn_that_creates_engine): + mem_engine = fn_that_creates_engine("sqlite:///:memory:") return (mem_engine,)