diff --git a/notebooks/examples/python/Quickstart/QuickstartNotebook.ipynb b/notebooks/examples/python/Quickstart/QuickstartNotebook.ipynb index 6ca738f02..9fab5af77 100644 --- a/notebooks/examples/python/Quickstart/QuickstartNotebook.ipynb +++ b/notebooks/examples/python/Quickstart/QuickstartNotebook.ipynb @@ -12,6 +12,7 @@ } }, "source": [ + "%md\n", "# Mosaic Quickstart\n", "\n", "> Perform a point-in-polygon spatial join between NYC Taxi trips and zones. __Note: this does not get into performance tweaks that are available for scaled joins.__\n", @@ -29,13 +30,10 @@ " * Already installed with Mosaic, use `%%mosaic_kepler` magic [[Mosaic Docs](https://databrickslabs.github.io/mosaic/usage/kepler.html)]\n", " * Import with `from keplergl import KeplerGl` to use directly\n", "\n", - "If you have trouble with Volume access:\n", - "\n", - "* For Mosaic 0.3 series (< DBR 13) - you can copy resources to DBFS as a workaround\n", - "* For Mosaic 0.4 series (DBR 13.3 LTS) - you will need to either copy resources to DBFS or setup for Unity Catalog + Shared Access which will involve your workspace admin. Instructions, as updated, will be [here](https://databrickslabs.github.io/mosaic/usage/install-gdal.html).\n", + "If you have trouble accessing source datasets from a Unity Catalog Volume, the easiest workaround is to copy resources to an accessible location in DBFS.\n\nNote: 'Shared' access mode clusters are not supported at all.\n", "\n", "--- \n", - " __Last Update__ 28 NOV 2023 [Mosaic 0.3.12]" + " __Last Update__ 10 JUN 2024 [Mosaic 0.4.2]" ] }, { @@ -81,8 +79,7 @@ } ], "source": [ - "%pip install \"databricks-mosaic<0.4,>=0.3\" --quiet # <- Mosaic 0.3 series\n", - "# %pip install \"databricks-mosaic<0.5,>=0.4\" --quiet # <- Mosaic 0.4 series (as available)" + "%pip install \"databricks-mosaic<0.5,>=0.4\" --quiet # <- Mosaic 0.4 series" ] }, { @@ -109,8 +106,8 @@ "spark.conf.set(\"spark.sql.shuffle.partitions\", 1_024) # <-- default is 200\n", "\n", "# -- import databricks + spark functions\n", "from pyspark.sql import functions as F\n", - "from pyspark.sql.functions import col, udf\n", + "from pyspark.sql.functions import col, udf, lit, to_json, explode, array\n", "from pyspark.sql.types import *\n", "\n", "# -- setup mosaic\n", @@ -398,8 +395,8 @@ " .option(\"multiline\", \"true\")\n", " .format(\"json\")\n", " .load(zone_dir)\n", - " .select(\"type\", explode(col(\"features\")).alias(\"feature\"))\n", - " .select(\"type\", col(\"feature.properties\").alias(\"properties\"), to_json(col(\"feature.geometry\")).alias(\"json_geometry\"))\n", + " .select(\"type\", F.explode(col(\"features\")).alias(\"feature\"))\n", + " .select(\"type\", col(\"feature.properties\").alias(\"properties\"), F.to_json(col(\"feature.geometry\")).alias(\"json_geometry\"))\n", " .withColumn(\"geometry\", mos.st_aswkt(mos.st_geomfromgeojson(\"json_geometry\")))\n", ")\n", "\n", @@ -1857,7 +1854,7 @@ } }, "source": [ - "> We can easily perform a similar join for the drop off location. __Note: in this case using `withPickupZone` from above as the left sid of the join.__" + "> We can easily perform a similar join for the drop off location. __Note: in this case using `withPickupZone` from above as the left side of the join.__" ] }, {