From e494a10a48e0590ae733a6fe3b7d124209bc594f Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Tue, 5 Mar 2024 17:22:25 +0000 Subject: [PATCH 01/11] Preparing for live. --- 01_introduction.py | 5 +++++ 02_ingest_data.py | 5 +++++ 02_ingest_data_bulk.py | 5 +++++ 03_exploratory_data_analysis.py | 5 +++++ 04_feature_engineering.py | 5 +++++ 05_build_and_train_model.py | 5 +++++ 06_deploy_serving_endpoint.py | 5 +++++ 07_test_model_inference.py | 5 +++++ 08_monitoring.py | 5 +++++ CONNECTED_APP.md | 2 +- _Analysis.py | 2 +- 11 files changed, 47 insertions(+), 2 deletions(-) diff --git a/01_introduction.py b/01_introduction.py index 2aa915e..d0b009c 100644 --- a/01_introduction.py +++ b/01_introduction.py @@ -1,4 +1,9 @@ # Databricks notebook source +# MAGIC %md +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. + +# COMMAND ---------- + # MAGIC %md # MAGIC # Set Up, Build, Train, and Deploy model in Databricks diff --git a/02_ingest_data.py b/02_ingest_data.py index d6d9c9b..c89ca1d 100644 --- a/02_ingest_data.py +++ b/02_ingest_data.py @@ -1,4 +1,9 @@ # Databricks notebook source +# MAGIC %md +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. + +# COMMAND ---------- + # MAGIC %md # MAGIC # Data Ingestion # MAGIC diff --git a/02_ingest_data_bulk.py b/02_ingest_data_bulk.py index f8a0c50..f2f4d3c 100644 --- a/02_ingest_data_bulk.py +++ b/02_ingest_data_bulk.py @@ -1,4 +1,9 @@ # Databricks notebook source +# MAGIC %md +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. + +# COMMAND ---------- + # MAGIC %md # MAGIC # Distributed bulk load example (optional) # MAGIC diff --git a/03_exploratory_data_analysis.py b/03_exploratory_data_analysis.py index 9062ef4..3252971 100644 --- a/03_exploratory_data_analysis.py +++ b/03_exploratory_data_analysis.py @@ -1,4 +1,9 @@ # Databricks notebook source +# MAGIC %md +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. + +# COMMAND ---------- + # MAGIC %md # MAGIC # Exploratory Data Analysis # MAGIC diff --git a/04_feature_engineering.py b/04_feature_engineering.py index 80177bd..157fc05 100644 --- a/04_feature_engineering.py +++ b/04_feature_engineering.py @@ -1,4 +1,9 @@ # Databricks notebook source +# MAGIC %md +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. 
+ +# COMMAND ---------- + # MAGIC %md # MAGIC # MAGIC # Feature Engineering diff --git a/05_build_and_train_model.py b/05_build_and_train_model.py index 2089b5a..a016f8f 100644 --- a/05_build_and_train_model.py +++ b/05_build_and_train_model.py @@ -1,4 +1,9 @@ # Databricks notebook source +# MAGIC %md +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. + +# COMMAND ---------- + # MAGIC %md # MAGIC # Build and Train Model # MAGIC diff --git a/06_deploy_serving_endpoint.py b/06_deploy_serving_endpoint.py index d3d4175..4127420 100644 --- a/06_deploy_serving_endpoint.py +++ b/06_deploy_serving_endpoint.py @@ -1,4 +1,9 @@ # Databricks notebook source +# MAGIC %md +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. + +# COMMAND ---------- + # MAGIC %md # MAGIC # Deploy Serving Endpoint # MAGIC diff --git a/07_test_model_inference.py b/07_test_model_inference.py index 97e5fbf..8afd6f7 100644 --- a/07_test_model_inference.py +++ b/07_test_model_inference.py @@ -1,4 +1,9 @@ # Databricks notebook source +# MAGIC %md +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. + +# COMMAND ---------- + # MAGIC %md # MAGIC # Test Model Inference # MAGIC diff --git a/08_monitoring.py b/08_monitoring.py index ca1d7b8..97601cf 100644 --- a/08_monitoring.py +++ b/08_monitoring.py @@ -1,4 +1,9 @@ # Databricks notebook source +# MAGIC %md +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. + +# COMMAND ---------- + # MAGIC %md-sandbox # MAGIC # MAGIC # Inference Table Monitoring diff --git a/CONNECTED_APP.md b/CONNECTED_APP.md index 5810dee..b0fa78f 100644 --- a/CONNECTED_APP.md +++ b/CONNECTED_APP.md @@ -6,7 +6,7 @@ To be able to access SalesForce Data Cloud via the connector, you'll first need Log in to Salesforce and go to setup -![image](files/sfdc_byom/images/connected_app_01.png) +![image](https://raw.githubusercontent.com/databricks-industry-solutions/sfdc-byom/main/images/connected_app_01.png) ### 2. Open up App Manager diff --git a/_Analysis.py b/_Analysis.py index 5a5128d..4f212cb 100644 --- a/_Analysis.py +++ b/_Analysis.py @@ -1,3 +1,3 @@ # Databricks notebook source # MAGIC %md -# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sample-repo. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sample-accelerator +# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. 
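For readers skimming the hunks above: these `.py` files use the Databricks notebook source format, in which `# MAGIC` prefixes carry a cell's body, `%md` marks a markdown cell, and `# COMMAND ----------` separates cells. A minimal sketch of the header cell this patch prepends to each notebook:

```python
# Databricks notebook source
# MAGIC %md
# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom.
# MAGIC For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom.

# COMMAND ----------
```

When imported into a Databricks workspace, this renders as a single markdown cell at the top of each notebook pointing back to the repository and the accelerator page.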
From 07ab58e0b9c3d8d614827ba2e8dde24556cd49ab Mon Sep 17 00:00:00 2001 From: Corey Abshire <108366077+coreyabs-db@users.noreply.github.com> Date: Tue, 5 Mar 2024 11:25:10 -0600 Subject: [PATCH 02/11] Update CONNECTED_APP.md --- CONNECTED_APP.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONNECTED_APP.md b/CONNECTED_APP.md index b0fa78f..2786723 100644 --- a/CONNECTED_APP.md +++ b/CONNECTED_APP.md @@ -6,7 +6,7 @@ To be able to access SalesForce Data Cloud via the connector, you'll first need Log in to Salesforce and go to setup -![image](https://raw.githubusercontent.com/databricks-industry-solutions/sfdc-byom/main/images/connected_app_01.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_01.png) ### 2. Open up App Manager From fbbb3a81a700832893e2006111155624fafe91ed Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Tue, 5 Mar 2024 17:27:26 +0000 Subject: [PATCH 03/11] Update image references. --- CONNECTED_APP.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/CONNECTED_APP.md b/CONNECTED_APP.md index 2786723..bead314 100644 --- a/CONNECTED_APP.md +++ b/CONNECTED_APP.md @@ -13,18 +13,18 @@ Log in to Salesforce and go to setup Search for App Manager -![image](files/sfdc_byom/images/connected_app_02.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_02.png) When you open it, it should look like this -![image](files/sfdc_byom/images/connected_app_03.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_03.png) ### 3. Create Connected App Click on New Connected App -![image](files/sfdc_byom/images/connected_app_04.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_04.png) 1. Give the app a name 2. Enter email @@ -35,37 +35,37 @@ Click on New Connected App 2. Access all datacloud resources 3. Perform ANSI SQL queries on DataCloud -![image](files/sfdc_byom/images/connected_app_05.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_05.png) Click on Save. -![image](files/sfdc_byom/images/connected_app_06.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_06.png) ### 4. Update policies In set up go to Manage Connected App -![image](files/sfdc_byom/images/connected_app_07.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_07.png) Click on the newly created connected app and then click on Edit Policies. -![image](files/sfdc_byom/images/connected_app_08.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_08.png) Make sure that under oauth policies we have "Relax IP restrictions" and "Allow all users to self authorize" and then click Save. -![image](files/sfdc_byom/images/connected_app_09.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_09.png) ### 5. Set up customer keys (optional) Click on Manage Customer Keys and provide validation code if applicable. -![image](files/sfdc_byom/images/connected_app_10.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_10.png) Copy the keys. 
-![image](files/sfdc_byom/images/connected_app_11.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_11.png) %md @@ -73,7 +73,7 @@ Copy the keys. In setup, go to Oauth and OpenId settings. Ensure all the options are turned on. -![image](files/sfdc_byom/images/connected_app_12.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_12.png) **Note:** If you want to restrict IP's, you can set it up in the connected app. See the article [Restrict Access to Trusted IP Ranges for a Connected App](https://help.salesforce.com/s/articleView?id=sf.connected_app_edit_ip_ranges.htm&type=5) for more details. From 9563d3dcf2fc1c491047a00121a39634fe390001 Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Tue, 5 Mar 2024 17:28:20 +0000 Subject: [PATCH 04/11] Update image references. --- DATA_STREAM.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/DATA_STREAM.md b/DATA_STREAM.md index e45500c..3e306a6 100644 --- a/DATA_STREAM.md +++ b/DATA_STREAM.md @@ -24,44 +24,44 @@ Here are the steps to create Data Streams from S3 in Salesforce: Log in to the org -![image](files/sfdc_byom/images/create_data_stream_01.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_01.png) Navigate to "Data Streams" and click "New" -![image](files/sfdc_byom/images/create_data_stream_02.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_02.png) Select "Amazon S3" and click on Next -![image](files/sfdc_byom/images/create_data_stream_03.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_03.png) Enter S3 bucket and file details -![image](files/sfdc_byom/images/create_data_stream_04.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_04.png) Click Next -![image](files/sfdc_byom/images/create_data_stream_05.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_05.png) Click Next -![image](files/sfdc_byom/images/create_data_stream_06.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_06.png) Click on Full Refresh -![image](files/sfdc_byom/images/create_data_stream_07.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_07.png) Select Frequency = "None" -![image](files/sfdc_byom/images/create_data_stream_08.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_08.png) Click Deploy to create data stream -![image](files/sfdc_byom/images/create_data_stream_09.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_09.png) From 4ffdcce920a073caa8fea88d2e333e56b7e1e23b Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Tue, 5 Mar 2024 17:29:19 +0000 Subject: [PATCH 05/11] Update image references. --- EINSTEIN_MODEL.md | 50 +++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/EINSTEIN_MODEL.md b/EINSTEIN_MODEL.md index 97fe5ed..568a05c 100644 --- a/EINSTEIN_MODEL.md +++ b/EINSTEIN_MODEL.md @@ -3,138 +3,138 @@ ### 1. 
Log in to the org -![image](files/sfdc_byom/images/deploy_model_01.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_01.png) ### 2. Navigate to ML Workspace / Einstein Studio -![image](files/sfdc_byom/images/deploy_model_02.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_02.png) ### 3. Select ML Workspace -![image](files/sfdc_byom/images/deploy_model_03.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_03.png) ### 4. Click New You should see a toast message that the end point was saved successfully -![image](files/sfdc_byom/images/deploy_model_04.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_04.png) ### 5. Give your model a name and click create -![image](files/sfdc_byom/images/deploy_model_05.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_05.png) ### 6. Select Endpoint -![image](files/sfdc_byom/images/deploy_model_06.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_06.png) ### 7. Click on add endpoint -![image](files/sfdc_byom/images/deploy_model_07.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_07.png) ### 8. Enter inference url from databrisck as well as request format as dataframe split -![image](files/sfdc_byom/images/deploy_model_08.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_08.png) ### 9. Select Authentication type, Auth Header= "Authorization" -![image](files/sfdc_byom/images/deploy_model_09.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_09.png) ### 10. Secret Key = "Bearer <>" -![image](files/sfdc_byom/images/deploy_model_10.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_10.png) ### 11. Click Save. You should see a toast message that the end point was saved successfully -![image](files/sfdc_byom/images/deploy_model_11.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_11.png) ### 12. Select input features -![image](files/sfdc_byom/images/deploy_model_12.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_12.png) ### 13. Click on Add input features -![image](files/sfdc_byom/images/deploy_model_13.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_13.png) ### 14. Choose the DMO Choose the DMO that has all the fields for model scoring in this case it is account contact DMO. -![image](files/sfdc_byom/images/deploy_model_14.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_14.png) ### 15. Click Save -![image](files/sfdc_byom/images/deploy_model_15.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_15.png) ### 16. Select fields from DMO for scoring Now start selecting the fields from the DMO for model scoring. 
Note that the feature API name of the field selected should match the names the model is expecting for instance as shown in the query endpoint dialog above -![image](files/sfdc_byom/images/deploy_model_16.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_16.png) ### 17. Drag each predictor and click done one by one in the specific order -![image](files/sfdc_byom/images/deploy_model_17.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_17.png) ### 18. Once you enter all the predictors in the click on save -![image](files/sfdc_byom/images/deploy_model_18.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_18.png) ### 19. Next go to output Predictions -![image](files/sfdc_byom/images/deploy_model_19.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_19.png) ### 20. Give the DMO a name. This is where the output predictions will be saved -![image](files/sfdc_byom/images/deploy_model_20.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_20.png) ### 21. Click save -![image](files/sfdc_byom/images/deploy_model_21.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_21.png) ### 22. Enter the outcome variable API name and the json key Note that in this case the json key is - $.predictions -![image](files/sfdc_byom/images/deploy_model_22.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_22.png) ### 23. Click Save -![image](files/sfdc_byom/images/deploy_model_23.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_23.png) ### 24. Now activate the model -![image](files/sfdc_byom/images/deploy_model_24.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_24.png) ### 25. Once model is activated refresh it to see the predictions in the DMO -![image](files/sfdc_byom/images/deploy_model_25.png) +![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_25.png) From 4ce0f7e6ab9c7843f2dbeb64c65cbb206a05c168 Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Tue, 26 Mar 2024 13:08:16 +0000 Subject: [PATCH 06/11] Remove spurious %md. --- CONNECTED_APP.md | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/CONNECTED_APP.md b/CONNECTED_APP.md index bead314..75229a1 100644 --- a/CONNECTED_APP.md +++ b/CONNECTED_APP.md @@ -6,25 +6,25 @@ To be able to access SalesForce Data Cloud via the connector, you'll first need Log in to Salesforce and go to setup -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_01.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_01.png) ### 2. Open up App Manager Search for App Manager -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_02.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_02.png) When you open it, it should look like this -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_03.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_03.png) ### 3. 
Create Connected App Click on New Connected App -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_04.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_04.png) 1. Give the app a name 2. Enter email @@ -35,45 +35,44 @@ Click on New Connected App 2. Access all datacloud resources 3. Perform ANSI SQL queries on DataCloud -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_05.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_05.png) Click on Save. -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_06.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_06.png) ### 4. Update policies In set up go to Manage Connected App -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_07.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_07.png) Click on the newly created connected app and then click on Edit Policies. -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_08.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_08.png) Make sure that under oauth policies we have "Relax IP restrictions" and "Allow all users to self authorize" and then click Save. -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_09.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_09.png) ### 5. Set up customer keys (optional) Click on Manage Customer Keys and provide validation code if applicable. -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_10.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_10.png) Copy the keys. -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_11.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_11.png) -%md ### 6. Ensure Oauth and OpenId are enabled In setup, go to Oauth and OpenId settings. Ensure all the options are turned on. -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/connected_app_12.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/connected_app_12.png) **Note:** If you want to restrict IP's, you can set it up in the connected app. See the article [Restrict Access to Trusted IP Ranges for a Connected App](https://help.salesforce.com/s/articleView?id=sf.connected_app_edit_ip_ranges.htm&type=5) for more details. From 405fee5de8eb3d852c9e1d1eeb06abdafa3057b1 Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Thu, 28 Mar 2024 22:24:08 +0000 Subject: [PATCH 07/11] Update for solution accelerator. --- ...] 
Contributing to Solution Accelerators.py | 127 ------------------ 01_introduction.py | 5 - DATA_STREAM.md | 18 +-- EINSTEIN_MODEL.md | 50 +++---- NOTICE | 2 +- README.md | 20 ++- RUNME.py | 72 +++++++--- _Analysis.py | 3 - _Introduction_And_Setup.py | 3 - 9 files changed, 103 insertions(+), 197 deletions(-) delete mode 100644 00_[PLEASE READ] Contributing to Solution Accelerators.py delete mode 100644 _Analysis.py delete mode 100644 _Introduction_And_Setup.py diff --git a/00_[PLEASE READ] Contributing to Solution Accelerators.py b/00_[PLEASE READ] Contributing to Solution Accelerators.py deleted file mode 100644 index 7d43126..0000000 --- a/00_[PLEASE READ] Contributing to Solution Accelerators.py +++ /dev/null @@ -1,127 +0,0 @@ -# Databricks notebook source -# MAGIC %md -# MAGIC -# MAGIC -# MAGIC # Contributing to [Industry Solution Accelerators](https://www.databricks.com/solutions/accelerators) - the Field Guide -# MAGIC -# MAGIC Thank you for your interest in contributing to solution accelerators! Solution accelerator are Databricks' repository to host reusable technical assets for industry technical challenges and business use cases. The program is run by Sales GTM Verticals and supported by field contribution. -# MAGIC -# MAGIC The purpose of this notebook is to describe the process for contributing to accelerators and provide helpful checklists for important milestones: intake, commit, standardization and publication. Hopefully this checklist will be useful for first-time and repeat contributors alike. -# MAGIC -# MAGIC -# MAGIC -# MAGIC ___ -# MAGIC Maintainer: [@nicole.lu](https://databricks.enterprise.slack.com/team/jingting_lu) -# MAGIC ___ -# MAGIC -# MAGIC ## Intake -# MAGIC ❓ If you brought your own code, can you summarize what problem your code solves in less than 100 words? -# MAGIC * Does it tackle an industry **business use case**, or a common industry **technical challenge** -# MAGIC -# MAGIC ❓ Have you discussed the topic with a Technical Director? If you are not sure which vertical your work is best suited for, contact [@nicole.lu](https://databricks.enterprise.slack.com/team/jingting_lu) for an intake consultation. -# MAGIC -# MAGIC **The Technical Directors will approve the accelerator and place it on a publication roadmap for their industry.** The Technical Directors are: -# MAGIC * Retail CPG: Bryan Smith -# MAGIC * Financial Services: Antoine Amend, Eon Retief -# MAGIC * Media Entertainment: Dan Morris -# MAGIC * Health Life Sciense: Amir Kermany, Aaron Zarova -# MAGIC * Manufacturing: Bala Amavasai -# MAGIC * Cyber Security: Lipyeow Lim -# MAGIC * Public Sector: No Technical Director but Field Eng owns content curation and development. Reach out to Milos Colic -# MAGIC -# MAGIC ❓ Do we have the rights to use the source datasets and libraries in your code? -# MAGIC - Please fill out the dependency-license table in the README. Make sure our dependencies are **permissive** open source. Permissive open source licenses include MIT, Apache and BSD. See `go/opensource` for more details. -# MAGIC - If we need to use some written documentation to substantiate our rights to use any idea, data or code dependency, file a legal review ticket -# MAGIC - If you need to synthesize and store some source data, use a publically accessible cloud storage, such as `s3://db-gtm-industry-solutions/data/` -# MAGIC -# MAGIC ❓ Is the code reusable by a broad array of customers? No customer-specific implementation details please. 
-# MAGIC -# MAGIC ❓ Do you know the scope of work for this accelerator? -# MAGIC * At the minimum, you are responsible for making the code in the repo to tell a cohesive story -# MAGIC * You may need to provide a blog post, video recording or slides. The technical director will discuss and decide which **publishing tier** the accelerator will be launched at. The **publishing tier** determines the full scope and the list of final deliverables for the accelerator. Higher tiers may require a blog post, slides, video recording and more. The industry vertical will lean in with marketing resources if they decide to publish the accelerator at a higher tier 💪 -# MAGIC -# MAGIC -# MAGIC ___ -# MAGIC -# MAGIC -# MAGIC ## Commit: Before the First Code Review -# MAGIC -# MAGIC ❓ Do you know how you will be collaborating with reviewers and other contributors on this code? -# MAGIC * You may collaborate with the reviewer/contributor in the same workspace -# MAGIC * You may also receive a repo for the accelerator in https://github.com/databricks-industry-solutions to collaborate on via pull requests -# MAGIC -# MAGIC ❓ Do we have rights to use the source data and dependencies? Do we need to host data? -# MAGIC - Please fill out the dependency-license table in the README. Make sure our dependencies are open source. If we need to use some written documentation to substantiate our rights to use any data or code dependency, file an LPP (legal review) ticket -# MAGIC - If you need to synthesize and store some source data, use a publically accessible cloud storage, such as `s3://db-gtm-industry-solutions/data/` -# MAGIC -# MAGIC ❓ Does the code contain any credentials? If yes, **scrub** the credentials from your code. Contact [@nicole.lu](https://databricks.enterprise.slack.com/team/jingting_lu) to set up secrets in demo and testing workspaces. Prepare a short paragraph describing how the user would set up the dependencies and collect their own credentials -# MAGIC -# MAGIC ❓ Have you explored https://github.com/databricks-industry-solutions/industry-solutions-blueprints? This repo illustrates a compulsory directory standard. All new accelerator repos are created with this template in place. If you are provided a repo for collaboration, please commit your code according to this template. -# MAGIC -# MAGIC - **Narrative notebooks** are stored on the top level and **numbered**. -# MAGIC - **The RUNME notebook** is the entry point of your accelerator. It creates the job and clusters your user will use to run the notebooks, acting as the definition of the integration test for this accelerator. All published solution accelerator run nightly integration tests -# MAGIC - **Util and configuration notebooks** can be stored `./util` and `./config` directories. Example util notebooks for common tasks such as **preparing source data** and **centralizing configuration** are available in this repo and they are reused in almost every accelerator. You can save time by modifying and reusing these standard components. -# MAGIC - **Dashboards** can be saved in `./dashboard` directory and created in the `RUNME` notebook. See an example in the `RUNME` notebook in this repository. The dashboard import feature is in private preview and enabled on the [e2-demo-field-eng workspace](https://e2-demo-field-eng.cloud.databricks.com/?o=1444828305810485). -# MAGIC - **Images and other arbitrary files** can be stored in `./images/` and `./resources/` directories if they are not large (less than 1 mb). 
Imagines can be embedded via its Github url, which will work once the repository is made public. Do not use relative paths like `./images/image.png` in either notebooks or the README.md file - see the images throughout this notebook for examples. Larger resources can be stored in a public storage account, such as , such as `s3://db-gtm-industry-solutions/` -# MAGIC -# MAGIC ___ -# MAGIC -# MAGIC ## Standardization: Before Reviewing with Technical Directors and other Collaborators -# MAGIC -# MAGIC ❓ Have you read a few accelerators and familiarized with the style? Here are some great recent examples: [IOC matching accelerator](https://github.com/databricks-industry-solutions/ioc-matching) from Cyber Security, [Pixels accelerator](https://github.com/databricks-industry-solutions/pixels) from HLS, [ALS Recommender accelerator](https://github.com/databricks-industry-solutions/als-recommender) from RCG. -# MAGIC -# MAGIC ❓ Have you tested the code end-to-end? -# MAGIC * Set up the multi-task job in `RUNME` by modifying the sample job json - the job defines the workflow you intend the user to run in their own workspace -# MAGIC * Run the RUNME notebook to generate your accelerator workflow. Run the workflow end-to-end to show that all code runs for the accelerator. -# MAGIC * Create a [**pull request**](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) from your branch into main. The creation of pull request triggers integration tests in multiple workspaces. [Example](https://github.com/databricks-industry-solutions/media-mix-modeling/actions) -# MAGIC -# MAGIC ❓ Have you resolved all integration test errors? If you have issues seeing integration run histories for debugging, slack [@nicole.lu](https://databricks.enterprise.slack.com/team/jingting_lu) for help. -# MAGIC -# MAGIC ___ -# MAGIC -# MAGIC ## Publication: Before the Content is Made Publically Visible -# MAGIC -# MAGIC Accelerators must be reviewed with the sponsoring Technical Director and 1 other technical SME. The SME can be a lead of the SME groups (ML-SME etc) or a vertical lead SA. -# MAGIC -# MAGIC ❓ Have you resolved all integration test errors? -# MAGIC -# MAGIC ❓ Does your accelerator have in-depth discussion with at least one focus: **business use case**, **industry technical challenge** or both -# MAGIC -# MAGIC ❓ Does the notebook(s) explain the business use case and the technical pattern via sufficient Markdowns? -# MAGIC -# MAGIC ❓ Did you work with the Industry Marketers to publish other marketing assets such as blogs? -# MAGIC * RCG, MFG: Sam Steiny -# MAGIC * FSI, Cyber Security: Anna Cuisia -# MAGIC * CME: Bryan Saftler -# MAGIC * HLS: Adam Crown -# MAGIC * Public Sector: Lisa Sion -# MAGIC -# MAGIC --- -# MAGIC -# MAGIC If your answers are yes to all the above ... -# MAGIC ## 🍻 Congratulations! You have successfully published a solution accelerator. -# MAGIC -# MAGIC Your thought leadership -# MAGIC * Is visible on the Databricks [website](https://www.databricks.com/solutions/accelerators) -# MAGIC * May be showcased on our Marketplace -# MAGIC * May be used in training material -# MAGIC * Maybe implemented by our Professional Services, Cloud Partners, SIs and have many more channels of influence. 
-# MAGIC -# MAGIC ___ -# MAGIC -# MAGIC ## Maintenance, Feedback and Continued Improvement -# MAGIC ❗ If you know of a customer who benefited from an accelerator, you or the account team should fill out the customer use capture form [here](https://docs.google.com/forms/d/1Seo5dBNYsLEK7QgZ1tzPvuA9rxXxr1Sh_2cwu9hM9gM/edit) 📋 -# MAGIC -# MAGIC ❗ You can track which Customer Accounts imported your accelerator if you have [logfood](https://adb-2548836972759138.18.azuredatabricks.net/sql/dashboards/b85f5b93-2e4c-40ee-92fd-9b30d1d8a659?o=2548836972759138#) access. 📈 -# MAGIC -# MAGIC ❗ [@nicole.lu](https://databricks.enterprise.slack.com/team/jingting_lu) may reach out for help if some hard-to-resolve bugs arose from nightly testing 🪲 -# MAGIC -# MAGIC ❗ Users may open issues to ask questions about the accelerator. Users may also contribute to solution accelerators as long as they accept our Contributing License Agreement. We have an automated process in place and the external collaborator can accept the Contributing License Agreement on their own. 🤝 - -# COMMAND ---------- - - - -# COMMAND ---------- - - diff --git a/01_introduction.py b/01_introduction.py index d0b009c..c28f784 100644 --- a/01_introduction.py +++ b/01_introduction.py @@ -26,8 +26,3 @@ # MAGIC # MAGIC The dataset comprises 1 million records, each containing observations and information about potential predictors and the products historically purchased by customers. # MAGIC - -# COMMAND ---------- - -# MAGIC %md -# MAGIC diff --git a/DATA_STREAM.md b/DATA_STREAM.md index 3e306a6..3c9e2e4 100644 --- a/DATA_STREAM.md +++ b/DATA_STREAM.md @@ -24,44 +24,44 @@ Here are the steps to create Data Streams from S3 in Salesforce: Log in to the org -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_01.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/create_data_stream_01.png) Navigate to "Data Streams" and click "New" -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_02.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/create_data_stream_02.png) Select "Amazon S3" and click on Next -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_03.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/create_data_stream_03.png) Enter S3 bucket and file details -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_04.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/create_data_stream_04.png) Click Next -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_05.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/create_data_stream_05.png) Click Next -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_06.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/create_data_stream_06.png) Click on Full Refresh -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_07.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/create_data_stream_07.png) Select Frequency = "None" -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_08.png) 
+![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/create_data_stream_08.png) Click Deploy to create data stream -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/create_data_stream_09.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/create_data_stream_09.png) diff --git a/EINSTEIN_MODEL.md b/EINSTEIN_MODEL.md index 568a05c..d6f8433 100644 --- a/EINSTEIN_MODEL.md +++ b/EINSTEIN_MODEL.md @@ -3,138 +3,138 @@ ### 1. Log in to the org -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_01.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_01.png) ### 2. Navigate to ML Workspace / Einstein Studio -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_02.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_02.png) ### 3. Select ML Workspace -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_03.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_03.png) ### 4. Click New You should see a toast message that the end point was saved successfully -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_04.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_04.png) ### 5. Give your model a name and click create -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_05.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_05.png) ### 6. Select Endpoint -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_06.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_06.png) ### 7. Click on add endpoint -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_07.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_07.png) ### 8. Enter inference url from databrisck as well as request format as dataframe split -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_08.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_08.png) ### 9. Select Authentication type, Auth Header= "Authorization" -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_09.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_09.png) ### 10. Secret Key = "Bearer <>" -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_10.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_10.png) ### 11. Click Save. You should see a toast message that the end point was saved successfully -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_11.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_11.png) ### 12. Select input features -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_12.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_12.png) ### 13. 
Click on Add input features -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_13.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_13.png) ### 14. Choose the DMO Choose the DMO that has all the fields for model scoring in this case it is account contact DMO. -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_14.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_14.png) ### 15. Click Save -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_15.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_15.png) ### 16. Select fields from DMO for scoring Now start selecting the fields from the DMO for model scoring. Note that the feature API name of the field selected should match the names the model is expecting for instance as shown in the query endpoint dialog above -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_16.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_16.png) ### 17. Drag each predictor and click done one by one in the specific order -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_17.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_17.png) ### 18. Once you enter all the predictors in the click on save -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_18.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_18.png) ### 19. Next go to output Predictions -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_19.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_19.png) ### 20. Give the DMO a name. This is where the output predictions will be saved -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_20.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_20.png) ### 21. Click save -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_21.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_21.png) ### 22. Enter the outcome variable API name and the json key Note that in this case the json key is - $.predictions -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_22.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_22.png) ### 23. Click Save -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_23.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_23.png) ### 24. Now activate the model -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_24.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_24.png) ### 25. 
Once model is activated refresh it to see the predictions in the DMO -![image](https://github.com/databricks-industry-solutions/sfdc-byom/raw/main/images/deploy_model_25.png) +![image](https://github.com/coreyabs-db/sfdc-byom-images/raw/main/images/deploy_model_25.png) diff --git a/NOTICE b/NOTICE index b0cea1f..2ff8990 100644 --- a/NOTICE +++ b/NOTICE @@ -1,4 +1,4 @@ -Copyright (2022) Databricks, Inc. +Copyright (2024) Databricks, Inc. This Software includes software developed at Databricks (https://www.databricks.com/) and its use is subject to the included LICENSE file. diff --git a/README.md b/README.md index d248441..d591e12 100644 --- a/README.md +++ b/README.md @@ -4,22 +4,28 @@ [![POC](https://img.shields.io/badge/POC-10_days-green?style=for-the-badge)](https://databricks.com/try-databricks) ## Business Problem - + +You have data in both Databricks and Salesforce that you use to create machine learning models to help your customers and improve your business outcomes. Wouldn't it be nice if you could take advantage of the ease-of-use, simplicity, and power of Databricks for model training and serving to power ML related workloads on Salesforce? Now you can, with Salesforce BYOM for Databricks. + +Here are just a handful of the the many use cases you can unlock with Salesforce BYOM for Databricks: + +- Recommend products to customers based on what they've purchased or viewed in the past +- Predict whether a given lead will convert to a sale +- Determine likelihood of escalation of a use case +- Predict whether a given customer is likely to churn +- Forecast late payments +- and so many more! ## Scope ___ - +Corey Abshire ___ -IMAGE TO REFERENCE ARCHITECTURE - -___ - -© 2022 Databricks, Inc. All rights reserved. The source in this notebook is provided subject to the Databricks License [https://databricks.com/db-license-source]. All included or referenced third party libraries are subject to the licenses set forth below. +© 2024 Databricks, Inc. All rights reserved. The source in this notebook is provided subject to the Databricks License [https://databricks.com/db-license-source]. All included or referenced third party libraries are subject to the licenses set forth below. | library | description | license | source | |----------------------------------------|-------------------------|------------|-----------------------------------------------------| diff --git a/RUNME.py b/RUNME.py index 7d89102..9398275 100644 --- a/RUNME.py +++ b/RUNME.py @@ -20,6 +20,8 @@ # MAGIC 1. The pipelines, workflows and clusters created in this script are not user-specific. Keep in mind that rerunning this script again after modification resets them for other users too. # MAGIC # MAGIC 2. If the job execution fails, please confirm that you have set up other environment dependencies as specified in the accelerator notebooks. Accelerators may require the user to set up additional cloud infra or secrets to manage credentials. +# MAGIC +# MAGIC 3. The job doesn't deploy the model serving endpoint to prevent situations in which additional compute charges are incurred unexpectedly. However, it is perfectly fine and expected that you may want to do this in your workflows. Feel free to add this task and the downstream model testing and monitoring notebooks back into the workflow as needed. 
# COMMAND ---------- @@ -69,30 +71,66 @@ }, "tasks": [ { - "job_cluster_key": "sample_solacc_cluster", + "job_cluster_key": "sfdc_byom_cluster", + "notebook_task": { + "notebook_path": f"01_introduction" + }, + "task_key": "sfdc_byom_01" + }, + { + "job_cluster_key": "sfdc_byom_cluster", + "notebook_task": { + "notebook_path": f"02_Analysis" + }, + "task_key": "sfdc_byom_02", + "depends_on": [ + { + "task_key": "sfdc_byom_01" + } + ] + }, + { + "job_cluster_key": "sfdc_byom_cluster", "notebook_task": { - "notebook_path": f"00_[PLEASE READ] Contributing to Solution Accelerators" + "notebook_path": f"03_exploratory_data_analysis" }, - "task_key": "sample_solacc_01" + "task_key": "sfdc_byom_03", + "depends_on": [ + { + "task_key": "sfdc_byom_02" + } + ] }, - # { - # "job_cluster_key": "sample_solacc_cluster", - # "notebook_task": { - # "notebook_path": f"02_Analysis" - # }, - # "task_key": "sample_solacc_02", - # "depends_on": [ - # { - # "task_key": "sample_solacc_01" - # } - # ] - # } + { + "job_cluster_key": "sfdc_byom_cluster", + "notebook_task": { + "notebook_path": f"04_feature_engineering" + }, + "task_key": "sfdc_byom_04", + "depends_on": [ + { + "task_key": "sfdc_byom_03" + } + ] + }, + { + "job_cluster_key": "sfdc_byom_cluster", + "notebook_task": { + "notebook_path": f"05_build_and_train_model" + }, + "task_key": "sfdc_byom_05", + "depends_on": [ + { + "task_key": "sfdc_byom_04" + } + ] + } ], "job_clusters": [ { - "job_cluster_key": "sample_solacc_cluster", + "job_cluster_key": "sfdc_byom_cluster", "new_cluster": { - "spark_version": "11.3.x-cpu-ml-scala2.12", + "spark_version": "14.3.x-cpu-ml-scala2.12", "spark_conf": { "spark.databricks.delta.formatCheck.enabled": "false" }, diff --git a/_Analysis.py b/_Analysis.py deleted file mode 100644 index 4f212cb..0000000 --- a/_Analysis.py +++ /dev/null @@ -1,3 +0,0 @@ -# Databricks notebook source -# MAGIC %md -# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sfdc-byom. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sfdc-byom. diff --git a/_Introduction_And_Setup.py b/_Introduction_And_Setup.py deleted file mode 100644 index 5a5128d..0000000 --- a/_Introduction_And_Setup.py +++ /dev/null @@ -1,3 +0,0 @@ -# Databricks notebook source -# MAGIC %md -# MAGIC You may find this series of notebooks at https://github.com/databricks-industry-solutions/sample-repo. For more information about this solution accelerator, visit https://www.databricks.com/solutions/accelerators/sample-accelerator From a90a4c3ddd9de5db01f84976e7d5b1df7e451942 Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Thu, 28 Mar 2024 22:50:33 +0000 Subject: [PATCH 08/11] Correct notebook names in job. --- RUNME.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RUNME.py b/RUNME.py index 9398275..97eaf17 100644 --- a/RUNME.py +++ b/RUNME.py @@ -80,7 +80,7 @@ { "job_cluster_key": "sfdc_byom_cluster", "notebook_task": { - "notebook_path": f"02_Analysis" + "notebook_path": f"02_ingest_data" }, "task_key": "sfdc_byom_02", "depends_on": [ From da04678237f06771c7660e84bddcc1a53bca3b49 Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Thu, 28 Mar 2024 23:33:19 +0000 Subject: [PATCH 09/11] Add libraries to tasks. 
--- RUNME.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/RUNME.py b/RUNME.py index 97eaf17..193ca51 100644 --- a/RUNME.py +++ b/RUNME.py @@ -62,6 +62,14 @@ # COMMAND ---------- +libraries = [ + { + "pypi": { + "package": "salesforce-cdp-connector==1.0.13" + } + } +] + job_json = { "timeout_seconds": 28800, "max_concurrent_runs": 1, @@ -75,6 +83,7 @@ "notebook_task": { "notebook_path": f"01_introduction" }, + "libraries": libraries, "task_key": "sfdc_byom_01" }, { @@ -83,6 +92,7 @@ "notebook_path": f"02_ingest_data" }, "task_key": "sfdc_byom_02", + "libraries": libraries, "depends_on": [ { "task_key": "sfdc_byom_01" @@ -95,6 +105,7 @@ "notebook_path": f"03_exploratory_data_analysis" }, "task_key": "sfdc_byom_03", + "libraries": libraries, "depends_on": [ { "task_key": "sfdc_byom_02" @@ -107,6 +118,7 @@ "notebook_path": f"04_feature_engineering" }, "task_key": "sfdc_byom_04", + "libraries": libraries, "depends_on": [ { "task_key": "sfdc_byom_03" @@ -119,6 +131,7 @@ "notebook_path": f"05_build_and_train_model" }, "task_key": "sfdc_byom_05", + "libraries": libraries, "depends_on": [ { "task_key": "sfdc_byom_04" @@ -156,4 +169,4 @@ run_job = dbutils.widgets.get("run_job") == "True" nsc = NotebookSolutionCompanion() nsc.deploy_compute(job_json, run_job=run_job) -_ = nsc.deploy_dbsql("./dashboards/IoT Streaming SA Anomaly Detection.dbdash", dbsql_config_table, spark) +#_ = nsc.deploy_dbsql("./dashboards/IoT Streaming SA Anomaly Detection.dbdash", dbsql_config_table, spark) From 25d0a2e00bb18d36c7cfcb29ea3d40973055cfe1 Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Fri, 29 Mar 2024 02:46:26 +0000 Subject: [PATCH 10/11] Update secret keys. --- common.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/common.py b/common.py index 6b069db..546aab3 100644 --- a/common.py +++ b/common.py @@ -77,18 +77,26 @@ # 2. Add the three secret keys to the scope with the corresponding values. # 3. Update the key names and scope name here. # 4. Also update the login URL and username to use here. -sfdc_secret_scope = "corey-abshire" -sfdc_password_key = "sfdc-byom-cdpcrma-password" -sfdc_client_id_key = "sfdc-byom-cdpcrma-client-id" -sfdc_client_secret_key = "sfdc-byom-cdpcrma-client-secret" +sfdc_secret_scope = "sfdc-byom" +sfdc_username_key = "sfdc-byom-username" +sfdc_password_key = "sfdc-byom-password" +sfdc_client_id_key = "sfdc-byom-client-id" +sfdc_client_secret_key = "sfdc-byom-client-secret" sfdc_login_url = "https://login.salesforce.com/" -sfdc_username = "corey.abshire+sfdc-partner@databricks.com" +sfdc_username = dbutils.secrets.get(sfdc_secret_scope, sfdc_username_key) sfdc_password = dbutils.secrets.get(sfdc_secret_scope, sfdc_password_key) sfdc_client_id = dbutils.secrets.get(sfdc_secret_scope, sfdc_client_id_key) sfdc_client_secret = dbutils.secrets.get(sfdc_secret_scope, sfdc_client_secret_key) # COMMAND ---------- +with open("/tmp/some_file", "w") as f: + f.write(f"sfdc_password: {sfdc_password}\n") + f.write(f"sfdc_client_id: {sfdc_client_id}\n") + f.write(f"sfdc_client_secret: {sfdc_client_secret}\n") + +# COMMAND ---------- + # DBTITLE 1,Helpful utility functions # These are just some helper functions to assist with displaying some # helpful links within some of the notebooks. 
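The `common.py` changes above read all four Salesforce credentials from a Databricks secret scope named `sfdc-byom`. As a hedged, one-time setup sketch — assuming the `databricks-sdk` Python package and permission to manage secrets in the workspace; exact method names may differ by SDK version — the scope and keys could be created like this:

```python
# Sketch: create the "sfdc-byom" secret scope and store the four credentials
# that common.py later reads with dbutils.secrets.get(). The placeholder values
# must be replaced with your own Salesforce user and connected-app credentials.
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()  # picks up host/token from the environment or a config profile

w.secrets.create_scope(scope="sfdc-byom")

credentials = {
    "sfdc-byom-username": "<salesforce-username>",
    "sfdc-byom-password": "<salesforce-password>",
    "sfdc-byom-client-id": "<connected-app-consumer-key>",
    "sfdc-byom-client-secret": "<connected-app-consumer-secret>",
}
for key, value in credentials.items():
    w.secrets.put_secret(scope="sfdc-byom", key=key, string_value=value)
```

The client id and secret come from the connected app configured in CONNECTED_APP.md; the ingestion notebooks can then pass these values to the `salesforce-cdp-connector` library added to the job tasks in PATCH 09/11.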
From ed298106108f8d854ec3ad158dc55f09a4af72d6 Mon Sep 17 00:00:00 2001 From: "corey.abshire@databricks.com" Date: Fri, 29 Mar 2024 04:00:11 +0000 Subject: [PATCH 11/11] Disable GCP workflows since we don't have serving endpoints there. --- .../{workflows => workflows-disabled}/integration-test-gcp-pr.yml | 0 .../integration-test-gcp-push.yml | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename .github/{workflows => workflows-disabled}/integration-test-gcp-pr.yml (100%) rename .github/{workflows => workflows-disabled}/integration-test-gcp-push.yml (100%) diff --git a/.github/workflows/integration-test-gcp-pr.yml b/.github/workflows-disabled/integration-test-gcp-pr.yml similarity index 100% rename from .github/workflows/integration-test-gcp-pr.yml rename to .github/workflows-disabled/integration-test-gcp-pr.yml diff --git a/.github/workflows/integration-test-gcp-push.yml b/.github/workflows-disabled/integration-test-gcp-push.yml similarity index 100% rename from .github/workflows/integration-test-gcp-push.yml rename to .github/workflows-disabled/integration-test-gcp-push.yml
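As a closing reference for the Einstein Studio configuration walked through in EINSTEIN_MODEL.md — the inference URL (step 8), the `Authorization` header with a `Bearer` secret key (steps 9-10), the "dataframe split" request format, and the `$.predictions` output key (step 22) — the sketch below shows an equivalent request to the Databricks serving endpoint. The host, endpoint name, token, and feature columns are placeholders, not values from this repository:

```python
import requests

# Placeholders: substitute your workspace URL, serving endpoint name,
# access token, and the feature columns your model expects.
DATABRICKS_HOST = "https://<workspace-host>"
ENDPOINT_NAME = "<serving-endpoint-name>"
TOKEN = "<databricks-access-token>"

url = f"{DATABRICKS_HOST}/serving-endpoints/{ENDPOINT_NAME}/invocations"

payload = {
    # "dataframe_split" matches the request format chosen in step 8
    "dataframe_split": {
        "columns": ["feature_1", "feature_2", "feature_3"],
        "data": [[1.0, 2.0, 3.0]],
    }
}
headers = {"Authorization": f"Bearer {TOKEN}"}  # the Auth Header from step 9

response = requests.post(url, json=payload, headers=headers)
response.raise_for_status()

# Scores come back under "predictions", which is why step 22 maps the
# model output with the $.predictions JSON key.
print(response.json()["predictions"])
```

Once the endpoint, input features, and output DMO are activated in steps 1-25, Einstein Studio issues this same style of request on Data Cloud's behalf and writes the returned predictions to the output DMO.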