From c2352076d0a5ec18ad3e533730e1caa6f86eff8c Mon Sep 17 00:00:00 2001 From: Hassan Date: Wed, 4 Dec 2024 02:08:50 -0600 Subject: [PATCH] chore: Removed unused jobs and remove analytics-secure references. --- .../AmplitudeUserPropertiesBackfill.groovy | 58 ---------------- .../jobs/analytics/AnalyticsEmailOptin.groovy | 15 +++- dataeng/jobs/analytics/DBTDocs.groovy | 5 +- dataeng/jobs/analytics/DBTRun.groovy | 8 +-- .../jobs/analytics/DBTSourceFreshness.groovy | 53 -------------- dataeng/jobs/analytics/Enrollment.groovy | 48 ------------- .../EnrollmentValidationEvents.groovy | 43 ------------ dataeng/jobs/analytics/Enterprise.groovy | 31 --------- dataeng/jobs/analytics/ModelTransfers.groovy | 6 +- .../analytics/SnowflakeExpirePasswords.groovy | 5 +- .../analytics/SnowflakeRefreshSnowpipe.groovy | 5 +- .../analytics/SnowflakeSchemaBuilder.groovy | 5 +- .../analytics/SnowflakeValidateStitch.groovy | 69 ------------------- .../StitchSnowflakeLagMonitor.groovy | 53 -------------- dataeng/jobs/analytics/TableauRestore.groovy | 33 --------- .../analytics/UserLocationByCourse.groovy | 43 ------------ dataeng/jobs/analytics/VideoTimeline.groovy | 45 ------------ .../jobs/analytics/WarehouseTransforms.groovy | 6 +- .../analytics/WarehouseTransformsCI.groovy | 18 ++--- .../WarehouseTransformsCIManual.groovy | 15 ++-- .../WarehouseTransformsCIMasterMerges.groovy | 6 +- dataeng/jobs/createJobs.groovy | 59 ---------------- dataeng/jobs/createJobsNew.groovy | 6 -- .../amplitude-properties-backfill.sh | 47 ------------- dataeng/resources/dbtsource-freshness.sh | 30 -------- .../resources/enrollment-validation-events.sh | 24 ------- dataeng/resources/enrollment.sh | 24 ------- dataeng/resources/enterprise-enrollment.sh | 10 --- dataeng/resources/enterprise-user.sh | 10 --- .../resources/setup-exporter-email-optin.sh | 8 ++- dataeng/resources/setup-exporter.sh | 8 ++- .../resources/snowflake-validate-stitch.sh | 24 ------- .../resources/stitch-snowflake-lag-monitor.sh | 25 
------- dataeng/resources/tableau-restore.sh | 30 -------- dataeng/resources/user-location-by-course.sh | 16 ----- dataeng/resources/video-timeline.sh | 12 ---- 36 files changed, 50 insertions(+), 853 deletions(-) delete mode 100644 dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy delete mode 100644 dataeng/jobs/analytics/DBTSourceFreshness.groovy delete mode 100644 dataeng/jobs/analytics/Enrollment.groovy delete mode 100644 dataeng/jobs/analytics/EnrollmentValidationEvents.groovy delete mode 100644 dataeng/jobs/analytics/Enterprise.groovy delete mode 100644 dataeng/jobs/analytics/SnowflakeValidateStitch.groovy delete mode 100644 dataeng/jobs/analytics/StitchSnowflakeLagMonitor.groovy delete mode 100644 dataeng/jobs/analytics/TableauRestore.groovy delete mode 100644 dataeng/jobs/analytics/UserLocationByCourse.groovy delete mode 100644 dataeng/jobs/analytics/VideoTimeline.groovy delete mode 100644 dataeng/resources/amplitude-properties-backfill.sh delete mode 100644 dataeng/resources/dbtsource-freshness.sh delete mode 100755 dataeng/resources/enrollment-validation-events.sh delete mode 100755 dataeng/resources/enrollment.sh delete mode 100755 dataeng/resources/enterprise-enrollment.sh delete mode 100644 dataeng/resources/enterprise-user.sh delete mode 100644 dataeng/resources/snowflake-validate-stitch.sh delete mode 100644 dataeng/resources/stitch-snowflake-lag-monitor.sh delete mode 100644 dataeng/resources/tableau-restore.sh delete mode 100644 dataeng/resources/user-location-by-course.sh delete mode 100755 dataeng/resources/video-timeline.sh diff --git a/dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy b/dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy deleted file mode 100644 index 63315b35d..000000000 --- a/dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy +++ /dev/null @@ -1,58 +0,0 @@ -package analytics - -import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization -import static 
org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm - - -class AmplitudeUserPropertiesBackfill { - public static def job = { dslFactory, allVars -> - dslFactory.job("amplitude-user-properties-backfill") { - logRotator common_log_rotator(allVars) - authorization common_authorization(allVars) - parameters secure_scm_parameters(allVars) - parameters { - stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.') - stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.') - stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.') - stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to used.') - stringParam('AMPLITUDE_DATA_SOURCE_TABLE', '', 'Table name that has data which needs to be updated on Amplitude. It should have format like database.schema.table.') - stringParam('COLUMNS_TO_UPDATE', '', 'Columns that you want to update. Separate multiple columns with commas.') - stringParam('RESPONSE_TABLE', '', 'Output table which will store the updated data along with response from API endpoint.') - stringParam('AMPLITUDE_OPERATION_NAME', '', 'Amplitude user property operation name. 
e.g: set or setOnce.') - } - environmentVariables { - env('USER', allVars.get('USER')) - env('ACCOUNT', allVars.get('ACCOUNT')) - env('AMPLITUDE_VAULT_KV_PATH', allVars.get('AMPLITUDE_VAULT_KV_PATH')) - env('AMPLITUDE_VAULT_KV_VERSION', allVars.get('AMPLITUDE_VAULT_KV_VERSION')) - } - multiscm secure_scm(allVars) << { - git { - remote { - url('$ANALYTICS_TOOLS_URL') - branch('$ANALYTICS_TOOLS_BRANCH') - credentials('1') - } - extensions { - relativeTargetDirectory('analytics-tools') - pruneBranches() - cleanAfterCheckout() - } - } - } - wrappers { - timestamps() - credentialsBinding { - usernamePassword('ANALYTICS_VAULT_ROLE_ID', 'ANALYTICS_VAULT_SECRET_ID', 'analytics-vault'); - } - } - publishers common_publishers(allVars) - steps { - shell(dslFactory.readFileFromWorkspace('dataeng/resources/amplitude-properties-backfill.sh')) - } - } - } -} diff --git a/dataeng/jobs/analytics/AnalyticsEmailOptin.groovy b/dataeng/jobs/analytics/AnalyticsEmailOptin.groovy index e207cb971..2653bc477 100644 --- a/dataeng/jobs/analytics/AnalyticsEmailOptin.groovy +++ b/dataeng/jobs/analytics/AnalyticsEmailOptin.groovy @@ -80,6 +80,8 @@ class AnalyticsEmailOptin { 'Used to set the date of the CWSM dump. Leave blank to use today\'s date. Set to "-d 202x-0x-0x" if that is when the CWSM dump took place, typically the preceding Sunday. 
(Leave off quotes.)') stringParam('ORG_CONFIG','data-czar-keys/config.yaml', 'Path to the data-czar organization config file') stringParam('DATA_CZAR_KEYS_BRANCH','master', 'Branch of the Data-czar-keys repository to use') + stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.') + stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.') } parameters secure_scm_parameters(allVars) @@ -116,6 +118,18 @@ class AnalyticsEmailOptin { relativeTargetDirectory('data-czar-keys') } } + git { + remote { + url('$ANALYTICS_TOOLS_URL') + branch('$ANALYTICS_TOOLS_BRANCH') + credentials('1') + } + extensions { + relativeTargetDirectory('analytics-tools') + pruneBranches() + cleanAfterCheckout() + } + } } triggers{ @@ -157,4 +171,3 @@ class AnalyticsEmailOptin { } } } - diff --git a/dataeng/jobs/analytics/DBTDocs.groovy b/dataeng/jobs/analytics/DBTDocs.groovy index d41872813..a0fcae79d 100644 --- a/dataeng/jobs/analytics/DBTDocs.groovy +++ b/dataeng/jobs/analytics/DBTDocs.groovy @@ -1,16 +1,13 @@ package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters class DBTDocs{ public static def job = { dslFactory, allVars -> dslFactory.job("dbt-docs"){ logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the warehouse-transforms repository.') stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of warehouse-transforms repository 
to use.') @@ -18,7 +15,7 @@ class DBTDocs{ stringParam('DBT_PROFILE', allVars.get('DBT_PROFILE'), 'DBT profile from profiles.yml in analytics-secure.') stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.') } - multiscm secure_scm(allVars) << { + multiscm { git { remote { url('$WAREHOUSE_TRANSFORMS_URL') diff --git a/dataeng/jobs/analytics/DBTRun.groovy b/dataeng/jobs/analytics/DBTRun.groovy index 791f332a0..f023efc03 100644 --- a/dataeng/jobs/analytics/DBTRun.groovy +++ b/dataeng/jobs/analytics/DBTRun.groovy @@ -1,11 +1,9 @@ package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters class DBTRun{ public static def job = { dslFactory, allVars -> @@ -18,7 +16,6 @@ class DBTRun{ ) authorization common_authorization(allVars) logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the warehouse-transforms repository.') stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of warehouse-transforms repository to use.') @@ -36,7 +33,7 @@ class DBTRun{ environmentVariables { env('JOB_TYPE', 'manual') } - multiscm secure_scm(allVars) << { + multiscm { git { remote { url('$WAREHOUSE_TRANSFORMS_URL') @@ -68,7 +65,6 @@ class DBTRun{ "Automatically run dbt in production, overwriting data in the PROD database when Schema Builder generated PR are merged" ) logRotator common_log_rotator(allVars) - 
parameters secure_scm_parameters(allVars) environmentVariables { env('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL')) env('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH')) @@ -84,7 +80,7 @@ class DBTRun{ env('JOB_TYPE', 'automated') env('NOTIFY', allVars.get('$PAGER_NOTIFY')) } - multiscm secure_scm(allVars) << { + multiscm { git { remote { url('$WAREHOUSE_TRANSFORMS_URL') diff --git a/dataeng/jobs/analytics/DBTSourceFreshness.groovy b/dataeng/jobs/analytics/DBTSourceFreshness.groovy deleted file mode 100644 index fd379e59c..000000000 --- a/dataeng/jobs/analytics/DBTSourceFreshness.groovy +++ /dev/null @@ -1,53 +0,0 @@ -package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm -import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters - -class DBTSourceFreshness{ - public static def job = { dslFactory, allVars -> - dslFactory.job("dbt-source-freshness"){ - logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) - parameters { - stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the warehouse-transforms repository.') - stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of warehouse-transforms repository to use.') - stringParam('DBT_TARGET', allVars.get('DBT_TARGET'), 'DBT target from profiles.yml in analytics-secure.') - stringParam('DBT_PROFILE', allVars.get('DBT_PROFILE'), 'DBT profile from profiles.yml in analytics-secure.') - stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.') - } - multiscm secure_scm(allVars) << { - git 
{ - remote { - url('$WAREHOUSE_TRANSFORMS_URL') - branch('$WAREHOUSE_TRANSFORMS_BRANCH') - credentials('1') - } - extensions { - relativeTargetDirectory('warehouse-transforms') - pruneBranches() - cleanAfterCheckout() - } - } - } - triggers common_triggers(allVars) - wrappers { - colorizeOutput('xterm') - } - wrappers common_wrappers(allVars) - publishers { - postBuildTask { - task('WARN freshness', 'exit 1', true) - } - } - publishers common_publishers(allVars) - steps { - shell(dslFactory.readFileFromWorkspace('dataeng/resources/secrets-manager-setup.sh')) - shell(dslFactory.readFileFromWorkspace('dataeng/resources/dbtsource-freshness.sh')) - } - } - } -} - diff --git a/dataeng/jobs/analytics/Enrollment.groovy b/dataeng/jobs/analytics/Enrollment.groovy deleted file mode 100644 index 0335366c3..000000000 --- a/dataeng/jobs/analytics/Enrollment.groovy +++ /dev/null @@ -1,48 +0,0 @@ -package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization -import static org.edx.jenkins.dsl.AnalyticsConstants.common_multiscm -import static org.edx.jenkins.dsl.AnalyticsConstants.common_parameters -import static org.edx.jenkins.dsl.AnalyticsConstants.from_date_interval_parameter -import static org.edx.jenkins.dsl.AnalyticsConstants.to_date_interval_parameter -import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.opsgenie_heartbeat_publisher - -class Enrollment { - public static def job = { dslFactory, allVars -> - allVars.get('ENVIRONMENTS').each { environment, env_config -> - dslFactory.job("enrollment-$environment") { - // desupport-1271: temporarily disabling now the the dbt version is live. 
- // leave this for 2 weeks and delete the job when we are confident that - // this is no longer needed. - disabled(true) - authorization common_authorization(env_config) - logRotator common_log_rotator(allVars) - multiscm common_multiscm(allVars) - triggers common_triggers(allVars, env_config) - publishers common_publishers(allVars) - publishers opsgenie_heartbeat_publisher(allVars) - parameters common_parameters(allVars, env_config) - parameters from_date_interval_parameter(allVars) - parameters to_date_interval_parameter(allVars) - environmentVariables { - env('OPSGENIE_HEARTBEAT_NAME', env_config.get('OPSGENIE_HEARTBEAT_NAME')) - env('OPSGENIE_HEARTBEAT_DURATION_NUM', env_config.get('OPSGENIE_HEARTBEAT_DURATION_NUM')) - env('OPSGENIE_HEARTBEAT_DURATION_UNIT', env_config.get('OPSGENIE_HEARTBEAT_DURATION_UNIT')) - } - wrappers common_wrappers(allVars) - wrappers { - credentialsBinding { - string('OPSGENIE_HEARTBEAT_CONFIG_KEY', 'opsgenie_heartbeat_config_key') - } - } - steps { - shell(dslFactory.readFileFromWorkspace('dataeng/resources/opsgenie-enable-heartbeat.sh')) - shell(dslFactory.readFileFromWorkspace('dataeng/resources/enrollment.sh')) - } - } - } - } -} diff --git a/dataeng/jobs/analytics/EnrollmentValidationEvents.groovy b/dataeng/jobs/analytics/EnrollmentValidationEvents.groovy deleted file mode 100644 index 4a1dfddd2..000000000 --- a/dataeng/jobs/analytics/EnrollmentValidationEvents.groovy +++ /dev/null @@ -1,43 +0,0 @@ -package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.common_multiscm -import static org.edx.jenkins.dsl.AnalyticsConstants.common_parameters -import static org.edx.jenkins.dsl.AnalyticsConstants.from_date_interval_parameter -import static org.edx.jenkins.dsl.AnalyticsConstants.to_date_interval_parameter -import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers 
-import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers - -class EnrollmentValidationEvents { - public static def job = { dslFactory, allVars -> - allVars.get('ENVIRONMENTS').each { environment, env_config -> - dslFactory.job("enrollment-validation-events-$environment") { - disabled(env_config.get('DISABLED', false)) - logRotator common_log_rotator(allVars) - parameters common_parameters(allVars, env_config) - parameters from_date_interval_parameter(allVars) - parameters to_date_interval_parameter(allVars) - parameters { - stringParam('OUTPUT_ROOT', env_config.get('OUTPUT_ROOT'), '') - stringParam('CREDENTIALS', env_config.get('CREDENTIALS'), '') - stringParam('FILE_THRESHOLD', env_config.get('FILE_THRESHOLD'), - 'Threshold to apply to synthetic event files per day. Gzipped files that are bigger' + - ' than this threshold trigger a job failure and an alert. (Sadly, we ignore the smaller' + - ' files as being "normal".)') - } - multiscm common_multiscm(allVars) - triggers common_triggers(allVars, env_config) - wrappers common_wrappers(allVars) - publishers { - postBuildTask { - task('OVER THRESHOLD', 'exit 1', true) - } - } - publishers common_publishers(allVars) - steps { - shell(dslFactory.readFileFromWorkspace('dataeng/resources/enrollment-validation-events.sh')) - } - } - } - } -} diff --git a/dataeng/jobs/analytics/Enterprise.groovy b/dataeng/jobs/analytics/Enterprise.groovy deleted file mode 100644 index 2292d2023..000000000 --- a/dataeng/jobs/analytics/Enterprise.groovy +++ /dev/null @@ -1,31 +0,0 @@ -package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.common_multiscm -import static org.edx.jenkins.dsl.AnalyticsConstants.common_parameters -import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import 
static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization - -class Enterprise { - public static def job = { dslFactory, allVars -> - allVars.get('JOBS').each { job, job_config -> - dslFactory.job("enterprise-$job") { - disabled(job_config.get('DISABLED', false)) - authorization common_authorization(allVars) - logRotator common_log_rotator(allVars) - parameters common_parameters(allVars, job_config) - parameters { - stringParam('REPORT_DATE', allVars.get('REPORT_DATE'), '') - } - multiscm common_multiscm(allVars) - triggers common_triggers(allVars) - wrappers common_wrappers(allVars) - publishers common_publishers(allVars) - steps { - shell(dslFactory.readFileFromWorkspace("dataeng/resources/enterprise-${job}.sh")) - } - } - } - } -} diff --git a/dataeng/jobs/analytics/ModelTransfers.groovy b/dataeng/jobs/analytics/ModelTransfers.groovy index eae76c967..d298291ea 100644 --- a/dataeng/jobs/analytics/ModelTransfers.groovy +++ b/dataeng/jobs/analytics/ModelTransfers.groovy @@ -1,10 +1,8 @@ package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization class ModelTransfers{ @@ -13,7 +11,6 @@ class ModelTransfers{ dslFactory.job("transfer-dbt-models-$environment"){ authorization common_authorization(env_config) logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the Warehouse Transforms Repo.') stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch 
of Warehouse Transforms to use.') @@ -23,7 +20,7 @@ class ModelTransfers{ stringParam('MODELS_TO_TRANSFER', env_config.get('MODELS_TO_TRANSFER'), 'Name of DBT models which should be transferred to S3 via a Snowflake stage.') stringParam('NOTIFY', env_config.get('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY')), 'Space separated list of emails to send notifications to.') } - multiscm secure_scm(allVars) << { + multiscm { git { remote { url('$WAREHOUSE_TRANSFORMS_URL') @@ -51,4 +48,3 @@ class ModelTransfers{ } } } - diff --git a/dataeng/jobs/analytics/SnowflakeExpirePasswords.groovy b/dataeng/jobs/analytics/SnowflakeExpirePasswords.groovy index 4abccb39d..46585d984 100644 --- a/dataeng/jobs/analytics/SnowflakeExpirePasswords.groovy +++ b/dataeng/jobs/analytics/SnowflakeExpirePasswords.groovy @@ -3,8 +3,6 @@ package analytics import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters class SnowflakeExpirePasswords { @@ -31,7 +29,6 @@ class SnowflakeExpirePasswords { dslFactory.job(jobConfig['NAME']) { logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.') stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.') @@ -45,7 +42,7 @@ class SnowflakeExpirePasswords { env('ACCOUNT', allVars.get('ACCOUNT')) } logRotator common_log_rotator(allVars) - multiscm secure_scm(allVars) << { + multiscm { git { remote { url('$ANALYTICS_TOOLS_URL') diff --git a/dataeng/jobs/analytics/SnowflakeRefreshSnowpipe.groovy b/dataeng/jobs/analytics/SnowflakeRefreshSnowpipe.groovy index 
6c136fe31..32a681435 100644 --- a/dataeng/jobs/analytics/SnowflakeRefreshSnowpipe.groovy +++ b/dataeng/jobs/analytics/SnowflakeRefreshSnowpipe.groovy @@ -6,8 +6,6 @@ import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm @@ -43,7 +41,6 @@ class SnowflakeRefreshSnowpipe { dslFactory.job(jobConfig['NAME']) { logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.') stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analtyics tools repo to use.') @@ -62,7 +59,7 @@ class SnowflakeRefreshSnowpipe { env('ACCOUNT', allVars.get('ACCOUNT')) } logRotator common_log_rotator(allVars) - multiscm secure_scm(allVars) << { + multiscm { git { remote { url('$ANALYTICS_TOOLS_URL') diff --git a/dataeng/jobs/analytics/SnowflakeSchemaBuilder.groovy b/dataeng/jobs/analytics/SnowflakeSchemaBuilder.groovy index 8c49e40b4..b76d2fd8a 100644 --- a/dataeng/jobs/analytics/SnowflakeSchemaBuilder.groovy +++ b/dataeng/jobs/analytics/SnowflakeSchemaBuilder.groovy @@ -2,14 +2,11 @@ package analytics import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters class SnowflakeSchemaBuilder { public static def job = { dslFactory, allVars -> 
dslFactory.job('snowflake-schema-builder') { logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the Warehouse Transforms Repo.') stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of Warehouse Transforms to use.') @@ -20,7 +17,7 @@ class SnowflakeSchemaBuilder { stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.') } logRotator common_log_rotator(allVars) - multiscm secure_scm(allVars) << { + multiscm { git { remote { url('$WAREHOUSE_TRANSFORMS_URL') diff --git a/dataeng/jobs/analytics/SnowflakeValidateStitch.groovy b/dataeng/jobs/analytics/SnowflakeValidateStitch.groovy deleted file mode 100644 index dd9be10e2..000000000 --- a/dataeng/jobs/analytics/SnowflakeValidateStitch.groovy +++ /dev/null @@ -1,69 +0,0 @@ -package analytics - -import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters -import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization - - -class SnowflakeValidateStitch { - public static def job = { dslFactory, allVars -> - List apps = [ - 'CREDENTIALS', - 'DISCOVERY', - 'ECOMMERCE', - 'LMS' - ] - apps.each { app -> - - dslFactory.job("snowflake-validate-stitch-$app") { - - description( - "Validate $app database tables loaded by Stitch by comparing them against the same " + - "tables loaded by Sqoop. This compares only tables that exist in both sets, and only the last " + - "10 days of changed rows." 
- ) - authorization common_authorization(allVars) - parameters secure_scm_parameters(allVars) - parameters { - stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.') - stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.') - stringParam('SQOOP_START_TIME', '', 'Application name of tables to validate.') - stringParam('SNOWFLAKE_USER', 'SNOWFLAKE_TASK_AUTOMATION_USER') - stringParam('SNOWFLAKE_ACCOUNT', 'edx.us-east-1') - stringParam('SNOWFLAKE_KEY_PATH', 'snowflake/rsa_key_snowflake_task_automation_user.p8', 'Path to the encrypted private key file that corresponds to the SNOWFLAKE_USER, relative to the root of analytics-secure.') - stringParam('SNOWFLAKE_PASSPHRASE_PATH', 'snowflake/rsa_key_passphrase_snowflake_task_automation_user', 'Path to the private key decryption passphrase file that corresponds to the SNOWFLAKE_USER, relative to the root of analytics-secure.') - stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.') - stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to used.') - } - environmentVariables { - env('APP_NAME', app) - } - logRotator common_log_rotator(allVars) - multiscm secure_scm(allVars) << { - git { - remote { - url('$ANALYTICS_TOOLS_URL') - branch('$ANALYTICS_TOOLS_BRANCH') - credentials('1') - } - extensions { - relativeTargetDirectory('analytics-tools') - pruneBranches() - cleanAfterCheckout() - } - } - } - wrappers { - timestamps() - } - publishers common_publishers(allVars) - steps { - shell(dslFactory.readFileFromWorkspace('dataeng/resources/snowflake-validate-stitch.sh')) - } - } - } - } -} diff --git a/dataeng/jobs/analytics/StitchSnowflakeLagMonitor.groovy b/dataeng/jobs/analytics/StitchSnowflakeLagMonitor.groovy deleted file mode 100644 index 894e853b8..000000000 --- 
a/dataeng/jobs/analytics/StitchSnowflakeLagMonitor.groovy +++ /dev/null @@ -1,53 +0,0 @@ -package analytics - -import static org.edx.jenkins.dsl.AnalyticsConstants.common_multiscm -import static org.edx.jenkins.dsl.AnalyticsConstants.common_parameters -import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers - - - -class StitchSnowflakeLagMonitor { - public static def job = { dslFactory, allVars -> - dslFactory.job("stitch-snowflake-lag-monitor") { - logRotator common_log_rotator(allVars) - parameters { - stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.') - stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analtyics tools repo to use.') - stringParam('NOTIFY', '$PAGER_NOTIFY', 'Space separated list of emails to send notifications to.') - stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to used.') - } - environmentVariables { - env('KEY_PATH', allVars.get('KEY_PATH')) - env('PASSPHRASE_PATH', allVars.get('PASSPHRASE_PATH')) - env('USER', allVars.get('USER')) - env('ACCOUNT', allVars.get('ACCOUNT')) - } - logRotator common_log_rotator(allVars) - multiscm { - git { - remote { - url('$ANALYTICS_TOOLS_URL') - branch('$ANALYTICS_TOOLS_BRANCH') - credentials('1') - } - extensions { - relativeTargetDirectory('analytics-tools') - pruneBranches() - cleanAfterCheckout() - } - } - } - triggers common_triggers(allVars) - wrappers { - timestamps() - } - publishers common_publishers(allVars) - steps { - shell(dslFactory.readFileFromWorkspace('dataeng/resources/stitch-snowflake-lag-monitor.sh')) - } - } - } -} diff --git a/dataeng/jobs/analytics/TableauRestore.groovy b/dataeng/jobs/analytics/TableauRestore.groovy deleted file mode 
100644 index 9e80b22a3..000000000 --- a/dataeng/jobs/analytics/TableauRestore.groovy +++ /dev/null @@ -1,33 +0,0 @@ -package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm -import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters - -class TableauRestore{ - public static def job = { dslFactory, allVars -> - dslFactory.job("tableau-restore"){ - description('This job restores tableau data and config backup to a Tableau Server.') - logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) - parameters { - stringParam('TABLEAU_SERVER_HOST', '', 'Address of Tableau Server.') - stringParam('USER_NAME', '', 'SSH User name.') - stringParam('TABLEAU_ADMIN_USER', '', 'User which can invoke tsm commands.') - stringParam('S3_PATH', allVars.get('S3_PATH'), 'S3 path containing the backup files.') - stringParam('BACKUP_TIMESTAMP', '', 'Timestamp of the backup to restore.') - stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.') - } - wrappers { - colorizeOutput('xterm') - } - wrappers common_wrappers(allVars) - publishers common_publishers(allVars) - steps { - shell(dslFactory.readFileFromWorkspace('dataeng/resources/tableau-restore.sh')) - } - } - } -} diff --git a/dataeng/jobs/analytics/UserLocationByCourse.groovy b/dataeng/jobs/analytics/UserLocationByCourse.groovy deleted file mode 100644 index 98bbdc074..000000000 --- a/dataeng/jobs/analytics/UserLocationByCourse.groovy +++ /dev/null @@ -1,43 +0,0 @@ -package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization -import static org.edx.jenkins.dsl.AnalyticsConstants.common_multiscm 
-import static org.edx.jenkins.dsl.AnalyticsConstants.common_parameters -import static org.edx.jenkins.dsl.AnalyticsConstants.to_date_interval_parameter -import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.opsgenie_heartbeat_publisher - -class UserLocationByCourse { - public static def job = { dslFactory, allVars -> - allVars.get('ENVIRONMENTS').each { environment, env_config -> - dslFactory.job("user-location-by-course-$environment") { - disabled(env_config.get('DISABLED', false)) - authorization common_authorization(env_config) - logRotator common_log_rotator(allVars, env_config) - multiscm common_multiscm(allVars) - triggers common_triggers(allVars, env_config) - parameters common_parameters(allVars, env_config) - parameters to_date_interval_parameter(allVars) - environmentVariables { - env('OPSGENIE_HEARTBEAT_NAME', env_config.get('OPSGENIE_HEARTBEAT_NAME')) - env('OPSGENIE_HEARTBEAT_DURATION_NUM', env_config.get('OPSGENIE_HEARTBEAT_DURATION_NUM')) - env('OPSGENIE_HEARTBEAT_DURATION_UNIT', env_config.get('OPSGENIE_HEARTBEAT_DURATION_UNIT')) - } - wrappers common_wrappers(allVars) - wrappers { - credentialsBinding { - string('OPSGENIE_HEARTBEAT_CONFIG_KEY', 'opsgenie_heartbeat_config_key') - } - } - publishers common_publishers(allVars) - publishers opsgenie_heartbeat_publisher(allVars) - steps { - shell(dslFactory.readFileFromWorkspace('dataeng/resources/opsgenie-enable-heartbeat.sh')) - shell(dslFactory.readFileFromWorkspace('dataeng/resources/user-location-by-course.sh')) - } - } - } - } -} diff --git a/dataeng/jobs/analytics/VideoTimeline.groovy b/dataeng/jobs/analytics/VideoTimeline.groovy deleted file mode 100644 index d975b0946..000000000 --- 
a/dataeng/jobs/analytics/VideoTimeline.groovy +++ /dev/null @@ -1,45 +0,0 @@ -package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization -import static org.edx.jenkins.dsl.AnalyticsConstants.common_multiscm -import static org.edx.jenkins.dsl.AnalyticsConstants.common_parameters -import static org.edx.jenkins.dsl.AnalyticsConstants.from_date_interval_parameter -import static org.edx.jenkins.dsl.AnalyticsConstants.to_date_interval_parameter -import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator -import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers -import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.opsgenie_heartbeat_publisher - -class VideoTimeline { - public static def job = { dslFactory, allVars -> - allVars.get('ENVIRONMENTS').each { environment, env_config -> - dslFactory.job("video-timeline-$environment") { - disabled(env_config.get('DISABLED', false)) - authorization common_authorization(env_config) - logRotator common_log_rotator(allVars, env_config) - multiscm common_multiscm(allVars) - triggers common_triggers(allVars, env_config) - parameters common_parameters(allVars, env_config) - parameters from_date_interval_parameter(allVars) - parameters to_date_interval_parameter(allVars) - environmentVariables { - env('OPSGENIE_HEARTBEAT_NAME', env_config.get('OPSGENIE_HEARTBEAT_NAME')) - env('OPSGENIE_HEARTBEAT_DURATION_NUM', env_config.get('OPSGENIE_HEARTBEAT_DURATION_NUM')) - env('OPSGENIE_HEARTBEAT_DURATION_UNIT', env_config.get('OPSGENIE_HEARTBEAT_DURATION_UNIT')) - } - wrappers common_wrappers(allVars) - wrappers { - credentialsBinding { - string('OPSGENIE_HEARTBEAT_CONFIG_KEY', 'opsgenie_heartbeat_config_key') - } - } - publishers common_publishers(allVars) - publishers opsgenie_heartbeat_publisher(allVars) - steps { - 
shell(dslFactory.readFileFromWorkspace('dataeng/resources/opsgenie-enable-heartbeat.sh')) - shell(dslFactory.readFileFromWorkspace('dataeng/resources/video-timeline.sh')) - } - } - } - } -} diff --git a/dataeng/jobs/analytics/WarehouseTransforms.groovy b/dataeng/jobs/analytics/WarehouseTransforms.groovy index 47c4c497d..2e2deae34 100644 --- a/dataeng/jobs/analytics/WarehouseTransforms.groovy +++ b/dataeng/jobs/analytics/WarehouseTransforms.groovy @@ -1,10 +1,8 @@ package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization import static org.edx.jenkins.dsl.AnalyticsConstants.opsgenie_heartbeat_publisher @@ -16,7 +14,6 @@ class WarehouseTransforms{ disabled(env_config.get('DISABLED', false)) authorization common_authorization(env_config) logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the Warehouse Transforms Repo.') stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of Warehouse Transforms to use.') @@ -35,7 +32,7 @@ class WarehouseTransforms{ stringParam('NOTIFY', env_config.get('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY')), 'Space separated list of emails to send notifications to.') booleanParam('FULL_REFRESH_INCREMENTALS', false, '[DANGEROUS] Supply the --full-refresh flag to the `dbt run` command, and use a larger warehouse. Use when you need to re-compute an incremental table from scratch. 
Applies to ALL incrementals in this run.') } - multiscm secure_scm(allVars) << { + multiscm { git { remote { url('$WAREHOUSE_TRANSFORMS_URL') @@ -80,4 +77,3 @@ class WarehouseTransforms{ } } } - diff --git a/dataeng/jobs/analytics/WarehouseTransformsCI.groovy b/dataeng/jobs/analytics/WarehouseTransformsCI.groovy index abd8552b6..73492c2a2 100644 --- a/dataeng/jobs/analytics/WarehouseTransformsCI.groovy +++ b/dataeng/jobs/analytics/WarehouseTransformsCI.groovy @@ -1,10 +1,8 @@ package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization import static org.edx.jenkins.dsl.JenkinsPublicConstants.GHPRB_CANCEL_BUILDS_ON_UPDATE @@ -13,7 +11,6 @@ class WarehouseTransformsCI{ dslFactory.job("warehouse-transforms-ci"){ authorization common_authorization(allVars) logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the warehouse-transforms repository.') stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of warehouse-transforms repository to use.') @@ -43,13 +40,13 @@ class WarehouseTransformsCI{ } scm { github('edx/warehouse-transforms') - } - multiscm secure_scm(allVars) << { + } + multiscm { git { remote { url('$WAREHOUSE_TRANSFORMS_URL') refspec('+refs/heads/master:refs/remotes/origin/master +refs/pull/*:refs/remotes/origin/pr/*') - credentials('1') + credentials('1') } branches('\${ghprbActualCommit}') extensions { @@ -69,7 +66,7 @@ class 
WarehouseTransformsCI{ pruneBranches() cleanAfterCheckout() } - } + } git { remote { url('$JENKINS_JOB_DSL_URL') @@ -81,7 +78,7 @@ class WarehouseTransformsCI{ pruneBranches() cleanAfterCheckout() } - } + } } triggers { githubPullRequest { @@ -91,7 +88,7 @@ class WarehouseTransformsCI{ cron('H/3 * * * *') triggerPhrase('jenkins run dbt') // You this trigger phrase to on Pull Rquest comment to trigger this job onlyTriggerPhrase(false) // true if you want the job to only fire when commented on (not on commits) - orgWhitelist(['edx-ops', 'edX']) // All the Github users under these orgs will be able to trigger this job via PR. As this job will be used by many edXers so giving the trigger access to all under edX. + orgWhitelist(['edx-ops', 'edX']) // All the Github users under these orgs will be able to trigger this job via PR. As this job will be used by many edXers so giving the trigger access to all under edX. extensions { commitStatus { context('jenkins/ci-tests') @@ -103,7 +100,7 @@ class WarehouseTransformsCI{ concurrentBuild(true) throttleConcurrentBuilds { maxTotal(5) - } + } wrappers { colorizeOutput('xterm') } @@ -115,4 +112,3 @@ class WarehouseTransformsCI{ } } } - diff --git a/dataeng/jobs/analytics/WarehouseTransformsCIManual.groovy b/dataeng/jobs/analytics/WarehouseTransformsCIManual.groovy index 0a80f3e9c..40621f8fc 100644 --- a/dataeng/jobs/analytics/WarehouseTransformsCIManual.groovy +++ b/dataeng/jobs/analytics/WarehouseTransformsCIManual.groovy @@ -1,10 +1,8 @@ package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters import static 
org.edx.jenkins.dsl.AnalyticsConstants.common_authorization import static org.edx.jenkins.dsl.JenkinsPublicConstants.GHPRB_CANCEL_BUILDS_ON_UPDATE @@ -13,7 +11,6 @@ class WarehouseTransformsCIManual{ dslFactory.job("warehouse-transforms-ci-manual"){ authorization common_authorization(allVars) logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the warehouse-transforms repository.') stringParam('WAREHOUSE_TRANSFORMS_BRANCH', '', 'Must specify branch of warehouse-transforms repository to use.') @@ -39,13 +36,13 @@ class WarehouseTransformsCIManual{ env('PASSPHRASE_PATH', allVars.get('PASSPHRASE_PATH')) env('USER', allVars.get('USER')) env('ACCOUNT', allVars.get('ACCOUNT')) - } - multiscm secure_scm(allVars) << { + } + multiscm { git { remote { url('$WAREHOUSE_TRANSFORMS_URL') branch('$WAREHOUSE_TRANSFORMS_BRANCH') - credentials('1') + credentials('1') } extensions { relativeTargetDirectory('warehouse-transforms') @@ -64,7 +61,7 @@ class WarehouseTransformsCIManual{ pruneBranches() cleanAfterCheckout() } - } + } git { remote { url('$JENKINS_JOB_DSL_URL') @@ -76,14 +73,14 @@ class WarehouseTransformsCIManual{ pruneBranches() cleanAfterCheckout() } - } + } } triggers common_triggers(allVars) publishers common_publishers(allVars) concurrentBuild(true) throttleConcurrentBuilds { maxTotal(5) - } + } wrappers { colorizeOutput('xterm') } diff --git a/dataeng/jobs/analytics/WarehouseTransformsCIMasterMerges.groovy b/dataeng/jobs/analytics/WarehouseTransformsCIMasterMerges.groovy index 6efa6d4b8..d00fab0ad 100644 --- a/dataeng/jobs/analytics/WarehouseTransformsCIMasterMerges.groovy +++ b/dataeng/jobs/analytics/WarehouseTransformsCIMasterMerges.groovy @@ -1,10 +1,8 @@ package analytics -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator import static 
org.edx.jenkins.dsl.AnalyticsConstants.common_wrappers import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers import static org.edx.jenkins.dsl.AnalyticsConstants.common_triggers -import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization import static org.edx.jenkins.dsl.AnalyticsConstants.slack_publisher @@ -14,7 +12,6 @@ class WarehouseTransformsCIMasterMerges{ dslFactory.job("warehouse-transforms-ci-poll-master"){ authorization common_authorization(allVars) logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the warehouse-transforms repository.') stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of warehouse-transforms repository to use.') @@ -60,7 +57,6 @@ class WarehouseTransformsCIMasterMerges{ dslFactory.job("warehouse-transforms-ci-master-merges"){ authorization common_authorization(allVars) logRotator common_log_rotator(allVars) - parameters secure_scm_parameters(allVars) parameters { stringParam('WAREHOUSE_TRANSFORMS_URL', allVars.get('WAREHOUSE_TRANSFORMS_URL'), 'URL for the warehouse-transforms repository.') stringParam('WAREHOUSE_TRANSFORMS_BRANCH', allVars.get('WAREHOUSE_TRANSFORMS_BRANCH'), 'Branch of warehouse-transforms repository to use.') @@ -88,7 +84,7 @@ class WarehouseTransformsCIMasterMerges{ env('WITH_RETRY', allVars.get('WITH_RETRY')) env('NO_OF_TRIES', allVars.get('NO_OF_TRIES')) } - multiscm secure_scm(allVars) << { + multiscm { git { remote { url('$WAREHOUSE_TRANSFORMS_URL') diff --git a/dataeng/jobs/createJobs.groovy b/dataeng/jobs/createJobs.groovy index bbb8801a9..479887772 100644 --- a/dataeng/jobs/createJobs.groovy +++ b/dataeng/jobs/createJobs.groovy @@ -1,8 +1,5 @@ import static analytics.AggregateDailyTrackingLogs.job as 
AggregateDailyTrackingLogsJob import static analytics.DatabaseExportCoursewareStudentmodule.job as DatabaseExportCoursewareStudentmoduleJob -import static analytics.Enrollment.job as EnrollmentJob -import static analytics.EnrollmentValidationEvents.job as EnrollmentValidationEventsJob -import static analytics.Enterprise.job as EnterpriseJob import static analytics.EventExportIncremental.job as EventExportIncrementalJob import static analytics.EventExportIncrementalLarge.job as EventExportIncrementalLargeJob import static analytics.JenkinsBackup.job as JenkinsBackupJob @@ -12,10 +9,6 @@ import static analytics.ReadReplicaExportToS3.job as ReadReplicaExportToS3Job import static analytics.SnowflakePublicGrantsCleaner.job as SnowflakePublicGrantsCleanerJob import static analytics.SnowflakeRefreshSnowpipe.job as SnowflakeRefreshSnowpipeJob import static analytics.SnowflakeReplicaImportFromS3.job as SnowflakeReplicaImportFromS3Job -import static analytics.SnowflakeValidateStitch.job as SnowflakeValidateStitchJob -import static analytics.StitchSnowflakeLagMonitor.job as StitchSnowflakeLagMonitorJob -import static analytics.UserLocationByCourse.job as UserLocationByCourseJob -import static analytics.VideoTimeline.job as VideoTimelineJob import static org.edx.jenkins.dsl.JenkinsPublicConstants.DEFAULT_VIEW import org.yaml.snakeyaml.Yaml import org.yaml.snakeyaml.error.YAMLException @@ -39,9 +32,6 @@ try { def taskMap = [ AGGREGATE_DAILY_TRACKING_LOGS_JOB: AggregateDailyTrackingLogsJob, DATABASE_EXPORT_COURSEWARE_STUDENTMODULE_JOB: DatabaseExportCoursewareStudentmoduleJob, - ENROLLMENT_JOB: EnrollmentJob, - ENROLLMENT_VALIDATION_EVENTS_JOB: EnrollmentValidationEventsJob, - ENTERPRISE_JOB: EnterpriseJob, EVENT_EXPORT_INCREMENTAL_JOB: EventExportIncrementalJob, EVENT_EXPORT_INCREMENTAL_LARGE_JOB: EventExportIncrementalLargeJob, JENKINS_BACKUP_JOB: JenkinsBackupJob, @@ -51,10 +41,6 @@ def taskMap = [ SNOWFLAKE_PUBLIC_GRANTS_CLEANER_JOB: SnowflakePublicGrantsCleanerJob, 
SNOWFLAKE_REFRESH_SNOWPIPE_JOB: SnowflakeRefreshSnowpipeJob, SNOWFLAKE_REPLICA_IMPORT_FROM_S3_JOB: SnowflakeReplicaImportFromS3Job, - SNOWFLAKE_VALIDATE_STITCH_JOB: SnowflakeValidateStitchJob, - STITCH_SNOWFLAKE_LAG_MONITOR_JOB: StitchSnowflakeLagMonitorJob, - USER_LOCATION_BY_COURSE_JOB: UserLocationByCourseJob, - VIDEO_TIMELINE_JOB: VideoTimelineJob, ] for (task in taskMap) { @@ -89,22 +75,6 @@ listView('Edge') { columns DEFAULT_VIEW.call() } -listView('Release') { - description('Jobs that are used for testing release candidates.') - jobs { - regex('.+release') - } - columns DEFAULT_VIEW.call() -} - -listView('Exporter') { - description('Jobs that are used for exporting course data.') - jobs { - regex('analytics-.+') - } - columns DEFAULT_VIEW.call() -} - listView('Warehouse') { jobs { name('snowflake-schema-builder') @@ -123,20 +93,6 @@ listView('Tools') { columns DEFAULT_VIEW.call() } -listView('Stage') { - jobs { - regex('.+stage') - } - columns DEFAULT_VIEW.call() -} - -listView('Enterprise') { - jobs { - regex('enterprise.+') - } - columns DEFAULT_VIEW.call() -} - listView('Backups') { jobs { regex('.*backup.*') @@ -144,21 +100,6 @@ listView('Backups') { columns DEFAULT_VIEW.call() } -listView('dbt') { - jobs { - name('snowflake-schema-builder') - regex('dbt-.*|warehouse-transforms-.*') - } - columns DEFAULT_VIEW.call() -} - -listView('Deprecated') { - jobs { - regex('DEPRECATED-.*') - } - columns DEFAULT_VIEW.call() -} - listView('Current') { jobs { regex('(?!DEPRECATED-).*') diff --git a/dataeng/jobs/createJobsNew.groovy b/dataeng/jobs/createJobsNew.groovy index 5a825df08..24f60fb5b 100644 --- a/dataeng/jobs/createJobsNew.groovy +++ b/dataeng/jobs/createJobsNew.groovy @@ -2,18 +2,15 @@ import static analytics.AnalyticsEmailOptin.job as AnalyticsEmailOptinJob import static analytics.AnalyticsExporter.job as AnalyticsExporterJob import static analytics.DBTDocs.job as DBTDocsJob import static analytics.DBTRun.job as DBTRunJob -import static 
analytics.DBTSourceFreshness.job as DBTSourceFreshnessJob import static analytics.DeployCluster.job as DeployClusterJob import static analytics.EmrCostReporter.job as EmrCostReporterJob import static analytics.ModelTransfers.job as ModelTransfersJob import static analytics.RetirementJobEdxTriggers.job as RetirementJobEdxTriggersJob import static analytics.RetirementJobs.job as RetirementJobsJob import static analytics.SnowflakeCollectMetrics.job as SnowflakeCollectMetricsJob -import static analytics.AmplitudeUserPropertiesBackfill.job as AmplitudeUserPropertiesBackfillJob import static analytics.SnowflakeSchemaBuilder.job as SnowflakeSchemaBuilderJob import static analytics.SnowflakeUserRetirementStatusCleanup.job as SnowflakeUserRetirementStatusCleanupJob import static analytics.PrefectFlowsDeployment.job as PrefectFlowsDeploymentJob -import static analytics.TableauRestore.job as TableauRestoreJob import static analytics.TerminateCluster.job as TerminateClusterJob import static analytics.UpdateUsers.job as UpdateUsersJob import static analytics.WarehouseTransforms.job as WarehouseTransformsJob @@ -46,18 +43,15 @@ def taskMap = [ ANALYTICS_EXPORTER_JOB: AnalyticsExporterJob, DBT_DOCS_JOB: DBTDocsJob, DBT_RUN_JOB: DBTRunJob, - DBT_SOURCE_FRESHNESS_JOB: DBTSourceFreshnessJob, DEPLOY_CLUSTER_JOB: DeployClusterJob, EMR_COST_REPORTER_JOB: EmrCostReporterJob, MODEL_TRANSFERS_JOB: ModelTransfersJob, RETIREMENT_JOB_EDX_TRIGGERS_JOB: RetirementJobEdxTriggersJob, RETIREMENT_JOBS_JOB: RetirementJobsJob, SNOWFLAKE_COLLECT_METRICS_JOB: SnowflakeCollectMetricsJob, - AMPLITUDE_USER_PROPERTIES_BACKFILL_JOB: AmplitudeUserPropertiesBackfillJob, SNOWFLAKE_SCHEMA_BUILDER_JOB: SnowflakeSchemaBuilderJob, SNOWFLAKE_USER_RETIREMENT_STATUS_CLEANUP_JOB: SnowflakeUserRetirementStatusCleanupJob, PREFECT_FLOWS_DEPLOYMENT_JOB: PrefectFlowsDeploymentJob, - TABLEAU_RESTORE_JOB: TableauRestoreJob, TERMINATE_CLUSTER_JOB: TerminateClusterJob, UPDATE_USERS_JOB: UpdateUsersJob, 
WAREHOUSE_TRANSFORMS_CI_JOB: WarehouseTransformsCIJob, diff --git a/dataeng/resources/amplitude-properties-backfill.sh b/dataeng/resources/amplitude-properties-backfill.sh deleted file mode 100644 index 1d7a38891..000000000 --- a/dataeng/resources/amplitude-properties-backfill.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env bash -set -ex - -# Creating Python virtual env -PYTHON_VENV="python_venv" -virtualenv --python=$PYTHON_VENV_VERSION --clear "${PYTHON_VENV}" -source "${PYTHON_VENV}/bin/activate" - -# Setup -cd $WORKSPACE/analytics-tools/snowflake -make requirements - -# Do not print commands in this function since they may contain secrets. -set +x - -# Retrieve a vault token corresponding to the jenkins AppRole. The token is then stored in the VAULT_TOKEN variable -# which is implicitly used by subsequent vault commands within this script. -# Instructions followed: https://learn.hashicorp.com/tutorials/vault/approle#step-4-login-with-roleid-secretid -export VAULT_TOKEN=$(vault write -field=token auth/approle/login \ - role_id=${ANALYTICS_VAULT_ROLE_ID} \ - secret_id=${ANALYTICS_VAULT_SECRET_ID} - ) - -API_KEY=$( - vault kv get \ - -version=${AMPLITUDE_VAULT_KV_VERSION} \ - -field=API_KEY \ - ${AMPLITUDE_VAULT_KV_PATH} \ - - -python3 secrets-manager.py -w -n analytics-secure/snowflake/rsa_key_snowpipe_user.p8 -v rsa_key_snowflake_task_automation_user -python3 secrets-manager.py -w -n analytics-secure/snowflake/rsa_key_passphrase_snowpipe_user -v rsa_key_passphrase_snowflake_task_automation_user - - -python amplitude_user_properties_update.py \ - --automation_user 'SNOWFLAKE_TASK_AUTOMATION_USER' \ - --account 'edx.us-east-1' \ - --amplitude_data_source_table $AMPLITUDE_DATA_SOURCE_TABLE \ - --columns_to_update $COLUMNS_TO_UPDATE \ - --response_table $RESPONSE_TABLE \ - --amplitude_operation_name $AMPLITUDE_OPERATION_NAME \ - --amplitude_api_key $API_KEY \ - --key_file rsa_key_snowflake_task_automation_user \ - --passphrase_file 
rsa_key_passphrase_snowflake_task_automation_user - -rm rsa_key_snowflake_task_automation_user -rm rsa_key_passphrase_snowflake_task_automation_user diff --git a/dataeng/resources/dbtsource-freshness.sh b/dataeng/resources/dbtsource-freshness.sh deleted file mode 100644 index 0a0dd4369..000000000 --- a/dataeng/resources/dbtsource-freshness.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash -set -ex - -# Creating python 3.11 virtual environment to run dbt warehouse-transform job -PYTHON311_VENV="py311_venv" -virtualenv --python=python3.11 --clear "${PYTHON311_VENV}" -source "${PYTHON311_VENV}/bin/activate" - -# Setup -cd $WORKSPACE/warehouse-transforms - -# To install right version of dbt -pip install -r requirements.txt - -cd $WORKSPACE/warehouse-transforms/projects/reporting - -source $WORKSPACE/secrets-manager.sh -# Fetch the secrets from AWS -set +x -get_secret_value analytics-secure/warehouse-transforms/profiles DBT_PASSWORD -set -x -export DBT_PASSWORD - -dbt clean --profiles-dir $WORKSPACE/warehouse-transforms/profiles/ --profile $DBT_PROFILE --target $DBT_TARGET -dbt deps --profiles-dir $WORKSPACE/warehouse-transforms/profiles/ --profile $DBT_PROFILE --target $DBT_TARGET - -# For dbt v0.21.0 or above, dbt source snapshot-freshness has been renamed to dbt source freshness. -# Its node selection logic is now consistent with other tasks. In order to check freshness for a specific source, -# use --select flag and you must prefix it with source: e.g. 
dbt source freshness --select source:snowplow -dbt source freshness --profiles-dir $WORKSPACE/warehouse-transforms/profiles/ --profile $DBT_PROFILE --target $DBT_TARGET diff --git a/dataeng/resources/enrollment-validation-events.sh b/dataeng/resources/enrollment-validation-events.sh deleted file mode 100755 index f30ed4781..000000000 --- a/dataeng/resources/enrollment-validation-events.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -END_DATE=$(date +%Y-%m-%d -d "$TO_DATE") -START_DATE=$(date +%Y-%m-%d -d "$FROM_DATE") -INTERVAL=$START_DATE-$END_DATE - -if [ -z "$NUM_REDUCE_TASKS" ]; then - NUM_REDUCE_TASKS=$(( $NUM_TASK_CAPACITY + 1 )) -fi -env|sort - -${WORKSPACE}/analytics-configuration/automation/run-automated-task.sh \ - EnrollmentValidationWorkflow --local-scheduler \ - --output-root $OUTPUT_ROOT/$INTERVAL \ - --interval $INTERVAL \ - --credentials $CREDENTIALS \ - --n-reduce-tasks $NUM_REDUCE_TASKS \ - $EXTRA_ARGS - -pip install awscli - -MAX_FILE_SIZE=`aws s3 ls $OUTPUT_ROOT/$INTERVAL/ | cut -b20-30 | grep -v PRE | sort -nr | head -1` -aws s3 ls $OUTPUT_ROOT/$INTERVAL/ -if [ -n "$MAX_FILE_SIZE" ] && [ "$MAX_FILE_SIZE" -gt "$FILE_THRESHOLD" ]; then echo "OVER THRESHOLD $MAX_FILE_SIZE > $FILE_THRESHOLD"; fi diff --git a/dataeng/resources/enrollment.sh b/dataeng/resources/enrollment.sh deleted file mode 100755 index ba9b577f0..000000000 --- a/dataeng/resources/enrollment.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -if [ -z "$NUM_REDUCE_TASKS" ]; then - NUM_CAPACITY=$(( $NUM_TASK_CAPACITY + $ON_DEMAND_CAPACITY )) - NUM_REDUCE_TASKS=$(( $NUM_CAPACITY*2 )) -fi - -env - -# Interpolate the TO_DATE now so that the downstream job is guaranteed to use -# the same exact date as this job. Otherwise, if this job runs over a date -# boundary, the downstream job would re-interpolate the value of 'yesterday' on -# a different date. 
-INTERPOLATED_TO_DATE="$(date +%Y-%m-%d -d "$TO_DATE")" -echo "TO_DATE=${INTERPOLATED_TO_DATE}" > "${WORKSPACE}/downstream.properties" - -${WORKSPACE}/analytics-configuration/automation/run-automated-task.sh \ - ImportEnrollmentsIntoMysql --local-scheduler \ - --interval $(date +%Y-%m-%d -d "$FROM_DATE")-$(date +%Y-%m-%d -d "$TO_DATE") \ - --n-reduce-tasks $NUM_REDUCE_TASKS \ - --overwrite-mysql \ - --EnrollmentByGenderMysqlTask-use-temp-table-for-overwrite \ - $EXTRA_ARGS - diff --git a/dataeng/resources/enterprise-enrollment.sh b/dataeng/resources/enterprise-enrollment.sh deleted file mode 100755 index 1c7705ae4..000000000 --- a/dataeng/resources/enterprise-enrollment.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -env - -${WORKSPACE}/analytics-configuration/automation/run-automated-task.sh \ - ImportEnterpriseEnrollmentsIntoMysql --local-scheduler \ - --date $(date +%Y-%m-%d -d "$REPORT_DATE") \ - --overwrite-mysql \ - --overwrite-hive \ - $EXTRA_ARGS diff --git a/dataeng/resources/enterprise-user.sh b/dataeng/resources/enterprise-user.sh deleted file mode 100644 index 86d4c52c8..000000000 --- a/dataeng/resources/enterprise-user.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env bash - -env - -${WORKSPACE}/analytics-configuration/automation/run-automated-task.sh \ - ImportEnterpriseUsersIntoMysql --local-scheduler \ - --date $(date +%Y-%m-%d -d "$REPORT_DATE") \ - --overwrite-mysql \ - --overwrite-hive \ - $EXTRA_ARGS \ No newline at end of file diff --git a/dataeng/resources/setup-exporter-email-optin.sh b/dataeng/resources/setup-exporter-email-optin.sh index af769373c..33294f588 100644 --- a/dataeng/resources/setup-exporter-email-optin.sh +++ b/dataeng/resources/setup-exporter-email-optin.sh @@ -6,9 +6,15 @@ mkdir -p /var/lib/jenkins/tmp/analytics-exporter/course-data # Create and activate a virtualenv in shell script EXPORTER_VENV="exporter_venv" -virtualenv --python=python3.9 --clear "${EXPORTER_VENV}" +virtualenv --python=python3.11 --clear 
"${EXPORTER_VENV}" source "${EXPORTER_VENV}/bin/activate" +cd $WORKSPACE/analytics-tools/snowflake +pip install boto3 + +python3 secrets-manager.py -w -n analytics-secure/analytics-exporter/task-auth.json -v ${WORKSPACE}/analytics-secure/analytics-exporter/task-auth.json +cd $WORKSPACE + # Install requirements into this (exporter) virtual environment pushd analytics-exporter/ pip install 'setuptools<65' diff --git a/dataeng/resources/setup-exporter.sh b/dataeng/resources/setup-exporter.sh index 2cddb89ee..f2d02822f 100644 --- a/dataeng/resources/setup-exporter.sh +++ b/dataeng/resources/setup-exporter.sh @@ -6,9 +6,15 @@ mkdir -p /var/lib/jenkins/tmp/analytics-exporter/course-data # Create and activate a virtualenv in shell script EXPORTER_VENV="exporter_venv" -virtualenv --python=python3.9 --clear "${EXPORTER_VENV}" +virtualenv --python=python3.11 --clear "${EXPORTER_VENV}" source "${EXPORTER_VENV}/bin/activate" +cd $WORKSPACE/analytics-tools/snowflake +pip install boto3 + +python3 secrets-manager.py -w -n analytics-secure/analytics-exporter/task-auth.json -v ${WORKSPACE}/analytics-secure/analytics-exporter/task-auth.json +cd $WORKSPACE + # Install requirements into this (exporter) virtual environment pushd analytics-exporter/ pip install 'setuptools<65' diff --git a/dataeng/resources/snowflake-validate-stitch.sh b/dataeng/resources/snowflake-validate-stitch.sh deleted file mode 100644 index 2017e1d94..000000000 --- a/dataeng/resources/snowflake-validate-stitch.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -set -ex - -# Creating Python virtual env -PYTHON_VENV="python_venv" -virtualenv --python=$PYTHON_VENV_VERSION --clear "${PYTHON_VENV}" -source "${PYTHON_VENV}/bin/activate" - -# Calculate the start of the validation window as 15 days prior to the end of the window. 
-COMPARISON_END_TIME="${SQOOP_START_TIME}" -COMPARISON_START_TIME=$(date --utc --iso=minutes -d "${COMPARISON_END_TIME} - 15 days") - -# Tooling setup -cd $WORKSPACE/analytics-tools/snowflake -make requirements - -python stitch_vs_sqoop_validation.py \ - --key_path $WORKSPACE/analytics-secure/${SNOWFLAKE_KEY_PATH} \ - --passphrase_path $WORKSPACE/analytics-secure/${SNOWFLAKE_PASSPHRASE_PATH} \ - --user ${SNOWFLAKE_USER} \ - --account ${SNOWFLAKE_ACCOUNT} \ - --schema ${APP_NAME} \ - --begin_datetime ${COMPARISON_START_TIME} \ - --end_datetime ${COMPARISON_END_TIME} diff --git a/dataeng/resources/stitch-snowflake-lag-monitor.sh b/dataeng/resources/stitch-snowflake-lag-monitor.sh deleted file mode 100644 index f1d56b5e1..000000000 --- a/dataeng/resources/stitch-snowflake-lag-monitor.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -set -ex - -# Creating Python virtual env -PYTHON_VENV="python_venv" -virtualenv --python=$PYTHON_VENV_VERSION --clear "${PYTHON_VENV}" -source "${PYTHON_VENV}/bin/activate" - -# Setup -cd $WORKSPACE/analytics-tools/snowflake -make requirements - - -python3 secrets-manager.py -w -n analytics-secure/snowflake/rsa_key_stitch_loader.p8 -v rsa_key_stitch_loader -python3 secrets-manager.py -w -n analytics-secure/snowflake/rsa_key_passphrase_stitch_loader -v rsa_key_passphrase_stitch_loader - - -python stitch-snowflake-monitoring.py \ - --user "STITCH_LOADER" \ - --account "edx.us-east-1" \ - --key_file rsa_key_stitch_loader \ - --passphrase_file rsa_key_passphrase_stitch_loader - -rm rsa_key_stitch_loader -rm rsa_key_passphrase_stitch_loader diff --git a/dataeng/resources/tableau-restore.sh b/dataeng/resources/tableau-restore.sh deleted file mode 100644 index 84e1c577f..000000000 --- a/dataeng/resources/tableau-restore.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash - -if [ -z $BACKUP_TIMESTAMP ]; then - aws s3 ls $S3_PATH | sort -r -else - ssh -o StrictHostKeyChecking=accept-new $USER_NAME@$TABLEAU_SERVER_HOST /bin/bash << EOF - 
set -ex - sudo su - -s /bin/bash $TABLEAU_ADMIN_USER - set -ex - config_backup_file=tableau_config_backup_$BACKUP_TIMESTAMP.json - data_backup_file=tableau_data_backup_$BACKUP_TIMESTAMP.tsbak - - # Restore config backup - aws s3 cp $S3_PATH\$config_backup_file . - tsm settings import -f \$config_backup_file - tsm pending-changes apply - tsm restart - rm \$config_backup_file - - # Restore data backup - # tsm maintainence restore command expects a backup file in the directory - # defined in the TSM basefilepath.backuprestore variable - tablea_backup_dir=$(tsm configuration get -k basefilepath.backuprestore) - aws s3 cp $S3_PATH\$data_backup_file \$tablea_backup_dir - tsm stop - tsm maintenance restore --file \$data_backup_file - tsm start - rm \$tablea_backup_dir/\$data_backup_file -EOF -fi diff --git a/dataeng/resources/user-location-by-course.sh b/dataeng/resources/user-location-by-course.sh deleted file mode 100644 index 7d77421eb..000000000 --- a/dataeng/resources/user-location-by-course.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env bash - -END_DATE=$(date +%Y-%m-%d -d "$TO_DATE") - -if [ -z "$NUM_REDUCE_TASKS" ]; then - NUM_REDUCE_TASKS=$(( $NUM_TASK_CAPACITY * 2 )) -fi - -env - -${WORKSPACE}/analytics-configuration/automation/run-automated-task.sh \ - InsertToMysqlLastCountryPerCourseTask --local-scheduler \ - --interval-end $END_DATE \ - --n-reduce-tasks $NUM_REDUCE_TASKS \ - --overwrite \ - $EXTRA_ARGS diff --git a/dataeng/resources/video-timeline.sh b/dataeng/resources/video-timeline.sh deleted file mode 100755 index a3a40ecdb..000000000 --- a/dataeng/resources/video-timeline.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env bash - -if [ -z "$NUM_REDUCE_TASKS" ]; then - NUM_REDUCE_TASKS=$(( $NUM_TASK_CAPACITY * 2 )) -fi - -${WORKSPACE}/analytics-configuration/automation/run-automated-task.sh \ - InsertToMysqlAllVideoTask --local-scheduler \ - --interval $(date +%Y-%m-%d -d "$FROM_DATE")-$(date +%Y-%m-%d -d "$TO_DATE") \ - --n-reduce-tasks 
$NUM_REDUCE_TASKS \ - --InsertToMysqlVideoTimelineTask-use-temp-table-for-overwrite \ - $EXTRA_ARGS