From b3834eb873c810ef9a269b1c0a75a4b211871c5e Mon Sep 17 00:00:00 2001 From: James Date: Thu, 21 Nov 2024 10:51:05 +0800 Subject: [PATCH] [improvement](statistics)Change auto analyze max width to 300 and health threshold to 90. (#42104) ### What problem does this PR solve? Change auto analyze max width to 300 and health threshold to 90. This allows wider tables to be auto analyzed by default, and statistics for tables with data changes are re-collected more frequently. Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None --- .../org/apache/doris/qe/GlobalVariable.java | 1 + .../org/apache/doris/qe/SessionVariable.java | 4 +-- .../java/org/apache/doris/qe/VariableMgr.java | 16 +++++++++++ .../doris/statistics/StatisticConstants.java | 4 +-- .../statistics/util/StatisticsUtilTest.java | 27 +++++++++++++++---- 5 files changed, 43 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/GlobalVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/GlobalVariable.java index c7e226438f8541..6a08c97cd4858f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/GlobalVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/GlobalVariable.java @@ -35,6 +35,7 @@ public final class GlobalVariable { public static final int VARIABLE_VERSION_0 = 0; public static final int VARIABLE_VERSION_100 = 100; + public static final int VARIABLE_VERSION_101 = 101; public static final int VARIABLE_VERSION_200 = 200; public static final int CURRENT_VARIABLE_VERSION = VARIABLE_VERSION_200; public static final String VARIABLE_VERSION = "variable_version"; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index f7ff9baf1660c9..c6b7757e2c8966 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -1912,7 +1912,7 @@ public void setEnableLeftZigZag(boolean 
enableLeftZigZag) { "Maximum table width to enable auto analyze, " + "table with more columns than this value will not be auto analyzed."}, flag = VariableMgr.GLOBAL) - public int autoAnalyzeTableWidthThreshold = 100; + public int autoAnalyzeTableWidthThreshold = 300; @VariableMgr.VarAttr(name = AUTO_ANALYZE_START_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", description = {"该参数定义自动ANALYZE例程的开始时间", @@ -1977,7 +1977,7 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) { + "exceeds (100 - table_stats_health_threshold)% since the last " + "statistics collection operation, the statistics for this table are" + "considered outdated."}) - public int tableStatsHealthThreshold = 60; + public int tableStatsHealthThreshold = 90; @VariableMgr.VarAttr(name = ENABLE_MATERIALIZED_VIEW_REWRITE, needForward = true, description = {"是否开启基于结构信息的物化视图透明改写", diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/VariableMgr.java b/fe/fe-core/src/main/java/org/apache/doris/qe/VariableMgr.java index 34cbe29c3fdfe0..93f931fff124aa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/VariableMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/VariableMgr.java @@ -34,6 +34,8 @@ import org.apache.doris.common.util.SerializationUtils; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.persist.GlobalVarPersistInfo; +import org.apache.doris.statistics.StatisticConstants; +import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.base.Preconditions; import com.google.common.base.Strings; @@ -984,6 +986,20 @@ public static void forceUpdateVariables() { SessionVariable.ENABLE_PIPELINE_X_ENGINE, String.valueOf(true)); } + if (currentVariableVersion < GlobalVariable.VARIABLE_VERSION_101) { + if (StatisticsUtil.getAutoAnalyzeTableWidthThreshold() + < StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD) { + VariableMgr.refreshDefaultSessionVariables("update variable version", + 
SessionVariable.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD, + String.valueOf(StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD)); + } + if (StatisticsUtil.getTableStatsHealthThreshold() + < StatisticConstants.TABLE_STATS_HEALTH_THRESHOLD) { + VariableMgr.refreshDefaultSessionVariables("update variable version", + SessionVariable.TABLE_STATS_HEALTH_THRESHOLD, + String.valueOf(StatisticConstants.TABLE_STATS_HEALTH_THRESHOLD)); + } + } if (currentVariableVersion < GlobalVariable.VARIABLE_VERSION_200) { // update from 3.0.2 or below to 3.0.3 or higher VariableMgr.refreshDefaultSessionVariables("update variable version", diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java index c2479147ec7b20..f6d49ea079bf18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticConstants.java @@ -93,13 +93,13 @@ public class StatisticConstants { public static final long EXTERNAL_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(24); - public static final int TABLE_STATS_HEALTH_THRESHOLD = 60; + public static final int TABLE_STATS_HEALTH_THRESHOLD = 90; public static final int ANALYZE_TIMEOUT_IN_SEC = 43200; public static final int TASK_QUEUE_CAP = 1; - public static final int AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD = 100; + public static final int AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD = 300; public static final int MSG_LEN_UPPER_BOUND = 1024; diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index 32521882939d34..fbac718e421c29 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -344,11 +344,11 @@ public 
ColStatsMeta findColumnStatsMeta(String indexName, String colName) { tableMeta.partitionChanged.set(false); Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); - // Test update rows changed more than threshold. + // Test row count changed more than threshold. new MockUp() { @Mock public long getRowCount() { - return 120; + return 111; } }; new MockUp() { @@ -358,12 +358,29 @@ public ColStatsMeta findColumnStatsMeta(String indexName, String colName) { } }; tableMeta.partitionChanged.set(false); - tableMeta.updatedRows.set(200); + tableMeta.updatedRows.set(80); Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); - // Test update rows changed less than threshold + // Test update rows changed more than threshold + new MockUp() { + @Mock + public long getRowCount() { + return 101; + } + }; + tableMeta.partitionChanged.set(false); + tableMeta.updatedRows.set(91); + Assertions.assertTrue(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); + + // Test row count and update rows changed less than threshold + new MockUp() { + @Mock + public long getRowCount() { + return 100; + } + }; tableMeta.partitionChanged.set(false); - tableMeta.updatedRows.set(100); + tableMeta.updatedRows.set(85); Assertions.assertFalse(StatisticsUtil.needAnalyzeColumn(table, Pair.of("index", column.getName()))); }