From f441f08fe7c4c7f10ce2c04ff14714a474db78d4 Mon Sep 17 00:00:00 2001 From: Alexander Bruy Date: Mon, 1 Jul 2024 17:10:23 +0100 Subject: [PATCH] port Basic statistics for fields algorithm to C++ Also add new optional output a table containing calculated statistics similar to the Statistics by categories algorithm (fix #46241) --- python/plugins/processing/algs/help/qgis.yaml | 7 - .../processing/algs/qgis/BasicStatistics.py | 284 ------------ .../algs/qgis/QgisAlgorithmProvider.py | 2 - python/plugins/processing/gui/menus.py | 2 +- .../testdata/expected/statistics_date.gml | 21 + .../testdata/expected/statistics_date.xsd | 84 ++++ .../testdata/expected/statistics_datetime.gml | 21 + .../testdata/expected/statistics_datetime.xsd | 84 ++++ .../testdata/expected/statistics_float.gml | 31 ++ .../testdata/expected/statistics_float.xsd | 152 +++++++ .../testdata/expected/statistics_text.gml | 25 ++ .../testdata/expected/statistics_text.xsd | 116 +++++ .../testdata/expected/statistics_time.gml | 21 + .../testdata/expected/statistics_time.xsd | 84 ++++ .../tests/testdata/qgis_algorithm_tests1.yaml | 10 +- .../tests/testdata/qgis_algorithm_tests2.yaml | 15 +- src/analysis/CMakeLists.txt | 1 + .../qgsalgorithmbasicstatistics.cpp | 421 ++++++++++++++++++ .../processing/qgsalgorithmbasicstatistics.h | 63 +++ .../processing/qgsnativealgorithms.cpp | 2 + 20 files changed, 1147 insertions(+), 299 deletions(-) delete mode 100644 python/plugins/processing/algs/qgis/BasicStatistics.py create mode 100644 python/plugins/processing/tests/testdata/expected/statistics_date.gml create mode 100644 python/plugins/processing/tests/testdata/expected/statistics_date.xsd create mode 100644 python/plugins/processing/tests/testdata/expected/statistics_datetime.gml create mode 100644 python/plugins/processing/tests/testdata/expected/statistics_datetime.xsd create mode 100644 python/plugins/processing/tests/testdata/expected/statistics_float.gml create mode 100644 
python/plugins/processing/tests/testdata/expected/statistics_float.xsd create mode 100644 python/plugins/processing/tests/testdata/expected/statistics_text.gml create mode 100644 python/plugins/processing/tests/testdata/expected/statistics_text.xsd create mode 100644 python/plugins/processing/tests/testdata/expected/statistics_time.gml create mode 100644 python/plugins/processing/tests/testdata/expected/statistics_time.xsd create mode 100644 src/analysis/processing/qgsalgorithmbasicstatistics.cpp create mode 100644 src/analysis/processing/qgsalgorithmbasicstatistics.h diff --git a/python/plugins/processing/algs/help/qgis.yaml b/python/plugins/processing/algs/help/qgis.yaml index 10f3257b3229..a61d56461030 100644 --- a/python/plugins/processing/algs/help/qgis.yaml +++ b/python/plugins/processing/algs/help/qgis.yaml @@ -4,13 +4,6 @@ qgis:advancedpythonfieldcalculator: > qgis:barplot: > This algorithm creates a bar plot from a category and a layer field. -qgis:basicstatisticsforfields: > - This algorithm generates basic statistics from the analysis of a values in a field in the attribute table of a vector layer. Numeric, date, time and string fields are supported. - - The statistics returned will depend on the field type. - - Statistics are generated as an HTML file. - qgis:boxplot: > This algorithm creates a box plot from a category and a layer field. 
diff --git a/python/plugins/processing/algs/qgis/BasicStatistics.py b/python/plugins/processing/algs/qgis/BasicStatistics.py deleted file mode 100644 index 630d767d575f..000000000000 --- a/python/plugins/processing/algs/qgis/BasicStatistics.py +++ /dev/null @@ -1,284 +0,0 @@ -""" -*************************************************************************** - BasicStatistics.py - --------------------- - Date : November 2016 - Copyright : (C) 2016 by Nyall Dawson - Email : nyall dot dawson at gmail dot com -*************************************************************************** -* * -* This program is free software; you can redistribute it and/or modify * -* it under the terms of the GNU General Public License as published by * -* the Free Software Foundation; either version 2 of the License, or * -* (at your option) any later version. * -* * -*************************************************************************** -""" - -__author__ = 'Nyall Dawson' -__date__ = 'November 2016' -__copyright__ = '(C) 2016, Nyall Dawson' - -import os -import codecs - -from qgis.PyQt.QtCore import QVariant -from qgis.PyQt.QtGui import QIcon - -from qgis.core import (QgsApplication, - QgsStatisticalSummary, - QgsStringStatisticalSummary, - QgsDateTimeStatisticalSummary, - QgsFeatureRequest, - QgsProcessing, - QgsProcessingException, - QgsProcessingParameterFeatureSource, - QgsProcessingParameterField, - QgsProcessingParameterFileDestination, - QgsProcessingOutputNumber, - QgsProcessingFeatureSource) - -from processing.algs.qgis.QgisAlgorithm import QgisAlgorithm - -pluginPath = os.path.split(os.path.split(os.path.dirname(__file__))[0])[0] - - -class BasicStatisticsForField(QgisAlgorithm): - INPUT_LAYER = 'INPUT_LAYER' - FIELD_NAME = 'FIELD_NAME' - OUTPUT_HTML_FILE = 'OUTPUT_HTML_FILE' - - MIN = 'MIN' - MAX = 'MAX' - COUNT = 'COUNT' - UNIQUE = 'UNIQUE' - EMPTY = 'EMPTY' - FILLED = 'FILLED' - MIN_LENGTH = 'MIN_LENGTH' - MAX_LENGTH = 'MAX_LENGTH' - MEAN_LENGTH = 'MEAN_LENGTH' - CV = 
'CV' - SUM = 'SUM' - MEAN = 'MEAN' - STD_DEV = 'STD_DEV' - RANGE = 'RANGE' - MEDIAN = 'MEDIAN' - MINORITY = 'MINORITY' - MAJORITY = 'MAJORITY' - FIRSTQUARTILE = 'FIRSTQUARTILE' - THIRDQUARTILE = 'THIRDQUARTILE' - IQR = 'IQR' - - def icon(self): - return QgsApplication.getThemeIcon("/algorithms/mAlgorithmBasicStatistics.svg") - - def svgIconPath(self): - return QgsApplication.iconPath("/algorithms/mAlgorithmBasicStatistics.svg") - - def tags(self): - return self.tr( - 'stats,statistics,date,time,datetime,string,number,text,table,layer,sum,maximum,minimum,mean,average,standard,deviation,' - 'count,distinct,unique,variance,median,quartile,range,majority,minority,summary').split(',') - - def group(self): - return self.tr('Vector analysis') - - def groupId(self): - return 'vectoranalysis' - - def __init__(self): - super().__init__() - - def initAlgorithm(self, config=None): - self.addParameter(QgsProcessingParameterFeatureSource(self.INPUT_LAYER, - self.tr('Input layer'), - types=[QgsProcessing.SourceType.TypeVector])) - - self.addParameter(QgsProcessingParameterField(self.FIELD_NAME, - self.tr('Field to calculate statistics on'), - None, self.INPUT_LAYER, QgsProcessingParameterField.DataType.Any)) - - self.addParameter(QgsProcessingParameterFileDestination(self.OUTPUT_HTML_FILE, self.tr('Statistics'), - self.tr('HTML files (*.html)'), None, True)) - - self.addOutput(QgsProcessingOutputNumber(self.COUNT, self.tr('Count'))) - self.addOutput(QgsProcessingOutputNumber(self.UNIQUE, self.tr('Number of unique values'))) - self.addOutput(QgsProcessingOutputNumber(self.EMPTY, self.tr('Number of empty (null) values'))) - self.addOutput(QgsProcessingOutputNumber(self.FILLED, self.tr('Number of non-empty values'))) - self.addOutput(QgsProcessingOutputNumber(self.MIN, self.tr('Minimum value'))) - self.addOutput(QgsProcessingOutputNumber(self.MAX, self.tr('Maximum value'))) - self.addOutput(QgsProcessingOutputNumber(self.MIN_LENGTH, self.tr('Minimum length'))) - 
self.addOutput(QgsProcessingOutputNumber(self.MAX_LENGTH, self.tr('Maximum length'))) - self.addOutput(QgsProcessingOutputNumber(self.MEAN_LENGTH, self.tr('Mean length'))) - self.addOutput(QgsProcessingOutputNumber(self.CV, self.tr('Coefficient of Variation'))) - self.addOutput(QgsProcessingOutputNumber(self.SUM, self.tr('Sum'))) - self.addOutput(QgsProcessingOutputNumber(self.MEAN, self.tr('Mean value'))) - self.addOutput(QgsProcessingOutputNumber(self.STD_DEV, self.tr('Standard deviation'))) - self.addOutput(QgsProcessingOutputNumber(self.RANGE, self.tr('Range'))) - self.addOutput(QgsProcessingOutputNumber(self.MEDIAN, self.tr('Median'))) - self.addOutput(QgsProcessingOutputNumber(self.MINORITY, self.tr('Minority (rarest occurring value)'))) - self.addOutput(QgsProcessingOutputNumber(self.MAJORITY, self.tr('Majority (most frequently occurring value)'))) - self.addOutput(QgsProcessingOutputNumber(self.FIRSTQUARTILE, self.tr('First quartile'))) - self.addOutput(QgsProcessingOutputNumber(self.THIRDQUARTILE, self.tr('Third quartile'))) - self.addOutput(QgsProcessingOutputNumber(self.IQR, self.tr('Interquartile Range (IQR)'))) - - def name(self): - return 'basicstatisticsforfields' - - def displayName(self): - return self.tr('Basic statistics for fields') - - def processAlgorithm(self, parameters, context, feedback): - source = self.parameterAsSource(parameters, self.INPUT_LAYER, context) - if source is None: - raise QgsProcessingException(self.invalidSourceError(parameters, self.INPUT_LAYER)) - - field_name = self.parameterAsString(parameters, self.FIELD_NAME, context) - field_idx = source.fields().lookupField(field_name) - if field_idx < 0: - raise QgsProcessingException(self.tr("Invalid field for statistics: “{}” does not exist").format(field_name)) - - field = source.fields().at(field_idx) - - output_file = self.parameterAsFileOutput(parameters, self.OUTPUT_HTML_FILE, context) - - request = 
QgsFeatureRequest().setFlags(QgsFeatureRequest.Flag.NoGeometry).setSubsetOfAttributes([field_name], - source.fields()) - features = source.getFeatures(request, QgsProcessingFeatureSource.Flag.FlagSkipGeometryValidityChecks) - count = source.featureCount() - - data = [self.tr('Analyzed field: {}').format(field_name)] - results = {} - - if field.isNumeric(): - d, results = self.calcNumericStats(features, feedback, field, count) - elif field.type() in (QVariant.Date, QVariant.Time, QVariant.DateTime): - d, results = self.calcDateTimeStats(features, feedback, field, count) - else: - d, results = self.calcStringStats(features, feedback, field, count) - data.extend(d) - - if output_file: - self.createHTML(output_file, data) - results[self.OUTPUT_HTML_FILE] = output_file - - return results - - def calcNumericStats(self, features, feedback, field, count): - total = 100.0 / count if count else 0 - stat = QgsStatisticalSummary() - for current, ft in enumerate(features): - if feedback.isCanceled(): - break - stat.addVariant(ft[field.name()]) - feedback.setProgress(int(current * total)) - stat.finalize() - - cv = stat.stDev() / stat.mean() if stat.mean() != 0 else 0 - - results = {self.COUNT: stat.count(), - self.UNIQUE: stat.variety(), - self.EMPTY: stat.countMissing(), - self.FILLED: count - stat.countMissing(), - self.MIN: stat.min(), - self.MAX: stat.max(), - self.RANGE: stat.range(), - self.SUM: stat.sum(), - self.MEAN: stat.mean(), - self.MEDIAN: stat.median(), - self.STD_DEV: stat.stDev(), - self.CV: cv, - self.MINORITY: stat.minority(), - self.MAJORITY: stat.majority(), - self.FIRSTQUARTILE: stat.firstQuartile(), - self.THIRDQUARTILE: stat.thirdQuartile(), - self.IQR: stat.interQuartileRange()} - - data = [ - self.tr('Count: {}').format(stat.count()), - self.tr('Unique values: {}').format(stat.variety()), - self.tr('NULL (missing) values: {}').format(stat.countMissing()), - self.tr('Minimum value: {}').format(stat.min()), - self.tr('Maximum value: 
{}').format(stat.max()), - self.tr('Range: {}').format(stat.range()), - self.tr('Sum: {}').format(stat.sum()), - self.tr('Mean value: {}').format(stat.mean()), - self.tr('Median value: {}').format(stat.median()), - self.tr('Standard deviation: {}').format(stat.stDev()), - self.tr('Coefficient of Variation: {}').format(cv), - self.tr('Minority (rarest occurring value): {}').format(stat.minority()), - self.tr('Majority (most frequently occurring value): {}').format(stat.majority()), - self.tr('First quartile: {}').format(stat.firstQuartile()), - self.tr('Third quartile: {}').format(stat.thirdQuartile()), - self.tr('Interquartile Range (IQR): {}').format(stat.interQuartileRange()) - ] - return data, results - - def calcStringStats(self, features, feedback, field, count): - total = 100.0 / count if count else 1 - stat = QgsStringStatisticalSummary() - for current, ft in enumerate(features): - if feedback.isCanceled(): - break - stat.addValue(ft[field.name()]) - feedback.setProgress(int(current * total)) - stat.finalize() - - results = {self.COUNT: stat.count(), - self.UNIQUE: stat.countDistinct(), - self.EMPTY: stat.countMissing(), - self.FILLED: stat.count() - stat.countMissing(), - self.MIN: stat.min(), - self.MAX: stat.max(), - self.MIN_LENGTH: stat.minLength(), - self.MAX_LENGTH: stat.maxLength(), - self.MEAN_LENGTH: stat.meanLength()} - - data = [ - self.tr('Count: {}').format(count), - self.tr('Unique values: {}').format(stat.countDistinct()), - self.tr('NULL (missing) values: {}').format(stat.countMissing()), - self.tr('Minimum value: {}').format(stat.min()), - self.tr('Maximum value: {}').format(stat.max()), - self.tr('Minimum length: {}').format(stat.minLength()), - self.tr('Maximum length: {}').format(stat.maxLength()), - self.tr('Mean length: {}').format(stat.meanLength()) - ] - - return data, results - - def calcDateTimeStats(self, features, feedback, field, count): - total = 100.0 / count if count else 1 - stat = QgsDateTimeStatisticalSummary() - for 
current, ft in enumerate(features): - if feedback.isCanceled(): - break - stat.addValue(ft[field.name()]) - feedback.setProgress(int(current * total)) - stat.finalize() - - results = {self.COUNT: stat.count(), - self.UNIQUE: stat.countDistinct(), - self.EMPTY: stat.countMissing(), - self.FILLED: stat.count() - stat.countMissing(), - self.MIN: stat.statistic(QgsDateTimeStatisticalSummary.Statistic.Min), - self.MAX: stat.statistic(QgsDateTimeStatisticalSummary.Statistic.Max)} - - data = [ - self.tr('Count: {}').format(count), - self.tr('Unique values: {}').format(stat.countDistinct()), - self.tr('NULL (missing) values: {}').format(stat.countMissing()), - self.tr('Minimum value: {}').format(field.displayString(stat.statistic(QgsDateTimeStatisticalSummary.Statistic.Min))), - self.tr('Maximum value: {}').format(field.displayString(stat.statistic(QgsDateTimeStatisticalSummary.Statistic.Max))) - ] - - return data, results - - def createHTML(self, outputFile, algData): - with codecs.open(outputFile, 'w', encoding='utf-8') as f: - f.write('\n') - f.write('\n') - for s in algData: - f.write('

' + str(s) + '

\n') - f.write('\n') diff --git a/python/plugins/processing/algs/qgis/QgisAlgorithmProvider.py b/python/plugins/processing/algs/qgis/QgisAlgorithmProvider.py index c6a42b6f9aa9..4705e863842d 100644 --- a/python/plugins/processing/algs/qgis/QgisAlgorithmProvider.py +++ b/python/plugins/processing/algs/qgis/QgisAlgorithmProvider.py @@ -28,7 +28,6 @@ from qgis.PyQt.QtCore import QCoreApplication from .BarPlot import BarPlot -from .BasicStatistics import BasicStatisticsForField from .BoxPlot import BoxPlot from .CheckValidity import CheckValidity from .Climb import Climb @@ -89,7 +88,6 @@ def __init__(self): def getAlgs(self): algs = [BarPlot(), - BasicStatisticsForField(), BoxPlot(), CheckValidity(), Climb(), diff --git a/python/plugins/processing/gui/menus.py b/python/plugins/processing/gui/menus.py index 4f905387594e..422fae1cbc6b 100644 --- a/python/plugins/processing/gui/menus.py +++ b/python/plugins/processing/gui/menus.py @@ -52,7 +52,7 @@ def initMenusAndToolbars(): 'native:sumlinelengths': analysisToolsMenu, 'native:countpointsinpolygon': analysisToolsMenu, 'qgis:listuniquevalues': analysisToolsMenu, - 'qgis:basicstatisticsforfields': analysisToolsMenu, + 'native:basicstatisticsforfields': analysisToolsMenu, 'native:nearestneighbouranalysis': analysisToolsMenu, 'native:meancoordinates': analysisToolsMenu, 'native:lineintersections': analysisToolsMenu}) diff --git a/python/plugins/processing/tests/testdata/expected/statistics_date.gml b/python/plugins/processing/tests/testdata/expected/statistics_date.gml new file mode 100644 index 000000000000..ab14428c1701 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/statistics_date.gml @@ -0,0 +1,21 @@ + + + + + + + 4 + 4 + 1 + 3 + 2014-11-30 + 2016-11-30 + 63158400 + + + diff --git a/python/plugins/processing/tests/testdata/expected/statistics_date.xsd b/python/plugins/processing/tests/testdata/expected/statistics_date.xsd new file mode 100644 index 000000000000..6886b8a5e792 --- /dev/null +++ 
b/python/plugins/processing/tests/testdata/expected/statistics_date.xsd @@ -0,0 +1,84 @@ + + + + + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/python/plugins/processing/tests/testdata/expected/statistics_datetime.gml b/python/plugins/processing/tests/testdata/expected/statistics_datetime.gml new file mode 100644 index 000000000000..2f33cfe336c1 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/statistics_datetime.gml @@ -0,0 +1,21 @@ + + + + + + + 4 + 4 + 1 + 3 + 2014-11-30T14:30:02 + 2016-11-30T14:29:22 + 63158360 + + + diff --git a/python/plugins/processing/tests/testdata/expected/statistics_datetime.xsd b/python/plugins/processing/tests/testdata/expected/statistics_datetime.xsd new file mode 100644 index 000000000000..50739a2f5391 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/statistics_datetime.xsd @@ -0,0 +1,84 @@ + + + + + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/python/plugins/processing/tests/testdata/expected/statistics_float.gml b/python/plugins/processing/tests/testdata/expected/statistics_float.gml new file mode 100644 index 000000000000..584150523a2d --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/statistics_float.gml @@ -0,0 +1,31 @@ + + + + + + + 3 + 3 + 1 + 3 + -0.123 + 0.123 + 0.246 + 0 + 0 + 0 + 0.10042907945411 + 0 + -0.123 + -0.123 + -0.0615 + 0.0615 + 0.123 + + + diff --git a/python/plugins/processing/tests/testdata/expected/statistics_float.xsd b/python/plugins/processing/tests/testdata/expected/statistics_float.xsd new file mode 100644 index 000000000000..75d192cc1f2a --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/statistics_float.xsd @@ -0,0 +1,152 @@ + + + + + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/python/plugins/processing/tests/testdata/expected/statistics_text.gml b/python/plugins/processing/tests/testdata/expected/statistics_text.gml new file mode 100644 index 000000000000..21b6dc6ecf58 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/statistics_text.gml @@ -0,0 +1,25 @@ + + + + + + + 4 + 2 + 1 + 3 + Test + Test + 0 + 4 + + Test + + + + diff --git a/python/plugins/processing/tests/testdata/expected/statistics_text.xsd b/python/plugins/processing/tests/testdata/expected/statistics_text.xsd new file mode 100644 index 000000000000..2495a6d3ace4 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/statistics_text.xsd @@ -0,0 +1,116 @@ + + + + + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/python/plugins/processing/tests/testdata/expected/statistics_time.gml b/python/plugins/processing/tests/testdata/expected/statistics_time.gml new file mode 100644 index 000000000000..0d9609256eb6 --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/statistics_time.gml @@ -0,0 +1,21 @@ + + + + + + + 4 + 4 + 1 + 3 + 03:29:40 + 15:29:22 + 43182 + + + diff --git a/python/plugins/processing/tests/testdata/expected/statistics_time.xsd b/python/plugins/processing/tests/testdata/expected/statistics_time.xsd new file mode 100644 index 000000000000..58a13717cb0f --- /dev/null +++ b/python/plugins/processing/tests/testdata/expected/statistics_time.xsd @@ -0,0 +1,84 @@ + + + + + 0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/python/plugins/processing/tests/testdata/qgis_algorithm_tests1.yaml b/python/plugins/processing/tests/testdata/qgis_algorithm_tests1.yaml index d3c0fc4636df..019a4d9c85d9 100644 --- a/python/plugins/processing/tests/testdata/qgis_algorithm_tests1.yaml +++ b/python/plugins/processing/tests/testdata/qgis_algorithm_tests1.yaml @@ -500,13 +500,16 @@ tests: fields: fid: skip - - algorithm: qgis:basicstatisticsforfields + - algorithm: native:basicstatisticsforfields name: Basic statistics for numeric fields params: - name: multipolys.gml type: vector - 'Bfloatval' results: + OUTPUT: + name: expected/statistics_float.gml + type: vector OUTPUT_HTML_FILE: name: basic_statistics_numeric_float.html type: regex @@ -529,13 +532,16 @@ tests: - 'NULL \(missing\) values: 1' - 'Interquartile Range \(IQR\): 0.123' - - algorithm: qgis:basicstatisticsforfields + - algorithm: native:basicstatisticsforfields name: Basic statistics for text fields params: - name: multipolys.gml type: vector - 'Bname' results: + OUTPUT: + name: expected/statistics_text.gml + type: vector OUTPUT_HTML_FILE: name: expected/basic_statistics_string.html type: regex diff --git a/python/plugins/processing/tests/testdata/qgis_algorithm_tests2.yaml b/python/plugins/processing/tests/testdata/qgis_algorithm_tests2.yaml index 61616aea72e0..7a4d24eed45b 100644 --- a/python/plugins/processing/tests/testdata/qgis_algorithm_tests2.yaml +++ b/python/plugins/processing/tests/testdata/qgis_algorithm_tests2.yaml @@ -1148,7 +1148,7 @@ tests: type: vector pk: intval - - algorithm: qgis:basicstatisticsforfields + - algorithm: native:basicstatisticsforfields name: Basic stats datetime params: FIELD_NAME: date_time @@ -1156,6 +1156,9 @@ tests: name: custom/datetimes.tab type: table results: + OUTPUT: + name: expected/statistics_datetime.gml + type: vector OUTPUT_HTML_FILE: name: expected/basic_statistics_datetime.html type: regex @@ -1167,7 +1170,7 @@ tests: - 'Maximum value: 2016-11-30T14:29:22' - 'NULL \(missing\) values: 1' 
- - algorithm: qgis:basicstatisticsforfields + - algorithm: native:basicstatisticsforfields name: Basic stats date params: FIELD_NAME: date @@ -1175,6 +1178,9 @@ tests: name: custom/datetimes.tab type: table results: + OUTPUT: + name: expected/statistics_date.gml + type: vector OUTPUT_HTML_FILE: name: expected/basic_statistics_date.html type: regex @@ -1186,7 +1192,7 @@ tests: - 'Maximum value: 2016-11-30T00:00:00' - 'NULL \(missing\) values: 1' - - algorithm: qgis:basicstatisticsforfields + - algorithm: native:basicstatisticsforfields name: Basic stats time params: FIELD_NAME: time @@ -1194,6 +1200,9 @@ tests: name: custom/datetimes.tab type: table results: + OUTPUT: + name: expected/statistics_time.gml + type: vector OUTPUT_HTML_FILE: name: expected/basic_statistics_time.html type: regex diff --git a/src/analysis/CMakeLists.txt b/src/analysis/CMakeLists.txt index 5c4bb11a4a3a..ea5f045fe724 100644 --- a/src/analysis/CMakeLists.txt +++ b/src/analysis/CMakeLists.txt @@ -50,6 +50,7 @@ set(QGIS_ANALYSIS_SRCS processing/qgsalgorithmassignprojection.cpp processing/qgsalgorithmattributeindex.cpp processing/qgsalgorithmb3dmtogltf.cpp + processing/qgsalgorithmbasicstatistics.cpp processing/qgsalgorithmbatchgeocode.cpp processing/qgsalgorithmbatchnominatimgeocode.cpp processing/qgsalgorithmboundary.cpp diff --git a/src/analysis/processing/qgsalgorithmbasicstatistics.cpp b/src/analysis/processing/qgsalgorithmbasicstatistics.cpp new file mode 100644 index 000000000000..5819389165f2 --- /dev/null +++ b/src/analysis/processing/qgsalgorithmbasicstatistics.cpp @@ -0,0 +1,421 @@ +/*************************************************************************** + qgsalgorithmbasicstatistics.cpp + ------------------------------ + begin : June 2024 + copyright : (C) 2024 by Alexander Bruy + email : alexander dot bruy at gmail dot com + ***************************************************************************/ + 
+/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#include "qgsalgorithmbasicstatistics.h" +#include "qgsstatisticalsummary.h" +#include "qgsdatetimestatisticalsummary.h" +#include "qgsstringstatisticalsummary.h" + + + +///@cond PRIVATE + +QString QgsBasicStatisticsAlgorithm::name() const +{ + return QStringLiteral( "basicstatisticsforfields" ); +} + +QString QgsBasicStatisticsAlgorithm::displayName() const +{ + return QObject::tr( "Basic statistics for fields" ); +} + +QStringList QgsBasicStatisticsAlgorithm::tags() const +{ + return QObject::tr( "stats,statistics,date,time,datetime,string,number,text,table,layer,sum,maximum,minimum,mean,average,standard,deviation,count,distinct,unique,variance,median,quartile,range,majority,minority,summary" ).split( ',' ); +} + +QString QgsBasicStatisticsAlgorithm::group() const +{ + return QObject::tr( "Vector analysis" ); +} + +QString QgsBasicStatisticsAlgorithm::groupId() const +{ + return QStringLiteral( "vectoranalysis" ); +} + +QString QgsBasicStatisticsAlgorithm::shortHelpString() const +{ + return QObject::tr( "Generates basic statistics from the analysis of values in a field in the attribute table of a vector layer. Numeric, date, time and string fields are supported. The statistics returned will depend on the field type." 
); +} + +QgsBasicStatisticsAlgorithm *QgsBasicStatisticsAlgorithm::createInstance() const +{ + return new QgsBasicStatisticsAlgorithm(); +} + +void QgsBasicStatisticsAlgorithm::initAlgorithm( const QVariantMap & ) +{ + addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT_LAYER" ), QObject::tr( "Input layer" ), QList<int>() << static_cast< int >( Qgis::ProcessingSourceType::Vector ) ) ); + addParameter( new QgsProcessingParameterField( QStringLiteral( "FIELD_NAME" ), QObject::tr( "Field to calculate statistics on" ), QVariant(), QStringLiteral( "INPUT_LAYER" ) ) ); + addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Statistics" ), Qgis::ProcessingSourceType::Vector, QVariant(), true ) ); + addParameter( new QgsProcessingParameterFileDestination( QStringLiteral( "OUTPUT_HTML_FILE" ), QObject::tr( "Statistics report" ), QObject::tr( "HTML files (*.html)" ), QVariant(), true ) ); + + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "COUNT" ), QObject::tr( "Count" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "UNIQUE" ), QObject::tr( "Number of unique values" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "EMPTY" ), QObject::tr( "Number of empty (null) values" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "FILLED" ), QObject::tr( "Number of non-empty values" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "MIN" ), QObject::tr( "Minimum value" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "MAX" ), QObject::tr( "Maximum value" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "MIN_LENGTH" ), QObject::tr( "Minimum length" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "MAX_LENGTH" ), QObject::tr( "Maximum length" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "MEAN_LENGTH" ), QObject::tr( "Mean length" ) ) ); + addOutput( new 
QgsProcessingOutputNumber( QStringLiteral( "CV" ), QObject::tr( "Coefficient of Variation" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "SUM" ), QObject::tr( "Sum" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "MEAN" ), QObject::tr( "Mean value" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "STD_DEV" ), QObject::tr( "Standard deviation" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RANGE" ), QObject::tr( "Range" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "MEDIAN" ), QObject::tr( "Median" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "MINORITY" ), QObject::tr( "Minority (rarest occurring value)" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "MAJORITY" ), QObject::tr( "Majority (most frequently occurring value)" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "FIRSTQUARTILE" ), QObject::tr( "First quartile" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "THIRDQUARTILE" ), QObject::tr( "Third quartile" ) ) ); + addOutput( new QgsProcessingOutputNumber( QStringLiteral( "IQR" ), QObject::tr( "Interquartile Range (IQR)" ) ) ); +} + +QVariantMap QgsBasicStatisticsAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback ) +{ + std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT_LAYER" ), context ) ); + if ( !source ) + throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT_LAYER" ) ) ); + + const QString fieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context ); + const int fieldIndex = source->fields().lookupField( fieldName ); + if ( fieldIndex < 0 ) + { + throw QgsProcessingException( QObject::tr( "Invalid field for statistics: “%1” does not exist" ).arg( fieldName ) ); + } + + QgsField field = 
source->fields().at( fieldIndex ); + + QString outputHtml = parameterAsFileOutput( parameters, QStringLiteral( "OUTPUT_HTML_FILE" ), context ); + + QgsFeatureRequest request; + request.setFlags( Qgis::FeatureRequestFlag::NoGeometry ).setSubsetOfAttributes( QStringList() << fieldName, source->fields() ); + QgsFeatureIterator features = source->getFeatures( request, Qgis::ProcessingFeatureSourceFlag::SkipGeometryValidityChecks ); + const long long count = source->featureCount(); + + QgsFields fields; + fields.append( QgsField( QStringLiteral( "count" ), QMetaType::Int ) ); + fields.append( QgsField( QStringLiteral( "unique" ), QMetaType::Int ) ); + fields.append( QgsField( QStringLiteral( "empty" ), QMetaType::Int ) ); + fields.append( QgsField( QStringLiteral( "filled" ), QMetaType::Int ) ); + + if ( field.isNumeric() ) + { + fields.append( QgsField( QStringLiteral( "min" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "max" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "range" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "sum" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "mean" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "median" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "stddev" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "cv" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "minority" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "majority" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "q1" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "q3" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "iqr" ), QMetaType::Double ) ); + } + else if ( field.isDateOrTime() ) + { + if ( field.type() == QMetaType::Type::QDate ) + { + fields.append( QgsField( QStringLiteral( "min" ), QMetaType::QDate ) ); + 
fields.append( QgsField( QStringLiteral( "max" ), QMetaType::QDate ) ); + } + else if ( field.type() == QMetaType::Type::QTime ) + { + fields.append( QgsField( QStringLiteral( "min" ), QMetaType::QTime ) ); + fields.append( QgsField( QStringLiteral( "max" ), QMetaType::QTime ) ); + } + else + { + fields.append( QgsField( QStringLiteral( "min" ), QMetaType::QDateTime ) ); + fields.append( QgsField( QStringLiteral( "max" ), QMetaType::QDateTime ) ); + } + fields.append( QgsField( QStringLiteral( "range" ), QMetaType::Double ) ); + } + else + { + fields.append( QgsField( QStringLiteral( "min" ), QMetaType::QString ) ); + fields.append( QgsField( QStringLiteral( "max" ), QMetaType::QString ) ); + fields.append( QgsField( QStringLiteral( "min_length" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "max_length" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "mean_length" ), QMetaType::Double ) ); + fields.append( QgsField( QStringLiteral( "minority" ), QMetaType::QString ) ); + fields.append( QgsField( QStringLiteral( "majority" ), QMetaType::QString ) ); + } + + QString destId; + std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, fields, Qgis::WkbType::NoGeometry, QgsCoordinateReferenceSystem() ) ); + if ( parameters.value( QStringLiteral( "OUTPUT" ) ).isValid() && !sink ) + throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) ); + + QStringList data; + data << QObject::tr( "Analyzed field: %1" ).arg( fieldName ); + + QVariantMap outputs; + + if ( field.isNumeric() ) + { + outputs = calculateNumericStatistics( field, features, count, sink.get(), data, feedback ); + } + else if ( field.isDateOrTime() ) + { + outputs = calculateDateTimeStatistics( field, features, count, sink.get(), data, feedback ); + } + else + { + outputs = calculateStringStatistics( field, features, count, sink.get(), data, feedback ); + } + + if ( 
!outputHtml.isEmpty() ) + { + QFile file( outputHtml ); + if ( file.open( QIODevice::WriteOnly | QIODevice::Truncate ) ) + { + QTextStream out( &file ); +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) + out.setCodec( "UTF-8" ); +#endif + out << QStringLiteral( "\n" ); + for ( const QString &s : data ) + { + out << QStringLiteral( "

%1

" ).arg( s ); + } + out << QStringLiteral( "" ); + + outputs.insert( QStringLiteral( "OUTPUT_HTML_FILE" ), outputHtml ); + } + } + + if ( sink ) + { + outputs.insert( QStringLiteral( "OUTPUT" ), destId ); + } + + return outputs; +} + +QVariantMap QgsBasicStatisticsAlgorithm::calculateNumericStatistics( QgsField field, QgsFeatureIterator features, const long long count, QgsFeatureSink *sink, QStringList &data, QgsProcessingFeedback *feedback ) +{ + const double step = count > 0 ? 100.0 / count : 1; + long long current = 0; + + QgsFeature f; + QgsStatisticalSummary stat; + + while ( features.nextFeature( f ) ) + { + if ( feedback->isCanceled() ) + { + break; + } + + stat.addVariant( f.attribute( field.name() ) ); + feedback->setProgress( current * step ); + current++; + } + stat.finalize(); + + const double cv = stat.mean() != 0 ? stat.stDev() / stat.mean() : 0; + + QVariantMap outputs; + outputs.insert( QStringLiteral( "COUNT" ), stat.count() ); + outputs.insert( QStringLiteral( "UNIQUE" ), stat.variety() ); + outputs.insert( QStringLiteral( "EMPTY" ), stat.countMissing() ); + outputs.insert( QStringLiteral( "FILLED" ), count - stat.countMissing() ); + outputs.insert( QStringLiteral( "MIN" ), stat.min() ); + outputs.insert( QStringLiteral( "MAX" ), stat.max() ); + outputs.insert( QStringLiteral( "RANGE" ), stat.range() ); + outputs.insert( QStringLiteral( "SUM" ), stat.sum() ); + outputs.insert( QStringLiteral( "MEAN" ), stat.mean() ); + outputs.insert( QStringLiteral( "MEDIAN" ), stat.median() ); + outputs.insert( QStringLiteral( "STD_DEV" ), stat.stDev() ); + outputs.insert( QStringLiteral( "CV" ), cv ); + outputs.insert( QStringLiteral( "MINORITY" ), stat.minority() ); + outputs.insert( QStringLiteral( "MAJORITY" ), stat.majority() ); + outputs.insert( QStringLiteral( "FIRSTQUARTILE" ), stat.firstQuartile() ); + outputs.insert( QStringLiteral( "THIRDQUARTILE" ), stat.thirdQuartile() ); + outputs.insert( QStringLiteral( "IQR" ), stat.interQuartileRange() ); + + 
data << QObject::tr( "Count: %1" ).arg( stat.count() ) + << QObject::tr( "Unique values: %1" ).arg( stat.variety() ) + << QObject::tr( "NULL (missing) values: %1" ).arg( stat.countMissing() ) + << QObject::tr( "NOT NULL (filled) values: %1" ).arg( count - stat.countMissing() ) + << QObject::tr( "Minimum value: %1" ).arg( stat.min() ) + << QObject::tr( "Maximum value: %1" ).arg( stat.max() ) + << QObject::tr( "Range: %1" ).arg( stat.range() ) + << QObject::tr( "Sum: %1" ).arg( stat.sum(), 0, 'f' ) + << QObject::tr( "Mean value: %1" ).arg( stat.mean(), 0, 'f' ) + << QObject::tr( "Median value: %1" ).arg( stat.median(), 0, 'f' ) + << QObject::tr( "Standard deviation: %1" ).arg( stat.stDev(), 0, 'f', 12 ) + << QObject::tr( "Coefficient of Variation: %1" ).arg( cv, 0, 'f' ) + << QObject::tr( "Minority (rarest occurring value): %1" ).arg( stat.minority() ) + << QObject::tr( "Majority (most frequently occurring value): %1" ).arg( stat.majority() ) + << QObject::tr( "First quartile: %1" ).arg( stat.firstQuartile(), 0, 'f' ) + << QObject::tr( "Third quartile: %1" ).arg( stat.thirdQuartile(), 0, 'f' ) + << QObject::tr( "Interquartile Range (IQR): %1" ).arg( stat.interQuartileRange() ); + + if ( sink ) + { + QgsFeature f; + f.setAttributes( QgsAttributes() << outputs.value( QStringLiteral( "COUNT" ) ) + << outputs.value( QStringLiteral( "UNIQUE" ) ) + << outputs.value( QStringLiteral( "EMPTY" ) ) + << outputs.value( QStringLiteral( "FILLED" ) ) + << outputs.value( QStringLiteral( "MIN" ) ) + << outputs.value( QStringLiteral( "MAX" ) ) + << outputs.value( QStringLiteral( "RANGE" ) ) + << outputs.value( QStringLiteral( "SUM" ) ) + << outputs.value( QStringLiteral( "MEAN" ) ) + << outputs.value( QStringLiteral( "MEDIAN" ) ) + << outputs.value( QStringLiteral( "STD_DEV" ) ) + << outputs.value( QStringLiteral( "CV" ) ) + << outputs.value( QStringLiteral( "MINORITY" ) ) + << outputs.value( QStringLiteral( "MAJORITY" ) ) + << outputs.value( QStringLiteral( "FIRSTQUARTILE" ) ) + << 
outputs.value( QStringLiteral( "THIRDQUARTILE" ) ) + << outputs.value( QStringLiteral( "IQR" ) ) ); + sink->addFeature( f, QgsFeatureSink::FastInsert ); + } + + return outputs; +} + +QVariantMap QgsBasicStatisticsAlgorithm::calculateDateTimeStatistics( QgsField field, QgsFeatureIterator features, const long long count, QgsFeatureSink *sink, QStringList &data, QgsProcessingFeedback *feedback ) +{ + const double step = count > 0 ? 100.0 / count : 1; + long long current = 0; + + QgsFeature f; + QgsDateTimeStatisticalSummary stat; + + while ( features.nextFeature( f ) ) + { + if ( feedback->isCanceled() ) + { + break; + } + + stat.addValue( f.attribute( field.name() ) ); + feedback->setProgress( current * step ); + current++; + } + stat.finalize(); + + QVariantMap outputs; + outputs.insert( QStringLiteral( "COUNT" ), stat.count() ); + outputs.insert( QStringLiteral( "UNIQUE" ), stat.countDistinct() ); + outputs.insert( QStringLiteral( "EMPTY" ), stat.countMissing() ); + outputs.insert( QStringLiteral( "FILLED" ), stat.count() - stat.countMissing() ); + outputs.insert( QStringLiteral( "MIN" ), stat.statistic( Qgis::DateTimeStatistic::Min ) ); + outputs.insert( QStringLiteral( "MAX" ), stat.statistic( Qgis::DateTimeStatistic::Max ) ); + outputs.insert( QStringLiteral( "RANGE" ), stat.range().seconds() ); + + data << QObject::tr( "Count: %1" ).arg( stat.count() ) + << QObject::tr( "Unique values: %1" ).arg( stat.countDistinct() ) + << QObject::tr( "NULL (missing) values: %1" ).arg( stat.countMissing() ) + << QObject::tr( "NOT NULL (filled) values: %1" ).arg( stat.count() - stat.countMissing() ) + << QObject::tr( "Minimum value: %1" ).arg( field.displayString( stat.statistic( Qgis::DateTimeStatistic::Min ) ) ) + << QObject::tr( "Maximum value: %1" ).arg( field.displayString( stat.statistic( Qgis::DateTimeStatistic::Max ) ) ) + << QObject::tr( "Range (seconds): %1" ).arg( stat.range().seconds() ); + + if ( sink ) + { + QgsFeature f; + f.setAttributes( QgsAttributes() << 
outputs.value( QStringLiteral( "COUNT" ) ) + << outputs.value( QStringLiteral( "UNIQUE" ) ) + << outputs.value( QStringLiteral( "EMPTY" ) ) + << outputs.value( QStringLiteral( "FILLED" ) ) + << outputs.value( QStringLiteral( "MIN" ) ) + << outputs.value( QStringLiteral( "MAX" ) ) + << outputs.value( QStringLiteral( "RANGE" ) ) ); + sink->addFeature( f, QgsFeatureSink::FastInsert ); + } + + return outputs; +} + +QVariantMap QgsBasicStatisticsAlgorithm::calculateStringStatistics( QgsField field, QgsFeatureIterator features, const long long count, QgsFeatureSink *sink, QStringList &data, QgsProcessingFeedback *feedback ) +{ + const double step = count > 0 ? 100.0 / count : 1; + long long current = 0; + + QgsFeature f; + QgsStringStatisticalSummary stat; + + while ( features.nextFeature( f ) ) + { + if ( feedback->isCanceled() ) + { + break; + } + + stat.addValue( f.attribute( field.name() ) ); + feedback->setProgress( current * step ); + current++; + } + stat.finalize(); + + QVariantMap outputs; + outputs.insert( QStringLiteral( "COUNT" ), stat.count() ); + outputs.insert( QStringLiteral( "UNIQUE" ), stat.countDistinct() ); + outputs.insert( QStringLiteral( "EMPTY" ), stat.countMissing() ); + outputs.insert( QStringLiteral( "FILLED" ), stat.count() - stat.countMissing() ); + outputs.insert( QStringLiteral( "MIN" ), stat.min() ); + outputs.insert( QStringLiteral( "MAX" ), stat.max() ); + outputs.insert( QStringLiteral( "MIN_LENGTH" ), stat.minLength() ); + outputs.insert( QStringLiteral( "MAX_LENGTH" ), stat.maxLength() ); + outputs.insert( QStringLiteral( "MEAN_LENGTH" ), stat.meanLength() ); + outputs.insert( QStringLiteral( "MINORITY" ), stat.minority() ); + outputs.insert( QStringLiteral( "MAJORITY" ), stat.majority() ); + + data << QObject::tr( "Count: %1" ).arg( stat.count() ) + << QObject::tr( "Unique values: %1" ).arg( stat.countDistinct() ) + << QObject::tr( "NULL (missing) values: %1" ).arg( stat.countMissing() ) + << QObject::tr( "NOT NULL (filled) values: 
%1" ).arg( count - stat.countMissing() ) + << QObject::tr( "Minimum value: %1" ).arg( stat.min() ) + << QObject::tr( "Maximum value: %1" ).arg( stat.max() ) + << QObject::tr( "Minimum length: %1" ).arg( stat.minLength() ) + << QObject::tr( "Maximum length: %1" ).arg( stat.maxLength() ) + << QObject::tr( "Mean length: %1" ).arg( stat.meanLength(), 0, 'f' ) + << QObject::tr( "Minority: %1" ).arg( stat.minority() ) + << QObject::tr( "Majority: %1" ).arg( stat.majority() ); + + if ( sink ) + { + QgsFeature f; + f.setAttributes( QgsAttributes() << outputs.value( QStringLiteral( "COUNT" ) ) + << outputs.value( QStringLiteral( "UNIQUE" ) ) + << outputs.value( QStringLiteral( "EMPTY" ) ) + << outputs.value( QStringLiteral( "FILLED" ) ) + << outputs.value( QStringLiteral( "MIN" ) ) + << outputs.value( QStringLiteral( "MAX" ) ) + << outputs.value( QStringLiteral( "MIN_LENGTH" ) ) + << outputs.value( QStringLiteral( "MAX_LENGTH" ) ) + << outputs.value( QStringLiteral( "MINORITY" ) ) + << outputs.value( QStringLiteral( "MAJORITY" ) ) ); + sink->addFeature( f, QgsFeatureSink::FastInsert ); + } + + return outputs; +} + +///@endcond diff --git a/src/analysis/processing/qgsalgorithmbasicstatistics.h b/src/analysis/processing/qgsalgorithmbasicstatistics.h new file mode 100644 index 000000000000..a4688bcf110a --- /dev/null +++ b/src/analysis/processing/qgsalgorithmbasicstatistics.h @@ -0,0 +1,63 @@ +/*************************************************************************** + qgsalgorithmbasicstatistics.h + ------------------------------ + begin : June 2024 + copyright : (C) 2024 by Alexander Bruy + email : alexander dot bruy at gmail dot com + ***************************************************************************/ + +/*************************************************************************** + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software 
Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + ***************************************************************************/ + +#ifndef QGSALGORITHMBASICSTATISTICS_H +#define QGSALGORITHMBASICSTATISTICS_H + +#define SIP_NO_FILE + +#include "qgis_sip.h" +#include "qgsprocessingalgorithm.h" +#include "qgsapplication.h" + +///@cond PRIVATE + +/** + * Native basic statistics algorithm. + */ +class QgsBasicStatisticsAlgorithm : public QgsProcessingAlgorithm +{ + + public: + + QgsBasicStatisticsAlgorithm() = default; + QIcon icon() const override { return QgsApplication::getThemeIcon( QStringLiteral( "/algorithms/mAlgorithmBasicStatistics.svg" ) ); } + QString svgIconPath() const override { return QgsApplication::iconPath( QStringLiteral( "/algorithms/mAlgorithmBasicStatistics.svg" ) ); } + void initAlgorithm( const QVariantMap &configuration = QVariantMap() ) override; + QString name() const override; + QString displayName() const override; + QStringList tags() const override; + QString group() const override; + QString groupId() const override; + QString shortHelpString() const override; + QgsBasicStatisticsAlgorithm *createInstance() const override SIP_FACTORY; + + protected: + + QVariantMap processAlgorithm( const QVariantMap ¶meters, + QgsProcessingContext &context, QgsProcessingFeedback *feedback ) override; + + private: + + QVariantMap calculateNumericStatistics( QgsField field, QgsFeatureIterator features, const long long count, QgsFeatureSink *sink, QStringList &data, QgsProcessingFeedback *feedback ); + QVariantMap calculateDateTimeStatistics( QgsField field, QgsFeatureIterator features, const long long count, QgsFeatureSink *sink, QStringList &data, QgsProcessingFeedback *feedback ); + QVariantMap calculateStringStatistics( QgsField field, QgsFeatureIterator features, const long long count, QgsFeatureSink *sink, QStringList &data, QgsProcessingFeedback *feedback ); +}; + +///@endcond PRIVATE + +#endif // 
QGSALGORITHMBASICSTATISTICS_H diff --git a/src/analysis/processing/qgsnativealgorithms.cpp b/src/analysis/processing/qgsnativealgorithms.cpp index 06a26910024d..022b045fd017 100644 --- a/src/analysis/processing/qgsnativealgorithms.cpp +++ b/src/analysis/processing/qgsnativealgorithms.cpp @@ -32,6 +32,7 @@ #include "qgsalgorithmassignprojection.h" #include "qgsalgorithmattributeindex.h" #include "qgsalgorithmb3dmtogltf.h" +#include "qgsalgorithmbasicstatistics.h" #include "qgsalgorithmbatchnominatimgeocode.h" #include "qgsalgorithmboundary.h" #include "qgsalgorithmboundingbox.h" @@ -298,6 +299,7 @@ void QgsNativeAlgorithms::loadAlgorithms() addAlgorithm( new QgsAssignProjectionAlgorithm() ); addAlgorithm( new QgsAttributeIndexAlgorithm() ); addAlgorithm( new QgsB3DMToGltfAlgorithm() ); + addAlgorithm( new QgsBasicStatisticsAlgorithm() ); addAlgorithm( new QgsBatchNominatimGeocodeAlgorithm() ); addAlgorithm( new QgsBookmarksToLayerAlgorithm() ); addAlgorithm( new QgsBoundaryAlgorithm() );