From a43aa1832efd4dc0518c26c6b6337afa3a10117c Mon Sep 17 00:00:00 2001 From: CyrilFerlicot Date: Tue, 21 Mar 2023 16:26:12 +0100 Subject: [PATCH] Add a way to speedup dataframe data imputing --- src/AI-DataImputers/AISimpleImputer.class.st | 6 +----- .../SequenceableCollection.extension.st | 12 ++++++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) create mode 100644 src/AI-DataImputers/SequenceableCollection.extension.st diff --git a/src/AI-DataImputers/AISimpleImputer.class.st b/src/AI-DataImputers/AISimpleImputer.class.st index aba1b39..a057895 100644 --- a/src/AI-DataImputers/AISimpleImputer.class.st +++ b/src/AI-DataImputers/AISimpleImputer.class.st @@ -176,11 +176,7 @@ AISimpleImputer >> transform: aCollection [ self ensureIs2D: aCollection. self statistics ifNil: [ self error: '#fit: needs to be called before transforming.' ]. - ^ aCollection collect: [ :subcoll | - subcoll withIndexCollect: [ :elem :index | - elem = self missingValue - ifTrue: [ statistics at: index ] - ifFalse: [ elem ] ] ] + ^ aCollection copyReplace: missingValue in2DCollectionBy: statistics ] { #category : #options } diff --git a/src/AI-DataImputers/SequenceableCollection.extension.st b/src/AI-DataImputers/SequenceableCollection.extension.st new file mode 100644 index 0000000..5e3b5e2 --- /dev/null +++ b/src/AI-DataImputers/SequenceableCollection.extension.st @@ -0,0 +1,12 @@ +Extension { #name : #SequenceableCollection } + +{ #category : #'*AI-DataImputers' } +SequenceableCollection >> copyReplace: missingValue in2DCollectionBy: arrayOfReplacementValues [ + "I am treated as a 2D collection, that is a collection whose elements are themselves sequenceable sub collections. I answer the result of collecting over my sub collections, in which every element equal (=) to missingValue is replaced by the element of arrayOfReplacementValues located at the index the missing element has inside its own sub collection. Elements that are not equal to missingValue are kept as they are. arrayOfReplacementValues is only accessed (with at:) at the indexes where a missing value is found." + + ^ self collect: [ :subColl | + subColl withIndexCollect: [ :element :index | + element = missingValue + ifTrue: [ arrayOfReplacementValues at: index ] + ifFalse: [ element ] ] ] +]