From 94c551e0c0dd4b170bd531ea6abe7f8579ffb3c3 Mon Sep 17 00:00:00 2001 From: Tarek Ayed Date: Wed, 27 Nov 2024 16:08:36 +0100 Subject: [PATCH 1/2] fix returnDuplicateState option and add duplicate_state filtering --- pkg/generator_db.go | 15 ++++++++++++++- pkg/serdes.go | 1 + tests/client_db_test.go | 19 +++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/pkg/generator_db.go b/pkg/generator_db.go index 5f7907b..10bff92 100644 --- a/pkg/generator_db.go +++ b/pkg/generator_db.go @@ -70,6 +70,8 @@ type dbRequest struct { minPixelCount string maxPixelCount string + duplicateState string + randomSampling bool partitionsCount string @@ -99,6 +101,9 @@ type SourceDBConfig struct { MaxShortEdge int `json:"max_short_edge"` MinPixelCount int `json:"min_pixel_count"` MaxPixelCount int `json:"max_pixel_count"` + + DuplicateState int `json:"duplicate_state"` + RandomSampling bool `json:"random_sampling"` } @@ -127,7 +132,7 @@ func (c *SourceDBConfig) setDefaults() { c.MinPixelCount = -1 c.MaxPixelCount = -1 c.RandomSampling = false - + c.DuplicateState = -1 } func (c *SourceDBConfig) getDbRequest() dbRequest { @@ -175,6 +180,11 @@ func (c *SourceDBConfig) getDbRequest() dbRequest { c.Rank = -1 } + duplicateState := sanitizeInt(c.DuplicateState) + if duplicateState == "0" { + duplicateState = "None" + } + return dbRequest{ fields: fields, sources: c.Sources, @@ -194,6 +204,7 @@ func (c *SourceDBConfig) getDbRequest() dbRequest { minPixelCount: sanitizeInt(c.MinPixelCount), maxPixelCount: sanitizeInt(c.MaxPixelCount), randomSampling: c.RandomSampling, + duplicateState: duplicateState, partitionsCount: sanitizeInt(c.WorldSize), partition: sanitizeInt(c.Rank), } @@ -367,6 +378,8 @@ func getHTTPRequest(api_url string, api_key string, request dbRequest) *http.Req maybeAddField(&req, "pixel_count__gte", request.minPixelCount) maybeAddField(&req, "pixel_count__lte", request.maxPixelCount) + maybeAddField(&req, "duplicate_state", request.duplicateState) + maybeAddField(&req, "partitions_count", request.partitionsCount) maybeAddField(&req, "partition", request.partition) diff --git a/pkg/serdes.go b/pkg/serdes.go index 745837a..d7e0f60 100644 --- a/pkg/serdes.go +++ b/pkg/serdes.go @@ -254,6 +254,7 @@ func fetchSample(config *SourceDBConfig, http_client *http.Client, sample_result return &Sample{ID: sample_result.Id, Source: sample_result.Source, Attributes: sample_result.Attributes, + DuplicateState: sample_result.DuplicateState, Image: *img_payload, Latents: latents, Masks: masks, diff --git a/tests/client_db_test.go b/tests/client_db_test.go index d15b831..8dd5379 100644 --- a/tests/client_db_test.go +++ b/tests/client_db_test.go @@ -454,3 +454,22 @@ func TestRandomSampling(t *testing.T) { t.Error("Random sampling is not working") } } + +func TestDuplicateStateFiltering(t *testing.T) { + clientConfig := get_default_test_config() + clientConfig.SamplesBufferSize = 1 + dbConfig := clientConfig.SourceConfig.(datago.SourceDBConfig) + dbConfig.DuplicateState = 1 + dbConfig.ReturnDuplicateState = true + clientConfig.SourceConfig = dbConfig + + client := datago.GetClient(clientConfig) + + for i := 0; i < 10; i++ { + sample := client.GetSample() + if sample.DuplicateState != 1 { + t.Errorf("Expected duplicate state to be 1") + } + } + client.Stop() +} From e0deb90d6b92c1f7cf2168ade6c5fa046f5c5a2a Mon Sep 17 00:00:00 2001 From: Tarek Ayed Date: Wed, 27 Nov 2024 16:15:40 +0100 Subject: [PATCH 2/2] formatting --- pkg/generator_db.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/generator_db.go b/pkg/generator_db.go index 10bff92..ec64046 100644 --- a/pkg/generator_db.go +++ b/pkg/generator_db.go @@ -97,12 +97,12 @@ type SourceDBConfig struct { ReturnLatents string `json:"return_latents"` ReturnDuplicateState bool `json:"return_duplicate_state"` - MinShortEdge int `json:"min_short_edge"` - MaxShortEdge int `json:"max_short_edge"` - MinPixelCount int `json:"min_pixel_count"` - MaxPixelCount int `json:"max_pixel_count"` + MinShortEdge int `json:"min_short_edge"` + MaxShortEdge int `json:"max_short_edge"` + MinPixelCount int `json:"min_pixel_count"` + MaxPixelCount int `json:"max_pixel_count"` - DuplicateState int `json:"duplicate_state"` + DuplicateState int `json:"duplicate_state"` RandomSampling bool `json:"random_sampling"` }