-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WIP, would need more work but short on time
[x] initial refactor [x] adding a barebones filesystem dataloader [x] barebones unit test -> broken [ ] benchmark on IN1k
- Loading branch information
1 parent
dd80544
commit 538aba8
Showing
11 changed files
with
316 additions
and
109 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
pytest | ||
pytest | ||
pillow |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package datago | ||
|
||
import "context" | ||
|
||
// --- Sample data structures - these will be exposed to the Python world --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ||
// LatentPayload carries a serialized latent as raw bytes.
// It is one of the sample structures exposed to the Python side.
type LatentPayload struct {
	// Data holds the raw payload bytes.
	Data []byte
	// Len is the payload size; presumably len(Data) - TODO confirm against the producer.
	Len int
	// DataPtr is a raw address associated with the payload.
	// NOTE(review): presumably points at Data for zero-copy access from Python - confirm.
	DataPtr uintptr
}
|
||
// ImagePayload carries the bytes of one image together with its geometry.
// It is one of the sample structures exposed to the Python side.
type ImagePayload struct {
	// Data holds the image bytes.
	// NOTE(review): may be raw pixels or an encoded image depending on the loader configuration - confirm.
	Data []byte

	// Size of the image before any transform was applied. A good indicator of
	// the image frequency response at the current resolution.
	OriginalHeight, OriginalWidth int

	// Geometry of the payload stored in Data; useful to decode the current payload.
	Height, Width, Channels int

	// DataPtr is a raw address associated with the payload.
	// NOTE(review): presumably points at Data for zero-copy access from Python - confirm.
	DataPtr uintptr
}
|
||
type Sample struct { | ||
ID string | ||
Source string | ||
Attributes map[string]interface{} | ||
Image ImagePayload | ||
Masks map[string]ImagePayload | ||
AdditionalImages map[string]ImagePayload | ||
Latents map[string]LatentPayload | ||
CocaEmbedding []float32 | ||
Tags []string | ||
} | ||
|
||
// --- Generator and Backend interfaces --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ||
|
||
// The generator will be responsible for producing pages of metadata which can be dispatched | ||
// to the dispatch goroutine. The metadata will be used to fetch the actual payloads | ||
|
||
// SampleDataPointers is an opaque, backend-specific description of where to
// find one sample. Each backend type-asserts it to its own concrete metadata
// type before fetching the payloads.
type SampleDataPointers interface{}
|
||
type Pages struct { | ||
samplesDataPointers []SampleDataPointers | ||
} | ||
|
||
type Generator interface { | ||
generatePages(ctx context.Context, chanPages chan Pages) | ||
} | ||
|
||
// The backend will be responsible for fetching the payloads and deserializing them | ||
type Backend interface { | ||
collectSamples(chanSampleMetadata chan SampleDataPointers, chanSamples chan Sample, transform *ARAwareTransform) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
package datago | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
) | ||
|
||
type BackendFileSystem struct { | ||
config *DatagoConfig | ||
} | ||
|
||
func loadSample(config *DatagoConfig, filesystem_sample fsSampleMetadata, transform *ARAwareTransform) *Sample { | ||
// Load the file into []bytes | ||
bytes_buffer, err := os.ReadFile(filesystem_sample.filePath) | ||
if err != nil { | ||
fmt.Println("Error reading file:", filesystem_sample.filePath) | ||
return nil | ||
} | ||
|
||
img_payload, _, err := imageFromBuffer(bytes_buffer, transform, -1., config.PreEncodeImages, false) | ||
if err != nil { | ||
fmt.Println("Error loading image:", filesystem_sample.fileName) | ||
return nil | ||
} | ||
|
||
return &Sample{ID: filesystem_sample.fileName, | ||
Image: *img_payload, | ||
} | ||
} | ||
|
||
func (b BackendFileSystem) collectSamples(chanSampleMetadata chan SampleDataPointers, chanSamples chan Sample, transform *ARAwareTransform) { | ||
|
||
ack_channel := make(chan bool) | ||
|
||
sampleWorker := func() { | ||
for { | ||
item_to_fetch, open := <-chanSampleMetadata | ||
if !open { | ||
ack_channel <- true | ||
return | ||
} | ||
|
||
// Cast the item to fetch to the correct type | ||
filesystem_sample, ok := item_to_fetch.(fsSampleMetadata) | ||
if !ok { | ||
panic("Failed to cast the item to fetch to dbSampleMetadata. This worker is probably misconfigured") | ||
} | ||
|
||
sample := loadSample(b.config, filesystem_sample, transform) | ||
if sample != nil { | ||
chanSamples <- *sample | ||
} | ||
} | ||
} | ||
|
||
// Start the workers and work on the metadata channel | ||
for i := 0; i < b.config.ConcurrentDownloads; i++ { | ||
go sampleWorker() | ||
} | ||
|
||
// Wait for all the workers to be done or overall context to be cancelled | ||
for i := 0; i < b.config.ConcurrentDownloads; i++ { | ||
<-ack_channel | ||
} | ||
close(chanSamples) | ||
fmt.Println("No more items to serve, wrapping up") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.