Skip to content

Commit

Permalink
Merge branch 'main' into fix-optimum-quanto-version
Browse files: browse the repository at this point in the history
  • Loading branch information
noskill authored Jan 6, 2025
2 parents d163a00 + 05f2c07 commit e31ca25
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
12 changes: 9 additions & 3 deletions multigen/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,15 @@ def _get_pipeline(self, pipe_class, model_id, model_type, cnet=None, quantize_dt
# use quantisation by default for now
cls = pipe_class._classflux
if device.type == 'cuda':
quantize_dtype = qfloat8
# offload_device = device.index
# device = torch.device('cpu')
mb = torch.cuda.get_device_properties(device.index).total_memory / 1024 / 1024
# quantize only when the device has more than 23000 MiB (~22.5 GiB) of memory;
# otherwise fall back to CPU offload
if 23000 < mb:
self.logger.debug(f"set quantisation for the pipe on cuda:{device.index} has {mb}Mb")
quantize_dtype = qfloat8
else:
offload_device = device.index
device = torch.device('cpu')
else:
cls = pipe_class._class
pipeline = self._loader.load_pipeline(cls, model_id, torch_dtype=torch.bfloat16,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_worker_flux.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def on_new_image(*args, **kwargs):
nonlocal c
c += 1

num_runs = 25
num_runs = 15
for i in range(num_runs):
if len(sessions) - 1 < i:
i %= len(sessions)
Expand Down

0 comments on commit e31ca25

Please sign in to comment.