Model File Manager #789

Open · wants to merge 11 commits into master

Changes from 6 commits
1 change: 1 addition & 0 deletions LLama.Unittest/Constants.cs
@@ -4,6 +4,7 @@ namespace LLama.Unittest
{
internal static class Constants
{
public static readonly string ModelDirectory = "Models";
public static readonly string GenerativeModelPath = "Models/llama-2-7b-chat.Q3_K_S.gguf";
public static readonly string EmbeddingModelPath = "Models/all-MiniLM-L12-v2.Q8_0.gguf";

2 changes: 0 additions & 2 deletions LLama.Unittest/LLama.Unittest.csproj
@@ -32,8 +32,6 @@
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf" DestinationFolder="Models" DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf" SkipUnchangedFiles="true"></DownloadFile>


</Target>

<ItemGroup>
104 changes: 104 additions & 0 deletions LLama.Unittest/Model/FileSystemModelRepoTests.cs
@@ -0,0 +1,104 @@
using LLama.Model;

namespace LLama.Unittest.Model;

public class FileSystemModelRepoTests
{
private readonly FileSystemModelRepo TestableRepo;

public FileSystemModelRepoTests()
{
TestableRepo = new([Constants.ModelDirectory]);
}

[Fact]
public void ModelDirectories_IsCorrect()
{
var dirs = TestableRepo.ListSources();
Assert.Single(dirs);

var expected = dirs.First()!.Contains(Constants.ModelDirectory);
Assert.True(expected);
}

[Fact]
public void AddDirectory_DoesntDuplicate()
{
for (var i = 0; i < 10; i++)
{
TestableRepo.AddSource(Constants.ModelDirectory);
TestableRepo.AddSource(Path.GetFullPath(Constants.ModelDirectory));

var dirs = TestableRepo.ListSources();
Assert.Single(dirs);
var expected = dirs.First()!.Contains(Constants.ModelDirectory);
Assert.True(expected);
}
}

[Fact]
public void RemoveDirectory()
{
var dirs = TestableRepo.ListSources();
Assert.Single(dirs);
var expected = dirs.First()!.Contains(Constants.ModelDirectory);
Assert.True(expected);

Assert.True(TestableRepo.RemoveSource(Constants.ModelDirectory));
Assert.Empty(TestableRepo.ListSources());
Assert.Empty(TestableRepo.GetAvailableModels());
}

[Fact]
public void RemoveDirectory_DoesNotExist()
{
var dirs = TestableRepo.ListSources();
Assert.Single(dirs);
var expected = dirs.First()!.Contains(Constants.ModelDirectory);
Assert.True(expected);

Assert.False(TestableRepo.RemoveSource("foo/boo/bar"));
Assert.Single(dirs);
}

[Fact]
public void RemoveAllDirectories()
{
var dirs = TestableRepo.ListSources();
Assert.Single(dirs);
var expected = dirs.First()!.Contains(Constants.ModelDirectory);
Assert.True(expected);

TestableRepo.RemoveAllSources();
Assert.Empty(TestableRepo.ListSources());
Assert.Empty(TestableRepo.GetAvailableModels());
}

[Fact]
public void ModelFiles_IsCorrect()
{
var files = TestableRepo.GetAvailableModels();
Assert.Equal(4, files.Count());
}

[Fact]
public void GetAvailableModelsFromDirectory()
{
var files = TestableRepo.GetAvailableModelsFromSource(Constants.ModelDirectory);
Assert.Equal(4, files.Count());

files = TestableRepo.GetAvailableModels();
Assert.Equal(4, files.Count());
}

[Fact]
public void TryGetModelFileMetadata_WhenExists()
{
var expectedFile = TestableRepo.GetAvailableModels().First();
var found = TestableRepo.TryGetModelFileMetadata(expectedFile.ModelFileUri, out var foundData);

Assert.True(found);
Assert.Equal(expectedFile.ModelFileUri, foundData.ModelFileUri);
}

}
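Taken together, these tests sketch the whole FileSystemModelRepo surface. A minimal usage sketch, assuming only the constructor and method shapes the tests exercise (the "Models" directory and the printed fields are illustrative):

using LLama.Model;

// Treat one or more file-system directories as model sources
var repo = new FileSystemModelRepo(["Models"]);

// Enumerate the model files discovered across all registered sources
foreach (var model in repo.GetAvailableModels())
{
    Console.WriteLine($"{model.ModelFileName} -> {model.ModelFileUri}");
}

// Sources can be added and removed at runtime; equivalent paths are not duplicated
repo.AddSource(Path.GetFullPath("Models")); // same directory, different spelling
repo.RemoveSource("Models");                // returns false for unknown sources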
87 changes: 87 additions & 0 deletions LLama.Unittest/Model/ModelCacheTests.cs
@@ -0,0 +1,87 @@
using LLama.Common;
using LLama.Model;

namespace LLama.Unittest.Model;

public class ModelManagerTests
{
private readonly IModelSourceRepo _testRepo = new FileSystemModelRepo([Constants.ModelDirectory]);

private readonly ModelCache TestableModelManager;

public ModelManagerTests()
{
TestableModelManager = new();
}

[Fact]
public async Task LoadModel_LoadsAndCaches()
{
var modelToLoad = _testRepo.GetAvailableModels()
.First(f => f.ModelFileName.Contains("llama-2-7b"));

var model = await TestableModelManager.LoadModelAsync(modelToLoad);
var isLoaded = TestableModelManager.TryGetLoadedModel(model.ModelName, out var cachedModel);
Assert.True(isLoaded);

// unload the newly acquired model even though it was cached
Assert.True(TestableModelManager.UnloadModel(model.ModelName));
// cachedModel.Dispose(); // disposing the cached reference has no effect here

// unload "original"
model.Dispose(); // need to explicitly dispose the model that the caller (us) owns
Assert.True(TestableModelManager.UnloadModel(model.ModelName));

Assert.False(TestableModelManager.UnloadModel(model.ModelName));

Assert.Throws<ObjectDisposedException>(() =>
{
_ = model.CreateContext(new ModelParams(modelToLoad.ModelFileUri));
});
}

[Fact]
public async Task LoadModel_AlreadyLoaded_ReturnsFromCache()
{
var modelToLoad = _testRepo.GetAvailableModels()
.First(f => f.ModelFileName.Contains("llama-2-7b"));

for (var i = 0; i < 5; i++)
{
var model = await TestableModelManager.LoadModelAsync(modelToLoad);
Assert.NotNull(model);
Assert.Equal("LLaMA v2", model.ModelName);
var isLoaded = TestableModelManager.TryGetLoadedModel(model.ModelName, out var cachedModel);
Assert.True(isLoaded);
Assert.NotNull(cachedModel);
Assert.Equal("LLaMA v2", cachedModel.ModelName);
}
}

[Fact]
public async Task TryGetLoadedModel_AlreadyDisposed_ReturnsFalse()
{
var modelToLoad = _testRepo.GetAvailableModels()
.First(f => f.ModelFileName.Contains("llama-2-7b"));

using (var model = await TestableModelManager.LoadModelAsync(modelToLoad))
{
Assert.NotNull(model);
Assert.Equal("LLaMA v2", model.ModelName);
var isLoaded = TestableModelManager.TryGetLoadedModel(model.ModelName, out var cachedModel);
Assert.True(isLoaded);
Assert.NotNull(cachedModel);
Assert.Equal("LLaMA v2", cachedModel.ModelName);

// unload from the last check
Assert.True(TestableModelManager.UnloadModel("LLaMA v2"));

} // end of scope: Dispose is called on the model, but the cache keeps it alive until it is unloaded
Assert.True(TestableModelManager.UnloadModel("LLaMA v2"));

// After the final unload the cache no longer resolves the model
var isDisposedLoaded = TestableModelManager.TryGetLoadedModel("LLaMA v2", out var disposedModel);
Assert.False(isDisposedLoaded);
Assert.Null(disposedModel);
}
}
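The cache tests above imply a load/lookup/unload lifecycle. A short sketch of that flow, assuming the same ModelCache and FileSystemModelRepo APIs the tests call (the model selection and directory name are illustrative):

using LLama.Model;

var repo = new FileSystemModelRepo(["Models"]);
var cache = new ModelCache();

// Load a model file discovered by the repo; the cache keys it by model name
var metadata = repo.GetAvailableModels().First(f => f.ModelFileName.Contains("llama-2-7b"));
var weights = await cache.LoadModelAsync(metadata);

// Repeat loads and name lookups are served from the cache
if (cache.TryGetLoadedModel(weights.ModelName, out var cached))
{
    // 'cached' refers to the same underlying weights as 'weights'
}

// The caller owns its instance: dispose it, then unload the cached entry
weights.Dispose();
cache.UnloadModel(weights.ModelName);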
94 changes: 73 additions & 21 deletions LLama/LLamaWeights.cs
@@ -16,12 +16,29 @@ namespace LLama
public sealed class LLamaWeights
: IDisposable
{
private bool _disposed = false;

/// <summary>
/// Finalizer, which releases the native handle if the weights were never explicitly disposed
/// </summary>
~LLamaWeights()
{
Dispose(false);
}

/// <summary>
/// The native handle, which is used in the native APIs
/// </summary>
/// <remarks>Be careful how you use this!</remarks>
public SafeLlamaModelHandle NativeHandle { get; }

#region Properties
/// <summary>
/// The model's name, as specified in its metadata
/// </summary>
public string ModelName => Metadata.TryGetValue("general.name", out var name)
? name
: string.Empty;

/// <summary>
/// Total number of tokens in vocabulary of this model
/// </summary>
@@ -56,11 +73,28 @@ public sealed class LLamaWeights
/// All metadata keys in this model
/// </summary>
public IReadOnlyDictionary<string, string> Metadata { get; set; }
#endregion

-private LLamaWeights(SafeLlamaModelHandle weights)
+private LLamaWeights(SafeLlamaModelHandle handle)
 {
-    NativeHandle = weights;
-    Metadata = weights.ReadMetadata();
+    NativeHandle = handle;
+    Metadata = handle.ReadMetadata();

// Increment the model reference count while this weight exists.
// DangerousAddRef throws if it fails, so there is no need to check "success"
var success = false;
NativeHandle.DangerousAddRef(ref success);
}

#region Load
/// <summary>
/// Create from a "shared" handle. The `SafeLlamaModelHandle` will not be disposed and the model will not be unloaded until <b>all</b> such handles have been disposed.
/// </summary>
/// <param name="handle"></param>
/// <returns></returns>
public static LLamaWeights FromSafeModelHandle(SafeLlamaModelHandle handle)
{
return new LLamaWeights(handle);
}

/// <summary>
@@ -71,19 +105,19 @@ private LLamaWeights(SafeLlamaModelHandle weights)
public static LLamaWeights LoadFromFile(IModelParams @params)
{
using var pin = @params.ToLlamaModelParams(out var lparams);
-var weights = SafeLlamaModelHandle.LoadFromFile(@params.ModelPath, lparams);
+var model = SafeLlamaModelHandle.LoadFromFile(@params.ModelPath, lparams);

foreach (var adapter in @params.LoraAdapters)
{
-if (string.IsNullOrEmpty(adapter.Path))
-    continue;
-if (adapter.Scale <= 0)
+if (string.IsNullOrEmpty(adapter.Path) || adapter.Scale <= 0)
+{
     continue;
+}

-weights.ApplyLoraFromFile(adapter.Path, adapter.Scale, @params.LoraBase);
+model.ApplyLoraFromFile(adapter.Path, adapter.Scale, @params.LoraBase);
}

-return new LLamaWeights(weights);
+return new LLamaWeights(model);
}

/// <summary>
@@ -103,15 +137,15 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
var loraBase = @params.LoraBase;
var loraAdapters = @params.LoraAdapters.ToArray();

-// Determine the range to report for model loading. llama.cpp reports 0-1, but we'll remap that into a
-// slightly smaller range to allow some space for reporting LoRA loading too.
-var modelLoadProgressRange = 1f;
-if (loraAdapters.Length > 0)
-    modelLoadProgressRange = 0.9f;

 using (@params.ToLlamaModelParams(out var lparams))
 {
 #if !NETSTANDARD2_0
+// Determine the range to report for model loading. llama.cpp reports 0-1, but we'll remap that into a
+// slightly smaller range to allow some space for reporting LoRA loading too.
+var modelLoadProgressRange = 1f;
+if (loraAdapters.Length > 0)
+    modelLoadProgressRange = 0.9f;

// Overwrite the progress callback with one which polls the cancellation token and updates the progress object
if (token.CanBeCanceled || progressReporter != null)
{
@@ -125,11 +159,7 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
if (internalCallback != null && !internalCallback(progress, ctx))
return false;

-// Check the cancellation token
-if (token.IsCancellationRequested)
-    return false;
-
-return true;
+// Returning false aborts the native load when cancellation has been requested
+return !token.IsCancellationRequested;
};
}
#endif
@@ -183,11 +213,33 @@ public static async Task<LLamaWeights> LoadFromFileAsync(IModelParams @params, C
return model;
}
}
#endregion

/// <inheritdoc />
public void Dispose()
{
-NativeHandle.Dispose();
+Dispose(true);
+GC.SuppressFinalize(this);
}

/// <summary>
/// Release the native model handle
/// </summary>
/// <param name="disposing">True when called explicitly via Dispose; false when invoked from the finalizer</param>
internal void Dispose(bool disposing)
{
if (_disposed)
{
return;
}

if (disposing)
{
NativeHandle.DangerousRelease();
NativeHandle.Dispose();
}

_disposed = true;
}

/// <summary>
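The ref-counting introduced above changes the ownership model: each LLamaWeights now takes its own reference on the shared SafeLlamaModelHandle and releases it on dispose. A sketch of the resulting lifetime semantics, using only members visible in this diff (the model path is borrowed from the test constants and is illustrative):

using LLama;
using LLama.Common;

var @params = new ModelParams("Models/llama-2-7b-chat.Q3_K_S.gguf");

// LoadFromFile creates the native handle; the LLamaWeights constructor
// then takes an extra reference on it via DangerousAddRef
using var weights = LLamaWeights.LoadFromFile(@params);

// A second wrapper over the same shared handle: the native model is only
// unloaded once every wrapper holding a reference has been disposed
using var shared = LLamaWeights.FromSafeModelHandle(weights.NativeHandle);

Console.WriteLine(weights.ModelName); // read from the "general.name" metadata key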