Skip to content

Commit

Permalink
Merge pull request #495 from martindevans/quantise_new_formats
Browse files Browse the repository at this point in the history
Added new file types to quantisation
  • Loading branch information
martindevans authored Feb 7, 2024
2 parents 17385e1 + c7103e8 commit ac7faa0
Showing 1 changed file with 13 additions and 1 deletion.
14 changes: 13 additions & 1 deletion LLama/LLamaQuantizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public static bool Quantize(string srcFileName, string dstFilename, string ftype
private static bool ValidateFtype(LLamaFtype ftype)
{
// Validation copied from here:
// https://github.com/ggerganov/llama.cpp/blob/e59fcb2bc129881f4a269fee748fb38bce0a64de/llama.cpp#L2960
// https://github.com/ggerganov/llama.cpp/blob/d71ac90985854b0905e1abba778e407e17f9f887/llama.cpp#L9613

switch (ftype)
{
Expand All @@ -70,15 +70,27 @@ private static bool ValidateFtype(LLamaFtype ftype)
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q8_0:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_F16:
case LLamaFtype.LLAMA_FTYPE_ALL_F32:

case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K_S:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q2_K:

case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_XS:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_S:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_M:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q3_K_L:

case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_S:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_K_M:

case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_S:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q5_K_M:

case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q6_K:

case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XXS:
case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ2_XS:

case LLamaFtype.LLAMA_FTYPE_MOSTLY_IQ3_XXS:
return true;

case LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16:
Expand Down

0 comments on commit ac7faa0

Please sign in to comment.