Skip to content

Commit 98a1e1c

Browse files
committed
Use existing importance matrix files for all quant formats
1 parent fd1785e commit 98a1e1c

File tree

3 files changed

+13
-5
lines changed

3 files changed

+13
-5
lines changed

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ TARGET_DIRECTORY=.\gguf
5353
# physical drive to improve the quantization speed.
5454
CACHE_DIRECTORY=.\cache
5555
56+
# Path to the directory for importance matrix files.
57+
IMPORTANCE_MATRIX_DIRECTORY=.\imatrix
58+
5659
#
5760
# Comma separated list of quantization types.
5861
#
@@ -106,7 +109,7 @@ QUANTIZATION_TYPES=Q5_K_M,Q3_K_S
106109
Clone a Git repository containing an LLM into the `SOURCE_DIRECTORY` without checking out any files or downloading any large files (Git LFS).
107110

108111
```PowerShell
109-
git -C "./source" clone --no-checkout https://huggingface.co/openchat/openchat-3.5-0106
112+
git -C "./source" clone --no-checkout https://huggingface.co/openchat/openchat-3.6-8b-20240522
110113
```
111114

112115
### 2. Download model sources

imatrix/.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Ignore everything in this directory except this file.
2+
*
3+
!.gitignore

quantize_weights_for_llama.cpp.ps1

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Get-Content "./.env" | ForEach {
1414
$llamaCppDirectory = Resolve-Path -Path $env:LLAMA_CPP_DIRECTORY
1515
$sourceDirectory = Resolve-Path -Path $env:SOURCE_DIRECTORY
1616
$targetDirectory = Resolve-Path -Path $env:TARGET_DIRECTORY
17+
$importanceMatrixDirectory = Resolve-Path -Path $env:IMPORTANCE_MATRIX_DIRECTORY
1718
$cacheDirectory = Resolve-Path -Path $env:CACHE_DIRECTORY
1819
$trainingDataPath = Resolve-Path -Path $env:TRAINING_DATA
1920
$cleanCache = [System.Convert]::ToBoolean($env:CLEAN_CACHE)
@@ -42,7 +43,7 @@ ForEach ($repositoryName in $repositoryDirectories) {
4243

4344
# Note that we are not removing *.importance-matrix.dat files because
4445
# they are relatively small but take a _very_ long time to compute.
45-
$importanceMatrixPath = Join-Path -Path $targetDirectoryPath -ChildPath "${repositoryName}.importance-matrix.dat"
46+
$importanceMatrixPath = Join-Path -Path $importanceMatrixDirectory -ChildPath "${repositoryName}.importance-matrix.dat"
4647

4748
# If a repository already contains an unquantized GGUF file, we use it directly.
4849
$unquantizedModelPathFromSource = Join-Path -Path $sourceDirectory -ChildPath $repositoryName | Join-Path -ChildPath "${repositoryName}.gguf"
@@ -64,8 +65,8 @@ ForEach ($repositoryName in $repositoryDirectories) {
6465
Invoke-Expression "$convertCommand --outfile `"${unquantizedModelPath}`" `"${sourceDirectoryPath}`""
6566
}
6667

67-
# We need to compute an importance matrix for all i-quants and
68-
# small k-quants to enhance the quality of the quantum models.
68+
# We need to compute an importance matrix for all i-quants
69+
# and small k-quants to enhance the quality of the models.
6970
# https://github.com/ggerganov/llama.cpp/tree/master/examples/imatrix
7071
$requiresImportanceMatrix = $type.Contains("IQ") -or "Q2_K Q2_K_S".Contains($type)
7172

@@ -84,7 +85,8 @@ ForEach ($repositoryName in $repositoryDirectories) {
8485

8586
$quantizeCommand = "${llamaCppDirectory}\build\bin\Release\llama-quantize.exe"
8687

87-
if ($requiresImportanceMatrix) {
88+
# If an importance matrix file exists for this model, we use it regardless of the quantization type.
89+
if (Test-Path -Path $importanceMatrixPath) {
8890
$quantizeCommand = "${quantizeCommand} --imatrix `"${importanceMatrixPath}`""
8991
}
9092

0 commit comments

Comments
 (0)