16
16
# cmake -B build
17
17
# cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
18
18
19
+ # Run 'ollama serve' in a separate terminal
20
+
19
21
export TOKENIZERS_PARALLELISM=false
20
22
LLAMA_CPP_PATH=/Users/appthreat/work/llama.cpp
21
23
cd $LLAMA_CPP_PATH
@@ -52,12 +54,14 @@ GGUF_MODEL_Q8_0_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q8_0-${FORMAT}
52
54
GGUF_MODEL_Q8_0_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q8_0-${FORMAT}
53
55
FUSED_MODEL=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${TUNING_TOOL}
54
56
57
+ # Direct conversion to 8-bit from the fused BF16 version
55
58
rm -rf ${GGUF_MODEL_Q8_0_PATH}
56
59
mkdir -p ${GGUF_MODEL_Q8_0_PATH}
57
60
python convert_hf_to_gguf.py --outtype q8_0 --outfile ${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q8_0-${FORMAT}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-q8_0.gguf --model-name ${GGUF_MODEL_Q8_0_NAME} ${FUSED_MODEL}
58
61
cp ${MODEL_FILE_PATH} ${GGUF_MODEL_Q8_0_PATH}/Modelfile
59
62
cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${GGUF_MODEL_Q8_0_PATH}/
60
63
64
+ # BF16
61
65
GGUF_MODEL_BF16_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-BF16-${FORMAT}
62
66
GGUF_MODEL_BF16_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-BF16-${FORMAT}
63
67
rm -rf ${GGUF_MODEL_BF16_PATH}
@@ -67,6 +71,16 @@ cp ${MODEL_FILE_PATH} ${GGUF_MODEL_BF16_PATH}/Modelfile
67
71
sed -i '' 's|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-q8_0.gguf|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-bf16.gguf|g' ${GGUF_MODEL_BF16_PATH}/Modelfile
68
72
cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${GGUF_MODEL_BF16_PATH}/
69
73
74
+ # MXFP4 - MOE only
75
+ GGUF_MODEL_MXFP4_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4-${FORMAT}
76
+ GGUF_MODEL_MXFP4_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4-${FORMAT}
77
+ rm -rf ${GGUF_MODEL_MXFP4_PATH}
78
+ mkdir -p ${GGUF_MODEL_MXFP4_PATH}
79
+ llama-quantize ${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-BF16-${FORMAT}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-bf16.gguf ${GGUF_MODEL_MXFP4_PATH}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4.gguf MXFP4_MOE
80
+ cp ${MODEL_FILE_PATH} ${GGUF_MODEL_MXFP4_PATH}/Modelfile
81
+ sed -i '' 's|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-q8_0.gguf|./${TOOL_BASE_MODEL}-${PARAM_SIZE}-MXFP4.gguf|g' ${GGUF_MODEL_MXFP4_PATH}/Modelfile
82
+ cp ${FUSED_MODEL}/*.json ${FUSED_MODEL}/merges.txt ${GGUF_MODEL_MXFP4_PATH}/
83
+
70
84
if [ "$TOOL_BASE_MODEL" == "cdx1-mini" ] || [ "$TOOL_BASE_MODEL" == "cdx1-nano" ]; then
71
85
GGUF_MODEL_Q6_K_NAME=${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q6_K-${FORMAT}
72
86
GGUF_MODEL_Q6_K_PATH=${CDXGEN_FT_PATH}/${HF_ORG}/${TOOL_BASE_MODEL}-${PARAM_SIZE}-Q6_K-${FORMAT}
114
128
export HF_HUB_ENABLE_HF_TRANSFER=0
115
129
hf auth whoami
116
130
hf upload --quiet --exclude "**/README.md" --repo-type model ${GGUF_MODEL_Q8_0_NAME} ${GGUF_MODEL_Q8_0_PATH} .
131
+ hf upload --quiet --exclude "**/README.md" --repo-type model ${GGUF_MODEL_MXFP4_NAME} ${GGUF_MODEL_MXFP4_PATH} .
117
132
if [ "$TOOL_BASE_MODEL" == "cdx1-mini" ] || [ "$TOOL_BASE_MODEL" == "cdx1-nano" ]; then
118
133
hf upload --quiet --exclude "**/README.md" --repo-type model ${GGUF_MODEL_Q6_K_NAME} ${GGUF_MODEL_Q6_K_PATH} .
119
134
else
@@ -123,11 +138,18 @@ else
123
138
fi
124
139
hf upload --quiet --exclude "**/README.md" --repo-type model ${GGUF_MODEL_BF16_NAME} ${GGUF_MODEL_BF16_PATH} .
125
140
141
+ # ## upload to ollama registry. Move this to a separate script in the future.
142
+
126
143
ollama pull hf.co/${GGUF_MODEL_Q8_0_NAME}
127
144
ollama cp hf.co/${GGUF_MODEL_Q8_0_NAME} ${GGUF_MODEL_Q8_0_NAME}
128
145
ollama push ${GGUF_MODEL_Q8_0_NAME}
129
146
ollama rm hf.co/${GGUF_MODEL_Q8_0_NAME}
130
147
148
+ ollama pull hf.co/${GGUF_MODEL_MXFP4_NAME}
149
+ ollama cp hf.co/${GGUF_MODEL_MXFP4_NAME} ${GGUF_MODEL_MXFP4_NAME}
150
+ ollama push ${GGUF_MODEL_MXFP4_NAME}
151
+ ollama rm hf.co/${GGUF_MODEL_MXFP4_NAME}
152
+
131
153
if [ "$TOOL_BASE_MODEL" == "cdx1-mini" ] || [ "$TOOL_BASE_MODEL" == "cdx1-nano" ]; then
132
154
ollama pull hf.co/${GGUF_MODEL_Q6_K_NAME}
133
155
ollama cp hf.co/${GGUF_MODEL_Q6_K_NAME} ${GGUF_MODEL_Q6_K_NAME}
0 commit comments