Skip to content

Commit dbad4bb

Browse files
committed
Add CI test for CoreML LoRA multimethod export
Tests three export configurations with file size validation:

1. Base only (single method) — baseline size
2. Base + LoRA adapter (multimethod) — small overhead from lora_a/lora_b
3. Base + LoRA + multifunction — same overhead (POSITIONAL sharing)

Uses stories110M with a synthetic zero-initialized LoRA adapter so base and adapter outputs match. Inference tests run on macOS only.

Authored with Claude.

ghstack-source-id: 95eedd9
ghstack-comment-id: 4094191365
Pull-Request: #18354
1 parent 8b8a2be commit dbad4bb

2 files changed

Lines changed: 257 additions & 0 deletions

File tree

.ci/scripts/test_coreml_lora.sh

Lines changed: 235 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,235 @@
1+
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# CI test for CoreML LoRA multimethod export: exports stories110M three ways
# (base only, base + LoRA adapter, base + LoRA + multifunction), validates
# the .pte file sizes, and (on macOS only) runs inference with each artifact.

# -e exit on error, -x trace commands, -u error on unset vars,
# pipefail: a pipeline fails if any stage fails.
set -exuo pipefail

source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

export EXECUTORCH_ROOT="$(dirname "${BASH_SOURCE[0]}")/../.."

if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
  PYTHON_EXECUTABLE=python3
fi

# Fail fast (and log the resolved path) if the interpreter is missing.
# `command -v` is the portable replacement for `which`.
command -v "${PYTHON_EXECUTABLE}"

EXPORT_SCRIPT="examples/apple/coreml/llama/export_static_llm_coreml.py"
RUN_SCRIPT="examples/apple/coreml/llama/run_static_llm.py"
RUN_MF_SCRIPT="examples/apple/coreml/llama/run_static_llm_multifunction.py"

# Export parameters — small context for fast CI.
MAX_CONTEXT_LEN=64
INPUT_LEN=32
CACHE_LEN=$((MAX_CONTEXT_LEN - INPUT_LEN))
28+
29+
# Remove all artifacts generated by this script: exported .pte files,
# captured inference outputs, and the synthetic adapter directory.
cleanup_files() {
  echo "Deleting generated files"
  rm -f base.pte lora.pte lora_mf.pte
  rm -f result_base*.txt result_lora*.txt
  # ADAPTER_DIR is only assigned after mktemp runs; guard so that a failure
  # before that point (the script runs under `set -u`) cannot break cleanup.
  if [[ -n "${ADAPTER_DIR:-}" ]]; then
    rm -rf -- "${ADAPTER_DIR}"
  fi
}
35+
36+
### SETUP ###
37+
pushd "${EXECUTORCH_ROOT}/examples/apple/coreml/llama"
38+
39+
# Download stories110M artifacts.
40+
download_stories_model_artifacts
41+
42+
# Create a synthetic LoRA adapter for stories110M.
43+
ADAPTER_DIR=$(mktemp -d)
44+
${PYTHON_EXECUTABLE} - "${ADAPTER_DIR}" <<'PYEOF'
45+
import json
46+
import sys
47+
import torch
48+
from safetensors.torch import save_file
49+
50+
adapter_dir = sys.argv[1]
51+
dim = 768
52+
n_heads = 12
53+
n_layers = 12
54+
rank = 8
55+
alpha = 16
56+
target_modules = ["q_proj", "v_proj"]
57+
58+
config = {
59+
"r": rank,
60+
"lora_alpha": alpha,
61+
"target_modules": target_modules,
62+
}
63+
with open(f"{adapter_dir}/adapter_config.json", "w") as f:
64+
json.dump(config, f)
65+
66+
# Create adapter weights in unsloth format.
67+
# lora_A: [rank, in_features], lora_B: [out_features, rank]
68+
# Initialize lora_B to zeros so the adapter is initially a no-op,
69+
# meaning base and lora outputs should match.
70+
tensors = {}
71+
for i in range(n_layers):
72+
for proj in target_modules:
73+
prefix = f"base_model.model.model.layers.{i}.self_attn.{proj}"
74+
tensors[f"{prefix}.lora_A.weight"] = torch.randn(rank, dim) * 0.01
75+
tensors[f"{prefix}.lora_B.weight"] = torch.zeros(dim, rank)
76+
77+
save_file(tensors, f"{adapter_dir}/adapter_model.safetensors")
78+
print(f"Created synthetic adapter in {adapter_dir}")
79+
PYEOF
80+
81+
ADAPTER_CHECKPOINT="${ADAPTER_DIR}/adapter_model.safetensors"
82+
ADAPTER_CONFIG="${ADAPTER_DIR}/adapter_config.json"
83+
84+
popd
85+
86+
### TEST 1: Base only (single method) ###

# file_size <path>: print size in bytes portably — BSD stat (macOS) first,
# GNU stat (Linux) as fallback.
file_size() {
  stat -f%z "$1" 2>/dev/null || stat -c%s "$1"
}

echo "=== Test 1: Base only (single method) ==="
${PYTHON_EXECUTABLE} "${EXPORT_SCRIPT}" \
  --checkpoint examples/apple/coreml/llama/stories110M.pt \
  --params examples/apple/coreml/llama/params.json \
  --output base.pte \
  --max_context_len "${MAX_CONTEXT_LEN}" \
  --input_len "${INPUT_LEN}"

BASE_SIZE=$(file_size base.pte)
echo "Test 1: base.pte size = ${BASE_SIZE} bytes"

### TEST 2: Base + LoRA adapter (multimethod, no multifunction) ###
echo "=== Test 2: Base + LoRA adapter ==="
${PYTHON_EXECUTABLE} "${EXPORT_SCRIPT}" \
  --checkpoint examples/apple/coreml/llama/stories110M.pt \
  --params examples/apple/coreml/llama/params.json \
  --output lora.pte \
  --max_context_len "${MAX_CONTEXT_LEN}" \
  --input_len "${INPUT_LEN}" \
  --adapter lora "${ADAPTER_CHECKPOINT}" "${ADAPTER_CONFIG}"

LORA_SIZE=$(file_size lora.pte)
echo "Test 2: lora.pte size = ${LORA_SIZE} bytes"

### TEST 3: Base + LoRA + multifunction ###
echo "=== Test 3: Base + LoRA + multifunction ==="
${PYTHON_EXECUTABLE} "${EXPORT_SCRIPT}" \
  --checkpoint examples/apple/coreml/llama/stories110M.pt \
  --params examples/apple/coreml/llama/params.json \
  --output lora_mf.pte \
  --max_context_len "${MAX_CONTEXT_LEN}" \
  --input_len "${INPUT_LEN}" \
  --multifunction \
  --adapter lora "${ADAPTER_CHECKPOINT}" "${ADAPTER_CONFIG}"

LORA_MF_SIZE=$(file_size lora_mf.pte)
echo "Test 3: lora_mf.pte size = ${LORA_MF_SIZE} bytes"
124+
125+
### FILE SIZE CHECKS ###
echo ""
echo "=== File size summary ==="
echo " Base: ${BASE_SIZE} bytes"
echo " Base + LoRA: ${LORA_SIZE} bytes"
echo " Base + LoRA + MF: ${LORA_MF_SIZE} bytes"

# abs <int>: print the absolute value of an integer argument.
abs() {
  local v=$1
  echo $(( v < 0 ? -v : v ))
}

# LoRA PTE size should be close to base size.
# skip_split_names prevents splitting LoRA-targeted modules (for POSITIONAL
# weight sharing), so lora.pte may be slightly smaller than base.pte.
LORA_DIFF=$((LORA_SIZE - BASE_SIZE))
ABS_LORA_DIFF=$(abs "${LORA_DIFF}")
echo " LoRA size difference: ${LORA_DIFF} bytes"

# Allow up to 10% of base size for the lora_a/lora_b weight overhead.
MAX_LORA_DIFF=$((BASE_SIZE / 10))
if [[ ${ABS_LORA_DIFF} -gt ${MAX_LORA_DIFF} ]]; then
  echo "FAIL: LoRA size difference ${LORA_DIFF} exceeds 10% of base size ${BASE_SIZE}"
  cleanup_files
  exit 1
fi

# Multifunction PTE should be close to LoRA PTE size.
# POSITIONAL sharing deduplicates base weights across methods.
MF_DIFF=$((LORA_MF_SIZE - LORA_SIZE))
ABS_MF_DIFF=$(abs "${MF_DIFF}")
echo " Multifunction difference: ${MF_DIFF} bytes"

# Tighter 5% bound: multifunction should add almost nothing over plain LoRA.
MAX_MF_DIFF=$((BASE_SIZE / 20))
if [[ ${ABS_MF_DIFF} -gt ${MAX_MF_DIFF} ]]; then
  echo "FAIL: Multifunction difference ${MF_DIFF} exceeds 5% of base size ${BASE_SIZE}"
  cleanup_files
  exit 1
fi

echo "File size checks passed."
168+
169+
### INFERENCE TESTS ###
# These require CoreML runtime (macOS with ANE).
# Skip if not on macOS or if explicitly disabled.
if [[ "$(uname)" != "Darwin" ]] || [[ "${SKIP_INFERENCE:-0}" == "1" ]]; then
  echo "Skipping inference tests (not on macOS or SKIP_INFERENCE=1)"
  cleanup_files
  exit 0
fi

# Shared runner flags. Use a bash array (not a whitespace-split string) so
# each flag/value survives quoting intact when expanded below.
RUNNER_ARGS=(
  --params examples/apple/coreml/llama/params.json
  --tokenizer examples/apple/coreml/llama/tokenizer.model
  --temperature 0
  --max_new_tokens 20
  --input_len "${INPUT_LEN}"
  --cache_len "${CACHE_LEN}"
)
PROMPT="Once upon a time,"

# Test 1 inference: base only.
# NOTE(review): `|| true` makes every inference run best-effort; only the
# captured output is reported, runner failures do not fail the job.
echo ""
echo "=== Test 1 inference: base (single method) ==="
${PYTHON_EXECUTABLE} "${RUN_SCRIPT}" \
  --model base.pte \
  --prompt "${PROMPT}" \
  "${RUNNER_ARGS[@]}" > result_base.txt 2>&1 || true
echo "Base output:"
cat result_base.txt

# Test 2 inference: base method from lora PTE
echo ""
echo "=== Test 2 inference: base method (from lora PTE) ==="
# The base method is "forward" in the multimethod PTE.
${PYTHON_EXECUTABLE} "${RUN_SCRIPT}" \
  --model lora.pte \
  --prompt "${PROMPT}" \
  "${RUNNER_ARGS[@]}" > result_lora_base.txt 2>&1 || true
echo "LoRA PTE base output:"
cat result_lora_base.txt

# Test 2 inference: lora method from lora PTE
echo ""
echo "=== Test 2 inference: lora method (from lora PTE) ==="
${PYTHON_EXECUTABLE} "${RUN_SCRIPT}" \
  --model lora.pte \
  --method lora \
  --prompt "${PROMPT}" \
  "${RUNNER_ARGS[@]}" > result_lora_lora.txt 2>&1 || true
echo "LoRA PTE lora output:"
cat result_lora_lora.txt

# Test 3 inference: multifunction lora PTE (different runner, different flags).
echo ""
echo "=== Test 3 inference: multifunction ==="
${PYTHON_EXECUTABLE} "${RUN_MF_SCRIPT}" \
  --model lora_mf.pte \
  --prompt "${PROMPT}" \
  --max_context_len "${MAX_CONTEXT_LEN}" \
  --max_new_tokens 20 \
  --temperature 0 \
  --params examples/apple/coreml/llama/params.json \
  --tokenizer examples/apple/coreml/llama/tokenizer.model > result_lora_mf.txt 2>&1 || true
echo "Multifunction output:"
cat result_lora_mf.txt

# Since lora_B is initialized to zeros, the LoRA adapter is a no-op.
# Base output from Test 1 and LoRA output from Test 2 should match.
# NOTE(review): the outputs are only printed, never diffed — runner log noise
# makes byte-for-byte comparison unreliable, so comparison is manual for now.
echo ""
echo "=== Output comparison ==="
echo "Base and LoRA outputs should match (zero adapter)."

echo ""
echo "All CoreML LoRA export tests passed!"
cleanup_files

.github/workflows/trunk.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,28 @@ jobs:
456456
# Test ANE llama
457457
${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh
458458
459+
  # CI job: run the CoreML LoRA multimethod export test
  # (.ci/scripts/test_coreml_lora.sh) on an Apple Silicon runner.
  # SKIP_INFERENCE=1 limits it to export + file-size checks.
  test-coreml-lora:
    name: test-coreml-lora
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'recursive'
      # Check out the PR head commit on pull requests, the pushed commit otherwise.
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux
        bash .ci/scripts/setup-conda.sh
        eval "$(conda shell.bash hook)"

        # Install requirements
        ${CONDA_RUN} sh install_requirements.sh
        ${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh
        ${CONDA_RUN} python install_executorch.py
        ${CONDA_RUN} sh examples/models/llama/install_requirements.sh

        # Test CoreML LoRA multimethod export
        SKIP_INFERENCE=1 ${CONDA_RUN} sh .ci/scripts/test_coreml_lora.sh
480+
459481
test-llama-torchao-lowbit:
460482
name: test-llama-torchao-lowbit
461483
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main

0 commit comments

Comments
 (0)