-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtrain-sft.sh
More file actions
executable file
·42 lines (32 loc) · 1.11 KB
/
train-sft.sh
File metadata and controls
executable file
·42 lines (32 loc) · 1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/bash
#
# Launch supervised fine-tuning through llamafactory-cli inside a conda env.
#
# Environment overrides (all optional):
#   LLAMA_FACTORY_DIR     LLaMA-Factory checkout to run from
#                         (default: <script dir>/LLaMA-Factory)
#   ENV_NAME              conda environment to activate (default: llamafactory)
#   TRAIN_CONFIG          training YAML passed to `llamafactory-cli train`
#                         (default: <script dir>/train_configs/qwen3vl_2b_full_sft_all.yaml)
#   CUDA_VISIBLE_DEVICES  GPUs to expose (default: 0,1,2,3)
#
# Exits non-zero if conda, the environment, the CLI, the checkout or the
# config is missing, or if training itself fails.

# NOTE(review): `set -u` is deliberately not enabled; conda activation scripts
# are sourced into this shell and may reference unset variables -- confirm
# before tightening.
set -e

# Print an error message to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LLAMA_FACTORY_DIR="${LLAMA_FACTORY_DIR:-$REPO_DIR/LLaMA-Factory}"
ENV_NAME="${ENV_NAME:-llamafactory}"
TRAIN_CONFIG="${TRAIN_CONFIG:-$REPO_DIR/train_configs/qwen3vl_2b_full_sft_all.yaml}"

# Initialize conda. The hook output is captured in a plain assignment first so
# that a failing `conda shell.bash hook` aborts under `set -e`; inside
# `eval "$(...)"` its exit status would be discarded.
command -v conda >/dev/null 2>&1 || die "conda not found on PATH"
conda_hook="$(conda shell.bash hook)"
eval "$conda_hook"

# Activate environment
echo "Activating conda environment: $ENV_NAME"
conda activate "$ENV_NAME"

# Verify environment. The python lines are informational only; the CLI is
# required, so its absence is reported here rather than at launch time.
echo "Python: $(command -v python)"
echo "Python version: $(python --version)"
command -v llamafactory-cli >/dev/null 2>&1 \
  || die "llamafactory-cli not found in conda environment $ENV_NAME"
echo "CLI: $(command -v llamafactory-cli)"

if [[ ! -d "$LLAMA_FACTORY_DIR" ]]; then
  die "LLaMA-Factory directory not found at $LLAMA_FACTORY_DIR"
fi

if [[ ! -f "$TRAIN_CONFIG" ]]; then
  die "Training config not found at $TRAIN_CONFIG"
fi

# Make the config path absolute while still in the caller's working directory:
# a relative TRAIN_CONFIG would otherwise stop resolving after the cd below.
config_dir="$(cd "$(dirname -- "$TRAIN_CONFIG")" && pwd)"
TRAIN_CONFIG="$config_dir/$(basename -- "$TRAIN_CONFIG")"

# Navigate to LLaMA-Factory directory
cd "$LLAMA_FACTORY_DIR" || die "Cannot change directory to $LLAMA_FACTORY_DIR"

# Set GPUs unless already provided by the caller
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2,3}"
echo "Using GPUs: $CUDA_VISIBLE_DEVICES"

# Run training. FORCE_TORCHRUN=1 is passed only in this command's environment.
echo "Starting SFT training..."
FORCE_TORCHRUN=1 llamafactory-cli train "$TRAIN_CONFIG"
echo "Training completed!"