# ltxv_13b_ic_lora.yaml
# LTXV LoRA Training Configuration

# Model configuration
model:
  model_source: "LTXV_13B_097_DEV" # Options: "LTXV_13B_097_DEV", "LTXV_2B_0.9.6_DEV", "LTXV_2B_0.9.5", "LTXV_2B_0.9.1", "LTXV_2B_0.9.0", or a HF repo/local path
  training_mode: "lora" # Options: "lora" or "full"
  load_checkpoint: null # Path to a checkpoint file or directory to resume from. If a directory, the latest checkpoint will be used.

# LoRA configuration
lora:
  rank: 128
  alpha: 128
  dropout: 0.0
  target_modules:
    - "to_k"
    - "to_q"
    - "to_v"
    - "to_out.0"
    - "ff.net.0.proj"
    - "ff.net.2"

# Conditioning configuration
conditioning:
  mode: "reference_video" # Options: "none", "reference_video"
  first_frame_conditioning_p: 0.2
  reference_latents_dir: "reference_latents" # Directory for reference latents when using reference_video mode
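  # first_frame_conditioning_p is presumably the probability of additionally conditioning a
  # training sample on its first frame (image-to-video style), on top of the reference-video
  # conditioning selected by `mode`.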

# Optimization configuration
optimization:
  learning_rate: 2e-4
  steps: 2000
  batch_size: 1
  gradient_accumulation_steps: 1
  max_grad_norm: 1.0
  optimizer_type: "adamw" # Options: "adamw" or "adamw8bit"
  scheduler_type: "linear" # Options: "constant", "linear", "cosine", "cosine_with_restarts", "polynomial"
  scheduler_params: {}
  enable_gradient_checkpointing: true
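  # The effective batch size is batch_size * gradient_accumulation_steps (1 here); raising
  # gradient_accumulation_steps simulates larger batches without extra VRAM. "adamw8bit"
  # stores optimizer states in 8-bit and can substantially reduce optimizer memory.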

# Acceleration configuration
acceleration:
  mixed_precision_mode: "bf16" # Options: "no", "fp16", "bf16"
  quantization: null # Options: null, "int8-quanto", "int4-quanto", "int2-quanto", "fp8-quanto", "fp8uz-quanto"
  load_text_encoder_in_8bit: false # Load the text encoder in 8-bit precision to save memory
  compile_with_inductor: false
  compilation_mode: "reduce-overhead" # Options: "default", "reduce-overhead", "max-autotune"
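  # bf16 mixed precision requires hardware with bfloat16 support (e.g. NVIDIA Ampere or newer).
  # The "*-quanto" options presumably quantize model weights via optimum-quanto, trading some
  # quality for lower VRAM, and compile_with_inductor presumably wraps the model in
  # torch.compile using the mode given by compilation_mode.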

# Data configuration
data:
  preprocessed_data_root: "/path/to/preprocessed/data"
  num_dataloader_workers: 2
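  # preprocessed_data_root is expected to point at the output of the repo's dataset
  # preprocessing step (precomputed latents and text embeddings), not at raw videos.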

# Validation configuration
validation:
  prompts:
    - "First validation prompt"
    - "Second validation prompt"
    - "Third validation prompt"
  reference_videos:
    - "/path/to/reference_a.mp4"
    - "/path/to/reference_b.mp4"
    - "/path/to/reference_c.mp4"
  negative_prompt: "worst quality, inconsistent motion, blurry, jittery, distorted"
  video_dims: [512, 512, 81] # [width, height, frames]
  seed: 42
  inference_steps: 50
  interval: 250 # Set to null to disable validation
  videos_per_prompt: 1
  guidance_scale: 3.5
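  # video_dims is [width, height, frames]; LTX-Video typically expects spatial dims divisible
  # by 32 and a frame count of the form 8*k + 1 (e.g. 81, as here). In reference_video mode,
  # reference_videos should presumably provide one conditioning clip per prompt above.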

# Checkpoint configuration
checkpoints:
  interval: 250 # Save a checkpoint every N steps, set to null to disable
  keep_last_n: 3 # Keep only the N most recent checkpoints, set to -1 to keep all

# Flow matching configuration
flow_matching:
  timestep_sampling_mode: "shifted_logit_normal" # Options: "uniform", "shifted_logit_normal"
  timestep_sampling_params: {}
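  # "shifted_logit_normal" presumably draws training timesteps from a logit-normal
  # distribution and then applies a shift toward noisier timesteps (as in SD3-style
  # flow-matching training), while "uniform" samples them uniformly; extra sampler
  # options can go in timestep_sampling_params.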

# HuggingFace Hub configuration
hub:
  push_to_hub: false # Whether to push the model weights to the Hugging Face Hub
  hub_model_id: null # Hugging Face Hub repository ID (e.g., 'username/repo-name'). Must be provided if `push_to_hub` is set to True
# W&B configuration
wandb:
enabled: false # Set to true to enable W&B logging
project: "ltxv-trainer"
entity: null # Your W&B username or team
tags: []
log_validation_videos: true

# General configuration
seed: 42
output_dir: "outputs/ic_lora_13b"