Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ This repo contains PyTorch model definitions, pretrained weights and inference/s
</div>

## 🔥🔥🔥 Latest Updates
- October 31, 2025: 🚀 Optimized FP8 inference now available via [AngelSlim](https://github.com/Tencent/AngelSlim), delivering up to 40% faster generation!
- September 18, 2025: ✨ Try the [PromptEnhancer-32B model](https://huggingface.co/PromptEnhancer/PromptEnhancer-32B) for higher-quality prompt enhancement!
- September 18, 2025: ✨ [ComfyUI workflow of HunyuanImage-2.1](https://github.com/KimbingNg/ComfyUI-HunyuanImage2.1) is available now!
- September 16, 2025: 👑 We achieved the Top1 on Arena's leaderboard for text-to-image open-source models. [Leaderboard](https://artificialanalysis.ai/text-to-image/arena/leaderboard-text)
Expand Down Expand Up @@ -122,6 +123,14 @@ Additionally, we **highly recommend** using the full generation pipeline for bet
| Distilled text-to-image Model | hunyuanimage2.1-distilled | Distilled model for faster inference | 8 | 3.25 | 4 |
| Refiner | hunyuanimage-refiner | The refiner model | N/A | N/A | N/A |

### FP8 Inference

Enable FP8 quantized inference with [AngelSlim](https://github.com/Tencent/AngelSlim) to significantly reduce GPU memory usage and speed up generation.
- **Recommended:** `fp8_mode="weight_only"` for optimal image quality and memory savings
- **Alternative:** `fp8_mode="w8a8"` for maximum speed and lower memory usage (may slightly lower quality)
- `use_fp8=True` is deprecated and now maps to `"weight_only"`.

### Example:

```python
import os
Expand All @@ -131,7 +140,9 @@ from hyimage.diffusion.pipelines.hunyuanimage_pipeline import HunyuanImagePipeli

# Supported model_name: hunyuanimage-v2.1, hunyuanimage-v2.1-distilled
model_name = "hunyuanimage-v2.1"
pipe = HunyuanImagePipeline.from_pretrained(model_name=model_name, use_fp8=True)
# Supported fp8_mode: weight_only, w8a8
# pipe = HunyuanImagePipeline.from_pretrained(model_name=model_name, fp8_mode="weight_only")
pipe = HunyuanImagePipeline.from_pretrained(model_name=model_name, fp8_mode="w8a8")
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里增加一行weight_only的调用代码,可以注释掉

pipe = pipe.to("cuda")

# The input prompt
Expand Down
14 changes: 13 additions & 1 deletion README_CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@


## 🔥🔥🔥 最新动态
- 2025 年 10 月 31 日:🚀 通过集成 [AngelSlim](https://github.com/Tencent/AngelSlim) 优化 FP8 推理,最高可带来约 40% 的生成加速!
- 2025 年 9 月 18 日:✨ 欢迎体验 [PromptEnhancer-32B 模型](https://huggingface.co/PromptEnhancer/PromptEnhancer-32B) 以获得更高质量的提示词增强!
- 2025 年 9 月 18 日:✨ [HunyuanImage-2.1 的 ComfyUI 工作流](https://github.com/KimbingNg/ComfyUI-HunyuanImage2.1) 现已开放体验!
- 2025 年 9 月 16 日:👑 我们在 Arena 文生图开源模型排行榜上获得第一名![排行榜](https://artificialanalysis.ai/text-to-image/arena/leaderboard-text)
Expand Down Expand Up @@ -110,6 +111,15 @@ HunyuanImage-2.1 **仅支持 2K** 图像生成(如 1:1 时为 2048x2048,16:9
| 蒸馏文生图模型 | hunyuanimage2.1-distilled | 蒸馏模型,推理更快 | 8 | 3.25 | 4 |
| 精修模型 | hunyuanimage-refiner | 精修模型 | N/A | N/A | N/A |

### FP8 推理

借助 [AngelSlim](https://github.com/Tencent/AngelSlim) 可以显著降低显存占用并提升生成速度:
- **推荐:** 将 `fp8_mode` 设为 `"weight_only"`,在画质与显存之间取得最佳平衡;
- **可选:** `"w8a8"` 模式提供更高速度与更低显存占用,但画质可能略有下降;
- `use_fp8=True` 将被弃用,现已等价映射为 `"weight_only"`。


### 示例

```python
import os
Expand All @@ -119,7 +129,9 @@ from hyimage.diffusion.pipelines.hunyuanimage_pipeline import HunyuanImagePipeli

# 支持的 model_name:hunyuanimage-v2.1, hunyuanimage-v2.1-distilled
model_name = "hunyuanimage-v2.1"
pipe = HunyuanImagePipeline.from_pretrained(model_name=model_name, use_fp8=True)
# 支持的 fp8_mode:"weight_only"(推荐),"w8a8"(更快)
# pipe = HunyuanImagePipeline.from_pretrained(model_name=model_name, fp8_mode="weight_only")
pipe = HunyuanImagePipeline.from_pretrained(model_name=model_name, fp8_mode="w8a8")
pipe = pipe.to("cuda")

# 输入提示词
Expand Down
52 changes: 44 additions & 8 deletions hyimage/diffusion/pipelines/hunyuanimage_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,12 @@ class HunyuanImagePipelineConfig:
enable_vae_offloading: bool = True # offload vae after finishing
enable_byt5_offloading: bool = True # offload byt5 after finishing

use_fp8: bool = False
# FP8 settings
use_fp8: bool = False # Deprecated: legacy single FP8 switch. Kept for backward compatibility.
use_fp8_weight_only: bool = False # Use FP8 weight only mode
use_fp8_w8a8: bool = False # Use FP8 w8a8 mode
# Preferred unified FP8 mode: "none" | "weight_only" | "w8a8"
fp8_mode: str = "none"

cfg_mode: str = "MIX_mode_0"
guidance_rescale: float = 0.0
Expand Down Expand Up @@ -159,25 +164,28 @@ def __init__(

def _load_dit(self):
dit_device = None
if self.config.enable_full_dit_offloading or self.config.use_fp8:
fp8_mode = self._resolve_fp8_mode()
if self.config.enable_full_dit_offloading or fp8_mode != "none":
dit_device = 'cpu'
else:
dit_device = self.device

try:
dit_config = self.config.dit_config
self.dit = instantiate(dit_config.model, dtype=self.torch_dtype, device=dit_device)
if self.config.use_fp8:
from hyimage.models.utils.fp8_quantization import convert_fp8_linear
if fp8_mode != "none":
from angelslim.compressor.diffusion import DynamicDiTQuantizer
# Map unified mode to backend quant type
quant_type = "fp8-per-tensor-weight-only" if fp8_mode == "weight_only" else "fp8-per-tensor"
quantizer = DynamicDiTQuantizer(quant_type=quant_type)
if not Path(dit_config.fp8_scale).exists():
raise FileNotFoundError(f"FP8 scale file not found: {dit_config.fp8_scale}. Please download from https://huggingface.co/tencent/HunyuanImage-2.1/")
if dit_config.fp8_load_from is not None and Path(dit_config.fp8_load_from).exists():
convert_fp8_linear(self.dit, dit_config.fp8_scale)
quantizer.convert_linear(self.dit, scale=dit_config.fp8_scale)
load_hunyuan_dit_state_dict(self.dit, dit_config.fp8_load_from, strict=True)
else:
raise FileNotFoundError(f"FP8 ckpt not found: {dit_config.fp8_load_from}. Please download from https://huggingface.co/tencent/HunyuanImage-2.1/")
load_hunyuan_dit_state_dict(self.dit, dit_config.load_from, strict=True)
convert_fp8_linear(self.dit, dit_config.fp8_scale)

self.dit = self.dit.to(dit_device)
else:
load_hunyuan_dit_state_dict(self.dit, dit_config.load_from, strict=True)
Expand Down Expand Up @@ -256,9 +264,33 @@ def refiner_pipeline(self):
from hyimage.diffusion.pipelines.hunyuanimage_refiner_pipeline import HunYuanImageRefinerPipeline
if self.config.enable_stage1_offloading:
self.offload()
self._refiner_pipeline = HunYuanImageRefinerPipeline.from_pretrained(self.config.refiner_model_name, use_fp8=self.config.use_fp8)
self._refiner_pipeline = HunYuanImageRefinerPipeline.from_pretrained(
self.config.refiner_model_name,
fp8_mode=self._resolve_fp8_mode(),
)
return self._refiner_pipeline

def _resolve_fp8_mode(self) -> str:
"""Resolve FP8 mode with backward compatibility.

Preferred: config.fp8_mode in {"none", "weight_only", "w8a8"}.
Fallback to old boolean switches if fp8_mode == "none".
"""
mode = getattr(self.config, "fp8_mode", "none") or "none"
if mode not in {"none", "weight_only", "w8a8"}:
raise ValueError(f"Invalid fp8_mode: {mode}. Expected one of 'none', 'weight_only', 'w8a8'.")
if mode != "none":
return mode
use_fp8_weight_only = getattr(self.config, "use_fp8_weight_only", False)
use_fp8_w8a8 = getattr(self.config, "use_fp8_w8a8", False)
if use_fp8_weight_only and use_fp8_w8a8:
raise ValueError("Both use_fp8_weight_only and use_fp8_w8a8 are True. Please enable only one.")
if use_fp8_w8a8:
return "w8a8"
if self.config.use_fp8 or use_fp8_weight_only:
return "weight_only"
return "none"

@property
def reprompt_model(self):
"""
Expand Down Expand Up @@ -860,6 +892,8 @@ def to(self, device: str | torch.device):
self.text_encoder = self.text_encoder.to(device, non_blocking=True)
if self.vae is not None:
self.vae = self.vae.to(device, non_blocking=True)
if torch.cuda.is_available():
torch.cuda.empty_cache()
return self

def offload(self):
Expand All @@ -869,6 +903,8 @@ def offload(self):
self.text_encoder = self.text_encoder.to('cpu', non_blocking=True)
if self.vae is not None:
self.vae = self.vae.to('cpu', non_blocking=True)
if torch.cuda.is_available():
torch.cuda.empty_cache()
return self

def update_config(self, **kwargs):
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ transformers[accelerate,tiktoken]==4.56.0
wheel
setuptools
modelscope
huggingface_hub[cli]
huggingface_hub[cli]
angelslim[diffusion]==0.2.0