8 changes: 4 additions & 4 deletions samples/cpp/image_generation/README.md
@@ -111,17 +111,17 @@ With adapter | Without adapter

## Run text to image with TaylorSeer caching optimization

The `taylorseer_text2image` sample demonstrates how to use TaylorSeer Lite caching to accelerate text to image generation. TaylorSeer is a caching optimization technique that uses Taylor series approximation to predict intermediate outputs during diffusion inference, reducing the number of computationally expensive transformer forward passes.
The `taylorseer_text2image` sample demonstrates how to use TaylorSeer Lite caching to accelerate text to image generation. TaylorSeer is a caching optimization technique that uses Taylor series approximation to predict intermediate outputs during diffusion inference, reducing the number of computationally expensive transformer forward passes. TaylorSeer caching is **enabled by default** for Flux and StableDiffusion3 Text2Image pipelines.
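The approximation at the heart of this is easy to sketch outside the library. The helper below is a toy illustration only — scalar values stand in for cached transformer outputs, and it is not the OpenVINO GenAI implementation:

```python
# Standalone sketch of the idea behind TaylorSeer-style caching: instead of
# running the transformer at every diffusion step, predict its output from
# recently cached outputs via a first-order Taylor (finite-difference)
# extrapolation, and only refresh the cache with a real forward pass
# periodically.
def taylor_predict(f_t, f_t_minus_1, steps_ahead=1):
    """First-order extrapolation: f(t + k) ~ f(t) + k * (f(t) - f(t-1))."""
    return f_t + steps_ahead * (f_t - f_t_minus_1)

# Toy scalar stand-ins for two cached transformer outputs:
predicted = taylor_predict(9.0, 4.0)
print(predicted)  # 14.0
```

Higher-order variants keep more history and fit higher-degree Taylor terms; the trade-off is prediction accuracy versus how many expensive forward passes can be skipped.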

Run the sample with custom parameters:

```bash
./taylorseer_text2image ./flux.1-dev/FP16 "a beautiful sunset over mountains"
```

The sample generates two images with and without TaylorSeer config applied using the same prompt:
- `taylorseer.bmp` with TaylorSeer config applied
- `taylorseer_baseline.bmp` without TaylorSeer config applied
The sample generates two images using the same prompt:
- `taylorseer_baseline.bmp` without caching
- `taylorseer.bmp` with TaylorSeer caching applied

Check the difference:

7 changes: 6 additions & 1 deletion samples/cpp/image_generation/taylorseer_text2image.cpp
@@ -9,6 +9,7 @@

#include <chrono>
#include <iostream>
#include <optional>

int32_t main(int32_t argc, char* argv[]) try {
if (argc != 3) {
@@ -28,6 +29,10 @@ int32_t main(int32_t argc, char* argv[]) try {

ov::genai::Text2ImagePipeline pipe(models_path, device);
std::cout << "Generating baseline image without caching...\n";
auto generation_config = pipe.get_generation_config();
generation_config.taylorseer_config = std::nullopt; // explicitly disable caching
pipe.set_generation_config(generation_config);

auto start_time = std::chrono::high_resolution_clock::now();

ov::Tensor baseline_image = pipe.generate(prompt,
@@ -51,7 +56,7 @@

ov::genai::TaylorSeerCacheConfig taylorseer_config{cache_interval, disable_before, disable_after};
std::cout << taylorseer_config.to_string() << "\n";
auto generation_config = pipe.get_generation_config();
generation_config = pipe.get_generation_config();
generation_config.taylorseer_config = taylorseer_config;
pipe.set_generation_config(generation_config);

8 changes: 4 additions & 4 deletions samples/python/image_generation/README.md
@@ -125,17 +125,17 @@ With adapter | Without adapter

## Run text to image with TaylorSeer caching optimization

The `taylorseer_text2image.py` sample demonstrates how to use TaylorSeer Lite caching to accelerate text to image generation. TaylorSeer is a caching optimization technique that uses Taylor series approximation to predict intermediate outputs during diffusion inference, reducing the number of computationally expensive transformer forward passes.
The `taylorseer_text2image.py` sample demonstrates how to use TaylorSeer Lite caching to accelerate text to image generation. TaylorSeer is a caching optimization technique that uses Taylor series approximation to predict intermediate outputs during diffusion inference, reducing the number of computationally expensive transformer forward passes. TaylorSeer caching is **enabled by default** for Flux and StableDiffusion3 Text2Image pipelines.

Run the sample with custom parameters:

```bash
python taylorseer_text2image.py ./flux.1-dev/FP16 "a beautiful sunset over mountains"
```

The sample generates two images with and without TaylorSeer config applied using the same prompt:
- `taylorseer.bmp` with TaylorSeer config applied
- `taylorseer_baseline.bmp` without TaylorSeer config applied
The sample generates two images using the same prompt:
- `taylorseer_baseline.bmp` without caching
- `taylorseer.bmp` with TaylorSeer caching applied

Check the difference:

4 changes: 4 additions & 0 deletions samples/python/image_generation/taylorseer_text2image.py
@@ -39,6 +39,10 @@ def callback(step, num_steps, latent):

# Generate baseline for comparison
print(f"\nGenerating baseline image without caching...")
baseline_config = pipe.get_generation_config()
baseline_config.taylorseer_config = None # explicitly disable caching
pipe.set_generation_config(baseline_config)

start_time = time.time()
baseline_tensor = pipe.generate(args.prompt, **generate_kwargs)
baseline_time = time.time() - start_time
16 changes: 14 additions & 2 deletions site/docs/concepts/optimization-techniques/diffusion-caching.md
@@ -40,14 +40,26 @@ taylorseer_config.disable_cache_after_step = -1
```

### Image Generation (Flux / StableDiffusion3)
TaylorSeer caching is **enabled by default** for Flux and StableDiffusion3 Text2Image pipelines.

```python
pipe = openvino_genai.Text2ImagePipeline(models_path, device)
# Apply TaylorSeerCacheConfig to generation config
# TaylorSeer is active out of the box
res = pipe.generate(prompt, num_inference_steps=28)
```

To customize caching parameters, use `set_generation_config()`:
```python
generation_config = pipe.get_generation_config()
generation_config.taylorseer_config = taylorseer_config
pipe.set_generation_config(generation_config)
```

res = pipe.generate(prompt, num_inference_steps=28)
To disable caching entirely:
```python
generation_config = pipe.get_generation_config()
generation_config.taylorseer_config = None # disable caching
pipe.set_generation_config(generation_config)
```
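As a rough mental model, the config knobs combine as in the standalone sketch below. The scheduling logic is assumed from the parameter names seen in the C++ sample (`cache_interval`, `disable_before`, `disable_after`) — it is an illustration, not the library's actual scheduler:

```python
# Hypothetical sketch of a TaylorSeer-style schedule: caching is off (every
# step runs a full forward pass) before `disable_before` and, when
# `disable_after` is not -1, from `disable_after` onward; in between, a real
# forward pass refreshes the cache every `cache_interval`-th step and the
# remaining steps are predicted from the cache.
def full_compute_steps(num_steps, cache_interval, disable_before, disable_after):
    """Return the steps that would run a real forward pass."""
    steps = []
    for step in range(num_steps):
        caching_off = step < disable_before or (
            disable_after != -1 and step >= disable_after
        )
        if caching_off or step % cache_interval == 0:
            steps.append(step)
    return steps

# 10 steps, refresh every 3rd step, no caching for the first 2 steps:
print(full_compute_steps(10, 3, 2, -1))  # [0, 1, 3, 6, 9]
```

Under these assumptions, only 5 of 10 steps pay for a full transformer pass; the rest are extrapolated.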

### Video Generation (LTX-Video)
1 change: 1 addition & 0 deletions src/cpp/src/image_generation/flux_pipeline.hpp
@@ -612,6 +612,7 @@ class FluxPipeline : public DiffusionPipeline {
m_generation_config.guidance_scale = 3.5f;
m_generation_config.num_inference_steps = 28;
m_generation_config.strength = 1.0f;
m_generation_config.taylorseer_config = TaylorSeerCacheConfig{};
} else if (m_pipeline_type == PipelineType::IMAGE_2_IMAGE || m_pipeline_type == PipelineType::INPAINTING) {
m_generation_config.guidance_scale = 7.0f;
m_generation_config.num_inference_steps = 28;
@@ -740,6 +740,9 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
m_generation_config.num_inference_steps = 28;
m_generation_config.max_sequence_length = 256;
m_generation_config.strength = m_pipeline_type == PipelineType::TEXT_2_IMAGE ? 1.0f : 0.6f;
if (m_pipeline_type == PipelineType::TEXT_2_IMAGE) {
m_generation_config.taylorseer_config = TaylorSeerCacheConfig{};
}
} else {
OPENVINO_THROW("Unsupported class_name '", class_name, "'. Please, contact OpenVINO GenAI developers");
}
1 change: 1 addition & 0 deletions tests/python_tests/conftest.py
@@ -76,6 +76,7 @@ def pytest_configure(config: pytest.Config):
"tiny-random-latent-consistency": "echarlaix/tiny-random-latent-consistency",
"tiny-random-flux": "optimum-intel-internal-testing/tiny-random-flux",
"tiny-random-sdxl": "echarlaix/tiny-random-stable-diffusion-xl",
"tiny-random-sd3": "optimum-intel-internal-testing/stable-diffusion-3-tiny-random",
}

DEFAULT_IMAGE_GEN_MODEL_ID = "tiny-random-latent-consistency"
7 changes: 7 additions & 0 deletions tests/python_tests/test_image_generation.py
@@ -10,6 +10,7 @@
from utils.ov_genai_pipelines import should_skip_npuw_tests

FLUX_MODEL_ID = "tiny-random-flux"
SD3_MODEL_ID = "tiny-random-sd3"
SDXL_MODEL_ID = "tiny-random-sdxl"


@@ -191,6 +192,12 @@ def callback(step, num_steps, latent):
assert image is not None
assert len(callback_calls) > 0

@pytest.mark.parametrize("image_generation_model", [FLUX_MODEL_ID, SD3_MODEL_ID], indirect=True)
def test_taylorseer_default_on(self, image_generation_model):
"""Test that TaylorSeer is enabled by default for Flux and StableDiffusion3 Text2Image pipelines."""
pipe = ov_genai.Text2ImagePipeline(image_generation_model, "CPU")
assert pipe.get_generation_config().taylorseer_config is not None


class TestImageGenerationOnNpuByNpuwCpu:
def _construct_reshaped(self, model_dir):
2 changes: 1 addition & 1 deletion tools/llm_bench/README.md
@@ -226,7 +226,7 @@ python benchmark.py -m models/dreamlike_anime_1_0_ov/FP16 -p "cat wizard, gandal
- `--static_reshape`: Reshape image generation pipeline to specific width & height at pipeline creation time.
- `--guidance_scale`: guidance_scale parameter for pipeline, supported via json JSON input only.
- `--images`: Like a `--media`, path to the directory or single image.
- `--taylorseer_config`: TaylorSeer cache configuration, supported via JSON string or path to JSON file.
- `--taylorseer_config`: TaylorSeer cache configuration, supported via JSON string or path to JSON file. **Note:** TaylorSeer caching is enabled by default for Flux and StableDiffusion3 Text2Image pipelines. To disable it for a baseline benchmark, pass `--taylorseer_config '{"disable_cache_after_step": 0}'`.

> **Supported Image Generation model types:** stable-diffusion, ssd, tiny-sd, small-sd, lcm, sdxl, dreamlike, flux

72 changes: 40 additions & 32 deletions tools/who_what_benchmark/tests/test_cli_image.py
@@ -121,44 +121,52 @@ def test_image_model_genai(model_id, model_type, tmp_path):
assert GT_FILE.exists()
assert (tmp_path / "reference").exists()

output = run_wwb([
"--target-model",
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
GT_FILE,
"--device",
"CPU",
"--model-type",
model_type,
"--genai",
"--num-inference-steps",
"2",
])
output = run_wwb(
[
"--target-model",
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
GT_FILE,
"--device",
"CPU",
"--model-type",
model_type,
"--genai",
"--num-inference-steps",
"2",
"--taylorseer-config",
'{"disable_cache_after_step": 0}',
]
)

assert "Metrics for model" in output
similarity = get_similarity(output)
assert similarity >= 0.97751 # Ticket 166496
assert (tmp_path / "target").exists()

run_wwb([
"--target-model",
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
GT_FILE,
"--device",
"CPU",
"--model-type",
model_type,
"--output",
tmp_path,
"--genai",
"--num-inference-steps",
"2",
])
run_wwb(
[
"--target-model",
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
GT_FILE,
"--device",
"CPU",
"--model-type",
model_type,
"--output",
tmp_path,
"--genai",
"--num-inference-steps",
"2",
"--taylorseer-config",
'{"disable_cache_after_step": 0}',
]
)
assert (tmp_path / "target").exists()
assert (tmp_path / "target.csv").exists()
