|
| 1 | +/* |
| 2 | + * SPDX-FileCopyrightText: 2024 Espressif Systems (Shanghai) CO LTD |
| 3 | + * |
| 4 | + * SPDX-License-Identifier: Apache-2.0 |
| 5 | + */ |
| 6 | + |
| 7 | +// This is LVGL RGB888 simple fill for ESP32 processor |
| 8 | + |
// -----------------------------------------------------------------------------
// lv_color_blend_to_rgb888_esp - fill a rectangle of 3-byte (RGB888) pixels
// with one constant colour.
//
// Target / ABI : Xtensa, windowed calling convention (entry / retw).
// C view       : lv_color_blend_to_rgb888_esp(asm_dsc_t * dsc)
//                (the original file describes it as implementing
//                 "void lv_color_blend_to_rgb888(_lv_draw_sw_blend_fill_dsc_t * dsc)")
//
// In  : a2 = dsc, pointer to the descriptor below
// Out : a2 = 1 (the original code comments this value as LV_RESULT_OK)
//
// Descriptor layout (byte offsets used with l32i):
//
// typedef struct {
//     uint32_t opa;                 //  0  (not read here)
//     void * dst_buf;               //  4
//     uint32_t dst_w;               //  8  width in pixels
//     uint32_t dst_h;               // 12  height in rows
//     uint32_t dst_stride;          // 16  bytes from one row start to the next
//     const void * src_buf;         // 20  points at the colour: bytes B, G, R
//     uint32_t src_stride;          // 24  (not read here)
//     const lv_opa_t * mask_buf;    // 28  (not read here)
//     uint32_t mask_stride;         // 32  (not read here)
// } asm_dsc_t;
//
// Register roles:
//     a3  - running destination pointer
//     a4  - dst_w
//     a5  - rows still to fill
//     a6  - row gap = dst_stride - 3 * dst_w (bytes to skip after each row)
//     a7  - colour pointer
//     a8  - packed colour 0x00RRGGBB
//     a9  - number of 4-pixel groups per row (dst_w / 4)
//     a10 - scratch during setup, then leftover pixels per row (dst_w % 4)
//     a11 - scratch (3 * dst_w)
//     a12 - red byte
//     a13 - 0xBBRRGGBB  \
//     a14 - 0xGGBBRRGG   } three words that together hold 4 pixels (12 bytes)
//     a15 - 0xRRGGBBRR  /
//
// Notes:
//   * Only exactly three colour bytes are read. Whatever follows the colour in
//     memory can therefore not leak into the written pixels.
//   * A zero dst_w or dst_h returns immediately without touching dst_buf.
//   * The byte order of the patterns assumes little-endian stores, as the
//     original word layout comments do.
//   * NOTE(review): the destination pointer is not aligned before the 32/16-bit
//     stores; this assumes the target tolerates unaligned stores - confirm.
// -----------------------------------------------------------------------------

    .section    .text
    .align      4
    .global     lv_color_blend_to_rgb888_esp
    .type       lv_color_blend_to_rgb888_esp,@function

lv_color_blend_to_rgb888_esp:

    entry       a1,   32

    l32i.n      a3,   a2,   4                   // a3 = dsc->dst_buf
    l32i.n      a4,   a2,   8                   // a4 = dsc->dst_w      (pixels)
    l32i.n      a5,   a2,   12                  // a5 = dsc->dst_h      (rows)
    l32i.n      a6,   a2,   16                  // a6 = dsc->dst_stride (bytes)
    l32i.n      a7,   a2,   20                  // a7 = dsc->src_buf    (colour: B, G, R)

    // Nothing to draw: the row loop below is a do-while, so a zero height
    // must never reach it (the counter would wrap around).
    beqz        a5,   .Lrgb888_fill_done
    beqz        a4,   .Lrgb888_fill_done

    // Assemble the colour from its three bytes so that the upper byte of a8
    // is guaranteed to be zero.
    l8ui        a8,   a7,   0                   // a8  = 0x000000BB
    l8ui        a10,  a7,   1                   // a10 = 0x000000GG
    l8ui        a12,  a7,   2                   // a12 = 0x000000RR
    slli        a10,  a10,  8                   // a10 = 0x0000GG00
    or          a8,   a8,   a10                 // a8  = 0x0000GGBB
    slli        a10,  a12,  16                  // a10 = 0x00RR0000
    or          a8,   a8,   a10                 // a8  = 0x00RRGGBB

    // Build the three words covering 4 consecutive pixels (memory order
    // B G R B | G R B G | R B G R).
    slli        a13,  a8,   24                  // a13 = 0xBB000000
    or          a13,  a13,  a8                  // a13 = 0xBBRRGGBB

    srli        a14,  a8,   8                   // a14 = 0x0000RRGG
    slli        a10,  a8,   16                  // a10 = 0xGGBB0000
    or          a14,  a14,  a10                 // a14 = 0xGGBBRRGG

    slli        a15,  a8,   8                   // a15 = 0xRRGGBB00
    or          a15,  a15,  a12                 // a15 = 0xRRGGBBRR

    // Row gap: bytes between the end of one filled row and the next row start.
    addx2       a11,  a4,   a4                  // a11 = 3 * dst_w (bytes per filled row)
    sub         a6,   a6,   a11                 // a6  = dst_stride - 3 * dst_w

    srli        a9,   a4,   2                   // a9  = dst_w / 4, 4-pixel groups per row
    extui       a10,  a4,   0,    2             // a10 = dst_w % 4, leftover pixels per row

.Lrgb888_fill_row:

        // Main part of the row: 4 pixels (12 bytes) per iteration.
        // loopnez skips the body entirely when a9 == 0.
        loopnez     a9,   .Lrgb888_fill_groups_end
            s32i.n      a13,  a3,   0
            s32i.n      a14,  a3,   4
            s32i.n      a15,  a3,   8
            addi.n      a3,   a3,   12
        .Lrgb888_fill_groups_end:

        // Tail of the row: 0..3 leftover pixels.
        bnei        a10,  3,    .Lrgb888_fill_tail_not_3
            s32i.n      a13,  a3,   0           // B G R B
            s32i.n      a14,  a3,   4           // G R B G
            s8i         a15,  a3,   8           // R
            addi.n      a3,   a3,   9
            j           .Lrgb888_fill_row_end
        .Lrgb888_fill_tail_not_3:

        bnei        a10,  2,    .Lrgb888_fill_tail_not_2
            s32i.n      a13,  a3,   0           // B G R B
            s16i        a14,  a3,   4           // G R
            addi.n      a3,   a3,   6
            j           .Lrgb888_fill_row_end
        .Lrgb888_fill_tail_not_2:

        bnei        a10,  1,    .Lrgb888_fill_row_end
            s16i        a13,  a3,   0           // B G
            s8i         a15,  a3,   2           // R
            addi.n      a3,   a3,   3
        .Lrgb888_fill_row_end:

        add         a3,   a3,   a6              // skip the row gap to the next row start
        addi.n      a5,   a5,   -1              // one row done
        bnez        a5,   .Lrgb888_fill_row

.Lrgb888_fill_done:
    movi.n      a2,   1                         // return 1 (LV_RESULT_OK per the original comment)
    retw.n

    .size       lv_color_blend_to_rgb888_esp, .-lv_color_blend_to_rgb888_esp
0 commit comments