|
9 | 9 | import subprocess |
10 | 10 | import textwrap |
11 | 11 | import threading |
| 12 | +import unicodedata |
12 | 13 | from typing import ( |
13 | 14 | Callable, |
14 | 15 | Deque, |
@@ -600,10 +601,6 @@ def process_graph(lines): |
600 | 601 | if isinstance(line, str) or not line.decoration: |
601 | 602 | yield "...\n" |
602 | 603 |
|
603 | | - def trunc(text, width): |
604 | | - # type: (str, int) -> str |
605 | | - return f"{text[:width - 2]}.." if len(text) > width else f"{text:{width}}" |
606 | | - |
607 | 604 | def resolve_refs_from_the_logs(): |
608 | 605 | # type: () -> Dict[str, str] |
609 | 606 | # git does not decorate refs from the reflogs, e.g. "branch@{2}", so we resolve |
@@ -706,7 +703,8 @@ def format_line(line): |
706 | 703 | left = f"{hash} ({decoration})" |
707 | 704 | else: |
708 | 705 | left = f"{hash}" |
709 | | - return f"{left} {trunc(subject, max(2, LEFT_COLUMN_WIDTH - len(left)))} \u200b {info}\n" |
| 706 | + left_width = len(left) if left.isascii() else text_width(left) |
| 707 | + return f"{left} {trunc_and_pad(subject, max(2, LEFT_COLUMN_WIDTH - left_width))} \u200b {info}\n" |
710 | 708 |
|
711 | 709 | def filter_consecutive_continuation_lines(lines): |
712 | 710 | # type: (Iterator[str]) -> Iterator[str] |
@@ -1085,3 +1083,127 @@ def is_sel_in_viewport(view): |
1085 | 1083 | # type: (sublime.View) -> bool |
1086 | 1084 | viewport = view.visible_region() |
1087 | 1085 | return all(viewport.contains(s) or viewport.intersects(s) for s in view.sel()) |
| 1086 | + |
| 1087 | + |
| 1088 | +# The following are basically unicode aware len(text) variants. |
| 1089 | +# `text_width` and `trunc_and_pad` duplicate code. We found no |
| 1090 | +# zero-cost abstraction. In perf benchmarking I found an iterator |
| 1091 | +# variant to be 20% slower. Calling a `char_width` function in a |
| 1092 | +# loop was even worse. So we live with the following. |
| 1093 | + |
| 1094 | + |
def text_width(
    text: str,
    _combining=unicodedata.combining,
    _east_asian_width=unicodedata.east_asian_width,
    _category=unicodedata.category
) -> int:
    """Return the number of display columns `text` occupies.

    Pure-ASCII input is measured with `len`. Otherwise every character is
    classified as:

    - 0 columns: combining marks, emoji skin-tone modifiers
      (U+1F3FB..U+1F3FF), ZWJ/ZWNJ, variation selectors, and `Cf` (format)
      characters inside a handful of known ranges;
    - 2 columns: East Asian wide (`W`) and full-width (`F`) characters;
    - 1 column: everything else.

    The underscore-prefixed parameters are the `unicodedata` lookups bound
    as defaults; callers are not expected to pass them.
    """
    if text.isascii():
        return len(text)

    total = 0
    for ch in text:
        cp = ord(ch)

        # ASCII is by far the most common case, so test for it first.
        if cp < 0x80:
            total += 1
            continue

        # Combining marks and skin-tone modifiers attach to the previous
        # glyph. The modifier range must be tested before the East Asian
        # width lookup below.
        if _combining(ch) or 0x1F3FB <= cp <= 0x1F3FF:
            continue

        if _east_asian_width(ch) in ('W', 'F'):
            total += 2
            continue

        # Joiners and variation selectors never take a column.
        if (
            cp in (0x200D, 0x200C)          # ZWJ, ZWNJ
            or 0xFE00 <= cp <= 0xFE0F       # variation selectors (VS1..VS16)
            or 0xE0100 <= cp <= 0xE01EF     # variation selectors supplement
        ):
            continue

        # Format characters are invisible, but the category lookup is only
        # made for the ranges where such characters are expected.
        in_format_range = (
            0x0600 <= cp <= 0x06FF
            or cp == 0x180E
            or 0x2000 <= cp <= 0x206F
            or cp == 0xFEFF
            or 0xFFF0 <= cp <= 0xFFFF
        )
        if in_format_range and _category(ch) == 'Cf':
            continue

        total += 1
    return total
| 1135 | + |
| 1136 | + |
def trunc_and_pad(
    text: str,
    width: int,
    _combining=unicodedata.combining,
    _east_asian_width=unicodedata.east_asian_width,
    _category=unicodedata.category
) -> str:
    """Fit `text` into exactly `width` display columns.

    Text that is too wide is cut and terminated with a '..' marker; text
    that is narrower is right-padded with spaces. Width is measured in
    display columns, not code points: combining marks, joiners, variation
    selectors, emoji skin-tone modifiers and certain format characters
    count as 0, East Asian wide/full-width characters as 2, everything
    else as 1.

    When `width` is 1 the marker shrinks to a single '.', so the result
    never exceeds `width` columns on either the ASCII or the Unicode path.
    A non-positive `width` yields the empty string.

    The underscore-prefixed parameters are the `unicodedata` lookups bound
    as defaults; callers are not expected to pass them. The per-character
    classification is deliberately kept inline rather than shared with
    `text_width`, matching the performance note that precedes these helpers.
    """
    if width <= 0:
        return ''

    # A column narrower than two cells cannot hold the full '..' marker.
    ellipsis = '.' * min(2, width)
    # Columns available for text once the marker has been reserved.
    target = width - len(ellipsis)

    if text.isascii():
        if len(text) > width:
            return f"{text[:target]}{ellipsis}"
        return f"{text:{width}}"

    used = 0            # display width of everything scanned so far
    keep_idx = 0        # end index of the longest prefix fitting `target`
    keep_width = 0      # display width of that prefix
    overflowed = False
    for idx, char in enumerate(text):
        codepoint = ord(char)

        # Fast-path ASCII and then apply terminal-ish display width rules:
        # - combining / format / modifiers / joiners / variation selectors: 0
        # - East Asian wide/full-width: 2
        # - everything else: 1
        if codepoint < 0x80:
            char_width_ = 1
        elif _combining(char):
            char_width_ = 0
        elif 0x1F3FB <= codepoint <= 0x1F3FF:
            char_width_ = 0
        elif _east_asian_width(char) in ('W', 'F'):
            char_width_ = 2
        elif (
            codepoint == 0x200D  # ZWJ
            or codepoint == 0x200C  # ZWNJ
            or 0xFE00 <= codepoint <= 0xFE0F  # variation selectors (VS1..VS16)
            or 0xE0100 <= codepoint <= 0xE01EF  # variation selectors supplement
            or (
                (
                    0x0600 <= codepoint <= 0x06FF
                    or codepoint == 0x180E
                    or 0x2000 <= codepoint <= 0x206F
                    or codepoint == 0xFEFF
                    or 0xFFF0 <= codepoint <= 0xFFFF
                )
                and _category(char) == 'Cf'
            )
        ):
            char_width_ = 0
        else:
            char_width_ = 1

        used += char_width_
        if used <= target:
            # Remember the longest prefix that still leaves room for the
            # marker. Zero-width characters following it are absorbed too.
            keep_idx = idx + 1
            keep_width = used
        elif used > width:
            # Once we overflow, we know the string must be truncated.
            overflowed = True
            break

    if overflowed:
        # A wide character may not fit in the last free cell, so the kept
        # prefix can be narrower than `target`; pad the gap after the marker.
        return text[:keep_idx] + ellipsis + ' ' * (target - keep_width)

    # Pad using computed display width, not Python string length.
    return text + ' ' * (width - used)
0 commit comments