Skip to content

Commit 6b2382f

Browse files
authored
Merge pull request #2052 from old9/fix-wide-char-alignment
2 parents 083d9bc + 8d1a3ef commit 6b2382f

File tree

2 files changed

+173
-6
lines changed

2 files changed

+173
-6
lines changed

core/commands/log_graph_renderer.py

Lines changed: 127 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import subprocess
1010
import textwrap
1111
import threading
12+
import unicodedata
1213
from typing import (
1314
Callable,
1415
Deque,
@@ -600,10 +601,6 @@ def process_graph(lines):
600601
if isinstance(line, str) or not line.decoration:
601602
yield "...\n"
602603

603-
def trunc(text, width):
604-
# type: (str, int) -> str
605-
return f"{text[:width - 2]}.." if len(text) > width else f"{text:{width}}"
606-
607604
def resolve_refs_from_the_logs():
608605
# type: () -> Dict[str, str]
609606
# git does not decorate refs from the reflogs, e.g. "branch@{2}", so we resolve
@@ -706,7 +703,8 @@ def format_line(line):
706703
left = f"{hash} ({decoration})"
707704
else:
708705
left = f"{hash}"
709-
return f"{left} {trunc(subject, max(2, LEFT_COLUMN_WIDTH - len(left)))} \u200b {info}\n"
706+
left_width = len(left) if left.isascii() else text_width(left)
707+
return f"{left} {trunc_and_pad(subject, max(2, LEFT_COLUMN_WIDTH - left_width))} \u200b {info}\n"
710708

711709
def filter_consecutive_continuation_lines(lines):
712710
# type: (Iterator[str]) -> Iterator[str]
@@ -1085,3 +1083,127 @@ def is_sel_in_viewport(view):
10851083
# type: (sublime.View) -> bool
10861084
viewport = view.visible_region()
10871085
return all(viewport.contains(s) or viewport.intersects(s) for s in view.sel())
1086+
1087+
1088+
# The following are basically unicode aware len(text) variants.
1089+
# `text_width` and `trunc_and_pad` duplicate code. We found no
1090+
# zero-cost abstraction. In perf benchmarking I found an iterator
1091+
# variant to be 20% slower. Calling a `char_width` function in a
1092+
# loop was even worse. So we live with the following.
1093+
1094+
1095+
def text_width(
    text: str,
    _combining=unicodedata.combining,
    _east_asian_width=unicodedata.east_asian_width,
    _category=unicodedata.category
) -> int:
    """Return the number of display columns `text` occupies.

    Width rules, applied per code point in this order:

    - ASCII: 1
    - combining marks and emoji skin-tone modifiers: 0
    - East Asian wide / full-width: 2
    - ZWJ, ZWNJ, variation selectors, and format (`Cf`) characters
      from a few selected ranges: 0
    - everything else: 1

    The `_`-prefixed parameters only bind the `unicodedata` functions
    as locals for the loop; callers are not expected to pass them.
    """
    if text.isascii():
        return len(text)

    total = 0
    for ch in text:
        cp = ord(ch)

        if cp < 0x80:
            total += 1
            continue

        # Skin-tone modifiers must be tested before the East Asian
        # width lookup, hence they share a branch with combining marks.
        if _combining(ch) or 0x1F3FB <= cp <= 0x1F3FF:
            continue

        if _east_asian_width(ch) in ('W', 'F'):
            total += 2
            continue

        if (
            cp == 0x200D  # ZWJ
            or cp == 0x200C  # ZWNJ
            or 0xFE00 <= cp <= 0xFE0F  # variation selectors (VS1..VS16)
            or 0xE0100 <= cp <= 0xE01EF  # variation selectors supplement
        ):
            continue

        # Only consult the (slower) category lookup for ranges that
        # are checked for format characters.
        if (
            (
                0x0600 <= cp <= 0x06FF
                or cp == 0x180E
                or 0x2000 <= cp <= 0x206F
                or cp == 0xFEFF
                or 0xFFF0 <= cp <= 0xFFFF
            )
            and _category(ch) == 'Cf'
        ):
            continue

        total += 1

    return total
1135+
1136+
1137+
def trunc_and_pad(
    text: str,
    width: int,
    _combining=unicodedata.combining,
    _east_asian_width=unicodedata.east_asian_width,
    _category=unicodedata.category
) -> str:
    """Fit `text` into exactly `width` display columns.

    Text that is too wide is cut and suffixed with an ellipsis of dots
    (two dots, or a single dot when `width` is 1); text that is too
    narrow is right-padded with spaces.  A wide character that would
    straddle the cut is dropped and replaced by a padding space, so
    the result always measures `width` columns under the width rules
    below.  Returns the empty string for `width <= 0`.

    Width rules, applied per code point in this order:

    - ASCII: 1
    - combining marks and emoji skin-tone modifiers: 0
    - East Asian wide / full-width: 2
    - ZWJ, ZWNJ, variation selectors, and format (`Cf`) characters
      from a few selected ranges: 0
    - everything else: 1

    The `_`-prefixed parameters only bind the `unicodedata` functions
    as locals for the loop; callers are not expected to pass them.
    """
    if width <= 0:
        return ''

    # The ellipsis must never be wider than the room we have, otherwise
    # a request for a single column would yield two.
    ellipsis = '.' * min(2, width)
    ellipsis_width = len(ellipsis)

    if text.isascii():
        if len(text) > width:
            return f"{text[:width - ellipsis_width]}{ellipsis}"
        return f"{text:{width}}"

    target = width - ellipsis_width
    text_width_ = 0
    trunc_idx = len(text)
    target_idx = 0
    target_width = 0
    for idx, char in enumerate(text):
        codepoint = ord(char)

        # Fast-path ASCII and then apply terminal-ish display width rules:
        # - combining / format / modifiers / joiners / variation selectors: 0
        # - East Asian wide/full-width: 2
        # - everything else: 1
        if codepoint < 0x80:
            char_width_ = 1
        elif _combining(char):
            char_width_ = 0
        elif 0x1F3FB <= codepoint <= 0x1F3FF:
            char_width_ = 0
        elif _east_asian_width(char) in ('W', 'F'):
            char_width_ = 2
        elif (
            codepoint == 0x200D  # ZWJ
            or codepoint == 0x200C  # ZWNJ
            or 0xFE00 <= codepoint <= 0xFE0F  # variation selectors (VS1..VS16)
            or 0xE0100 <= codepoint <= 0xE01EF  # variation selectors supplement
            or (
                (
                    0x0600 <= codepoint <= 0x06FF
                    or codepoint == 0x180E
                    or 0x2000 <= codepoint <= 0x206F
                    or codepoint == 0xFEFF
                    or 0xFFF0 <= codepoint <= 0xFFFF
                )
                and _category(char) == 'Cf'
            )
        ):
            char_width_ = 0
        else:
            char_width_ = 1

        # Remember the longest prefix that still leaves room for the ellipsis.
        if text_width_ + char_width_ <= target:
            target_idx = idx + 1
            target_width = text_width_ + char_width_

        text_width_ += char_width_
        # Once we overflow, we know the string must be truncated.
        if text_width_ > width:
            trunc_idx = idx
            break

    if trunc_idx < len(text):
        # Pad after the ellipsis when a wide character had to be dropped
        # and left a column unused.
        padding = ' ' * max(0, width - (target_width + ellipsis_width))
        return text[:target_idx] + ellipsis + padding

    # Pad using computed display width, not Python string length.
    return text + (' ' * max(0, width - text_width_))

tests/test_graph_diff.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55

66
from GitSavvy.core.commands.log_graph_renderer import (
7-
diff, simplify, normalize_tokens, apply_diff, Ins, Del, Replace, Flush
7+
diff, simplify, normalize_tokens, apply_diff, Ins, Del, Replace, Flush,
8+
text_width, trunc_and_pad
89
)
910

1011

@@ -103,3 +104,47 @@ def test_d(self, A, B, ops):
103104
B = _(B)
104105
ops = tx(ops)
105106
self.assertEqual(apply_diff(A, normalize_tokens(simplify(diff(A, B), 100))), B)
107+
108+
@p.expand([
    # (input string, expected display columns)
    ('ASCII', 5),
    ('中文', 4),
    ('中a文', 5),
    ('📦', 2),
    ('a📦b', 4),
    ('\u200d', 0),
    ('a\u200db', 2),
    ('a\u2060b', 2),
    ('👍🏽', 2),
    ('a\ufe0fb', 2),
    ('👩\u200d💻', 4),
    # sic! Widths are summed per code point, so a ZWJ sequence counts
    # both of its emoji (2 + 0 + 2).
    ('👩‍💻', 4),
])
def test_text_width(self, sample, columns):
    # `text_width` must report display columns, not `len(sample)`.
    measured = text_width(sample)
    self.assertEqual(measured, columns)
124+
125+
@p.expand([
    # (case id, input, target width, expected output)
    # Every expected string must itself measure `width` columns, so the
    # trailing padding spaces are significant.
    ('00', 'hello', 8, 'hello   '),
    ('01', '中文', 6, '中文  '),
    ('02', '中文', 4, '中文'),
    ('03', 'abcdef', 5, 'abc..'),
    ('04', '中文hello', 6, '中文..'),
    ('05', 'abc中文', 5, 'abc..'),
    ('06', '中文ab', 5, '中.. '),
    ('07', '中a文', 4, '中..'),
    ('08', 'a中中', 4, 'a.. '),
    ('09', '中中中', 5, '中.. '),
    ('10', '📦', 4, '📦  '),
    ('11', '📦abcd', 5, '📦a..'),
    ('12', '中中', 3, '.. '),
    ('13', '📦中', 3, '.. '),
    ('14', 'e\u0301x', 4, 'e\u0301x  '),
    ('15', 'a\u200db', 2, 'a\u200db'),
    ('16', '👩\u200d💻', 4, '👩\u200d💻'),
    ('17', 'a\u2060b', 2, 'a\u2060b'),
    ('18', '👍🏽', 2, '👍🏽'),
    ('19', 'a\ufe0fb', 2, 'a\ufe0fb'),
])
def test_trunc_and_pad(self, _, text, width, expected):
    actual = trunc_and_pad(text, width)
    self.assertEqual(actual, expected)
    # Independently of the exact text, the result must fill the column.
    self.assertEqual(text_width(actual), width)

0 commit comments

Comments
 (0)