Skip to content

Commit 1398956

Browse files
authored
fix: add support for multibyte characters (#2)
* test(unicode): add tests for complicated characters. If the length of a character is more than one (here tested with 2 and 7), it should be counted as that length, but definitely shouldn't cut in the middle of that character. This does not work as expected yet, so the test is skipped.
* use an array to have single characters
* add another failing case
* comp++
* fix tests by using runes
1 parent c88466b commit 1398956

4 files changed

Lines changed: 45 additions & 7 deletions

File tree

package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,8 @@
3030
"eslint-plugin-prettier": "^2.1.2",
3131
"jest": "^20.0.4",
3232
"prettier": "^1.4.4"
33+
},
34+
"dependencies": {
35+
"runes": "^0.4.1"
3336
}
3437
}

src/__tests__/index.js

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,26 @@ it('should truncate a word if longer than size', () => {
3939
const pieces = chunk('hello you', 4);
4040
expect(pieces).toEqual(['hell', 'o', 'you']);
4141
});
42+
43+
it('should count double width characters as single characters', () => {
44+
// each of these characters is two UTF-16 code units (a surrogate pair)
45+
const chineseText = '𤻪𬜬𬜯';
46+
const camembert = '🧀🧀🧀🧀 🧀🧀🧀🧀';
47+
48+
expect(chunk(chineseText, 2)).toEqual(['𤻪𬜬', '𬜯']);
49+
expect(chunk(chineseText, 1)).toEqual(['𤻪', '𬜬', '𬜯']);
50+
expect(chunk(camembert, 4)).toEqual(['🧀🧀🧀🧀', '🧀🧀🧀🧀']);
51+
});
52+
53+
// this test does not pass yet
54+
it('should not cut combined characters', () => {
55+
// one woman runner emoji with a colour is seven UTF-16 code units, or five code points
56+
// RUNNER(2) + COLOUR(2) + ZWJ + GENDER + VS16
57+
const runners = '🏃🏽‍♀️🏃🏽‍♀️🏃🏽‍♀️';
58+
// FLAG + VS16 + ZWJ + RAINBOW
59+
const flags = '🏳️‍🌈🏳️‍🌈';
60+
61+
expect(chunk(runners, 3)).toEqual(['🏃🏽‍♀️🏃🏽‍♀️🏃🏽‍♀️']);
62+
expect(chunk(runners, 1)).toEqual(['🏃🏽‍♀️', '🏃🏽‍♀️', '🏃🏽‍♀️']);
63+
expect(chunk(flags, 1)).toEqual(['🏳️‍🌈', '🏳️‍🌈']);
64+
});

src/index.js

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
/* eslint-disable import/no-commonjs */
2+
const runes = require('runes');
3+
14
const assertIsValidText = function(text) {
25
if (typeof text !== 'string') {
36
throw new TypeError(
@@ -19,19 +22,24 @@ export default function(text, chunkSize) {
1922
assertIsValidChunkSize(chunkSize);
2023

2124
const chunks = [];
22-
while (text.length > chunkSize) {
23-
const splitAt = text.lastIndexOf(' ', chunkSize);
25+
let characters = runes(text);
26+
27+
while (characters.length > chunkSize) {
28+
const splitAt = characters.lastIndexOf(' ', chunkSize);
29+
2430
if (splitAt === -1) {
2531
// No whitespace found, we need to truncate the word in that case.
26-
chunks.push(text.substr(0, chunkSize));
27-
text = text.substr(chunkSize); // eslint-disable-line no-param-reassign
32+
const chunk = characters.slice(0, chunkSize).join('');
33+
chunks.push(chunk);
34+
characters = characters.slice(chunkSize); // eslint-disable-line no-param-reassign
2835
} else {
29-
chunks.push(text.substr(0, splitAt));
30-
text = text.substr(splitAt + 1); // eslint-disable-line no-param-reassign
36+
const chunk = characters.slice(0, splitAt).join('');
37+
chunks.push(chunk);
38+
characters = characters.slice(splitAt + 1); // eslint-disable-line no-param-reassign
3139
}
3240
}
3341

34-
chunks.push(text);
42+
chunks.push(characters.join(''));
3543

3644
return chunks;
3745
}

yarn.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2809,6 +2809,10 @@ run-async@^2.2.0:
28092809
dependencies:
28102810
is-promise "^2.1.0"
28112811

2812+
runes@^0.4.1:
2813+
version "0.4.1"
2814+
resolved "https://registry.yarnpkg.com/runes/-/runes-0.4.1.tgz#8fe5ca2cf7d7a384c6456beeeff6c4e9dc3e8502"
2815+
28122816
rx-lite-aggregates@^4.0.8:
28132817
version "4.0.8"
28142818
resolved "https://registry.yarnpkg.com/rx-lite-aggregates/-/rx-lite-aggregates-4.0.8.tgz#753b87a89a11c95467c4ac1626c4efc4e05c67be"

0 commit comments

Comments
 (0)