try to calculate character width

(cherry picked from commit 92e30955b9)
This commit is contained in:
Philipp Otterbein 2024-12-15 02:29:56 +01:00 committed by Mergify
parent 85a4f62400
commit b26da55735
7 changed files with 1636 additions and 29 deletions

View File

@ -354,6 +354,7 @@
''^src/libutil/util\.cc$''
''^src/libutil/util\.hh$''
''^src/libutil/variant-wrapper\.hh$''
''^src/libutil/widecharwidth/widechar_width\.h$'' # vendored source
''^src/libutil/windows/file-descriptor\.cc$''
''^src/libutil/windows/file-path\.cc$''
''^src/libutil/windows/processes\.cc$''

View File

@ -55,6 +55,10 @@ TEST(filterANSIEscapes, utf8)
ASSERT_EQ(filterANSIEscapes("fóóbär", true, 3), "fóó");
ASSERT_EQ(filterANSIEscapes("f€€bär", true, 4), "f€€b");
ASSERT_EQ(filterANSIEscapes("f𐍈𐍈bär", true, 4), "f𐍈𐍈b");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 6), "f🔍bar");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 3), "f🔍");
ASSERT_EQ(filterANSIEscapes("f🔍bar", true, 2), "f");
ASSERT_EQ(filterANSIEscapes("foo\u0301", true, 3), "foó");
}
TEST(filterANSIEscapes, osc8)

View File

@ -109,6 +109,8 @@ deps_private += cpuid
nlohmann_json = dependency('nlohmann_json', version : '>= 3.9')
deps_public += nlohmann_json
cxx = meson.get_compiler('cpp')
config_h = configure_file(
configuration : configdata,
output : 'config-util.hh',
@ -169,6 +171,10 @@ sources = files(
)
include_dirs = [include_directories('.')]
if not cxx.has_header('widechar_width.h', required : false)
# use vendored widechar_width.h
include_dirs += include_directories('./widecharwidth')
endif
headers = [config_h] + files(
'abstract-setting-to-json.hh',

View File

@ -29,6 +29,7 @@ mkMesonLibrary (finalAttrs: {
./build-utils-meson
../../.version
./.version
./widecharwidth
./meson.build
./meson.options
./linux/meson.build

View File

@ -11,6 +11,53 @@
# include <sys/ioctl.h>
#endif
#include <unistd.h>
#include <widechar_width.h>
namespace {
/**
 * Decode the UTF-8 sequence at the start of `s` and estimate how many
 * terminal columns it occupies.
 *
 * @param s Text whose first character is to be measured.
 * @return `{width, bytes}`: the estimated column width and the number of
 *         bytes of `s` that belong to the measured unit.
 *
 * - Empty input yields `{0, 0}`.
 * - A malformed sequence (invalid lead byte, missing or invalid continuation
 *   byte, overlong encoding, or a value above U+10FFFF) is counted as one
 *   column per byte, i.e. `width == bytes`.
 * - Otherwise the width comes from `widechar_wcwidth`, with its special
 *   negative result codes mapped to a best-effort column count.
 */
inline std::pair<int, size_t> charWidthUTF8Helper(std::string_view s)
{
    if (s.empty())
        return {0, 0}; // nothing to decode; also avoids reading s[0] out of bounds

    // Go through `unsigned char` so a lead byte >= 0x80 is not sign-extended
    // on platforms where plain `char` is signed.
    const uint32_t lead = static_cast<unsigned char>(s[0]);

    size_t bytes = 1;
    uint32_t ch = lead;
    // Valid code points for a sequence of `bytes` bytes lie in [min, max).
    uint32_t min = 0;
    uint32_t max = 1U << 7;
    if ((lead & 0x80U) == 0U) {
        // Single-byte (ASCII) character; the defaults above already apply.
    } else if ((lead & 0xe0U) == 0xc0U) {
        ch = lead & 0x1fU;
        bytes = 2;
        min = 1U << 7;
        max = 1U << 11;
    } else if ((lead & 0xf0U) == 0xe0U) {
        ch = lead & 0x0fU;
        bytes = 3;
        min = 1U << 11;
        max = 1U << 16;
    } else if ((lead & 0xf8U) == 0xf0U) {
        ch = lead & 0x07U;
        bytes = 4;
        min = 1U << 16;
        max = 0x110000U;
    } else {
        return {1, 1}; // invalid UTF-8 start byte
    }

    for (size_t i = 1; i < bytes; i++) {
        const bool isContinuation =
            i < s.size() && (static_cast<unsigned char>(s[i]) & 0xc0U) == 0x80U;
        if (!isContinuation) {
            // Invalid UTF-8 encoding; assume one character per byte for the
            // bytes accepted so far.
            return {static_cast<int>(i), i};
        }
        ch = (ch << 6) | (static_cast<unsigned char>(s[i]) & 0x3fU);
    }

    // Overlong encodings (value below the minimum for this length) and values
    // beyond U+10FFFF are not valid code points: count one column per byte.
    if (ch < min || ch >= max)
        return {static_cast<int>(bytes), bytes};

    int width = widechar_wcwidth(ch);
    if (width == widechar_ambiguous) {
        width = 1; // just a guess...
    } else if (width == widechar_widened_in_9) {
        width = 2;
    } else if (width < 0) {
        width = 0; // every other negative result code is treated as zero-width
    }
    return {width, bytes};
}
}
namespace nix {
@ -30,7 +77,7 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
size_t w = 0;
auto i = s.begin();
while (w < (size_t) width && i != s.end()) {
while (i != s.end()) {
if (*i == '\e') {
std::string e;
@ -61,10 +108,12 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
}
else if (*i == '\t') {
i++; t += ' '; w++;
while (w < (size_t) width && w % 8) {
t += ' '; w++;
}
do {
if (++w > (size_t) width)
return t;
t += ' ';
} while (w % 8);
i++;
}
else if (*i == '\r' || *i == '\a')
@ -72,35 +121,18 @@ std::string filterANSIEscapes(std::string_view s, bool filterAll, unsigned int w
i++;
else {
w++;
// Copy one UTF-8 character.
if ((*i & 0xe0) == 0xc0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
} else if ((*i & 0xf0) == 0xe0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
} else if ((*i & 0xf8) == 0xf0) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) {
t += *i++;
if (i != s.end() && ((*i & 0xc0) == 0x80)) t += *i++;
}
}
} else
t += *i++;
auto [chWidth, bytes] = charWidthUTF8Helper({i, s.end()});
w += chWidth;
if (w > (size_t) width) {
break;
}
t += {i, i + bytes};
i += bytes;
}
}
return t;
}
//////////////////////////////////////////////////////////////////////
static Sync<std::pair<unsigned short, unsigned short>> windowSize{{0, 0}};

View File

@ -0,0 +1,4 @@
widecharwidth - wcwidth implementation
Written in 2018 by ridiculous_fish
To the extent possible under law, the author(s) have dedicated all copyright and related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty.
You should have received a copy of the CC0 Public Domain Dedication along with this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.

File diff suppressed because it is too large Load Diff