Refactor ::OnReadable() creating get_utf8_char_validated() (#136)
Extract call to GLib's g_utf8_get_char_validated() and the associated
workaround to also read NUL characters into a separate function to make
PipeCapture::OnReadable() a little smaller and simpler, so easier to
understand.
Add max_len > 0 clause into get_utf8_char_validated() like this:
if (uc == UTF8_PARTIAL && max_len > 0)
so that the NUL character reading workaround is only applied when
max_len specifies the maximum number of bytes to read, rather than
when -1 specifies reading a NUL terminated string. This makes
get_utf8_char_validated() a complete wrapper of
g_utf8_get_char_validated() [1], even though GParted always specifies
the maximum number of bytes to read.
No longer describe the inability to read NUL characters as a bug [2]
since the GLib authors said it wasn't [3].
[1] GLib Reference Manual, Unicode Manipulation Functions,
g_utf8_get_char_validated ()
https://developer.gnome.org/glib/stable/glib-Unicode-Manipulation.html#g-utf8-get-char-validated
[2] 8dbbb47ce2
Workaround g_utf8_get_char_validate() bug with embedded NUL bytes
(#777973)
[3] Bug 780095 - g_utf8_get_char_validated() stopping at nul byte even
for length specified buffers
https://bugzilla.gnome.org/show_bug.cgi?id=780095#18
"If g_utf8_get_char_validated() encounters a nul byte in the
middle of a string of given longer length, it returns -2,
indicating a partial gunichar. That is not the obvious
behaviour, but since g_utf8_get_char_validated() has been API
for a long time, the behaviour cannot be changed.
"
Closes #136 - 1.2.0: test suite is failing in test_PipeCapture
This commit is contained in:
parent
0bcb224bdc
commit
b1cad17a14
|
@ -45,6 +45,7 @@ private:
|
||||||
gpointer data );
|
gpointer data );
|
||||||
static void append_unichar_vector_to_utf8( std::string & str,
|
static void append_unichar_vector_to_utf8( std::string & str,
|
||||||
const std::vector<gunichar> & ucvec );
|
const std::vector<gunichar> & ucvec );
|
||||||
|
static gunichar get_utf8_char_validated(const char *p, gssize max_len);
|
||||||
static int utf8_char_length( unsigned char firstbyte );
|
static int utf8_char_length( unsigned char firstbyte );
|
||||||
|
|
||||||
Glib::RefPtr<Glib::IOChannel> channel; // Wrapper around fd
|
Glib::RefPtr<Glib::IOChannel> channel; // Wrapper around fd
|
||||||
|
|
|
@ -30,6 +30,11 @@ namespace GParted {
|
||||||
|
|
||||||
const size_t READBUF_SIZE = 64*KIBIBYTE;
|
const size_t READBUF_SIZE = 64*KIBIBYTE;
|
||||||
|
|
||||||
|
|
||||||
|
const gunichar UTF8_PARTIAL = (gunichar)-2;
|
||||||
|
const gunichar UTF8_INVALID = (gunichar)-1;
|
||||||
|
|
||||||
|
|
||||||
PipeCapture::PipeCapture( int fd, Glib::ustring &buffer ) : fill_offset( 0 ),
|
PipeCapture::PipeCapture( int fd, Glib::ustring &buffer ) : fill_offset( 0 ),
|
||||||
cursor( 0 ),
|
cursor( 0 ),
|
||||||
line_start( 0 ),
|
line_start( 0 ),
|
||||||
|
@ -125,21 +130,7 @@ bool PipeCapture::OnReadable( Glib::IOCondition condition )
|
||||||
fill_offset = 0;
|
fill_offset = 0;
|
||||||
while ( read_ptr < end_ptr )
|
while ( read_ptr < end_ptr )
|
||||||
{
|
{
|
||||||
const gunichar UTF8_PARTIAL = (gunichar)-2;
|
gunichar uc = get_utf8_char_validated(read_ptr, end_ptr - read_ptr);
|
||||||
const gunichar UTF8_INVALID = (gunichar)-1;
|
|
||||||
gunichar uc = g_utf8_get_char_validated( read_ptr, end_ptr - read_ptr );
|
|
||||||
if ( uc == UTF8_PARTIAL )
|
|
||||||
{
|
|
||||||
// Workaround bug in g_utf8_get_char_validated() in which
|
|
||||||
// it reports an partial UTF-8 char when a NUL byte is
|
|
||||||
// encountered in the middle of a multi-byte character,
|
|
||||||
// yet there are more bytes available in the length
|
|
||||||
// specified buffer. Report as invalid character instead.
|
|
||||||
int len = utf8_char_length( *read_ptr );
|
|
||||||
if ( len == -1 || read_ptr + len <= end_ptr )
|
|
||||||
uc = UTF8_INVALID;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( uc == UTF8_PARTIAL )
|
if ( uc == UTF8_PARTIAL )
|
||||||
{
|
{
|
||||||
// Partial UTF-8 character at end of read buffer. Copy to
|
// Partial UTF-8 character at end of read buffer. Copy to
|
||||||
|
@ -258,6 +249,29 @@ void PipeCapture::append_unichar_vector_to_utf8( std::string & str, const std::v
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// GLib's g_utf8_get_char_validated() always considers strings as being NUL terminated,
|
||||||
|
// even when max_len is specified, hence can't read NUL characters. This wrapper can read
|
||||||
|
// NUL characters when max_len is specified.
|
||||||
|
// Reference:
|
||||||
|
// https://developer.gnome.org/glib/stable/glib-Unicode-Manipulation.html#g-utf8-get-char-validated
|
||||||
|
gunichar PipeCapture::get_utf8_char_validated(const char *p, gssize max_len)
|
||||||
|
{
|
||||||
|
gunichar uc = g_utf8_get_char_validated(p, max_len);
|
||||||
|
if (uc == UTF8_PARTIAL && max_len > 0)
|
||||||
|
{
|
||||||
|
// If g_utf8_get_char_validated() found a NUL byte in the middle of a
|
||||||
|
// multi-byte character, even when there are more bytes available as
|
||||||
|
// specified by max_len, it reports a partial UTF-8 character. Report
|
||||||
|
// this case as an invalid character instead.
|
||||||
|
int len = utf8_char_length(*p);
|
||||||
|
if (len == -1 || (gssize)len <= max_len)
|
||||||
|
uc = UTF8_INVALID;
|
||||||
|
}
|
||||||
|
return uc;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int PipeCapture::utf8_char_length( unsigned char firstbyte )
|
int PipeCapture::utf8_char_length( unsigned char firstbyte )
|
||||||
{
|
{
|
||||||
// Recognise the size of FSS-UTF (1992) / UTF-8 (1993) characters given the first
|
// Recognise the size of FSS-UTF (1992) / UTF-8 (1993) characters given the first
|
||||||
|
|
Loading…
Reference in New Issue