From: Pádraig Brady Date: Mon, 8 Mar 2010 18:29:01 +0000 (+0000) Subject: cal: factor out and update multibyte alignment code X-Git-Url: https://err.no/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=104b92f84802e4043e6a8db034985fa726b9a6ab;p=util-linux cal: factor out and update multibyte alignment code * include/mbsalign.h: New module interface * lib/mbsalign.c: Updated implementation synced from coreutils * include/Makefile.am: Add mbsalign.h * misc-utils/Makefile.am: Make cal dependent on mbsalign module * misc-utils/cal.c: Call mbsalign() [kzak@redhat.com: - use min() macro from c.h] Signed-off-by: Pádraig Brady Signed-off-by: Karel Zak --- diff --git a/include/Makefile.am b/include/Makefile.am index 1da31fe1..ccae85d7 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -12,6 +12,7 @@ dist_noinst_HEADERS = \ linux_reboot.h \ linux_version.h \ md5.h \ + mbsalign.h \ nls.h \ pathnames.h \ setproctitle.h \ diff --git a/include/mbsalign.h b/include/mbsalign.h new file mode 100644 index 00000000..b8a588c2 --- /dev/null +++ b/include/mbsalign.h @@ -0,0 +1,43 @@ +/* Align/Truncate a string in a given screen width + Copyright (C) 2009-2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <stddef.h> + +typedef enum { MBS_ALIGN_LEFT, MBS_ALIGN_RIGHT, MBS_ALIGN_CENTER } mbs_align_t; + +enum { + /* Use unibyte mode for invalid multibyte strings or + when heap memory is exhausted. 
*/ + MBA_UNIBYTE_FALLBACK = 0x0001, + +#if 0 /* Other possible options. */ + /* Skip invalid multibyte chars rather than failing */ + MBA_IGNORE_INVALID = 0x0002, + + /* Align multibyte strings using "figure space" (\u2007) */ + MBA_USE_FIGURE_SPACE = 0x0004, + + /* Don't add any padding */ + MBA_TRUNCATE_ONLY = 0x0008, + + /* Don't truncate */ + MBA_PAD_ONLY = 0x0010, +#endif +}; + +size_t +mbsalign (const char *src, char *dest, size_t dest_size, + size_t *width, mbs_align_t align, int flags); diff --git a/lib/mbsalign.c b/lib/mbsalign.c new file mode 100644 index 00000000..c1a5472e --- /dev/null +++ b/lib/mbsalign.c @@ -0,0 +1,254 @@ +/* Align/Truncate a string in a given screen width + Copyright (C) 2009-2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Pádraig Brady. */ + +#include <config.h> + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <stdbool.h> +#include <limits.h> + +#include "c.h" +#include "mbsalign.h" +#include "widechar.h" + + +#ifdef HAVE_WIDECHAR +/* Replace non printable chars. + Note \t and \n etc. are non printable. + Return 1 if replacement made, 0 otherwise. */ + +static bool +wc_ensure_printable (wchar_t *wchars) +{ + bool replaced = false; + wchar_t *wc = wchars; + while (*wc) + { + if (!iswprint ((wint_t) *wc)) + { + *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */ + replaced = true; + } + wc++; + } + return replaced; +} + +/* Truncate wchar string to width cells. 
+ * Returns number of cells used. */ + +static size_t +wc_truncate (wchar_t *wc, size_t width) +{ + size_t cells = 0; + int next_cells = 0; + + while (*wc) + { + next_cells = wcwidth (*wc); + if (next_cells == -1) /* non printable */ + { + *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */ + next_cells = 1; + } + if (cells + next_cells > width) + break; + cells += next_cells; + wc++; + } + *wc = L'\0'; + return cells; +} + +/* FIXME: move this function to gnulib as it's missing on: + OpenBSD 3.8, IRIX 5.3, Solaris 2.5.1, mingw, BeOS */ + +static int +rpl_wcswidth (const wchar_t *s, size_t n) +{ + int ret = 0; + + while (n-- > 0 && *s != L'\0') + { + int nwidth = wcwidth (*s++); + if (nwidth == -1) /* non printable */ + return -1; + if (ret > (INT_MAX - nwidth)) /* overflow */ + return -1; + ret += nwidth; + } + + return ret; +} +#endif + +/* Write N_SPACES space characters to DEST while ensuring + nothing is written beyond DEST_END. A terminating NUL + is always added to DEST. + A pointer to the terminating NUL is returned. */ + +static char* +mbs_align_pad (char *dest, const char* dest_end, size_t n_spaces) +{ + /* FIXME: Should we pad with "figure space" (\u2007) + if non ascii data present? */ + while (n_spaces-- && (dest < dest_end)) + *dest++ = ' '; + *dest = '\0'; + return dest; +} + +/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte + characters; write the result into the DEST_SIZE-byte buffer, DEST. + ALIGNMENT specifies whether to left- or right-justify or to center. + If SRC requires more than *WIDTH columns, truncate it to fit. + When centering, the number of trailing spaces may be one less than the + number of leading spaces. The FLAGS parameter is unused at present. + Return the length in bytes required for the final result, not counting + the trailing NUL. A return value of DEST_SIZE or larger means there + wasn't enough space. DEST will be NUL terminated in any case. 
+ Return (size_t) -1 upon error (invalid multi-byte sequence in SRC, + or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified. + Update *WIDTH to indicate how many columns were used before padding. */ + +size_t +mbsalign (const char *src, char *dest, size_t dest_size, + size_t *width, mbs_align_t align, int flags) +{ + size_t ret = -1; + size_t src_size = strlen (src) + 1; + char *newstr = NULL; + wchar_t *str_wc = NULL; + const char *str_to_print = src; + size_t n_cols = src_size - 1; + size_t n_used_bytes = n_cols; /* Not including NUL */ + size_t n_spaces = 0; + bool conversion = false; + bool wc_enabled = false; + +#ifdef HAVE_WIDECHAR + /* In multi-byte locales convert to wide characters + to allow easy truncation. Also determine number + of screen columns used. */ + if (MB_CUR_MAX > 1) + { + size_t src_chars = mbstowcs (NULL, src, 0); + if (src_chars == (size_t) -1) + { + if (flags & MBA_UNIBYTE_FALLBACK) + goto mbsalign_unibyte; + else + goto mbsalign_cleanup; + } + src_chars += 1; /* make space for NUL */ + str_wc = malloc (src_chars * sizeof (wchar_t)); + if (str_wc == NULL) + { + if (flags & MBA_UNIBYTE_FALLBACK) + goto mbsalign_unibyte; + else + goto mbsalign_cleanup; + } + if (mbstowcs (str_wc, src, src_chars) != 0) + { + str_wc[src_chars - 1] = L'\0'; + wc_enabled = true; + conversion = wc_ensure_printable (str_wc); + n_cols = rpl_wcswidth (str_wc, src_chars); + } + } + + /* If we transformed or need to truncate the source string + then create a modified copy of it. */ + if (wc_enabled && (conversion || (n_cols > *width))) + { + if (conversion) + { + /* May have increased the size by converting + \t to \uFFFD for example. 
*/ + src_size = wcstombs(NULL, str_wc, 0) + 1; + } + newstr = malloc (src_size); + if (newstr == NULL) + { + if (flags & MBA_UNIBYTE_FALLBACK) + goto mbsalign_unibyte; + else + goto mbsalign_cleanup; + } + str_to_print = newstr; + n_cols = wc_truncate (str_wc, *width); + n_used_bytes = wcstombs (newstr, str_wc, src_size); + } +#endif + +mbsalign_unibyte: + + if (n_cols > *width) /* Unibyte truncation required. */ + { + n_cols = *width; + n_used_bytes = n_cols; + } + + if (*width > n_cols) /* Padding required. */ + n_spaces = *width - n_cols; + + /* indicate to caller how many cells needed (not including padding). */ + *width = n_cols; + + /* indicate to caller how many bytes needed (not including NUL). */ + ret = n_used_bytes + (n_spaces * 1); + + /* Write as much NUL terminated output to DEST as possible. */ + if (dest_size != 0) + { + char *dest_end = dest + dest_size - 1; + size_t start_spaces = n_spaces / 2 + n_spaces % 2; + size_t end_spaces = n_spaces / 2; + + switch (align) + { + case MBS_ALIGN_CENTER: + start_spaces = n_spaces / 2 + n_spaces % 2; + end_spaces = n_spaces / 2; + break; + case MBS_ALIGN_LEFT: + start_spaces = 0; + end_spaces = n_spaces; + break; + case MBS_ALIGN_RIGHT: + start_spaces = n_spaces; + end_spaces = 0; + break; + } + + dest = mbs_align_pad (dest, dest_end, start_spaces); + size_t space_left = dest_end - dest; + dest = mempcpy (dest, str_to_print, min (n_used_bytes, space_left)); + mbs_align_pad (dest, dest_end, end_spaces); + } + +mbsalign_cleanup: + + free (str_wc); + free (newstr); + + return ret; +} diff --git a/misc-utils/Makefile.am b/misc-utils/Makefile.am index 6176ab7d..8e1ea4fa 100644 --- a/misc-utils/Makefile.am +++ b/misc-utils/Makefile.am @@ -51,6 +51,7 @@ blkid_static_CFLAGS = $(AM_CFLAGS) -I$(ul_libblkid_incdir) endif endif +cal_SOURCES = cal.c $(top_srcdir)/lib/mbsalign.c if HAVE_TINFO cal_LDADD = -ltinfo @NCURSES_LIBS@ else diff --git a/misc-utils/cal.c b/misc-utils/cal.c index 5eb14b55..65d517a8 100644 --- 
a/misc-utils/cal.c +++ b/misc-utils/cal.c @@ -67,6 +67,7 @@ #include "c.h" #include "nls.h" +#include "mbsalign.h" #if defined(HAVE_LIBNCURSES) || defined(HAVE_LIBNCURSESW) @@ -243,8 +244,8 @@ struct fmt_st }; char * ascii_day(char *, int); -int center_str(const char* src, char* dest, size_t dest_size, int width); -void center(const char *, int, int); +int center_str(const char* src, char* dest, size_t dest_size, size_t width); +void center(const char *, size_t, int); void day_array(int, int, int, int *); int day_in_week(int, int, int); int day_in_year(int, int, int); @@ -753,95 +754,22 @@ trim_trailing_spaces(s) *p = '\0'; } -#ifdef HAVE_WIDECHAR -/* replace non printable chars. - * return 1 if replacement made, 0 otherwise */ -int wc_ensure_printable(wchar_t* wchars) -{ - int replaced=0; - wchar_t* wc = wchars; - while (*wc) { - if (!iswprint((wint_t) *wc)) { - *wc=L'\uFFFD'; - replaced=1; - } - wc++; - } - return replaced; -} - -/* truncate wchar string to width cells. - * returns number of cells used. */ -size_t wc_truncate(wchar_t* wchars, size_t width, size_t minchars) -{ - int wc=0; - int cells=0; - while (*(wchars+wc)) { - cells = wcswidth(wchars, wc+1); - if (cells > width) { - if (wc >= minchars) { - break; - } - } - wc++; - } - wchars[wc]=L'\0'; - return cells; -} -#endif - /* * Center string, handling multibyte characters appropriately. * In addition if the string is too large for the width it's truncated. * The number of trailing spaces may be 1 less than the number of leading spaces. 
*/ int -center_str(const char* src, char* dest, size_t dest_size, int width) +center_str(const char* src, char* dest, size_t dest_size, size_t width) { -#ifdef HAVE_WIDECHAR - wchar_t str_wc[FMT_ST_CHARS]; -#endif - char str[FMT_ST_CHARS]; - const char* str_to_print=src; - int used, spaces, wc_conversion=0, wc_enabled=0; - -#ifdef HAVE_WIDECHAR - if (mbstowcs(str_wc, src, ARRAY_SIZE(str_wc)) > 0) { - str_wc[ARRAY_SIZE(str_wc)-1]=L'\0'; - wc_enabled=1; - wc_conversion = wc_ensure_printable(str_wc); - used = wcswidth(str_wc, ARRAY_SIZE(str_wc)); - } - else -#endif - used = strlen(src); - - if (wc_conversion || used > width) { - str_to_print=str; - if (wc_enabled) { -#ifdef HAVE_WIDECHAR - used = wc_truncate(str_wc, width, 1); - wcstombs(str, str_wc, ARRAY_SIZE(str)); -#endif - } else { - memcpy(str, src, width); - str[width]='\0'; - } - } - - spaces = width - used; - spaces = ( spaces < 0 ? 0 : spaces ); - - return snprintf(dest, dest_size, "%*s%s%*s", - spaces / 2 + spaces % 2, "", - str_to_print, - spaces / 2, "" ); + return mbsalign(src, dest, dest_size, &width, + MBS_ALIGN_CENTER, MBA_UNIBYTE_FALLBACK); } void center(str, len, separate) const char *str; - int len; + size_t len; int separate; { char lineout[FMT_ST_CHARS];