volume_id: add and export string encoding function

author Kay Sievers <kay.sievers@vrfy.org>

Thu, 17 May 2007 18:09:24 +0000 (20:09 +0200)

committer Kay Sievers <kay.sievers@vrfy.org>

Thu, 17 May 2007 18:09:24 +0000 (20:09 +0200)
author Kay Sievers <kay.sievers@vrfy.org>
Thu, 17 May 2007 18:09:24 +0000 (20:09 +0200)
committer Kay Sievers <kay.sievers@vrfy.org>
Thu, 17 May 2007 18:09:24 +0000 (20:09 +0200)
diff --git a/extras/volume_id/lib/Makefile b/extras/volume_id/lib/Makefile

index 17992ba02ece0c6473e034d719d4589f0b6a2058..328758d0393f59bbed182a5d70442879f414b799 100644 (file)
--- a/extras/volume_id/lib/Makefile
+++ b/extras/volume_id/lib/Makefile
@@ -13,7 +13,7 @@ INSTALL_DATA  = ${INSTALL} -m 644
  INSTALL_LIB = ${INSTALL} -m 755
  
  SHLIB_CUR = 0
-SHLIB_REV = 77
+SHLIB_REV = 78
  SHLIB_AGE = 0
  SHLIB = libvolume_id.so.$(SHLIB_CUR).$(SHLIB_REV).$(SHLIB_AGE)
  
@@ -81,7 +81,7 @@ libvolume_id.a: $(HEADERS) $(OBJS)
         $(E) "  RANLIB  " $@
         $(Q) $(RANLIB) $@
  
-$(SHLIB): $(HEADERS) $(addprefix .shlib/,$(OBJS))
+$(SHLIB): $(HEADERS) exported_symbols $(addprefix .shlib/,$(OBJS))
         $(E) "  CC      " $@
         $(Q) $(CC) -shared $(CFLAGS) -o $@ \
                 -Wl,-soname,libvolume_id.so.$(SHLIB_CUR),--version-script,exported_symbols \
diff --git a/extras/volume_id/lib/exported_symbols b/extras/volume_id/lib/exported_symbols

index 3422152edb4285944040c458bc9f153fb07478db..05b6e52bc4f00374ba454b7506ac30b3e893092a 100644 (file)
--- a/extras/volume_id/lib/exported_symbols
+++ b/extras/volume_id/lib/exported_symbols
@@ -16,6 +16,7 @@
         volume_id_probe_raid;
         volume_id_all_probers;
         volume_id_get_prober_by_type;
+       volume_id_encode_string;
  
         volume_id_open_node;
  
diff --git a/extras/volume_id/lib/libvolume_id.h b/extras/volume_id/lib/libvolume_id.h

index c0ab29e0f782daaab35ecd8e54506465e377fc74..19aee68d010cef74be2dec56022760ce054df3b5 100644 (file)
--- a/extras/volume_id/lib/libvolume_id.h
+++ b/extras/volume_id/lib/libvolume_id.h
@@ -40,6 +40,7 @@ extern int volume_id_get_uuid_raw(struct volume_id *id, const uint8_t **uuid, si
  extern int volume_id_get_usage(struct volume_id *id, const char **usage);
  extern int volume_id_get_type(struct volume_id *id, const char **type);
  extern int volume_id_get_type_version(struct volume_id *id, const char **type_version);
+extern int volume_id_encode_string(const char *str, char *str_enc, size_t len);
  
  /*
   * Note: everything below will be made private or removed from
diff --git a/extras/volume_id/lib/util.c b/extras/volume_id/lib/util.c

index 54d9fd0d80ce237fb4a4a3961a8c463823d50936..eaaececadbced2ac20a0bf36dfa4df178a60e7cb 100644 (file)
--- a/extras/volume_id/lib/util.c
+++ b/extras/volume_id/lib/util.c
@@ -28,6 +28,129 @@
  #include "libvolume_id.h"
  #include "util.h"
  
+/*
+ * Number of bytes the UTF-8 sequence starting at str[0] claims to occupy,
+ * judged from its lead byte alone; 0 if that byte cannot start a sequence.
+ */
+static int utf8_encoded_expected_len(const char *str)
+{
+       const unsigned char lead = (unsigned char)str[0];
+       int ones = 0;
+
+       /* count the run of set bits at the top of the lead byte */
+       while (ones < 8 && (lead & (0x80 >> ones)))
+               ones++;
+
+       /* 0xxxxxxx is plain ASCII */
+       if (ones == 0)
+               return 1;
+       /* a single set bit is a continuation byte; 0xfe and 0xff start nothing */
+       return (ones >= 2 && ones <= 6) ? ones : 0;
+}
+
+/*
+ * Decode the UTF-8 sequence starting at str[0] into its code point.
+ *
+ * The lead byte fixes the sequence length and contributes its low bits:
+ *   110xxxxx  2 bytes, mask 0x1f
+ *   1110xxxx  3 bytes, mask 0x0f
+ *   11110xxx  4 bytes, mask 0x07
+ *   111110xx  5 bytes, mask 0x03
+ *   1111110x  6 bytes, mask 0x01
+ * Every following byte must have the form 10xxxxxx and adds six more bits.
+ *
+ * Returns the decoded value, or -1 if the lead byte cannot start a sequence
+ * or one of the following bytes is not a continuation byte.
+ */
+static int utf8_encoded_to_unichar(const char *str)
+{
+       const int seqlen = utf8_encoded_expected_len(str);
+       int value;
+       int pos;
+
+       if (seqlen == 1)
+               return (int)str[0];
+       if (seqlen < 2 || seqlen > 6)
+               return -1;
+
+       /* 0x7f >> seqlen yields exactly the lead byte masks listed above */
+       value = (int)str[0] & (0x7f >> seqlen);
+
+       pos = 1;
+       while (pos < seqlen) {
+               const int byte = (int)str[pos];
+
+               if ((byte & 0xc0) != 0x80)
+                       return -1;
+               value = (value << 6) | (byte & 0x3f);
+               pos++;
+       }
+       return value;
+}
+
+/*
+ * Number of bytes needed to encode unichar in UTF-8 (1..6).
+ */
+static int utf8_unichar_to_encoded_len(int unichar)
+{
+       /* limit[k] is the smallest value that no longer fits into k + 1 bytes */
+       static const int limit[] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
+       int bytes;
+
+       for (bytes = 1; bytes <= 5; bytes++) {
+               if (unichar < limit[bytes - 1])
+                       return bytes;
+       }
+       return 6;
+}
+
+/*
+ * Range check for a decoded value: 0 if it is above 0x10ffff, inside
+ * 0xd800..0xdfff or 0xfdd0..0xfdef, or ends in 0xffff; 1 otherwise.
+ */
+static int utf8_unichar_valid_range(int unichar)
+{
+       const int too_large = unichar > 0x10ffff;
+       const int surrogate = (unichar & 0xfffff800) == 0xd800;
+       const int fdd0_block = unichar >= 0xfdd0 && unichar <= 0xfdef;
+       const int low16_set = (unichar & 0xffff) == 0xffff;
+
+       return !(too_large || surrogate || fdd0_block || low16_set);
+}
+
+/*
+ * Validate the UTF-8 sequence at the start of str.
+ * Returns the number of bytes the sequence occupies (1 for ASCII), or -1
+ * if the lead byte is invalid, a byte of the sequence lacks the high bit,
+ * the decoded value would canonically need a different number of bytes,
+ * or the value is outside the accepted range.
+ */
+int volume_id_utf8_encoded_valid_unichar(const char *str)
+{
+       const int expected = utf8_encoded_expected_len(str);
+       int codepoint;
+       int pos = 0;
+
+       if (expected == 0)
+               return -1;
+       if (expected == 1)
+               return 1;
+
+       /* a NUL inside the sequence fails here, so we never read past the end */
+       while (pos < expected) {
+               if (!(str[pos] & 0x80))
+                       return -1;
+               pos++;
+       }
+
+       codepoint = utf8_encoded_to_unichar(str);
+       if (utf8_unichar_to_encoded_len(codepoint) != expected)
+               return -1;
+       if (!utf8_unichar_valid_range(codepoint))
+               return -1;
+       return expected;
+}
+
  void volume_id_set_unicode16(char *str, size_t len, const uint8_t *buf, enum endian endianess, size_t count)
  {
         unsigned int i, j;
diff --git a/extras/volume_id/lib/util.h b/extras/volume_id/lib/util.h

index 1206116ce2e26e7c261401f1a3900f5f33b3dbde..6ed7b48b5061d543e9c01be4508ad9787ee6361b 100644 (file)
--- a/extras/volume_id/lib/util.h
+++ b/extras/volume_id/lib/util.h
@@ -23,6 +23,8 @@
  #include <byteswap.h>
  #include <syslog.h>
  
+#define ALLOWED_CHARS                          "#+-.:=@_%"
+
  #ifndef PACKED
  #define PACKED                         __attribute__((packed))
  #endif
@@ -77,6 +79,7 @@ enum endian {
         BE = 1
  };
  
+extern int volume_id_utf8_encoded_valid_unichar(const char *str);
  extern void volume_id_set_unicode16(char *str, size_t len, const uint8_t *buf, enum endian endianess, size_t count);
  extern void volume_id_set_usage(struct volume_id *id, enum volume_id_usage usage_id);
  extern void volume_id_set_label_raw(struct volume_id *id, const uint8_t *buf, size_t count);
diff --git a/extras/volume_id/lib/volume_id.c b/extras/volume_id/lib/volume_id.c

index 15e9cda4da57952f74af0bba6ab2ce36ee50221f..59303d5bffce2f2e8228f33980bf7a385151d3ee 100644 (file)
--- a/extras/volume_id/lib/volume_id.c
+++ b/extras/volume_id/lib/volume_id.c
@@ -276,6 +276,60 @@ int volume_id_get_type_version(struct volume_id *id, const char **type_version)
         return 1;
  }
  
+/* 0 for ASCII letters, digits, ALLOWED_CHARS members and NUL; 1 otherwise */
+static int needs_encoding(const char c)
+{
+       if (c >= '0' && c <= '9')
+               return 0;
+       if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
+               return 0;
+       return strchr(ALLOWED_CHARS, c) ? 0 : 1;
+}
+
+/**
+ * volume_id_encode_string:
+ * @str: Input string to be encoded.
+ * @str_enc: Target buffer to store the encoded string.
+ * @len: Size of the target buffer in bytes, including the terminating
+ * NUL. A buffer of 4 * strlen(@str) + 1 bytes is always large enough.
+ *
+ * Encode all potentially unsafe characters of a string to the
+ * corresponding hex value prefixed by '\x'. If the buffer is too small,
+ * the output is cut at a character boundary and stays NUL-terminated.
+ *
+ * Returns: 1 if the entire string was copied, 0 otherwise.
+ **/
+int volume_id_encode_string(const char *str, char *str_enc, size_t len)
+{
+       size_t i, j;
+
+       if (str == NULL || str_enc == NULL || len == 0)
+               return 0;
+
+       for (i = 0, j = 0; str[i] != '\0'; i++) {
+               int seqlen = volume_id_utf8_encoded_valid_unichar(&str[i]);
+               int escape = seqlen <= 1 && (str[i] == '\\' || needs_encoding(str[i]));
+               size_t need = escape ? 4 : (seqlen > 1 ? (size_t)seqlen : 1);
+
+               /* check before writing, and keep one byte for the terminator */
+               if (need >= len - j)
+                       goto err;
+               if (escape) {
+                       /* the NUL sprintf appends lands in the reserved byte */
+                       sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
+               } else {
+                       memcpy(&str_enc[j], &str[i], need);
+                       i += need - 1;
+               }
+               j += need;
+       }
+       str_enc[j] = '\0';
+       return 1;
+err:
+       str_enc[j] = '\0';
+       return 0;
+}
+
  /**
   * volume_id_probe_raid:
   * @id: Probing context.
author	Kay Sievers <kay.sievers@vrfy.org>
	Thu, 17 May 2007 18:09:24 +0000 (20:09 +0200)
committer	Kay Sievers <kay.sievers@vrfy.org>
	Thu, 17 May 2007 18:09:24 +0000 (20:09 +0200)
extras/volume_id/lib/Makefile		patch \| blob \| history
extras/volume_id/lib/exported_symbols		patch \| blob \| history
extras/volume_id/lib/libvolume_id.h		patch \| blob \| history
extras/volume_id/lib/util.c		patch \| blob \| history
extras/volume_id/lib/util.h		patch \| blob \| history
extras/volume_id/lib/volume_id.c		patch \| blob \| history