summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Oberhollenzer <david.oberhollenzer@sigma-star.at> 2024-02-09 15:59:37 +0100
committer David Oberhollenzer <david.oberhollenzer@sigma-star.at> 2024-02-09 15:59:37 +0100
commit ae048f7ac4a9ab6576ca6842aa13e5c9c31e35a7 (patch)
tree 79aa627a2689e76f23f856c85b9f86d9e651bfd2
parent 48b3355c7a887530a9bd17a1ad571e402102dd95 (diff)
Add utility function to fixup Windows file paths
The idea is to iterate over a (canonicalized) path with forward slashes one component at a time, i.e. over its file and directory names. Each name is in turn split at the dots, and every dot-separated part is inspected. If a part is a reserved device name, like COM1 or AUX, we append an underscore. If it contains illegal characters, like : or \, we re-map each such character into the Unicode private use area. Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
-rw-r--r--include/util/util.h2
-rw-r--r--lib/util/Makemodule.am1
-rw-r--r--lib/util/fix_win32_filename.c164
-rw-r--r--tests/libutil/Makemodule.am5
-rw-r--r--tests/libutil/fix_win32_filename.c55
5 files changed, 226 insertions, 1 deletions
diff --git a/include/util/util.h b/include/util/util.h
index db6a712..0c632de 100644
--- a/include/util/util.h
+++ b/include/util/util.h
@@ -79,4 +79,6 @@ SQFS_INTERNAL int hex_decode(const char *in, size_t in_sz,
SQFS_INTERNAL int base64_decode(const char *in, size_t in_len,
sqfs_u8 *out, size_t *out_len);
+/*
+ * Create a copy of a forward-slash separated path that is acceptable as
+ * a Windows file path.
+ *
+ * Every name between slashes is split at the dots. Parts that match a
+ * reserved device name (e.g. COM1, AUX) get an underscore appended and
+ * illegal characters (e.g. ':' or '\') are replaced with code points
+ * from the Unicode private use area, encoded as UTF-8.
+ *
+ * Returns a heap allocated string that the caller must release with
+ * free(), or NULL if an allocation failed.
+ */
+SQFS_INTERNAL char *fix_win32_filename(const char *path);
+
#endif /* SQFS_UTIL_H */
diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am
index ec38b7a..c102b9b 100644
--- a/lib/util/Makemodule.am
+++ b/lib/util/Makemodule.am
@@ -16,6 +16,7 @@ libutil_a_SOURCES += lib/util/source_date_epoch.c
libutil_a_SOURCES += lib/util/file_cmp.c
libutil_a_SOURCES += lib/util/hex_decode.c
libutil_a_SOURCES += lib/util/base64_decode.c
+libutil_a_SOURCES += lib/util/fix_win32_filename.c
libutil_a_CFLAGS = $(AM_CFLAGS)
libutil_a_CPPFLAGS = $(AM_CPPFLAGS)
diff --git a/lib/util/fix_win32_filename.c b/lib/util/fix_win32_filename.c
new file mode 100644
index 0000000..948de66
--- /dev/null
+++ b/lib/util/fix_win32_filename.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/*
+ * fix_win32_filename.c
+ *
+ * Copyright (C) 2024 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "util/util.h"
+
+#include <string.h>
+#include <stdlib.h>
+
+/*
+ * When building with MSVC, map the case-insensitive string comparison
+ * functions to the names that compiler provides.
+ */
+#ifdef _MSC_VER
+#define strncasecmp _strnicmp
+#define strcasecmp _stricmp
+#endif
+
+/*
+ * Growable, always NUL-terminated string buffer. The bookkeeping fields
+ * and the string itself live in one single heap allocation.
+ */
+typedef struct {
+ /* number of bytes of `buffer` in use, including the terminating NUL */
+ size_t used;
+ /* number of bytes allocated for `buffer` */
+ size_t available;
+ /* the string data (flexible array member) */
+ char buffer[];
+} buffer_t;
+
+/*
+ * Append `count` bytes from `data` to the string held in `buf` and
+ * re-terminate it.
+ *
+ * If `buf` is NULL, a fresh buffer holding an empty string is created
+ * first. The storage grows in steps of 128 bytes, so the buffer may move;
+ * always continue with the returned pointer.
+ *
+ * Returns the buffer, or NULL if an allocation failed. In the failure
+ * case an already existing buffer has been freed.
+ */
+static buffer_t *buffer_append(buffer_t *buf, const char *data, size_t count)
+{
+	size_t capacity, required;
+	char *dst;
+
+	/* lazily create the buffer, containing only the terminator */
+	if (buf == NULL) {
+		buf = calloc(1, sizeof(*buf) + 128);
+		if (buf == NULL)
+			return NULL;
+
+		buf->available = 128;
+		buf->used = 1;
+		buf->buffer[0] = '\0';
+	}
+
+	/* `used` already accounts for the terminator */
+	required = buf->used + count;
+
+	for (capacity = buf->available; capacity < required; capacity += 128)
+		;
+
+	if (capacity != buf->available) {
+		buffer_t *grown = realloc(buf, sizeof(*buf) + capacity);
+
+		if (grown == NULL) {
+			free(buf);
+			return NULL;
+		}
+
+		grown->available = capacity;
+		buf = grown;
+	}
+
+	/* the new data overwrites the old terminator */
+	dst = buf->buffer + (buf->used - 1);
+	memcpy(dst, data, count);
+	dst[count] = '\0';
+
+	buf->used += count;
+	return buf;
+}
+
+/*
+ * Device names that are reserved on Windows and cannot be used as a file
+ * name, with or without an extension. Matching is done ignoring case.
+ *
+ * NOTE(review): current Microsoft documentation reportedly also lists
+ * COM0/LPT0 and superscript-digit variants; confirm whether those should
+ * be added here.
+ */
+static const char *const bad_names[] = {
+	"CON", "PRN", "AUX", "NUL",
+	"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
+	"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
+};
+
+/*
+ * Append one dot-separated part of a file name to the buffer, made safe
+ * for Windows.
+ *
+ * If the part is, ignoring case, exactly one of the reserved device names,
+ * it is copied and an underscore is appended. Otherwise it is copied with
+ * every illegal byte (control characters, DEL and < > : | ? * \ ") replaced
+ * by the UTF-8 encoding of the private use code point U+F000 + byte.
+ *
+ * `buf` may be NULL, in which case a new buffer is created. Returns the
+ * (possibly moved) buffer, or NULL if an allocation failed.
+ */
+static buffer_t *handle_component(buffer_t *buf, const char *comp, size_t len)
+{
+	/*
+	 * An empty part (leading or trailing dot, "..") is never a device
+	 * name. Append nothing, but still make sure the buffer exists, so the
+	 * callers do not mistake a NULL buffer for an allocation failure.
+	 */
+	if (len == 0)
+		return buffer_append(buf, comp, 0);
+
+	for (size_t i = 0; i < sizeof(bad_names) / sizeof(bad_names[0]); ++i) {
+		/*
+		 * strncasecmp() only looks at the first `len` bytes, so the
+		 * lengths have to agree as well. Otherwise any prefix of a
+		 * reserved name ("a", "co", "lpt", ...) would be renamed.
+		 */
+		if (strlen(bad_names[i]) != len)
+			continue;
+
+		if (strncasecmp(comp, bad_names[i], len) != 0)
+			continue;
+
+		buf = buffer_append(buf, comp, len);
+		if (buf != NULL)
+			buf = buffer_append(buf, "_", 1);
+		return buf;
+	}
+
+	while (len > 0) {
+		sqfs_u8 value, rep[3];
+		size_t i;
+
+		/* determine the length of the leading run of legal bytes */
+		for (i = 0; i < len; ++i) {
+			/*
+			 * Inspect the byte as unsigned. With a signed char,
+			 * the bytes of multi-byte UTF-8 sequences are negative
+			 * and would be mistaken for control characters.
+			 */
+			unsigned char c = (unsigned char)comp[i];
+
+			if (c < 0x20 || c == 0x7F)
+				break;
+			if (c == '<' || c == '>' || c == ':')
+				break;
+			if (c == '|' || c == '?' || c == '*')
+				break;
+			if (c == '\\' || c == '\"')
+				break;
+		}
+
+		if (i > 0) {
+			buf = buffer_append(buf, comp, i);
+			if (buf == NULL || i == len)
+				break;
+		}
+
+		/* comp[i] is the offending byte; consume it with the run */
+		value = (sqfs_u8)comp[i++];
+		comp += i;
+		len -= i;
+
+		/* UTF-8 encoding of U+F000 + value */
+		rep[0] = 0xEF;
+		rep[1] = 0x80 | ((value >> 6) & 0x3f);
+		rep[2] = 0x80 | ( value       & 0x3f);
+
+		buf = buffer_append(buf, (const char *)rep, 3);
+		if (buf == NULL)
+			break;
+	}
+
+	return buf;
+}
+
+/*
+ * Process a single file or directory name of `len` bytes: split it at
+ * every '.', pass each part through handle_component() and put the dots
+ * back in between.
+ *
+ * Returns the buffer, or NULL if any of the append steps failed.
+ */
+static buffer_t *handle_name(buffer_t *buf, const char *name, size_t len)
+{
+	for (;;) {
+		const char *dot = memchr(name, '.', len);
+		size_t part;
+
+		if (dot == NULL)
+			break;
+
+		part = (size_t)(dot - name);
+
+		buf = handle_component(buf, name, part);
+		if (buf != NULL)
+			buf = buffer_append(buf, ".", 1);
+		if (buf == NULL)
+			return NULL;
+
+		/* continue right behind the dot */
+		name = dot + 1;
+		len -= part + 1;
+	}
+
+	/* whatever follows the last dot (or the entire name) */
+	return handle_component(buf, name, len);
+}
+
+/*
+ * Build a copy of a forward-slash separated path in which every name has
+ * been passed through handle_name().
+ *
+ * Returns a heap allocated, NUL-terminated string that has to be released
+ * with free(), or NULL if an allocation failed.
+ */
+char *fix_win32_filename(const char *path)
+{
+	buffer_t *acc = NULL;
+	const char *slash;
+	char *result;
+	size_t total;
+
+	for (slash = strchr(path, '/'); slash != NULL;
+	     slash = strchr(path, '/')) {
+		acc = handle_name(acc, path, (size_t)(slash - path));
+		if (acc != NULL)
+			acc = buffer_append(acc, "/", 1);
+		if (acc == NULL)
+			return NULL;
+
+		path = slash + 1;
+	}
+
+	/* the last name has no trailing slash */
+	acc = handle_name(acc, path, strlen(path));
+	if (acc == NULL)
+		return NULL;
+
+	/*
+	 * Slide the string (terminator included) down to the start of the
+	 * allocation, over the bookkeeping fields, so that the allocation
+	 * itself can be handed out as a plain string.
+	 */
+	total = acc->used;
+	memmove(acc, acc->buffer, total);
+
+	/* try to give back the excess; if that fails the block is kept */
+	result = realloc(acc, total);
+
+	return (result != NULL) ? result : (char *)acc;
+}
diff --git a/tests/libutil/Makemodule.am b/tests/libutil/Makemodule.am
index e039282..c783853 100644
--- a/tests/libutil/Makemodule.am
+++ b/tests/libutil/Makemodule.am
@@ -36,9 +36,12 @@ test_hex_decode_LDADD = libutil.a libcompat.a
test_base64_decode_SOURCES = tests/libutil/base64_decode.c
test_base64_decode_LDADD = libutil.a libcompat.a
+test_fix_win32_filename_SOURCES = tests/libutil/fix_win32_filename.c
+test_fix_win32_filename_LDADD = libutil.a libcompat.a
+
LIBUTIL_TESTS = \
test_str_table test_rbtree test_xxhash test_threadpool test_ismemzero \
- test_canonicalize_name test_filename_sane \
+ test_canonicalize_name test_filename_sane test_fix_win32_filename \
test_sdate_epoch test_hex_decode test_base64_decode
check_PROGRAMS += $(LIBUTIL_TESTS)
diff --git a/tests/libutil/fix_win32_filename.c b/tests/libutil/fix_win32_filename.c
new file mode 100644
index 0000000..a4f71e8
--- /dev/null
+++ b/tests/libutil/fix_win32_filename.c
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+/*
+ * fix_win32_filename.c
+ *
+ * Copyright (C) 2024 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "config.h"
+
+#include "util/test.h"
+#include "util/util.h"
+
+/*
+ * Test vectors: `path` is handed to fix_win32_filename(), `result` is the
+ * exact string that is expected back.
+ */
+static const struct {
+ const char *path;
+ const char *result;
+} test_data[] = {
+ /* nothing to fix, must pass through unchanged */
+ { "foo", "foo" },
+ { "foo/bar", "foo/bar" },
+ { "foo/bar.txt", "foo/bar.txt" },
+ /* reserved device names get an underscore, in any dot-separated part */
+ { "COM1", "COM1_" },
+ { "COM1.txt", "COM1_.txt" },
+ { "foo.aux", "foo.aux_" },
+ { "foo/bar/test.LPT1/bla", "foo/bar/test.LPT1_/bla" },
+ /* ':' becomes U+F03A (EF 80 BA), '\' becomes U+F05C (EF 81 9C) */
+ { "C:\\/foo/COM1.bla/bar",
+ "C\xEF\x80\xBA\xEF\x81\x9c/foo/COM1_.bla/bar" },
+};
+
+/*
+ * Run every entry of test_data through fix_win32_filename() and compare
+ * the outcome with the expected string. Stops with EXIT_FAILURE at the
+ * first allocation failure or mismatch, otherwise returns EXIT_SUCCESS.
+ */
+int main(int argc, char **argv)
+{
+	const size_t count = sizeof(test_data) / sizeof(test_data[0]);
+	size_t idx;
+
+	(void)argc;
+	(void)argv;
+
+	for (idx = 0; idx < count; ++idx) {
+		const char *input = test_data[idx].path;
+		const char *expected = test_data[idx].result;
+		char *actual = fix_win32_filename(input);
+		size_t expected_len = strlen(expected);
+		int matches;
+
+		if (actual == NULL) {
+			fprintf(stderr, "OOM for test case %u (%s)?\n",
+				(unsigned int)idx, input);
+			return EXIT_FAILURE;
+		}
+
+		matches = strlen(actual) == expected_len &&
+			  memcmp(actual, expected, expected_len) == 0;
+
+		/* report before releasing, the message prints the string */
+		if (!matches) {
+			fprintf(stderr,
+				"Mismatch for %s -> %s, got %s instead!\n",
+				input, expected, actual);
+		}
+
+		free(actual);
+
+		if (!matches)
+			return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}