From ae048f7ac4a9ab6576ca6842aa13e5c9c31e35a7 Mon Sep 17 00:00:00 2001 From: David Oberhollenzer Date: Fri, 9 Feb 2024 15:59:37 +0100 Subject: Add utility function to fixup Windows file paths The idea is to iterate over a (canonicalized) path with forward slashes by components, i.e. file and directory names. Each name is then looked at by iterating over components, i.e. everything between dots. If a component is an illegal name, like COM1 or AUX, we add an underscore. If it contains illegal characters, like : or \, we re-map that character into unicode private use area. Signed-off-by: David Oberhollenzer --- include/util/util.h | 2 + lib/util/Makemodule.am | 1 + lib/util/fix_win32_filename.c | 164 +++++++++++++++++++++++++++++++++++++ tests/libutil/Makemodule.am | 5 +- tests/libutil/fix_win32_filename.c | 55 +++++++++++++ 5 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 lib/util/fix_win32_filename.c create mode 100644 tests/libutil/fix_win32_filename.c diff --git a/include/util/util.h b/include/util/util.h index db6a712..0c632de 100644 --- a/include/util/util.h +++ b/include/util/util.h @@ -79,4 +79,6 @@ SQFS_INTERNAL int hex_decode(const char *in, size_t in_sz, SQFS_INTERNAL int base64_decode(const char *in, size_t in_len, sqfs_u8 *out, size_t *out_len); +SQFS_INTERNAL char *fix_win32_filename(const char *path); + #endif /* SQFS_UTIL_H */ diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am index ec38b7a..c102b9b 100644 --- a/lib/util/Makemodule.am +++ b/lib/util/Makemodule.am @@ -16,6 +16,7 @@ libutil_a_SOURCES += lib/util/source_date_epoch.c libutil_a_SOURCES += lib/util/file_cmp.c libutil_a_SOURCES += lib/util/hex_decode.c libutil_a_SOURCES += lib/util/base64_decode.c +libutil_a_SOURCES += lib/util/fix_win32_filename.c libutil_a_CFLAGS = $(AM_CFLAGS) libutil_a_CPPFLAGS = $(AM_CPPFLAGS) diff --git a/lib/util/fix_win32_filename.c b/lib/util/fix_win32_filename.c new file mode 100644 index 0000000..948de66 --- /dev/null +++ 
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/*
 * fix_win32_filename.c
 *
 * Copyright (C) 2024 David Oberhollenzer
 *
 * Built into libutil (lib/util/Makemodule.am) and exercised by
 * tests/libutil/fix_win32_filename.c.
 */

/*
 * util/util.h carries the SQFS_INTERNAL prototype of fix_win32_filename().
 * The __has_include probe only exists so that this file still compiles when
 * it is built on its own, outside of the source tree.
 */
#if defined(__has_include)
#  if __has_include("util/util.h")
#    include "util/util.h"
#  endif
#else
#  include "util/util.h"
#endif

#include <stdlib.h>
#include <string.h>

#ifdef _MSC_VER
#define strncasecmp _strnicmp
#define strcasecmp _stricmp
#else
#include <strings.h>
#endif

/* The output buffer is created with, and grown in steps of, this many bytes. */
enum { BUFFER_CHUNK = 128 };

/*
 * Growable, always NUL-terminated output string.
 *
 * used      - number of bytes in use, INCLUDING the terminating NUL
 * available - number of bytes allocated for buffer[]
 */
typedef struct {
	size_t used;
	size_t available;
	char buffer[];
} buffer_t;

/*
 * Append `count` bytes from `data` to the string held in `buf`.
 *
 * A NULL `buf` creates a fresh, empty buffer first, so appending zero bytes
 * to NULL yields an empty string. Returns the (possibly moved) buffer, or
 * NULL if allocation failed; in that case the old buffer has already been
 * freed and must not be touched again.
 */
static buffer_t *buffer_append(buffer_t *buf, const char *data, size_t count)
{
	size_t capacity, needed;

	if (buf == NULL) {
		buf = calloc(1, sizeof(*buf) + BUFFER_CHUNK);
		if (buf == NULL)
			return NULL;

		buf->used = 1;
		buf->available = BUFFER_CHUNK;
		buf->buffer[0] = '\0';
	}

	capacity = buf->available;
	needed = buf->used + count;

	while (capacity < needed)
		capacity += BUFFER_CHUNK;

	if (capacity != buf->available) {
		buffer_t *grown = realloc(buf, sizeof(*buf) + capacity);

		if (grown == NULL) {
			free(buf);
			return NULL;
		}

		buf = grown;
		buf->available = capacity;
	}

	/* The new data starts on top of the old terminator. */
	if (count > 0)
		memcpy(buf->buffer + buf->used - 1, data, count);

	buf->used += count;
	buf->buffer[buf->used - 1] = '\0';
	return buf;
}

/* Names that get an underscore appended when they make up a whole component. */
static const char *const bad_names[] = {
	"CON", "PRN", "AUX", "NUL",
	"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
	"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
};

/*
 * Return non-zero if the `len` bytes at `comp` are, ignoring case, exactly
 * one of the entries in bad_names.
 *
 * The length has to be compared as well: a bare strncasecmp() limited to
 * `len` would also accept every prefix of a reserved name ("a", "CO", "lpt")
 * and, for len == 0, the empty string.
 */
static int is_reserved_name(const char *comp, size_t len)
{
	for (size_t i = 0; i < sizeof(bad_names) / sizeof(bad_names[0]); ++i) {
		if (strlen(bad_names[i]) == len &&
		    strncasecmp(comp, bad_names[i], len) == 0)
			return 1;
	}

	return 0;
}

/*
 * Return non-zero if byte `c` has to be re-mapped: ASCII control characters,
 * DEL and < > : | ? * \ ".
 *
 * The argument is unsigned on purpose. Plain char may be signed, which would
 * make every byte >= 0x80 (i.e. every UTF-8 multi-byte sequence) compare
 * below 0x20.
 */
static int is_illegal_char(unsigned char c)
{
	if (c < 0x20 || c == 0x7F)
		return 1;

	switch (c) {
	case '<':
	case '>':
	case ':':
	case '|':
	case '?':
	case '*':
	case '\\':
	case '"':
		return 1;
	default:
		return 0;
	}
}

/*
 * Append one dot-separated component (`len` bytes at `comp`) to `buf`.
 *
 * A reserved name is copied verbatim, followed by an underscore. Otherwise
 * every illegal byte `v` is replaced by the three bytes
 * 0xEF, 0x80 | (v >> 6), 0x80 | (v & 0x3F), which is the UTF-8 encoding of
 * U+F000 + v, and everything else is copied unchanged.
 *
 * Always returns a valid buffer on success, even for an empty component and
 * a NULL `buf`. Returns NULL on allocation failure.
 */
static buffer_t *handle_component(buffer_t *buf, const char *comp, size_t len)
{
	const unsigned char *bytes = (const unsigned char *)comp;
	size_t start = 0;

	if (is_reserved_name(comp, len)) {
		buf = buffer_append(buf, comp, len);
		if (buf != NULL)
			buf = buffer_append(buf, "_", 1);
		return buf;
	}

	/*
	 * Nothing to copy, but the caller treats NULL as an error, so make
	 * sure the buffer exists (e.g. for "" or a leading dot or slash).
	 */
	if (len == 0)
		return buffer_append(buf, comp, 0);

	for (size_t i = 0; i < len; ++i) {
		unsigned char rep[3];

		if (!is_illegal_char(bytes[i]))
			continue;

		/* Flush the run of legal bytes in front of this one. */
		if (i > start) {
			buf = buffer_append(buf, comp + start, i - start);
			if (buf == NULL)
				return NULL;
		}

		rep[0] = 0xEF;
		rep[1] = 0x80 | ((bytes[i] >> 6) & 0x3f);
		rep[2] = 0x80 | (bytes[i] & 0x3f);

		buf = buffer_append(buf, (const char *)rep, sizeof(rep));
		if (buf == NULL)
			return NULL;

		start = i + 1;
	}

	if (start < len)
		buf = buffer_append(buf, comp + start, len - start);

	return buf;
}

/*
 * Append one path element (`len` bytes at `name`, no slashes) to `buf`,
 * running every dot-separated component through handle_component() and
 * re-inserting the dots. Returns NULL on allocation failure.
 */
static buffer_t *handle_name(buffer_t *buf, const char *name, size_t len)
{
	const char *sep;

	while ((sep = memchr(name, '.', len)) != NULL) {
		size_t comp_len = (size_t)(sep - name);

		buf = handle_component(buf, name, comp_len);
		if (buf == NULL)
			return NULL;

		buf = buffer_append(buf, ".", 1);
		if (buf == NULL)
			return NULL;

		len -= comp_len + 1;
		name = sep + 1;
	}

	return handle_component(buf, name, len);
}

/*
 * Create a copy of a forward-slash separated path in which reserved names
 * (CON, PRN, AUX, NUL, COM1-9, LPT1-9) got an underscore appended and
 * illegal characters were re-mapped, see handle_component().
 *
 * path - NUL-terminated input path, not modified.
 *
 * Returns a newly allocated string that the caller has to release with
 * free(), or NULL if `path` is NULL or an allocation failed.
 */
char *fix_win32_filename(const char *path)
{
	buffer_t *buf = NULL;
	const char *sep;
	char *out;
	size_t len;

	if (path == NULL)
		return NULL;

	while ((sep = strchr(path, '/')) != NULL) {
		buf = handle_name(buf, path, (size_t)(sep - path));
		if (buf == NULL)
			return NULL;

		buf = buffer_append(buf, "/", 1);
		if (buf == NULL)
			return NULL;

		path = sep + 1;
	}

	buf = handle_name(buf, path, strlen(path));
	if (buf == NULL)
		return NULL;

	/*
	 * Slide the string (including its terminator) over the bookkeeping
	 * header so that the allocation itself can be handed to the caller.
	 */
	len = buf->used;
	memmove(buf, buf->buffer, len);

	/* Shrinking is optional; on failure the original block is still valid. */
	out = realloc(buf, len);
	if (out == NULL)
		out = (char *)buf;

	return out;
}
test_canonicalize_name test_filename_sane test_fix_win32_filename \ test_sdate_epoch test_hex_decode test_base64_decode check_PROGRAMS += $(LIBUTIL_TESTS) diff --git a/tests/libutil/fix_win32_filename.c b/tests/libutil/fix_win32_filename.c new file mode 100644 index 0000000..a4f71e8 --- /dev/null +++ b/tests/libutil/fix_win32_filename.c @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ +/* + * fix_win32_filename.c + * + * Copyright (C) 2024 David Oberhollenzer + */ +#include "config.h" + +#include "util/test.h" +#include "util/util.h" + +static const struct { + const char *path; + const char *result; +} test_data[] = { + { "foo", "foo" }, + { "foo/bar", "foo/bar" }, + { "foo/bar.txt", "foo/bar.txt" }, + { "COM1", "COM1_" }, + { "COM1.txt", "COM1_.txt" }, + { "foo.aux", "foo.aux_" }, + { "foo/bar/test.LPT1/bla", "foo/bar/test.LPT1_/bla" }, + { "C:\\/foo/COM1.bla/bar", + "C\xEF\x80\xBA\xEF\x81\x9c/foo/COM1_.bla/bar" }, +}; + +int main(int argc, char **argv) +{ + (void)argc; (void)argv; + + for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i) { + char *result = fix_win32_filename(test_data[i].path); + size_t out_len = strlen(test_data[i].result); + + if (result == NULL) { + fprintf(stderr, "OOM for test case %u (%s)?\n", + (unsigned int)i, test_data[i].path); + return EXIT_FAILURE; + } + + if (out_len != strlen(result) || + memcmp(result, test_data[i].result, out_len) != 0) { + fprintf(stderr, + "Mismatch for %s -> %s, got %s instead!\n", + test_data[i].path, test_data[i].result, + result); + free(result); + return EXIT_FAILURE; + } + + free(result); + } + + return EXIT_SUCCESS; +} -- cgit v1.2.3