summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author David Oberhollenzer <david.oberhollenzer@sigma-star.at> 2024-02-09 15:59:37 +0100
committer David Oberhollenzer <david.oberhollenzer@sigma-star.at> 2024-02-09 15:59:37 +0100
commit ae048f7ac4a9ab6576ca6842aa13e5c9c31e35a7 (patch)
tree 79aa627a2689e76f23f856c85b9f86d9e651bfd2 /lib
parent 48b3355c7a887530a9bd17a1ad571e402102dd95 (diff)
Add utility function to fixup Windows file paths
The idea is to iterate over a (canonicalized) path with forward slashes by its components, i.e. file and directory names. Each name is then examined by iterating over its sub-components, i.e. everything between dots. If a sub-component is an illegal name, like COM1 or AUX, we append an underscore. If it contains illegal characters, like : or \, we re-map each such character into the Unicode private use area. Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib')
-rw-r--r-- lib/util/Makemodule.am 1
-rw-r--r-- lib/util/fix_win32_filename.c 164
2 files changed, 165 insertions, 0 deletions
diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am
index ec38b7a..c102b9b 100644
--- a/lib/util/Makemodule.am
+++ b/lib/util/Makemodule.am
@@ -16,6 +16,7 @@ libutil_a_SOURCES += lib/util/source_date_epoch.c
libutil_a_SOURCES += lib/util/file_cmp.c
libutil_a_SOURCES += lib/util/hex_decode.c
libutil_a_SOURCES += lib/util/base64_decode.c
+libutil_a_SOURCES += lib/util/fix_win32_filename.c
libutil_a_CFLAGS = $(AM_CFLAGS)
libutil_a_CPPFLAGS = $(AM_CPPFLAGS)
diff --git a/lib/util/fix_win32_filename.c b/lib/util/fix_win32_filename.c
new file mode 100644
index 0000000..948de66
--- /dev/null
+++ b/lib/util/fix_win32_filename.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/*
+ * fix_win32_filename.c
+ *
+ * Copyright (C) 2024 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "util/util.h"
+
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef _MSC_VER
+#define strncasecmp _strnicmp
+#define strcasecmp _stricmp
+#endif
+
+typedef struct { /* growable string buffer; header and data share one heap block */
+ size_t used; /* bytes of buffer[] in use, counting the terminating '\0' */
+ size_t available; /* bytes allocated for buffer[]; grown in steps of 128 */
+ char buffer[]; /* flexible array member holding the NUL-terminated data */
+} buffer_t;
+
+/*
+ * Append `count` bytes from `data` to a growable, NUL-terminated buffer.
+ *
+ * buf:   existing buffer, or NULL to have a fresh one (128 bytes of
+ *        payload space, holding the empty string) allocated first.
+ * data:  bytes to append; need not be NUL-terminated.
+ * count: number of bytes to copy from `data`.
+ *
+ * Returns the (possibly relocated) buffer, which is always NUL-terminated.
+ * On allocation failure the old buffer is freed and NULL is returned, so
+ * the caller must not touch its previous pointer afterwards.
+ */
+static buffer_t *buffer_append(buffer_t *buf, const char *data, size_t count)
+{
+	size_t capacity, required;
+	char *tail;
+
+	if (buf == NULL) {
+		buf = calloc(1, sizeof(*buf) + 128);
+		if (buf == NULL)
+			return NULL;
+
+		/* "used" counts the terminator, so an empty string uses 1 byte */
+		buf->available = 128;
+		buf->used = 1;
+		buf->buffer[0] = '\0';
+	}
+
+	required = buf->used + count;
+	capacity = buf->available;
+
+	if (capacity < required) {
+		buffer_t *grown;
+
+		/* grow in 128 byte steps until the new data fits */
+		do {
+			capacity += 128;
+		} while (capacity < required);
+
+		grown = realloc(buf, sizeof(*buf) + capacity);
+		if (grown == NULL) {
+			free(buf);
+			return NULL;
+		}
+
+		grown->available = capacity;
+		buf = grown;
+	}
+
+	/* the new data starts on top of the old terminator */
+	tail = buf->buffer + (buf->used - 1);
+	memcpy(tail, data, count);
+	tail[count] = '\0';
+
+	buf->used += count;
+	return buf;
+}
+
+static const char *bad_names[] = { /* component names that handle_component() suffixes with '_' */
+ "CON", "PRN", "AUX", "NUL", /* compared with strncasecmp(), i.e. ignoring case */
+ "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
+ "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
+};
+
+/*
+ * Append one dot-separated component of a file name to the output buffer,
+ * rewriting it where necessary.
+ *
+ * buf:  output buffer, may be NULL if nothing has been written yet.
+ * comp: start of the component; NOT NUL-terminated at `len`.
+ * len:  number of bytes in the component, may be 0.
+ *
+ * If the whole component equals one of the entries in bad_names (ignoring
+ * case), it is copied and an underscore is appended. Otherwise it is copied
+ * with every control character, DEL and each of < > : | ? * \ " replaced by
+ * the 3 byte UTF-8 encoding of U+F000 + <byte value>, a code point in the
+ * Unicode private use area.
+ *
+ * Returns the (possibly relocated) buffer. NULL is returned only if an
+ * allocation failed, in which case the buffer has already been freed.
+ */
+static buffer_t *handle_component(buffer_t *buf, const char *comp, size_t len)
+{
+	/*
+	 * An empty component (leading dot, "..", doubled separators) needs
+	 * no rewriting. Still go through buffer_append so that a buffer
+	 * exists afterwards and NULL keeps meaning "out of memory".
+	 */
+	if (len == 0)
+		return buffer_append(buf, comp, 0);
+
+	for (size_t i = 0; i < sizeof(bad_names) / sizeof(bad_names[0]); ++i) {
+		/*
+		 * The lengths must match as well: strncasecmp alone would
+		 * accept any prefix of a reserved name ("a", "c", "co", ...).
+		 */
+		if (strlen(bad_names[i]) == len &&
+		    strncasecmp(comp, bad_names[i], len) == 0) {
+			buf = buffer_append(buf, comp, len);
+			if (buf != NULL)
+				buf = buffer_append(buf, "_", 1);
+			return buf;
+		}
+	}
+
+	while (len > 0) {
+		sqfs_u8 value, rep[3];
+		size_t i;
+
+		/* find the longest prefix that can be copied verbatim */
+		for (i = 0; i < len; ++i) {
+			/*
+			 * Inspect the byte as unsigned. With a signed char,
+			 * UTF-8 bytes >= 0x80 would be negative and would be
+			 * mistaken for control characters.
+			 */
+			unsigned char c = (unsigned char)comp[i];
+
+			if (c < 0x20 || c == 0x7F)
+				break;
+			if (c == '<' || c == '>' || c == ':')
+				break;
+			if (c == '|' || c == '?' || c == '*')
+				break;
+			if (c == '\\' || c == '\"')
+				break;
+		}
+
+		if (i > 0) {
+			buf = buffer_append(buf, comp, i);
+			if (buf == NULL || i == len)
+				break;
+		}
+
+		/* comp[i] is an illegal byte: consume it and emit U+F0xx */
+		value = (sqfs_u8)comp[i++];
+		comp += i;
+		len -= i;
+
+		rep[0] = 0xEF;
+		rep[1] = 0x80 | ((value >> 6) & 0x3f);
+		rep[2] = 0x80 | ( value       & 0x3f);
+
+		buf = buffer_append(buf, (const char *)rep, 3);
+		if (buf == NULL)
+			break;
+	}
+
+	return buf;
+}
+
+/*
+ * Append a single file or directory name to the output buffer.
+ *
+ * The name (`len` bytes starting at `name`, not NUL-terminated) is split at
+ * every '.'; each piece is passed through handle_component() and the dots
+ * are copied in between unchanged.
+ *
+ * Returns the (possibly relocated) buffer, or NULL if one of the append
+ * steps returned NULL.
+ */
+static buffer_t *handle_name(buffer_t *buf, const char *name, size_t len)
+{
+	for (;;) {
+		const char *dot = memchr(name, '.', len);
+		size_t part;
+
+		if (dot == NULL)
+			break;
+
+		part = (size_t)(dot - name);
+
+		buf = handle_component(buf, name, part);
+		if (buf != NULL)
+			buf = buffer_append(buf, ".", 1);
+		if (buf == NULL)
+			return NULL;
+
+		/* continue behind the dot */
+		name = dot + 1;
+		len -= part + 1;
+	}
+
+	/* whatever follows the last dot (or the entire name if there is none) */
+	return handle_component(buf, name, len);
+}
+
+/*
+ * Produce a copy of a forward-slash separated path in which every name has
+ * been passed through handle_name().
+ *
+ * path: NUL-terminated input path; must not be NULL.
+ *
+ * Returns a NUL-terminated string located at the start of a heap block
+ * obtained from calloc()/realloc() (presumably to be released by the caller
+ * with free()), or NULL if building the string failed.
+ */
+char *fix_win32_filename(const char *path)
+{
+	buffer_t *buf = NULL;
+	const char *slash;
+	char *result;
+	size_t size;
+
+	for (slash = strchr(path, '/'); slash != NULL;
+	     slash = strchr(path, '/')) {
+		buf = handle_name(buf, path, (size_t)(slash - path));
+		if (buf != NULL)
+			buf = buffer_append(buf, "/", 1);
+		if (buf == NULL)
+			return NULL;
+
+		path = slash + 1;
+	}
+
+	/* last name, i.e. everything after the final slash */
+	buf = handle_name(buf, path, strlen(path));
+	if (buf == NULL)
+		return NULL;
+
+	/*
+	 * Slide the string down over the bookkeeping header so the block
+	 * itself can be handed out as a plain string. The size has to be
+	 * read first, the header is gone afterwards.
+	 */
+	size = buf->used;
+	memmove(buf, buf->buffer, size);
+
+	/* try to give back the slack; the old block stays usable on failure */
+	result = realloc(buf, size);
+
+	return result != NULL ? result : (char *)buf;
+}