diff options
author | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2024-02-09 15:59:37 +0100 |
---|---|---|
committer | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2024-02-09 15:59:37 +0100 |
commit | ae048f7ac4a9ab6576ca6842aa13e5c9c31e35a7 (patch) | |
tree | 79aa627a2689e76f23f856c85b9f86d9e651bfd2 /lib | |
parent | 48b3355c7a887530a9bd17a1ad571e402102dd95 (diff) |
Add utility function to fixup Windows file paths
The idea is to iterate over a (canonicalized) path with forward
slashes by components, i.e. file and directory names. Each name
is then looked at by iterating over components, i.e. everything
between dots.
If a component is an illegal name, like COM1 or AUX, we append an
underscore. If it contains illegal characters, like : or \, we
re-map each such character into the Unicode private use area.
Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/util/Makemodule.am | 1 | ||||
-rw-r--r-- | lib/util/fix_win32_filename.c | 164 |
2 files changed, 165 insertions, 0 deletions
/* SPDX-License-Identifier: LGPL-3.0-or-later */
/*
 * fix_win32_filename.c
 *
 * Copyright (C) 2024 David Oberhollenzer <goliath@infraroot.at>
 *
 * Rewrites a forward-slash separated path so that every file and directory
 * name in it is acceptable on Windows.
 *
 * Build integration (lib/util/Makemodule.am):
 *   libutil_a_SOURCES += lib/util/fix_win32_filename.c
 */

/*
 * Project header (presumably declares fix_win32_filename() -- confirm).
 * The guard lets this translation unit also be compiled on its own when the
 * project include path is not available; nothing below depends on it.
 */
#if defined(__has_include)
# if __has_include("util/util.h")
#  include "util/util.h"
# endif
#else
# include "util/util.h"
#endif

#include <string.h>
#include <stdlib.h>

/* Allocation granularity of the growable output buffer, in bytes. */
enum { BUFFER_GROW_STEP = 128 };

/*
 * Growable, always NUL-terminated byte string. The header and the payload
 * live in a single heap allocation.
 */
typedef struct {
	size_t used;      /* bytes in use, INCLUDING the terminating '\0' */
	size_t available; /* capacity of buffer[] in bytes */
	char buffer[];
} buffer_t;

/*
 * Append `count` bytes from `data` to `buf`, keeping it NUL-terminated.
 *
 * If `buf` is NULL, a new buffer is allocated first. Returns the (possibly
 * relocated) buffer. On allocation failure the old buffer is freed and NULL
 * is returned, so the caller never has to clean up.
 */
static buffer_t *buffer_append(buffer_t *buf, const char *data, size_t count)
{
	size_t bufspace, needed;

	if (buf == NULL) {
		buf = calloc(1, sizeof(*buf) + BUFFER_GROW_STEP);
		if (buf == NULL)
			return NULL;

		buf->used = 1;
		buf->available = BUFFER_GROW_STEP;
		buf->buffer[0] = '\0';
	}

	/* Refuse sizes that would wrap around in the computations below. */
	if (count > (size_t)-1 - sizeof(*buf) - BUFFER_GROW_STEP - buf->used) {
		free(buf);
		return NULL;
	}

	bufspace = buf->available;
	needed = buf->used + count;

	while (bufspace < needed)
		bufspace += BUFFER_GROW_STEP;

	if (bufspace != buf->available) {
		void *new_buf = realloc(buf, sizeof(*buf) + bufspace);
		if (new_buf == NULL) {
			free(buf);
			return NULL;
		}
		buf = new_buf;
		buf->available = bufspace;
	}

	/* Overwrite the old terminator, then write a new one at the end. */
	buf->used -= 1;
	if (count > 0)
		memcpy(buf->buffer + buf->used, data, count);
	buf->used += count;
	buf->buffer[buf->used++] = '\0';
	return buf;
}

/* Device names that must not be used as a name component (upper case). */
static const char *const bad_names[] = {
	"CON", "PRN", "AUX", "NUL",
	"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
	"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
};

/* Upper-case an ASCII letter; every other value is returned unchanged. */
static int ascii_toupper(int c)
{
	return (c >= 'a' && c <= 'z') ? (c - 'a' + 'A') : c;
}

/*
 * Return non-zero if the `len` bytes at `comp` are exactly one of the
 * entries of bad_names, compared case-insensitively (ASCII only, so the
 * result does not depend on the current locale).
 *
 * The length has to match as well: a mere prefix such as "CO", or an empty
 * component, is not a reserved name.
 */
static int is_reserved_name(const char *comp, size_t len)
{
	for (size_t i = 0; i < sizeof(bad_names) / sizeof(bad_names[0]); ++i) {
		const char *name = bad_names[i];
		size_t j;

		if (strlen(name) != len)
			continue;

		for (j = 0; j < len; ++j) {
			if (ascii_toupper(comp[j]) != name[j])
				break;
		}

		if (j == len)
			return 1;
	}

	return 0;
}

/*
 * Return non-zero for bytes that get re-mapped: ASCII control characters,
 * DEL and the punctuation < > : | ? * \ ".
 *
 * The argument is an unsigned byte on purpose: bytes >= 0x80 (e.g. UTF-8
 * multi-byte sequences) must never compare as "less than 0x20", regardless
 * of whether plain char is signed on the target.
 */
static int is_illegal_char(unsigned char c)
{
	if (c < 0x20 || c == 0x7F)
		return 1;

	switch (c) {
	case '<': case '>': case ':':
	case '|': case '?': case '*':
	case '\\': case '\"':
		return 1;
	default:
		return 0;
	}
}

/*
 * Append one dot-free name component (`len` bytes at `comp`) to `buf`.
 *
 * A reserved device name gets an underscore appended. Otherwise every
 * illegal byte `b` is replaced by the 3-byte UTF-8 encoding of the private
 * use code point U+F000 + b; all other bytes are copied verbatim.
 *
 * Returns the buffer, or NULL on allocation failure (buffer already freed).
 */
static buffer_t *handle_component(buffer_t *buf, const char *comp, size_t len)
{
	if (is_reserved_name(comp, len)) {
		buf = buffer_append(buf, comp, len);
		if (buf != NULL)
			buf = buffer_append(buf, "_", 1);
		return buf;
	}

	/*
	 * Nothing to copy, but make sure the buffer exists so that callers do
	 * not mistake an empty component for an allocation failure.
	 */
	if (len == 0)
		return buffer_append(buf, "", 0);

	while (len > 0) {
		unsigned char value, rep[3];
		size_t i;

		/* Find the longest run of bytes that can be copied as-is. */
		for (i = 0; i < len; ++i) {
			if (is_illegal_char((unsigned char)comp[i]))
				break;
		}

		if (i > 0) {
			buf = buffer_append(buf, comp, i);
			if (buf == NULL || i == len)
				break;
		}

		/* comp[i] is illegal: consume it and emit its replacement. */
		value = (unsigned char)comp[i++];
		comp += i;
		len -= i;

		rep[0] = 0xEF;
		rep[1] = 0x80 | ((value >> 6) & 0x3f);
		rep[2] = 0x80 | ( value       & 0x3f);

		buf = buffer_append(buf, (const char *)rep, 3);
		if (buf == NULL)
			break;
	}

	return buf;
}

/*
 * Append one file or directory name (`len` bytes at `name`, no slashes) to
 * `buf`. The name is split at every '.', each piece is passed through
 * handle_component() and the dots themselves are copied unchanged.
 *
 * Returns the buffer, or NULL on allocation failure (buffer already freed).
 */
static buffer_t *handle_name(buffer_t *buf, const char *name, size_t len)
{
	const char *sep;

	while ((sep = memchr(name, '.', len)) != NULL) {
		buf = handle_component(buf, name, (size_t)(sep - name));
		if (buf == NULL)
			return NULL;

		buf = buffer_append(buf, ".", 1);
		if (buf == NULL)
			return NULL;

		len -= (size_t)(sep - name) + 1;
		name = sep + 1;
	}

	return handle_component(buf, name, len);
}

/*
 * Produce a copy of `path` in which every name is usable on Windows.
 *
 * `path` is split at '/' into names and each name is fixed up by
 * handle_name(); the slashes are copied unchanged. Empty names (leading,
 * trailing or doubled slashes, or an empty path) are preserved as-is.
 *
 * Returns a NUL-terminated string that the caller must free(), or NULL if
 * `path` is NULL or memory allocation fails.
 */
char *fix_win32_filename(const char *path)
{
	buffer_t *buf = NULL;
	const char *sep;
	char *out;
	size_t len;

	if (path == NULL)
		return NULL;

	while ((sep = strchr(path, '/')) != NULL) {
		buf = handle_name(buf, path, (size_t)(sep - path));
		if (buf == NULL)
			return NULL;

		buf = buffer_append(buf, "/", 1);
		if (buf == NULL)
			return NULL;

		path = sep + 1;
	}

	buf = handle_name(buf, path, strlen(path));
	if (buf == NULL)
		return NULL;

	/*
	 * Turn the buffer object into a plain string in place: slide the
	 * payload (terminator included) over the header, then shrink the
	 * allocation. The header fields are dead after the memmove.
	 */
	len = buf->used;
	memmove(buf, buf->buffer, len);

	/* A failed shrink leaves the old block valid, so just keep using it. */
	out = realloc(buf, len);
	if (out == NULL)
		out = (char *)buf;

	return out;
}