diff options
| author | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2024-02-09 15:59:37 +0100 | 
|---|---|---|
| committer | David Oberhollenzer <david.oberhollenzer@sigma-star.at> | 2024-02-09 15:59:37 +0100 | 
| commit | ae048f7ac4a9ab6576ca6842aa13e5c9c31e35a7 (patch) | |
| tree | 79aa627a2689e76f23f856c85b9f86d9e651bfd2 | |
| parent | 48b3355c7a887530a9bd17a1ad571e402102dd95 (diff) | |
Add utility function to fixup Windows file paths
The idea is to iterate over a (canonicalized) path with forward
slashes name by name, i.e. over its file and directory names. Each
name is then examined by iterating over its components, i.e.
everything between dots.
If a component is a reserved name, such as COM1 or AUX, we append an
underscore. If it contains illegal characters, such as : or \, we
re-map each such character into the Unicode private use area.
Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
| -rw-r--r-- | include/util/util.h | 2 | ||||
| -rw-r--r-- | lib/util/Makemodule.am | 1 | ||||
| -rw-r--r-- | lib/util/fix_win32_filename.c | 242 | ||||
| -rw-r--r-- | tests/libutil/Makemodule.am | 5 | ||||
| -rw-r--r-- | tests/libutil/fix_win32_filename.c | 60 | 
5 files changed, 309 insertions, 1 deletions

```diff
diff --git a/include/util/util.h b/include/util/util.h
index db6a712..0c632de 100644
--- a/include/util/util.h
+++ b/include/util/util.h
@@ -79,4 +79,6 @@ SQFS_INTERNAL int hex_decode(const char *in, size_t in_sz,
 SQFS_INTERNAL int base64_decode(const char *in, size_t in_len,
 				sqfs_u8 *out, size_t *out_len);
 
+SQFS_INTERNAL char *fix_win32_filename(const char *path);
+
 #endif /* SQFS_UTIL_H */
diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am
index ec38b7a..c102b9b 100644
--- a/lib/util/Makemodule.am
+++ b/lib/util/Makemodule.am
@@ -16,6 +16,7 @@ libutil_a_SOURCES += lib/util/source_date_epoch.c
 libutil_a_SOURCES += lib/util/file_cmp.c
 libutil_a_SOURCES += lib/util/hex_decode.c
 libutil_a_SOURCES += lib/util/base64_decode.c
+libutil_a_SOURCES += lib/util/fix_win32_filename.c
 
 libutil_a_CFLAGS = $(AM_CFLAGS)
 libutil_a_CPPFLAGS = $(AM_CPPFLAGS)
diff --git a/lib/util/fix_win32_filename.c b/lib/util/fix_win32_filename.c
new file mode 100644
--- /dev/null
+++ b/lib/util/fix_win32_filename.c
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/*
+ * fix_win32_filename.c
+ *
+ * Copyright (C) 2024 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "util/util.h"
+
+#include <string.h>
+#include <stdlib.h>
+
+/* Granularity in bytes by which the output buffer grows. */
+enum { BUFFER_CHUNK = 128 };
+
+/*
+ * Growable string buffer. "used" is the number of bytes stored in
+ * "buffer" including the null-terminator, "available" is its capacity.
+ */
+typedef struct {
+	size_t used;
+	size_t available;
+	char buffer[];
+} buffer_t;
+
+/*
+ * Append "count" bytes from "data" to the string in "buf", keeping it
+ * null-terminated. If "buf" is NULL, a new buffer is allocated.
+ *
+ * Returns the possibly relocated buffer. On allocation failure, the
+ * buffer is freed and NULL is returned.
+ */
+static buffer_t *buffer_append(buffer_t *buf, const char *data, size_t count)
+{
+	size_t bufspace, needed;
+
+	if (buf == NULL) {
+		buf = calloc(1, sizeof(*buf) + BUFFER_CHUNK);
+		if (buf == NULL)
+			return NULL;
+
+		buf->used = 1;
+		buf->available = BUFFER_CHUNK;
+		buf->buffer[0] = '\0';
+	}
+
+	bufspace = buf->available;
+	needed = buf->used + count;
+
+	while (bufspace < needed)
+		bufspace += BUFFER_CHUNK;
+
+	if (bufspace != buf->available) {
+		void *new_buf = realloc(buf, sizeof(*buf) + bufspace);
+		if (new_buf == NULL) {
+			free(buf);
+			return NULL;
+		}
+		buf = new_buf;
+		buf->available = bufspace;
+	}
+
+	/* overwrite the old terminator and re-add it behind the new data */
+	buf->used -= 1;
+	memcpy(buf->buffer + buf->used, data, count);
+	buf->used += count;
+	buf->buffer[buf->used++] = '\0';
+	return buf;
+}
+
+/* Device names that are reserved on Windows. */
+static const char *bad_names[] = {
+	"CON", "PRN", "AUX", "NUL",
+	"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
+	"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
+};
+
+/* Locale independent upper case conversion of an ASCII letter. */
+static int ascii_upper(int c)
+{
+	return (c >= 'a' && c <= 'z') ? (c - 'a' + 'A') : c;
+}
+
+/*
+ * Test if a component is, ignoring case, exactly one of the reserved
+ * names. The lengths must match: a mere common prefix (e.g. "co" or
+ * "a") or an empty component is not a reserved name.
+ */
+static int is_bad_name(const char *comp, size_t len)
+{
+	for (size_t i = 0; i < sizeof(bad_names) / sizeof(bad_names[0]); ++i) {
+		const char *name = bad_names[i];
+		size_t j;
+
+		if (strlen(name) != len)
+			continue;
+
+		for (j = 0; j < len; ++j) {
+			if (ascii_upper((unsigned char)comp[j]) != name[j])
+				break;
+		}
+
+		if (j == len)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Test if a byte is not allowed in a Windows file name. The argument is
+ * unsigned, so the bytes of UTF-8 multi byte sequences (>= 0x80) are not
+ * mistaken for control characters where plain char is signed.
+ */
+static int is_bad_char(unsigned char c)
+{
+	if (c < 0x20 || c == 0x7F)
+		return 1;
+
+	return strchr("<>:|?*\\\"", c) != NULL;
+}
+
+/*
+ * Append a single, dot separated component of a file name. A reserved
+ * name gets an underscore appended, illegal bytes are re-mapped to the
+ * code points U+F000 + byte value in the Unicode private use area.
+ */
+static buffer_t *handle_component(buffer_t *buf, const char *comp, size_t len)
+{
+	if (is_bad_name(comp, len)) {
+		buf = buffer_append(buf, comp, len);
+		if (buf != NULL)
+			buf = buffer_append(buf, "_", 1);
+		return buf;
+	}
+
+	while (len > 0) {
+		sqfs_u8 value, rep[3];
+		size_t i;
+
+		/* find the longest prefix that can be copied verbatim */
+		for (i = 0; i < len; ++i) {
+			if (is_bad_char((unsigned char)comp[i]))
+				break;
+		}
+
+		if (i > 0) {
+			buf = buffer_append(buf, comp, i);
+			if (buf == NULL || i == len)
+				break;
+		}
+
+		value = (sqfs_u8)comp[i++];
+		comp += i;
+		len -= i;
+
+		/* 3 byte UTF-8 encoding of U+F000 + value */
+		rep[0] = 0xEF;
+		rep[1] = 0x80 | ((value >> 6) & 0x3f);
+		rep[2] = 0x80 | ( value       & 0x3f);
+
+		buf = buffer_append(buf, (const char *)rep, 3);
+		if (buf == NULL)
+			break;
+	}
+
+	return buf;
+}
+
+/* Append a file or directory name, processing each dot separated part. */
+static buffer_t *handle_name(buffer_t *buf, const char *name, size_t len)
+{
+	const char *sep;
+
+	while ((sep = memchr(name, '.', len)) != NULL) {
+		size_t comp_len = (size_t)(sep - name);
+
+		buf = handle_component(buf, name, comp_len);
+		if (buf == NULL)
+			return NULL;
+
+		buf = buffer_append(buf, ".", 1);
+		if (buf == NULL)
+			return NULL;
+
+		len -= comp_len + 1;
+		name = sep + 1;
+	}
+
+	return handle_component(buf, name, len);
+}
+
+/*
+ * Rewrite a path with forward slashes so that no component is a name
+ * reserved on Windows or contains a character that is illegal there.
+ * Returns a null-terminated string that the caller must free(), or
+ * NULL if "path" is NULL or on allocation failure.
+ */
+char *fix_win32_filename(const char *path)
+{
+	buffer_t *buf;
+	const char *sep;
+	char *out;
+	size_t len;
+
+	if (path == NULL)
+		return NULL;
+
+	/*
+	 * Allocate up front, so an empty component (empty path, leading or
+	 * trailing slash) cannot leave us with a NULL buffer that would be
+	 * mistaken for an allocation failure.
+	 */
+	buf = buffer_append(NULL, "", 0);
+	if (buf == NULL)
+		return NULL;
+
+	while ((sep = strchr(path, '/')) != NULL) {
+		buf = handle_name(buf, path, (size_t)(sep - path));
+		if (buf == NULL)
+			return NULL;
+
+		buf = buffer_append(buf, "/", 1);
+		if (buf == NULL)
+			return NULL;
+
+		path = sep + 1;
+	}
+
+	buf = handle_name(buf, path, strlen(path));
+	if (buf == NULL)
+		return NULL;
+
+	/* move the string over the header, then trim the allocation */
+	len = buf->used;
+	memmove(buf, buf->buffer, len);
+
+	out = realloc(buf, len);
+	if (out == NULL)
+		out = (char *)buf;
+
+	return out;
+}
diff --git a/tests/libutil/Makemodule.am b/tests/libutil/Makemodule.am
index e039282..c783853 100644
--- a/tests/libutil/Makemodule.am
+++ b/tests/libutil/Makemodule.am
@@ -36,9 +36,12 @@ test_hex_decode_LDADD = libutil.a libcompat.a
 test_base64_decode_SOURCES = tests/libutil/base64_decode.c
 test_base64_decode_LDADD = libutil.a libcompat.a
 
+test_fix_win32_filename_SOURCES = tests/libutil/fix_win32_filename.c
+test_fix_win32_filename_LDADD = libutil.a libcompat.a
+
 LIBUTIL_TESTS = \
 	test_str_table test_rbtree test_xxhash test_threadpool test_ismemzero \
-	test_canonicalize_name test_filename_sane \
+	test_canonicalize_name test_filename_sane test_fix_win32_filename \
 	test_sdate_epoch test_hex_decode test_base64_decode
 
 check_PROGRAMS += $(LIBUTIL_TESTS)
diff --git a/tests/libutil/fix_win32_filename.c b/tests/libutil/fix_win32_filename.c
new file mode 100644
--- /dev/null
+++ b/tests/libutil/fix_win32_filename.c
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+/*
+ * fix_win32_filename.c
+ *
+ * Copyright (C) 2024 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "config.h"
+
+#include "util/test.h"
+#include "util/util.h"
+
+static const struct {
+	const char *path;
+	const char *result;
+} test_data[] = {
+	{ "foo", "foo" },
+	{ "foo/bar", "foo/bar" },
+	{ "foo/bar.txt", "foo/bar.txt" },
+	{ "COM1", "COM1_" },
+	{ "COM1.txt", "COM1_.txt" },
+	{ "foo.aux", "foo.aux_" },
+	{ "foo/bar/test.LPT1/bla", "foo/bar/test.LPT1_/bla" },
+	{ "C:\\/foo/COM1.bla/bar",
+	  "C\xEF\x80\xBA\xEF\x81\x9c/foo/COM1_.bla/bar" },
+	{ "", "" },
+	{ "a/co/lpt", "a/co/lpt" },
+	{ ".bashrc", ".bashrc" },
+	{ "/foo/", "/foo/" },
+	{ "caf\xC3\xA9.txt", "caf\xC3\xA9.txt" },
+};
+
+int main(int argc, char **argv)
+{
+	(void)argc; (void)argv;
+
+	for (size_t i = 0; i < sizeof(test_data) / sizeof(test_data[0]); ++i) {
+		char *result = fix_win32_filename(test_data[i].path);
+		size_t out_len = strlen(test_data[i].result);
+
+		if (result == NULL) {
+			fprintf(stderr, "OOM for test case %u (%s)?\n",
+				(unsigned int)i, test_data[i].path);
+			return EXIT_FAILURE;
+		}
+
+		if (out_len != strlen(result) ||
+		    memcmp(result, test_data[i].result, out_len) != 0) {
+			fprintf(stderr,
+				"Mismatch for %s -> %s, got %s instead!\n",
+				test_data[i].path, test_data[i].result,
+				result);
+			free(result);
+			return EXIT_FAILURE;
+		}
+
+		free(result);
+	}
+
+	return EXIT_SUCCESS;
+}
```
