aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/util/parse.h37
-rw-r--r--lib/util/Makemodule.am8
-rw-r--r--lib/util/src/split_line.c109
-rw-r--r--lib/util/test/split_line.c120
4 files changed, 272 insertions, 2 deletions
diff --git a/include/util/parse.h b/include/util/parse.h
index bc635ad..8da07b1 100644
--- a/include/util/parse.h
+++ b/include/util/parse.h
@@ -15,6 +15,18 @@ enum {
ISTREAM_LINE_SKIP_EMPTY = 0x04,
};
+/* Status codes returned by split_line() */
+enum {
+ /* the line was split successfully */
+ SPLIT_LINE_OK = 0,
+ /* allocating or growing the token list failed */
+ SPLIT_LINE_ALLOC = -1,
+ /* a quoted token was opened but never closed */
+ SPLIT_LINE_UNMATCHED_QUOTE = -2,
+ /* a backslash in a quoted token was not followed by a quote or backslash */
+ SPLIT_LINE_ESCAPE = -3,
+};
+
+/* Token list produced by split_line() */
+typedef struct {
+ /* number of valid entries in args */
+ size_t count;
+ /* one pointer per token; split_line() stores pointers into the line it split */
+ char *args[];
+} split_line_t;
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -48,6 +60,31 @@ extern "C" {
SQFS_INTERNAL int istream_get_line(sqfs_istream_t *strm, char **out,
size_t *line_num, int flags);
+/**
+ * @brief Split a line of special character separated tokens
+ *
+ * The underlying string is modified, replacing sequences of separator
+ * characters with a single null byte and compacting the string. Every
+ * occurrence of a terminated string is recorded in the returned structure.
+ * The recorded pointers point into the line buffer itself, so the buffer
+ * has to stay around for as long as the token list is in use.
+ *
+ * A token that starts with a double quote runs up to the matching closing
+ * quote and may contain separator characters. Inside the quotes, a
+ * backslash may only be followed by a double quote or another backslash,
+ * which inserts that character literally.
+ *
+ * @param line A modifiable buffer holding a line
+ * @param len The maximum length of the string in the buffer to process
+ * @param sep A string of valid separator characters
+ * @param out Returns the token list, free this with free(). Only valid
+ *            if zero is returned.
+ *
+ * @return Zero on success, a negative SPLIT_LINE_* error code on failure
+ */
+SQFS_INTERNAL int split_line(char *line, size_t len,
+ const char *sep, split_line_t **out);
+
+/**
+ * @brief Remove the first N components of a tokenized line
+ *
+ * The remaining entries are moved to the front of the list and the
+ * component count is reduced accordingly. If count is greater than or
+ * equal to the number of components, the list simply ends up empty.
+ *
+ * @param sep A successfully split up line
+ * @param count Number of components to remove from the front
+ */
+SQFS_INTERNAL void split_line_remove_front(split_line_t *sep, size_t count);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am
index 1ca4802..6386066 100644
--- a/lib/util/Makemodule.am
+++ b/lib/util/Makemodule.am
@@ -9,7 +9,7 @@ libutil_a_SOURCES = include/util/util.h include/util/str_table.h \
lib/util/src/canonicalize_name.c lib/util/src/filename_sane.c \
lib/util/src/source_date_epoch.c lib/util/src/file_cmp.c \
lib/util/src/hex_decode.c lib/util/src/base64_decode.c \
- lib/util/src/get_line.c
+ lib/util/src/get_line.c lib/util/src/split_line.c
libutil_a_CFLAGS = $(AM_CFLAGS)
libutil_a_CPPFLAGS = $(AM_CPPFLAGS)
@@ -80,10 +80,14 @@ test_base64_decode_LDADD = libutil.a libcompat.a
test_get_line_SOURCES = lib/util/test/get_line.c
test_get_line_LDADD = libutil.a libio.a libcompat.a
+test_split_line_SOURCES = lib/util/test/split_line.c
+test_split_line_LDADD = libutil.a libcompat.a
+
LIBUTIL_TESTS = \
test_str_table test_rbtree test_xxhash test_threadpool test_ismemzero \
test_canonicalize_name test_filename_sane test_filename_sane_w32 \
- test_sdate_epoch test_hex_decode test_base64_decode test_get_line
+ test_sdate_epoch test_hex_decode test_base64_decode test_get_line \
+ test_split_line
check_PROGRAMS += $(LIBUTIL_TESTS)
TESTS += $(LIBUTIL_TESTS)
diff --git a/lib/util/src/split_line.c b/lib/util/src/split_line.c
new file mode 100644
index 0000000..ede9964
--- /dev/null
+++ b/lib/util/src/split_line.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/*
+ * split_line.c
+ *
+ * Copyright (C) 2023 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "config.h"
+#include "util/parse.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+/*
+ * Grow the token list by one slot and store arg in it.
+ *
+ * Returns the (possibly moved) list on success. If the list cannot be
+ * grown, the old list is released and NULL is returned, so the caller
+ * must not touch the pointer it passed in afterwards.
+ */
+static split_line_t *append_arg(split_line_t *in, char *arg)
+{
+	size_t new_size = sizeof(*in) + (in->count + 1) * sizeof(in->args[0]);
+	split_line_t *grown = realloc(in, new_size);
+
+	if (grown != NULL) {
+		grown->args[grown->count] = arg;
+		grown->count += 1;
+		return grown;
+	}
+
+	/* realloc() leaves the old block alone on failure, drop it here */
+	free(in);
+	return NULL;
+}
+
+/*
+ * Test whether c is one of the characters listed in sep.
+ *
+ * The null byte never counts as a separator, even though strchr() would
+ * find it at the end of sep.
+ */
+static int is_sep(const char *sep, int c)
+{
+	if (c == '\0')
+		return 0;
+
+	return strchr(sep, c) != NULL;
+}
+
+/*
+ * Split a line into tokens in place.
+ *
+ * Tokens are separated by runs of characters from sep. The tokens are
+ * compacted towards the start of line, each one terminated by a null
+ * byte, and a pointer to every token is appended to the list returned
+ * through out. The pointers refer to the line buffer itself.
+ *
+ * A token starting with a double quote extends to the closing quote.
+ * Inside the quotes, a backslash must be followed by a double quote or a
+ * second backslash; the escaped character is copied literally.
+ *
+ * At most len bytes of line are examined; processing also stops at a
+ * null byte.
+ *
+ * Returns SPLIT_LINE_OK and stores the token list in *out (release it
+ * with free()). On failure a negative SPLIT_LINE_* code is returned and
+ * *out is set to NULL.
+ */
+int split_line(char *line, size_t len, const char *sep, split_line_t **out)
+{
+	split_line_t *split;
+	char *src = line, *dst = line;
+
+	/* never hand an indeterminate pointer back if we bail out below */
+	*out = NULL;
+
+	split = calloc(1, sizeof(*split));
+	if (split == NULL)
+		return SPLIT_LINE_ALLOC;
+
+	/* leading separators do not start a token */
+	while (len > 0 && is_sep(sep, *src)) {
+		++src;
+		--len;
+	}
+
+	while (len > 0 && *src != '\0') {
+		/* the token is going to be assembled at the write position */
+		split = append_arg(split, dst);
+		if (split == NULL)
+			return SPLIT_LINE_ALLOC;
+
+		if (*src == '"') {
+			++src;
+			--len;
+
+			while (len > 0 && *src != '\0' && *src != '"') {
+				if (src[0] == '\\') {
+					/* an escape needs a second character */
+					if (len < 2)
+						goto fail_esc;
+					if (src[1] != '"' && src[1] != '\\')
+						goto fail_esc;
+
+					*(dst++) = src[1];
+					src += 2;
+					len -= 2;
+				} else {
+					*(dst++) = *(src++);
+					--len;
+				}
+			}
+
+			/* ran out of input before seeing the closing quote */
+			if (len == 0 || *src != '"')
+				goto fail_quote;
+			++src;
+			--len;
+		} else {
+			while (len > 0 && !is_sep(sep, *src) && *src != '\0') {
+				*(dst++) = *(src++);
+				--len;
+			}
+		}
+
+		/* collapse the whole separator run into one terminator */
+		while (len > 0 && is_sep(sep, *src)) {
+			++src;
+			--len;
+		}
+
+		*(dst++) = '\0';
+	}
+
+	*out = split;
+	return SPLIT_LINE_OK;
+fail_esc:
+	free(split);
+	return SPLIT_LINE_ESCAPE;
+fail_quote:
+	free(split);
+	return SPLIT_LINE_UNMATCHED_QUOTE;
+}
+
+/*
+ * Drop the first count tokens from the list.
+ *
+ * The surviving pointers are shifted to the front of the array. Asking
+ * for as many or more tokens than the list holds empties it.
+ */
+void split_line_remove_front(split_line_t *split, size_t count)
+{
+	size_t remaining;
+
+	if (count >= split->count) {
+		split->count = 0;
+		return;
+	}
+
+	remaining = split->count - count;
+
+	for (size_t k = 0; k < remaining; ++k)
+		split->args[k] = split->args[k + count];
+
+	split->count = remaining;
+}
diff --git a/lib/util/test/split_line.c b/lib/util/test/split_line.c
new file mode 100644
index 0000000..a0c32b8
--- /dev/null
+++ b/lib/util/test/split_line.c
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+/*
+ * split_line.c
+ *
+ * Copyright (C) 2023 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "config.h"
+#include "util/parse.h"
+#include "util/test.h"
+#include "compat.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Inputs that are split at blanks and tabs, together with the number of
+ * tokens and the token strings expected back.
+ */
+static const struct {
+	const char *in;
+	size_t count;
+	const char **out;
+} split[] = {
+	{
+		.in = "",
+		.count = 0,
+		.out = NULL,
+	},
+	{
+		.in = " \t ",
+		.count = 0,
+		.out = NULL,
+	},
+	{
+		.in = "foo",
+		.count = 1,
+		.out = (const char *[]){ "foo" },
+	},
+	{
+		.in = " foo ",
+		.count = 1,
+		.out = (const char *[]){ "foo" },
+	},
+	{
+		.in = "foo bar",
+		.count = 2,
+		.out = (const char *[]){ "foo", "bar" },
+	},
+	{
+		.in = " foo \t bar ",
+		.count = 2,
+		.out = (const char *[]){ "foo", "bar" },
+	},
+	{
+		.in = " foo \t bar baz ",
+		.count = 3,
+		.out = (const char *[]){ "foo", "bar", "baz" },
+	},
+	{
+		/* quoted token keeps its blanks */
+		.in = " foo \t \" bar \" baz ",
+		.count = 3,
+		.out = (const char *[]){ "foo", " bar ", "baz" },
+	},
+	{
+		/* escaped quote inside a quoted token */
+		.in = " foo \t \" \\\"bar \" baz ",
+		.count = 3,
+		.out = (const char *[]){ "foo", " \"bar ", "baz" },
+	},
+	{
+		/* escaped backslash right before the closing quote */
+		.in = " foo \t \" bar \\\\\" baz ",
+		.count = 3,
+		.out = (const char *[]){ "foo", " bar \\", "baz" },
+	},
+};
+
+/*
+ * Comma separated inputs for split_line_remove_front(): orig is the
+ * token count after splitting, count the number of tokens to drop,
+ * remain the number of tokens expected afterwards and out their values.
+ */
+static const struct {
+	const char *in;
+	size_t orig;
+	size_t count;
+	size_t remain;
+	const char **out;
+} drop_test[] = {
+	{
+		.in = "foo,bar,baz",
+		.orig = 3,
+		.count = 0,
+		.remain = 3,
+		.out = (const char *[]){ "foo", "bar", "baz" },
+	},
+	{
+		.in = "foo,bar,baz",
+		.orig = 3,
+		.count = 1,
+		.remain = 2,
+		.out = (const char *[]){ "bar", "baz" },
+	},
+	{
+		.in = "foo,bar,baz",
+		.orig = 3,
+		.count = 2,
+		.remain = 1,
+		.out = (const char *[]){ "baz" },
+	},
+	{
+		.in = "foo,bar,baz",
+		.orig = 3,
+		.count = 3,
+		.remain = 0,
+		.out = NULL,
+	},
+	{
+		.in = "foo,bar,baz",
+		.orig = 3,
+		.count = 4,
+		.remain = 0,
+		.out = NULL,
+	},
+	{
+		.in = "foo,bar,baz",
+		.orig = 3,
+		.count = 100,
+		.remain = 0,
+		.out = NULL,
+	},
+};
+
+/* Print every token of the list to stderr, one per line, in backticks */
+static void dump_components(split_line_t *sep)
+{
+	size_t idx = 0;
+
+	while (idx < sep->count) {
+		fprintf(stderr, "\t`%s`\n", sep->args[idx]);
+		++idx;
+	}
+}
+
+/*
+ * Test driver: checks plain and quoted splitting, the documented error
+ * codes for malformed input, and split_line_remove_front().
+ */
+int main(int argc, char **argv)
+{
+	/* malformed inputs and the error code split_line() has to report */
+	static const struct {
+		const char *in;
+		int ret;
+	} fail_test[] = {
+		/* quote opened, never closed */
+		{ "\"foo", SPLIT_LINE_UNMATCHED_QUOTE },
+		{ "foo \"bar", SPLIT_LINE_UNMATCHED_QUOTE },
+		/* backslash followed by something other than quote/backslash */
+		{ "\"foo\\nbar\"", SPLIT_LINE_ESCAPE },
+		/* backslash is the last character of the input */
+		{ "\"foo\\", SPLIT_LINE_ESCAPE },
+	};
+
+	(void)argc; (void)argv;
+
+	/* split on blanks and tabs, compare against the expected tokens */
+	for (size_t i = 0; i < sizeof(split) / sizeof(split[0]); ++i) {
+		split_line_t *sep;
+		char *copy;
+		int ret;
+
+		/* split_line() modifies its input, so work on a copy */
+		copy = strdup(split[i].in);
+		TEST_NOT_NULL(copy);
+
+		ret = split_line(copy, strlen(copy), " \t", &sep);
+		TEST_EQUAL_I(ret, 0);
+		TEST_NOT_NULL(sep);
+
+		fprintf(stderr, "splitting `%s`\n", split[i].in);
+		dump_components(sep);
+
+		TEST_EQUAL_UI(sep->count, split[i].count);
+
+		for (size_t j = 0; j < sep->count; ++j) {
+			TEST_STR_EQUAL(sep->args[j], split[i].out[j]);
+		}
+
+		free(sep);
+		free(copy);
+	}
+
+	/* malformed quoting must be rejected with the matching error code */
+	for (size_t i = 0; i < sizeof(fail_test) / sizeof(fail_test[0]); ++i) {
+		split_line_t *sep = NULL;
+		char *copy;
+		int ret;
+
+		copy = strdup(fail_test[i].in);
+		TEST_NOT_NULL(copy);
+
+		fprintf(stderr, "splitting malformed `%s`\n", fail_test[i].in);
+
+		ret = split_line(copy, strlen(copy), " \t", &sep);
+		TEST_EQUAL_I(ret, fail_test[i].ret);
+
+		/* no token list is handed out on failure, only the copy is ours */
+		free(copy);
+	}
+
+	/* split on commas, then drop a number of leading tokens */
+	for (size_t i = 0; i < sizeof(drop_test) / sizeof(drop_test[0]); ++i) {
+		split_line_t *sep;
+		char *copy;
+		int ret;
+
+		copy = strdup(drop_test[i].in);
+		TEST_NOT_NULL(copy);
+
+		fprintf(stderr, "splitting `%s`\n", drop_test[i].in);
+
+		ret = split_line(copy, strlen(copy), ",", &sep);
+		TEST_EQUAL_I(ret, 0);
+		TEST_NOT_NULL(sep);
+
+		dump_components(sep);
+
+		TEST_EQUAL_UI(sep->count, drop_test[i].orig);
+
+		fprintf(stderr, "removing first %u components\n",
+			(unsigned int)drop_test[i].count);
+
+		split_line_remove_front(sep, drop_test[i].count);
+		dump_components(sep);
+
+		TEST_EQUAL_UI(sep->count, drop_test[i].remain);
+
+		for (size_t j = 0; j < sep->count; ++j) {
+			TEST_STR_EQUAL(sep->args[j], drop_test[i].out[j]);
+		}
+
+		free(sep);
+		free(copy);
+	}
+
+	return EXIT_SUCCESS;
+}