aboutsummaryrefslogtreecommitdiff
path: root/lib/util
diff options
context:
space:
mode:
author David Oberhollenzer <david.oberhollenzer@sigma-star.at> 2023-06-20 17:43:21 +0200
committer David Oberhollenzer <david.oberhollenzer@sigma-star.at> 2023-06-21 16:14:31 +0200
commit 12727806af641970a651b8f969cba33677ae7395 (patch)
tree f02cb947ac2379cdd9bc15e093ec732dfd5bc45f /lib/util
parent ca9b6ba17257f88b8d575f18cab0b1e23660cfa5 (diff)
Add a helper to libutil for splitting token separated lines
Signed-off-by: David Oberhollenzer <david.oberhollenzer@sigma-star.at>
Diffstat (limited to 'lib/util')
-rw-r--r-- lib/util/Makemodule.am 8
-rw-r--r-- lib/util/src/split_line.c 109
-rw-r--r-- lib/util/test/split_line.c 120
3 files changed, 235 insertions, 2 deletions
diff --git a/lib/util/Makemodule.am b/lib/util/Makemodule.am
index 1ca4802..6386066 100644
--- a/lib/util/Makemodule.am
+++ b/lib/util/Makemodule.am
@@ -9,7 +9,7 @@ libutil_a_SOURCES = include/util/util.h include/util/str_table.h \
lib/util/src/canonicalize_name.c lib/util/src/filename_sane.c \
lib/util/src/source_date_epoch.c lib/util/src/file_cmp.c \
lib/util/src/hex_decode.c lib/util/src/base64_decode.c \
- lib/util/src/get_line.c
+ lib/util/src/get_line.c lib/util/src/split_line.c
libutil_a_CFLAGS = $(AM_CFLAGS)
libutil_a_CPPFLAGS = $(AM_CPPFLAGS)
@@ -80,10 +80,14 @@ test_base64_decode_LDADD = libutil.a libcompat.a
test_get_line_SOURCES = lib/util/test/get_line.c
test_get_line_LDADD = libutil.a libio.a libcompat.a
+test_split_line_SOURCES = lib/util/test/split_line.c
+test_split_line_LDADD = libutil.a libcompat.a
+
LIBUTIL_TESTS = \
test_str_table test_rbtree test_xxhash test_threadpool test_ismemzero \
test_canonicalize_name test_filename_sane test_filename_sane_w32 \
- test_sdate_epoch test_hex_decode test_base64_decode test_get_line
+ test_sdate_epoch test_hex_decode test_base64_decode test_get_line \
+ test_split_line
check_PROGRAMS += $(LIBUTIL_TESTS)
TESTS += $(LIBUTIL_TESTS)
diff --git a/lib/util/src/split_line.c b/lib/util/src/split_line.c
new file mode 100644
index 0000000..ede9964
--- /dev/null
+++ b/lib/util/src/split_line.c
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: LGPL-3.0-or-later */
+/*
+ * split_line.c
+ *
+ * Copyright (C) 2023 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "config.h"
+#include "util/parse.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+/*
+ * Grow the argument list by one slot and store `arg` in it.
+ *
+ * The block is resized to the size of the header plus room for
+ * count + 1 pointers. Returns the (possibly moved) list on success.
+ * If the resize fails, the old list is released and NULL is returned,
+ * so the caller must not touch `in` afterwards in either case.
+ */
+static split_line_t *append_arg(split_line_t *in, char *arg)
+{
+	size_t needed = sizeof(*in) + (in->count + 1) * sizeof(char *);
+	split_line_t *grown = realloc(in, needed);
+
+	if (grown != NULL) {
+		grown->args[grown->count] = arg;
+		grown->count += 1;
+		return grown;
+	}
+
+	/* realloc leaves the old block alive on failure; drop it here */
+	free(in);
+	return NULL;
+}
+
+/*
+ * Return 1 if `c` is one of the characters listed in `sep`, 0 otherwise.
+ *
+ * The NUL character is rejected explicitly: strchr would otherwise
+ * match it against the terminator of `sep` itself.
+ */
+static int is_sep(const char *sep, int c)
+{
+	if (c == '\0')
+		return 0;
+
+	return strchr(sep, c) != NULL;
+}
+
+/*
+ * Split a line into tokens, in place.
+ *
+ * Tokens are separated by runs of characters from `sep`; separators at
+ * the start and end of the line are skipped. A token that starts with a
+ * double quote runs up to the matching closing quote and may contain
+ * separator characters. Inside quotes, only the escape sequences \" and
+ * \\ are accepted. A quote in the middle of an unquoted token is copied
+ * literally. Parsing stops at the first NUL byte or after `len` bytes,
+ * whichever comes first.
+ *
+ * The tokens are compacted towards the start of `line` and NUL
+ * terminated there, so `line` is modified (on the error paths it may
+ * already have been partially rewritten) and the pointers stored in
+ * (*out)->args point into `line`. The buffer therefore has to outlive
+ * the result.
+ *
+ * NOTE(review): if nothing is removed from the line (no quotes, no
+ * leading separators, no trailing separator), the terminator of the last
+ * token is stored at line[len]. The buffer presumably has to hold
+ * len + 1 bytes, e.g. a NUL terminated string -- confirm against the
+ * declaration in util/parse.h.
+ *
+ * Returns SPLIT_LINE_OK and stores a single heap block in *out that the
+ * caller releases with free(). On failure *out is set to NULL and the
+ * result is one of:
+ *   SPLIT_LINE_ALLOC            out of memory
+ *   SPLIT_LINE_ESCAPE           bad or truncated escape inside quotes
+ *   SPLIT_LINE_UNMATCHED_QUOTE  quoted token without closing quote
+ */
+int split_line(char *line, size_t len, const char *sep, split_line_t **out)
+{
+	char *src = line, *dst = line;
+	split_line_t *split;
+	int ret;
+
+	/* never leave the result pointer indeterminate on failure */
+	*out = NULL;
+
+	split = calloc(1, sizeof(*split));
+	if (split == NULL)
+		return SPLIT_LINE_ALLOC;
+
+	while (len > 0 && is_sep(sep, *src)) {
+		++src;
+		--len;
+	}
+
+	while (len > 0 && *src != '\0') {
+		/* the token is written at dst, which trails (or equals) src */
+		split = append_arg(split, dst);
+		if (split == NULL)
+			return SPLIT_LINE_ALLOC; /* append_arg released the list */
+
+		if (*src == '"') {
+			++src;
+			--len;
+
+			while (len > 0 && *src != '\0' && *src != '"') {
+				if (src[0] == '\\') {
+					/* only look at src[1] if it is in range */
+					if (len < 2 ||
+					    (src[1] != '"' && src[1] != '\\')) {
+						ret = SPLIT_LINE_ESCAPE;
+						goto fail;
+					}
+
+					*(dst++) = src[1];
+					src += 2;
+					len -= 2;
+				} else {
+					*(dst++) = *(src++);
+					--len;
+				}
+			}
+
+			if (len == 0 || *src != '"') {
+				ret = SPLIT_LINE_UNMATCHED_QUOTE;
+				goto fail;
+			}
+			++src;
+			--len;
+		} else {
+			while (len > 0 && !is_sep(sep, *src) && *src != '\0') {
+				*(dst++) = *(src++);
+				--len;
+			}
+		}
+
+		/*
+		 * Skip the separators first, the terminator may overwrite
+		 * the first one of them.
+		 */
+		while (len > 0 && is_sep(sep, *src)) {
+			++src;
+			--len;
+		}
+
+		*(dst++) = '\0';
+	}
+
+	*out = split;
+	return SPLIT_LINE_OK;
+fail:
+	free(split);
+	return ret;
+}
+
+/*
+ * Drop the first `count` entries of the argument list and move the
+ * remaining ones to the front. If `count` is greater than or equal to
+ * the number of entries, the list simply becomes empty. The list is not
+ * resized and the strings themselves are not touched.
+ */
+void split_line_remove_front(split_line_t *split, size_t count)
+{
+	size_t remaining;
+
+	if (count >= split->count) {
+		split->count = 0;
+		return;
+	}
+
+	remaining = split->count - count;
+
+	/* source and destination overlap, so memcpy is not an option */
+	memmove(split->args, split->args + count,
+		remaining * sizeof(split->args[0]));
+	split->count = remaining;
+}
diff --git a/lib/util/test/split_line.c b/lib/util/test/split_line.c
new file mode 100644
index 0000000..a0c32b8
--- /dev/null
+++ b/lib/util/test/split_line.c
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-3.0-or-later */
+/*
+ * split_line.c
+ *
+ * Copyright (C) 2023 David Oberhollenzer <goliath@infraroot.at>
+ */
+#include "config.h"
+#include "util/parse.h"
+#include "util/test.h"
+#include "compat.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Test vectors for splitting a line. main() splits each `in` string
+ * using space and tab as separators and expects the call to succeed
+ * with exactly `count` tokens equal to the strings in `out`.
+ * The last three entries cover quoted tokens, including the \" and \\
+ * escape sequences.
+ */
+static const struct {
+ const char *in;
+ size_t count;
+ const char **out;
+} split[] = {
+ { "", 0, NULL },
+ { " \t ", 0, NULL },
+ { "foo", 1, (const char *[]){ "foo" } },
+ { " foo ", 1, (const char *[]){ "foo" } },
+ { "foo bar", 2, (const char *[]){ "foo", "bar" } },
+ { " foo \t bar ", 2, (const char *[]){ "foo", "bar" } },
+ { " foo \t bar baz ", 3, (const char *[]){ "foo", "bar", "baz" } },
+ { " foo \t \" bar \" baz ", 3,
+ (const char *[]){ "foo", " bar ", "baz" } },
+ { " foo \t \" \\\"bar \" baz ", 3,
+ (const char *[]){ "foo", " \"bar ", "baz" } },
+ { " foo \t \" bar \\\\\" baz ", 3,
+ (const char *[]){ "foo", " bar \\", "baz" } },
+};
+
+/*
+ * Test vectors for split_line_remove_front. main() splits `in` on ','
+ * and expects `orig` tokens, then removes the first `count` of them and
+ * expects `remain` tokens equal to the strings in `out`.
+ * The last two entries ask for more tokens to be removed than exist.
+ */
+static const struct {
+ const char *in;
+ size_t orig;
+ size_t count;
+ size_t remain;
+ const char **out;
+} drop_test[] = {
+ { "foo,bar,baz", 3, 0, 3, (const char *[]){ "foo", "bar", "baz" } },
+ { "foo,bar,baz", 3, 1, 2, (const char *[]){ "bar", "baz" } },
+ { "foo,bar,baz", 3, 2, 1, (const char *[]){ "baz" } },
+ { "foo,bar,baz", 3, 3, 0, NULL },
+ { "foo,bar,baz", 3, 4, 0, NULL },
+ { "foo,bar,baz", 3, 100, 0, NULL },
+};
+
+/*
+ * Print every token of a split result to stderr, one per line,
+ * indented by a tab and wrapped in backticks.
+ */
+static void dump_components(split_line_t *sep)
+{
+	size_t idx = 0;
+
+	while (idx < sep->count) {
+		fprintf(stderr, "\t`%s`\n", sep->args[idx]);
+		++idx;
+	}
+}
+
+/*
+ * Test driver: runs every entry of the split[] table through
+ * split_line, then every entry of the drop_test[] table through
+ * split_line followed by split_line_remove_front, and compares the
+ * results against the expected values using the TEST_* macros.
+ */
+int main(int argc, char **argv)
+{
+ (void)argc; (void)argv;
+
+ /* Part 1: splitting on space and tab, including quoted tokens */
+ for (size_t i = 0; i < sizeof(split) / sizeof(split[0]); ++i) {
+ split_line_t *sep;
+ char *copy;
+ int ret;
+
+ /* split_line rewrites its input, so work on a heap copy */
+ copy = strdup(split[i].in);
+ TEST_NOT_NULL(copy);
+
+ ret = split_line(copy, strlen(copy), " \t", &sep);
+ TEST_EQUAL_I(ret, 0);
+ TEST_NOT_NULL(sep);
+
+ fprintf(stderr, "splitting `%s`\n", split[i].in);
+ dump_components(sep);
+
+ TEST_EQUAL_UI(sep->count, split[i].count);
+
+ for (size_t j = 0; j < sep->count; ++j) {
+ TEST_STR_EQUAL(sep->args[j], split[i].out[j]);
+ }
+
+ /* the tokens point into copy, so sep is released first */
+ free(sep);
+ free(copy);
+ }
+
+ /* Part 2: splitting on ',' and removing tokens from the front */
+ for (size_t i = 0; i < sizeof(drop_test) / sizeof(drop_test[0]); ++i) {
+ split_line_t *sep;
+ char *copy;
+ int ret;
+
+ copy = strdup(drop_test[i].in);
+ TEST_NOT_NULL(copy);
+
+ fprintf(stderr, "splitting `%s`\n", drop_test[i].in);
+
+ ret = split_line(copy, strlen(copy), ",", &sep);
+ TEST_EQUAL_I(ret, 0);
+ TEST_NOT_NULL(sep);
+
+ dump_components(sep);
+
+ TEST_EQUAL_UI(sep->count, drop_test[i].orig);
+
+ fprintf(stderr, "removing first %u components\n",
+ (unsigned int)drop_test[i].count);
+
+ split_line_remove_front(sep, drop_test[i].count);
+ dump_components(sep);
+
+ TEST_EQUAL_UI(sep->count, drop_test[i].remain);
+
+ for (size_t j = 0; j < sep->count; ++j) {
+ TEST_STR_EQUAL(sep->args[j], drop_test[i].out[j]);
+ }
+
+ free(sep);
+ free(copy);
+ }
+
+ return EXIT_SUCCESS;
+}