From 352a9060b6813c41527f0c5da43f0c86aecfde2a Mon Sep 17 00:00:00 2001
From: David Oberhollenzer <david.oberhollenzer@tele2.at>
Date: Mon, 2 Apr 2018 21:26:45 +0200
Subject: Add stateful preprocessing to rdline

Signed-off-by: David Oberhollenzer <david.oberhollenzer@tele2.at>
---
 lib/include/util.h | 12 ++++++++++++
 lib/src/rdline.c   | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/lib/include/util.h b/lib/include/util.h
index 9ba9ffb..6943567 100644
--- a/lib/include/util.h
+++ b/lib/include/util.h
@@ -43,6 +43,18 @@ typedef struct {
 	cleared if end-of-file is reached.
 
 	The line must be deallocated with free().
+
+	The following transformations are applied:
+	 - Whitespace characters are replaced with a regular space character.
+	 - Sequences of space characters are collapsed into a single space.
+	 - A '#' sign is interpreted as the start of a comment and removed,
+	   together with everything that follows.
+	 - Leading and trailing spaces are removed from the line.
+	 - If a '"' is encountered, the above rules are disabled until
+	   the matching '"' is read. A '"' can be escaped by preceding
+	   it with a backslash.
+	 - If a second, corresponding '"' is not found, processing fails with
+	   errno set to EILSEQ.
 */
 char *rdline(int fd);
 
diff --git a/lib/src/rdline.c b/lib/src/rdline.c
index 1b91008..4e72a25 100644
--- a/lib/src/rdline.c
+++ b/lib/src/rdline.c
@@ -22,11 +22,18 @@
 
 #include "util.h"
 
+enum {
+	STATE_INITIAL = 0,	/* plain text: whitespace is collapsed; comment and string starts are detected */
+	STATE_STRING = 1,	/* inside a double-quoted string: no whitespace or comment processing */
+	STATE_STRING_ESC = 2,	/* character right after a backslash inside a string; next state is STATE_STRING */
+	STATE_COMMENT = 3,	/* after a hash sign: input is skipped until the terminating NUL character */
+};
+
 char *rdline(int fd)
 {
 	size_t i = 0, bufsiz = 0, newsz;
+	int ret, state = STATE_INITIAL;
 	char c, *new, *buffer = NULL;
-	int ret;
 
 	for (;;) {
 		switch (read(fd, &c, 1)) {
@@ -47,6 +54,39 @@ char *rdline(int fd)
 			goto fail;
 		}
 
+		switch (state) {
+		case STATE_STRING:
+			if (c == '\\')
+				state = STATE_STRING_ESC;
+			if (c == '"')
+				state = STATE_INITIAL;
+			break;
+		case STATE_STRING_ESC:
+			state = STATE_STRING;
+			break;
+		case STATE_COMMENT:
+			if (c != '\0')
+				continue;
+			break;
+		default:
+			if (isspace(c))
+				c = ' ';
+			if (c == ' ' && (i == 0 || buffer[i - 1] == ' '))
+				continue;
+			if (c == '#') {
+				state = STATE_COMMENT;
+				continue;
+			}
+			if (c == '"')
+				state = STATE_STRING;
+			break;
+		}
+
+		if (c == '\0') {
+			while (i > 0 && buffer[i - 1] == ' ')
+				--i;
+		}
+
 		if (i == bufsiz) {
 			newsz = bufsiz ? bufsiz * 2 : 16;
 			new = realloc(buffer, newsz);
@@ -62,6 +102,11 @@ char *rdline(int fd)
 		if (c == '\0')
 			break;
 	}
+
+	if (state == STATE_STRING || state == STATE_STRING_ESC) {
+		errno = EILSEQ;
+		goto fail;
+	}
 	return buffer;
 fail:
 	ret = errno;
-- 
cgit v1.2.3