How to divide by space keeping words with spaces inside quotes?

cy dlang at verge.info.tm
Mon Aug 9 01:16:39 UTC 2021


On Sunday, 8 August 2021 at 23:04:32 UTC, Marcone wrote:
> How to divide by space keeping words with spaces inside quotes?

Well the designers of ASCII were morons who decided that open 
quote and close quote would be the same damn letter, so it's a 
little trickier. Basically what you have to do is process it 
character by character with a finite state machine that switches 
between word mode, space mode, and quoting mode, and that also 
accounts for backslash escapes: since the open and close quote are 
the same character, you kinda need them to get a literal quote 
inside a quoted string.

I dunno any modules in specific that do it, but something like:

```d
import std.array : Appender, appender;
import std.stdio : writeln;

/// Tokenizer states: inside a double-quoted string, inside a bare word,
/// or between tokens.
enum Mode { QUOTING, WORD, SPACE }

/// Mutable state of the tokenizer.
struct FSM {
	Mode mode = Mode.SPACE;     // input starts "between tokens"
	bool backslash;             // previous char was a backslash still waiting for its partner
	Appender!string cur;        // token currently being built
	Appender!(string[]) accum;  // finished tokens, in input order

	/// Moves the token under construction into the finished list and
	/// starts a fresh, empty one.
	void flush() {
		accum.put(cur.data);
		// A new appender is used (rather than clear()) because the slice
		// just stored in accum is immutable and must not be overwritten.
		cur = appender!string();
	}
}

/**
 * Splits `input` on spaces, tabs and newlines, keeping double-quoted
 * runs together as a single token (without the surrounding quotes).
 *
 * A backslash makes the following character literal, so `\"` does not
 * end a quoted run and `\ ` does not end a word. The backslash itself
 * is kept in the resulting token; no unescaping is performed.
 *
 * An unterminated quote yields whatever was collected up to the end of
 * the input as the final token.
 *
 * Params:
 *   input = the text to tokenize
 * Returns: the tokens in the order they appear in `input`
 */
string[] splitQuoted(string input) {
	FSM fsm;

	foreach (char ch; input) {
		// The character after a backslash is always taken literally.
		if (fsm.backslash) {
			fsm.backslash = false;
			fsm.cur.put(ch);
			continue;
		}

		final switch (fsm.mode) {
		case Mode.QUOTING:
			switch (ch) {
			case '\\':
				fsm.cur.put('\\');
				fsm.backslash = true;
				break; // must not fall into the closing-quote case
			case '"':
				fsm.mode = Mode.SPACE;
				fsm.flush(); // "" deliberately produces an empty token
				break;
			default:
				fsm.cur.put(ch);
				break;
			}
			break;
		case Mode.WORD:
			switch (ch) {
			case '\\':
				fsm.cur.put('\\');
				fsm.backslash = true;
				break; // must not fall into the separator case
			case ' ', '\t', '\n':
				fsm.mode = Mode.SPACE;
				// WORD mode is only entered after a character was
				// appended, so cur is never empty here.
				fsm.flush();
				break;
			default:
				fsm.cur.put(ch);
				break;
			}
			break;
		case Mode.SPACE:
			switch (ch) {
			case '\\':
				fsm.backslash = true;
				fsm.mode = Mode.WORD;
				fsm.cur.put('\\');
				break;
			case ' ', '\t', '\n':
				break; // collapse runs of separators
			case '"':
				fsm.mode = Mode.QUOTING;
				break;
			default:
				fsm.cur.put(ch);
				fsm.mode = Mode.WORD;
				break;
			}
			break;
		}
	}

	// The input may end in the middle of a word (or an unterminated
	// quote); without this the last token would be silently dropped.
	if (fsm.mode != Mode.SPACE)
		fsm.flush();

	return fsm.accum.data;
}

void main() {
	string somestr = "Duck Cat \"Carl Rivers\" Dog";

	string[] result = splitQuoted(somestr);
	writeln(result); // ["Duck", "Cat", "Carl Rivers", "Dog"]
}
```

(untested pseudocode that won't work btw)


More information about the Digitalmars-d-learn mailing list