/* $NetBSD$ */ /*- * Copyright (c) 2007 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Jachym Holecek . * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include /* roundup() */ #if defined(_KERNEL) #include #elif defined(_STANDALONE) #include #else #include #include #include #if defined(DEBUG) #include #include #endif #include #endif #include #include "prop_object_impl.h" #include "prop_codec_impl.h" #if defined(_STANDALONE) #define BUFINCR 16 #else #define BUFINCR 128 #endif #if !defined(__UNCONST) #define __UNCONST(a) ((void *)(unsigned long)(const void *)(a)) #endif /* * This needs to go via a macro -- even if verbose()'s body would be empty * ifndef DEBUG, GCC still isn't smart enough to eliminate calls to funcs * that only exist if DEBUG. And we don't want to clutter the code with * preprocessor conditionals. */ #if defined(DEBUG) #define VERBOSE(s, ...) verbose(s "\n", ## __VA_ARGS__) #else #define VERBOSE(s, ...) /* silence */ #endif /* Ewww, depolymerise function names. */ #define poec _prop_object_externalize_context #define poec_create _prop_object_externalize_context_alloc #define poec_append _prop_object_externalize_append_cstring #define poec_push _prop_object_externalize_append_char #define poec_destroy _prop_object_externalize_context_free #define prop_keysym_str prop_dictionary_keysym_cstring_nocopy #define _TK_COMPOUND_VALUES \ TK_ARRAYO: \ case TK_DICTO typedef enum { /* Self representing. */ TK_FIRST = 0, /* So that memset(0) DTRT. */ TK_ARRAYC, TK_ARRAYO, TK_DATA, TK_DICTC, TK_DICTO, TK_WHITE, TK_LAST, /* Values. */ TK_STRING, TK_SINT64, TK_SYMBOL, TK_UINT64 } token_type_t; #define _PA_TOKEN_LAST TK_UINT64 #define _PA_TOKEN_NAMES \ "FIRST", "ARRAYC", "ARRAYO", "DATA", "DICTC", "DICTO", "WHITE", \ "LAST", "STRING", "SINT64", "SYMBOL", "UINT64", typedef enum { SC_FIRST = 0, SC_WHITE, SC_SINT64, SC_UINT64, SC_QUOTED, SC_SYMBOL, SC_TOPLEVEL, SC_ERROR, SC_BASE64_APPEND, SC_BASE64_MAYBE_NEXT, SC_BASE64_NEXT, SC_BASE64_CLOSE, SC_STRING_APPEND, SC_STRING_MAYBE_NEXT, SC_STRING_CONCAT, SC_STRING_CLOSE, SC_COMMENT } scan_state_t; #define _SC_STATE_LAST SC_COMMENT #define _SC_STATE_NAMES \ "FIRST", "WHITE", "SINT64", "UINT64", "QUOTED", "SYMBOL", "TOPLEVEL", \ "ERROR", "BASE64_APPEND", "BASE64_MAYBE_NEXT", "BASE64_NEXT", \ "SC_BASE64_CLOSE", "STRING_APPEND", "STRING_MAYBE_NEXT", \ "STRING_CONCAT", "STRING_CLOSE", "COMMENT" typedef enum { PA_TOPLEVEL = 0, PA_ARRAY, PA_DICTIONARY, PA_ERROR, PA_HOME, PA_OBJECT } parse_state_t; #define _PA_STATE_LAST PA_OBJECT #define _PA_STATE_NAMES \ "TOPLEVEL", "ARRAY", "DICTIONARY", "ERROR", "HOME", "OBJECT" union value { char *val_string; uint64_t val_unsigned; int64_t val_signed; struct { const u_char *vd_buf; size_t vd_len; } val_data; }; /* Scanner constructs tokens and enqueues them to parser. */ struct token { SIMPLEQ_ENTRY(token) tok_link; token_type_t tok_type; #if !defined(_STANDALONE) size_t tok_pos_line; size_t tok_pos_col; #endif union value tok_value; #define tok_string tok_value.val_string #define tok_signed tok_value.val_signed #define tok_unsigned tok_value.val_unsigned #define tok_data_buf tok_value.val_data.vd_buf #define tok_data_len tok_value.val_data.vd_len }; /* Stack for nested objects, entries relinked to consq on completion. */ struct frame { SIMPLEQ_ENTRY(frame) se_link; /* Stack entry link. */ prop_object_t se_object; /* Compound object. */ /* Dec: key to parent dict, Enc: next object from parent. */ union { const char *un_symbol; prop_object_iterator_t un_iter; } se_un; #define se_symbol se_un.un_symbol #define se_iter se_un.un_iter }; SIMPLEQ_HEAD(stack, frame); struct parser { /* Incoming tokens, operation stack, list of constructed objects. */ SIMPLEQ_HEAD(, token) pa_tokens; /* FIFO */ struct stack pa_stack; /* LIFO */ struct stack pa_consq; /* FIFO */ /* Store parser state across calls. */ parse_state_t pa_state; parse_state_t pa_prev; parse_state_t pa_last; /* Store scanner state across calls. */ scan_state_t sc_state; /* Current state. */ scan_state_t sc_prev; /* State at start of this cycle. */ scan_state_t sc_last; /* Last value of prev != state. */ #if !defined(_STANDALONE) /* Position in input stream. */ size_t sc_pos_line; size_t sc_pos_col; #endif /* Scanner internalization buffer. */ u_char *sc_string; size_t sc_strcur; size_t sc_strlen; /* Base64 decoder. */ size_t sc_base64_size; /* in bits */ }; /* Base64 code space (plus '='). */ static const char base64abc[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopq" "rstuvwxyz0123456789+/"; #if defined(_KERNEL) || defined(_STANDALONE) static char * strdup(const char *s) { char *p; size_t l; l = strlen(s) + 1; p = _PROP_MALLOC(l, M_TEMP); return (memcpy(p, s, l)); } static int isprint(int c) { return (c >= ' ' && c <= '~'); } #endif /* _KERNEL || _STANDALONE */ static boolean_t stroneof(const char *s, const char *t[]) { int i; for (i = 0; t[i]; i++) if (strcasecmp(s, t[i]) == 0) return (TRUE); return (FALSE); } #if defined(DEBUG) static void verbose(const char *fmt, ...) { static FILE *tracefile = NULL; char *s; va_list ap; /* XXX locking */ if (tracefile == NULL) { s = getenv("PROPLIB_SCN_TRACEFILE"); if (s == NULL) s = "__prop_scn.out"; tracefile = fopen(s, "w"); if (tracefile == NULL) abort(); } va_start(ap, fmt); vfprintf(tracefile, fmt, ap); va_end(ap); } static const char * scanner_quote_char(u_char c) { static char buf[8]; /* 'c', '\n', 0xab */ u_char d = 0; switch (c) { case '\f': d = 'f'; break; case '\n': d = 'n'; break; case '\r': d = 'r'; break; case '\t': d = 't'; break; case '\v': d = 'v'; break; } if (d) { sprintf(buf, "'\\%c'", d); return (buf); } if (isprint(c)) sprintf(buf, "'%c'", (char)c); else sprintf(buf, "0x%02x", (u_int)c); return (buf); } static const char * scanner_state_name(scan_state_t n) { static const char *const names[] = { _SC_STATE_NAMES }; static int idx = 0; /* XXX hack, printf */ static char buf[5][32]; if (n < 0 || n > _SC_STATE_LAST) { if (++idx == 5) idx = 0; snprintf(buf[idx], 32, "", n); return (buf[idx]); } return (names[n]); } static const char * parser_state_name(parse_state_t n) { static const char *const names[] = { _PA_STATE_NAMES }; static int idx = 0; /* XXX hack, printf */ static char buf[5][32]; if (++idx == 5) idx = 0; if (n < 0 || n > _PA_STATE_LAST) { snprintf(buf[idx], 32, "", n); return (buf[idx]); } return (names[n]); } static const char * parser_token_name(token_type_t n) { static const char *const names[] = { _PA_TOKEN_NAMES }; if (n < 0 || n > _PA_TOKEN_LAST) return (""); return (names[n]); } static const char * parser_token_desc(struct token *t) { static char buf[32]; static const size_t sof = sizeof(buf); const char *s; int n = t->tok_type; s = parser_token_name(n); switch (n) { case TK_SYMBOL: snprintf(buf, sof, "%s '%s'", s, t->tok_string); return (buf); case TK_SINT64: snprintf(buf, sof, "%s %"PRId64, s, t->tok_signed); return (buf); case TK_STRING: snprintf(buf, sof, "%s \"%s\"", s, t->tok_string); return (buf); case TK_UINT64: snprintf(buf, sof, "%s #%"PRIx64, s, t->tok_unsigned); return (buf); case TK_DATA: snprintf(buf, sof, "%s %zdB", s, t->tok_data_len); return (buf); } return (s); } #endif /* PROP_SCN_DEBUG */ static prop_string_t prop_scn_create_string(struct token *t) { prop_string_t ps; char *str = t->tok_string; ps = _prop_string_alloc(); if (ps == NULL) return (NULL); ps->ps_mutable = str; ps->ps_size = strlen(str); return (ps); } static prop_number_t prop_scn_create_uint(struct token *t) { return (prop_number_create_unsigned_integer(t->tok_unsigned)); } static prop_number_t prop_scn_create_sint(struct token *t) { return (prop_number_create_integer(t->tok_signed)); } static boolean_t scanner_ensure_strlen(struct parser *pa) { void *b; /* Enough room for or base64 triplet. */ if ((pa->sc_strlen - pa->sc_strcur) < 3) { b = _PROP_REALLOC(pa->sc_string, pa->sc_strlen + BUFINCR, M_TEMP); if (b == NULL) { VERBOSE("scanner: ENOMEM internalization buffer"); return (TRUE); } pa->sc_string = b; pa->sc_strlen += BUFINCR; } /* No error == success. */ return (FALSE); } static struct token * parser_token_put(struct parser *pa, token_type_t tok) { struct token *t; t = _PROP_MALLOC(sizeof(struct token), M_TEMP); if (t == NULL) { VERBOSE(" parser: ENOMEM token %s", parser_token_name(tok)); return (NULL); } memset(t, 0, sizeof(struct token)); t->tok_type = tok; #if !defined(_STANDALONE) t->tok_pos_line = pa->sc_pos_line; t->tok_pos_col = pa->sc_pos_col; #endif SIMPLEQ_INSERT_TAIL(&pa->pa_tokens, t, tok_link); return (t); } static void parser_token_free(struct token *t, boolean_t force) { /* Normally, tok_string is donated upwards, not duplicated. */ if (force && (t->tok_type == TK_STRING || t->tok_type == TK_SYMBOL) && t->tok_string != NULL) _PROP_FREE(t->tok_string, M_TEMP); _PROP_FREE(t, M_TEMP); } #define SC_ARROW_P(src, dst) (pa->sc_prev == (src) && pa->sc_state == (dst)) #define SC_CYCLE_P(s) SC_ARROW_P(s, s) static u_char scanner_base64_decode(u_char c) { const char *s; if (c == '=') return (0); s = strchr(base64abc, (int)(u_int)c); _PROP_ASSERT(s); return ((u_char)(s - base64abc)); } static int prop_scanner_exec(struct parser *pa, const u_char *input, size_t length) { const u_char *p = input; const u_char *pe = input + length; struct token *t; boolean_t keepchar; u_char c; if (input == NULL || length == 0) { if (parser_token_put(pa, TK_LAST) == NULL) return (ENOMEM); return (0); } advance: if (p == pe) return (0); c = *p++; dispatch: keepchar = TRUE; /* for callcc */ switch (pa->sc_state) { case SC_FIRST: /* Force transition. */ pa->sc_state = SC_TOPLEVEL; goto callcc; case SC_TOPLEVEL: switch (c) { case '#': pa->sc_state = SC_UINT64; goto callnext; case '{': if (parser_token_put(pa, TK_DICTO) == NULL) return (ENOMEM); goto callnext; case '}': if (parser_token_put(pa, TK_DICTC) == NULL) return (ENOMEM); goto callnext; case '[': if (parser_token_put(pa, TK_ARRAYO) == NULL) return (ENOMEM); goto callnext; case ']': if (parser_token_put(pa, TK_ARRAYC) == NULL) return (ENOMEM); goto callnext; case '"': /* Opening quote, clear from edge action. */ pa->sc_state = SC_STRING_APPEND; goto callnext; case ';': pa->sc_state = SC_COMMENT; goto callnext; case ':': pa->sc_state = SC_BASE64_APPEND; goto callnext; } if (c == '-' || c == '+' || isdigit(c)) { pa->sc_state = SC_SINT64; goto callnext; } else if (isspace(c)) { pa->sc_state = SC_WHITE; goto callnext; } pa->sc_state = SC_SYMBOL; goto callnext; case SC_BASE64_APPEND: if (strchr(base64abc, c) != NULL || c == '=') { char d = scanner_base64_decode(c); if (scanner_ensure_strlen(pa)) return (ENOMEM); switch (pa->sc_base64_size % 24) { case 0: pa->sc_string[pa->sc_strcur] = d << 2; break; case 6: /* LINTED: sc_string & d are u_char */ pa->sc_string[pa->sc_strcur] |= d >> 4; pa->sc_strcur++; pa->sc_string[pa->sc_strcur] = d << 4; break; case 12: /* LINTED: sc_string & d are u_char */ pa->sc_string[pa->sc_strcur] |= d >> 2; pa->sc_strcur++; pa->sc_string[pa->sc_strcur] = d << 6; break; case 18: pa->sc_string[pa->sc_strcur++] |= d; break; } pa->sc_base64_size += 6; } else { pa->sc_state = SC_BASE64_MAYBE_NEXT; goto callcc; } break; case SC_BASE64_MAYBE_NEXT: if (pa->sc_base64_size == 0) { VERBOSE("scanner: BASE64 first chunk empty"); pa->sc_state = SC_ERROR; goto callcc; } if (c == '.') { pa->sc_state = SC_BASE64_NEXT; goto callnext; } if (! isspace(c)) { pa->sc_state = SC_BASE64_CLOSE; goto callcc; } break; case SC_BASE64_NEXT: if (strchr(base64abc, c) != NULL || c == '=') { pa->sc_state = SC_BASE64_APPEND; goto callcc; } if (! isspace(c)) { pa->sc_state = SC_ERROR; goto callcc; } break; case SC_BASE64_CLOSE: { u_char *s; /* Length must always be multiple of 3 bytes. */ if (pa->sc_base64_size % 24) { /* Warrants we're long enough to roundup. */ if (scanner_ensure_strlen(pa)) return (ENOMEM); /* Don't overwrite string[strcur]! */ memset(pa->sc_string + pa->sc_strcur + 1, 0, pa->sc_strlen - pa->sc_strcur - 1); pa->sc_base64_size = roundup(pa->sc_base64_size, 24); } /* Convert length to bytes. */ pa->sc_base64_size = roundup(pa->sc_base64_size, 8)/8; s = _PROP_MALLOC(pa->sc_base64_size, M_TEMP); if (s == NULL) { VERBOSE("scanner: ENOMEM data"); return (ENOMEM); } memcpy(s, pa->sc_string, pa->sc_base64_size); if ((t = parser_token_put(pa, TK_DATA)) == NULL) return (ENOMEM); t->tok_data_buf = s; t->tok_data_len = pa->sc_base64_size; pa->sc_state = SC_TOPLEVEL; goto callcc; } /* UNREACHED */ case SC_STRING_APPEND: if (c == '\\') { pa->sc_state = SC_QUOTED; goto callnext; } if (c == '"') { /* Closing quote, see if concatenation follows. */ pa->sc_state = SC_STRING_MAYBE_NEXT; goto callnext; } if (! isprint(c)) { pa->sc_state = SC_ERROR; goto callcc; } /* Append from edge action. */ break; case SC_STRING_MAYBE_NEXT: if (c == '.') { pa->sc_state = SC_STRING_CONCAT; goto callnext; } if (! isspace(c)) { pa->sc_state = SC_STRING_CLOSE; goto callcc; } break; case SC_STRING_CONCAT: if (c == '"') { /* Opening quote of concat string. */ pa->sc_state = SC_STRING_APPEND; goto callnext; } if (! isspace(c)) { pa->sc_state = SC_ERROR; goto callcc; } break; case SC_STRING_CLOSE: { char *s; s = strdup((const char *)pa->sc_string); if (s == NULL) { VERBOSE("scanner: ENOMEM string"); return (ENOMEM); } if ((t = parser_token_put(pa, TK_STRING)) == NULL) return (ENOMEM); t->tok_string = s; pa->sc_state = SC_TOPLEVEL; goto callcc; } /* NOTREACHED */ case SC_QUOTED: { u_char d; if (scanner_ensure_strlen(pa)) return (ENOMEM); switch (d = c) { case '\\': d = '\\'; break; case 'n': d = '\n'; break; case 't': d = '\t'; break; case '"': d = '\"'; break; } pa->sc_string[pa->sc_strcur++] = d; pa->sc_string[pa->sc_strcur] = '\0'; pa->sc_state = SC_STRING_APPEND; goto callnext; } /* NOTREACHED */ case SC_WHITE: if (! isspace(c)) { if ((t = parser_token_put(pa, TK_WHITE)) == NULL) return (ENOMEM); pa->sc_state = SC_TOPLEVEL; goto callcc; } break; case SC_SINT64: if (! isdigit(c) && c != '+' && c != '-') { long long n; char *end; n = strtoll((const char *)pa->sc_string, &end, 10); if (*end != '\0' || n > INT64_MAX || n < INT64_MIN) { VERBOSE("scanner: wrong SINT64 '%s'", pa->sc_string); return (EINVAL); } if ((t = parser_token_put(pa, TK_SINT64)) == NULL) return (ENOMEM); t->tok_unsigned = (int64_t)n; pa->sc_state = SC_TOPLEVEL; goto callcc; } break; case SC_UINT64: if (! isxdigit(c)) { unsigned long long u; char *end; u = strtoull((const char *)pa->sc_string, &end, 16); if (*end != '\0' || u > UINT64_MAX) { VERBOSE("scanner: wrong UINT64 '%s'", pa->sc_string); return (EINVAL); } if ((t = parser_token_put(pa, TK_UINT64)) == NULL) return (ENOMEM); t->tok_unsigned = (uint64_t)u; pa->sc_state = SC_TOPLEVEL; goto callcc; } break; case SC_SYMBOL: if (isspace(c) || c == '\\') { char *s; s = strdup((const char *)pa->sc_string); if (s == NULL) { VERBOSE("scanner: ENOMEM symbol"); return (ENOMEM); } if ((t = parser_token_put(pa, TK_SYMBOL)) == NULL) { _PROP_FREE(s, M_TEMP); return (ENOMEM); } t->tok_string = s; pa->sc_state = SC_TOPLEVEL; goto callcc; } if (! isprint(c)) { pa->sc_state = SC_ERROR; goto callcc; } break; case SC_COMMENT: if (c == '\n' || c == '\r') { pa->sc_state = SC_TOPLEVEL; goto callnext; } break; case SC_ERROR: VERBOSE("scanner: wrong char '%c' (line %zd char %zd) in " "state %s", c, pa->sc_pos_line, pa->sc_pos_col, scanner_state_name(pa->sc_last)); return (EINVAL); } callnext: keepchar = FALSE; callcc: if (pa->sc_state != pa->sc_prev) { VERBOSE("scanner: %-17s --> %-17s %s\t[%d, %d]", ((keepchar && pa->sc_prev != SC_FIRST) ? "" : scanner_state_name(pa->sc_prev)), scanner_state_name(pa->sc_state), scanner_quote_char(c), pa->sc_pos_line, pa->sc_pos_col); } else { VERBOSE("scanner: %-17s ::: %-17s %s\t[%d, %d]", "", "", scanner_quote_char(c), pa->sc_pos_line, pa->sc_pos_col); } /* Be specific about transitions, let compiler deal w/redundancy. */ if (SC_ARROW_P(SC_TOPLEVEL, SC_BASE64_APPEND)) { pa->sc_base64_size = 0; pa->sc_strcur = 0; } if (SC_ARROW_P(SC_TOPLEVEL, SC_STRING_APPEND) || SC_ARROW_P(SC_TOPLEVEL, SC_SYMBOL) || SC_ARROW_P(SC_TOPLEVEL, SC_UINT64) || SC_ARROW_P(SC_TOPLEVEL, SC_SINT64)) { pa->sc_strcur = 0; } if (SC_CYCLE_P(SC_UINT64) || SC_CYCLE_P(SC_SINT64) || SC_CYCLE_P(SC_STRING_APPEND) || SC_CYCLE_P(SC_SYMBOL) || SC_ARROW_P(SC_TOPLEVEL, SC_SINT64) || SC_ARROW_P(SC_TOPLEVEL, SC_SYMBOL)) { if (scanner_ensure_strlen(pa)) return (ENOMEM); pa->sc_string[pa->sc_strcur++] = c; pa->sc_string[pa->sc_strcur] = '\0'; } #if !defined(_STANDALONE) if (! keepchar) { if (c == '\n') { pa->sc_pos_line++; pa->sc_pos_col = 1; } else { if (c == '\t') pa->sc_pos_col = roundup(pa->sc_pos_col, 8); else pa->sc_pos_col++; } } #endif /* Commited to new state. */ if (pa->sc_state != pa->sc_prev) pa->sc_last = pa->sc_prev; pa->sc_prev = pa->sc_state; if (keepchar) goto dispatch; else goto advance; /* UNREACHED */ } #undef SC_CYCLE_P #undef SC_ARROW_P static void parser_frame_free(struct frame *e) { if (e->se_symbol) _PROP_FREE(__UNCONST(e->se_symbol), M_TEMP); if (e->se_object != NULL) prop_object_release(e->se_object); _PROP_FREE(e, M_TEMP); } static boolean_t parser_frame_enter(struct parser *pa, prop_object_t o) { struct frame *e; if (o == NULL) return (TRUE); e = _PROP_MALLOC(sizeof(struct frame), M_TEMP); if (e == NULL) return (TRUE); memset(e, 0, sizeof(struct frame)); e->se_object = o; SIMPLEQ_INSERT_HEAD(&pa->pa_stack, e, se_link); return (FALSE); } static int parser_frame_store(struct parser *pa, prop_object_t o) { struct frame *e; prop_object_t the; e = SIMPLEQ_FIRST(&pa->pa_stack); if (e == NULL) { VERBOSE(" parser: stack underflow"); return (EINVAL); } the = e->se_object; switch (prop_object_type(the)) { case PROP_TYPE_ARRAY: _PROP_ASSERT(e->se_symbol == NULL); if (prop_array_add(the, o) == FALSE) return (ENOMEM); break; case PROP_TYPE_DICTIONARY: _PROP_ASSERT(e->se_symbol != NULL); if (prop_dictionary_set(the, e->se_symbol, o) == FALSE) return (ENOMEM); _PROP_FREE(__UNCONST(e->se_symbol), M_TEMP); e->se_symbol = NULL; break; default: VERBOSE(" parser: wrong object on stack, not compound"); return (EINVAL); } prop_object_release(o); return (0); } static int parser_frame_leave(struct parser *pa) { struct frame *e; prop_object_t o; /* Get hold of the lower object. */ if ((e = SIMPLEQ_FIRST(&pa->pa_stack)) == NULL) { VERBOSE(" parser: stack underflow"); return (EINVAL); } SIMPLEQ_REMOVE_HEAD(&pa->pa_stack, se_link); /* Move it to finished objects if it's toplevel. */ if (SIMPLEQ_EMPTY(&pa->pa_stack)) { SIMPLEQ_INSERT_TAIL(&pa->pa_consq, e, se_link); _PROP_ASSERT(e->se_object); return (0); } /* Otherwise insert into current compound. */ o = e->se_object; /* Make sure ${o} isn't released, parser_frame_store() will do it. */ e->se_object = NULL; parser_frame_free(e); return (parser_frame_store(pa, o)); } static int prop_scn_parser_create(prop_parser_t *pp) { struct parser *pa; pa = _PROP_MALLOC(sizeof(struct parser), M_TEMP); if (pa == NULL) return (ENOMEM); memset(pa, 0, sizeof(struct parser)); SIMPLEQ_INIT(&pa->pa_tokens); SIMPLEQ_INIT(&pa->pa_stack); SIMPLEQ_INIT(&pa->pa_consq); #if !defined(_STANDALONE) /* Text editors tend to count from 1, be friendly. */ pa->sc_pos_line = 1; pa->sc_pos_col = 1; #endif if (parser_token_put(pa, TK_FIRST) == NULL) { _PROP_FREE(pa, M_TEMP); return (ENOMEM); } *pp = pa; return (0); } static void prop_scn_parser_destroy(prop_parser_t arg) { struct parser *pa = arg; struct token *t; struct frame *e; if (pa->sc_string) _PROP_FREE(pa->sc_string, M_TEMP); /* Free any pending tokens. */ while ((t = SIMPLEQ_FIRST(&pa->pa_tokens)) != NULL) { SIMPLEQ_REMOVE_HEAD(&pa->pa_tokens, tok_link); parser_token_free(t, TRUE); } /* Free any active stack frames. */ while ((e = SIMPLEQ_FIRST(&pa->pa_stack)) != NULL) { SIMPLEQ_REMOVE_HEAD(&pa->pa_stack, se_link); parser_frame_free(e); } /* Free any finished objects. */ while ((e = SIMPLEQ_FIRST(&pa->pa_consq)) != NULL) { SIMPLEQ_REMOVE_HEAD(&pa->pa_consq, se_link); parser_frame_free(e); } _PROP_FREE(pa, M_TEMP); } static prop_object_t prop_scn_parser_yield(prop_parser_t arg) { struct parser *pa = arg; struct frame *e; prop_object_t o; if ((e = SIMPLEQ_FIRST(&pa->pa_consq)) == NULL) return (NULL); SIMPLEQ_REMOVE_HEAD(&pa->pa_consq, se_link); o = e->se_object; e->se_object = NULL; _PROP_ASSERT(e); _PROP_ASSERT(o); parser_frame_free(e); return (o); } static int prop_scn_parser_exec(prop_parser_t arg, const u_char *input, size_t length) { static const char *__truths[] = { "true", "yes", "on", NULL }; static const char *__lies[] = { "false", "no", "off", NULL }; prop_object_t the; struct frame *frame; struct token *t; struct parser *pa = arg; int nexttoken, ret; ret = prop_scanner_exec(pa, input, length); if (ret) return (ret); advance: if ((t = SIMPLEQ_FIRST(&pa->pa_tokens)) == NULL) return (0); dispatch: pa->pa_prev = pa->pa_state; nexttoken = FALSE; /* for callcc */ switch (pa->pa_state) { case PA_TOPLEVEL: switch (t->tok_type) { case TK_FIRST: /* XXX read version token */ goto callnext; case TK_DICTO: if (parser_frame_enter(pa, (prop_object_t)prop_dictionary_create())) { VERBOSE(" parser: ENOMEM dictionary"); return (ENOMEM); } pa->pa_state = PA_DICTIONARY; goto callnext; case TK_ARRAYO: if (parser_frame_enter(pa, (prop_object_t)prop_array_create())) { VERBOSE(" parser: ENOMEM array"); return (ENOMEM); } pa->pa_state = PA_ARRAY; goto callnext; case TK_LAST: if (SIMPLEQ_NEXT(t, tok_link) != NULL) { VERBOSE(" parser: stack not empty at EOF"); return (EINVAL); } return (0); default: /* GCC tries to be smart but fails. */ break; } if (t->tok_type != TK_WHITE) { pa->pa_state = PA_ERROR; goto callcc; } break; case PA_ARRAY: if (t->tok_type == TK_ARRAYC) { if ((ret = parser_frame_leave(pa)) != 0) { if (ret == EINVAL) VERBOSE(" parser: [%d, %d] " "misplaced ']'", t->tok_pos_line, t->tok_pos_col); return (ret); } pa->pa_state = PA_HOME; goto callcc; } else if (t->tok_type != TK_WHITE) { pa->pa_state = PA_OBJECT; goto callcc; } break; case PA_DICTIONARY: switch (t->tok_type) { case TK_DICTC: if ((ret = parser_frame_leave(pa)) != 0) { if (ret == EINVAL) VERBOSE(" parser: [%d, %d] " "misplaced '}'", t->tok_pos_line, t->tok_pos_col); return (ret); } pa->pa_state = PA_HOME; goto callcc; case TK_SYMBOL: frame = SIMPLEQ_FIRST(&pa->pa_stack); _PROP_ASSERT(frame && frame->se_symbol == NULL); frame->se_symbol = (const char *)t->tok_string; pa->pa_state = PA_OBJECT; goto callnext; default: /* GCC */ break; } /* WHITE or ERROR */ break; case PA_OBJECT: if (t->tok_type == TK_WHITE) break; switch (t->tok_type) { case TK_STRING: the = prop_scn_create_string(t); break; case TK_UINT64: the = prop_scn_create_uint(t); break; case TK_SINT64: the = prop_scn_create_sint(t); break; case TK_DATA: the = prop_data_create_data_nocopy(t->tok_data_buf, t->tok_data_len); break; case TK_SYMBOL: /* Coerce SYMBOL to bool at value position. */ if (stroneof((const char *)t->tok_string, __truths)) the = prop_bool_create(TRUE); else if (stroneof((const char *)t->tok_string, __lies)) the = prop_bool_create(FALSE); else { VERBOSE(" parser: [%d, %d] wrong BOOL '%s'", t->tok_pos_line, t->tok_pos_col, t->tok_string); return (EINVAL); } break; case _TK_COMPOUND_VALUES: /* Descend one level deeper via TOPLEVEL actions. */ pa->pa_state = PA_TOPLEVEL; goto callcc; default: pa->pa_state = PA_ERROR; goto callcc; } /* We're supposed to have valid simple object now. */ if (the == NULL) { VERBOSE(" parser: ENOMEM for %s", parser_token_name(t->tok_type)); return (ENOMEM); } /* Store it in current container. */ if (parser_frame_store(pa, the)) return (ENOMEM); pa->pa_state = PA_HOME; goto callcc; case PA_HOME: /* * We've just finished an object (simple or compound). * Continue where we came from -- at the parent container's * main entry point. We get here through callcc. */ frame = SIMPLEQ_FIRST(&pa->pa_stack); if (frame == NULL) { pa->pa_state = PA_TOPLEVEL; break; } _PROP_ASSERT(frame->se_object); switch (prop_object_type(frame->se_object)) { case PROP_TYPE_ARRAY: pa->pa_state = PA_ARRAY; break; case PROP_TYPE_DICTIONARY: pa->pa_state = PA_DICTIONARY; break; default: VERBOSE(" parser: wrong object on stack"); return (EINVAL); } break; case PA_ERROR: VERBOSE(" parser: [%d, %d] wrong token %s in state %s", t->tok_pos_line, t->tok_pos_col, parser_token_desc(t), parser_state_name(pa->pa_last)); return (EINVAL); } /* Call to next implies token was accepted, so stay above. */ if (pa->pa_prev == pa->pa_state && (pa->pa_state == PA_TOPLEVEL || pa->pa_state == PA_ARRAY || pa->pa_state == PA_DICTIONARY)) if (t->tok_type != TK_WHITE) { pa->pa_state = PA_ERROR; goto callcc; } callnext: nexttoken = TRUE; callcc: if (pa->pa_state != pa->pa_last) { VERBOSE(" parser: %-17s --> %-17s %s", (nexttoken ? parser_state_name(pa->pa_prev) : ""), parser_state_name(pa->pa_state), parser_token_desc(t)); pa->pa_last = pa->pa_prev; } else { VERBOSE(" parser: %-17s ::: %-17s %s", "", "", parser_token_desc(t)); } if (nexttoken) { SIMPLEQ_REMOVE_HEAD(&pa->pa_tokens, tok_link); parser_token_free(t, FALSE); goto advance; } else { goto dispatch; } /* UNREACHED */ } static boolean_t format_string_quote(struct poec *ec, const char *s) { const char *t; boolean_t ret; if (! poec_push(ec, '"')) return (TRUE); while (*s) { t = NULL; switch (*s) { case '\f': t = "\\f"; break; case '\n': t = "\\n"; break; case '\r': t = "\\r"; break; case '\t': t = "\\t"; break; case '\v': t = "\\v"; break; case '"': t = "\\\""; break; } if (t) ret = poec_append(ec, t); else ret = poec_push(ec, *s); if (ret == FALSE) return (TRUE); s++; } if (! poec_push(ec, '"')) return (TRUE); return (FALSE); } static boolean_t format_data_base64(struct poec *ec, const char *s, size_t size) { const u_char *b = (const u_char *)s; size_t i; u_int n; int ret = 0; /* XXX gcc sux */ poec_push(ec, ':'); /* LINTED n & b[] are unsigned */ n = b[0] >> 2; for (i = 0; i < size; i++) { switch (i % 3) { case 0: /* LINTED: b[] is u_char */ ret = poec_push(ec, base64abc[b[i] >> 2]); n = b[i] & 0x03; break; case 1: /* LINTED: b[] is u_char */ ret = poec_push(ec, base64abc[(n << 4) | (b[i] >> 4)]); n = b[i] & 0x0f; break; case 2: /* LINTED: b[] is u_char */ if (!poec_push(ec, base64abc[(n << 2) | (b[i] >> 6)]) || !poec_push(ec, base64abc[b[i] & 0x3f])) ret = FALSE; else ret = TRUE; break; } if (ret == FALSE) return (TRUE); } /* Finish based on how many bytes of a triplet we already have. */ switch (size % 3) { case 1: if (!poec_push(ec, base64abc[n << 4])) return (TRUE); break; case 2: if (!poec_push(ec, base64abc[n << 2])) return (TRUE); break; } /* Finally, pad to multiple of four characters of encoded text. */ switch (size % 3) { case 1: if (!poec_push(ec, '=')) return (TRUE); /* FALLTHROUGH */ case 2: if (!poec_push(ec, '=')) return (TRUE); } return (FALSE); } static boolean_t format_indent(struct poec *ec) { int i; #if 0 VERBOSE(" format: %zd/%zdB used, nesting depth %d", ec->poec_len, ec->poec_capacity, ec->poec_depth); #endif for (i = 0; i < ec->poec_depth; i++) if (poec_push(ec, '\t') == FALSE) { VERBOSE(" format: ENOMEM indent"); return (TRUE); } return (FALSE); } static char * prop_scn_externalize(prop_object_t o) { char buf[32]; struct stack sp; struct frame *e; struct poec *ec; char *s; prop_object_t the; boolean_t ret; SIMPLEQ_INIT(&sp); if ((ec = poec_create()) == NULL) return (NULL); /* * We only work for compound objects, like XML codec does. * Note that this ensures we'll record ${o} on the stack. */ switch (prop_object_type(o)) { case PROP_TYPE_ARRAY: case PROP_TYPE_DICTIONARY: break; default: goto lose; } the = o; again: switch (prop_object_type(the)) { case PROP_TYPE_BOOL: VERBOSE(" format: bool [%d]", ec->poec_depth); if (prop_bool_true(the)) ret = poec_append(ec, "True"); else ret = poec_append(ec, "False"); if (ret == FALSE) goto lose; break; case PROP_TYPE_NUMBER: VERBOSE(" format: number [%d]", ec->poec_depth); if (prop_number_unsigned(the)) snprintf(buf, sizeof(buf), "#%" PRIx64, prop_number_unsigned_integer_value(the)); else snprintf(buf, sizeof(buf), "%" PRId64, prop_number_integer_value(the)); if (poec_append(ec, buf) == FALSE) goto lose; break; case PROP_TYPE_STRING: VERBOSE(" format: string [%d]", ec->poec_depth); if (format_string_quote(ec, prop_string_cstring_nocopy(the))) goto lose; break; case PROP_TYPE_DATA: VERBOSE(" format: data [%d]", ec->poec_depth); if (format_data_base64(ec, prop_data_data_nocopy(the), prop_data_size(the))) goto lose; break; case PROP_TYPE_ARRAY: VERBOSE(" format: array [%d]", ec->poec_depth); if (!poec_append(ec, "[")) goto lose; ec->poec_depth++; if ((e = _PROP_MALLOC(sizeof(struct frame), M_TEMP)) == NULL) goto lose; memset(e, 0, sizeof(struct frame)); SIMPLEQ_INSERT_HEAD(&sp, e, se_link); e->se_object = the; e->se_iter = prop_array_iterator(the); if (e->se_iter == NULL) goto lose; break; case PROP_TYPE_DICTIONARY: VERBOSE(" format: dictionary [%d]", ec->poec_depth); if (!poec_append(ec, "{")) { VERBOSE(" format: ENOMEM dictionary open"); goto lose; } ec->poec_depth++; if ((e = _PROP_MALLOC(sizeof(struct frame), M_TEMP)) == NULL) { VERBOSE(" format: ENOMEM stack frame"); goto lose; } memset(e, 0, sizeof(struct frame)); SIMPLEQ_INSERT_HEAD(&sp, e, se_link); e->se_object = the; e->se_iter = prop_dictionary_iterator(the); if (e->se_iter == NULL) { VERBOSE(" format: ENOMEM dictionary iterator"); goto lose; } break; default: VERBOSE(" format: object %p wrong type %d", the, prop_object_type(the)); goto lose; } if (! poec_append(ec, "\n")) { VERBOSE(" format: ENOMEM newline after object"); goto lose; } pop: e = SIMPLEQ_FIRST(&sp); if (e == NULL) { _PROP_ASSERT(ec->poec_depth == 0); printf("DONE\n"); /* XXX done */ } _PROP_ASSERT(e->se_iter != NULL); /* Grab next object. */ o = prop_object_iterator_next(e->se_iter); if (o == NULL) { SIMPLEQ_REMOVE_HEAD(&sp, se_link); _PROP_ASSERT(ec->poec_depth != 0); ec->poec_depth --; if (format_indent(ec)) { VERBOSE(" format: ENOMEM close indent"); goto lose; } if (prop_object_type(e->se_object) == PROP_TYPE_DICTIONARY) ret = poec_append(ec, "}\n"); else ret = poec_append(ec, "]\n"); if (ret == FALSE) { VERBOSE(" format: ENOMEM close compound"); goto lose; } prop_object_iterator_release(e->se_iter); _PROP_FREE(e, M_TEMP); if (SIMPLEQ_EMPTY(&sp)) goto done; else goto pop; } /* Lookup the real object if we got indirect reference. */ if (prop_object_type(o) == PROP_TYPE_DICT_KEYSYM) { const char *r; prop_object_t p; p = prop_dictionary_get_keysym((prop_dictionary_t)e->se_object, (prop_dictionary_keysym_t)o); _PROP_ASSERT(p != NULL); r = prop_keysym_str((prop_dictionary_keysym_t)o); if (r == NULL) { VERBOSE(" format: EINVAL dictionary key"); goto lose; } if (format_indent(ec) || poec_append(ec, r) == FALSE || poec_push(ec, '\t') == FALSE) { VERBOSE(" format: ENOMEM dictionary key"); goto lose; } the = p; } else { if (format_indent(ec)) { VERBOSE(" format: ENOMEM array indent"); goto lose; } the = o; } /* We've got a fresh object from the compound, analyse it. */ goto again; done: /* Prepare the result for caller. */ ec->poec_buf[ec->poec_len] = '\0'; s = ec->poec_buf; /* The stack is empty at this point, just free externalize context. */ poec_destroy(ec); return (s); /*NOTREACHED*/ lose: VERBOSE(" format: LOST"); while ((e = SIMPLEQ_FIRST(&sp)) != NULL) { SIMPLEQ_REMOVE_HEAD(&sp, se_link); if (e->se_iter) prop_object_iterator_release(e->se_iter); _PROP_FREE(e, M_TEMP); } if (ec->poec_buf) _PROP_FREE(ec->poec_buf, M_TEMP); poec_destroy(ec); return (NULL); } const struct _prop_codec prop_codec_scn = { .codec_name = "scn", .codec_sense = (const u_char *)".{[;", .codec_externalize_compound = prop_scn_externalize, .codec_parser_create = prop_scn_parser_create, .codec_parser_exec = prop_scn_parser_exec, .codec_parser_yield = prop_scn_parser_yield, .codec_parser_destroy = prop_scn_parser_destroy, };