diff --git a/doc/apiref.rst b/doc/apiref.rst index 0b2ee15..4127a85 100644 --- a/doc/apiref.rst +++ b/doc/apiref.rst @@ -357,7 +357,6 @@ information, see :ref:`rfc-conformance`. should use ``json_int_t`` explicitly. ``JSON_INTEGER_IS_LONG_LONG`` - This is a preprocessor variable that holds the value 1 if :type:`json_int_t` is ``long long``, and 0 if it's ``long``. It can be used as follows:: @@ -369,7 +368,6 @@ information, see :ref:`rfc-conformance`. #endif ``JSON_INTEGER_FORMAT`` - This is a macro that expands to a :func:`printf()` conversion specifier that corresponds to :type:`json_int_t`, without the leading ``%`` sign, i.e. either ``"lld"`` or ``"ld"``. This macro @@ -1072,13 +1070,25 @@ items:: /* Create the JSON array ["foo", "bar", true] */ json_pack("[ssb]", "foo", "bar", 1); -Here's the full list of format characters. The type in parentheses +Here's the full list of format specifiers. The type in parentheses denotes the resulting JSON type, and the type in brackets (if any) -denotes the C type that is expected as the corresponding argument. +denotes the C type that is expected as the corresponding argument or +arguments. ``s`` (string) [const char \*] Convert a NULL terminated UTF-8 string to a JSON string. +``s#`` (string) [const char \*, int] + Convert a UTF-8 buffer of a given length to a JSON string. + +``+`` [const char \*] + Like ``s``, but concatenate to the previous string. Only valid + after ``s``, ``s#``, ``+`` or ``+#``. + +``+#`` [const char \*, int] + Like ``s#``, but concatenate to the previous string. Only valid + after ``s``, ``s#``, ``+`` or ``+#``. + ``n`` (null) Output a JSON null value. No argument is consumed. @@ -1113,10 +1123,11 @@ denotes the C type that is expected as the corresponding argument. ``{fmt}`` (object) Build an object with contents from the inner format string - ``fmt``. The first, third, etc. format character represent a key, - and must be ``s`` (as object keys are always strings). The second, - fourth, etc. 
format character represent a value. Any value may be - an object or array, i.e. recursive value building is supported. + ``fmt``. The first, third, etc. format specifier represent a key, + and must be a string (see ``s``, ``s#``, ``+`` and ``+#`` above), + as object keys are always strings. The second, fourth, etc. format + specifier represent a value. Any value may be an object or array, + i.e. recursive value building is supported. Whitespace, ``:`` and ``,`` are ignored. @@ -1127,9 +1138,9 @@ The following functions compose the value building API: .. refcounting:: new Build a new JSON value according to the format string *fmt*. For - each format character (except for ``{}[]n``), one argument is - consumed and used to build the corresponding value. Returns *NULL* - on error. + each format specifier (except for ``{}[]n``), one or more arguments + are consumed and used to build the corresponding value. Returns + *NULL* on error. .. function:: json_t *json_pack_ex(json_error_t *error, size_t flags, const char *fmt, ...) json_t *json_vpack_ex(json_error_t *error, size_t flags, const char *fmt, va_list ap) @@ -1158,6 +1169,13 @@ More examples:: /* Build the JSON array [[1, 2], {"cool": true}] */ json_pack("[[i,i],{s:b}]", 1, 2, "cool", 1); + /* Build a string from a non-NUL terminated buffer */ + char buffer[4] = {'t', 'e', 's', 't'}; + json_pack("s#", buffer, 4); + + /* Concatenate strings together to build the JSON string "foobarbaz" */ + json_pack("s++", "foo", "bar", "baz"); + .. _apiref-unpack: @@ -1172,10 +1190,10 @@ While a JSON value is unpacked, the type specified in the format string is checked to match that of the JSON value. This is the validation part of the process. In addition to this, the unpacking functions can also check that all items of arrays and objects are -unpacked. This check be enabled with the format character ``!`` or by +unpacked. This check can be enabled with the format specifier ``!`` or by using the flag ``JSON_STRICT``. 
See below for details. -Here's the full list of format characters. The type in parentheses +Here's the full list of format specifiers. The type in parentheses denotes the JSON type, and the type in brackets (if any) denotes the C type whose address should be passed. @@ -1217,10 +1235,10 @@ type whose address should be passed. ``{fmt}`` (object) Convert each item in the JSON object according to the inner format - string ``fmt``. The first, third, etc. format character represent + string ``fmt``. The first, third, etc. format specifier represent a key, and must be ``s``. The corresponding argument to unpack functions is read as the object key. The second fourth, etc. - format character represent a value and is written to the address + format specifier represent a value and is written to the address given as the corresponding argument. **Note** that every other argument is read from and every other is written to. @@ -1233,17 +1251,17 @@ type whose address should be passed. extracted. See below for an example. ``!`` - This special format character is used to enable the check that + This special format specifier is used to enable the check that all object and array items are accessed, on a per-value basis. It - must appear inside an array or object as the last format character + must appear inside an array or object as the last format specifier before the closing bracket or brace. To enable the check globally, use the ``JSON_STRICT`` unpacking flag. ``*`` - This special format character is the opposite of ``!``. If the + This special format specifier is the opposite of ``!``. If the ``JSON_STRICT`` flag is used, ``*`` can be used to disable the strict check on a per-value basis. It must appear inside an array - or object as the last format character before the closing bracket + or object as the last format specifier before the closing bracket or brace. Whitespace, ``:`` and ``,`` are ignored. 
@@ -1268,13 +1286,13 @@ The following functions compose the parsing and validation API: The first argument of all unpack functions is ``json_t *root`` instead of ``const json_t *root``, because the use of ``O`` format - character causes the reference count of ``root``, or some value + specifier causes the reference count of ``root``, or some value reachable from ``root``, to be increased. Furthermore, the ``o`` - format character may be used to extract a value as-is, which allows + format specifier may be used to extract a value as-is, which allows modifying the structure or contents of a value reachable from ``root``. - If the ``O`` and ``o`` format characters are not used, it's + If the ``O`` and ``o`` format specifiers are not used, it's perfectly safe to cast a ``const json_t *`` variable to plain ``json_t *`` when used with these functions. @@ -1283,7 +1301,7 @@ The following unpacking flags are available: ``JSON_STRICT`` Enable the extra validation step checking that all object and array items are unpacked. This is equivalent to appending the - format character ``!`` to the end of every array and object in the + format specifier ``!`` to the end of every array and object in the format string. 
``JSON_VALIDATE_ONLY`` diff --git a/src/jansson_private.h b/src/jansson_private.h index bac46be..403b53a 100644 --- a/src/jansson_private.h +++ b/src/jansson_private.h @@ -81,6 +81,7 @@ int jsonp_dtostr(char *buffer, size_t size, double value); /* Wrappers for custom memory functions */ void* jsonp_malloc(size_t size); void jsonp_free(void *ptr); +char *jsonp_strndup(const char *str, size_t length); char *jsonp_strdup(const char *str); /* Windows compatibility */ diff --git a/src/pack_unpack.c b/src/pack_unpack.c index c56435b..0d932f7 100644 --- a/src/pack_unpack.c +++ b/src/pack_unpack.c @@ -11,16 +11,28 @@ #include "jansson_private.h" #include "utf.h" +typedef struct { + int line; + int column; + size_t pos; + char token; +} token_t; + typedef struct { const char *start; const char *fmt; - char token; + token_t prev_token; + token_t token; + token_t next_token; json_error_t *error; size_t flags; int line; int column; + size_t pos; } scanner_t; +#define token(scanner) ((scanner)->token.token) + static const char * const type_names[] = { "object", "array", @@ -43,14 +55,28 @@ static void scanner_init(scanner_t *s, json_error_t *error, s->error = error; s->flags = flags; s->fmt = s->start = fmt; + memset(&s->prev_token, 0, sizeof(token_t)); + memset(&s->token, 0, sizeof(token_t)); + memset(&s->next_token, 0, sizeof(token_t)); s->line = 1; s->column = 0; + s->pos = 0; } static void next_token(scanner_t *s) { - const char *t = s->fmt; + const char *t; + s->prev_token = s->token; + + if(s->next_token.line) { + s->token = s->next_token; + s->next_token.line = 0; + return; + } + + t = s->fmt; s->column++; + s->pos++; /* skip space and ignored chars */ while(*t == ' ' || *t == '\t' || *t == '\n' || *t == ',' || *t == ':') { @@ -61,23 +87,32 @@ static void next_token(scanner_t *s) else s->column++; + s->pos++; t++; } - s->token = *t; + s->token.token = *t; + s->token.line = s->line; + s->token.column = s->column; + s->token.pos = s->pos; t++; s->fmt = t; } +static void 
prev_token(scanner_t *s) +{ + s->next_token = s->token; + s->token = s->prev_token; +} + static void set_error(scanner_t *s, const char *source, const char *fmt, ...) { va_list ap; - size_t pos; va_start(ap, fmt); - pos = (size_t)(s->fmt - s->start); - jsonp_error_vset(s->error, s->line, s->column, pos, fmt, ap); + jsonp_error_vset(s->error, s->token.line, s->token.column, s->token.pos, + fmt, ap); jsonp_error_set_source(s->error, source); @@ -86,35 +121,107 @@ static void set_error(scanner_t *s, const char *source, const char *fmt, ...) static json_t *pack(scanner_t *s, va_list *ap); + +/* Reads the string for an 's' specifier, including any '#' and '+' suffixes. *ours is set to 1 if jsonp_free() must be called for the result + afterwards. Returns NULL, with the error set, on failure. */ +static char *read_string(scanner_t *s, va_list *ap, + const char *purpose, int *ours) +{ + char t; + strbuffer_t strbuff; + const char *str; + size_t length; + char *result; + + next_token(s); + t = token(s); + prev_token(s); + + if(t != '#' && t != '+') { + /* Optimize the simple case */ + str = va_arg(*ap, const char *); + + if(!str) { + set_error(s, "", "NULL string argument"); + return NULL; + } + + if(!utf8_check_string(str, -1)) { + set_error(s, "", "Invalid UTF-8 %s", purpose); + return NULL; + } + + *ours = 0; + return (char *)str; + } + + strbuffer_init(&strbuff); + + while(1) { + str = va_arg(*ap, const char *); + if(!str) { + set_error(s, "", "NULL string argument"); + strbuffer_close(&strbuff); + return NULL; + } + + next_token(s); + + if(token(s) == '#') { + length = va_arg(*ap, int); + } + else { + prev_token(s); + length = strlen(str); + } + + if(strbuffer_append_bytes(&strbuff, str, length) == -1) { + set_error(s, "", "Out of memory"); + strbuffer_close(&strbuff); + return NULL; + } + + next_token(s); + if(token(s) != '+') { + prev_token(s); + break; + } + } + + result = strbuffer_steal_value(&strbuff); + if(!utf8_check_string(result, -1)) { + set_error(s, "", "Invalid UTF-8 %s", purpose); + jsonp_free(result); /* the stolen buffer is no longer owned by strbuff */ + return NULL; + } + + *ours = 1; + return result; +} + static json_t 
*pack_object(scanner_t *s, va_list *ap) { json_t *object = json_object(); next_token(s); - while(s->token != '}') { - const char *key; + while(token(s) != '}') { + char *key; + int ours; json_t *value; - if(!s->token) { + if(!token(s)) { set_error(s, "", "Unexpected end of format string"); goto error; } - if(s->token != 's') { - set_error(s, "", "Expected format 's', got '%c'", s->token); + if(token(s) != 's') { + set_error(s, "", "Expected format 's', got '%c'", token(s)); goto error; } - key = va_arg(*ap, const char *); - if(!key) { - set_error(s, "", "NULL object key"); + key = read_string(s, ap, "object key", &ours); + if(!key) goto error; - } - - if(!utf8_check_string(key, -1)) { - set_error(s, "", "Invalid UTF-8 in object key"); - goto error; - } next_token(s); @@ -123,10 +230,16 @@ static json_t *pack_object(scanner_t *s, va_list *ap) goto error; if(json_object_set_new_nocheck(object, key, value)) { set_error(s, "", "Unable to add key \"%s\"", key); + /* set_error() above formats key, so release it only afterwards */ + if(ours) + jsonp_free(key); goto error; } + if(ours) + jsonp_free(key); + next_token(s); } @@ -142,10 +255,10 @@ static json_t *pack_array(scanner_t *s, va_list *ap) json_t *array = json_array(); next_token(s); - while(s->token != ']') { + while(token(s) != ']') { json_t *value; - if(!s->token) { + if(!token(s)) { set_error(s, "", "Unexpected end of format string"); goto error; } @@ -170,25 +283,27 @@ error: static json_t *pack(scanner_t *s, va_list *ap) { - switch(s->token) { + switch(token(s)) { case '{': return pack_object(s, ap); case '[': return pack_array(s, ap); - case 's': /* string */ - { - const char *str = va_arg(*ap, const char *); - if(!str) { - set_error(s, "", "NULL string argument"); + case 's': { /* string */ + char *str; + int ours; + json_t *result; + + str = read_string(s, ap, "string", &ours); + if(!str) return NULL; - } - if(!utf8_check_string(str, -1)) { - set_error(s, "", "Invalid UTF-8 string"); - return NULL; - } - return json_string_nocheck(str); + + result = 
json_string_nocheck(str); + if(ours) + jsonp_free(str); + + return result; } case 'n': /* null */ @@ -214,7 +329,7 @@ static json_t *pack(scanner_t *s, va_list *ap) default: set_error(s, "", "Unexpected format character '%c'", - s->token); + token(s)); return NULL; } } @@ -245,30 +360,30 @@ static int unpack_object(scanner_t *s, json_t *root, va_list *ap) } next_token(s); - while(s->token != '}') { + while(token(s) != '}') { const char *key; json_t *value; int opt = 0; if(strict != 0) { set_error(s, "", "Expected '}' after '%c', got '%c'", - (strict == 1 ? '!' : '*'), s->token); + (strict == 1 ? '!' : '*'), token(s)); goto out; } - if(!s->token) { + if(!token(s)) { set_error(s, "", "Unexpected end of format string"); goto out; } - if(s->token == '!' || s->token == '*') { - strict = (s->token == '!' ? 1 : -1); + if(token(s) == '!' || token(s) == '*') { + strict = (token(s) == '!' ? 1 : -1); next_token(s); continue; } - if(s->token != 's') { - set_error(s, "", "Expected format 's', got '%c'", s->token); + if(token(s) != 's') { + set_error(s, "", "Expected format 's', got '%c'", token(s)); goto out; } @@ -280,7 +395,7 @@ static int unpack_object(scanner_t *s, json_t *root, va_list *ap) next_token(s); - if(s->token == '?') { + if(token(s) == '?') { opt = 1; next_token(s); } @@ -331,30 +446,30 @@ static int unpack_array(scanner_t *s, json_t *root, va_list *ap) } next_token(s); - while(s->token != ']') { + while(token(s) != ']') { json_t *value; if(strict != 0) { set_error(s, "", "Expected ']' after '%c', got '%c'", (strict == 1 ? '!' : '*'), - s->token); + token(s)); return -1; } - if(!s->token) { + if(!token(s)) { set_error(s, "", "Unexpected end of format string"); return -1; } - if(s->token == '!' || s->token == '*') { - strict = (s->token == '!' ? 1 : -1); + if(token(s) == '!' || token(s) == '*') { + strict = (token(s) == '!' ? 
1 : -1); next_token(s); continue; } - if(!strchr(unpack_value_starters, s->token)) { + if(!strchr(unpack_value_starters, token(s))) { set_error(s, "", "Unexpected format character '%c'", - s->token); + token(s)); return -1; } @@ -392,7 +507,7 @@ static int unpack_array(scanner_t *s, json_t *root, va_list *ap) static int unpack(scanner_t *s, json_t *root, va_list *ap) { - switch(s->token) + switch(token(s)) { case '{': return unpack_object(s, root, ap); @@ -521,7 +636,7 @@ static int unpack(scanner_t *s, json_t *root, va_list *ap) default: set_error(s, "", "Unexpected format character '%c'", - s->token); + token(s)); return -1; } } @@ -551,7 +666,7 @@ json_t *json_vpack_ex(json_error_t *error, size_t flags, return NULL; next_token(&s); - if(s.token) { + if(token(&s)) { json_decref(value); set_error(&s, "", "Garbage after format string"); return NULL; @@ -614,7 +729,7 @@ int json_vunpack_ex(json_t *root, json_error_t *error, size_t flags, va_end(ap_copy); next_token(&s); - if(s.token) { + if(token(&s)) { set_error(&s, "", "Garbage after format string"); return -1; } diff --git a/src/strbuffer.c b/src/strbuffer.c index 822bb2d..2d6ff31 100644 --- a/src/strbuffer.c +++ b/src/strbuffer.c @@ -34,7 +34,9 @@ int strbuffer_init(strbuffer_t *strbuff) void strbuffer_close(strbuffer_t *strbuff) { - jsonp_free(strbuff->value); + if(strbuff->value) + jsonp_free(strbuff->value); + strbuff->size = 0; strbuff->length = 0; strbuff->value = NULL; @@ -54,7 +56,7 @@ const char *strbuffer_value(const strbuffer_t *strbuff) char *strbuffer_steal_value(strbuffer_t *strbuff) { char *result = strbuff->value; - strbuffer_init(strbuff); + strbuff->value = NULL; return result; } diff --git a/src/strbuffer.h b/src/strbuffer.h index 5559e0a..06fd065 100644 --- a/src/strbuffer.h +++ b/src/strbuffer.h @@ -20,6 +20,8 @@ void strbuffer_close(strbuffer_t *strbuff); void strbuffer_clear(strbuffer_t *strbuff); const char *strbuffer_value(const strbuffer_t *strbuff); + +/* Steal the value and close the 
strbuffer */ char *strbuffer_steal_value(strbuffer_t *strbuff); int strbuffer_append(strbuffer_t *strbuff, const char *string); diff --git a/test/suites/api/test_pack.c b/test/suites/api/test_pack.c index 4d5d7b8..b6ac2e5 100644 --- a/test/suites/api/test_pack.c +++ b/test/suites/api/test_pack.c @@ -21,6 +21,7 @@ static void run_tests() { json_t *value; int i; + char buffer[4] = {'t', 'e', 's', 't'}; json_error_t error; /* @@ -82,6 +83,38 @@ static void run_tests() fail("json_pack string refcount failed"); json_decref(value); + /* string and length */ + value = json_pack("s#", "test asdf", 4); + if(!json_is_string(value) || strcmp("test", json_string_value(value))) + fail("json_pack string and length failed"); + if(value->refcount != (size_t)1) + fail("json_pack string and length refcount failed"); + json_decref(value); + + /* string and length, non-NUL terminated string */ + value = json_pack("s#", buffer, 4); + if(!json_is_string(value) || strcmp("test", json_string_value(value))) + fail("json_pack string and length failed"); + if(value->refcount != (size_t)1) + fail("json_pack string and length refcount failed"); + json_decref(value); + + /* string concatenation */ + value = json_pack("s++", "te", "st", "ing"); + if(!json_is_string(value) || strcmp("testing", json_string_value(value))) + fail("json_pack string concatenation failed"); + if(value->refcount != (size_t)1) + fail("json_pack string concatenation refcount failed"); + json_decref(value); + + /* string concatenation and length */ + value = json_pack("s#+#+", "test", 1, "test", 2, "test"); + if(!json_is_string(value) || strcmp("ttetest", json_string_value(value))) + fail("json_pack string concatenation and length failed"); + if(value->refcount != (size_t)1) + fail("json_pack string concatenation and length refcount failed"); + json_decref(value); + /* empty object */ value = json_pack("{}", 1.0); if(!json_is_object(value) || json_object_size(value) != 0) @@ -125,6 +158,16 @@ static void run_tests() 
fail("json_pack object refcount failed"); json_decref(value); + /* object with complex key */ + value = json_pack("{s+#+: []}", "foo", "barbar", 3, "baz"); + if(!json_is_object(value) || json_object_size(value) != 1) + fail("json_pack array failed"); + if(!json_is_array(json_object_get(value, "foobarbaz"))) + fail("json_pack array failed"); + if(json_object_get(value, "foobarbaz")->refcount != (size_t)1) + fail("json_pack object refcount failed"); + json_decref(value); + /* simple array */ value = json_pack("[i,i,i]", 0, 1, 2); if(!json_is_array(value) || json_array_size(value) != 3) @@ -198,6 +241,11 @@ static void run_tests() fail("json_pack failed to catch null argument string"); check_error("NULL string argument", "", 1, 1, 1); + /* + on its own */ + if(json_pack_ex(&error, 0, "+", NULL)) + fail("json_pack failed to a lone +"); + check_error("Unexpected format character '+'", "", 1, 1, 1); + /* NULL format */ if(json_pack_ex(&error, 0, NULL)) fail("json_pack failed to catch NULL format string"); @@ -206,7 +254,7 @@ static void run_tests() /* NULL key */ if(json_pack_ex(&error, 0, "{s:i}", NULL, 1)) fail("json_pack failed to catch NULL key"); - check_error("NULL object key", "", 1, 2, 2); + check_error("NULL string argument", "", 1, 2, 2); /* More complicated checks for row/columns */ if(json_pack_ex(&error, 0, "{ {}: s }", "foo")) @@ -226,7 +274,7 @@ static void run_tests() /* Invalid UTF-8 in object key */ if(json_pack_ex(&error, 0, "{s:i}", "\xff\xff", 42)) fail("json_pack failed to catch invalid UTF-8 in an object key"); - check_error("Invalid UTF-8 in object key", "", 1, 2, 2); + check_error("Invalid UTF-8 object key", "", 1, 2, 2); /* Invalid UTF-8 in a string */ if(json_pack_ex(&error, 0, "{s:s}", "foo", "\xff\xff"))