Refactor decoder input stream
- Add a new field, position, to the json_error_t structure. This is the position in bytes from the beginning of the input.
- Keep track of line, column and input position at the stream level. Previously, only the line was tracked, and it was tracked at the lexer level, so this info was not available for UTF-8 decoding errors.
- While at it, refactor tests so that no separate "stripped" tests are required. json_process is now able to strip whitespace from its input, and the "valid" and "invalid" test suites now use this to test both non-stripped and stripped input.

Closes GH-9.
This commit is contained in:
parent
e54ea1f7c9
commit
5df7b79397
@ -739,27 +739,32 @@ affect especially the behavior of the decoder.
|
|||||||
This data structure is used to return information on decoding
|
This data structure is used to return information on decoding
|
||||||
errors from the decoding functions.
|
errors from the decoding functions.
|
||||||
|
|
||||||
.. member:: const char *text
|
.. member:: char text[]
|
||||||
|
|
||||||
The error message (in UTF-8), or an empty string if a message is
|
The error message (in UTF-8), or an empty string if a message is
|
||||||
not available.
|
not available.
|
||||||
|
|
||||||
.. member:: int line
|
.. member:: char source[]
|
||||||
|
|
||||||
The line number on which the error occurred, or -1 if this
|
|
||||||
information is not available.
|
|
||||||
|
|
||||||
.. member:: int column
|
|
||||||
|
|
||||||
The character column on which the error occurred, or -1 if this
|
|
||||||
information is not available.
|
|
||||||
|
|
||||||
.. member:: const char *source
|
|
||||||
|
|
||||||
Source of the error. This is (a part of) the file name when
|
Source of the error. This is (a part of) the file name when
|
||||||
using :func:`json_load_file()`, or a special identifier in angle
|
using :func:`json_load_file()`, or a special identifier in angle
|
||||||
brackets otherwise (e.g. ``<string>``).
|
brackets otherwise (e.g. ``<string>``).
|
||||||
|
|
||||||
|
.. member:: int line
|
||||||
|
|
||||||
|
The line number on which the error occurred.
|
||||||
|
|
||||||
|
.. member:: int column
|
||||||
|
|
||||||
|
The character column on which the error occurred. Note that this
|
||||||
|
is the *character column*, not the byte column, i.e. a non-ASCII
|
||||||
|
UTF-8 character counts as one column.
|
||||||
|
|
||||||
|
.. member:: int position
|
||||||
|
|
||||||
|
The position in bytes from the start of the input. This is
|
||||||
|
useful for debugging Unicode encoding problems.
|
||||||
|
|
||||||
The normal use of :type:`json_error_t` is to allocate it on the
|
The normal use of :type:`json_error_t` is to allocate it on the
|
||||||
stack, and pass a pointer to a decoding function. Example::
|
stack, and pass a pointer to a decoding function. Example::
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@ void jsonp_error_init(json_error_t *error, const char *source)
|
|||||||
error->text[0] = '\0';
|
error->text[0] = '\0';
|
||||||
error->line = -1;
|
error->line = -1;
|
||||||
error->column = -1;
|
error->column = -1;
|
||||||
|
error->position = 0;
|
||||||
|
|
||||||
strncpy(error->source, source, JSON_ERROR_SOURCE_LENGTH);
|
strncpy(error->source, source, JSON_ERROR_SOURCE_LENGTH);
|
||||||
error->source[JSON_ERROR_SOURCE_LENGTH - 1] = '\0';
|
error->source[JSON_ERROR_SOURCE_LENGTH - 1] = '\0';
|
||||||
@ -15,17 +16,17 @@ void jsonp_error_init(json_error_t *error, const char *source)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void jsonp_error_set(json_error_t *error, int line, int column,
|
void jsonp_error_set(json_error_t *error, int line, int column,
|
||||||
const char *msg, ...)
|
size_t position, const char *msg, ...)
|
||||||
{
|
{
|
||||||
va_list ap;
|
va_list ap;
|
||||||
|
|
||||||
va_start(ap, msg);
|
va_start(ap, msg);
|
||||||
jsonp_error_vset(error, line, column, msg, ap);
|
jsonp_error_vset(error, line, column, position, msg, ap);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
}
|
}
|
||||||
|
|
||||||
void jsonp_error_vset(json_error_t *error, int line, int column,
|
void jsonp_error_vset(json_error_t *error, int line, int column,
|
||||||
const char *msg, va_list ap)
|
size_t position, const char *msg, va_list ap)
|
||||||
{
|
{
|
||||||
if(!error)
|
if(!error)
|
||||||
return;
|
return;
|
||||||
@ -37,6 +38,7 @@ void jsonp_error_vset(json_error_t *error, int line, int column,
|
|||||||
|
|
||||||
error->line = line;
|
error->line = line;
|
||||||
error->column = column;
|
error->column = column;
|
||||||
|
error->position = position;
|
||||||
|
|
||||||
vsnprintf(error->text, JSON_ERROR_TEXT_LENGTH, msg, ap);
|
vsnprintf(error->text, JSON_ERROR_TEXT_LENGTH, msg, ap);
|
||||||
}
|
}
|
||||||
|
@ -109,10 +109,11 @@ void json_decref(json_t *json)
|
|||||||
#define JSON_ERROR_SOURCE_LENGTH 80
|
#define JSON_ERROR_SOURCE_LENGTH 80
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char text[JSON_ERROR_TEXT_LENGTH];
|
|
||||||
int line;
|
int line;
|
||||||
int column;
|
int column;
|
||||||
|
int position;
|
||||||
char source[JSON_ERROR_SOURCE_LENGTH];
|
char source[JSON_ERROR_SOURCE_LENGTH];
|
||||||
|
char text[JSON_ERROR_TEXT_LENGTH];
|
||||||
} json_error_t;
|
} json_error_t;
|
||||||
|
|
||||||
|
|
||||||
|
@ -69,9 +69,9 @@ const object_key_t *jsonp_object_iter_fullkey(void *iter);
|
|||||||
|
|
||||||
void jsonp_error_init(json_error_t *error, const char *source);
|
void jsonp_error_init(json_error_t *error, const char *source);
|
||||||
void jsonp_error_set(json_error_t *error, int line, int column,
|
void jsonp_error_set(json_error_t *error, int line, int column,
|
||||||
const char *msg, ...);
|
size_t position, const char *msg, ...);
|
||||||
void jsonp_error_vset(json_error_t *error, int line, int column,
|
void jsonp_error_vset(json_error_t *error, int line, int column,
|
||||||
const char *msg, va_list ap);
|
size_t position, const char *msg, va_list ap);
|
||||||
|
|
||||||
/* Wrappers for custom memory functions */
|
/* Wrappers for custom memory functions */
|
||||||
void* jsonp_malloc(size_t size);
|
void* jsonp_malloc(size_t size);
|
||||||
|
201
src/load.c
201
src/load.c
@ -20,6 +20,10 @@
|
|||||||
#include "strbuffer.h"
|
#include "strbuffer.h"
|
||||||
#include "utf.h"
|
#include "utf.h"
|
||||||
|
|
||||||
|
#define STREAM_STATE_OK 0
|
||||||
|
#define STREAM_STATE_EOF -1
|
||||||
|
#define STREAM_STATE_ERROR -2
|
||||||
|
|
||||||
#define TOKEN_INVALID -1
|
#define TOKEN_INVALID -1
|
||||||
#define TOKEN_EOF 0
|
#define TOKEN_EOF 0
|
||||||
#define TOKEN_STRING 256
|
#define TOKEN_STRING 256
|
||||||
@ -29,27 +33,26 @@
|
|||||||
#define TOKEN_FALSE 260
|
#define TOKEN_FALSE 260
|
||||||
#define TOKEN_NULL 261
|
#define TOKEN_NULL 261
|
||||||
|
|
||||||
/* read one byte from stream, return EOF on end of file */
|
/* Read one byte from stream, convert to unsigned char, then int, and
|
||||||
|
return. Return EOF on end of file. This corresponds to the
|
||||||
|
behaviour of fgetc(). */
|
||||||
typedef int (*get_func)(void *data);
|
typedef int (*get_func)(void *data);
|
||||||
|
|
||||||
/* return non-zero if end of file has been reached */
|
|
||||||
typedef int (*eof_func)(void *data);
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
get_func get;
|
get_func get;
|
||||||
eof_func eof;
|
|
||||||
void *data;
|
void *data;
|
||||||
int stream_pos;
|
|
||||||
char buffer[5];
|
char buffer[5];
|
||||||
int buffer_pos;
|
int buffer_pos;
|
||||||
|
int state;
|
||||||
|
int line;
|
||||||
|
int column, last_column;
|
||||||
|
size_t position;
|
||||||
} stream_t;
|
} stream_t;
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
stream_t stream;
|
stream_t stream;
|
||||||
strbuffer_t saved_text;
|
strbuffer_t saved_text;
|
||||||
int token;
|
int token;
|
||||||
int line, column;
|
|
||||||
union {
|
union {
|
||||||
char *string;
|
char *string;
|
||||||
json_int_t integer;
|
json_int_t integer;
|
||||||
@ -57,6 +60,8 @@ typedef struct {
|
|||||||
} value;
|
} value;
|
||||||
} lex_t;
|
} lex_t;
|
||||||
|
|
||||||
|
#define stream_to_lex(stream) container_of(stream, lex_t, stream)
|
||||||
|
|
||||||
|
|
||||||
/*** error reporting ***/
|
/*** error reporting ***/
|
||||||
|
|
||||||
@ -67,6 +72,7 @@ static void error_set(json_error_t *error, const lex_t *lex,
|
|||||||
char msg_text[JSON_ERROR_TEXT_LENGTH];
|
char msg_text[JSON_ERROR_TEXT_LENGTH];
|
||||||
|
|
||||||
int line = -1, col = -1;
|
int line = -1, col = -1;
|
||||||
|
size_t pos = 0;
|
||||||
const char *result = msg_text;
|
const char *result = msg_text;
|
||||||
|
|
||||||
if(!error)
|
if(!error)
|
||||||
@ -81,7 +87,9 @@ static void error_set(json_error_t *error, const lex_t *lex,
|
|||||||
const char *saved_text = strbuffer_value(&lex->saved_text);
|
const char *saved_text = strbuffer_value(&lex->saved_text);
|
||||||
char msg_with_context[JSON_ERROR_TEXT_LENGTH];
|
char msg_with_context[JSON_ERROR_TEXT_LENGTH];
|
||||||
|
|
||||||
line = lex->line;
|
line = lex->stream.line;
|
||||||
|
col = lex->stream.column;
|
||||||
|
pos = lex->stream.position;
|
||||||
|
|
||||||
if(saved_text && saved_text[0])
|
if(saved_text && saved_text[0])
|
||||||
{
|
{
|
||||||
@ -93,41 +101,57 @@ static void error_set(json_error_t *error, const lex_t *lex,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
|
if(lex->stream.state == STREAM_STATE_ERROR) {
|
||||||
"%s near end of file", msg_text);
|
/* No context for UTF-8 decoding errors */
|
||||||
result = msg_with_context;
|
result = msg_text;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
snprintf(msg_with_context, JSON_ERROR_TEXT_LENGTH,
|
||||||
|
"%s near end of file", msg_text);
|
||||||
|
result = msg_with_context;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
jsonp_error_set(error, line, col, "%s", result);
|
jsonp_error_set(error, line, col, pos, "%s", result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*** lexical analyzer ***/
|
/*** lexical analyzer ***/
|
||||||
|
|
||||||
static void
|
static void
|
||||||
stream_init(stream_t *stream, get_func get, eof_func eof, void *data)
|
stream_init(stream_t *stream, get_func get, void *data)
|
||||||
{
|
{
|
||||||
stream->get = get;
|
stream->get = get;
|
||||||
stream->eof = eof;
|
|
||||||
stream->data = data;
|
stream->data = data;
|
||||||
stream->stream_pos = 0;
|
|
||||||
stream->buffer[0] = '\0';
|
stream->buffer[0] = '\0';
|
||||||
stream->buffer_pos = 0;
|
stream->buffer_pos = 0;
|
||||||
|
|
||||||
|
stream->state = STREAM_STATE_OK;
|
||||||
|
stream->line = 1;
|
||||||
|
stream->column = 0;
|
||||||
|
stream->position = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static char stream_get(stream_t *stream, json_error_t *error)
|
static int stream_get(stream_t *stream, json_error_t *error)
|
||||||
{
|
{
|
||||||
char c;
|
int c;
|
||||||
|
|
||||||
|
if(stream->state != STREAM_STATE_OK)
|
||||||
|
return stream->state;
|
||||||
|
|
||||||
if(!stream->buffer[stream->buffer_pos])
|
if(!stream->buffer[stream->buffer_pos])
|
||||||
{
|
{
|
||||||
stream->buffer[0] = stream->get(stream->data);
|
c = stream->get(stream->data);
|
||||||
|
if(c == EOF) {
|
||||||
|
stream->state = STREAM_STATE_EOF;
|
||||||
|
return STREAM_STATE_EOF;
|
||||||
|
}
|
||||||
|
|
||||||
|
stream->buffer[0] = c;
|
||||||
stream->buffer_pos = 0;
|
stream->buffer_pos = 0;
|
||||||
|
|
||||||
c = stream->buffer[0];
|
if(0x80 <= c && c <= 0xFF)
|
||||||
|
|
||||||
if((unsigned char)c >= 0x80 && c != (char)EOF)
|
|
||||||
{
|
{
|
||||||
/* multi-byte UTF-8 sequence */
|
/* multi-byte UTF-8 sequence */
|
||||||
int i, count;
|
int i, count;
|
||||||
@ -144,30 +168,47 @@ static char stream_get(stream_t *stream, json_error_t *error)
|
|||||||
if(!utf8_check_full(stream->buffer, count, NULL))
|
if(!utf8_check_full(stream->buffer, count, NULL))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
stream->stream_pos += count;
|
|
||||||
stream->buffer[count] = '\0';
|
stream->buffer[count] = '\0';
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
stream->buffer[1] = '\0';
|
stream->buffer[1] = '\0';
|
||||||
stream->stream_pos++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return stream->buffer[stream->buffer_pos++];
|
c = stream->buffer[stream->buffer_pos++];
|
||||||
|
|
||||||
|
stream->position++;
|
||||||
|
if(c == '\n') {
|
||||||
|
stream->line++;
|
||||||
|
stream->last_column = stream->column;
|
||||||
|
stream->column = 0;
|
||||||
|
}
|
||||||
|
else if(utf8_check_first(c)) {
|
||||||
|
/* track the Unicode character column, so increment only if
|
||||||
|
this is the first character of a UTF-8 sequence */
|
||||||
|
stream->column++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return c;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
error_set(error, NULL, "unable to decode byte 0x%x at position %d",
|
stream->state = STREAM_STATE_ERROR;
|
||||||
(unsigned char)c, stream->stream_pos);
|
error_set(error, stream_to_lex(stream), "unable to decode byte 0x%x", c);
|
||||||
|
return STREAM_STATE_ERROR;
|
||||||
stream->buffer[0] = EOF;
|
|
||||||
stream->buffer[1] = '\0';
|
|
||||||
stream->buffer_pos = 1;
|
|
||||||
|
|
||||||
return EOF;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void stream_unget(stream_t *stream, char c)
|
static void stream_unget(stream_t *stream, int c)
|
||||||
{
|
{
|
||||||
|
if(c == STREAM_STATE_EOF || c == STREAM_STATE_ERROR)
|
||||||
|
return;
|
||||||
|
|
||||||
|
stream->position--;
|
||||||
|
if(c == '\n') {
|
||||||
|
stream->line--;
|
||||||
|
stream->column = stream->last_column;
|
||||||
|
}
|
||||||
|
else if(utf8_check_first(c))
|
||||||
|
stream->column--;
|
||||||
|
|
||||||
assert(stream->buffer_pos > 0);
|
assert(stream->buffer_pos > 0);
|
||||||
stream->buffer_pos--;
|
stream->buffer_pos--;
|
||||||
assert(stream->buffer[stream->buffer_pos] == c);
|
assert(stream->buffer[stream->buffer_pos] == c);
|
||||||
@ -179,29 +220,32 @@ static int lex_get(lex_t *lex, json_error_t *error)
|
|||||||
return stream_get(&lex->stream, error);
|
return stream_get(&lex->stream, error);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int lex_eof(lex_t *lex)
|
static void lex_save(lex_t *lex, int c)
|
||||||
{
|
|
||||||
return lex->stream.eof(lex->stream.data);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void lex_save(lex_t *lex, char c)
|
|
||||||
{
|
{
|
||||||
strbuffer_append_byte(&lex->saved_text, c);
|
strbuffer_append_byte(&lex->saved_text, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int lex_get_save(lex_t *lex, json_error_t *error)
|
static int lex_get_save(lex_t *lex, json_error_t *error)
|
||||||
{
|
{
|
||||||
char c = stream_get(&lex->stream, error);
|
int c = stream_get(&lex->stream, error);
|
||||||
lex_save(lex, c);
|
if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR)
|
||||||
|
lex_save(lex, c);
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void lex_unget_unsave(lex_t *lex, char c)
|
static void lex_unget(lex_t *lex, int c)
|
||||||
{
|
{
|
||||||
char d;
|
|
||||||
stream_unget(&lex->stream, c);
|
stream_unget(&lex->stream, c);
|
||||||
d = strbuffer_pop(&lex->saved_text);
|
}
|
||||||
assert(c == d);
|
|
||||||
|
static void lex_unget_unsave(lex_t *lex, int c)
|
||||||
|
{
|
||||||
|
if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) {
|
||||||
|
char d;
|
||||||
|
stream_unget(&lex->stream, c);
|
||||||
|
d = strbuffer_pop(&lex->saved_text);
|
||||||
|
assert(c == d);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void lex_save_cached(lex_t *lex)
|
static void lex_save_cached(lex_t *lex)
|
||||||
@ -210,6 +254,7 @@ static void lex_save_cached(lex_t *lex)
|
|||||||
{
|
{
|
||||||
lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]);
|
lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]);
|
||||||
lex->stream.buffer_pos++;
|
lex->stream.buffer_pos++;
|
||||||
|
lex->stream.position++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -239,7 +284,7 @@ static int32_t decode_unicode_escape(const char *str)
|
|||||||
|
|
||||||
static void lex_scan_string(lex_t *lex, json_error_t *error)
|
static void lex_scan_string(lex_t *lex, json_error_t *error)
|
||||||
{
|
{
|
||||||
char c;
|
int c;
|
||||||
const char *p;
|
const char *p;
|
||||||
char *t;
|
char *t;
|
||||||
int i;
|
int i;
|
||||||
@ -250,14 +295,15 @@ static void lex_scan_string(lex_t *lex, json_error_t *error)
|
|||||||
c = lex_get_save(lex, error);
|
c = lex_get_save(lex, error);
|
||||||
|
|
||||||
while(c != '"') {
|
while(c != '"') {
|
||||||
if(c == (char)EOF) {
|
if(c == STREAM_STATE_ERROR)
|
||||||
lex_unget_unsave(lex, c);
|
goto out;
|
||||||
if(lex_eof(lex))
|
|
||||||
error_set(error, lex, "premature end of input");
|
else if(c == STREAM_STATE_EOF) {
|
||||||
|
error_set(error, lex, "premature end of input");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
else if((unsigned char)c <= 0x1F) {
|
else if(0 <= c && c <= 0x1F) {
|
||||||
/* control character */
|
/* control character */
|
||||||
lex_unget_unsave(lex, c);
|
lex_unget_unsave(lex, c);
|
||||||
if(c == '\n')
|
if(c == '\n')
|
||||||
@ -273,7 +319,6 @@ static void lex_scan_string(lex_t *lex, json_error_t *error)
|
|||||||
c = lex_get_save(lex, error);
|
c = lex_get_save(lex, error);
|
||||||
for(i = 0; i < 4; i++) {
|
for(i = 0; i < 4; i++) {
|
||||||
if(!isxdigit(c)) {
|
if(!isxdigit(c)) {
|
||||||
lex_unget_unsave(lex, c);
|
|
||||||
error_set(error, lex, "invalid escape");
|
error_set(error, lex, "invalid escape");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -284,7 +329,6 @@ static void lex_scan_string(lex_t *lex, json_error_t *error)
|
|||||||
c == 'f' || c == 'n' || c == 'r' || c == 't')
|
c == 'f' || c == 'n' || c == 'r' || c == 't')
|
||||||
c = lex_get_save(lex, error);
|
c = lex_get_save(lex, error);
|
||||||
else {
|
else {
|
||||||
lex_unget_unsave(lex, c);
|
|
||||||
error_set(error, lex, "invalid escape");
|
error_set(error, lex, "invalid escape");
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@ -399,7 +443,7 @@ out:
|
|||||||
#define json_strtoint strtol
|
#define json_strtoint strtol
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int lex_scan_number(lex_t *lex, char c, json_error_t *error)
|
static int lex_scan_number(lex_t *lex, int c, json_error_t *error)
|
||||||
{
|
{
|
||||||
const char *saved_text;
|
const char *saved_text;
|
||||||
char *end;
|
char *end;
|
||||||
@ -423,8 +467,8 @@ static int lex_scan_number(lex_t *lex, char c, json_error_t *error)
|
|||||||
c = lex_get_save(lex, error);
|
c = lex_get_save(lex, error);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
lex_unget_unsave(lex, c);
|
lex_unget_unsave(lex, c);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(c != '.' && c != 'E' && c != 'e') {
|
if(c != '.' && c != 'E' && c != 'e') {
|
||||||
@ -453,8 +497,10 @@ static int lex_scan_number(lex_t *lex, char c, json_error_t *error)
|
|||||||
|
|
||||||
if(c == '.') {
|
if(c == '.') {
|
||||||
c = lex_get(lex, error);
|
c = lex_get(lex, error);
|
||||||
if(!isdigit(c))
|
if(!isdigit(c)) {
|
||||||
|
lex_unget(lex, c);
|
||||||
goto out;
|
goto out;
|
||||||
|
}
|
||||||
lex_save(lex, c);
|
lex_save(lex, c);
|
||||||
|
|
||||||
c = lex_get_save(lex, error);
|
c = lex_get_save(lex, error);
|
||||||
@ -498,7 +544,7 @@ out:
|
|||||||
|
|
||||||
static int lex_scan(lex_t *lex, json_error_t *error)
|
static int lex_scan(lex_t *lex, json_error_t *error)
|
||||||
{
|
{
|
||||||
char c;
|
int c;
|
||||||
|
|
||||||
strbuffer_clear(&lex->saved_text);
|
strbuffer_clear(&lex->saved_text);
|
||||||
|
|
||||||
@ -509,18 +555,15 @@ static int lex_scan(lex_t *lex, json_error_t *error)
|
|||||||
|
|
||||||
c = lex_get(lex, error);
|
c = lex_get(lex, error);
|
||||||
while(c == ' ' || c == '\t' || c == '\n' || c == '\r')
|
while(c == ' ' || c == '\t' || c == '\n' || c == '\r')
|
||||||
{
|
|
||||||
if(c == '\n')
|
|
||||||
lex->line++;
|
|
||||||
|
|
||||||
c = lex_get(lex, error);
|
c = lex_get(lex, error);
|
||||||
|
|
||||||
|
if(c == STREAM_STATE_EOF) {
|
||||||
|
lex->token = TOKEN_EOF;
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(c == (char)EOF) {
|
if(c == STREAM_STATE_ERROR) {
|
||||||
if(lex_eof(lex))
|
lex->token = TOKEN_INVALID;
|
||||||
lex->token = TOKEN_EOF;
|
|
||||||
else
|
|
||||||
lex->token = TOKEN_INVALID;
|
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -580,15 +623,13 @@ static char *lex_steal_string(lex_t *lex)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int lex_init(lex_t *lex, get_func get, eof_func eof, void *data)
|
static int lex_init(lex_t *lex, get_func get, void *data)
|
||||||
{
|
{
|
||||||
stream_init(&lex->stream, get, eof, data);
|
stream_init(&lex->stream, get, data);
|
||||||
if(strbuffer_init(&lex->saved_text))
|
if(strbuffer_init(&lex->saved_text))
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
lex->token = TOKEN_INVALID;
|
lex->token = TOKEN_INVALID;
|
||||||
lex->line = 1;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -791,16 +832,10 @@ static int string_get(void *data)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
stream->pos++;
|
stream->pos++;
|
||||||
return c;
|
return (unsigned char)c;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int string_eof(void *data)
|
|
||||||
{
|
|
||||||
string_data_t *stream = (string_data_t *)data;
|
|
||||||
return (stream->data[stream->pos] == '\0');
|
|
||||||
}
|
|
||||||
|
|
||||||
json_t *json_loads(const char *string, size_t flags, json_error_t *error)
|
json_t *json_loads(const char *string, size_t flags, json_error_t *error)
|
||||||
{
|
{
|
||||||
lex_t lex;
|
lex_t lex;
|
||||||
@ -809,7 +844,7 @@ json_t *json_loads(const char *string, size_t flags, json_error_t *error)
|
|||||||
|
|
||||||
(void)flags; /* unused */
|
(void)flags; /* unused */
|
||||||
|
|
||||||
if(lex_init(&lex, string_get, string_eof, (void *)&stream_data))
|
if(lex_init(&lex, string_get, (void *)&stream_data))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
jsonp_error_init(error, "<string>");
|
jsonp_error_init(error, "<string>");
|
||||||
@ -837,7 +872,7 @@ json_t *json_loadf(FILE *input, size_t flags, json_error_t *error)
|
|||||||
json_t *result;
|
json_t *result;
|
||||||
(void)flags; /* unused */
|
(void)flags; /* unused */
|
||||||
|
|
||||||
if(lex_init(&lex, (get_func)fgetc, (eof_func)feof, input))
|
if(lex_init(&lex, (get_func)fgetc, input))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if(input == stdin)
|
if(input == stdin)
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include "jansson_private.h"
|
#include "jansson_private.h"
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
const char *start;
|
||||||
const char *fmt;
|
const char *fmt;
|
||||||
char token;
|
char token;
|
||||||
json_error_t *error;
|
json_error_t *error;
|
||||||
@ -57,8 +58,12 @@ static void next_token(scanner_t *s)
|
|||||||
static void set_error(scanner_t *s, const char *fmt, ...)
|
static void set_error(scanner_t *s, const char *fmt, ...)
|
||||||
{
|
{
|
||||||
va_list ap;
|
va_list ap;
|
||||||
|
size_t pos;
|
||||||
va_start(ap, fmt);
|
va_start(ap, fmt);
|
||||||
jsonp_error_vset(s->error, s->line, s->column, fmt, ap);
|
|
||||||
|
pos = (size_t)(s->fmt - s->start);
|
||||||
|
jsonp_error_vset(s->error, s->line, s->column, pos, fmt, ap);
|
||||||
|
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -447,13 +452,13 @@ json_t *json_vpack_ex(json_error_t *error, size_t flags,
|
|||||||
jsonp_error_init(error, "");
|
jsonp_error_init(error, "");
|
||||||
|
|
||||||
if(!fmt || !*fmt) {
|
if(!fmt || !*fmt) {
|
||||||
jsonp_error_set(error, 1, 1, "Null or empty format string");
|
jsonp_error_set(error, -1, -1, 0, "Null or empty format string");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
s.error = error;
|
s.error = error;
|
||||||
s.flags = flags;
|
s.flags = flags;
|
||||||
s.fmt = fmt;
|
s.fmt = s.start = fmt;
|
||||||
s.line = 1;
|
s.line = 1;
|
||||||
s.column = 0;
|
s.column = 0;
|
||||||
|
|
||||||
@ -505,13 +510,13 @@ int json_vunpack_ex(json_t *root, json_error_t *error, size_t flags,
|
|||||||
jsonp_error_init(error, "");
|
jsonp_error_init(error, "");
|
||||||
|
|
||||||
if(!fmt || !*fmt) {
|
if(!fmt || !*fmt) {
|
||||||
jsonp_error_set(error, 1, 1, "Null or empty format string");
|
jsonp_error_set(error, -1, -1, 0, "Null or empty format string");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
s.error = error;
|
s.error = error;
|
||||||
s.flags = flags;
|
s.flags = flags;
|
||||||
s.fmt = fmt;
|
s.fmt = s.start = fmt;
|
||||||
s.line = 1;
|
s.line = 1;
|
||||||
s.column = 0;
|
s.column = 0;
|
||||||
|
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <ctype.h>
|
||||||
#include <jansson.h>
|
#include <jansson.h>
|
||||||
|
|
||||||
static int getenv_int(const char *name)
|
static int getenv_int(const char *name)
|
||||||
@ -25,6 +27,26 @@ static int getenv_int(const char *name)
|
|||||||
return (int)result;
|
return (int)result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return a pointer to the first non-whitespace character of str.
|
||||||
|
Modifies str so that all trailing whitespace characters are
|
||||||
|
replaced by '\0'. */
|
||||||
|
static const char *strip(char *str)
|
||||||
|
{
|
||||||
|
size_t length;
|
||||||
|
char *result = str;
|
||||||
|
while(*result && isspace(*result))
|
||||||
|
result++;
|
||||||
|
|
||||||
|
length = strlen(result);
|
||||||
|
if(length == 0)
|
||||||
|
return result;
|
||||||
|
|
||||||
|
while(isspace(result[length - 1]))
|
||||||
|
result[--length] = '\0';
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int indent = 0;
|
int indent = 0;
|
||||||
@ -59,9 +81,39 @@ int main(int argc, char *argv[])
|
|||||||
if(getenv_int("JSON_SORT_KEYS"))
|
if(getenv_int("JSON_SORT_KEYS"))
|
||||||
flags |= JSON_SORT_KEYS;
|
flags |= JSON_SORT_KEYS;
|
||||||
|
|
||||||
json = json_loadf(stdin, 0, &error);
|
if(getenv_int("STRIP")) {
|
||||||
|
/* Load to memory, strip leading and trailing whitespace */
|
||||||
|
size_t size = 0, used = 0;
|
||||||
|
char *buffer = NULL;
|
||||||
|
|
||||||
|
while(1) {
|
||||||
|
int count;
|
||||||
|
|
||||||
|
size = (size == 0 ? 128 : size * 2);
|
||||||
|
buffer = realloc(buffer, size);
|
||||||
|
if(!buffer) {
|
||||||
|
fprintf(stderr, "Unable to allocate %d bytes\n", (int)size);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
count = fread(buffer + used, 1, size - used, stdin);
|
||||||
|
if(count < size - used) {
|
||||||
|
buffer[used + count] = '\0';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
used += count;
|
||||||
|
}
|
||||||
|
|
||||||
|
json = json_loads(strip(buffer), 0, &error);
|
||||||
|
free(buffer);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
json = json_loadf(stdin, 0, &error);
|
||||||
|
|
||||||
if(!json) {
|
if(!json) {
|
||||||
fprintf(stderr, "%d\n%s\n", error.line, error.text);
|
fprintf(stderr, "%d %d %d\n%s\n",
|
||||||
|
error.line, error.column, error.position,
|
||||||
|
error.text);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,7 +180,7 @@ int main()
|
|||||||
/* NULL format */
|
/* NULL format */
|
||||||
if(json_pack_ex(&error, 0, NULL))
|
if(json_pack_ex(&error, 0, NULL))
|
||||||
fail("json_pack failed to catch NULL format string");
|
fail("json_pack failed to catch NULL format string");
|
||||||
if(error.line != 1 || error.column != 1)
|
if(error.line != -1 || error.column != -1)
|
||||||
fail("json_pack didn't get the error coordinates right!");
|
fail("json_pack didn't get the error coordinates right!");
|
||||||
|
|
||||||
/* More complicated checks for row/columns */
|
/* More complicated checks for row/columns */
|
||||||
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid token near '''
|
|
@ -1 +0,0 @@
|
|||||||
['
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
'[' or '{' expected near 'a'
|
|
@ -1 +0,0 @@
|
|||||||
aå
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
string or '}' expected near ','
|
|
@ -1 +0,0 @@
|
|||||||
{,
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
unexpected token near ','
|
|
@ -1 +0,0 @@
|
|||||||
[,
|
|
@ -1 +0,0 @@
|
|||||||
[1,
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
'[' or '{' expected near end of file
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
\u0000 is not allowed
|
|
@ -1 +0,0 @@
|
|||||||
["\u0000 (null byte not allowed)"]
|
|
@ -1 +0,0 @@
|
|||||||
[1,]
|
|
@ -1,2 +0,0 @@
|
|||||||
6
|
|
||||||
unexpected token near ']'
|
|
@ -1,6 +0,0 @@
|
|||||||
[1,
|
|
||||||
2,
|
|
||||||
3,
|
|
||||||
4,
|
|
||||||
5,
|
|
||||||
]
|
|
@ -1,2 +0,0 @@
|
|||||||
2
|
|
||||||
end of file expected near 'foo'
|
|
@ -1,2 +0,0 @@
|
|||||||
[1,2,3]
|
|
||||||
foo
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
end of file expected near 'foo'
|
|
@ -1 +0,0 @@
|
|||||||
[1,2,3]foo
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid token near '0'
|
|
@ -1 +0,0 @@
|
|||||||
[012]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid escape near '"\'
|
|
@ -1 +0,0 @@
|
|||||||
["\a <-- invalid escape"]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid token near 'troo'
|
|
@ -1 +0,0 @@
|
|||||||
[troo
|
|
@ -1 +0,0 @@
|
|||||||
[-123foo]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
']' expected near 'foo'
|
|
@ -1 +0,0 @@
|
|||||||
[-123.123foo]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid Unicode '\uD888\u3210'
|
|
@ -1 +0,0 @@
|
|||||||
["\uD888\u3210 (first surrogate and invalid second surrogate)"]
|
|
@ -1 +0,0 @@
|
|||||||
{
|
|
@ -1 +0,0 @@
|
|||||||
[
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid Unicode '\uDFAA'
|
|
@ -1 +0,0 @@
|
|||||||
["\uDFAA (second surrogate on it's own)"]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid token near '-'
|
|
@ -1 +0,0 @@
|
|||||||
[-foo]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid token near '-0'
|
|
@ -1 +0,0 @@
|
|||||||
[-012]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
control character 0x0 near '"null byte '
|
|
Binary file not shown.
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid token near end of file
|
|
Binary file not shown.
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
'[' or '{' expected near 'null'
|
|
@ -1 +0,0 @@
|
|||||||
null
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
string or '}' expected near '''
|
|
@ -1 +0,0 @@
|
|||||||
{'a'
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
'}' expected near '123'
|
|
@ -1 +0,0 @@
|
|||||||
{"a":"a" 123}
|
|
@ -1 +0,0 @@
|
|||||||
[{}
|
|
@ -1 +0,0 @@
|
|||||||
{"a"
|
|
@ -1 +0,0 @@
|
|||||||
{"a":
|
|
@ -1 +0,0 @@
|
|||||||
{"a":"a
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid token near '1e'
|
|
@ -1 +0,0 @@
|
|||||||
[1ea]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
real number overflow near '-123123e100000'
|
|
@ -1 +0,0 @@
|
|||||||
[-123123e100000]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
real number overflow near '123123e100000'
|
|
@ -1 +0,0 @@
|
|||||||
[123123e100000]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid token near '1e'
|
|
@ -1 +0,0 @@
|
|||||||
[1e]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid token near '1.'
|
|
@ -1 +0,0 @@
|
|||||||
[1.]
|
|
@ -1,27 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
#
|
|
||||||
# Copyright (c) 2009-2011 Petri Lehtinen <petri@digip.org>
|
|
||||||
#
|
|
||||||
# Jansson is free software; you can redistribute it and/or modify
|
|
||||||
# it under the terms of the MIT license. See LICENSE for details.
|
|
||||||
|
|
||||||
is_test() {
|
|
||||||
test -d $test_path
|
|
||||||
}
|
|
||||||
|
|
||||||
run_test() {
|
|
||||||
$json_process <$test_path/input >$test_log/stdout 2>$test_log/stderr
|
|
||||||
valgrind_check $test_log/stderr || return 1
|
|
||||||
cmp -s $test_path/error $test_log/stderr
|
|
||||||
}
|
|
||||||
|
|
||||||
show_error() {
|
|
||||||
valgrind_show_error && return
|
|
||||||
|
|
||||||
echo "EXPECTED ERROR:"
|
|
||||||
nl -bn $test_path/error
|
|
||||||
echo "ACTUAL ERROR:"
|
|
||||||
nl -bn $test_log/stderr
|
|
||||||
}
|
|
||||||
|
|
||||||
. $top_srcdir/test/scripts/run-tests.sh
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
control character 0x9 near '"'
|
|
@ -1 +0,0 @@
|
|||||||
[" <-- tab character"]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
too big negative integer
|
|
@ -1 +0,0 @@
|
|||||||
[-123123123123123123123123123123]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
too big integer
|
|
@ -1 +0,0 @@
|
|||||||
[123123123123123123123123123123]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
invalid Unicode '\uDADA'
|
|
@ -1 +0,0 @@
|
|||||||
["\uDADA (first surrogate without the second)"]
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
'[' or '{' expected near 'å'
|
|
@ -1 +0,0 @@
|
|||||||
å
|
|
@ -1 +0,0 @@
|
|||||||
[{
|
|
@ -1 +0,0 @@
|
|||||||
["a"
|
|
@ -1 +0,0 @@
|
|||||||
{"
|
|
@ -1 +0,0 @@
|
|||||||
{"a
|
|
@ -1,2 +0,0 @@
|
|||||||
1
|
|
||||||
string or '}' expected near '['
|
|
@ -1 +0,0 @@
|
|||||||
{[
|
|
@ -1 +0,0 @@
|
|||||||
["a
|
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 2 2
|
||||||
unable to decode byte 0xed at position 2
|
unable to decode byte 0xed near '"'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 3 3
|
||||||
unable to decode byte 0xe5 at position 3
|
unable to decode byte 0xe5 near '"\'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 1 1
|
||||||
unable to decode byte 0xe5 at position 1
|
unable to decode byte 0xe5
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 4 4
|
||||||
unable to decode byte 0xe5 at position 4
|
unable to decode byte 0xe5 near '123'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 4 4
|
||||||
unable to decode byte 0xe5 at position 4
|
unable to decode byte 0xe5 near '"\u'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 4 4
|
||||||
unable to decode byte 0xe5 at position 4
|
unable to decode byte 0xe5 near '1e1'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 2 2
|
||||||
unable to decode byte 0xe5 at position 2
|
unable to decode byte 0xe5 near 'a'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 2 2
|
||||||
unable to decode byte 0xe5 at position 2
|
unable to decode byte 0xe5 near '0'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 3 3
|
||||||
unable to decode byte 0xe5 at position 3
|
unable to decode byte 0xe5 near '1e'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 2 2
|
||||||
unable to decode byte 0xe5 at position 2
|
unable to decode byte 0xe5 near '"'
|
||||||
|
@ -1,2 +1,2 @@
|
|||||||
-1
|
1 0 0
|
||||||
unable to decode byte 0xe5 at position 0
|
unable to decode byte 0xe5
|
||||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user