jansson/src/load.c

556 lines
11 KiB
C
Raw Normal View History

2009-02-07 02:26:27 +08:00
#define _GNU_SOURCE
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <jansson.h>
#include "strbuffer.h"
2009-02-07 02:26:27 +08:00
/*
 * Token types produced by the lexer.
 *
 * The single-character tokens ('{', '}', '[', ']', ':' and ',') are
 * represented by their own character code, so the multi-character
 * token types start at 256 to stay clear of every possible char value.
 */
enum {
    TOKEN_INVALID = -1,
    TOKEN_EOF = 0,
    TOKEN_STRING = 256,
    TOKEN_INTEGER = 257,
    TOKEN_REAL = 258,
    TOKEN_TRUE = 259,
    TOKEN_FALSE = 260,
    TOKEN_NULL = 261
};
2009-02-07 02:26:27 +08:00
/*
 * Lexer state.
 *
 * `input` points at the next unread character of the NUL-terminated
 * source text and `start` at the first character of the current token;
 * the span [start, input) is what error messages quote.
 */
typedef struct {
    const char *input;  /* next character to be scanned */
    const char *start;  /* first character of the current token */
    int token;          /* current token: a TOKEN_* value or a punctuation char */
    int line, column;   /* line starts at 1; column is not maintained by the lexer */
    union {
        char *string;   /* TOKEN_STRING: malloc'd, unescaped text */
        int integer;    /* TOKEN_INTEGER */
        double real;    /* TOKEN_REAL */
    } value;
} lex_t;
2009-02-07 02:26:27 +08:00
/*** error reporting ***/
/*
 * Fill in *error with a printf-style message.
 *
 * Does nothing when error is NULL. Without a lexer the line is set to
 * -1 and the text is the bare message. With a lexer the line is taken
 * from it and the message is suffixed with the text of the current
 * token, or with "end of file" when the current token starts at the
 * terminating NUL.
 */
static void error_set(json_error_t *error, const lex_t *lex,
                      const char *msg, ...)
{
    char formatted[JSON_ERROR_TEXT_LENGTH];
    va_list args;

    if(error == NULL)
        return;

    va_start(args, msg);
    vsnprintf(formatted, JSON_ERROR_TEXT_LENGTH, msg, args);
    va_end(args);

    if(lex == NULL) {
        error->line = -1;
        snprintf(error->text, JSON_ERROR_TEXT_LENGTH, "%s", formatted);
        return;
    }

    error->line = lex->line;

    if(*lex->start == '\0') {
        snprintf(error->text, JSON_ERROR_TEXT_LENGTH,
                 "%s near end of file", formatted);
    }
    else {
        /* quote exactly the characters consumed for the current token */
        int consumed = (int)(lex->input - lex->start);
        snprintf(error->text, JSON_ERROR_TEXT_LENGTH,
                 "%s near '%.*s'", formatted, consumed, lex->start);
    }
}
/*** lexical analyzer ***/
/*
 * Scan a string literal; lex->input must point at the opening '"'.
 *
 * On success lex->token is TOKEN_STRING and lex->value.string holds a
 * malloc'd, NUL-terminated copy with the escape sequences resolved.
 * On any failure (unterminated literal, raw control character, bad
 * escape, allocation failure, or a \uXXXX escape, which is validated
 * but not decoded yet) lex->token is left as TOKEN_INVALID.
 * In every case lex->input is advanced to where scanning stopped.
 */
static void lex_scan_string(lex_t *lex)
{
    /* skip the " */
    const char *p = lex->input + 1;
    char *t;

    lex->token = TOKEN_INVALID;

    /* First pass: validate the literal and locate the closing quote. */
    while(*p != '"') {
        if(*p == '\0') {
            /* unterminated string literal */
            goto out;
        }

        /* Compare as unsigned so the test is meaningful whether plain
           char is signed or unsigned; NUL was handled above. */
        if((unsigned char)*p <= 0x1F) {
            /* control character */
            goto out;
        }
        else if(*p == '\\') {
            p++;
            if(*p == 'u') {
                p++;
                for(int i = 0; i < 4; i++, p++) {
                    /* <ctype.h> functions require an unsigned char value */
                    if(!isxdigit((unsigned char)*p))
                        goto out;
                }
            }
            else if(*p == '"' || *p == '\\' || *p == '/' || *p == 'b' ||
                    *p == 'f' || *p == 'n' || *p == 'r' || *p == 't')
                p++;
            else
                goto out;
        }
        else
            p++;
    }

    /* The unescaped value is at most as long as the source text.
       p is at the closing quote and lex->start at the opening one, so
       this leaves room for the terminating NUL. */
    lex->value.string = malloc((size_t)(p - lex->start));
    if(!lex->value.string) {
        /* this is not very nice, since TOKEN_INVALID is returned */
        goto out;
    }

    /* Second pass: copy the characters, resolving escapes. */
    t = lex->value.string;
    p = lex->input + 1;

    while(*p != '"') {
        if(*p == '\\') {
            p++;
            if(*p == 'u') {
                /* TODO: \uXXXX not supported yet */
                free(lex->value.string);
                lex->value.string = NULL;
                goto out;
            }
            else {
                switch(*p) {
                    case '"': case '\\': case '/':
                        *t = *p; break;
                    case 'b': *t = '\b'; break;
                    case 'f': *t = '\f'; break;
                    case 'n': *t = '\n'; break;
                    case 'r': *t = '\r'; break;
                    case 't': *t = '\t'; break;
                    /* unreachable: the first pass rejected other escapes */
                    default: assert(0);
                }
            }
        }
        else
            *t = *p;

        t++;
        p++;
    }

    /* skip the " */
    p++;

    *t = '\0';
    lex->token = TOKEN_STRING;

out:
    lex->input = p;
}
/*
 * Scan a JSON number starting at lex->input (which equals lex->start).
 *
 * On success lex->token is TOKEN_INTEGER or TOKEN_REAL and the matching
 * member of lex->value is set. Malformed numbers (leading zeros, a '-'
 * without digits, a '.' or exponent without digits) and integers that
 * do not fit in an int leave lex->token as TOKEN_INVALID.
 * In every case lex->input is advanced to where scanning stopped.
 */
static void lex_scan_number(lex_t *lex)
{
    const char *p = lex->input;
    char *end;

    lex->token = TOKEN_INVALID;

    if(*p == '-')
        p++;

    /* <ctype.h> functions require an unsigned char value, hence the casts */
    if(*p == '0') {
        p++;
        /* no leading zeros */
        if(isdigit((unsigned char)*p))
            goto out;
    }
    else if(isdigit((unsigned char)*p)) {
        p++;
        while(isdigit((unsigned char)*p))
            p++;
    }
    else {
        /* '-' must be followed by a digit; never step over an unknown
           character (it may be the terminating NUL) */
        goto out;
    }

    if(*p != '.' && *p != 'E' && *p != 'e') {
        long value;

        errno = 0;
        value = strtol(lex->start, &end, 10);
        assert(end == p);

        /* reject values that cannot be stored in value.integer */
        if(errno == ERANGE || value < INT_MIN || value > INT_MAX)
            goto out;

        lex->token = TOKEN_INTEGER;
        lex->value.integer = (int)value;
        goto out;
    }

    if(*p == '.') {
        p++;
        if(!isdigit((unsigned char)*p))
            goto out;

        p++;
        while(isdigit((unsigned char)*p))
            p++;
    }

    if(*p == 'E' || *p == 'e') {
        p++;
        if(*p == '+' || *p == '-')
            p++;

        if(!isdigit((unsigned char)*p))
            goto out;

        p++;
        while(isdigit((unsigned char)*p))
            p++;
    }

    lex->token = TOKEN_REAL;

    lex->value.real = strtod(lex->start, &end);
    assert(end == p);

out:
    lex->input = p;
}
/*
 * Advance the lexer to the next token and return its type.
 *
 * Releases the string value of the previous token (if it was a
 * TOKEN_STRING), skips whitespace while counting newlines, records the
 * token start in lex->start and classifies the token. Punctuation
 * characters are returned as their own character code.
 */
static int lex_scan(lex_t *lex)
{
    char c;

    if(lex->token == TOKEN_STRING) {
        free(lex->value.string);
        lex->value.string = NULL;
    }

    c = *lex->input;
    while(c == ' ' || c == '\t' || c == '\n' || c == '\r') {
        if(c == '\n')
            lex->line++;

        lex->input++;
        c = *lex->input;
    }

    lex->start = lex->input;

    /* <ctype.h> functions require an unsigned char value, hence the casts */
    if(c == '\0')
        lex->token = TOKEN_EOF;

    else if(c == '{' || c == '}' || c == '[' || c == ']' ||
            c == ':' || c == ',') {
        lex->token = c;
        lex->input++;
    }

    else if(c == '"')
        lex_scan_string(lex);

    else if(isdigit((unsigned char)c) || c == '-')
        lex_scan_number(lex);

    else if(isupper((unsigned char)c) || islower((unsigned char)c)) {
        /* eat up the whole identifier for clearer error messages */
        size_t len;

        while(isupper((unsigned char)*lex->input) ||
              islower((unsigned char)*lex->input))
            lex->input++;
        len = (size_t)(lex->input - lex->start);

        /* The length must match exactly: comparing only the first len
           characters would accept prefixes such as "t" or "nul". */
        if(len == 4 && strncmp(lex->start, "true", 4) == 0)
            lex->token = TOKEN_TRUE;
        else if(len == 5 && strncmp(lex->start, "false", 5) == 0)
            lex->token = TOKEN_FALSE;
        else if(len == 4 && strncmp(lex->start, "null", 4) == 0)
            lex->token = TOKEN_NULL;
        else
            lex->token = TOKEN_INVALID;
    }

    else {
        lex->token = TOKEN_INVALID;
        lex->input++;
    }

    return lex->token;
}
/*
 * Prepare a lexer for the NUL-terminated text `input` and scan the
 * first token, so that lex->token is valid on return.
 * Always returns 0.
 */
static int lex_init(lex_t *lex, const char *input)
{
    lex->line = 1;
    /* anything but TOKEN_STRING, so the first scan frees nothing */
    lex->token = TOKEN_INVALID;
    lex->input = input;

    (void)lex_scan(lex);

    return 0;
}
/*
 * Release the lexer's resources: the string value of the current
 * token, if the current token is a string.
 */
static void lex_close(lex_t *lex)
{
    if(lex->token != TOKEN_STRING)
        return;

    free(lex->value.string);
}
/*** parser ***/
static json_t *parse_value(lex_t *lex, json_error_t *error);
2009-02-07 02:26:27 +08:00
/*
 * Parse a JSON object; the current token must be the opening '{'.
 *
 * Returns a new object, with the closing '}' left as the current
 * token, or NULL on failure. Syntax errors are reported through
 * `error`; the partially built object is released on every failure
 * path.
 */
static json_t *parse_object(lex_t *lex, json_error_t *error)
{
    json_t *object = json_object();
    if(!object)
        return NULL;

    lex_scan(lex);
    if(lex->token == '}')
        return object;

    while(lex->token) {
        char *key;
        json_t *value;

        if(lex->token != TOKEN_STRING) {
            error_set(error, lex, "string expected");
            goto error;
        }

        /* the next lex_scan() frees the token's string, so keep a copy */
        key = strdup(lex->value.string);
        if(!key)
            goto error;  /* release the object instead of leaking it */

        lex_scan(lex);
        if(lex->token != ':') {
            free(key);
            error_set(error, lex, "':' expected");
            goto error;
        }

        lex_scan(lex);

        value = parse_value(lex, error);
        if(!value) {
            free(key);
            goto error;
        }

        if(json_object_set(object, key, value)) {
            free(key);
            json_decref(value);
            goto error;
        }

        /* drop our own reference to the value and our copy of the key */
        json_decref(value);
        free(key);

        if(lex->token != ',')
            break;

        lex_scan(lex);
    }

    if(lex->token != '}') {
        error_set(error, lex, "'}' expected");
        goto error;
    }

    return object;

error:
    json_decref(object);
    return NULL;
}
/*
 * Parse a JSON array; the current token must be the opening '['.
 *
 * Returns a new array, with the closing ']' left as the current token,
 * or NULL on failure (the partially built array is released).
 */
static json_t *parse_array(lex_t *lex, json_error_t *error)
{
    json_t *array;

    array = json_array();
    if(array == NULL)
        return NULL;

    lex_scan(lex);
    if(lex->token == ']')
        return array;

    /* token 0 is end of input */
    while(lex->token != 0) {
        json_t *item;
        int append_failed;

        item = parse_value(lex, error);
        if(item == NULL)
            goto fail;

        /* our reference to the item is dropped whether or not the
           append succeeded */
        append_failed = json_array_append(array, item);
        json_decref(item);
        if(append_failed)
            goto fail;

        if(lex->token != ',')
            break;

        lex_scan(lex);
    }

    if(lex->token == ']')
        return array;

    error_set(error, lex, "']' expected");

fail:
    json_decref(array);
    return NULL;
}
/*
 * Parse the JSON value that begins at the current token.
 *
 * On success returns the new value and advances the lexer past the
 * value's last token. Returns NULL if the token cannot start a value
 * (reported through `error`) or if building the value failed.
 */
static json_t *parse_value(lex_t *lex, json_error_t *error)
{
    const int type = lex->token;
    json_t *result;

    if(type == TOKEN_INVALID) {
        error_set(error, lex, "invalid token");
        return NULL;
    }

    if(type == TOKEN_STRING)
        result = json_string(lex->value.string);
    else if(type == TOKEN_INTEGER)
        result = json_integer(lex->value.integer);
    else if(type == TOKEN_REAL)
        result = json_real(lex->value.real);
    else if(type == TOKEN_TRUE)
        result = json_true();
    else if(type == TOKEN_FALSE)
        result = json_false();
    else if(type == TOKEN_NULL)
        result = json_null();
    else if(type == '{')
        result = parse_object(lex, error);
    else if(type == '[')
        result = parse_array(lex, error);
    else {
        error_set(error, lex, "unexpected token");
        return NULL;
    }

    if(result == NULL)
        return NULL;

    /* step past the value's final token */
    lex_scan(lex);
    return result;
}
/*
 * Parse a complete JSON text. The top-level value must be an array or
 * an object; anything else is reported through `error` and NULL is
 * returned.
 */
json_t *parse_json(lex_t *lex, json_error_t *error)
{
    if(lex->token == '[' || lex->token == '{')
        return parse_value(lex, error);

    error_set(error, lex, "'[' or '{' expected");
    return NULL;
}
/*
 * Decode the JSON text stored in the file at `path`.
 *
 * Returns the decoded value, or NULL on failure. If the file cannot be
 * opened, the reason is reported through `error`.
 */
json_t *json_load(const char *path, json_error_t *error)
{
    json_t *json;
    FILE *stream = fopen(path, "r");

    if(stream == NULL) {
        error_set(error, NULL, "unable to open %s: %s",
                  path, strerror(errno));
        return NULL;
    }

    json = json_loadf(stream, error);
    fclose(stream);

    return json;
}
2009-05-14 03:25:34 +08:00
/*
 * Decode the JSON text in the NUL-terminated buffer `string`.
 *
 * Returns the decoded value, or NULL on failure. Anything other than
 * whitespace after the top-level value is an error.
 */
json_t *json_loads(const char *string, json_error_t *error)
{
    lex_t lex;
    json_t *json;

    if(lex_init(&lex, string) != 0)
        return NULL;

    json = parse_json(&lex, error);

    /* a successful parse must have consumed the whole input */
    if(json != NULL && lex.token != TOKEN_EOF) {
        error_set(error, &lex, "end of file expected");
        json_decref(json);
        json = NULL;
    }

    lex_close(&lex);
    return json;
}
#define BUFFER_SIZE 4096

/*
 * Decode the JSON text read from the stream `input`.
 *
 * The stream is read to its end in BUFFER_SIZE chunks into a string
 * buffer, which is then handed to json_loads(). Returns the decoded
 * value, or NULL on failure; a read error is reported through `error`.
 */
json_t *json_loadf(FILE *input, json_error_t *error)
{
    strbuffer_t text;
    char chunk[BUFFER_SIZE];
    size_t got;
    json_t *json = NULL;

    if(strbuffer_init(&text))
        return NULL;

    while((got = fread(chunk, 1, BUFFER_SIZE, input)) > 0) {
        if(strbuffer_append_bytes(&text, chunk, got))
            goto done;
    }

    /* fread() returned 0: tell a read error apart from end of file */
    if(ferror(input)) {
        error_set(error, NULL, "read error");
        goto done;
    }

    json = json_loads(strbuffer_value(&text), error);

done:
    strbuffer_close(&text);
    return json;
}