25 #ifdef YAJL_LEXER_DEBUG 96 #define readChar(lxr, txt, off) \ 97 (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) && lxr->bufOff < yajl_buf_len((lxr)->buf)) ? \ 98 (*((const unsigned char *) yajl_buf_data((lxr)->buf) + ((lxr)->bufOff)++)) : \ 101 #define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--)) 142 static const char charLookupTable[256] =
150 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
VEC ,
152 VHC ,
VHC , 0 , 0 , 0 , 0 , 0 , 0 ,
155 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
156 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
157 0 , 0 , 0 , 0 ,
NFP|VEC|
IJC, 0 , 0 , 0 ,
160 0 , 0 , 0 , 0 , 0 , 0 ,
VEC , 0 ,
161 0 , 0 ,
VEC , 0 ,
VEC , 0 , 0 , 0 ,
162 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
196 #define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; } 199 yajl_lex_utf8_char(
yajl_lexer lexer,
const unsigned char * jsonText,
200 size_t jsonTextLen,
size_t * offset,
201 unsigned char curChar)
203 if (curChar <= 0x7f) {
206 }
else if ((curChar >> 5) == 0x6) {
209 curChar =
readChar(lexer, jsonText, offset);
211 }
else if ((curChar >> 4) == 0x0e) {
214 curChar =
readChar(lexer, jsonText, offset);
215 if ((curChar >> 6) == 0x2) {
217 curChar =
readChar(lexer, jsonText, offset);
220 }
else if ((curChar >> 3) == 0x1e) {
223 curChar =
readChar(lexer, jsonText, offset);
224 if ((curChar >> 6) == 0x2) {
226 curChar =
readChar(lexer, jsonText, offset);
227 if ((curChar >> 6) == 0x2) {
229 curChar =
readChar(lexer, jsonText, offset);
248 #define STR_CHECK_EOF \ 249 if (*offset >= jsonTextLen) { \ 250 tok = yajl_tok_eof; \ 251 goto finish_string_lex; \ 259 yajl_string_scan(
const unsigned char *
buf,
size_t len,
int utf8check)
261 unsigned char mask =
IJC|
NFP|(utf8check ? NUC : 0);
263 while (skip < len && !(charLookupTable[*buf] & mask))
272 yajl_lex_string(
yajl_lexer lexer,
const unsigned char * jsonText,
273 size_t jsonTextLen,
size_t * offset)
279 unsigned char curChar;
284 const unsigned char * p;
295 else if (*offset < jsonTextLen)
297 p = jsonText + *offset;
298 len = jsonTextLen - *offset;
299 *offset += yajl_string_scan(p, len, lexer->
validateUTF8);
305 curChar =
readChar(lexer, jsonText, offset);
308 if (curChar ==
'"') {
313 else if (curChar ==
'\\') {
318 curChar =
readChar(lexer, jsonText, offset);
319 if (curChar ==
'u') {
324 curChar =
readChar(lexer, jsonText, offset);
325 if (!(charLookupTable[curChar] &
VHC)) {
329 goto finish_string_lex;
332 }
else if (!(charLookupTable[curChar] &
VEC)) {
336 goto finish_string_lex;
341 else if(charLookupTable[curChar] &
IJC) {
345 goto finish_string_lex;
349 yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
354 goto finish_string_lex;
357 goto finish_string_lex;
372 #define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof; 375 yajl_lex_number(
yajl_lexer lexer,
const unsigned char * jsonText,
376 size_t jsonTextLen,
size_t * offset)
387 c =
readChar(lexer, jsonText, offset);
392 c =
readChar(lexer, jsonText, offset);
398 c =
readChar(lexer, jsonText, offset);
399 }
else if (c >=
'1' && c <=
'9') {
402 c =
readChar(lexer, jsonText, offset);
403 }
while (c >=
'0' && c <=
'9');
415 c =
readChar(lexer, jsonText, offset);
417 while (c >=
'0' && c <=
'9') {
420 c =
readChar(lexer, jsonText, offset);
432 if (c ==
'e' || c ==
'E') {
434 c =
readChar(lexer, jsonText, offset);
437 if (c ==
'+' || c ==
'-') {
439 c =
readChar(lexer, jsonText, offset);
442 if (c >=
'0' && c <=
'9') {
445 c =
readChar(lexer, jsonText, offset);
446 }
while (c >=
'0' && c <=
'9');
462 yajl_lex_comment(
yajl_lexer lexer,
const unsigned char * jsonText,
463 size_t jsonTextLen,
size_t * offset)
470 c =
readChar(lexer, jsonText, offset);
477 c =
readChar(lexer, jsonText, offset);
479 }
else if (c ==
'*') {
483 c =
readChar(lexer, jsonText, offset);
486 c =
readChar(lexer, jsonText, offset);
504 size_t jsonTextLen,
size_t * offset,
505 const unsigned char ** outBuf,
size_t * outLen)
509 size_t startOffset = *offset;
515 assert(*offset <= jsonTextLen);
517 if (*offset >= jsonTextLen) {
522 c =
readChar(lexer, jsonText, offset);
543 case '\t':
case '\n':
case '\v':
case '\f':
case '\r':
case ' ':
547 const char * want =
"rue";
549 if (*offset >= jsonTextLen) {
553 c =
readChar(lexer, jsonText, offset);
565 const char * want =
"alse";
567 if (*offset >= jsonTextLen) {
571 c =
readChar(lexer, jsonText, offset);
583 const char * want =
"ull";
585 if (*offset >= jsonTextLen) {
589 c =
readChar(lexer, jsonText, offset);
601 tok = yajl_lex_string(lexer, (
const unsigned char *) jsonText,
602 jsonTextLen, offset);
606 case '0':
case '1':
case '2':
case '3':
case '4':
607 case '5':
case '6':
case '7':
case '8':
case '9': {
610 tok = yajl_lex_number(lexer, (
const unsigned char *) jsonText,
611 jsonTextLen, offset);
629 tok = yajl_lex_comment(lexer, (
const unsigned char *) jsonText,
630 jsonTextLen, offset);
637 startOffset = *offset;
665 *outBuf = jsonText + startOffset;
666 *outLen = *offset - startOffset;
678 #ifdef YAJL_LEXER_DEBUG 680 printf(
"lexical error: %s\n",
685 printf(
"lexed %s: '", tokToStr(tok));
686 fwrite(*outBuf, 1, *outLen,
stdout);
699 return "ok, no error";
701 return "invalid bytes in UTF8 string.";
703 return "inside a string, '\\' occurs before a character " 706 return "invalid character inside string.";
708 return "invalid (non-hex) character occurs after '\\u' inside " 711 return "invalid char in json text.";
713 return "invalid string in json text.";
715 return "malformed number, a digit is required after the exponent.";
717 return "malformed number, a digit is required after the " 720 return "malformed number, a digit is required after the " 723 return "probable comment found in input text, comments are " 726 return "unknown error code";
750 size_t jsonTextLen,
size_t offset)
752 const unsigned char * outBuf;
759 tok =
yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
#define YA_MALLOC(afs, sz)
#define assert(exp)
Declare that a condition should be true.
yajl_lex_error yajl_lex_get_error(yajl_lexer lexer)
#define YA_FREE(afs, ptr)
const char * yajl_lex_error_to_string(yajl_lex_error error)
yajl_buf yajl_buf_alloc(yajl_alloc_funcs *alloc)
unsigned int validateUTF8
size_t yajl_buf_len(yajl_buf buf)
size_t yajl_lex_current_line(yajl_lexer lexer)
struct yajl_lexer_t * yajl_lexer
size_t yajl_lex_current_char(yajl_lexer lexer)
void yajl_buf_append(yajl_buf buf, const void *data, size_t len)
yajl_tok yajl_lex_lex(yajl_lexer lexer, const unsigned char *jsonText, size_t jsonTextLen, size_t *offset, const unsigned char **outBuf, size_t *outLen)
void yajl_buf_truncate(yajl_buf buf, size_t len)
yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char *jsonText, size_t jsonTextLen, size_t offset)
void yajl_buf_clear(yajl_buf buf)
yajl_lexer yajl_lex_alloc(yajl_alloc_funcs *alloc, unsigned int allowComments, unsigned int validateUTF8)
void yajl_buf_free(yajl_buf buf)
unsigned int allowComments
#define readChar(lxr, txt, off)
void yajl_lex_free(yajl_lexer lxr)
const unsigned char * yajl_buf_data(yajl_buf buf)
#define unreadChar(lxr, off)