This is an unofficial EPICS Base Doxygen site
yajl_lex.c File Reference
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "yajl_lex.h"
#include "yajl_buf.h"
+ Include dependency graph for yajl_lex.c:

Go to the source code of this file.

Classes

struct  yajl_lexer_t
 

Macros

#define readChar(lxr, txt, off)
 
#define unreadChar(lxr, off)   ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
 
#define VEC   0x01
 
#define IJC   0x02
 
#define VHC   0x04
 
#define NFP   0x08
 
#define NUC   0x10
 
#define UTF8_CHECK_EOF   if (*offset >= jsonTextLen) { return yajl_tok_eof; }
 
#define STR_CHECK_EOF
 
#define RETURN_IF_EOF   if (*offset >= jsonTextLen) return yajl_tok_eof;
 

Functions

yajl_lexer yajl_lex_alloc (yajl_alloc_funcs *alloc, unsigned int allowComments, unsigned int validateUTF8)
 
void yajl_lex_free (yajl_lexer lxr)
 
yajl_tok yajl_lex_lex (yajl_lexer lexer, const unsigned char *jsonText, size_t jsonTextLen, size_t *offset, const unsigned char **outBuf, size_t *outLen)
 
const char * yajl_lex_error_to_string (yajl_lex_error error)
 
yajl_lex_error yajl_lex_get_error (yajl_lexer lexer)
 
size_t yajl_lex_current_line (yajl_lexer lexer)
 
size_t yajl_lex_current_char (yajl_lexer lexer)
 
yajl_tok yajl_lex_peek (yajl_lexer lexer, const unsigned char *jsonText, size_t jsonTextLen, size_t offset)
 

Macro Definition Documentation

#define IJC   0x02

Definition at line 137 of file yajl_lex.c.

#define NFP   0x08

Definition at line 139 of file yajl_lex.c.

#define NUC   0x10

Definition at line 140 of file yajl_lex.c.

#define readChar (   lxr,
  txt,
  off 
)
Value:
(((lxr)->bufInUse && yajl_buf_len((lxr)->buf) && lxr->bufOff < yajl_buf_len((lxr)->buf)) ? \
(*((const unsigned char *) yajl_buf_data((lxr)->buf) + ((lxr)->bufOff)++)) : \
((txt)[(*(off))++]))
size_t yajl_buf_len(yajl_buf buf)
Definition: yajl_buf.c:97
const unsigned char * yajl_buf_data(yajl_buf buf)
Definition: yajl_buf.c:92

Definition at line 96 of file yajl_lex.c.

#define RETURN_IF_EOF   if (*offset >= jsonTextLen) return yajl_tok_eof;

Definition at line 372 of file yajl_lex.c.

#define STR_CHECK_EOF
Value:
if (*offset >= jsonTextLen) { \
tok = yajl_tok_eof; \
goto finish_string_lex; \
}

Definition at line 248 of file yajl_lex.c.

#define unreadChar (   lxr,
  off 
)    ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))

Definition at line 101 of file yajl_lex.c.

#define UTF8_CHECK_EOF   if (*offset >= jsonTextLen) { return yajl_tok_eof; }

process a variable length utf8 encoded codepoint.

returns: yajl_tok_string if a valid utf8 char was parsed and offset was advanced; yajl_tok_eof if end of input was hit before validation could complete; yajl_tok_error if invalid utf8 was encountered.

NOTE: on error the offset will point to the first char of the invalid utf8

Definition at line 196 of file yajl_lex.c.

#define VEC   0x01

Definition at line 136 of file yajl_lex.c.

#define VHC   0x04

Definition at line 138 of file yajl_lex.c.

Function Documentation

yajl_lexer yajl_lex_alloc ( yajl_alloc_funcs *  alloc,
unsigned int  allowComments,
unsigned int  validateUTF8 
)

Definition at line 104 of file yajl_lex.c.

106 {
107  yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
108  if (lxr == NULL) {
109  return NULL;
110  }
111 
112  memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
113  lxr->buf = yajl_buf_alloc(alloc);
114  lxr->allowComments = allowComments;
115  lxr->validateUTF8 = validateUTF8;
116  lxr->alloc = alloc;
117  return lxr;
118 }
#define YA_MALLOC(afs, sz)
Definition: yajl_alloc.h:32
yajl_buf yajl_buf_alloc(yajl_alloc_funcs *alloc)
Definition: yajl_buf.c:56
unsigned int validateUTF8
Definition: yajl_lex.c:91
#define NULL
Definition: catime.c:38
yajl_alloc_funcs * alloc
Definition: yajl_lex.c:93
struct yajl_lexer_t * yajl_lexer
Definition: yajl_lex.h:48
yajl_buf buf
Definition: yajl_lex.c:78
unsigned int allowComments
Definition: yajl_lex.c:88
size_t yajl_lex_current_char ( yajl_lexer  lexer)

get the number of chars lexed by this lexer instance since the last \n or \r

Definition at line 744 of file yajl_lex.c.

745 {
746  return lexer->charOff;
747 }
size_t charOff
Definition: yajl_lex.c:71
size_t yajl_lex_current_line ( yajl_lexer  lexer)

get the number of lines lexed by this lexer instance

Definition at line 739 of file yajl_lex.c.

740 {
741  return lexer->lineOff;
742 }
size_t lineOff
Definition: yajl_lex.c:70
const char* yajl_lex_error_to_string ( yajl_lex_error  error)

Definition at line 695 of file yajl_lex.c.

696 {
697  switch (error) {
698  case yajl_lex_e_ok:
699  return "ok, no error";
701  return "invalid bytes in UTF8 string.";
703  return "inside a string, '\\' occurs before a character "
704  "which it may not.";
706  return "invalid character inside string.";
708  return "invalid (non-hex) character occurs after '\\u' inside "
709  "string.";
711  return "invalid char in json text.";
713  return "invalid string in json text.";
715  return "malformed number, a digit is required after the exponent.";
717  return "malformed number, a digit is required after the "
718  "decimal point.";
720  return "malformed number, a digit is required after the "
721  "minus sign.";
723  return "probable comment found in input text, comments are "
724  "not enabled.";
725  }
726  return "unknown error code";
727 }
void yajl_lex_free ( yajl_lexer  lxr)

Definition at line 121 of file yajl_lex.c.

122 {
123  yajl_buf_free(lxr->buf);
124  YA_FREE(lxr->alloc, lxr);
125  return;
126 }
#define YA_FREE(afs, ptr)
Definition: yajl_alloc.h:33
yajl_alloc_funcs * alloc
Definition: yajl_lex.c:93
yajl_buf buf
Definition: yajl_lex.c:78
void yajl_buf_free(yajl_buf buf)
Definition: yajl_buf.c:68
yajl_lex_error yajl_lex_get_error ( yajl_lexer  lexer)

allows access to more specific information about the lexical error when yajl_lex_lex returns yajl_tok_error.

Definition at line 733 of file yajl_lex.c.

734 {
735  if (lexer == NULL) return (yajl_lex_error) -1;
736  return lexer->error;
737 }
yajl_lex_error error
Definition: yajl_lex.c:74
#define NULL
Definition: catime.c:38
yajl_lex_error
Definition: yajl_lex.h:91
yajl_tok yajl_lex_lex ( yajl_lexer  lexer,
const unsigned char *  jsonText,
size_t  jsonTextLen,
size_t *  offset,
const unsigned char **  outBuf,
size_t *  outLen 
)

run/continue a lex. "offset" is an input/output parameter. It should be initialized to zero for a new chunk of target text, and upon subsequent calls with the same target text it should be passed with the value of the previous invocation.

the client may be interested in the value of offset when an error is returned from the lexer. This allows the client to render useful error messages.

When you pass the next chunk of data, "offset" (the lexing context) should be reinitialized to zero.

Finally, the output buffer is usually just a pointer into the jsonText; however, in cases where the entity being lexed spans multiple chunks, the lexer will buffer the entity and the data returned will be a pointer into that buffer.

This behavior is abstracted from client code except for the performance implications which require that the client choose a reasonable chunk size to get adequate performance.

Definition at line 503 of file yajl_lex.c.

506 {
507  yajl_tok tok = yajl_tok_error;
508  unsigned char c;
509  size_t startOffset = *offset;
510 
511  *outBuf = NULL;
512  *outLen = 0;
513 
514  for (;;) {
515  assert(*offset <= jsonTextLen);
516 
517  if (*offset >= jsonTextLen) {
518  tok = yajl_tok_eof;
519  goto lexed;
520  }
521 
522  c = readChar(lexer, jsonText, offset);
523 
524  switch (c) {
525  case '{':
526  tok = yajl_tok_left_brace;
527  goto lexed;
528  case '}':
529  tok = yajl_tok_right_brace;
530  goto lexed;
531  case '[':
532  tok = yajl_tok_left_bracket;
533  goto lexed;
534  case ']':
536  goto lexed;
537  case ',':
538  tok = yajl_tok_comma;
539  goto lexed;
540  case ':':
541  tok = yajl_tok_colon;
542  goto lexed;
543  case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
544  startOffset++;
545  break;
546  case 't': {
547  const char * want = "rue";
548  do {
549  if (*offset >= jsonTextLen) {
550  tok = yajl_tok_eof;
551  goto lexed;
552  }
553  c = readChar(lexer, jsonText, offset);
554  if (c != *want) {
555  unreadChar(lexer, offset);
557  tok = yajl_tok_error;
558  goto lexed;
559  }
560  } while (*(++want));
561  tok = yajl_tok_bool;
562  goto lexed;
563  }
564  case 'f': {
565  const char * want = "alse";
566  do {
567  if (*offset >= jsonTextLen) {
568  tok = yajl_tok_eof;
569  goto lexed;
570  }
571  c = readChar(lexer, jsonText, offset);
572  if (c != *want) {
573  unreadChar(lexer, offset);
575  tok = yajl_tok_error;
576  goto lexed;
577  }
578  } while (*(++want));
579  tok = yajl_tok_bool;
580  goto lexed;
581  }
582  case 'n': {
583  const char * want = "ull";
584  do {
585  if (*offset >= jsonTextLen) {
586  tok = yajl_tok_eof;
587  goto lexed;
588  }
589  c = readChar(lexer, jsonText, offset);
590  if (c != *want) {
591  unreadChar(lexer, offset);
593  tok = yajl_tok_error;
594  goto lexed;
595  }
596  } while (*(++want));
597  tok = yajl_tok_null;
598  goto lexed;
599  }
600  case '"': {
601  tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
602  jsonTextLen, offset);
603  goto lexed;
604  }
605  case '-':
606  case '0': case '1': case '2': case '3': case '4':
607  case '5': case '6': case '7': case '8': case '9': {
608  /* integer parsing wants to start from the beginning */
609  unreadChar(lexer, offset);
610  tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
611  jsonTextLen, offset);
612  goto lexed;
613  }
614  case '/':
615  /* hey, look, a probable comment! If comments are disabled
616  * it's an error. */
617  if (!lexer->allowComments) {
618  unreadChar(lexer, offset);
620  tok = yajl_tok_error;
621  goto lexed;
622  }
623  /* if comments are enabled, then we should try to lex
624  * the thing. possible outcomes are
625  * - successful lex (tok_comment, which means continue),
626  * - malformed comment opening (slash not followed by
627  * '*' or '/') (tok_error)
628  * - eof hit. (tok_eof) */
629  tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
630  jsonTextLen, offset);
631  if (tok == yajl_tok_comment) {
632  /* "error" is silly, but that's the initial
633  * state of tok. guilty until proven innocent. */
634  tok = yajl_tok_error;
635  yajl_buf_clear(lexer->buf);
636  lexer->bufInUse = 0;
637  startOffset = *offset;
638  break;
639  }
640  /* hit error or eof, bail */
641  goto lexed;
642  default:
643  lexer->error = yajl_lex_invalid_char;
644  tok = yajl_tok_error;
645  goto lexed;
646  }
647  }
648 
649 
650  lexed:
651  /* need to append to buffer if the buffer is in use or
652  * if it's an EOF token */
653  if (tok == yajl_tok_eof || lexer->bufInUse) {
654  if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
655  lexer->bufInUse = 1;
656  yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
657  lexer->bufOff = 0;
658 
659  if (tok != yajl_tok_eof) {
660  *outBuf = yajl_buf_data(lexer->buf);
661  *outLen = yajl_buf_len(lexer->buf);
662  lexer->bufInUse = 0;
663  }
664  } else if (tok != yajl_tok_error) {
665  *outBuf = jsonText + startOffset;
666  *outLen = *offset - startOffset;
667  }
668 
669  /* special case for strings. skip the quotes. */
670  if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
671  {
672  assert(*outLen >= 2);
673  (*outBuf)++;
674  *outLen -= 2;
675  }
676 
677 
678 #ifdef YAJL_LEXER_DEBUG
679  if (tok == yajl_tok_error) {
680  printf("lexical error: %s\n",
682  } else if (tok == yajl_tok_eof) {
683  printf("EOF hit\n");
684  } else {
685  printf("lexed %s: '", tokToStr(tok));
686  fwrite(*outBuf, 1, *outLen, stdout);
687  printf("'\n");
688  }
689 #endif
690 
691  return tok;
692 }
size_t bufOff
Definition: yajl_lex.c:82
#define assert(exp)
Declare that a condition should be true.
Definition: epicsAssert.h:70
yajl_lex_error yajl_lex_get_error(yajl_lexer lexer)
Definition: yajl_lex.c:733
const char * yajl_lex_error_to_string(yajl_lex_error error)
Definition: yajl_lex.c:695
yajl_lex_error error
Definition: yajl_lex.c:74
#define printf
Definition: epicsStdio.h:41
#define NULL
Definition: catime.c:38
size_t yajl_buf_len(yajl_buf buf)
Definition: yajl_buf.c:97
void yajl_buf_append(yajl_buf buf, const void *data, size_t len)
Definition: yajl_buf.c:75
yajl_buf buf
Definition: yajl_lex.c:78
yajl_tok
Definition: yajl_lex.h:22
void yajl_buf_clear(yajl_buf buf)
Definition: yajl_buf.c:86
#define stdout
Definition: epicsStdio.h:30
unsigned int allowComments
Definition: yajl_lex.c:88
#define readChar(lxr, txt, off)
Definition: yajl_lex.c:96
unsigned int bufInUse
Definition: yajl_lex.c:85
const unsigned char * yajl_buf_data(yajl_buf buf)
Definition: yajl_buf.c:92
#define unreadChar(lxr, off)
Definition: yajl_lex.c:101
yajl_tok yajl_lex_peek ( yajl_lexer  lexer,
const unsigned char *  jsonText,
size_t  jsonTextLen,
size_t  offset 
)

have a peek at the next token, but don't move the lexer forward

Definition at line 749 of file yajl_lex.c.

751 {
752  const unsigned char * outBuf;
753  size_t outLen;
754  size_t bufLen = yajl_buf_len(lexer->buf);
755  size_t bufOff = lexer->bufOff;
756  unsigned int bufInUse = lexer->bufInUse;
757  yajl_tok tok;
758 
759  tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
760  &outBuf, &outLen);
761 
762  lexer->bufOff = bufOff;
763  lexer->bufInUse = bufInUse;
764  yajl_buf_truncate(lexer->buf, bufLen);
765 
766  return tok;
767 }
size_t bufOff
Definition: yajl_lex.c:82
size_t yajl_buf_len(yajl_buf buf)
Definition: yajl_buf.c:97
yajl_tok yajl_lex_lex(yajl_lexer lexer, const unsigned char *jsonText, size_t jsonTextLen, size_t *offset, const unsigned char **outBuf, size_t *outLen)
Definition: yajl_lex.c:503
void yajl_buf_truncate(yajl_buf buf, size_t len)
Definition: yajl_buf.c:103
yajl_buf buf
Definition: yajl_lex.c:78
yajl_tok
Definition: yajl_lex.h:22
unsigned int bufInUse
Definition: yajl_lex.c:85