This is Unofficial EPICS BASE Doxygen Site
yajl_lex.h File Reference
#include "yajl_common.h"
+ Include dependency graph for yajl_lex.h:
+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Typedefs

typedef struct yajl_lexer_t * yajl_lexer
 

Enumerations

enum  yajl_tok {
  yajl_tok_bool, yajl_tok_colon, yajl_tok_comma, yajl_tok_eof,
  yajl_tok_error, yajl_tok_left_brace, yajl_tok_left_bracket, yajl_tok_null,
  yajl_tok_right_brace, yajl_tok_right_bracket, yajl_tok_integer, yajl_tok_double,
  yajl_tok_string, yajl_tok_string_with_escapes, yajl_tok_comment
}
 
enum  yajl_lex_error {
  yajl_lex_e_ok = 0, yajl_lex_string_invalid_utf8, yajl_lex_string_invalid_escaped_char, yajl_lex_string_invalid_json_char,
  yajl_lex_string_invalid_hex_char, yajl_lex_invalid_char, yajl_lex_invalid_string, yajl_lex_missing_integer_after_decimal,
  yajl_lex_missing_integer_after_exponent, yajl_lex_missing_integer_after_minus, yajl_lex_unallowed_comment
}
 

Functions

yajl_lexer yajl_lex_alloc (yajl_alloc_funcs *alloc, unsigned int allowComments, unsigned int validateUTF8)
 
void yajl_lex_free (yajl_lexer lexer)
 
yajl_tok yajl_lex_lex (yajl_lexer lexer, const unsigned char *jsonText, size_t jsonTextLen, size_t *offset, const unsigned char **outBuf, size_t *outLen)
 
yajl_tok yajl_lex_peek (yajl_lexer lexer, const unsigned char *jsonText, size_t jsonTextLen, size_t offset)
 
const char * yajl_lex_error_to_string (yajl_lex_error error)
 
yajl_lex_error yajl_lex_get_error (yajl_lexer lexer)
 
size_t yajl_lex_current_offset (yajl_lexer lexer)
 
size_t yajl_lex_current_line (yajl_lexer lexer)
 
size_t yajl_lex_current_char (yajl_lexer lexer)
 

Typedef Documentation

typedef struct yajl_lexer_t* yajl_lexer

Definition at line 48 of file yajl_lex.h.

Enumeration Type Documentation

enum yajl_lex_error
Enumerator
yajl_lex_e_ok 
yajl_lex_string_invalid_utf8 
yajl_lex_string_invalid_escaped_char 
yajl_lex_string_invalid_json_char 
yajl_lex_string_invalid_hex_char 
yajl_lex_invalid_char 
yajl_lex_invalid_string 
yajl_lex_missing_integer_after_decimal 
yajl_lex_missing_integer_after_exponent 
yajl_lex_missing_integer_after_minus 
yajl_lex_unallowed_comment 

Definition at line 91 of file yajl_lex.h.

enum yajl_tok
Enumerator
yajl_tok_bool 
yajl_tok_colon 
yajl_tok_comma 
yajl_tok_eof 
yajl_tok_error 
yajl_tok_left_brace 
yajl_tok_left_bracket 
yajl_tok_null 
yajl_tok_right_brace 
yajl_tok_right_bracket 
yajl_tok_integer 
yajl_tok_double 
yajl_tok_string 
yajl_tok_string_with_escapes 
yajl_tok_comment 

Definition at line 22 of file yajl_lex.h.

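22 typedef enum {
23  yajl_tok_bool,
24  yajl_tok_colon,
25  yajl_tok_comma,
26  yajl_tok_eof,
27  yajl_tok_error,
28  yajl_tok_left_brace,
29  yajl_tok_left_bracket,
30  yajl_tok_null,
31  yajl_tok_right_brace,
32  yajl_tok_right_bracket,
33 
34  /* we differentiate between integers and doubles to allow the
35  * parser to interpret the number without re-scanning */
36  yajl_tok_integer,
37  yajl_tok_double,
38 
39  /* we differentiate between strings which require further processing,
40  * and strings that do not */
41  yajl_tok_string,
42  yajl_tok_string_with_escapes,
43 
44  /* comment tokens are not currently returned to the parser, ever */
45  yajl_tok_comment
46 } yajl_tok;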
yajl_tok
Definition: yajl_lex.h:22

Function Documentation

yajl_lexer yajl_lex_alloc ( yajl_alloc_funcs *  alloc,
unsigned int  allowComments,
unsigned int  validateUTF8 
)

Definition at line 104 of file yajl_lex.c.

106 {
107  yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
108  if (lxr == NULL) {
109  return NULL;
110  }
111 
112  memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
113  lxr->buf = yajl_buf_alloc(alloc);
114  lxr->allowComments = allowComments;
115  lxr->validateUTF8 = validateUTF8;
116  lxr->alloc = alloc;
117  return lxr;
118 }
#define YA_MALLOC(afs, sz)
Definition: yajl_alloc.h:32
yajl_buf yajl_buf_alloc(yajl_alloc_funcs *alloc)
Definition: yajl_buf.c:56
unsigned int validateUTF8
Definition: yajl_lex.c:91
#define NULL
Definition: catime.c:38
yajl_alloc_funcs * alloc
Definition: yajl_lex.c:93
struct yajl_lexer_t * yajl_lexer
Definition: yajl_lex.h:48
yajl_buf buf
Definition: yajl_lex.c:78
unsigned int allowComments
Definition: yajl_lex.c:88
size_t yajl_lex_current_char ( yajl_lexer  lexer)

get the number of chars lexed by this lexer instance since the last \n or \r

Definition at line 744 of file yajl_lex.c.

745 {
746  return lexer->charOff;
747 }
size_t charOff
Definition: yajl_lex.c:71
size_t yajl_lex_current_line ( yajl_lexer  lexer)

get the number of lines lexed by this lexer instance

Definition at line 739 of file yajl_lex.c.

740 {
741  return lexer->lineOff;
742 }
size_t lineOff
Definition: yajl_lex.c:70
size_t yajl_lex_current_offset ( yajl_lexer  lexer)

get the current offset into the most recently lexed json string.

const char* yajl_lex_error_to_string ( yajl_lex_error  error)

Definition at line 695 of file yajl_lex.c.

696 {
697  switch (error) {
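698  case yajl_lex_e_ok:
699  return "ok, no error";
700  case yajl_lex_string_invalid_utf8:
701  return "invalid bytes in UTF8 string.";
702  case yajl_lex_string_invalid_escaped_char:
703  return "inside a string, '\\' occurs before a character "
704  "which it may not.";
705  case yajl_lex_string_invalid_json_char:
706  return "invalid character inside string.";
707  case yajl_lex_string_invalid_hex_char:
708  return "invalid (non-hex) character occurs after '\\u' inside "
709  "string.";
710  case yajl_lex_invalid_char:
711  return "invalid char in json text.";
712  case yajl_lex_invalid_string:
713  return "invalid string in json text.";
714  case yajl_lex_missing_integer_after_exponent:
715  return "malformed number, a digit is required after the exponent.";
716  case yajl_lex_missing_integer_after_decimal:
717  return "malformed number, a digit is required after the "
718  "decimal point.";
719  case yajl_lex_missing_integer_after_minus:
720  return "malformed number, a digit is required after the "
721  "minus sign.";
722  case yajl_lex_unallowed_comment:
723  return "probable comment found in input text, comments are "
724  "not enabled.";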
725  }
726  return "unknown error code";
727 }
void yajl_lex_free ( yajl_lexer  lexer)

Definition at line 121 of file yajl_lex.c.

122 {
123  yajl_buf_free(lxr->buf);
124  YA_FREE(lxr->alloc, lxr);
125  return;
126 }
#define YA_FREE(afs, ptr)
Definition: yajl_alloc.h:33
void yajl_buf_free(yajl_buf buf)
Definition: yajl_buf.c:68
yajl_lex_error yajl_lex_get_error ( yajl_lexer  lexer)

allows access to more specific information about the lexical error when yajl_lex_lex returns yajl_tok_error.

Definition at line 733 of file yajl_lex.c.

734 {
735  if (lexer == NULL) return (yajl_lex_error) -1;
736  return lexer->error;
737 }
yajl_lex_error error
Definition: yajl_lex.c:74
#define NULL
Definition: catime.c:38
yajl_lex_error
Definition: yajl_lex.h:91
yajl_tok yajl_lex_lex ( yajl_lexer  lexer,
const unsigned char *  jsonText,
size_t  jsonTextLen,
size_t *  offset,
const unsigned char **  outBuf,
size_t *  outLen 
)

run/continue a lex. "offset" is an input/output parameter. It should be initialized to zero for a new chunk of target text, and upon subsequent calls with the same target text should be passed with the value of the previous invocation.

The client may be interested in the value of offset when an error is returned from the lexer. This allows the client to render useful error messages.

When you pass the next chunk of data, context should be reinitialized to zero.

Finally, the output buffer is usually just a pointer into the jsonText, however in cases where the entity being lexed spans multiple chunks, the lexer will buffer the entity and the data returned will be a pointer into that buffer.

This behavior is abstracted from client code except for the performance implications which require that the client choose a reasonable chunk size to get adequate performance.

Definition at line 503 of file yajl_lex.c.

506 {
507  yajl_tok tok = yajl_tok_error;
508  unsigned char c;
509  size_t startOffset = *offset;
510 
511  *outBuf = NULL;
512  *outLen = 0;
513 
514  for (;;) {
515  assert(*offset <= jsonTextLen);
516 
517  if (*offset >= jsonTextLen) {
518  tok = yajl_tok_eof;
519  goto lexed;
520  }
521 
522  c = readChar(lexer, jsonText, offset);
523 
524  switch (c) {
525  case '{':
526  tok = yajl_tok_left_brace;
527  goto lexed;
528  case '}':
529  tok = yajl_tok_right_brace;
530  goto lexed;
531  case '[':
532  tok = yajl_tok_left_bracket;
533  goto lexed;
534  case ']':
535  tok = yajl_tok_right_bracket;
536  goto lexed;
537  case ',':
538  tok = yajl_tok_comma;
539  goto lexed;
540  case ':':
541  tok = yajl_tok_colon;
542  goto lexed;
543  case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
544  startOffset++;
545  break;
546  case 't': {
547  const char * want = "rue";
548  do {
549  if (*offset >= jsonTextLen) {
550  tok = yajl_tok_eof;
551  goto lexed;
552  }
553  c = readChar(lexer, jsonText, offset);
554  if (c != *want) {
555  unreadChar(lexer, offset);
557  tok = yajl_tok_error;
558  goto lexed;
559  }
560  } while (*(++want));
561  tok = yajl_tok_bool;
562  goto lexed;
563  }
564  case 'f': {
565  const char * want = "alse";
566  do {
567  if (*offset >= jsonTextLen) {
568  tok = yajl_tok_eof;
569  goto lexed;
570  }
571  c = readChar(lexer, jsonText, offset);
572  if (c != *want) {
573  unreadChar(lexer, offset);
575  tok = yajl_tok_error;
576  goto lexed;
577  }
578  } while (*(++want));
579  tok = yajl_tok_bool;
580  goto lexed;
581  }
582  case 'n': {
583  const char * want = "ull";
584  do {
585  if (*offset >= jsonTextLen) {
586  tok = yajl_tok_eof;
587  goto lexed;
588  }
589  c = readChar(lexer, jsonText, offset);
590  if (c != *want) {
591  unreadChar(lexer, offset);
593  tok = yajl_tok_error;
594  goto lexed;
595  }
596  } while (*(++want));
597  tok = yajl_tok_null;
598  goto lexed;
599  }
600  case '"': {
601  tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
602  jsonTextLen, offset);
603  goto lexed;
604  }
605  case '-':
606  case '0': case '1': case '2': case '3': case '4':
607  case '5': case '6': case '7': case '8': case '9': {
608  /* integer parsing wants to start from the beginning */
609  unreadChar(lexer, offset);
610  tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
611  jsonTextLen, offset);
612  goto lexed;
613  }
614  case '/':
615  /* hey, look, a probable comment! If comments are disabled
616  * it's an error. */
617  if (!lexer->allowComments) {
618  unreadChar(lexer, offset);
620  tok = yajl_tok_error;
621  goto lexed;
622  }
623  /* if comments are enabled, then we should try to lex
624  * the thing. possible outcomes are
625  * - successful lex (tok_comment, which means continue),
626  * - malformed comment opening (slash not followed by
627  * '*' or '/') (tok_error)
628  * - eof hit. (tok_eof) */
629  tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
630  jsonTextLen, offset);
631  if (tok == yajl_tok_comment) {
632  /* "error" is silly, but that's the initial
633  * state of tok. guilty until proven innocent. */
634  tok = yajl_tok_error;
635  yajl_buf_clear(lexer->buf);
636  lexer->bufInUse = 0;
637  startOffset = *offset;
638  break;
639  }
640  /* hit error or eof, bail */
641  goto lexed;
642  default:
643  lexer->error = yajl_lex_invalid_char;
644  tok = yajl_tok_error;
645  goto lexed;
646  }
647  }
648 
649 
650  lexed:
651  /* need to append to buffer if the buffer is in use or
652  * if it's an EOF token */
653  if (tok == yajl_tok_eof || lexer->bufInUse) {
654  if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
655  lexer->bufInUse = 1;
656  yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
657  lexer->bufOff = 0;
658 
659  if (tok != yajl_tok_eof) {
660  *outBuf = yajl_buf_data(lexer->buf);
661  *outLen = yajl_buf_len(lexer->buf);
662  lexer->bufInUse = 0;
663  }
664  } else if (tok != yajl_tok_error) {
665  *outBuf = jsonText + startOffset;
666  *outLen = *offset - startOffset;
667  }
668 
669  /* special case for strings. skip the quotes. */
670  if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
671  {
672  assert(*outLen >= 2);
673  (*outBuf)++;
674  *outLen -= 2;
675  }
676 
677 
678 #ifdef YAJL_LEXER_DEBUG
679  if (tok == yajl_tok_error) {
680  printf("lexical error: %s\n",
681  yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
682  } else if (tok == yajl_tok_eof) {
683  printf("EOF hit\n");
684  } else {
685  printf("lexed %s: '", tokToStr(tok));
686  fwrite(*outBuf, 1, *outLen, stdout);
687  printf("'\n");
688  }
689 #endif
690 
691  return tok;
692 }
size_t bufOff
Definition: yajl_lex.c:82
#define assert(exp)
Declare that a condition should be true.
Definition: epicsAssert.h:70
yajl_lex_error yajl_lex_get_error(yajl_lexer lexer)
Definition: yajl_lex.c:733
const char * yajl_lex_error_to_string(yajl_lex_error error)
Definition: yajl_lex.c:695
yajl_lex_error error
Definition: yajl_lex.c:74
#define printf
Definition: epicsStdio.h:41
#define NULL
Definition: catime.c:38
size_t yajl_buf_len(yajl_buf buf)
Definition: yajl_buf.c:97
void yajl_buf_append(yajl_buf buf, const void *data, size_t len)
Definition: yajl_buf.c:75
yajl_buf buf
Definition: yajl_lex.c:78
yajl_tok
Definition: yajl_lex.h:22
void yajl_buf_clear(yajl_buf buf)
Definition: yajl_buf.c:86
#define stdout
Definition: epicsStdio.h:30
unsigned int allowComments
Definition: yajl_lex.c:88
#define readChar(lxr, txt, off)
Definition: yajl_lex.c:96
unsigned int bufInUse
Definition: yajl_lex.c:85
const unsigned char * yajl_buf_data(yajl_buf buf)
Definition: yajl_buf.c:92
#define unreadChar(lxr, off)
Definition: yajl_lex.c:101
yajl_tok yajl_lex_peek ( yajl_lexer  lexer,
const unsigned char *  jsonText,
size_t  jsonTextLen,
size_t  offset 
)

have a peek at the next token, but don't move the lexer forward

Definition at line 749 of file yajl_lex.c.

751 {
752  const unsigned char * outBuf;
753  size_t outLen;
754  size_t bufLen = yajl_buf_len(lexer->buf);
755  size_t bufOff = lexer->bufOff;
756  unsigned int bufInUse = lexer->bufInUse;
757  yajl_tok tok;
758 
759  tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
760  &outBuf, &outLen);
761 
762  lexer->bufOff = bufOff;
763  lexer->bufInUse = bufInUse;
764  yajl_buf_truncate(lexer->buf, bufLen);
765 
766  return tok;
767 }
size_t bufOff
Definition: yajl_lex.c:82
size_t yajl_buf_len(yajl_buf buf)
Definition: yajl_buf.c:97
yajl_tok yajl_lex_lex(yajl_lexer lexer, const unsigned char *jsonText, size_t jsonTextLen, size_t *offset, const unsigned char **outBuf, size_t *outLen)
Definition: yajl_lex.c:503
void yajl_buf_truncate(yajl_buf buf, size_t len)
Definition: yajl_buf.c:103
yajl_buf buf
Definition: yajl_lex.c:78
yajl_tok
Definition: yajl_lex.h:22
unsigned int bufInUse
Definition: yajl_lex.c:85