This is the Unofficial EPICS BASE Doxygen Site
yajl_encode.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 #include <assert.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <stdio.h>
21 
22 #include "yajl_encode.h"
23 
/*
 * Write the two uppercase hexadecimal digits of byte `c` into hexBuf[0]
 * (high nibble) and hexBuf[1] (low nibble). No terminator is written.
 */
static void CharToHex(unsigned char c, char * hexBuf)
{
    static const char digits[] = "0123456789ABCDEF";
    const unsigned int high = c >> 4;
    const unsigned int low = c & 0x0F;

    hexBuf[1] = digits[low];
    hexBuf[0] = digits[high];
}
30 
31 void
33  void * ctx,
34  const unsigned char * str,
35  size_t len,
36  int escape_solidus)
37 {
38  size_t beg = 0;
39  size_t end = 0;
40  char hexBuf[7];
41  hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
42  hexBuf[6] = 0;
43 
44  while (end < len) {
45  const char * escaped = NULL;
46  switch (str[end]) {
47  case '\r': escaped = "\\r"; break;
48  case '\n': escaped = "\\n"; break;
49  case '\\': escaped = "\\\\"; break;
50  /* it is not required to escape a solidus in JSON:
51  * read sec. 2.5: http://www.ietf.org/rfc/rfc4627.txt
52  * specifically, this production from the grammar:
53  * unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
54  */
55  case '/': if (escape_solidus) escaped = "\\/"; break;
56  case '"': escaped = "\\\""; break;
57  case '\f': escaped = "\\f"; break;
58  case '\b': escaped = "\\b"; break;
59  case '\t': escaped = "\\t"; break;
60  default:
61  if ((unsigned char) str[end] < 32) {
62  CharToHex(str[end], hexBuf + 4);
63  escaped = hexBuf;
64  }
65  break;
66  }
67  if (escaped != NULL) {
68  print(ctx, (const char *) (str + beg), end - beg);
69  print(ctx, escaped, (unsigned int)strlen(escaped));
70  beg = ++end;
71  } else {
72  ++end;
73  }
74  }
75  print(ctx, (const char *) (str + beg), end - beg);
76 }
77 
/*
 * Shift the four hexadecimal digits at `hex` into *val, most significant
 * digit first. *val is not cleared beforehand: each digit is appended below
 * whatever bits it already holds. Upper- and lowercase letters are accepted;
 * any other byte trips the assertion.
 */
static void hexToDigit(unsigned int * val, const unsigned char * hex)
{
    const unsigned char * const stop = hex + 4;

    while (hex != stop) {
        unsigned char digit = *hex++;

        /* fold letters to uppercase and slide them down next to '9' */
        if (digit >= 'A') {
            digit = (digit & ~0x20) - 7;
        }
        digit -= '0';
        assert(!(digit & 0xF0));
        *val = (*val << 4) | digit;
    }
}
89 
/*
 * Encode `codepoint` as a NUL-terminated UTF-8 sequence in utf8Buf, which
 * must have room for up to five bytes (four data bytes plus terminator).
 * Values of 0x200000 and above are replaced by a single '?'.
 */
static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf)
{
    char * out = utf8Buf;

    if (codepoint >= 0x200000) {
        /* not representable in four bytes */
        *out++ = '?';
    } else if (codepoint >= 0x10000) {
        *out++ = (char) ((codepoint >> 18) | 0xF0);
        *out++ = (char) (((codepoint >> 12) & 0x3F) | 0x80);
        *out++ = (char) (((codepoint >> 6) & 0x3F) | 0x80);
        *out++ = (char) ((codepoint & 0x3F) | 0x80);
    } else if (codepoint >= 0x0800) {
        *out++ = (char) ((codepoint >> 12) | 0xE0);
        *out++ = (char) (((codepoint >> 6) & 0x3F) | 0x80);
        *out++ = (char) ((codepoint & 0x3F) | 0x80);
    } else if (codepoint >= 0x80) {
        *out++ = (char) ((codepoint >> 6) | 0xC0);
        *out++ = (char) ((codepoint & 0x3F) | 0x80);
    } else {
        *out++ = (char) codepoint;
    }
    *out = 0;
}
115 
116 void yajl_string_decode(yajl_buf buf, const unsigned char * str,
117  size_t len)
118 {
119  size_t beg = 0;
120  size_t end = 0;
121 
122  while (end < len) {
123  if (str[end] == '\\') {
124  char utf8Buf[5];
125  const char * unescaped = "?";
126  yajl_buf_append(buf, str + beg, end - beg);
127  switch (str[++end]) {
128  case 'r': unescaped = "\r"; break;
129  case 'n': unescaped = "\n"; break;
130  case '\\': unescaped = "\\"; break;
131  case '/': unescaped = "/"; break;
132  case '"': unescaped = "\""; break;
133  case 'f': unescaped = "\f"; break;
134  case 'b': unescaped = "\b"; break;
135  case 't': unescaped = "\t"; break;
136  case 'u': {
137  unsigned int codepoint = 0;
138  hexToDigit(&codepoint, str + ++end);
139  end+=3;
140  /* check if this is a surrogate */
141  if ((codepoint & 0xFC00) == 0xD800) {
142  end++;
143  if (str[end] == '\\' && str[end + 1] == 'u') {
144  unsigned int surrogate = 0;
145  hexToDigit(&surrogate, str + end + 2);
146  codepoint =
147  (((codepoint & 0x3F) << 10) |
148  ((((codepoint >> 6) & 0xF) + 1) << 16) |
149  (surrogate & 0x3FF));
150  end += 5;
151  } else {
152  unescaped = "?";
153  break;
154  }
155  }
156 
157  Utf32toUtf8(codepoint, utf8Buf);
158  unescaped = utf8Buf;
159 
160  if (codepoint == 0) {
161  yajl_buf_append(buf, unescaped, 1);
162  beg = ++end;
163  continue;
164  }
165 
166  break;
167  }
168  default:
169  assert("this should never happen" == NULL);
170  }
171  yajl_buf_append(buf, unescaped, (unsigned int)strlen(unescaped));
172  beg = ++end;
173  } else {
174  end++;
175  }
176  }
177  yajl_buf_append(buf, str + beg, end - beg);
178 }
179 
/* step to the next byte; fail the enclosing function if none is left */
#define ADV_PTR s++; if (!(len--)) return 0;

/*
 * Check that the `len` bytes at `s` are structurally well-formed UTF-8:
 * every lead byte announces one to four bytes and is followed by the
 * matching number of 10xxxxxx continuation bytes.
 *
 * Returns 1 when the input passes (an empty input always passes) and 0
 * otherwise, including when `s` is NULL with a non-zero `len` or when a
 * sequence is cut short by the end of the input.
 */
int yajl_string_validate_utf8(const unsigned char * s, size_t len)
{
    if (!len) return 1;
    if (!s) return 0;

    while (len--) {
        const unsigned char lead = *s;
        unsigned int trailing;

        /* how many continuation bytes does the lead byte call for? */
        if (lead <= 0x7f) {
            trailing = 0;               /* single byte */
        } else if ((lead >> 5) == 0x6) {
            trailing = 1;               /* two byte */
        } else if ((lead >> 4) == 0x0e) {
            trailing = 2;               /* three byte */
        } else if ((lead >> 3) == 0x1e) {
            trailing = 3;               /* four byte */
        } else {
            return 0;
        }

        while (trailing--) {
            ADV_PTR;
            if ((*s >> 6) != 0x2) return 0;
        }

        s++;
    }

    return 1;
}
int yajl_string_validate_utf8(const unsigned char *s, size_t len)
Definition: yajl_encode.c:182
#define assert(exp)
Declare that a condition should be true.
Definition: epicsAssert.h:70
int i
Definition: scan.c:967
#define NULL
Definition: catime.c:38
#define str(v)
void yajl_buf_append(yajl_buf buf, const void *data, size_t len)
Definition: yajl_buf.c:75
#define ADV_PTR
Definition: yajl_encode.c:180
void(* yajl_print_t)(void *ctx, const char *str, size_t len)
Definition: yajl_gen.h:62
Definition: tool_lib.h:64
void yajl_string_encode(const yajl_print_t print, void *ctx, const unsigned char *str, size_t len, int escape_solidus)
Definition: yajl_encode.c:32
void yajl_string_decode(yajl_buf buf, const unsigned char *str, size_t len)
Definition: yajl_encode.c:116