monotone

monotone Mtn Source Tree

Root/sqlite/tokenize.c

/*
** 2001 September 15
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
** A tokenizer for SQL
**
** This file contains C code that splits an SQL input string up into
** individual tokens and sends those tokens one-by-one over to the
** parser for analysis.
**
** $Id: tokenize.c,v 1.129 2007/05/15 14:34:32 drh Exp $
*/
20#include "sqliteInt.h"
21#include "os.h"
22#include <ctype.h>
23#include <stdlib.h>
24
/*
** The charMap() macro maps an alphabetic character onto its lower-case
** ASCII equivalent. On ASCII machines this is just an upper-to-lower
** case map. On EBCDIC machines the encoding must be adjusted as well.
** Only alphabetic characters and the underscore are translated; every
** other entry of the EBCDIC table below is 0.
**
** The macro argument is parenthesized so that the cast to unsigned char
** applies to the whole argument expression, not just its first operand.
*/
#ifdef SQLITE_ASCII
# define charMap(X) sqlite3UpperToLower[(unsigned char)(X)]
#endif
#ifdef SQLITE_EBCDIC
# define charMap(X) ebcdicToAscii[(unsigned char)(X)]
const unsigned char ebcdicToAscii[] = {
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 3x */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 4x */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 5x */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95, 0, 0, /* 6x */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 7x */
 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* 8x */
 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* 9x */
 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ax */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */
 0, 97, 98, 99,100,101,102,103,104,105, 0, 0, 0, 0, 0, 0, /* Cx */
 0,106,107,108,109,110,111,112,113,114, 0, 0, 0, 0, 0, 0, /* Dx */
 0, 0,115,116,117,118,119,120,121,122, 0, 0, 0, 0, 0, 0, /* Ex */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Fx */
};
#endif
57
/*
** The sqlite3KeywordCode function looks up an identifier to determine if
** it is a keyword. If it is a keyword, the token code of that keyword is
** returned. If the input is not a keyword, TK_ID is returned.
**
** The implementation of this routine was generated by a program,
** mkkeywordhash.c, located in the tool subdirectory of the distribution.
** The output of the mkkeywordhash.c program is written into a file
** named keywordhash.h and then included into this source file by
** the #include below.
*/
69#include "keywordhash.h"
70
71
/*
** If X is a character that can be used in an identifier then
** IdChar(X) will be true. Otherwise it is false.
**
** For ASCII, any character with the high-order bit set is
** allowed in an identifier. For 7-bit characters,
** sqlite3IsIdChar[X-0x20] must be 1 (the table starts at 0x20; all
** characters below 0x20 are rejected by the macro itself).
**
** For EBCDIC, the rules are more complex but have the same
** end result. The EBCDIC table starts at 0x40.
**
** Ticket #1066. The SQL standard does not allow '$' in the
** middle of identifiers. But many SQL implementations do.
** SQLite will allow '$' in identifiers for compatibility.
** But the feature is undocumented.
**
** NOTE: IdChar() assigns its argument to a variable named "c" that
** must already be declared (as an int) in the scope where the macro
** is used. The argument is evaluated exactly once and is parenthesized
** so that arbitrary expressions may be passed safely.
*/
#ifdef SQLITE_ASCII
const char sqlite3IsIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};
#define IdChar(C) (((c=(C))&0x80)!=0 || (c>0x1f && sqlite3IsIdChar[c-0x20]))
#endif
#ifdef SQLITE_EBCDIC
const char sqlite3IsIdChar[] = {
/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 4x */
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, /* 5x */
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, /* 6x */
 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, /* 7x */
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, /* 8x */
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, /* 9x */
 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, /* Ax */
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Bx */
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Cx */
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Dx */
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, /* Ex */
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* Fx */
};
#define IdChar(C) (((c=(C))>=0x42 && sqlite3IsIdChar[c-0x40]))
#endif
118
119
120/*
121** Return the length of the token that begins at z[0].
122** Store the token type in *tokenType before returning.
123*/
124static int getToken(const unsigned char *z, int *tokenType){
125 int i, c;
126 switch( *z ){
127 case ' ': case '\t': case '\n': case '\f': case '\r': {
128 for(i=1; isspace(z[i]); i++){}
129 *tokenType = TK_SPACE;
130 return i;
131 }
132 case '-': {
133 if( z[1]=='-' ){
134 for(i=2; (c=z[i])!=0 && c!='\n'; i++){}
135 *tokenType = TK_COMMENT;
136 return i;
137 }
138 *tokenType = TK_MINUS;
139 return 1;
140 }
141 case '(': {
142 *tokenType = TK_LP;
143 return 1;
144 }
145 case ')': {
146 *tokenType = TK_RP;
147 return 1;
148 }
149 case ';': {
150 *tokenType = TK_SEMI;
151 return 1;
152 }
153 case '+': {
154 *tokenType = TK_PLUS;
155 return 1;
156 }
157 case '*': {
158 *tokenType = TK_STAR;
159 return 1;
160 }
161 case '/': {
162 if( z[1]!='*' || z[2]==0 ){
163 *tokenType = TK_SLASH;
164 return 1;
165 }
166 for(i=3, c=z[2]; (c!='*' || z[i]!='/') && (c=z[i])!=0; i++){}
167 if( c ) i++;
168 *tokenType = TK_COMMENT;
169 return i;
170 }
171 case '%': {
172 *tokenType = TK_REM;
173 return 1;
174 }
175 case '=': {
176 *tokenType = TK_EQ;
177 return 1 + (z[1]=='=');
178 }
179 case '<': {
180 if( (c=z[1])=='=' ){
181 *tokenType = TK_LE;
182 return 2;
183 }else if( c=='>' ){
184 *tokenType = TK_NE;
185 return 2;
186 }else if( c=='<' ){
187 *tokenType = TK_LSHIFT;
188 return 2;
189 }else{
190 *tokenType = TK_LT;
191 return 1;
192 }
193 }
194 case '>': {
195 if( (c=z[1])=='=' ){
196 *tokenType = TK_GE;
197 return 2;
198 }else if( c=='>' ){
199 *tokenType = TK_RSHIFT;
200 return 2;
201 }else{
202 *tokenType = TK_GT;
203 return 1;
204 }
205 }
206 case '!': {
207 if( z[1]!='=' ){
208 *tokenType = TK_ILLEGAL;
209 return 2;
210 }else{
211 *tokenType = TK_NE;
212 return 2;
213 }
214 }
215 case '|': {
216 if( z[1]!='|' ){
217 *tokenType = TK_BITOR;
218 return 1;
219 }else{
220 *tokenType = TK_CONCAT;
221 return 2;
222 }
223 }
224 case ',': {
225 *tokenType = TK_COMMA;
226 return 1;
227 }
228 case '&': {
229 *tokenType = TK_BITAND;
230 return 1;
231 }
232 case '~': {
233 *tokenType = TK_BITNOT;
234 return 1;
235 }
236 case '`':
237 case '\'':
238 case '"': {
239 int delim = z[0];
240 for(i=1; (c=z[i])!=0; i++){
241 if( c==delim ){
242 if( z[i+1]==delim ){
243 i++;
244 }else{
245 break;
246 }
247 }
248 }
249 if( c ){
250 *tokenType = TK_STRING;
251 return i+1;
252 }else{
253 *tokenType = TK_ILLEGAL;
254 return i;
255 }
256 }
257 case '.': {
258#ifndef SQLITE_OMIT_FLOATING_POINT
259 if( !isdigit(z[1]) )
260#endif
261 {
262 *tokenType = TK_DOT;
263 return 1;
264 }
265 /* If the next character is a digit, this is a floating point
266 ** number that begins with ".". Fall thru into the next case */
267 }
268 case '0': case '1': case '2': case '3': case '4':
269 case '5': case '6': case '7': case '8': case '9': {
270 *tokenType = TK_INTEGER;
271 for(i=0; isdigit(z[i]); i++){}
272#ifndef SQLITE_OMIT_FLOATING_POINT
273 if( z[i]=='.' ){
274 i++;
275 while( isdigit(z[i]) ){ i++; }
276 *tokenType = TK_FLOAT;
277 }
278 if( (z[i]=='e' || z[i]=='E') &&
279 ( isdigit(z[i+1])
280 || ((z[i+1]=='+' || z[i+1]=='-') && isdigit(z[i+2]))
281 )
282 ){
283 i += 2;
284 while( isdigit(z[i]) ){ i++; }
285 *tokenType = TK_FLOAT;
286 }
287#endif
288 while( IdChar(z[i]) ){
289 *tokenType = TK_ILLEGAL;
290 i++;
291 }
292 return i;
293 }
294 case '[': {
295 for(i=1, c=z[0]; c!=']' && (c=z[i])!=0; i++){}
296 *tokenType = TK_ID;
297 return i;
298 }
299 case '?': {
300 *tokenType = TK_VARIABLE;
301 for(i=1; isdigit(z[i]); i++){}
302 return i;
303 }
304 case '#': {
305 for(i=1; isdigit(z[i]); i++){}
306 if( i>1 ){
307 /* Parameters of the form #NNN (where NNN is a number) are used
308 ** internally by sqlite3NestedParse. */
309 *tokenType = TK_REGISTER;
310 return i;
311 }
312 /* Fall through into the next case if the '#' is not followed by
313 ** a digit. Try to match #AAAA where AAAA is a parameter name. */
314 }
315#ifndef SQLITE_OMIT_TCL_VARIABLE
316 case '$':
317#endif
318 case '@': /* For compatibility with MS SQL Server */
319 case ':': {
320 int n = 0;
321 *tokenType = TK_VARIABLE;
322 for(i=1; (c=z[i])!=0; i++){
323 if( IdChar(c) ){
324 n++;
325#ifndef SQLITE_OMIT_TCL_VARIABLE
326 }else if( c=='(' && n>0 ){
327 do{
328 i++;
329 }while( (c=z[i])!=0 && !isspace(c) && c!=')' );
330 if( c==')' ){
331 i++;
332 }else{
333 *tokenType = TK_ILLEGAL;
334 }
335 break;
336 }else if( c==':' && z[i+1]==':' ){
337 i++;
338#endif
339 }else{
340 break;
341 }
342 }
343 if( n==0 ) *tokenType = TK_ILLEGAL;
344 return i;
345 }
346#ifndef SQLITE_OMIT_BLOB_LITERAL
347 case 'x': case 'X': {
348 if( (c=z[1])=='\'' || c=='"' ){
349 int delim = c;
350 *tokenType = TK_BLOB;
351 for(i=2; (c=z[i])!=0; i++){
352 if( c==delim ){
353 if( i%2 ) *tokenType = TK_ILLEGAL;
354 break;
355 }
356 if( !isxdigit(c) ){
357 *tokenType = TK_ILLEGAL;
358 return i;
359 }
360 }
361 if( c ) i++;
362 return i;
363 }
364 /* Otherwise fall through to the next case */
365 }
366#endif
367 default: {
368 if( !IdChar(*z) ){
369 break;
370 }
371 for(i=1; IdChar(z[i]); i++){}
372 *tokenType = keywordCode((char*)z, i);
373 return i;
374 }
375 }
376 *tokenType = TK_ILLEGAL;
377 return 1;
378}
/*
** Externally visible wrapper around the file-local getToken() routine.
** Returns the length of the token that begins at z[0] and writes the
** token type into *tokenType.
*/
int sqlite3GetToken(const unsigned char *z, int *tokenType){
  return getToken(z, tokenType);
}
382
383/*
384** Run the parser on the given SQL string. The parser structure is
385** passed in. An SQLITE_ status code is returned. If an error occurs
386** and pzErrMsg!=NULL then an error message might be written into
387** memory obtained from malloc() and *pzErrMsg made to point to that
388** error message. Or maybe not.
389*/
390int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzErrMsg){
391 int nErr = 0;
392 int i;
393 void *pEngine;
394 int tokenType;
395 int lastTokenParsed = -1;
396 sqlite3 *db = pParse->db;
397 extern void *sqlite3ParserAlloc(void*(*)(size_t));
398 extern void sqlite3ParserFree(void*, void(*)(void*));
399 extern void sqlite3Parser(void*, int, Token, Parse*);
400
401 if( db->activeVdbeCnt==0 ){
402 db->u1.isInterrupted = 0;
403 }
404 pParse->rc = SQLITE_OK;
405 i = 0;
406 pEngine = sqlite3ParserAlloc((void*(*)(size_t))sqlite3MallocX);
407 if( pEngine==0 ){
408 return SQLITE_NOMEM;
409 }
410 assert( pParse->sLastToken.dyn==0 );
411 assert( pParse->pNewTable==0 );
412 assert( pParse->pNewTrigger==0 );
413 assert( pParse->nVar==0 );
414 assert( pParse->nVarExpr==0 );
415 assert( pParse->nVarExprAlloc==0 );
416 assert( pParse->apVarExpr==0 );
417 pParse->zTail = pParse->zSql = zSql;
418 while( !sqlite3MallocFailed() && zSql[i]!=0 ){
419 assert( i>=0 );
420 pParse->sLastToken.z = (u8*)&zSql[i];
421 assert( pParse->sLastToken.dyn==0 );
422 pParse->sLastToken.n = getToken((unsigned char*)&zSql[i],&tokenType);
423 i += pParse->sLastToken.n;
424 if( i>SQLITE_MAX_SQL_LENGTH ){
425 pParse->rc = SQLITE_TOOBIG;
426 break;
427 }
428 switch( tokenType ){
429 case TK_SPACE:
430 case TK_COMMENT: {
431 if( db->u1.isInterrupted ){
432 pParse->rc = SQLITE_INTERRUPT;
433 sqlite3SetString(pzErrMsg, "interrupt", (char*)0);
434 goto abort_parse;
435 }
436 break;
437 }
438 case TK_ILLEGAL: {
439 if( pzErrMsg ){
440 sqliteFree(*pzErrMsg);
441 *pzErrMsg = sqlite3MPrintf("unrecognized token: \"%T\"",
442 &pParse->sLastToken);
443 }
444 nErr++;
445 goto abort_parse;
446 }
447 case TK_SEMI: {
448 pParse->zTail = &zSql[i];
449 /* Fall thru into the default case */
450 }
451 default: {
452 sqlite3Parser(pEngine, tokenType, pParse->sLastToken, pParse);
453 lastTokenParsed = tokenType;
454 if( pParse->rc!=SQLITE_OK ){
455 goto abort_parse;
456 }
457 break;
458 }
459 }
460 }
461abort_parse:
462 if( zSql[i]==0 && nErr==0 && pParse->rc==SQLITE_OK ){
463 if( lastTokenParsed!=TK_SEMI ){
464 sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse);
465 pParse->zTail = &zSql[i];
466 }
467 sqlite3Parser(pEngine, 0, pParse->sLastToken, pParse);
468 }
469 sqlite3ParserFree(pEngine, sqlite3FreeX);
470 if( sqlite3MallocFailed() ){
471 pParse->rc = SQLITE_NOMEM;
472 }
473 if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){
474 sqlite3SetString(&pParse->zErrMsg, sqlite3ErrStr(pParse->rc), (char*)0);
475 }
476 if( pParse->zErrMsg ){
477 if( pzErrMsg && *pzErrMsg==0 ){
478 *pzErrMsg = pParse->zErrMsg;
479 }else{
480 sqliteFree(pParse->zErrMsg);
481 }
482 pParse->zErrMsg = 0;
483 if( !nErr ) nErr++;
484 }
485 if( pParse->pVdbe && pParse->nErr>0 && pParse->nested==0 ){
486 sqlite3VdbeDelete(pParse->pVdbe);
487 pParse->pVdbe = 0;
488 }
489#ifndef SQLITE_OMIT_SHARED_CACHE
490 if( pParse->nested==0 ){
491 sqliteFree(pParse->aTableLock);
492 pParse->aTableLock = 0;
493 pParse->nTableLock = 0;
494 }
495#endif
496
497 if( !IN_DECLARE_VTAB ){
498 /* If the pParse->declareVtab flag is set, do not delete any table
499 ** structure built up in pParse->pNewTable. The calling code (see vtab.c)
500 ** will take responsibility for freeing the Table structure.
501 */
502 sqlite3DeleteTable(pParse->pNewTable);
503 }
504
505 sqlite3DeleteTrigger(pParse->pNewTrigger);
506 sqliteFree(pParse->apVarExpr);
507 if( nErr>0 && (pParse->rc==SQLITE_OK || pParse->rc==SQLITE_DONE) ){
508 pParse->rc = SQLITE_ERROR;
509 }
510 return nErr;
511}

Archive Download this file

Branches

Tags

Quick Links:     www.monotone.ca    -     Downloads    -     Documentation    -     Wiki    -     Code Forge    -     Build Status