misc.go 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023
  1. // Copyright 2016 PingCAP, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package parser
  14. import (
  15. "strings"
  16. "github.com/pingcap/parser/charset"
  17. )
  18. func isLetter(ch rune) bool {
  19. return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
  20. }
  21. func isDigit(ch rune) bool {
  22. return ch >= '0' && ch <= '9'
  23. }
  24. func isIdentChar(ch rune) bool {
  25. return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$' || isIdentExtend(ch)
  26. }
  27. func isIdentExtend(ch rune) bool {
  28. return ch >= 0x80 && ch <= '\uffff'
  29. }
  30. func isUserVarChar(ch rune) bool {
  31. return isLetter(ch) || isDigit(ch) || ch == '_' || ch == '$' || ch == '.' || isIdentExtend(ch)
  32. }
// trieNode is one node of the byte-indexed trie rooted at ruleTable.
// Nodes are created and filled in by initTokenByte, initTokenString and
// initTokenFunc.
type trieNode struct {
	// childs has one slot per possible byte value; a nil slot means no
	// rule has been registered that continues with that byte.
	childs [256]*trieNode
	// token is the token ID recorded for the byte sequence that ends at
	// this node (set by initTokenByte / initTokenString).
	token int
	// fn is the scan function registered for this node by initTokenFunc,
	// or nil when none was registered.
	fn func(s *Scanner) (int, Pos, string)
}
// ruleTable is the root node of the token trie. It is populated once by
// init through initTokenByte, initTokenString and initTokenFunc.
var ruleTable trieNode
  39. func initTokenByte(c byte, tok int) {
  40. if ruleTable.childs[c] == nil {
  41. ruleTable.childs[c] = &trieNode{}
  42. }
  43. ruleTable.childs[c].token = tok
  44. }
  45. func initTokenString(str string, tok int) {
  46. node := &ruleTable
  47. for _, c := range str {
  48. if node.childs[c] == nil {
  49. node.childs[c] = &trieNode{}
  50. }
  51. node = node.childs[c]
  52. }
  53. node.token = tok
  54. }
  55. func initTokenFunc(str string, fn func(s *Scanner) (int, Pos, string)) {
  56. for i := 0; i < len(str); i++ {
  57. c := str[i]
  58. if ruleTable.childs[c] == nil {
  59. ruleTable.childs[c] = &trieNode{}
  60. }
  61. ruleTable.childs[c].fn = fn
  62. }
  63. }
  64. func init() {
  65. // invalid is a special token defined in parser.y, when parser meet
  66. // this token, it will throw an error.
  67. // set root trie node's token to invalid, so when input match nothing
  68. // in the trie, invalid will be the default return token.
  69. ruleTable.token = invalid
  70. initTokenByte('/', int('/'))
  71. initTokenByte('+', int('+'))
  72. initTokenByte('>', int('>'))
  73. initTokenByte('<', int('<'))
  74. initTokenByte('(', int('('))
  75. initTokenByte(')', int(')'))
  76. initTokenByte('[', int('['))
  77. initTokenByte(']', int(']'))
  78. initTokenByte(';', int(';'))
  79. initTokenByte(',', int(','))
  80. initTokenByte('&', int('&'))
  81. initTokenByte('%', int('%'))
  82. initTokenByte(':', int(':'))
  83. initTokenByte('|', int('|'))
  84. initTokenByte('!', int('!'))
  85. initTokenByte('^', int('^'))
  86. initTokenByte('~', int('~'))
  87. initTokenByte('\\', int('\\'))
  88. initTokenByte('?', paramMarker)
  89. initTokenByte('=', eq)
  90. initTokenByte('{', int('{'))
  91. initTokenByte('}', int('}'))
  92. initTokenString("||", pipes)
  93. initTokenString("&&", andand)
  94. initTokenString("&^", andnot)
  95. initTokenString(":=", assignmentEq)
  96. initTokenString("<=>", nulleq)
  97. initTokenString(">=", ge)
  98. initTokenString("<=", le)
  99. initTokenString("!=", neq)
  100. initTokenString("<>", neqSynonym)
  101. initTokenString("<<", lsh)
  102. initTokenString(">>", rsh)
  103. initTokenString("\\N", null)
  104. initTokenFunc("@", startWithAt)
  105. initTokenFunc("/", startWithSlash)
  106. initTokenFunc("*", startWithStar)
  107. initTokenFunc("-", startWithDash)
  108. initTokenFunc("#", startWithSharp)
  109. initTokenFunc("Xx", startWithXx)
  110. initTokenFunc("Nn", startWithNn)
  111. initTokenFunc("Bb", startWithBb)
  112. initTokenFunc(".", startWithDot)
  113. initTokenFunc("_$ACDEFGHIJKLMOPQRSTUVWYZacdefghijklmopqrstuvwyz", scanIdentifier)
  114. initTokenFunc("`", scanQuotedIdent)
  115. initTokenFunc("0123456789", startWithNumber)
  116. initTokenFunc("'\"", startString)
  117. }
  118. // isInTokenMap indicates whether the target string is contained in tokenMap.
  119. func isInTokenMap(target string) bool {
  120. _, ok := tokenMap[target]
  121. return ok
  122. }
// tokenMap is a map of known identifiers to the parser token ID.
// Keys are upper case: isTokenIdentifier upper-cases the scanned literal
// before looking it up here. Several spellings deliberately share one token
// (for example DEC/DECIMAL, SCHEMA/DATABASE and DISTINCTROW/DISTINCT).
// Please try to keep the map in alphabetical order.
var tokenMap = map[string]int{
	"ACCOUNT": account,
	"ACTION": action,
	"ADD": add,
	"ADDDATE": addDate,
	"ADMIN": admin,
	"ADVISE": advise,
	"AFTER": after,
	"AGAINST": against,
	"AGO": ago,
	"ALGORITHM": algorithm,
	"ALL": all,
	"ALTER": alter,
	"ALWAYS": always,
	"ANALYZE": analyze,
	"AND": and,
	"ANY": any,
	"APPROX_COUNT_DISTINCT": approxCountDistinct,
	"APPROX_PERCENTILE": approxPercentile,
	"AS": as,
	"ASC": asc,
	"ASCII": ascii,
	"AUTO_ID_CACHE": autoIdCache,
	"AUTO_INCREMENT": autoIncrement,
	"AUTO_RANDOM": autoRandom,
	"AUTO_RANDOM_BASE": autoRandomBase,
	"AVG_ROW_LENGTH": avgRowLength,
	"AVG": avg,
	"BACKEND": backend,
	"BACKUP": backup,
	"BACKUPS": backups,
	"BEGIN": begin,
	"BETWEEN": between,
	"BERNOULLI": bernoulli,
	"BIGINT": bigIntType,
	"BINARY": binaryType,
	"BINDING": binding,
	"BINDINGS": bindings,
	"BINLOG": binlog,
	"BIT_AND": bitAnd,
	"BIT_OR": bitOr,
	"BIT_XOR": bitXor,
	"BIT": bitType,
	"BLOB": blobType,
	"BLOCK": block,
	"BOOL": boolType,
	"BOOLEAN": booleanType,
	"BOTH": both,
	"BOUND": bound,
	"BTREE": btree,
	"BUCKETS": buckets,
	"BUILTINS": builtins,
	"BY": by,
	"BYTE": byteType,
	"CACHE": cache,
	"CALL": call,
	"CANCEL": cancel,
	"CAPTURE": capture,
	"CARDINALITY": cardinality,
	"CASCADE": cascade,
	"CASCADED": cascaded,
	"CASE": caseKwd,
	"CAST": cast,
	"CAUSAL": causal,
	"CHAIN": chain,
	"CHANGE": change,
	"CHAR": charType,
	"CHARACTER": character,
	"CHARSET": charsetKwd,
	"CHECK": check,
	"CHECKPOINT": checkpoint,
	"CHECKSUM": checksum,
	"CIPHER": cipher,
	"CLEANUP": cleanup,
	"CLIENT": client,
	"CLIENT_ERRORS_SUMMARY": clientErrorsSummary,
	"CLUSTERED": clustered,
	"CMSKETCH": cmSketch,
	"COALESCE": coalesce,
	"COLLATE": collate,
	"COLLATION": collation,
	"COLUMN_FORMAT": columnFormat,
	"COLUMN": column,
	"COLUMNS": columns,
	"COMMENT": comment,
	"COMMIT": commit,
	"COMMITTED": committed,
	"COMPACT": compact,
	"COMPRESSED": compressed,
	"COMPRESSION": compression,
	"CONCURRENCY": concurrency,
	"CONFIG": config,
	"CONNECTION": connection,
	"CONSISTENCY": consistency,
	"CONSISTENT": consistent,
	"CONSTRAINT": constraint,
	"CONSTRAINTS": constraints,
	"CONTEXT": context,
	"CONVERT": convert,
	"COPY": copyKwd,
	"CORRELATION": correlation,
	"CPU": cpu,
	"CREATE": create,
	"CROSS": cross,
	"CSV_BACKSLASH_ESCAPE": csvBackslashEscape,
	"CSV_DELIMITER": csvDelimiter,
	"CSV_HEADER": csvHeader,
	"CSV_NOT_NULL": csvNotNull,
	"CSV_NULL": csvNull,
	"CSV_SEPARATOR": csvSeparator,
	"CSV_TRIM_LAST_SEPARATORS": csvTrimLastSeparators,
	"CURRENT_DATE": currentDate,
	"CURRENT_ROLE": currentRole,
	"CURRENT_TIME": currentTime,
	"CURRENT_TIMESTAMP": currentTs,
	"CURRENT_USER": currentUser,
	"CURRENT": current,
	"CURTIME": curTime,
	"CYCLE": cycle,
	"DATA": data,
	"DATABASE": database,
	"DATABASES": databases,
	"DATE_ADD": dateAdd,
	"DATE_SUB": dateSub,
	"DATE": dateType,
	"DATETIME": datetimeType,
	"DAY_HOUR": dayHour,
	"DAY_MICROSECOND": dayMicrosecond,
	"DAY_MINUTE": dayMinute,
	"DAY_SECOND": daySecond,
	"DAY": day,
	"DDL": ddl,
	"DEALLOCATE": deallocate,
	"DEC": decimalType,
	"DECIMAL": decimalType,
	"DEFAULT": defaultKwd,
	"DEFINER": definer,
	"DELAY_KEY_WRITE": delayKeyWrite,
	"DELAYED": delayed,
	"DELETE": deleteKwd,
	"DEPENDENCY": dependency,
	"DEPTH": depth,
	"DESC": desc,
	"DESCRIBE": describe,
	"DIRECTORY": directory,
	"DISABLE": disable,
	"DISCARD": discard,
	"DISK": disk,
	"DISTINCT": distinct,
	"DISTINCTROW": distinct,
	"DIV": div,
	"DO": do,
	"DOUBLE": doubleType,
	"DRAINER": drainer,
	"DROP": drop,
	"DUAL": dual,
	"DUPLICATE": duplicate,
	"DYNAMIC": dynamic,
	"ELSE": elseKwd,
	"ENABLE": enable,
	"ENCLOSED": enclosed,
	"ENCRYPTION": encryption,
	"END": end,
	"ENFORCED": enforced,
	"ENGINE": engine,
	"ENGINES": engines,
	"ENUM": enum,
	"ERROR": errorKwd,
	"ERRORS": identSQLErrors,
	"ESCAPE": escape,
	"ESCAPED": escaped,
	"EVENT": event,
	"EVENTS": events,
	"EVOLVE": evolve,
	"EXACT": exact,
	"EXCEPT": except,
	"EXCHANGE": exchange,
	"EXCLUSIVE": exclusive,
	"EXECUTE": execute,
	"EXISTS": exists,
	"EXPANSION": expansion,
	"EXPIRE": expire,
	"EXPLAIN": explain,
	"EXPR_PUSHDOWN_BLACKLIST": exprPushdownBlacklist,
	"EXTENDED": extended,
	"EXTRACT": extract,
	"FALSE": falseKwd,
	"FAULTS": faultsSym,
	"FETCH": fetch,
	"FIELDS": fields,
	"FILE": file,
	"FIRST": first,
	"FIXED": fixed,
	"FLASHBACK": flashback,
	"FLOAT": floatType,
	"FLUSH": flush,
	"FOLLOWER": follower,
	"FOLLOWING": following,
	"FOR": forKwd,
	"FORCE": force,
	"FOREIGN": foreign,
	"FORMAT": format,
	"FROM": from,
	"FULL": full,
	"FULLTEXT": fulltext,
	"FUNCTION": function,
	"GENERAL": general,
	"GENERATED": generated,
	"GET_FORMAT": getFormat,
	"GLOBAL": global,
	"GRANT": grant,
	"GRANTS": grants,
	"GROUP_CONCAT": groupConcat,
	"GROUP": group,
	"HASH": hash,
	"HAVING": having,
	"HIGH_PRIORITY": highPriority,
	"HISTORY": history,
	"HISTOGRAM": histogram,
	"HOSTS": hosts,
	"HOUR_MICROSECOND": hourMicrosecond,
	"HOUR_MINUTE": hourMinute,
	"HOUR_SECOND": hourSecond,
	"HOUR": hour,
	"IDENTIFIED": identified,
	"IF": ifKwd,
	"IGNORE": ignore,
	"IMPORT": importKwd,
	"IMPORTS": imports,
	"IN": in,
	"INCREMENT": increment,
	"INCREMENTAL": incremental,
	"INDEX": index,
	"INDEXES": indexes,
	"INFILE": infile,
	"INNER": inner,
	"INPLACE": inplace,
	"INSERT_METHOD": insertMethod,
	"INSERT": insert,
	"INSTANCE": instance,
	"INSTANT": instant,
	"INT": intType,
	"INT1": int1Type,
	"INT2": int2Type,
	"INT3": int3Type,
	"INT4": int4Type,
	"INT8": int8Type,
	"INTEGER": integerType,
	"INTERNAL": internal,
	"INTERSECT": intersect,
	"INTERVAL": interval,
	"INTO": into,
	"INVISIBLE": invisible,
	"INVOKER": invoker,
	"IO": io,
	"IPC": ipc,
	"IS": is,
	"ISOLATION": isolation,
	"ISSUER": issuer,
	"JOB": job,
	"JOBS": jobs,
	"JOIN": join,
	"JSON_ARRAYAGG": jsonArrayagg,
	"JSON_OBJECTAGG": jsonObjectAgg,
	"JSON": jsonType,
	"KEY_BLOCK_SIZE": keyBlockSize,
	"KEY": key,
	"KEYS": keys,
	"KILL": kill,
	"LABELS": labels,
	"LANGUAGE": language,
	"LAST_BACKUP": lastBackup,
	"LAST": last,
	"LASTVAL": lastval,
	"LEADER": leader,
	"LEADING": leading,
	"LEARNER": learner,
	"LEFT": left,
	"LESS": less,
	"LEVEL": level,
	"LIKE": like,
	"LIMIT": limit,
	"LINEAR": linear,
	"LINES": lines,
	"LIST": list,
	"LOAD": load,
	"LOCAL": local,
	"LOCALTIME": localTime,
	"LOCALTIMESTAMP": localTs,
	"LOCATION": location,
	"LOCK": lock,
	"LOCKED": locked,
	"LOGS": logs,
	"LONG": long,
	"LONGBLOB": longblobType,
	"LONGTEXT": longtextType,
	"LOW_PRIORITY": lowPriority,
	"MASTER": master,
	"MATCH": match,
	"MAX_CONNECTIONS_PER_HOUR": maxConnectionsPerHour,
	"MAX_IDXNUM": max_idxnum,
	"MAX_MINUTES": max_minutes,
	"MAX_QUERIES_PER_HOUR": maxQueriesPerHour,
	"MAX_ROWS": maxRows,
	"MAX_UPDATES_PER_HOUR": maxUpdatesPerHour,
	"MAX_USER_CONNECTIONS": maxUserConnections,
	"MAX": max,
	"MAXVALUE": maxValue,
	"MB": mb,
	"MEDIUMBLOB": mediumblobType,
	"MEDIUMINT": mediumIntType,
	"MEDIUMTEXT": mediumtextType,
	"MEMORY": memory,
	"MERGE": merge,
	"MICROSECOND": microsecond,
	"MIN_ROWS": minRows,
	"MIN": min,
	"MINUTE_MICROSECOND": minuteMicrosecond,
	"MINUTE_SECOND": minuteSecond,
	"MINUTE": minute,
	"MINVALUE": minValue,
	"MOD": mod,
	"MODE": mode,
	"MODIFY": modify,
	"MONTH": month,
	"NAMES": names,
	"NATIONAL": national,
	"NATURAL": natural,
	"NCHAR": ncharType,
	"NEVER": never,
	"NEXT_ROW_ID": next_row_id,
	"NEXT": next,
	"NEXTVAL": nextval,
	"NO_WRITE_TO_BINLOG": noWriteToBinLog,
	"NO": no,
	"NOCACHE": nocache,
	"NOCYCLE": nocycle,
	"NODE_ID": nodeID,
	"NODE_STATE": nodeState,
	"NODEGROUP": nodegroup,
	"NOMAXVALUE": nomaxvalue,
	"NOMINVALUE": nominvalue,
	"NONCLUSTERED": nonclustered,
	"NONE": none,
	"NOT": not,
	"NOW": now,
	"NOWAIT": nowait,
	"NULL": null,
	"NULLS": nulls,
	"NUMERIC": numericType,
	"NVARCHAR": nvarcharType,
	"OFF": off,
	"OFFSET": offset,
	"ON_DUPLICATE": onDuplicate,
	"ON": on,
	"ONLINE": online,
	"ONLY": only,
	"OPEN": open,
	"OPT_RULE_BLACKLIST": optRuleBlacklist,
	"OPTIMISTIC": optimistic,
	"OPTIMIZE": optimize,
	"OPTION": option,
	"OPTIONAL": optional,
	"OPTIONALLY": optionally,
	"OR": or,
	"ORDER": order,
	"OUTER": outer,
	"OUTFILE": outfile,
	"PACK_KEYS": packKeys,
	"PAGE": pageSym,
	"PARSER": parser,
	"PARTIAL": partial,
	"PARTITION": partition,
	"PARTITIONING": partitioning,
	"PARTITIONS": partitions,
	"PASSWORD": password,
	"PERCENT": percent,
	"PER_DB": per_db,
	"PER_TABLE": per_table,
	"PESSIMISTIC": pessimistic,
	"PLACEMENT": placement,
	"PLUGINS": plugins,
	"POLICY": policy,
	"POSITION": position,
	"PRE_SPLIT_REGIONS": preSplitRegions,
	"PRECEDING": preceding,
	"PRECISION": precisionType,
	"PREPARE": prepare,
	"PRIMARY": primary,
	"PRIVILEGES": privileges,
	"PROCEDURE": procedure,
	"PROCESS": process,
	"PROCESSLIST": processlist,
	"PROFILE": profile,
	"PROFILES": profiles,
	"PROXY": proxy,
	"PUMP": pump,
	"PURGE": purge,
	"QUARTER": quarter,
	"QUERIES": queries,
	"QUERY": query,
	"QUICK": quick,
	"RANGE": rangeKwd,
	"RATE_LIMIT": rateLimit,
	"READ": read,
	"REAL": realType,
	"REBUILD": rebuild,
	"RECENT": recent,
	"RECOVER": recover,
	"RECURSIVE": recursive,
	"REDUNDANT": redundant,
	"REFERENCES": references,
	"REGEXP": regexpKwd,
	"REGION": region,
	"REGIONS": regions,
	"RELEASE": release,
	"RELOAD": reload,
	"REMOVE": remove,
	"RENAME": rename,
	"REORGANIZE": reorganize,
	"REPAIR": repair,
	"REPEAT": repeat,
	"REPEATABLE": repeatable,
	"REPLACE": replace,
	"REPLICA": replica,
	"REPLICAS": replicas,
	"REPLICATION": replication,
	"REQUIRE": require,
	"REQUIRED": required,
	"RESET": reset,
	"RESPECT": respect,
	"RESTART": restart,
	"RESTORE": restore,
	"RESTORES": restores,
	"RESTRICT": restrict,
	"REVERSE": reverse,
	"REVOKE": revoke,
	"RIGHT": right,
	"RLIKE": rlike,
	"ROLE": role,
	"ROLLBACK": rollback,
	"ROUTINE": routine,
	"ROW_COUNT": rowCount,
	"ROW_FORMAT": rowFormat,
	"ROW": row,
	"ROWS": rows,
	"RTREE": rtree,
	"RESUME": resume,
	"RUNNING": running,
	"S3": s3,
	"SAMPLES": samples,
	"SAN": san,
	"SCHEMA": database,
	"SCHEMAS": databases,
	"SECOND_MICROSECOND": secondMicrosecond,
	"SECOND": second,
	"SECONDARY_ENGINE": secondaryEngine,
	"SECONDARY_LOAD": secondaryLoad,
	"SECONDARY_UNLOAD": secondaryUnload,
	"SECURITY": security,
	"SELECT": selectKwd,
	"SEND_CREDENTIALS_TO_TIKV": sendCredentialsToTiKV,
	"SEPARATOR": separator,
	"SEQUENCE": sequence,
	"SERIAL": serial,
	"SERIALIZABLE": serializable,
	"SESSION": session,
	"SET": set,
	"SETVAL": setval,
	"SHARD_ROW_ID_BITS": shardRowIDBits,
	"SHARE": share,
	"SHARED": shared,
	"SHOW": show,
	"SHUTDOWN": shutdown,
	"SIGNED": signed,
	"SIMPLE": simple,
	"SKIP": skip,
	"SKIP_SCHEMA_FILES": skipSchemaFiles,
	"SLAVE": slave,
	"SLOW": slow,
	"SMALLINT": smallIntType,
	"SNAPSHOT": snapshot,
	"SOME": some,
	"SOURCE": source,
	"SPATIAL": spatial,
	"SPLIT": split,
	"SQL_BIG_RESULT": sqlBigResult,
	"SQL_BUFFER_RESULT": sqlBufferResult,
	"SQL_CACHE": sqlCache,
	"SQL_CALC_FOUND_ROWS": sqlCalcFoundRows,
	"SQL_NO_CACHE": sqlNoCache,
	"SQL_SMALL_RESULT": sqlSmallResult,
	"SQL_TSI_DAY": sqlTsiDay,
	"SQL_TSI_HOUR": sqlTsiHour,
	"SQL_TSI_MINUTE": sqlTsiMinute,
	"SQL_TSI_MONTH": sqlTsiMonth,
	"SQL_TSI_QUARTER": sqlTsiQuarter,
	"SQL_TSI_SECOND": sqlTsiSecond,
	"SQL_TSI_WEEK": sqlTsiWeek,
	"SQL_TSI_YEAR": sqlTsiYear,
	"SQL": sql,
	"SSL": ssl,
	"STALENESS": staleness,
	"START": start,
	"STARTING": starting,
	"STATISTICS": statistics,
	"STATS_AUTO_RECALC": statsAutoRecalc,
	"STATS_BUCKETS": statsBuckets,
	"STATS_EXTENDED": statsExtended,
	"STATS_HEALTHY": statsHealthy,
	"STATS_HISTOGRAMS": statsHistograms,
	"STATS_TOPN": statsTopN,
	"STATS_META": statsMeta,
	"STATS_PERSISTENT": statsPersistent,
	"STATS_SAMPLE_PAGES": statsSamplePages,
	"STATS": stats,
	"STATUS": status,
	"STD": stddevPop,
	"STDDEV_POP": stddevPop,
	"STDDEV_SAMP": stddevSamp,
	"STDDEV": stddevPop,
	"STOP": stop,
	"STORAGE": storage,
	"STORED": stored,
	"STRAIGHT_JOIN": straightJoin,
	"STRICT": strict,
	"STRICT_FORMAT": strictFormat,
	"STRONG": strong,
	"SUBDATE": subDate,
	"SUBJECT": subject,
	"SUBPARTITION": subpartition,
	"SUBPARTITIONS": subpartitions,
	"SUBSTR": substring,
	"SUBSTRING": substring,
	"SUM": sum,
	"SUPER": super,
	"SWAPS": swaps,
	"SWITCHES": switchesSym,
	"SYSTEM": system,
	"SYSTEM_TIME": systemTime,
	"TABLE_CHECKSUM": tableChecksum,
	"TABLE": tableKwd,
	"TABLES": tables,
	"TABLESAMPLE": tableSample,
	"TABLESPACE": tablespace,
	"TELEMETRY": telemetry,
	"TELEMETRY_ID": telemetryID,
	"TEMPORARY": temporary,
	"TEMPTABLE": temptable,
	"TERMINATED": terminated,
	"TEXT": textType,
	"THAN": than,
	"THEN": then,
	"TIDB": tidb,
	"TIFLASH": tiFlash,
	"TIKV_IMPORTER": tikvImporter,
	"TIME": timeType,
	"TIMESTAMP": timestampType,
	"TIMESTAMPADD": timestampAdd,
	"TIMESTAMPDIFF": timestampDiff,
	"TINYBLOB": tinyblobType,
	"TINYINT": tinyIntType,
	"TINYTEXT": tinytextType,
	"TLS": tls,
	"TO": to,
	"TOKUDB_DEFAULT": tokudbDefault,
	"TOKUDB_FAST": tokudbFast,
	"TOKUDB_LZMA": tokudbLzma,
	"TOKUDB_QUICKLZ": tokudbQuickLZ,
	"TOKUDB_SMALL": tokudbSmall,
	"TOKUDB_SNAPPY": tokudbSnappy,
	"TOKUDB_UNCOMPRESSED": tokudbUncompressed,
	"TOKUDB_ZLIB": tokudbZlib,
	"TOP": top,
	"TOPN": topn,
	"TRACE": trace,
	"TRADITIONAL": traditional,
	"TRAILING": trailing,
	"TRANSACTION": transaction,
	"TRIGGER": trigger,
	"TRIGGERS": triggers,
	"TRIM": trim,
	"TRUE": trueKwd,
	"TRUNCATE": truncate,
	"TYPE": tp,
	"UNBOUNDED": unbounded,
	"UNCOMMITTED": uncommitted,
	"UNDEFINED": undefined,
	"UNICODE": unicodeSym,
	"UNION": union,
	"UNIQUE": unique,
	"UNKNOWN": unknown,
	"UNLOCK": unlock,
	"UNSIGNED": unsigned,
	"UPDATE": update,
	"USAGE": usage,
	"USE": use,
	"USER": user,
	"USING": using,
	"UTC_DATE": utcDate,
	"UTC_TIME": utcTime,
	"UTC_TIMESTAMP": utcTimestamp,
	"VALIDATION": validation,
	"VALUE": value,
	"VALUES": values,
	"VAR_POP": varPop,
	"VAR_SAMP": varSamp,
	"VARBINARY": varbinaryType,
	"VARCHAR": varcharType,
	"VARCHARACTER": varcharacter,
	"VARIABLES": variables,
	"VARIANCE": varPop,
	"VARYING": varying,
	"VOTER": voter,
	"VIEW": view,
	"VIRTUAL": virtual,
	"VISIBLE": visible,
	"WARNINGS": warnings,
	"WEEK": week,
	"WEIGHT_STRING": weightString,
	"WHEN": when,
	"WHERE": where,
	"WIDTH": width,
	"WITH": with,
	"WITHOUT": without,
	"WRITE": write,
	"X509": x509,
	"XOR": xor,
	"YEAR_MONTH": yearMonth,
	"YEAR": yearType,
	"ZEROFILL": zerofill,
	"WAIT": wait,
}
// btFuncTokenMap maps upper-case built-in function names to their dedicated
// function tokens. isTokenIdentifier consults it before tokenMap, and only
// when the identifier is followed by '(' (directly, or after whitespace when
// the SQL mode reports ignore-space). Several names share one token, e.g.
// MID/SUBSTR/SUBSTRING and STD/STDDEV/STDDEV_POP.
// See https://dev.mysql.com/doc/refman/5.7/en/function-resolution.html for details
var btFuncTokenMap = map[string]int{
	"ADDDATE": builtinAddDate,
	"BIT_AND": builtinBitAnd,
	"BIT_OR": builtinBitOr,
	"BIT_XOR": builtinBitXor,
	"CAST": builtinCast,
	"COUNT": builtinCount,
	"APPROX_COUNT_DISTINCT": builtinApproxCountDistinct,
	"APPROX_PERCENTILE": builtinApproxPercentile,
	"CURDATE": builtinCurDate,
	"CURTIME": builtinCurTime,
	"DATE_ADD": builtinDateAdd,
	"DATE_SUB": builtinDateSub,
	"EXTRACT": builtinExtract,
	"GROUP_CONCAT": builtinGroupConcat,
	"MAX": builtinMax,
	"MID": builtinSubstring,
	"MIN": builtinMin,
	"NOW": builtinNow,
	"POSITION": builtinPosition,
	"SESSION_USER": builtinUser,
	"STD": builtinStddevPop,
	"STDDEV": builtinStddevPop,
	"STDDEV_POP": builtinStddevPop,
	"STDDEV_SAMP": builtinStddevSamp,
	"SUBDATE": builtinSubDate,
	"SUBSTR": builtinSubstring,
	"SUBSTRING": builtinSubstring,
	"SUM": builtinSum,
	"SYSDATE": builtinSysDate,
	"SYSTEM_USER": builtinUser,
	"TRIM": builtinTrim,
	"VARIANCE": builtinVarPop,
	"VAR_POP": builtinVarPop,
	"VAR_SAMP": builtinVarSamp,
}
// windowFuncTokenMap maps upper-case window-function keywords to their
// tokens. isTokenIdentifier falls back to it only when the name is not in
// tokenMap and the scanner's supportWindowFunc flag is set.
var windowFuncTokenMap = map[string]int{
	"CUME_DIST": cumeDist,
	"DENSE_RANK": denseRank,
	"FIRST_VALUE": firstValue,
	"GROUPS": groups,
	"LAG": lag,
	"LAST_VALUE": lastValue,
	"LEAD": lead,
	"NTH_VALUE": nthValue,
	"NTILE": ntile,
	"OVER": over,
	"PERCENT_RANK": percentRank,
	"RANK": rank,
	"ROW_NUMBER": rowNumber,
	"WINDOW": window,
}
// aliases maps a keyword spelling to another spelling that uses the same
// token; each pair listed here resolves to a single token in tokenMap
// (e.g. "SCHEMA" and "DATABASE" both map to database).
// NOTE(review): the consumer of this map is not in this part of the file.
var aliases = map[string]string{
	"SCHEMA": "DATABASE",
	"SCHEMAS": "DATABASES",
	"DEC": "DECIMAL",
	"SUBSTR": "SUBSTRING",
}
// hintedTokens is the set of tokens after which an optimizer hint is
// recognized.
// According to https://dev.mysql.com/doc/refman/8.0/en/optimizer-hints.html,
// only SELECT, INSERT, REPLACE, UPDATE and DELETE accept optimizer hints.
// Additionally we support CREATE and PARTITION for hints at table creation.
var hintedTokens = map[int]struct{}{
	selectKwd: {},
	insert: {},
	replace: {},
	update: {},
	deleteKwd: {},
	create: {},
	partition: {},
}
// hintTokenMap maps upper-case optimizer hint names and hint keywords to
// their hint token IDs. The TiDB aliases (TIDB_HJ, TIDB_INLJ, TIDB_SMJ) reuse
// the tokens of the hints they stand for.
// NOTE(review): the hint scanner that reads this map is not in this part of
// the file; presumably it upper-cases names before lookup — confirm.
var hintTokenMap = map[string]int{
	// MySQL 8.0 hint names
	"JOIN_FIXED_ORDER": hintJoinFixedOrder,
	"JOIN_ORDER": hintJoinOrder,
	"JOIN_PREFIX": hintJoinPrefix,
	"JOIN_SUFFIX": hintJoinSuffix,
	"BKA": hintBKA,
	"NO_BKA": hintNoBKA,
	"BNL": hintBNL,
	"NO_BNL": hintNoBNL,
	"HASH_JOIN": hintHashJoin,
	"NO_HASH_JOIN": hintNoHashJoin,
	"MERGE": hintMerge,
	"NO_MERGE": hintNoMerge,
	"INDEX_MERGE": hintIndexMerge,
	"NO_INDEX_MERGE": hintNoIndexMerge,
	"MRR": hintMRR,
	"NO_MRR": hintNoMRR,
	"NO_ICP": hintNoICP,
	"NO_RANGE_OPTIMIZATION": hintNoRangeOptimization,
	"SKIP_SCAN": hintSkipScan,
	"NO_SKIP_SCAN": hintNoSkipScan,
	"SEMIJOIN": hintSemijoin,
	"NO_SEMIJOIN": hintNoSemijoin,
	"MAX_EXECUTION_TIME": hintMaxExecutionTime,
	"SET_VAR": hintSetVar,
	"RESOURCE_GROUP": hintResourceGroup,
	"QB_NAME": hintQBName,
	// TiDB hint names
	"AGG_TO_COP": hintAggToCop,
	"LIMIT_TO_COP": hintLimitToCop,
	"IGNORE_PLAN_CACHE": hintIgnorePlanCache,
	"HASH_AGG": hintHashAgg,
	"IGNORE_INDEX": hintIgnoreIndex,
	"INL_HASH_JOIN": hintInlHashJoin,
	"INL_JOIN": hintInlJoin,
	"INL_MERGE_JOIN": hintInlMergeJoin,
	"MEMORY_QUOTA": hintMemoryQuota,
	"NO_SWAP_JOIN_INPUTS": hintNoSwapJoinInputs,
	"QUERY_TYPE": hintQueryType,
	"READ_CONSISTENT_REPLICA": hintReadConsistentReplica,
	"READ_FROM_STORAGE": hintReadFromStorage,
	"BROADCAST_JOIN": hintBCJoin,
	"BROADCAST_JOIN_LOCAL": hintBCJoinPreferLocal,
	"MERGE_JOIN": hintSMJoin,
	"STREAM_AGG": hintStreamAgg,
	"SWAP_JOIN_INPUTS": hintSwapJoinInputs,
	"USE_INDEX_MERGE": hintUseIndexMerge,
	"USE_INDEX": hintUseIndex,
	"USE_PLAN_CACHE": hintUsePlanCache,
	"USE_TOJA": hintUseToja,
	"TIME_RANGE": hintTimeRange,
	"USE_CASCADES": hintUseCascades,
	"NTH_PLAN": hintNthPlan,
	"FORCE_INDEX": hintForceIndex,
	// TiDB hint aliases
	"TIDB_HJ": hintHashJoin,
	"TIDB_INLJ": hintInlJoin,
	"TIDB_SMJ": hintSMJoin,
	// Other keywords
	"OLAP": hintOLAP,
	"OLTP": hintOLTP,
	"TIKV": hintTiKV,
	"TIFLASH": hintTiFlash,
	"PARTITION": hintPartition,
	"FALSE": hintFalse,
	"TRUE": hintTrue,
	"MB": hintMB,
	"GB": hintGB,
	"DUPSWEEDOUT": hintDupsWeedOut,
	"FIRSTMATCH": hintFirstMatch,
	"LOOSESCAN": hintLooseScan,
	"MATERIALIZATION": hintMaterialization,
}
  905. func (s *Scanner) isTokenIdentifier(lit string, offset int) int {
  906. // An identifier before or after '.' means it is part of a qualified identifier.
  907. // We do not parse it as keyword.
  908. if s.r.peek() == '.' {
  909. return 0
  910. }
  911. if offset > 0 && s.r.s[offset-1] == '.' {
  912. return 0
  913. }
  914. buf := &s.buf
  915. buf.Reset()
  916. buf.Grow(len(lit))
  917. data := buf.Bytes()[:len(lit)]
  918. for i := 0; i < len(lit); i++ {
  919. if lit[i] >= 'a' && lit[i] <= 'z' {
  920. data[i] = lit[i] + 'A' - 'a'
  921. } else {
  922. data[i] = lit[i]
  923. }
  924. }
  925. checkBtFuncToken := false
  926. if s.r.peek() == '(' {
  927. checkBtFuncToken = true
  928. } else if s.sqlMode.HasIgnoreSpaceMode() {
  929. s.skipWhitespace()
  930. if s.r.peek() == '(' {
  931. checkBtFuncToken = true
  932. }
  933. }
  934. if checkBtFuncToken {
  935. if tok := btFuncTokenMap[string(data)]; tok != 0 {
  936. return tok
  937. }
  938. }
  939. tok, ok := tokenMap[string(data)]
  940. if !ok && s.supportWindowFunc {
  941. tok = windowFuncTokenMap[string(data)]
  942. }
  943. return tok
  944. }
  945. func handleIdent(lval *yySymType) int {
  946. s := lval.ident
  947. // A character string literal may have an optional character set introducer and COLLATE clause:
  948. // [_charset_name]'string' [COLLATE collation_name]
  949. // See https://dev.mysql.com/doc/refman/5.7/en/charset-literal.html
  950. if !strings.HasPrefix(s, "_") {
  951. return identifier
  952. }
  953. cs, _, err := charset.GetCharsetInfo(s[1:])
  954. if err != nil {
  955. return identifier
  956. }
  957. lval.ident = cs
  958. return underscoreCS
  959. }
  960. // SpecialCommentsController controls whether special comments like `/*T![xxx] yyy */`
  961. // can be parsed as `yyy`. To add such rules, please use SpecialCommentsController.Register().
  962. // For example:
  963. // SpecialCommentsController.Register("30100");
  964. // Now the parser will treat
  965. // select a, /*T![30100] mysterious_keyword */ from t;
  966. // and
  967. // select a, mysterious_keyword from t;
  968. // equally.
  969. // Similar special comments without registration are ignored by parser.
  970. var SpecialCommentsController = specialCommentsCtrl{
  971. supportedFeatures: map[string]struct{}{},
  972. }
  973. type specialCommentsCtrl struct {
  974. supportedFeatures map[string]struct{}
  975. }
  976. func (s *specialCommentsCtrl) Register(featureID string) {
  977. s.supportedFeatures[featureID] = struct{}{}
  978. }
  979. func (s *specialCommentsCtrl) Unregister(featureID string) {
  980. delete(s.supportedFeatures, featureID)
  981. }
  982. func (s *specialCommentsCtrl) ContainsAll(featureIDs []string) bool {
  983. for _, f := range featureIDs {
  984. if _, found := s.supportedFeatures[f]; !found {
  985. return false
  986. }
  987. }
  988. return true
  989. }