charset.go 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635
  1. // Copyright 2015 PingCAP, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package mysql
  14. import "unicode"
  15. // CharsetNameToID maps charset name to its default collation ID.
  16. func CharsetNameToID(charset string) uint8 {
  17. // Use quick path for TiDB to avoid access CharsetIDs map
  18. // "SHOW CHARACTER SET;" to see all the supported character sets.
  19. if charset == "utf8mb4" {
  20. return UTF8MB4DefaultCollationID
  21. } else if charset == "binary" {
  22. return BinaryDefaultCollationID
  23. } else if charset == "utf8" {
  24. return UTF8DefaultCollationID
  25. } else if charset == "ascii" {
  26. return ASCIIDefaultCollationID
  27. } else if charset == "latin1" {
  28. return Latin1DefaultCollationID
  29. } else {
  30. return CharsetIDs[charset]
  31. }
  32. }
  33. // CharsetIDs maps charset name to its default collation ID.
  34. var CharsetIDs = map[string]uint8{
  35. "big5": 1,
  36. "dec8": 3,
  37. "cp850": 4,
  38. "hp8": 6,
  39. "koi8r": 7,
  40. "latin1": Latin1DefaultCollationID,
  41. "latin2": 9,
  42. "swe7": 10,
  43. "ascii": ASCIIDefaultCollationID,
  44. "ujis": 12,
  45. "sjis": 13,
  46. "hebrew": 16,
  47. "tis620": 18,
  48. "euckr": 19,
  49. "koi8u": 22,
  50. "gb2312": 24,
  51. "greek": 25,
  52. "cp1250": 26,
  53. "gbk": 28,
  54. "latin5": 30,
  55. "armscii8": 32,
  56. "utf8": UTF8DefaultCollationID,
  57. "ucs2": 35,
  58. "cp866": 36,
  59. "keybcs2": 37,
  60. "macce": 38,
  61. "macroman": 39,
  62. "cp852": 40,
  63. "latin7": 41,
  64. "utf8mb4": UTF8MB4DefaultCollationID,
  65. "cp1251": 51,
  66. "utf16": 54,
  67. "utf16le": 56,
  68. "cp1256": 57,
  69. "cp1257": 59,
  70. "utf32": 60,
  71. "binary": BinaryDefaultCollationID,
  72. "geostd8": 92,
  73. "cp932": 95,
  74. "eucjpms": 97,
  75. }
  76. // Charsets maps charset name to its default collation name.
  77. var Charsets = map[string]string{
  78. "big5": "big5_chinese_ci",
  79. "dec8": "dec8_swedish_ci",
  80. "cp850": "cp850_general_ci",
  81. "hp8": "hp8_english_ci",
  82. "koi8r": "koi8r_general_ci",
  83. "latin1": "latin1_bin",
  84. "latin2": "latin2_general_ci",
  85. "swe7": "swe7_swedish_ci",
  86. "ascii": "ascii_bin",
  87. "ujis": "ujis_japanese_ci",
  88. "sjis": "sjis_japanese_ci",
  89. "hebrew": "hebrew_general_ci",
  90. "tis620": "tis620_thai_ci",
  91. "euckr": "euckr_korean_ci",
  92. "koi8u": "koi8u_general_ci",
  93. "gb2312": "gb2312_chinese_ci",
  94. "greek": "greek_general_ci",
  95. "cp1250": "cp1250_general_ci",
  96. "gbk": "gbk_chinese_ci",
  97. "latin5": "latin5_turkish_ci",
  98. "armscii8": "armscii8_general_ci",
  99. "utf8": "utf8_bin",
  100. "ucs2": "ucs2_general_ci",
  101. "cp866": "cp866_general_ci",
  102. "keybcs2": "keybcs2_general_ci",
  103. "macce": "macce_general_ci",
  104. "macroman": "macroman_general_ci",
  105. "cp852": "cp852_general_ci",
  106. "latin7": "latin7_general_ci",
  107. "utf8mb4": "utf8mb4_bin",
  108. "cp1251": "cp1251_general_ci",
  109. "utf16": "utf16_general_ci",
  110. "utf16le": "utf16le_general_ci",
  111. "cp1256": "cp1256_general_ci",
  112. "cp1257": "cp1257_general_ci",
  113. "utf32": "utf32_general_ci",
  114. "binary": "binary",
  115. "geostd8": "geostd8_general_ci",
  116. "cp932": "cp932_japanese_ci",
  117. "eucjpms": "eucjpms_japanese_ci",
  118. }
  119. // Collations maps MySQL collation ID to its name.
  120. var Collations = map[uint8]string{
  121. 1: "big5_chinese_ci",
  122. 2: "latin2_czech_cs",
  123. 3: "dec8_swedish_ci",
  124. 4: "cp850_general_ci",
  125. 5: "latin1_german1_ci",
  126. 6: "hp8_english_ci",
  127. 7: "koi8r_general_ci",
  128. 8: "latin1_swedish_ci",
  129. 9: "latin2_general_ci",
  130. 10: "swe7_swedish_ci",
  131. 11: "ascii_general_ci",
  132. 12: "ujis_japanese_ci",
  133. 13: "sjis_japanese_ci",
  134. 14: "cp1251_bulgarian_ci",
  135. 15: "latin1_danish_ci",
  136. 16: "hebrew_general_ci",
  137. 18: "tis620_thai_ci",
  138. 19: "euckr_korean_ci",
  139. 20: "latin7_estonian_cs",
  140. 21: "latin2_hungarian_ci",
  141. 22: "koi8u_general_ci",
  142. 23: "cp1251_ukrainian_ci",
  143. 24: "gb2312_chinese_ci",
  144. 25: "greek_general_ci",
  145. 26: "cp1250_general_ci",
  146. 27: "latin2_croatian_ci",
  147. 28: "gbk_chinese_ci",
  148. 29: "cp1257_lithuanian_ci",
  149. 30: "latin5_turkish_ci",
  150. 31: "latin1_german2_ci",
  151. 32: "armscii8_general_ci",
  152. 33: "utf8_general_ci",
  153. 34: "cp1250_czech_cs",
  154. 35: "ucs2_general_ci",
  155. 36: "cp866_general_ci",
  156. 37: "keybcs2_general_ci",
  157. 38: "macce_general_ci",
  158. 39: "macroman_general_ci",
  159. 40: "cp852_general_ci",
  160. 41: "latin7_general_ci",
  161. 42: "latin7_general_cs",
  162. 43: "macce_bin",
  163. 44: "cp1250_croatian_ci",
  164. 45: "utf8mb4_general_ci",
  165. 46: "utf8mb4_bin",
  166. 47: "latin1_bin",
  167. 48: "latin1_general_ci",
  168. 49: "latin1_general_cs",
  169. 50: "cp1251_bin",
  170. 51: "cp1251_general_ci",
  171. 52: "cp1251_general_cs",
  172. 53: "macroman_bin",
  173. 54: "utf16_general_ci",
  174. 55: "utf16_bin",
  175. 56: "utf16le_general_ci",
  176. 57: "cp1256_general_ci",
  177. 58: "cp1257_bin",
  178. 59: "cp1257_general_ci",
  179. 60: "utf32_general_ci",
  180. 61: "utf32_bin",
  181. 62: "utf16le_bin",
  182. 63: "binary",
  183. 64: "armscii8_bin",
  184. 65: "ascii_bin",
  185. 66: "cp1250_bin",
  186. 67: "cp1256_bin",
  187. 68: "cp866_bin",
  188. 69: "dec8_bin",
  189. 70: "greek_bin",
  190. 71: "hebrew_bin",
  191. 72: "hp8_bin",
  192. 73: "keybcs2_bin",
  193. 74: "koi8r_bin",
  194. 75: "koi8u_bin",
  195. 77: "latin2_bin",
  196. 78: "latin5_bin",
  197. 79: "latin7_bin",
  198. 80: "cp850_bin",
  199. 81: "cp852_bin",
  200. 82: "swe7_bin",
  201. 83: "utf8_bin",
  202. 84: "big5_bin",
  203. 85: "euckr_bin",
  204. 86: "gb2312_bin",
  205. 87: "gbk_bin",
  206. 88: "sjis_bin",
  207. 89: "tis620_bin",
  208. 90: "ucs2_bin",
  209. 91: "ujis_bin",
  210. 92: "geostd8_general_ci",
  211. 93: "geostd8_bin",
  212. 94: "latin1_spanish_ci",
  213. 95: "cp932_japanese_ci",
  214. 96: "cp932_bin",
  215. 97: "eucjpms_japanese_ci",
  216. 98: "eucjpms_bin",
  217. 99: "cp1250_polish_ci",
  218. 101: "utf16_unicode_ci",
  219. 102: "utf16_icelandic_ci",
  220. 103: "utf16_latvian_ci",
  221. 104: "utf16_romanian_ci",
  222. 105: "utf16_slovenian_ci",
  223. 106: "utf16_polish_ci",
  224. 107: "utf16_estonian_ci",
  225. 108: "utf16_spanish_ci",
  226. 109: "utf16_swedish_ci",
  227. 110: "utf16_turkish_ci",
  228. 111: "utf16_czech_ci",
  229. 112: "utf16_danish_ci",
  230. 113: "utf16_lithuanian_ci",
  231. 114: "utf16_slovak_ci",
  232. 115: "utf16_spanish2_ci",
  233. 116: "utf16_roman_ci",
  234. 117: "utf16_persian_ci",
  235. 118: "utf16_esperanto_ci",
  236. 119: "utf16_hungarian_ci",
  237. 120: "utf16_sinhala_ci",
  238. 121: "utf16_german2_ci",
  239. 122: "utf16_croatian_ci",
  240. 123: "utf16_unicode_520_ci",
  241. 124: "utf16_vietnamese_ci",
  242. 128: "ucs2_unicode_ci",
  243. 129: "ucs2_icelandic_ci",
  244. 130: "ucs2_latvian_ci",
  245. 131: "ucs2_romanian_ci",
  246. 132: "ucs2_slovenian_ci",
  247. 133: "ucs2_polish_ci",
  248. 134: "ucs2_estonian_ci",
  249. 135: "ucs2_spanish_ci",
  250. 136: "ucs2_swedish_ci",
  251. 137: "ucs2_turkish_ci",
  252. 138: "ucs2_czech_ci",
  253. 139: "ucs2_danish_ci",
  254. 140: "ucs2_lithuanian_ci",
  255. 141: "ucs2_slovak_ci",
  256. 142: "ucs2_spanish2_ci",
  257. 143: "ucs2_roman_ci",
  258. 144: "ucs2_persian_ci",
  259. 145: "ucs2_esperanto_ci",
  260. 146: "ucs2_hungarian_ci",
  261. 147: "ucs2_sinhala_ci",
  262. 148: "ucs2_german2_ci",
  263. 149: "ucs2_croatian_ci",
  264. 150: "ucs2_unicode_520_ci",
  265. 151: "ucs2_vietnamese_ci",
  266. 159: "ucs2_general_mysql500_ci",
  267. 160: "utf32_unicode_ci",
  268. 161: "utf32_icelandic_ci",
  269. 162: "utf32_latvian_ci",
  270. 163: "utf32_romanian_ci",
  271. 164: "utf32_slovenian_ci",
  272. 165: "utf32_polish_ci",
  273. 166: "utf32_estonian_ci",
  274. 167: "utf32_spanish_ci",
  275. 168: "utf32_swedish_ci",
  276. 169: "utf32_turkish_ci",
  277. 170: "utf32_czech_ci",
  278. 171: "utf32_danish_ci",
  279. 172: "utf32_lithuanian_ci",
  280. 173: "utf32_slovak_ci",
  281. 174: "utf32_spanish2_ci",
  282. 175: "utf32_roman_ci",
  283. 176: "utf32_persian_ci",
  284. 177: "utf32_esperanto_ci",
  285. 178: "utf32_hungarian_ci",
  286. 179: "utf32_sinhala_ci",
  287. 180: "utf32_german2_ci",
  288. 181: "utf32_croatian_ci",
  289. 182: "utf32_unicode_520_ci",
  290. 183: "utf32_vietnamese_ci",
  291. 192: "utf8_unicode_ci",
  292. 193: "utf8_icelandic_ci",
  293. 194: "utf8_latvian_ci",
  294. 195: "utf8_romanian_ci",
  295. 196: "utf8_slovenian_ci",
  296. 197: "utf8_polish_ci",
  297. 198: "utf8_estonian_ci",
  298. 199: "utf8_spanish_ci",
  299. 200: "utf8_swedish_ci",
  300. 201: "utf8_turkish_ci",
  301. 202: "utf8_czech_ci",
  302. 203: "utf8_danish_ci",
  303. 204: "utf8_lithuanian_ci",
  304. 205: "utf8_slovak_ci",
  305. 206: "utf8_spanish2_ci",
  306. 207: "utf8_roman_ci",
  307. 208: "utf8_persian_ci",
  308. 209: "utf8_esperanto_ci",
  309. 210: "utf8_hungarian_ci",
  310. 211: "utf8_sinhala_ci",
  311. 212: "utf8_german2_ci",
  312. 213: "utf8_croatian_ci",
  313. 214: "utf8_unicode_520_ci",
  314. 215: "utf8_vietnamese_ci",
  315. 223: "utf8_general_mysql500_ci",
  316. 224: "utf8mb4_unicode_ci",
  317. 225: "utf8mb4_icelandic_ci",
  318. 226: "utf8mb4_latvian_ci",
  319. 227: "utf8mb4_romanian_ci",
  320. 228: "utf8mb4_slovenian_ci",
  321. 229: "utf8mb4_polish_ci",
  322. 230: "utf8mb4_estonian_ci",
  323. 231: "utf8mb4_spanish_ci",
  324. 232: "utf8mb4_swedish_ci",
  325. 233: "utf8mb4_turkish_ci",
  326. 234: "utf8mb4_czech_ci",
  327. 235: "utf8mb4_danish_ci",
  328. 236: "utf8mb4_lithuanian_ci",
  329. 237: "utf8mb4_slovak_ci",
  330. 238: "utf8mb4_spanish2_ci",
  331. 239: "utf8mb4_roman_ci",
  332. 240: "utf8mb4_persian_ci",
  333. 241: "utf8mb4_esperanto_ci",
  334. 242: "utf8mb4_hungarian_ci",
  335. 243: "utf8mb4_sinhala_ci",
  336. 244: "utf8mb4_german2_ci",
  337. 245: "utf8mb4_croatian_ci",
  338. 246: "utf8mb4_unicode_520_ci",
  339. 247: "utf8mb4_vietnamese_ci",
  340. 255: "utf8mb4_0900_ai_ci",
  341. }
  342. // CollationNames maps MySQL collation name to its ID
  343. var CollationNames = map[string]uint8{
  344. "big5_chinese_ci": 1,
  345. "latin2_czech_cs": 2,
  346. "dec8_swedish_ci": 3,
  347. "cp850_general_ci": 4,
  348. "latin1_german1_ci": 5,
  349. "hp8_english_ci": 6,
  350. "koi8r_general_ci": 7,
  351. "latin1_swedish_ci": 8,
  352. "latin2_general_ci": 9,
  353. "swe7_swedish_ci": 10,
  354. "ascii_general_ci": 11,
  355. "ujis_japanese_ci": 12,
  356. "sjis_japanese_ci": 13,
  357. "cp1251_bulgarian_ci": 14,
  358. "latin1_danish_ci": 15,
  359. "hebrew_general_ci": 16,
  360. "tis620_thai_ci": 18,
  361. "euckr_korean_ci": 19,
  362. "latin7_estonian_cs": 20,
  363. "latin2_hungarian_ci": 21,
  364. "koi8u_general_ci": 22,
  365. "cp1251_ukrainian_ci": 23,
  366. "gb2312_chinese_ci": 24,
  367. "greek_general_ci": 25,
  368. "cp1250_general_ci": 26,
  369. "latin2_croatian_ci": 27,
  370. "gbk_chinese_ci": 28,
  371. "cp1257_lithuanian_ci": 29,
  372. "latin5_turkish_ci": 30,
  373. "latin1_german2_ci": 31,
  374. "armscii8_general_ci": 32,
  375. "utf8_general_ci": 33,
  376. "cp1250_czech_cs": 34,
  377. "ucs2_general_ci": 35,
  378. "cp866_general_ci": 36,
  379. "keybcs2_general_ci": 37,
  380. "macce_general_ci": 38,
  381. "macroman_general_ci": 39,
  382. "cp852_general_ci": 40,
  383. "latin7_general_ci": 41,
  384. "latin7_general_cs": 42,
  385. "macce_bin": 43,
  386. "cp1250_croatian_ci": 44,
  387. "utf8mb4_general_ci": 45,
  388. "utf8mb4_bin": 46,
  389. "latin1_bin": 47,
  390. "latin1_general_ci": 48,
  391. "latin1_general_cs": 49,
  392. "cp1251_bin": 50,
  393. "cp1251_general_ci": 51,
  394. "cp1251_general_cs": 52,
  395. "macroman_bin": 53,
  396. "utf16_general_ci": 54,
  397. "utf16_bin": 55,
  398. "utf16le_general_ci": 56,
  399. "cp1256_general_ci": 57,
  400. "cp1257_bin": 58,
  401. "cp1257_general_ci": 59,
  402. "utf32_general_ci": 60,
  403. "utf32_bin": 61,
  404. "utf16le_bin": 62,
  405. "binary": 63,
  406. "armscii8_bin": 64,
  407. "ascii_bin": 65,
  408. "cp1250_bin": 66,
  409. "cp1256_bin": 67,
  410. "cp866_bin": 68,
  411. "dec8_bin": 69,
  412. "greek_bin": 70,
  413. "hebrew_bin": 71,
  414. "hp8_bin": 72,
  415. "keybcs2_bin": 73,
  416. "koi8r_bin": 74,
  417. "koi8u_bin": 75,
  418. "latin2_bin": 77,
  419. "latin5_bin": 78,
  420. "latin7_bin": 79,
  421. "cp850_bin": 80,
  422. "cp852_bin": 81,
  423. "swe7_bin": 82,
  424. "utf8_bin": 83,
  425. "big5_bin": 84,
  426. "euckr_bin": 85,
  427. "gb2312_bin": 86,
  428. "gbk_bin": 87,
  429. "sjis_bin": 88,
  430. "tis620_bin": 89,
  431. "ucs2_bin": 90,
  432. "ujis_bin": 91,
  433. "geostd8_general_ci": 92,
  434. "geostd8_bin": 93,
  435. "latin1_spanish_ci": 94,
  436. "cp932_japanese_ci": 95,
  437. "cp932_bin": 96,
  438. "eucjpms_japanese_ci": 97,
  439. "eucjpms_bin": 98,
  440. "cp1250_polish_ci": 99,
  441. "utf16_unicode_ci": 101,
  442. "utf16_icelandic_ci": 102,
  443. "utf16_latvian_ci": 103,
  444. "utf16_romanian_ci": 104,
  445. "utf16_slovenian_ci": 105,
  446. "utf16_polish_ci": 106,
  447. "utf16_estonian_ci": 107,
  448. "utf16_spanish_ci": 108,
  449. "utf16_swedish_ci": 109,
  450. "utf16_turkish_ci": 110,
  451. "utf16_czech_ci": 111,
  452. "utf16_danish_ci": 112,
  453. "utf16_lithuanian_ci": 113,
  454. "utf16_slovak_ci": 114,
  455. "utf16_spanish2_ci": 115,
  456. "utf16_roman_ci": 116,
  457. "utf16_persian_ci": 117,
  458. "utf16_esperanto_ci": 118,
  459. "utf16_hungarian_ci": 119,
  460. "utf16_sinhala_ci": 120,
  461. "utf16_german2_ci": 121,
  462. "utf16_croatian_ci": 122,
  463. "utf16_unicode_520_ci": 123,
  464. "utf16_vietnamese_ci": 124,
  465. "ucs2_unicode_ci": 128,
  466. "ucs2_icelandic_ci": 129,
  467. "ucs2_latvian_ci": 130,
  468. "ucs2_romanian_ci": 131,
  469. "ucs2_slovenian_ci": 132,
  470. "ucs2_polish_ci": 133,
  471. "ucs2_estonian_ci": 134,
  472. "ucs2_spanish_ci": 135,
  473. "ucs2_swedish_ci": 136,
  474. "ucs2_turkish_ci": 137,
  475. "ucs2_czech_ci": 138,
  476. "ucs2_danish_ci": 139,
  477. "ucs2_lithuanian_ci": 140,
  478. "ucs2_slovak_ci": 141,
  479. "ucs2_spanish2_ci": 142,
  480. "ucs2_roman_ci": 143,
  481. "ucs2_persian_ci": 144,
  482. "ucs2_esperanto_ci": 145,
  483. "ucs2_hungarian_ci": 146,
  484. "ucs2_sinhala_ci": 147,
  485. "ucs2_german2_ci": 148,
  486. "ucs2_croatian_ci": 149,
  487. "ucs2_unicode_520_ci": 150,
  488. "ucs2_vietnamese_ci": 151,
  489. "ucs2_general_mysql500_ci": 159,
  490. "utf32_unicode_ci": 160,
  491. "utf32_icelandic_ci": 161,
  492. "utf32_latvian_ci": 162,
  493. "utf32_romanian_ci": 163,
  494. "utf32_slovenian_ci": 164,
  495. "utf32_polish_ci": 165,
  496. "utf32_estonian_ci": 166,
  497. "utf32_spanish_ci": 167,
  498. "utf32_swedish_ci": 168,
  499. "utf32_turkish_ci": 169,
  500. "utf32_czech_ci": 170,
  501. "utf32_danish_ci": 171,
  502. "utf32_lithuanian_ci": 172,
  503. "utf32_slovak_ci": 173,
  504. "utf32_spanish2_ci": 174,
  505. "utf32_roman_ci": 175,
  506. "utf32_persian_ci": 176,
  507. "utf32_esperanto_ci": 177,
  508. "utf32_hungarian_ci": 178,
  509. "utf32_sinhala_ci": 179,
  510. "utf32_german2_ci": 180,
  511. "utf32_croatian_ci": 181,
  512. "utf32_unicode_520_ci": 182,
  513. "utf32_vietnamese_ci": 183,
  514. "utf8_unicode_ci": 192,
  515. "utf8_icelandic_ci": 193,
  516. "utf8_latvian_ci": 194,
  517. "utf8_romanian_ci": 195,
  518. "utf8_slovenian_ci": 196,
  519. "utf8_polish_ci": 197,
  520. "utf8_estonian_ci": 198,
  521. "utf8_spanish_ci": 199,
  522. "utf8_swedish_ci": 200,
  523. "utf8_turkish_ci": 201,
  524. "utf8_czech_ci": 202,
  525. "utf8_danish_ci": 203,
  526. "utf8_lithuanian_ci": 204,
  527. "utf8_slovak_ci": 205,
  528. "utf8_spanish2_ci": 206,
  529. "utf8_roman_ci": 207,
  530. "utf8_persian_ci": 208,
  531. "utf8_esperanto_ci": 209,
  532. "utf8_hungarian_ci": 210,
  533. "utf8_sinhala_ci": 211,
  534. "utf8_german2_ci": 212,
  535. "utf8_croatian_ci": 213,
  536. "utf8_unicode_520_ci": 214,
  537. "utf8_vietnamese_ci": 215,
  538. "utf8_general_mysql500_ci": 223,
  539. "utf8mb4_unicode_ci": 224,
  540. "utf8mb4_icelandic_ci": 225,
  541. "utf8mb4_latvian_ci": 226,
  542. "utf8mb4_romanian_ci": 227,
  543. "utf8mb4_slovenian_ci": 228,
  544. "utf8mb4_polish_ci": 229,
  545. "utf8mb4_estonian_ci": 230,
  546. "utf8mb4_spanish_ci": 231,
  547. "utf8mb4_swedish_ci": 232,
  548. "utf8mb4_turkish_ci": 233,
  549. "utf8mb4_czech_ci": 234,
  550. "utf8mb4_danish_ci": 235,
  551. "utf8mb4_lithuanian_ci": 236,
  552. "utf8mb4_slovak_ci": 237,
  553. "utf8mb4_spanish2_ci": 238,
  554. "utf8mb4_roman_ci": 239,
  555. "utf8mb4_persian_ci": 240,
  556. "utf8mb4_esperanto_ci": 241,
  557. "utf8mb4_hungarian_ci": 242,
  558. "utf8mb4_sinhala_ci": 243,
  559. "utf8mb4_german2_ci": 244,
  560. "utf8mb4_croatian_ci": 245,
  561. "utf8mb4_unicode_520_ci": 246,
  562. "utf8mb4_vietnamese_ci": 247,
  563. "utf8mb4_0900_ai_ci": 255,
  564. }
  565. // MySQL collation information.
  566. const (
  567. UTF8Charset = "utf8"
  568. UTF8MB4Charset = "utf8mb4"
  569. DefaultCharset = UTF8MB4Charset
  570. // DefaultCollationID is utf8mb4_bin(46)
  571. DefaultCollationID = 46
  572. Latin1DefaultCollationID = 47
  573. ASCIIDefaultCollationID = 65
  574. UTF8DefaultCollationID = 83
  575. UTF8MB4DefaultCollationID = 46
  576. BinaryDefaultCollationID = 63
  577. UTF8DefaultCollation = "utf8_bin"
  578. UTF8MB4DefaultCollation = "utf8mb4_bin"
  579. DefaultCollationName = UTF8MB4DefaultCollation
  580. // MaxBytesOfCharacter, is the max bytes length of a character,
  581. // refer to RFC3629, in UTF-8, characters from the U+0000..U+10FFFF range
  582. // (the UTF-16 accessible range) are encoded using sequences of 1 to 4 octets.
  583. MaxBytesOfCharacter = 4
  584. )
  585. // IsUTF8Charset checks if charset is utf8 or utf8mb4
  586. func IsUTF8Charset(charset string) bool {
  587. return charset == UTF8Charset || charset == UTF8MB4Charset
  588. }
  589. // RangeGraph defines valid unicode characters to use in column names. It strictly follows MySQL's definition.
  590. // See #3994.
  591. var RangeGraph = []*unicode.RangeTable{
  592. // _MY_PNT
  593. unicode.No,
  594. unicode.Mn,
  595. unicode.Me,
  596. unicode.Pc,
  597. unicode.Pd,
  598. unicode.Pd,
  599. unicode.Ps,
  600. unicode.Pe,
  601. unicode.Pi,
  602. unicode.Pf,
  603. unicode.Po,
  604. unicode.Sm,
  605. unicode.Sc,
  606. unicode.Sk,
  607. unicode.So,
  608. // _MY_U
  609. unicode.Lu,
  610. unicode.Lt,
  611. unicode.Nl,
  612. // _MY_L
  613. unicode.Ll,
  614. unicode.Lm,
  615. unicode.Lo,
  616. unicode.Nl,
  617. unicode.Mn,
  618. unicode.Mc,
  619. unicode.Me,
  620. // _MY_NMR
  621. unicode.Nd,
  622. unicode.Nl,
  623. unicode.No,
  624. }