encoding_table.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. // Copyright 2015 PingCAP, Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // See the License for the specific language governing permissions and
  12. // limitations under the License.
  13. package charset
  14. import (
  15. "strings"
  16. "golang.org/x/text/encoding"
  17. "golang.org/x/text/encoding/charmap"
  18. "golang.org/x/text/encoding/japanese"
  19. "golang.org/x/text/encoding/korean"
  20. "golang.org/x/text/encoding/simplifiedchinese"
  21. "golang.org/x/text/encoding/traditionalchinese"
  22. "golang.org/x/text/encoding/unicode"
  23. )
  24. // Lookup returns the encoding with the specified label, and its canonical
  25. // name. It returns nil and the empty string if label is not one of the
  26. // standard encodings for HTML. Matching is case-insensitive and ignores
  27. // leading and trailing whitespace.
  28. func Lookup(label string) (e encoding.Encoding, name string) {
  29. label = strings.ToLower(strings.Trim(label, "\t\n\r\f "))
  30. enc := encodings[label]
  31. return enc.e, enc.name
  32. }
  33. var encodings = map[string]struct {
  34. e encoding.Encoding
  35. name string
  36. }{
  37. "unicode-1-1-utf-8": {encoding.Nop, "utf-8"},
  38. "utf-8": {encoding.Nop, "utf-8"},
  39. "utf8": {encoding.Nop, "utf-8"},
  40. "utf8mb4": {encoding.Nop, "utf-8"},
  41. "binary": {encoding.Nop, "binary"},
  42. "866": {charmap.CodePage866, "ibm866"},
  43. "cp866": {charmap.CodePage866, "ibm866"},
  44. "csibm866": {charmap.CodePage866, "ibm866"},
  45. "ibm866": {charmap.CodePage866, "ibm866"},
  46. "csisolatin2": {charmap.ISO8859_2, "iso-8859-2"},
  47. "iso-8859-2": {charmap.ISO8859_2, "iso-8859-2"},
  48. "iso-ir-101": {charmap.ISO8859_2, "iso-8859-2"},
  49. "iso8859-2": {charmap.ISO8859_2, "iso-8859-2"},
  50. "iso88592": {charmap.ISO8859_2, "iso-8859-2"},
  51. "iso_8859-2": {charmap.ISO8859_2, "iso-8859-2"},
  52. "iso_8859-2:1987": {charmap.ISO8859_2, "iso-8859-2"},
  53. "l2": {charmap.ISO8859_2, "iso-8859-2"},
  54. "latin2": {charmap.ISO8859_2, "iso-8859-2"},
  55. "csisolatin3": {charmap.ISO8859_3, "iso-8859-3"},
  56. "iso-8859-3": {charmap.ISO8859_3, "iso-8859-3"},
  57. "iso-ir-109": {charmap.ISO8859_3, "iso-8859-3"},
  58. "iso8859-3": {charmap.ISO8859_3, "iso-8859-3"},
  59. "iso88593": {charmap.ISO8859_3, "iso-8859-3"},
  60. "iso_8859-3": {charmap.ISO8859_3, "iso-8859-3"},
  61. "iso_8859-3:1988": {charmap.ISO8859_3, "iso-8859-3"},
  62. "l3": {charmap.ISO8859_3, "iso-8859-3"},
  63. "latin3": {charmap.ISO8859_3, "iso-8859-3"},
  64. "csisolatin4": {charmap.ISO8859_4, "iso-8859-4"},
  65. "iso-8859-4": {charmap.ISO8859_4, "iso-8859-4"},
  66. "iso-ir-110": {charmap.ISO8859_4, "iso-8859-4"},
  67. "iso8859-4": {charmap.ISO8859_4, "iso-8859-4"},
  68. "iso88594": {charmap.ISO8859_4, "iso-8859-4"},
  69. "iso_8859-4": {charmap.ISO8859_4, "iso-8859-4"},
  70. "iso_8859-4:1988": {charmap.ISO8859_4, "iso-8859-4"},
  71. "l4": {charmap.ISO8859_4, "iso-8859-4"},
  72. "latin4": {charmap.ISO8859_4, "iso-8859-4"},
  73. "csisolatincyrillic": {charmap.ISO8859_5, "iso-8859-5"},
  74. "cyrillic": {charmap.ISO8859_5, "iso-8859-5"},
  75. "iso-8859-5": {charmap.ISO8859_5, "iso-8859-5"},
  76. "iso-ir-144": {charmap.ISO8859_5, "iso-8859-5"},
  77. "iso8859-5": {charmap.ISO8859_5, "iso-8859-5"},
  78. "iso88595": {charmap.ISO8859_5, "iso-8859-5"},
  79. "iso_8859-5": {charmap.ISO8859_5, "iso-8859-5"},
  80. "iso_8859-5:1988": {charmap.ISO8859_5, "iso-8859-5"},
  81. "arabic": {charmap.ISO8859_6, "iso-8859-6"},
  82. "asmo-708": {charmap.ISO8859_6, "iso-8859-6"},
  83. "csiso88596e": {charmap.ISO8859_6, "iso-8859-6"},
  84. "csiso88596i": {charmap.ISO8859_6, "iso-8859-6"},
  85. "csisolatinarabic": {charmap.ISO8859_6, "iso-8859-6"},
  86. "ecma-114": {charmap.ISO8859_6, "iso-8859-6"},
  87. "iso-8859-6": {charmap.ISO8859_6, "iso-8859-6"},
  88. "iso-8859-6-e": {charmap.ISO8859_6, "iso-8859-6"},
  89. "iso-8859-6-i": {charmap.ISO8859_6, "iso-8859-6"},
  90. "iso-ir-127": {charmap.ISO8859_6, "iso-8859-6"},
  91. "iso8859-6": {charmap.ISO8859_6, "iso-8859-6"},
  92. "iso88596": {charmap.ISO8859_6, "iso-8859-6"},
  93. "iso_8859-6": {charmap.ISO8859_6, "iso-8859-6"},
  94. "iso_8859-6:1987": {charmap.ISO8859_6, "iso-8859-6"},
  95. "csisolatingreek": {charmap.ISO8859_7, "iso-8859-7"},
  96. "ecma-118": {charmap.ISO8859_7, "iso-8859-7"},
  97. "elot_928": {charmap.ISO8859_7, "iso-8859-7"},
  98. "greek": {charmap.ISO8859_7, "iso-8859-7"},
  99. "greek8": {charmap.ISO8859_7, "iso-8859-7"},
  100. "iso-8859-7": {charmap.ISO8859_7, "iso-8859-7"},
  101. "iso-ir-126": {charmap.ISO8859_7, "iso-8859-7"},
  102. "iso8859-7": {charmap.ISO8859_7, "iso-8859-7"},
  103. "iso88597": {charmap.ISO8859_7, "iso-8859-7"},
  104. "iso_8859-7": {charmap.ISO8859_7, "iso-8859-7"},
  105. "iso_8859-7:1987": {charmap.ISO8859_7, "iso-8859-7"},
  106. "sun_eu_greek": {charmap.ISO8859_7, "iso-8859-7"},
  107. "csiso88598e": {charmap.ISO8859_8, "iso-8859-8"},
  108. "csisolatinhebrew": {charmap.ISO8859_8, "iso-8859-8"},
  109. "hebrew": {charmap.ISO8859_8, "iso-8859-8"},
  110. "iso-8859-8": {charmap.ISO8859_8, "iso-8859-8"},
  111. "iso-8859-8-e": {charmap.ISO8859_8, "iso-8859-8"},
  112. "iso-ir-138": {charmap.ISO8859_8, "iso-8859-8"},
  113. "iso8859-8": {charmap.ISO8859_8, "iso-8859-8"},
  114. "iso88598": {charmap.ISO8859_8, "iso-8859-8"},
  115. "iso_8859-8": {charmap.ISO8859_8, "iso-8859-8"},
  116. "iso_8859-8:1988": {charmap.ISO8859_8, "iso-8859-8"},
  117. "visual": {charmap.ISO8859_8, "iso-8859-8"},
  118. "csiso88598i": {charmap.ISO8859_8, "iso-8859-8-i"},
  119. "iso-8859-8-i": {charmap.ISO8859_8, "iso-8859-8-i"},
  120. "logical": {charmap.ISO8859_8, "iso-8859-8-i"},
  121. "csisolatin6": {charmap.ISO8859_10, "iso-8859-10"},
  122. "iso-8859-10": {charmap.ISO8859_10, "iso-8859-10"},
  123. "iso-ir-157": {charmap.ISO8859_10, "iso-8859-10"},
  124. "iso8859-10": {charmap.ISO8859_10, "iso-8859-10"},
  125. "iso885910": {charmap.ISO8859_10, "iso-8859-10"},
  126. "l6": {charmap.ISO8859_10, "iso-8859-10"},
  127. "latin6": {charmap.ISO8859_10, "iso-8859-10"},
  128. "iso-8859-13": {charmap.ISO8859_13, "iso-8859-13"},
  129. "iso8859-13": {charmap.ISO8859_13, "iso-8859-13"},
  130. "iso885913": {charmap.ISO8859_13, "iso-8859-13"},
  131. "iso-8859-14": {charmap.ISO8859_14, "iso-8859-14"},
  132. "iso8859-14": {charmap.ISO8859_14, "iso-8859-14"},
  133. "iso885914": {charmap.ISO8859_14, "iso-8859-14"},
  134. "csisolatin9": {charmap.ISO8859_15, "iso-8859-15"},
  135. "iso-8859-15": {charmap.ISO8859_15, "iso-8859-15"},
  136. "iso8859-15": {charmap.ISO8859_15, "iso-8859-15"},
  137. "iso885915": {charmap.ISO8859_15, "iso-8859-15"},
  138. "iso_8859-15": {charmap.ISO8859_15, "iso-8859-15"},
  139. "l9": {charmap.ISO8859_15, "iso-8859-15"},
  140. "iso-8859-16": {charmap.ISO8859_16, "iso-8859-16"},
  141. "cskoi8r": {charmap.KOI8R, "koi8-r"},
  142. "koi": {charmap.KOI8R, "koi8-r"},
  143. "koi8": {charmap.KOI8R, "koi8-r"},
  144. "koi8-r": {charmap.KOI8R, "koi8-r"},
  145. "koi8_r": {charmap.KOI8R, "koi8-r"},
  146. "koi8-u": {charmap.KOI8U, "koi8-u"},
  147. "csmacintosh": {charmap.Macintosh, "macintosh"},
  148. "mac": {charmap.Macintosh, "macintosh"},
  149. "macintosh": {charmap.Macintosh, "macintosh"},
  150. "x-mac-roman": {charmap.Macintosh, "macintosh"},
  151. "dos-874": {charmap.Windows874, "windows-874"},
  152. "iso-8859-11": {charmap.Windows874, "windows-874"},
  153. "iso8859-11": {charmap.Windows874, "windows-874"},
  154. "iso885911": {charmap.Windows874, "windows-874"},
  155. "tis-620": {charmap.Windows874, "windows-874"},
  156. "windows-874": {charmap.Windows874, "windows-874"},
  157. "cp1250": {charmap.Windows1250, "windows-1250"},
  158. "windows-1250": {charmap.Windows1250, "windows-1250"},
  159. "x-cp1250": {charmap.Windows1250, "windows-1250"},
  160. "cp1251": {charmap.Windows1251, "windows-1251"},
  161. "windows-1251": {charmap.Windows1251, "windows-1251"},
  162. "x-cp1251": {charmap.Windows1251, "windows-1251"},
  163. "ansi_x3.4-1968": {charmap.Windows1252, "windows-1252"},
  164. "ascii": {charmap.Windows1252, "windows-1252"},
  165. "cp1252": {charmap.Windows1252, "windows-1252"},
  166. "cp819": {charmap.Windows1252, "windows-1252"},
  167. "csisolatin1": {charmap.Windows1252, "windows-1252"},
  168. "ibm819": {charmap.Windows1252, "windows-1252"},
  169. "iso-8859-1": {charmap.Windows1252, "windows-1252"},
  170. "iso-ir-100": {charmap.Windows1252, "windows-1252"},
  171. "iso8859-1": {charmap.Windows1252, "windows-1252"},
  172. "iso88591": {charmap.Windows1252, "windows-1252"},
  173. "iso_8859-1": {charmap.Windows1252, "windows-1252"},
  174. "iso_8859-1:1987": {charmap.Windows1252, "windows-1252"},
  175. "l1": {charmap.Windows1252, "windows-1252"},
  176. "latin1": {charmap.Windows1252, "windows-1252"},
  177. "us-ascii": {charmap.Windows1252, "windows-1252"},
  178. "windows-1252": {charmap.Windows1252, "windows-1252"},
  179. "x-cp1252": {charmap.Windows1252, "windows-1252"},
  180. "cp1253": {charmap.Windows1253, "windows-1253"},
  181. "windows-1253": {charmap.Windows1253, "windows-1253"},
  182. "x-cp1253": {charmap.Windows1253, "windows-1253"},
  183. "cp1254": {charmap.Windows1254, "windows-1254"},
  184. "csisolatin5": {charmap.Windows1254, "windows-1254"},
  185. "iso-8859-9": {charmap.Windows1254, "windows-1254"},
  186. "iso-ir-148": {charmap.Windows1254, "windows-1254"},
  187. "iso8859-9": {charmap.Windows1254, "windows-1254"},
  188. "iso88599": {charmap.Windows1254, "windows-1254"},
  189. "iso_8859-9": {charmap.Windows1254, "windows-1254"},
  190. "iso_8859-9:1989": {charmap.Windows1254, "windows-1254"},
  191. "l5": {charmap.Windows1254, "windows-1254"},
  192. "latin5": {charmap.Windows1254, "windows-1254"},
  193. "windows-1254": {charmap.Windows1254, "windows-1254"},
  194. "x-cp1254": {charmap.Windows1254, "windows-1254"},
  195. "cp1255": {charmap.Windows1255, "windows-1255"},
  196. "windows-1255": {charmap.Windows1255, "windows-1255"},
  197. "x-cp1255": {charmap.Windows1255, "windows-1255"},
  198. "cp1256": {charmap.Windows1256, "windows-1256"},
  199. "windows-1256": {charmap.Windows1256, "windows-1256"},
  200. "x-cp1256": {charmap.Windows1256, "windows-1256"},
  201. "cp1257": {charmap.Windows1257, "windows-1257"},
  202. "windows-1257": {charmap.Windows1257, "windows-1257"},
  203. "x-cp1257": {charmap.Windows1257, "windows-1257"},
  204. "cp1258": {charmap.Windows1258, "windows-1258"},
  205. "windows-1258": {charmap.Windows1258, "windows-1258"},
  206. "x-cp1258": {charmap.Windows1258, "windows-1258"},
  207. "x-mac-cyrillic": {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
  208. "x-mac-ukrainian": {charmap.MacintoshCyrillic, "x-mac-cyrillic"},
  209. "chinese": {simplifiedchinese.GBK, "gbk"},
  210. "csgb2312": {simplifiedchinese.GBK, "gbk"},
  211. "csiso58gb231280": {simplifiedchinese.GBK, "gbk"},
  212. "gb2312": {simplifiedchinese.GBK, "gbk"},
  213. "gb_2312": {simplifiedchinese.GBK, "gbk"},
  214. "gb_2312-80": {simplifiedchinese.GBK, "gbk"},
  215. "gbk": {simplifiedchinese.GBK, "gbk"},
  216. "iso-ir-58": {simplifiedchinese.GBK, "gbk"},
  217. "x-gbk": {simplifiedchinese.GBK, "gbk"},
  218. "gb18030": {simplifiedchinese.GB18030, "gb18030"},
  219. "hz-gb-2312": {simplifiedchinese.HZGB2312, "hz-gb-2312"},
  220. "big5": {traditionalchinese.Big5, "big5"},
  221. "big5-hkscs": {traditionalchinese.Big5, "big5"},
  222. "cn-big5": {traditionalchinese.Big5, "big5"},
  223. "csbig5": {traditionalchinese.Big5, "big5"},
  224. "x-x-big5": {traditionalchinese.Big5, "big5"},
  225. "cseucpkdfmtjapanese": {japanese.EUCJP, "euc-jp"},
  226. "euc-jp": {japanese.EUCJP, "euc-jp"},
  227. "x-euc-jp": {japanese.EUCJP, "euc-jp"},
  228. "csiso2022jp": {japanese.ISO2022JP, "iso-2022-jp"},
  229. "iso-2022-jp": {japanese.ISO2022JP, "iso-2022-jp"},
  230. "csshiftjis": {japanese.ShiftJIS, "shift_jis"},
  231. "ms_kanji": {japanese.ShiftJIS, "shift_jis"},
  232. "shift-jis": {japanese.ShiftJIS, "shift_jis"},
  233. "shift_jis": {japanese.ShiftJIS, "shift_jis"},
  234. "sjis": {japanese.ShiftJIS, "shift_jis"},
  235. "windows-31j": {japanese.ShiftJIS, "shift_jis"},
  236. "x-sjis": {japanese.ShiftJIS, "shift_jis"},
  237. "cseuckr": {korean.EUCKR, "euc-kr"},
  238. "csksc56011987": {korean.EUCKR, "euc-kr"},
  239. "euc-kr": {korean.EUCKR, "euc-kr"},
  240. "iso-ir-149": {korean.EUCKR, "euc-kr"},
  241. "korean": {korean.EUCKR, "euc-kr"},
  242. "ks_c_5601-1987": {korean.EUCKR, "euc-kr"},
  243. "ks_c_5601-1989": {korean.EUCKR, "euc-kr"},
  244. "ksc5601": {korean.EUCKR, "euc-kr"},
  245. "ksc_5601": {korean.EUCKR, "euc-kr"},
  246. "windows-949": {korean.EUCKR, "euc-kr"},
  247. "csiso2022kr": {encoding.Replacement, "replacement"},
  248. "iso-2022-kr": {encoding.Replacement, "replacement"},
  249. "iso-2022-cn": {encoding.Replacement, "replacement"},
  250. "iso-2022-cn-ext": {encoding.Replacement, "replacement"},
  251. "utf-16be": {unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM), "utf-16be"},
  252. "utf-16": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
  253. "utf-16le": {unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM), "utf-16le"},
  254. "x-user-defined": {charmap.XUserDefined, "x-user-defined"},
  255. }