// prism-js-templates.js (9.5 KB)
  1. (function (Prism) {
  // Pieces of the JavaScript grammar that this file builds on
  // (the patterns themselves are declared in prism-javascript.js).
  var templateString = Prism.languages.javascript['template-string'];
  var interpolationObject = templateString.inside.interpolation;
  // Regex sources are kept as plain strings so they can be embedded in new RegExp objects.
  var templateLiteralPattern = templateString.pattern.source;
  var interpolationPattern = interpolationObject.pattern.source;
  var interpolationPunctuationObject = interpolationObject.inside['interpolation-punctuation'];
  /**
   * Builds the grammar entry for a template string that is introduced by a specific tag.
   *
   * Yields `undefined` when no grammar is registered under the given language id.
   *
   * @param {string} language The language id of the embedded language. E.g. `markdown`.
   * @param {string} tag The source of the regex that matches the tag.
   * @returns {object | undefined}
   * @example
   * createTemplate('css', /\bcss/.source);
   */
  function createTemplate(language, tag) {
    var embeddedGrammar = Prism.languages[language];
    if (embeddedGrammar) {
      // The backticks are matched first; whatever remains is the embedded code,
      // aliased with the id of the embedded language.
      var inside = {};
      inside['template-punctuation'] = {
        pattern: /^`|`$/,
        alias: 'string'
      };
      inside['embedded-code'] = {
        pattern: /[\s\S]+/,
        alias: language
      };

      // Group 1 captures the tag and any whitespace after it; the template literal pattern follows.
      var source = '((?:' + tag + ')\\s*)' + templateLiteralPattern;

      return {
        pattern: RegExp(source),
        lookbehind: true,
        greedy: true,
        inside: inside
      };
    }
    return undefined;
  }
  // Template literals that embed another language, as [language id, tag regex source] pairs.
  var embeddedTemplateTags = [
    // styled-jsx:
    //     css`a { color: #25F; }`
    // styled-components:
    //     styled.h1`color: red;`
    ['css', /\b(?:styled(?:\([^)]*\))?(?:\s*\.\s*\w+(?:\([^)]*\))*)*|css(?:\s*\.\s*(?:global|resolve))?|createGlobalStyle|keyframes)/.source],
    // html`<p></p>`
    // div.innerHTML = `<p></p>`
    ['html', /\bhtml|\.\s*(?:inner|outer)HTML\s*\+?=/.source],
    // svg`<path fill="#fff" d="M55.37 ..."/>`
    ['svg', /\bsvg/.source],
    // md`# h1`, markdown`## h2`
    ['markdown', /\b(?:md|markdown)/.source],
    // gql`...`, graphql`...`, graphql.experimental`...`
    ['graphql', /\b(?:gql|graphql(?:\s*\.\s*experimental)?)/.source]
  ];

  Prism.languages.javascript['template-string'] = embeddedTemplateTags
    .map(function (entry) {
      return createTemplate(entry[0], entry[1]);
    })
    // vanilla template string
    .concat([templateString])
    // createTemplate returns undefined for languages that aren't registered; drop those entries
    .filter(Boolean);
  /**
   * Builds the placeholder literal for the given counter value and language.
   *
   * The result has the form `___<LANGUAGE>_<counter>___`, with the language id upper-cased.
   *
   * @param {number} counter
   * @param {string} language
   * @returns {string}
   */
  function getPlaceholder(counter, language) {
    var head = '___' + language.toUpperCase();
    var tail = '_' + counter + '___';
    return head + tail;
  }
  /**
   * Tokenizes like `Prism.tokenize`, but additionally runs the `before-tokenize` and
   * `after-tokenize` hooks around the tokenization.
   *
   * @param {string} code
   * @param {any} grammar
   * @param {string} language
   * @returns {(string|Token)[]}
   */
  function tokenizeWithHooks(code, grammar, language) {
    var env = {};
    env.code = code;
    env.grammar = grammar;
    env.language = language;

    Prism.hooks.run('before-tokenize', env);
    // Code and grammar are read back from env, so changes made by the hook above take effect.
    env.tokens = Prism.tokenize(env.code, env.grammar);
    Prism.hooks.run('after-tokenize', env);

    // Returned from env so a replacement made by an `after-tokenize` hook is honoured.
    return env.tokens;
  }
  /**
   * Builds the `interpolation` token for a JavaScript interpolation expression.
   *
   * @param {string} expression The code of the expression. E.g. `"${42}"`
   * @returns {Token}
   */
  function tokenizeInterpolationExpression(expression) {
    // A throwaway grammar that only knows the interpolation punctuation.
    var punctuationGrammar = {
      'interpolation-punctuation': interpolationPunctuationObject
    };

    /** @type {Array} */
    var parts = Prism.tokenize(expression, punctuationGrammar);

    /*
     * When the expression splits into exactly three parts, they look like this:
     * [
     *     ["interpolation-punctuation", "${"]
     *     "..." // JavaScript expression of the interpolation
     *     ["interpolation-punctuation", "}"]
     * ]
     * The middle string is then swapped, in place, for its JavaScript tokens.
     */
    if (parts.length === 3) {
      var jsTokens = tokenizeWithHooks(parts[1], Prism.languages.javascript, 'javascript');
      Array.prototype.splice.apply(parts, [1, 1].concat(jsTokens));
    }

    return new Prism.Token('interpolation', parts, interpolationObject.alias, expression);
  }
  /**
   * Tokenizes the given code with support for JavaScript interpolation expressions mixed in.
   *
   * The work happens in 3 phases:
   *
   * 1. Every JavaScript interpolation expression is replaced with a placeholder.
   *    The placeholder has the syntax of an identifier of the target language.
   * 2. The code, now containing placeholders, is tokenized.
   * 3. The interpolation expressions are tokenized and re-inserted into the tokenized code.
   *    Re-insertion only works if a placeholder hasn't been "ripped apart", i.e. tokenized as
   *    two tokens by the grammar of the embedded language.
   *
   * @param {string} code
   * @param {object} grammar
   * @param {string} language
   * @returns {Token}
   */
  function tokenizeEmbedded(code, grammar, language) {
    // Phase 1: separate the interpolations from the rest of the code.
    // They might be escaped, which needs a lookbehind, so Prism does the splitting.
    /** @type {(Token|string)[]} */
    var pieces = Prism.tokenize(code, {
      'interpolation': {
        pattern: RegExp(interpolationPattern),
        lookbehind: true
      }
    });

    // Swap each interpolation for a placeholder that does not already occur in the code.
    var placeholderCounter = 0;
    /** @type {Object<string, string>} */
    var placeholderMap = {};
    var embeddedCode = '';
    for (var p = 0; p < pieces.length; p++) {
      var piece = pieces[p];
      if (typeof piece === 'string') {
        embeddedCode += piece;
        continue;
      }
      var stub;
      do {
        stub = getPlaceholder(placeholderCounter++, language);
      } while (code.indexOf(stub) !== -1);
      placeholderMap[stub] = piece.content;
      embeddedCode += stub;
    }

    // Phase 2: tokenize the embedded code.
    var embeddedTokens = tokenizeWithHooks(embeddedCode, grammar, language);

    // Phase 3: put the interpolations back, in the order their placeholders were created.
    var placeholders = Object.keys(placeholderMap);
    placeholderCounter = 0;

    /**
     * Walks the token stream and replaces the next expected placeholder wherever it is found
     * inside a string or inside a token whose content is a string. Mutates `tokens` in place.
     *
     * @param {(Token|string)[]} tokens
     * @returns {void}
     */
    function walkTokens(tokens) {
      for (var i = 0; i < tokens.length; i++) {
        if (placeholderCounter >= placeholders.length) {
          // every placeholder has been re-inserted
          return;
        }

        var token = tokens[i];
        var isPlainString = typeof token === 'string';

        if (!isPlainString && typeof token.content !== 'string') {
          // nested token(s): descend
          var nested = token.content;
          walkTokens(Array.isArray(nested) ? nested : [nested]);
          continue;
        }

        var text = isPlainString ? token : /** @type {string} */ (token.content);
        var placeholder = placeholders[placeholderCounter];
        var at = text.indexOf(placeholder);
        if (at === -1) {
          continue;
        }

        ++placeholderCounter;

        var replacement = [];
        var before = text.substring(0, at);
        if (before) {
          replacement.push(before);
        }
        replacement.push(tokenizeInterpolationExpression(placeholderMap[placeholder]));

        var after = text.substring(at + placeholder.length);
        if (after) {
          // the remainder may contain further placeholders
          var rest = [after];
          walkTokens(rest);
          Array.prototype.push.apply(replacement, rest);
        }

        if (isPlainString) {
          Array.prototype.splice.apply(tokens, [i, 1].concat(replacement));
          // skip over the entries that were just inserted
          i += replacement.length - 1;
        } else {
          token.content = replacement;
        }
      }
    }

    walkTokens(embeddedTokens);

    return new Prism.Token(language, embeddedTokens, 'language-' + language, code);
  }
  /**
   * The languages for which JS templating will handle tagged template literals.
   *
   * JS templating is active not only for JavaScript but also for related languages like
   * TypeScript, JSX, and TSX.
   */
  var supportedLanguages = {
    'javascript': true,
    'js': true,
    'typescript': true,
    'ts': true,
    'jsx': true,
    'tsx': true,
  };

  // After a supported language has been tokenized, re-tokenize the content of every
  // template string that was recognized as containing embedded code.
  Prism.hooks.add('after-tokenize', function (env) {
    // Own-property check: the `in` operator would also accept names inherited from
    // Object.prototype (e.g. a language id of "constructor" or "toString").
    if (!Object.prototype.hasOwnProperty.call(supportedLanguages, env.language)) {
      return;
    }

    /**
     * Finds and tokenizes all template strings with an embedded language.
     *
     * Matching `template-string` tokens are modified in place.
     *
     * @param {(Token | string)[]} tokens
     * @returns {void}
     */
    function findTemplateStrings(tokens) {
      for (var i = 0, l = tokens.length; i < l; i++) {
        var token = tokens[i];

        if (typeof token === 'string') {
          continue;
        }

        var content = token.content;
        if (!Array.isArray(content)) {
          // a single nested token is searched as a one-element stream; plain text is skipped
          if (typeof content !== 'string') {
            findTemplateStrings([content]);
          }
          continue;
        }

        if (token.type === 'template-string') {
          /**
           * A JavaScript template-string token will look like this:
           *
           * ["template-string", [
           *     ["template-punctuation", "`"],
           *     (
           *         An array of "string" and "interpolation" tokens. This is the simple string case.
           *         or
           *         ["embedded-code", "..."] This is the token containing the embedded code.
           *                                  It also has an alias which is the language of the embedded code.
           *     ),
           *     ["template-punctuation", "`"]
           * ]]
           */

          var embedded = content[1];
          if (content.length === 3 && typeof embedded !== 'string' && embedded.type === 'embedded-code') {
            // get string content
            var code = stringContent(embedded);

            var alias = embedded.alias;
            var language = Array.isArray(alias) ? alias[0] : alias;

            var grammar = Prism.languages[language];
            if (!grammar) {
              // the embedded language isn't registered.
              continue;
            }

            content[1] = tokenizeEmbedded(code, grammar, language);
          }
        } else {
          findTemplateStrings(content);
        }
      }
    }

    findTemplateStrings(env.tokens);
  });
  /**
   * Flattens a token or token stream into the plain text it contains.
   *
   * Strings are returned as they are, arrays are flattened element by element, and anything
   * else is treated as a token whose `content` is flattened.
   *
   * @param {string | Token | (string | Token)[]} value
   * @returns {string}
   */
  function stringContent(value) {
    if (typeof value === 'string') {
      return value;
    }
    if (!Array.isArray(value)) {
      return stringContent(value.content);
    }
    return value.reduce(function (text, part) {
      return text + stringContent(part);
    }, '');
  }
  295. }(Prism));