-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
621 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
/**
 * Commonly used match patterns.
 *
 * Each entry pairs a token `type` with a global (`g`) regular expression.
 * Language definitions presumably pull an entry in by key, e.g.
 * `{ expand: 'str' }` — confirm against the tokenizer.
 */
const commonPatterns = {
    // Numeric literal: a digit followed by digits, dots, underscores,
    // hex digits, the `o`/`x` radix letters or an `e-` exponent.
    num: {
        type: 'num',
        match: /(\.e?|\b)\d(e-|[\d.oxa-fA-F_])*(\.|\b)/g
    },
    // Single- or double-quoted string on one line, with backslash escapes.
    // The closing quote is optional so an unterminated string still matches.
    str: {
        type: 'str',
        match: /(["'])(\\[^]|(?!\1)[^\r\n\\])*\1?/g
    },
    // Same as `str` but for double quotes only.
    strDouble: {
        type: 'str',
        match: /"((?!")[^\r\n\\]|\\[^])*"?/g
    }
};

export default commonPatterns;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
/** | ||
* @module detect | ||
* (Language detector) | ||
*/ | ||
|
||
import './typedef.js' | ||
|
||
/**
 * Detection features, one entry per language: the language name followed by
 * any number of `[pattern, score]` pairs. Every match of `pattern` in the
 * code adds `score` to that language's total.
 *
 * Order matters: when two languages reach the same total, the one listed
 * first wins. Commented-out entries are detectors that are currently disabled.
 * @type {Array.<[ShjLanguage, ...[RegExp, Number][]]>}
 */
const languages = [
    ['bash', [/#!(\/usr)?\/bin\/bash/g, 500], [/\b(if|elif|then|fi|echo)\b|\$/g, 10]],
    ['html', [/<\/?[a-z-]+[^\n>]*>/g, 10], [/^\s+<!DOCTYPE\s+html/g, 500]],
    ['http', [/^(GET|HEAD|POST|PUT|DELETE|PATCH|HTTP)\b/g, 500]],
    ['js', [/\b(console|await|async|function|export|import|this|class|for|let|const|map|join|require)\b/g, 10]],
    ['ts', [/\b(console|await|async|function|export|import|this|class|for|let|const|map|join|require|implements|interface|namespace)\b/g, 10]],
    //['py', [/\b(def|print|class|and|or|lambda)\b/g, 10]],
    ['sql', [/\b(SELECT|INSERT|FROM)\b/g, 50]],
    //['pl', [/#!(\/usr)?\/bin\/perl/g, 500], [/\b(use|print)\b|\$/g, 10]],
    ['lua', [/#!(\/usr)?\/bin\/lua/g, 500]],
    ['make', [/\b(ifneq|endif|if|elif|then|fi|echo|.PHONY|^[a-z]+ ?:$)\b|\$/gm, 10]],
    ['uri', [/https?:|mailto:|tel:|ftp:/g, 30]],
    // Listed once only: a second identical `css` entry scored the same and
    // could never win, so it was pure duplicated work.
    ['css', [/^(@import|@page|@media|(\.|#)[a-z]+)/gm, 20]],
    //['diff', [/^[+><-]/gm, 10], [/^@@ ?[-+,0-9 ]+ ?@@/gm, 25]],
    //['md', [/^(>|\t\*|\t\d+.)/gm, 10], [/\[.*\](.*)/g, 10]],
    ['docker', [/^(FROM|ENTRYPOINT|RUN)/gm, 500]],
    ['xml', [/<\/?[a-z-]+[^\n>]*>/g, 10], [/^<\?xml/g, 500]],
    //['c', [/#include\b|\bprintf\s+\(/g, 100]],
    //['rs', [/^\s+(use|fn|mut|match)\b/gm, 100]],
    ['go', [/\b(func|fmt|package)\b/g, 100]],
    //['java', [/^import\s+java/gm, 500]],
    //['asm', [/^(section|global main|extern|\t(call|mov|ret))/gm, 100]],
    //['json', [/\b(true|false|null|\{})\b|\"[^"]+\":/g, 10]],
    ['json', [/((\[)?(\s*)?)({((\s*)?\"[^"]+\"(\s*)?:(\s*)?(\b(true|false|null|[0-9]+)\b|\"[^"]+\")(,)?(\s*)?)+})+(\s*)?(\])?/g, 10]],
    ['yaml', [/^(\s+)?[a-z][a-z0-9]*:/gmi, 10]]
];

/**
 * @function detectLanguage
 * Try to find the language the given code belongs to.
 *
 * Each language's total is the sum, over its features, of
 * (number of matches) x (feature score). The language with the highest total
 * above 5 is returned; on a tie the language listed first in `languages` wins.
 * @param {String} code The code (a nullish value is treated as empty)
 * @returns {ShjLanguage} The language of the code, or 'plain' if none scores above 5
 */
export const detectLanguage = code => {
    const text = String(code ?? '');
    let bestLang = 'plain';
    // A language must score strictly above this threshold to be selected.
    let bestScore = 5;

    for (const [lang, ...features] of languages) {
        let total = 0;
        for (const [pattern, score] of features) {
            // matchAll works on a clone of the regex, so the shared
            // patterns' lastIndex is never disturbed.
            total += [...text.matchAll(pattern)].length * score;
        }
        // Strict comparison keeps the earliest language on ties.
        if (total > bestScore) {
            bestLang = lang;
            bestScore = total;
        }
    }
    return bestLang;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
/** | ||
* @module index | ||
* (Base script) | ||
*/ | ||
|
||
import './typedef.js' | ||
|
||
import expandData from './common.js'; | ||
import { detectLanguage } from './detect.js'; | ||
|
||
// Language definitions by name. `tokenize` fills this lazily with the
// promise of a dynamic import; `loadLanguage` stores definitions directly.
const langs = {};

/**
 * @function
 * @ignore
 * Escape the characters that are significant in HTML so the text can be
 * inserted as markup. `&` is replaced first so the entities produced for
 * `<` and `>` are not escaped a second time.
 * @param {String} [str=''] The raw text
 * @returns {String} The HTML-escaped text
 */
const sanitize = (str = '') =>
    str.replaceAll('&', '&amp;').replaceAll('<', '&lt;').replaceAll('>', '&gt;');

/**
 * @function
 * @ignore
 * Create a HTML element with the right token styling
 * @param {String} str The content (needs to be sanitized beforehand)
 * @param {ShjToken} [token] The type of token
 * @returns {String} A HTML string; `str` unchanged when no token type is given
 */
const toSpan = (str, token) => token ? `<span class="shj-syn-${token}">${str}</span>` : str;
|
||
/**
 * @function tokenize
 * Find the tokens in the given code and call the callback for each piece,
 * in source order. Text between two matches is reported with the language's
 * own `type` (usually undefined).
 *
 * This is best-effort: if anything fails (unknown language, broken
 * definition, ...) the text that has not been reported yet is passed to the
 * callback as one untyped piece instead of throwing.
 * @param {String} src The code
 * @param {ShjLanguage|Array} lang The language of the code: either a name
 * (loaded from `./languages/<name>.js` and cached) or an object with a `sub`
 * list of patterns
 * @param {function(String, ShjToken=):void} token The callback function
 * this function will be given
 * * the text of the token
 * * the type of the token
 */
export async function tokenize(src, lang, token) {
    // Offset of the first character of `src` not yet handed to `token`.
    let emitted = 0;
    try {
        const isNamed = typeof lang === 'string';
        const data = isNamed ? (await (langs[lang] ??= import(`./languages/${lang}.js`))) : lang;
        // make a fast shallow copy to be able to splice the list without changing the original one
        const arr = [...(isNamed ? data.default : lang.sub)];
        // cache[m] holds the latest exec() result of arr[m]; kept index-aligned with arr
        const cache = [];
        let first = {};
        let i = 0;

        while (i < src.length) {
            first.index = null;
            // Walk backwards so splicing is safe and so that, thanks to `<=`
            // below, the earliest pattern in the list wins when several
            // patterns match at the same offset.
            for (let m = arr.length; m-- > 0;) {
                const part = arr[m].expand ? expandData[arr[m].expand] : arr[m];
                // do not call exec again if the previous result is still ahead of the cursor
                if (cache[m] === undefined || cache[m].match.index < i) {
                    part.match.lastIndex = i;
                    const match = part.match.exec(src);
                    if (match === null) {
                        // this regex cannot match any more: drop it
                        arr.splice(m, 1);
                        cache.splice(m, 1);
                        continue;
                    }
                    // save the match for later use to decrease performance cost
                    cache[m] = { match, lastIndex: part.match.lastIndex };
                }
                // check if it is the first non-empty match in the string
                if (cache[m].match[0] && (cache[m].match.index <= first.index || first.index === null)) {
                    first = {
                        part: part,
                        index: cache[m].match.index,
                        match: cache[m].match[0],
                        end: cache[m].lastIndex
                    };
                }
            }
            if (first.index === null) {
                break;
            }
            token(src.slice(i, first.index), data.type);
            emitted = first.index;
            i = first.end;
            if (first.part.sub) {
                await tokenize(first.match, typeof first.part.sub === 'string' ? first.part.sub : (typeof first.part.sub === 'function' ? first.part.sub(first.match) : first.part), token);
            }
            else {
                token(first.match, first.part.type);
            }
            emitted = i;
        }
        token(src.slice(i, src.length), data.type);
    }
    catch {
        // Deliberate best-effort fallback. Only the part that was not reported
        // yet is emitted, so a failure in the middle of the text does not
        // duplicate the tokens already produced. When nothing was reported,
        // `src` is passed through untouched.
        token(emitted > 0 ? src.slice(emitted) : src);
    }
}
|
||
/**
 * @function highlightText
 * @async
 * Highlight a string passed as argument and return it
 * @example
 * elm.innerHTML = await highlightText(code, 'js');
 * @param {String} src The code
 * @param {ShjLanguage} lang The language of the code
 * @param {Boolean} [multiline=true] If it is multiline, it will add a wrapper for the line numbering and header
 * @param {ShjOptions} [opt={}] Customization options (`hideLineNumbers` is the only one read here)
 * @returns {Promise<String>} The highlighted string
 */
export async function highlightText(src, lang, multiline = true, opt = {}) {
    let html = '';
    // Every piece of text is escaped before being wrapped in its token span.
    await tokenize(src, lang, (str, type) => {
        html += toSpan(sanitize(str), type);
    });

    if (!multiline) {
        return html;
    }

    // One empty <div> per source line; none when line numbers are hidden.
    const lineCount = opt?.hideLineNumbers ? 0 : src.split('\n').length;
    return `<div><div class="shj-numbers">${'<div></div>'.repeat(lineCount)}</div><div>${html}</div></div>`;
}
|
||
/**
 * @function highlightElement
 * @async
 * Highlight a DOM element by replacing its innerHTML with the result of
 * highlightText. The language is detected from the element's trimmed text
 * with detectLanguage; it is stored in `data-lang` and the element's class
 * is replaced by `shj-lang-<lang>`. The multiline layout is always used.
 * @param {Element} elm The DOM element
 * @param {ShjOptions} [opt={}] Customization options
 */
export async function highlightElement(elm, opt) {
    // innerText only exists on HTML elements; fall back to textContent otherwise.
    const txt = (elm.innerText ?? elm.textContent ?? '').trim();
    const lang = detectLanguage(txt);

    elm.dataset.lang = lang;
    // Overwrites every class previously set on the element.
    elm.className = `shj-lang-${lang}`;
    elm.innerHTML = await highlightText(txt, lang, true, opt);
}
|
||
/**
 * @function highlightAll
 * @async
 * Call highlightElement on every `codeblock` element of the document.
 * The returned promise settles only once all elements have been highlighted,
 * and rejects if highlighting any of them fails.
 * @param {ShjOptions} [opt={}] Customization options
 * @returns {Promise<void>}
 */
export const highlightAll = async (opt) => {
    // The elements are highlighted concurrently. A plain forEach would drop
    // the promises, so callers could not wait for completion or see errors.
    await Promise.all(
        Array.from(document.querySelectorAll('codeblock'), elm => highlightElement(elm, opt))
    );
};
|
||
/**
 * @function loadLanguage
 * Register a language in the langs object, replacing any language already
 * stored under that name.
 * @param {String} languageName The name of the language
 * @param {ShjLanguage} language The language. `tokenize` reads the pattern
 * list from its `default` property, so it must be shaped like the module of
 * a language file (`{ default: [...] }`).
 */
export const loadLanguage = (languageName, language) => {
    langs[languageName] = language;
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
/**
 * Go language definition: an ordered list of token patterns.
 * Order matters — when two patterns match at the same offset the tokenizer
 * keeps the one listed first, so comments and strings take precedence over
 * keywords and operators.
 */
const go = [
    {
        // Line comment (with its newline) or block comment; an unterminated
        // block comment runs to the end. Content is tokenized as 'todo'.
        match: /\/\/.*\n?|\/\*((?!\*\/)[^])*(\*\/)?/g,
        sub: 'todo'
    },
    {
        expand: 'str'
    },
    {
        expand: 'num'
    },
    {
        // The 25 Go keywords, plus the pointer operators `*` and `&`.
        type: 'kwd',
        match: /\*|&|\b(break|case|chan|const|continue|default|defer|else|fallthrough|for|func|go|goto|if|import|interface|map|package|range|return|select|struct|switch|type|var)\b/g
    },
    {
        // Identifier directly followed by an opening parenthesis.
        type: 'func',
        match: /[a-zA-Z_]\w*(?=\s*\()/g
    },
    {
        // Identifier starting with an upper-case letter.
        type: 'class',
        match: /\b[A-Z]\w*\b/g
    },
    {
        type: 'oper',
        match: /[+\-*\/%&|^~=!<>.]+/g
    }
];

export default go;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import xml, { property, xmlElement } from './xml.js' | ||
|
||
// HTML language definition: the DOCTYPE declaration, <style> and <script>
// elements (whose content is delegated to the 'css' and 'js' languages),
// followed by every pattern of the XML definition.
export default [
    {
        // <!DOCTYPE ...> declaration, quoted strings included.
        type: 'class',
        match: /<!DOCTYPE("[^"]*"|'[^']*'|[^"'>])*>/gi,
        sub: [
            {
                type: 'str',
                match: /"[^"]*"|'[^']*'/g
            },
            {
                // The leading `<!` and the trailing `>`.
                type: 'oper',
                match: /^<!|>$/g
            },
            {
                type: 'var',
                match: /DOCTYPE/gi
            }
        ]
    },
    {
        // Whole <style> element. The look-ahead uses the same `</style\s*>`
        // form as the terminator, so the match stops at the first closing tag
        // even when that tag contains whitespace before `>`.
        match: RegExp(`<style${property}>((?!</style\\s*>)[^])*</style\\s*>`, 'g'),
        sub: [
            {
                // Opening tag, highlighted like any XML element.
                match: RegExp(`^<style${property}>`, 'g'),
                sub: xmlElement.sub
            },
            {
                // Everything up to the final closing tag is CSS.
                // NOTE(review): `${xmlElement.match}` interpolates the value's
                // string form; if it is a RegExp that is `/source/flags`, not
                // the bare source — confirm this alternative is intended.
                match: RegExp(`${xmlElement.match}|[^]*(?=</style\\s*>$)`, 'g'),
                sub: 'css'
            },
            xmlElement
        ]
    },
    {
        // Whole <script> element; same structure as <style>, content is JS.
        match: RegExp(`<script${property}>((?!</script\\s*>)[^])*</script\\s*>`, 'g'),
        sub: [
            {
                match: RegExp(`^<script${property}>`, 'g'),
                sub: xmlElement.sub
            },
            {
                // NOTE(review): same string-form interpolation as in the
                // <style> rule — confirm.
                match: RegExp(`${xmlElement.match}|[^]*(?=</script\\s*>$)`, 'g'),
                sub: 'js'
            },
            xmlElement
        ]
    },
    ...xml
]
Oops, something went wrong.