Hellthread(tm) Certified
This commit is contained in:
parent
0f73e96194
commit
cc00af7a31
13 changed files with 257 additions and 89 deletions
102
src/services/html_converter/html_line_converter.service.js
Normal file
102
src/services/html_converter/html_line_converter.service.js
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
/**
|
||||
* This is a tiny purpose-built HTML parser/processor. This basically detects
|
||||
* any type of visual newline and converts entire HTML into a array structure.
|
||||
*
|
||||
* Text nodes are represented as object with single property - text - containing
|
||||
* the visual line. Intended usage is to process the array with .map() in which
|
||||
* map function returns a string and resulting array can be converted back to html
|
||||
* with a .join('').
|
||||
*
|
||||
* Generally this isn't very useful except for when you really need to either
|
||||
* modify visual lines (greentext i.e. simple quoting) or do something with
|
||||
* first/last line.
|
||||
*
|
||||
* known issue: doesn't handle CDATA so nested CDATA might not work well
|
||||
*
|
||||
* @param {Object} input - input data
|
||||
* @return {(string|{ text: string })[]} processed html in form of a list.
|
||||
*/
|
||||
export const convertHtmlToLines = (html) => {
|
||||
const handledTags = new Set(['p', 'br', 'div'])
|
||||
const openCloseTags = new Set(['p', 'div'])
|
||||
|
||||
let buffer = [] // Current output buffer
|
||||
const level = [] // How deep we are in tags and which tags were there
|
||||
let textBuffer = '' // Current line content
|
||||
let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
|
||||
|
||||
// Extracts tag name from tag, i.e. <span a="b"> => span
|
||||
const getTagName = (tag) => {
|
||||
const result = /(?:<\/(\w+)>|<(\w+)\s?[^/]*?\/?>)/gi.exec(tag)
|
||||
return result && (result[1] || result[2])
|
||||
}
|
||||
|
||||
const flush = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
|
||||
if (textBuffer.trim().length > 0) {
|
||||
buffer.push({ text: textBuffer })
|
||||
} else {
|
||||
buffer.push(textBuffer)
|
||||
}
|
||||
textBuffer = ''
|
||||
}
|
||||
|
||||
const handleBr = (tag) => { // handles single newlines/linebreaks/selfclosing
|
||||
flush()
|
||||
buffer.push(tag)
|
||||
}
|
||||
|
||||
const handleOpen = (tag) => { // handles opening tags
|
||||
flush()
|
||||
buffer.push(tag)
|
||||
level.push(tag)
|
||||
}
|
||||
|
||||
const handleClose = (tag) => { // handles closing tags
|
||||
flush()
|
||||
buffer.push(tag)
|
||||
if (level[level.length - 1] === tag) {
|
||||
level.pop()
|
||||
}
|
||||
}
|
||||
|
||||
for (let i = 0; i < html.length; i++) {
|
||||
const char = html[i]
|
||||
if (char === '<' && tagBuffer === null) {
|
||||
tagBuffer = char
|
||||
} else if (char !== '>' && tagBuffer !== null) {
|
||||
tagBuffer += char
|
||||
} else if (char === '>' && tagBuffer !== null) {
|
||||
tagBuffer += char
|
||||
const tagFull = tagBuffer
|
||||
tagBuffer = null
|
||||
const tagName = getTagName(tagFull)
|
||||
if (handledTags.has(tagName)) {
|
||||
if (tagName === 'br') {
|
||||
handleBr(tagFull)
|
||||
} else if (openCloseTags.has(tagName)) {
|
||||
if (tagFull[1] === '/') {
|
||||
handleClose(tagFull)
|
||||
} else if (tagFull[tagFull.length - 2] === '/') {
|
||||
// self-closing
|
||||
handleBr(tagFull)
|
||||
} else {
|
||||
handleOpen(tagFull)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
textBuffer += tagFull
|
||||
}
|
||||
} else if (char === '\n') {
|
||||
handleBr(char)
|
||||
} else {
|
||||
textBuffer += char
|
||||
}
|
||||
}
|
||||
if (tagBuffer) {
|
||||
textBuffer += tagBuffer
|
||||
}
|
||||
|
||||
flush()
|
||||
|
||||
return buffer
|
||||
}
|
||||
146
src/services/html_converter/html_tree_converter.service.js
Normal file
146
src/services/html_converter/html_tree_converter.service.js
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
/**
|
||||
* This is a not-so-tiny purpose-built HTML parser/processor. This parses html
|
||||
* and converts it into a tree structure representing tag openers/closers and
|
||||
* children.
|
||||
*
|
||||
* Structure follows this pattern: [opener, [...children], closer] except root
|
||||
* node which is just [...children]. Text nodes can only be within children and
|
||||
* are represented as strings.
|
||||
*
|
||||
* Intended use is to convert HTML structure and then recursively iterate over it
|
||||
* most likely using a map. Very useful for dynamically rendering html replacing
|
||||
* tags with JSX elements in a render function.
|
||||
*
|
||||
* known issue: doesn't handle CDATA so CDATA might not work well
|
||||
* known issue: doesn't handle HTML comments
|
||||
*
|
||||
* @param {Object} input - input data
|
||||
* @return {string} processed html
|
||||
*/
|
||||
export const convertHtmlToTree = (html) => {
|
||||
// Elements that are implicitly self-closing
|
||||
// https://developer.mozilla.org/en-US/docs/Glossary/empty_element
|
||||
const emptyElements = new Set([
|
||||
'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
|
||||
'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr'
|
||||
])
|
||||
// TODO For future - also parse HTML5 multi-source components?
|
||||
|
||||
const buffer = [] // Current output buffer
|
||||
const levels = [['', buffer]] // How deep we are in tags and which tags were there
|
||||
let textBuffer = '' // Current line content
|
||||
let tagBuffer = null // Current tag buffer, if null = we are not currently reading a tag
|
||||
|
||||
const getCurrentBuffer = () => {
|
||||
return levels[levels.length - 1][1]
|
||||
}
|
||||
|
||||
const flushText = () => { // Processes current line buffer, adds it to output buffer and clears line buffer
|
||||
if (textBuffer === '') return
|
||||
getCurrentBuffer().push(textBuffer)
|
||||
textBuffer = ''
|
||||
}
|
||||
|
||||
const handleSelfClosing = (tag) => {
|
||||
getCurrentBuffer().push([tag])
|
||||
}
|
||||
|
||||
const handleOpen = (tag) => {
|
||||
const curBuf = getCurrentBuffer()
|
||||
const newLevel = [tag, []]
|
||||
levels.push(newLevel)
|
||||
curBuf.push(newLevel)
|
||||
}
|
||||
|
||||
const handleClose = (tag) => {
|
||||
const currentTag = levels[levels.length - 1]
|
||||
if (getTagName(levels[levels.length - 1][0]) === getTagName(tag)) {
|
||||
currentTag.push(tag)
|
||||
levels.pop()
|
||||
} else {
|
||||
getCurrentBuffer().push(tag)
|
||||
}
|
||||
}
|
||||
|
||||
for (let i = 0; i < html.length; i++) {
|
||||
const char = html[i]
|
||||
if (char === '<' && tagBuffer === null) {
|
||||
flushText()
|
||||
tagBuffer = char
|
||||
} else if (char !== '>' && tagBuffer !== null) {
|
||||
tagBuffer += char
|
||||
} else if (char === '>' && tagBuffer !== null) {
|
||||
tagBuffer += char
|
||||
const tagFull = tagBuffer
|
||||
tagBuffer = null
|
||||
const tagName = getTagName(tagFull)
|
||||
if (tagFull[1] === '/') {
|
||||
handleClose(tagFull)
|
||||
} else if (emptyElements.has(tagName) || tagFull[tagFull.length - 2] === '/') {
|
||||
// self-closing
|
||||
handleSelfClosing(tagFull)
|
||||
} else {
|
||||
handleOpen(tagFull)
|
||||
}
|
||||
} else {
|
||||
textBuffer += char
|
||||
}
|
||||
}
|
||||
if (tagBuffer) {
|
||||
textBuffer += tagBuffer
|
||||
}
|
||||
|
||||
flushText()
|
||||
return buffer
|
||||
}
|
||||
|
||||
// Extracts tag name from tag, i.e. <span a="b"> => span
|
||||
export const getTagName = (tag) => {
|
||||
const result = /(?:<\/(\w+)>|<(\w+)\s?.*?\/?>)/gi.exec(tag)
|
||||
return result && (result[1] || result[2])
|
||||
}
|
||||
|
||||
export const processTextForEmoji = (text, emojis, processor) => {
|
||||
const buffer = []
|
||||
let textBuffer = ''
|
||||
for (let i = 0; i < text.length; i++) {
|
||||
const char = text[i]
|
||||
if (char === ':') {
|
||||
const next = text.slice(i + 1)
|
||||
let found = false
|
||||
for (let emoji of emojis) {
|
||||
if (next.slice(0, emoji.shortcode.length + 1) === (emoji.shortcode + ':')) {
|
||||
found = emoji
|
||||
break
|
||||
}
|
||||
}
|
||||
if (found) {
|
||||
buffer.push(textBuffer)
|
||||
textBuffer = ''
|
||||
buffer.push(processor(found))
|
||||
i += found.shortcode.length + 1
|
||||
} else {
|
||||
textBuffer += char
|
||||
}
|
||||
} else {
|
||||
textBuffer += char
|
||||
}
|
||||
}
|
||||
if (textBuffer) buffer.push(textBuffer)
|
||||
return buffer
|
||||
}
|
||||
|
||||
export const getAttrs = tag => {
|
||||
const innertag = tag
|
||||
.substring(1, tag.length - 1)
|
||||
.replace(new RegExp('^' + getTagName(tag)), '')
|
||||
.replace(/\/?$/, '')
|
||||
.trim()
|
||||
const attrs = Array.from(innertag.matchAll(/([a-z0-9-]+)(?:=("[^"]+?"|'[^']+?'))?/gi))
|
||||
.map(([trash, key, value]) => [key, value])
|
||||
.map(([k, v]) => {
|
||||
if (!v) return [k, true]
|
||||
return [k, v.substring(1, v.length - 1)]
|
||||
})
|
||||
return Object.fromEntries(attrs)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue