2024-11-06 20:57:07 -07:00
|
|
|
import {htmlEscape} from 'escape-goat';
|
|
|
|
|
|
|
|
/**
 * Lookup table from an element's `tagName` (e.g. `'STRONG'`, `'LI'`) to the
 * handler that converts that kind of element.
 *
 * A handler may:
 * - return a string, which replaces the element as a text node;
 * - return a different element, which replaces the original element;
 * - return the element itself or nothing, after updating it in place.
 */
type Processors = Record<string, (el: HTMLElement) => string | HTMLElement | void>;
|
|
|
|
|
|
|
|
/**
 * Mutable traversal state shared between the tree walker and the handlers.
 */
interface ProcessorContext {
  /** Set by the walker to whether the child being visited is at index 0 of its parent's element children. */
  elementIsFirst: boolean;
  /** Set by the walker to whether the child being visited is the final element child of its parent. */
  elementIsLast: boolean;
  /** Incremented when the walker enters an `OL`/`UL` and decremented once its children have been visited. */
  listNestingLevel: number;
}
|
|
|
|
|
|
|
|
/**
 * Builds the table of per-tag handlers used to turn HTML elements into Markdown.
 *
 * The handlers close over `ctx` and read it at call time, so the caller can keep
 * mutating the same context object while walking the tree.
 *
 * @param ctx Shared traversal state; `listNestingLevel` and `elementIsLast` are
 *            consulted by the list handlers.
 * @returns A table keyed by upper-case tag name (`H1`-`H6`, `STRONG`, `EM`, `DEL`,
 *          `A`, `IMG`, `P`, `BLOCKQUOTE`, `OL`, `UL`, `LI`, `INPUT`, `CODE`).
 */
function prepareProcessors(ctx: ProcessorContext): Processors {
  // Length of the longest run of consecutive backticks in `text` (0 if none).
  const longestBacktickRun = (text: string): number => {
    let longest = 0;
    for (const run of text.match(/`+/g) ?? []) {
      longest = Math.max(longest, run.length);
    }
    return longest;
  };

  // Shared by H1..H6: the heading level is the digit in the tag name.
  const heading = (el: HTMLElement): void => {
    const level = Number.parseInt(el.tagName.slice(1), 10);
    el.textContent = `${'#'.repeat(level)} ${(el.textContent ?? '').trim()}`;
  };

  // Shared by OL and UL: a list nested in another list starts on its own line.
  const listContainer = (el: HTMLElement): void => {
    const preNewLine = ctx.listNestingLevel ? '\n' : '';
    el.textContent = `${preNewLine}${el.textContent ?? ''}\n`;
  };

  const processors: Processors = {
    H1: heading,
    STRONG(el) {
      return `**${el.textContent ?? ''}**`;
    },
    EM(el) {
      return `_${el.textContent ?? ''}_`;
    },
    DEL(el) {
      return `~~${el.textContent ?? ''}~~`;
    },
    A(el) {
      const text = el.textContent || 'link';
      const href = el.getAttribute('href');
      // A bare URL whose text equals its target is auto-linked by Markdown already.
      if (/^https?:/.test(text) && text === href) {
        return text;
      }
      return href ? `[${text}](${href})` : text;
    },
    IMG(el) {
      const alt = el.getAttribute('alt') || 'image';
      // A missing src becomes an empty target instead of the literal "null".
      const src = el.getAttribute('src') ?? '';
      const sizeAttrs = ['width', 'height']
        .map((name) => (el.hasAttribute(name) ? ` ${name}="${htmlEscape(el.getAttribute(name) || '')}"` : ''))
        .join('');
      // Markdown image syntax cannot carry dimensions, so fall back to raw HTML.
      if (sizeAttrs) {
        return `<img alt="${htmlEscape(alt)}"${sizeAttrs} src="${htmlEscape(src)}">`;
      }
      return `![${alt}](${src})`;
    },
    P(el) {
      el.textContent = `${el.textContent ?? ''}\n`;
    },
    BLOCKQUOTE(el) {
      // Prefix every line of the quote with "> ".
      el.textContent = `${(el.textContent ?? '').replace(/^/mg, '> ')}\n`;
    },
    OL: listContainer,
    UL: listContainer,
    LI(el) {
      const parent = el.parentNode as HTMLElement | null;
      const bullet = parent?.tagName === 'OL' ? '1. ' : '* ';
      const nestingIndentLevel = Math.max(0, ctx.listNestingLevel - 1);
      const text = el.textContent ?? '';
      // Separate this item from the next one, but do not add a second newline
      // when the content (e.g. a nested list) already ends with one: the
      // resulting blank line would change how the list is rendered.
      const separator = ctx.elementIsLast || text.endsWith('\n') ? '' : '\n';
      el.textContent = `${' '.repeat(nestingIndentLevel * 4)}${bullet}${text}${separator}`;
      return el;
    },
    INPUT(el) {
      return (el as HTMLInputElement).checked ? '[x] ' : '[ ] ';
    },
    CODE(el) {
      const text = el.textContent ?? '';
      const backtickRun = longestBacktickRun(text);
      const parent = el.parentNode as HTMLElement | null;
      if (parent?.tagName === 'PRE') {
        // The fence must be longer than any backtick run in the content.
        const fence = '`'.repeat(Math.max(3, backtickRun + 1));
        el.textContent = `${fence}\n${text}\n${fence}\n`;
        return el;
      }
      if (backtickRun === 0) {
        return `\`${text}\``;
      }
      // Inline code containing backticks needs a longer delimiter, padded with
      // spaces so that leading/trailing backticks are not merged into it.
      const delimiter = '`'.repeat(backtickRun + 1);
      return `${delimiter} ${text} ${delimiter}`;
    },
  };

  for (let level = 2; level <= 6; level++) {
    processors[`H${level}`] = heading;
  }
  return processors;
}
|
|
|
|
|
2024-11-11 04:13:57 -07:00
|
|
|
/**
 * Converts `el` and all of its descendant elements to Markdown in place.
 *
 * Children are converted first (depth-first), then the handler registered for
 * `el.tagName` (if any) is run on `el`. When the handler returns a string or a
 * different element, `el` is replaced by it in the DOM.
 *
 * @param ctx Shared traversal state; updated while walking and read by handlers.
 * @param processors Table of handlers keyed by tag name.
 * @param el Element to convert. It is modified and may be replaced in its parent.
 * @returns The element's text when it is marked `data-markdown-generated-content`,
 *          otherwise nothing.
 */
function processElement(ctx: ProcessorContext, processors: Processors, el: HTMLElement): string | void {
  // Elements carrying this attribute are left exactly as they are.
  if (el.hasAttribute('data-markdown-generated-content')) return el.textContent ?? '';

  // A link whose only element child is an image is converted as the image
  // alone; the A handler is not run for it.
  const onlyChild = el.children.length === 1 ? el.children[0] : undefined;
  if (el.tagName === 'A' && onlyChild?.tagName === 'IMG') {
    return processElement(ctx, processors, onlyChild as HTMLElement);
  }

  // Remember this element's own position flags: visiting the children below
  // overwrites them, and the handler for `el` must see the values that
  // describe `el`, not its last descendant.
  const {elementIsFirst, elementIsLast} = ctx;

  const isListContainer = el.tagName === 'OL' || el.tagName === 'UL';
  if (isListContainer) ctx.listNestingLevel++;

  // `el.children` is a live collection and children may be replaced by text
  // nodes while they are processed, which would shift the remaining indexes.
  // Iterate over a snapshot so that no sibling is skipped.
  const children = Array.from(el.children);
  for (const [index, child] of children.entries()) {
    ctx.elementIsFirst = index === 0;
    ctx.elementIsLast = index === children.length - 1;
    processElement(ctx, processors, child as HTMLElement);
  }

  if (isListContainer) ctx.listNestingLevel--;
  ctx.elementIsFirst = elementIsFirst;
  ctx.elementIsLast = elementIsLast;

  const processor = processors[el.tagName];
  if (!processor) return;

  const ret = processor(el);
  // An empty/absent result or the element itself means "already updated in place".
  if (ret && ret !== el) {
    el.replaceWith(typeof ret === 'string' ? document.createTextNode(ret) : ret);
  }
}
|
|
|
|
|
|
|
|
/**
 * Converts an element tree to Markdown text.
 *
 * Note that `el` is consumed: it is appended to a temporary wrapper (which
 * removes it from any previous parent) and rewritten in place.
 *
 * @param el Root element to convert.
 * @returns The Markdown text of the converted tree.
 */
export function convertHtmlToMarkdown(el: HTMLElement): string {
  // The wrapper gives `el` a parent so that it can itself be replaced during
  // processing; the result is then read from the wrapper.
  const div = document.createElement('div');
  div.append(el);

  // The position flags start out falsy for the root element.
  const ctx: ProcessorContext = {elementIsFirst: false, elementIsLast: false, listNestingLevel: 0};
  processElement(ctx, prepareProcessors(ctx), el);
  return div.textContent ?? '';
}
|