Hi, I’m trying to correctly convert a markdown tecxt in html. Do you have any suggestions?
right now I do smth like this
export const code = async (inputs) => {
const language = 'en';
const org = inputs.org;
if (!inputs || typeof inputs.md !== 'string') {
throw new Error('Input Markdown not valid');
}
// ✅ Sanitize Markdown
const sanitizeMarkdown = (md) => {
return md
.replace(/\\n/g, '\n')
.replace(/\\\\/g, '\\')
.replace(/\\#/g, '#')
.replace(/\\\*/g, '*')
.replace(/\\"/g, '"')
.replace(/\\-/g, '-')
.replace(/\\_/g, '_')
.replace(/\\\[/g, '[')
.replace(/\\\]/g, ']')
.replace(/\\\(/g, '(')
.replace(/\\\)/g, ')')
.trim();
};
const cleanedMarkdown = sanitizeMarkdown(inputs.md)
.split('\n')
.filter((line, idx) => idx > 0 || line.trim() !== '')
.join('\n');
// ✅ Markdown to HTML (fallback)
const fallbackMarkdownToHTML = (md) => {
md = md
.replace(/^###### (.*)$/gm, '<h6>$1</h6>')
.replace(/^##### (.*)$/gm, '<h5>$1</h5>')
.replace(/^#### (.*)$/gm, '<h4>$1</h4>')
.replace(/^### (.*)$/gm, '<h3>$1</h3>')
.replace(/^## (.*)$/gm, '<h2>$1</h2>')
.replace(/^# (.*)$/gm, '<h1>$1</h1>');
// Grassetto e corsivo
md = md
.replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
.replace(/\*(.*?)\*/g, '<em>$1</em>');
// Multiline block Code
md = md.replace(/```([\s\S]*?)```/g, '<pre><code>$1</code></pre>');
// Code inline
md = md.replace(/`([^`]+)`/g, '<code>$1</code>');
// Blockquote
md = md.replace(/^>(.*)$/gm, '<blockquote>$1</blockquote>');
// Lists
md = md.replace(/^- (.*)$/gm, '<li>$1</li>');
// Paragraphs and line breaks
// 1) Open and close paragraph at beginning/end of text
md = '<p>' + md.trim() + '</p>';
// 2) Replace double \n with closing/opening p
md = md.replace(/\n{2,}/g, '</p><p>');
// 3) Replace single \n inside paragraphs with space
md = md.replace(/(?<!<\/p>)\n(?!<p>)/g, ' ');
// Clean up <li>s not enclosed by <ul>s
md = md.replace(/(<li>.*?<\/li>)/gs, '<ul>$1</ul>');
return md;
};
const htmlContent = fallbackMarkdownToHTML(cleanedMarkdown);
return `
<!DOCTYPE html>
<html lang="${language}">
<head>
<meta charset="UTF-8" />
<title>'Evaluation Cybersecurity Report' - ${org.name}</title>
<style>
@page {
margin: 2cm;
}
body {
font-family: 'Calibri', sans-serif;
font-size: 12px;
color: #000;
line-height: 1.5;
margin: 0;
padding: 0;
text-align: justify;
}
h1, h2, h3, h4, h5, h6 {
color: #000;
font-weight: bold;
margin-top: 20px;
margin-bottom: 10px;
}
table {
border-collapse: collapse;
width: 100%;
margin: 20px 0;
}
table th {
background-color: #007BFF;
color: white;
padding: 10px;
}
table td {
border: 1px solid #ccc;
padding: 10px;
}
blockquote {
margin: 20px 0;
padding-left: 15px;
border-left: 3px solid #ccc;
color: #555;
font-style: italic;
}
pre, code {
background-color: #f4f4f4;
border: 1px solid #ddd;
border-radius: 3px;
font-family: monospace;
font-size: 10px;
padding: 4px 8px;
}
a {
color: #007BFF;
text-decoration: none;
}
a:hover {
text-decoration: underline;
}
li {
margin-bottom: 5px;
}
</style>
</head>
<body>
${htmlContent}
</body>
</html>
`;
};
but in the end for example some # are not converted, and if I download the result, paste it in VsCode and open the html file on chrome, I found a lot of \n that are shown.
Thank you all