From 28f1432648afb9a5f0c3a1e61450cc1407aedbf3 Mon Sep 17 00:00:00 2001 From: jetstream0 <49297268+jetstream0@users.noreply.github.com> Date: Fri, 28 Jul 2023 17:33:58 -0700 Subject: [PATCH] publishing --- .gitignore | 1 + npm/.npmignore | 1 + npm/index.d.ts | 11 + npm/index.js | 826 +++++++++++++++++++++++++++++++++++++++++++++++ npm/index.ts | 771 +++++++++++++++++++++++++++++++++++++++++++ npm/package.json | 22 ++ 6 files changed, 1632 insertions(+) create mode 100644 npm/.npmignore create mode 100644 npm/index.d.ts create mode 100644 npm/index.js create mode 100644 npm/index.ts create mode 100644 npm/package.json diff --git a/.gitignore b/.gitignore index 87ffa6a..ab47bdc 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.js +!npm/index.js tsconfig.json tsconfig-node.json tsconfig-web.json \ No newline at end of file diff --git a/npm/.npmignore b/npm/.npmignore new file mode 100644 index 0000000..bfe3b8d --- /dev/null +++ b/npm/.npmignore @@ -0,0 +1 @@ +tsconfig.json \ No newline at end of file diff --git a/npm/index.d.ts b/npm/index.d.ts new file mode 100644 index 0000000..31313f6 --- /dev/null +++ b/npm/index.d.ts @@ -0,0 +1,11 @@ +export type Warning = { + type: string; + message: string; + line_number?: number; +}; +export type ParseResult = { + html: string; + warnings: Warning[]; +}; +export declare function parse_md_to_html_with_warnings(md: string): ParseResult; +export declare function parse_md_to_html(md: string): string; diff --git a/npm/index.js b/npm/index.js new file mode 100644 index 0000000..c2ab588 --- /dev/null +++ b/npm/index.js @@ -0,0 +1,826 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.parse_md_to_html = exports.parse_md_to_html_with_warnings = void 0; +//some minor differences with markdown spec? +function parse_md_to_html_with_warnings(md) { + let html = ""; + let html_line = ""; + let warnings = []; + let line_number = 1; + //markdown parsing vars + let is_first_line = true; + let backslashed = false; + let heading_level = 0; + let in_heading = false; + let header_num = 0; + let asterisk_num = 0; + let asterisk_out_num = 0; + let in_asterisk = false; + let horizontal_num = 0; + let horizontal_rule = false; + let was_image = false; + let image_alt = undefined; + let image_src = undefined; + let was_link = false; + let link_content = undefined; + let link_href = undefined; + let in_code = false; + let in_code_block = false; + let first_line_code_block = false; + let code_block_lang = undefined; + let space_start = false; + let in_blockquote = false; + let in_unordered_list = false; + let blockquote_list = false; + let in_ordered_list = false; + let ordered_list_num = 0; + let in_superscript = false; + let in_table = false; + let in_table_header = false; + let table_item = ""; + //loop through characters + let chars = md; + for (let i = 0; i < chars.length; i++) { + let char = chars[i]; + let end_add_char = true; + //sanitize input + if (char === "<") { + char = "<"; + } + else if (char === ">") { + char = ">"; + } + //handle backslashes + if (backslashed) { + backslashed = false; + if (i !== chars.length - 1) { + html_line += char; + continue; + } + } + if (char === "\\" && chars[i + 1] !== "\n") { + backslashed = true; + if (i === 0 || chars[i - 1] === "\n") { + html_line = "

" + html_line; + } + continue; + } + //end of text or newline + if (char === "\n" || i === chars.length - 1) { + if (is_first_line) { + //it can only be the first line once :) + is_first_line = false; + } + //preserving the newlines/linebreaks of the code block + if (in_code_block && char === "\n" && !first_line_code_block) { + html_line += "
\n"; + space_start = true; + } + //if first line of code block, create the code block div + if (first_line_code_block) { + code_block_lang = code_block_lang.toLowerCase().trim(); + let known_langs = ["python", "py", "rust", "rs", "javascript", "js", "typescript", "ts", "java", "c", "cpp", "csharp", "html", "css", "markdown", "md", "brainfuck", "php", "bash", "perl", "sql", "ruby", "basic", "assembly", "asm", "wasm", "r", "go", "swift"]; + if (!known_langs.includes(code_block_lang) && code_block_lang !== "") { + warnings.push({ + type: "unknown-language", + message: `Unknown language '${code_block_lang}' for code block`, + line_number, + }); + } + if (code_block_lang === "") { + //if no code block language specified, don't put it as a css class obviously + html_line = `

\n`; + } + else { + html_line = `
\n`; + } + code_block_lang = undefined; + first_line_code_block = false; + } + //if image was never completed + if (image_alt !== undefined) { + if (!html_line.startsWith("

")) { + html_line = "

" + html_line; + } + html_line += "![" + image_alt; + if (image_src !== undefined) { + html_line += "](" + image_src; + warnings.push({ + type: "image-incomplete", + message: "Image incomplete, missing `)`", + line_number, + }); + } + else { + warnings.push({ + type: "image-incomplete", + message: "Image incomplete, missing `]` or `(`", + line_number, + }); + } + image_alt = undefined; + image_src = undefined; + } + //if link was never completed + if (link_content !== undefined) { + if (!html_line.startsWith("

")) { + html_line = "

" + html_line; + } + html_line += "[" + link_content; + if (link_href !== undefined) { + html_line += "](" + link_href; + warnings.push({ + type: "link-incomplete", + message: "Link incomplete, missing `)`", + line_number, + }); + } + else { + warnings.push({ + type: "link-incomplete", + message: "Link incomplete, missing `]` or `(`", + line_number, + }); + } + link_content = undefined; + link_href = undefined; + } + //if last character + if (i === chars.length - 1 && char !== "\n") { + let add_char = true; + //close code block div + if (in_code_block && i === chars.length - 1) { + in_code_block = false; + add_char = false; + html_line = "

"; + } + //if in code + if (in_code && char === "`") { + in_code = false; + html_line += ""; + add_char = false; + } + //if in horizontal rule + if (horizontal_rule) { + add_char = false; + } + //handle image just ending + if ((was_image || was_link) && char === ")") { + add_char = false; + } + //handle table row ending thing? + if (in_table && char === "|") { + in_table = false; + add_char = false; + if (in_table_header) { + html_line += `${table_item}\n\n`; + } + else { + html_line += `${table_item}\n\n`; + } + } + //if previous character is also newline, there hasn't been opportunity to add a

, so add it! + if (chars[i - 1] === "\n") { + html_line = "

"; + } + //ending a bold/italic? + if (in_asterisk && char === "*") { + if (asterisk_num === 2 && chars[i - 1] === "*") { + html_line += ""; + in_asterisk = false; + asterisk_num = 0; + add_char = false; + } + else if (asterisk_num === 1) { + html_line += ""; + in_asterisk = false; + asterisk_num = 0; + add_char = false; + } + } + //ending superscript + if (in_superscript && char === "^") { + html_line += ""; + in_superscript = false; + add_char = false; + } + if (add_char) { + html_line += char; + } + } + if (in_asterisk) { + //bold/italic never ended + if (asterisk_num === 1) { + //remove the last and replace it with a * + let split = html_line.split(""); + html_line = ""; + for (let ii = 0; ii < split.length; ii++) { + html_line += split[ii]; + if (ii !== split.length - 1) { + if (ii === split.length - 2) { + html_line += "*"; + } + else { + html_line += ""; + } + } + } + warnings.push({ + type: "italic-not-closed", + message: "Italic not closed, may be missing closing '*'? Backslash the '*' if this is intentional", + line_number, + }); + } + else if (asterisk_num === 2) { + //remove the last and replace it with a ** + let split = html_line.split(""); + html_line = ""; + for (let ii = 0; ii < split.length; ii++) { + html_line += split[ii]; + if (ii !== split.length - 1) { + if (ii === split.length - 2) { + html_line += "**"; + } + else { + html_line += ""; + } + } + } + warnings.push({ + type: "bold-not-closed", + message: "Bold not closed, may be missing closing '**'? Backslash the '**' if this is intentional", + line_number, + }); + } + asterisk_num = 0; + asterisk_out_num = 0; + in_asterisk = false; + } + if (in_superscript) { + //superscript never ended + //remove the last and replace it with a ^ + let split = html_line.split(""); + html_line = ""; + for (let ii = 0; ii < split.length; ii++) { + html_line += split[ii]; + if (ii !== split.length - 1) { + if (ii === split.length - 2) { + html_line += "^"; + } + else { + html_line += ""; + } + } + } + in_superscript = false; + warnings.push({ + type: "superscript-not-closed", + message: "Superscript not closed, may be missing closing '^'? Backslash the '^' if this is intentional", + line_number, + }); + } + //ending table row + if (in_table) { + in_table_header = false; + html_line += "\n"; + } + html += html_line; + if (html_line.startsWith("

")) { + html += "

\n"; + } + else if ((html_line.startsWith("
  • ") || html_line.startsWith("
      ") || html_line.startsWith("
        ")) && (in_unordered_list || in_ordered_list)) { + html += "\n"; + if (in_ordered_list) { + ordered_list_num++; + } + if (i === chars.length - 1 && in_unordered_list) { + html += "
    "; + //set it to false, not that it matters + blockquote_list = false; + } + else if (i === chars.length - 1 && in_ordered_list) { + html += ""; + //set it to false, not that it matters + blockquote_list = false; + } + } + html_line = ""; + horizontal_num = 0; + if (char === "\n") { + line_number++; + } + //check to see if unordered list is ending + if (in_unordered_list && char === "\n" && ((chars.slice(i + 1, i + 3) !== "- " && !blockquote_list) || (chars.slice(i + 1, i + 5) !== "> - " && blockquote_list))) { + html += "\n"; + in_unordered_list = false; + blockquote_list = false; + } + //check to see if ordered list is ending + let ol_num_length = String(ordered_list_num + 1).length; + if (in_ordered_list && char === "\n" && ((chars.slice(i + 1, i + ol_num_length + 3) !== `${ordered_list_num + 1}. ` && !blockquote_list) || (chars.slice(i + 1, i + ol_num_length + 5) !== `> ${ordered_list_num + 1}. ` && blockquote_list))) { + html += "\n"; + ordered_list_num = 0; + in_ordered_list = false; + blockquote_list = false; + } + if (horizontal_rule || was_image || was_link) { + if (i !== chars.length - 1 && html[html.length - 1] !== "\n") { + //only add new line if there isn't already one, and isn't last character + html += "\n"; + } + else if (i === chars.length - 1) { + //remove newline + html = html.trim(); + } + horizontal_rule = false; + was_image = false; + was_link = false; + continue; + } + //ending a header, line break not needed + if (in_heading) { + html += `\n`; + if (i === chars.length - 1) { + //remove newline + html = html.trim(); + } + heading_level = 0; + in_heading = false; + //continue; + } + //if in blockquote + if (in_blockquote && i === chars.length - 1) { + if (html[html.length - 1] !== "\n") { + html += "\n"; + } + html += ""; + in_blockquote = false; + } + else if (in_blockquote && char === "\n" && chars[i - 1] === "\n") { + //two new lines in a row means blockquote ends + html += "\n"; + in_blockquote = false; + } + heading_level = 0; + if (i === chars.length - 1) { + //remove newline + html = html.trim(); + } + continue; + } + //block quotes + if (char === " " && chars[i - 1] === ">" && !in_blockquote && (chars[i - 2] === "\n" || i === 1)) { + in_blockquote = true; + html += "
    \n"; + continue; + } + else if (in_blockquote && chars[i - 1] === "\n" && (char !== ">" || chars[i + 1] !== " ")) { + if (blockquote_list) { + //end list if list started in blockquote and blockquote ends + blockquote_list = false; + if (in_unordered_list) { + html += "\n
    \n"; + } + else if (in_ordered_list) { + html += "\n\n"; + } + ordered_list_num = 0; + in_ordered_list = false; + in_unordered_list = false; + } + else { + html += "\n"; + } + in_blockquote = false; + } + else if (char === ">" && chars[i + 1] === " " && (chars[i - 1] === "\n" || i === 0)) { + //do not add the '>' to the html + end_add_char = false; + } + else if (char === " " && chars[i - 1] === ">" && chars[i - 2] === "\n") { + //do not add the ' ' in '> ' to the html + end_add_char = false; + } + else if (char === ">" && chars[i + 1] !== " " && (chars[i - 1] === "\n" || i === 0)) { + warnings.push({ + type: "blockquote-broken", + message: "Missing space after `>` for blockquote?", + line_number, + }); + } + //code blocks + if (char === "`" && chars[i + 1] !== "`" && ((chars.slice(i - 3, i) === "\n``" || (i === 2 && chars.slice(0, i) === "``")) || (in_blockquote && (chars.slice(i - 5, i) === "\n> ``" || (i === 4 && chars.slice(0, i) === "> ``"))))) { + if (!in_code_block) { + //make sure there is ``` further on, that is not backslashed + let skip_next = false; + let end_found = false; + for (let ii = 1; ii < chars.length - i; ii++) { + let adjusted_index = i + ii; + if (skip_next) { + skip_next = false; + continue; + } + if (chars[adjusted_index] === "\\") { + skip_next = true; + } + else if (chars.slice(adjusted_index - 3, adjusted_index + 1) === "\n```" && (adjusted_index === chars.length - 1 || chars[adjusted_index + 1] === "\n")) { + end_found = true; + break; + } + else if (in_blockquote && chars.slice(adjusted_index - 5, adjusted_index + 1) === "\n> ```" && (adjusted_index === chars.length - 1 || chars[adjusted_index + 1] === "\n")) { + end_found = true; + break; + } + else if (in_blockquote && chars[adjusted_index] === "\n" && (chars[adjusted_index + 1] !== ">" || chars[adjusted_index + 2] !== " ")) { + //blockquote ended without finding end + break; + } + } + if (end_found) { + in_code = false; + in_code_block = true; + first_line_code_block = true; + code_block_lang = ""; + //at this point html_line would have two backticks (probably a actually) in it + html_line = ""; + continue; + } + else { + warnings.push({ + type: "code-block-not-closed", + message: "Code block not closed, may be missing closing backticks?", + line_number, + }); + } + } + else if (in_code_block && chars[i + 1] === "\n") { // || i === chars.length-1 will be handled by a different part + in_code = false; + in_code_block = false; + html_line = "
  • \n"; + continue; + } + } + else if (first_line_code_block) { + code_block_lang += char; + continue; + } + else if (in_code_block) { + //do not render markdown inside code blocks... obviously + //preserve spaces at the beginning of lines + if (in_blockquote && ((char === " " && chars.slice(i - 2, i) === "\n>") || (char === ">" && chars[i - 1] === "\n" && chars[i + 1] === " "))) { + //do not add the blockquote syntax thing "> " to the codeblock + } + else if (char === " " && space_start) { + html_line += " "; + } + else { + space_start = false; + html_line += char; + } + continue; + } + //handle unordered lists + if (char === " " && chars[i - 1] === "-" && (chars[i - 2] === "\n" || i === 1 || (in_blockquote && chars.slice(i - 3, i - 1) === "> " && (chars[i - 4] === "\n" || i === 3)))) { + //it's a unordered list bullet point!! + if (!in_unordered_list || (!in_blockquote && blockquote_list)) { + html_line = "
      \n
    • "; + blockquote_list = false; + } + else { + html_line = "
    • "; + } + in_unordered_list = true; + if (in_blockquote) { + blockquote_list = true; + } + continue; + } + else if (char !== " " && char !== "-" && chars[i - 1] === "-" && (chars[i - 2] === "\n" || i === 1)) { + warnings.push({ + type: "unordered-list-broken", + message: "Missing space after unordered list", + line_number, + }); + } + //handle ordered lists + let ol_num_length = String(ordered_list_num + 1).length; + if (char === " " && chars.slice(i - 1 - ol_num_length, i) === `${ordered_list_num + 1}.` && (chars[i - ol_num_length - 2] === "\n" || i === ol_num_length + 1 || (in_blockquote && chars.slice(i - ol_num_length - 3, i) === `> ${ordered_list_num + 1}.` && (chars[i - ol_num_length - 4] === "\n" || i === ol_num_length + 3)))) { + if (ordered_list_num === 0) { + html_line = "
        \n
      1. "; + in_ordered_list = true; + } + else { + html_line = "
      2. "; + } + if (in_blockquote) { + blockquote_list = true; + } + continue; + } + //handle code + if (char === "`" && !in_code) { + //make sure there is another ` in the line + let skip_next = false; + let end_found = false; + for (let ii = 1; ii < chars.length - i; ii++) { + if (skip_next) { + skip_next = false; + continue; + } + if (chars[i + ii] === "\\") { + skip_next = true; + } + else if (chars[i + ii] === "\n") { + end_found = false; + break; + } + else if (chars[i + ii] === "`") { + end_found = true; + break; + } + } + if (end_found) { + in_code = true; + html_line += ""; + //we have to repeat some code from later on and add a

        + if (i === 0 || chars[i - 1] === "\n") { + html_line = "

        " + html_line; + } + continue; + } + else { + warnings.push({ + type: "code-snippet-not-closed", + message: "Code snippet not closed, may be missing closing backtick?", + line_number, + }); + } + } + else if (char === "`" && in_code) { + in_code = false; + html_line += ""; + continue; + } + else if (in_code) { + html_line += char; + continue; + } + //handle tables + if (char === "|" && (chars[i - 1] === "\n" || i === 0 || (in_blockquote && chars.slice(i - 2, i) === "> " && (chars[i - 3] === "\n" || i === 3)))) { + if (!in_table) { + //start of table + in_table = true; + in_table_header = true; + html += "\n\n"; + } + else { + //just a new table row + html += "\n"; + } + continue; + } + else if (in_table && char === "|") { + if (in_table_header) { + html_line += `\n`; + } + else { + html_line += `\n`; + } + table_item = ""; + continue; + } + else if (in_table && ((chars[i - 1] === "\n" && char !== "|") || (in_blockquote && chars[i - 3] === "\n" && char !== "|"))) { + in_table = false; + table_item = ""; + //table ends + html += "
        ${table_item}${table_item}
        \n"; + } + else if (in_table) { + table_item += char; + continue; + } + //handle heading levels + //ensure headings are continuous and have after it ("#a##" or "##abc" are not a valid headings), and are at the beginning of the line + //ensure headings are possible in block quotes + if (chars.slice(i - heading_level - 1, i) === "\n" + "#".repeat(heading_level) || (is_first_line && chars.slice(0, i) === "#".repeat(heading_level)) || (chars.slice(i - heading_level - 3, i) === "\n> " + "#".repeat(heading_level) && in_blockquote) || (is_first_line && chars.slice(0, i) === "> " + "#".repeat(heading_level) && in_blockquote)) { + if (char === "#" && !in_heading && heading_level < 6) { + heading_level++; + continue; + } + else if (heading_level > 0 && char === " " && !in_heading) { + in_heading = true; + html_line = ``; + header_num++; + continue; + } + else if (char === "#" && heading_level === 6) { + warnings.push({ + type: "too-much-header", + message: "Header cannot be more than 6 levels", + line_number, + }); + } + else if (heading_level > 0) { + //not a heading + html_line = "

        " + "#".repeat(heading_level); + heading_level = 0; + warnings.push({ + type: "heading-broken", + message: "Missing space after `#` for heading?", + line_number, + }); + } + } + //handle horizontal rules + //similar code as headings to ensure beginning of the line, continuous + if (chars.slice(i - horizontal_num - 1, i) === "\n" + "-".repeat(horizontal_num) || (is_first_line && chars.slice(0, i) === "-".repeat(horizontal_num))) { + if (char === "-") { + horizontal_num++; + if (horizontal_num === 3 || (horizontal_num === 2 && chars[chars.length - 1] === "-" && i === chars.length - 2)) { + horizontal_rule = true; + html_line = "


        "; + } + else if (horizontal_num < 3 && (chars[i + 1] === "\n" || i === chars.length - 2)) { + //if next is end or newline, but less than 3 '-'s, it is not a valid horizontal rule + html_line = "

        " + "-".repeat(horizontal_num); + } + continue; + } + else if (horizontal_num > 0) { + //no longer a horizontal line + html_line = "

        " + "-".repeat(horizontal_num); + if (horizontal_num > 2) { + warnings.push({ + type: "horizontal-rule-broken", + message: "Horizontal rule broken", + line_number, + }); + } + } + } + //handle images + if (char === "!" && chars[i + 1] === "[") { + continue; + } + else if (char === "]" && chars[i + 1] === "(" && image_alt !== undefined && image_src === undefined) { + continue; + } + else if (char === "[" && chars[i - 1] === "!" && image_alt === undefined && image_src === undefined) { + image_alt = ""; + continue; + } + else if (char === "(" && chars[i - 1] === "]" && image_alt !== undefined && image_src === undefined) { + image_src = ""; + continue; + } + else if ((char === ")" || (chars[i + 1] === ")" && i + 1 === chars.length - 1)) && image_src !== undefined) { + if (chars[i + 1] === ")" && i + 1 === chars.length - 1) { + image_src += char; + } + if (image_alt === "") { + warnings.push({ + type: "missing-image-alt", + message: "Image is missing alt text, this is bad for accessibility", + line_number, + }); + } + html_line += `${image_alt}`; + was_image = true; + image_alt = undefined; + image_src = undefined; + continue; + } + else if (image_alt !== undefined && image_src === undefined && !(char === "]" && chars[i + 1] === "(")) { + image_alt += char; + continue; + } + else if (image_src !== undefined) { + image_src += char; + continue; + } + else if (was_image) { + was_image = false; + } + //handle links + if (char === "[") { + link_content = ""; + continue; + } + else if (char === "]" && chars[i + 1] === "(" && link_content !== undefined && link_href === undefined) { + continue; + } + else if (char === "(" && chars[i - 1] === "]" && link_content !== undefined && link_href === undefined) { + link_href = ""; + continue; + } + else if ((char === ")" || (chars[i + 1] === ")" && i + 1 === chars.length - 1)) && link_href !== undefined && link_content !== undefined) { + let before_link; + if (chars[i + 1] === ")" && i + 1 === chars.length - 1) { + link_href += char; + before_link = i - link_href.length - link_content.length - 3; + } + else { + before_link = i - link_href.length - link_content.length - 4; + } + if (chars[before_link] === "\n" || before_link === -1) { + html_line = "

        "; + } + if (link_content === "") { + warnings.push({ + type: "empty-link", + message: "Link missing text", + line_number, + }); + } + //":" includes protocols like http:// https:// wss:// and app uris + if (!link_href.includes(":") && !link_href.startsWith("./") && !link_href.startsWith("/")) { + warnings.push({ + type: "weird-href", + message: "Link href does not start with './' or '/' or contain ':', please double check it", + line_number, + }); + } + html_line += `${link_content}`; + was_link = true; + link_content = undefined; + link_href = undefined; + continue; + } + else if (link_content !== undefined && link_href === undefined) { + link_content += char; + continue; + } + else if (link_href !== undefined) { + link_href += char; + continue; + } + else if (was_link) { + was_link = false; + } + //add beginning paragraph + if (i === 0 || chars[i - 1] === "\n") { + html_line = "

        " + html_line; + } + //handle italics and bolds + if (char === "*" && asterisk_num < 2 && !in_asterisk) { + asterisk_num++; + if (asterisk_num === 1 && chars[i + 1] !== "*") { + html_line += ""; + in_asterisk = true; + } + else if (asterisk_num === 2) { + html_line += ""; + in_asterisk = true; + } + continue; + } + else if (char === "*" && in_asterisk) { + asterisk_out_num++; + if (asterisk_out_num === asterisk_num) { + if (asterisk_num === 1) { + html_line += ""; + } + else if (asterisk_num === 2) { + html_line += ""; + } + in_asterisk = false; + asterisk_num = 0; + asterisk_out_num = 0; + continue; + } + else if (asterisk_out_num === 1 && chars[i + 1] === "*") { + //implied that asterisk_num === 2 here due to previous if statement + continue; + } + } + else if (char !== "*" && in_asterisk) { + asterisk_out_num = 0; + } + //handle superscripts + if (char === "^") { + if (in_superscript) { + in_superscript = false; + html_line += ""; + continue; + } + else { + in_superscript = true; + html_line += ""; + continue; + } + } + // + if (end_add_char) { + html_line += char; + } + } + return { + html, + warnings, + }; +} +exports.parse_md_to_html_with_warnings = parse_md_to_html_with_warnings; +function parse_md_to_html(md) { + return parse_md_to_html_with_warnings(md).html; +} +exports.parse_md_to_html = parse_md_to_html; diff --git a/npm/index.ts b/npm/index.ts new file mode 100644 index 0000000..65ce48f --- /dev/null +++ b/npm/index.ts @@ -0,0 +1,771 @@ + +export type Warning = { + type: string, + message: string, + line_number?: number, +}; + +export type ParseResult = { + html: string, + warnings: Warning[] +}; + +//some minor differences with markdown spec? +export function parse_md_to_html_with_warnings(md: string): ParseResult { + let html: string = ""; + let html_line: string = ""; + let warnings: Warning[] = []; + + let line_number: number = 1; + + //markdown parsing vars + let is_first_line: boolean = true; + let backslashed: boolean = false; + let heading_level: number = 0; + let in_heading: boolean = false; + let header_num: number = 0; + let asterisk_num: number = 0; + let asterisk_out_num: number = 0; + let in_asterisk: boolean = false; + let horizontal_num: number = 0; + let horizontal_rule: boolean = false; + let was_image: boolean = false; + let image_alt: string | undefined = undefined; + let image_src: string | undefined = undefined; + let was_link: boolean = false; + let link_content: string | undefined = undefined; + let link_href: string | undefined = undefined; + let in_code: boolean = false; + let in_code_block: boolean = false; + let first_line_code_block: boolean = false; + let code_block_lang: string | undefined = undefined; + let space_start: boolean = false; + let in_blockquote: boolean = false; + let in_unordered_list: boolean = false; + let blockquote_list: boolean = false; + let in_ordered_list: boolean = false; + let ordered_list_num: number = 0; + let in_superscript: boolean = false; + let in_table: boolean = false; + let in_table_header: boolean = false; + let table_item: string = ""; + + //loop through characters + let chars: string = md; + for (let i=0; i < chars.length; i++) { + let char: string = chars[i]; + let end_add_char: boolean = true; + //sanitize input + if (char === "<") { + char = "<"; + } else if (char === ">") { + char = ">"; + } + //handle backslashes + if (backslashed) { + backslashed = false; + if (i !== chars.length-1) { + html_line += char; + continue; + } + } + if (char === "\\" && chars[i+1] !== "\n") { + backslashed = true; + if (i === 0 || chars[i-1] === "\n") { + html_line = "

        "+html_line; + } + continue; + } + //end of text or newline + if (char === "\n" || i === chars.length-1) { + if (is_first_line) { + //it can only be the first line once :) + is_first_line = false; + } + //preserving the newlines/linebreaks of the code block + if (in_code_block && char === "\n" && !first_line_code_block) { + html_line += "
        \n"; + space_start = true; + } + //if first line of code block, create the code block div + if (first_line_code_block) { + code_block_lang = code_block_lang!.toLowerCase().trim(); + let known_langs: string[] = ["python", "py", "rust", "rs", "javascript", "js", "typescript", "ts", "java", "c", "cpp", "csharp", "html", "css", "markdown", "md", "brainfuck", "php", "bash", "perl", "sql", "ruby", "basic", "assembly", "asm", "wasm", "r", "go", "swift"] + if (!known_langs.includes(code_block_lang) && code_block_lang !== "") { + warnings.push({ + type: "unknown-language", + message: `Unknown language '${code_block_lang}' for code block`, + line_number, + }); + } + if (code_block_lang === "") { + //if no code block language specified, don't put it as a css class obviously + html_line = `

        \n`; + } else { + html_line = `
        \n`; + } + code_block_lang = undefined; + first_line_code_block = false; + } + //if image was never completed + if (image_alt !== undefined) { + if (!html_line.startsWith("

        ")) { + html_line = "

        "+html_line; + } + html_line += "!["+image_alt; + if (image_src !== undefined) { + html_line += "]("+image_src; + warnings.push({ + type: "image-incomplete", + message: "Image incomplete, missing `)`", + line_number, + }); + } else { + warnings.push({ + type: "image-incomplete", + message: "Image incomplete, missing `]` or `(`", + line_number, + }); + } + image_alt = undefined; + image_src = undefined; + } + //if link was never completed + if (link_content !== undefined) { + if (!html_line.startsWith("

        ")) { + html_line = "

        "+html_line; + } + html_line += "["+link_content; + if (link_href !== undefined) { + html_line += "]("+link_href; + warnings.push({ + type: "link-incomplete", + message: "Link incomplete, missing `)`", + line_number, + }); + } else { + warnings.push({ + type: "link-incomplete", + message: "Link incomplete, missing `]` or `(`", + line_number, + }); + } + link_content = undefined; + link_href = undefined; + } + //if last character + if (i === chars.length-1 && char !== "\n") { + let add_char: boolean = true; + //close code block div + if (in_code_block && i === chars.length-1) { + in_code_block = false; + add_char = false; + html_line = "

        "; + } + //if in code + if (in_code && char === "`") { + in_code = false; + html_line += ""; + add_char = false; + } + //if in horizontal rule + if (horizontal_rule) { + add_char = false; + } + //handle image just ending + if ((was_image || was_link) && char === ")") { + add_char = false; + } + //handle table row ending thing? + if (in_table && char === "|") { + in_table = false; + add_char = false; + if (in_table_header) { + html_line += `${table_item}\n\n`; + } else { + html_line += `${table_item}\n\n`; + } + } + //if previous character is also newline, there hasn't been opportunity to add a

        , so add it! + if (chars[i-1] === "\n") { + html_line = "

        "; + } + //ending a bold/italic? + if (in_asterisk && char === "*") { + if (asterisk_num === 2 && chars[i-1] === "*") { + html_line += ""; + in_asterisk = false; + asterisk_num = 0; + add_char = false; + } else if (asterisk_num === 1) { + html_line += ""; + in_asterisk = false; + asterisk_num = 0; + add_char = false; + } + } + //ending superscript + if (in_superscript && char === "^") { + html_line += ""; + in_superscript = false; + add_char = false; + } + if (add_char) { + html_line += char; + } + } + if (in_asterisk) { + //bold/italic never ended + if (asterisk_num === 1) { + //remove the last and replace it with a * + let split: string[] = html_line.split(""); + html_line = ""; + for (let ii=0; ii < split.length; ii++) { + html_line += split[ii]; + if (ii !== split.length-1) { + if (ii === split.length-2) { + html_line += "*"; + } else { + html_line += ""; + } + } + } + warnings.push({ + type: "italic-not-closed", + message: "Italic not closed, may be missing closing '*'? Backslash the '*' if this is intentional", + line_number, + }); + } else if (asterisk_num === 2) { + //remove the last and replace it with a ** + let split: string[] = html_line.split(""); + html_line = ""; + for (let ii=0; ii < split.length; ii++) { + html_line += split[ii]; + if (ii !== split.length-1) { + if (ii === split.length-2) { + html_line += "**"; + } else { + html_line += ""; + } + } + } + warnings.push({ + type: "bold-not-closed", + message: "Bold not closed, may be missing closing '**'? Backslash the '**' if this is intentional", + line_number, + }); + } + asterisk_num = 0; + asterisk_out_num = 0; + in_asterisk = false; + } + if (in_superscript) { + //superscript never ended + //remove the last and replace it with a ^ + let split: string[] = html_line.split(""); + html_line = ""; + for (let ii=0; ii < split.length; ii++) { + html_line += split[ii]; + if (ii !== split.length-1) { + if (ii === split.length-2) { + html_line += "^"; + } else { + html_line += ""; + } + } + } + in_superscript = false; + warnings.push({ + type: "superscript-not-closed", + message: "Superscript not closed, may be missing closing '^'? Backslash the '^' if this is intentional", + line_number, + }); + } + //ending table row + if (in_table) { + in_table_header = false; + html_line += "\n"; + } + html += html_line; + if (html_line.startsWith("

        ")) { + html += "

        \n"; + } else if ((html_line.startsWith("
      3. ") || html_line.startsWith("
          ") || html_line.startsWith("
            ")) && (in_unordered_list || in_ordered_list)) { + html += "\n"; + if (in_ordered_list) { + ordered_list_num++; + } + if (i === chars.length-1 && in_unordered_list) { + html += "
        "; + //set it to false, not that it matters + blockquote_list = false; + } else if (i === chars.length-1 && in_ordered_list) { + html += "
      "; + //set it to false, not that it matters + blockquote_list = false; + } + } + html_line = ""; + horizontal_num = 0; + if (char === "\n") { + line_number++; + } + //check to see if unordered list is ending + if (in_unordered_list && char === "\n" && ((chars.slice(i+1, i+3) !== "- " && !blockquote_list) || (chars.slice(i+1, i+5) !== "> - " && blockquote_list))) { + html += "
    \n"; + in_unordered_list = false; + blockquote_list = false; + } + //check to see if ordered list is ending + let ol_num_length: number = String(ordered_list_num+1).length; + if (in_ordered_list && char === "\n" && ((chars.slice(i+1, i+ol_num_length+3) !== `${ordered_list_num+1}. ` && !blockquote_list) || (chars.slice(i+1, i+ol_num_length+5) !== `> ${ordered_list_num+1}. ` && blockquote_list))) { + html += "\n"; + ordered_list_num = 0; + in_ordered_list = false; + blockquote_list = false; + } + if (horizontal_rule || was_image || was_link) { + if (i !== chars.length - 1 && html[html.length-1] !== "\n") { + //only add new line if there isn't already one, and isn't last character + html += "\n"; + } else if (i === chars.length - 1) { + //remove newline + html = html.trim(); + } + horizontal_rule = false; + was_image = false; + was_link = false; + continue; + } + //ending a header, line break not needed + if (in_heading) { + html += `\n`; + if (i === chars.length - 1) { + //remove newline + html = html.trim(); + } + heading_level = 0; + in_heading = false; + //continue; + } + //if in blockquote + if (in_blockquote && i === chars.length-1) { + if (html[html.length-1] !== "\n") { + html += "\n"; + } + html += ""; + in_blockquote = false; + } else if (in_blockquote && char === "\n" && chars[i-1] === "\n") { + //two new lines in a row means blockquote ends + html += "\n"; + in_blockquote = false; + } + heading_level = 0; + if (i === chars.length - 1) { + //remove newline + html = html.trim(); + } + continue; + } + //block quotes + if (char === " " && chars[i-1] === ">" && !in_blockquote && (chars[i-2] === "\n" || i === 1)) { + in_blockquote = true; + html += "
    \n"; + continue; + } else if (in_blockquote && chars[i-1] === "\n" && (char !== ">" || chars[i+1] !== " ")) { + if (blockquote_list) { + //end list if list started in blockquote and blockquote ends + blockquote_list = false; + if (in_unordered_list) { + html += "\n
    \n"; + } else if (in_ordered_list) { + html += "\n\n"; + } + ordered_list_num = 0; + in_ordered_list = false; + in_unordered_list = false; + } else { + html += "\n"; + } + in_blockquote = false; + } else if (char === ">" && chars[i+1] === " " && (chars[i-1] === "\n" || i === 0)) { + //do not add the '>' to the html + end_add_char = false; + } else if (char === " " && chars[i-1] === ">" && chars[i-2] === "\n") { + //do not add the ' ' in '> ' to the html + end_add_char = false; + } else if (char === ">" && chars[i+1] !== " " && (chars[i-1] === "\n" || i === 0)) { + warnings.push({ + type: "blockquote-broken", + message: "Missing space after `>` for blockquote?", + line_number, + }); + } + //code blocks + if (char === "`" && chars[i+1] !== "`" && ((chars.slice(i-3, i) === "\n``" || (i === 2 && chars.slice(0, i) === "``")) || (in_blockquote && (chars.slice(i-5, i) === "\n> ``" || (i === 4 && chars.slice(0, i) === "> ``"))))) { + if (!in_code_block) { + //make sure there is ``` further on, that is not backslashed + let skip_next: boolean = false; + let end_found: boolean = false; + for (let ii=1; ii < chars.length-i; ii++) { + let adjusted_index: number = i+ii; + if (skip_next) { + skip_next = false; + continue; + } + if (chars[adjusted_index] === "\\") { + skip_next = true; + } else if (chars.slice(adjusted_index-3, adjusted_index+1) === "\n```" && (adjusted_index === chars.length-1 || chars[adjusted_index+1] === "\n")) { + end_found = true; + break; + } else if (in_blockquote && chars.slice(adjusted_index-5, adjusted_index+1) === "\n> ```" && (adjusted_index === chars.length-1 || chars[adjusted_index+1] === "\n")) { + end_found = true; + break; + } else if (in_blockquote && chars[adjusted_index] === "\n" && (chars[adjusted_index+1] !== ">" || chars[adjusted_index+2] !== " ")) { + //blockquote ended without finding end + break; + } + } + if (end_found) { + in_code = false; + in_code_block = true; + first_line_code_block = true; + code_block_lang = ""; + //at this point html_line would have two backticks (probably a actually) in it + html_line = ""; + continue; + } else { + warnings.push({ + type: "code-block-not-closed", + message: "Code block not closed, may be missing closing backticks?", + line_number, + }); + } + } else if (in_code_block && chars[i+1] === "\n") { // || i === chars.length-1 will be handled by a different part + in_code = false; + in_code_block = false; + html_line = "\n"; + continue; + } + } else if (first_line_code_block) { + code_block_lang += char; + continue; + } else if (in_code_block) { + //do not render markdown inside code blocks... obviously + //preserve spaces at the beginning of lines + if (in_blockquote && ((char === " " && chars.slice(i-2, i) === "\n>") || (char === ">" && chars[i-1] === "\n" && chars[i+1] === " "))) { + //do not add the blockquote syntax thing "> " to the codeblock + } else if (char === " " && space_start) { + html_line += " "; + } else { + space_start = false; + html_line += char; + } + continue; + } + //handle unordered lists + if (char === " " && chars[i-1] === "-" && (chars[i-2] === "\n" || i === 1 || (in_blockquote && chars.slice(i-3, i-1) === "> " && (chars[i-4] === "\n" || i === 3)))) { + //it's a unordered list bullet point!! + if (!in_unordered_list || (!in_blockquote && blockquote_list)) { + html_line = "
      \n
    • "; + blockquote_list = false; + } else { + html_line = "
    • "; + } + in_unordered_list = true; + if (in_blockquote) { + blockquote_list = true; + } + continue; + } else if (char !== " " && char !== "-" && chars[i-1] === "-" && (chars[i-2] === "\n" || i === 1)) { + warnings.push({ + type: "unordered-list-broken", + message: "Missing space after unordered list", + line_number, + }); + } + //handle ordered lists + let ol_num_length: number = String(ordered_list_num+1).length; + if (char === " " && chars.slice(i-1-ol_num_length, i) === `${ordered_list_num+1}.` && (chars[i-ol_num_length-2] === "\n" || i === ol_num_length+1 || (in_blockquote && chars.slice(i-ol_num_length-3, i) === `> ${ordered_list_num+1}.` && (chars[i-ol_num_length-4] === "\n" || i === ol_num_length+3)))) { + if (ordered_list_num === 0) { + html_line = "
        \n
      1. "; + in_ordered_list = true; + } else { + html_line = "
      2. "; + } + if (in_blockquote) { + blockquote_list = true; + } + continue; + } + //handle code + if (char === "`" && !in_code) { + //make sure there is another ` in the line + let skip_next: boolean = false; + let end_found: boolean = false; + for (let ii=1; ii < chars.length-i; ii++) { + if (skip_next) { + skip_next = false; + continue; + } + if (chars[i+ii] === "\\") { + skip_next = true; + } else if (chars[i+ii] === "\n") { + end_found = false; + break; + } else if (chars[i+ii] === "`") { + end_found = true; + break; + } + } + if (end_found) { + in_code = true; + html_line += ""; + //we have to repeat some code from later on and add a

        + if (i === 0 || chars[i-1] === "\n") { + html_line = "

        "+html_line; + } + continue; + } else { + warnings.push({ + type: "code-snippet-not-closed", + message: "Code snippet not closed, may be missing closing backtick?", + line_number, + }); + } + } else if (char === "`" && in_code) { + in_code = false; + html_line += ""; + continue; + } else if (in_code) { + html_line += char; + continue; + } + //handle tables + if (char === "|" && (chars[i-1] === "\n" || i === 0 || (in_blockquote && chars.slice(i-2, i) === "> " && (chars[i-3] === "\n" || i === 3)))) { + if (!in_table) { + //start of table + in_table = true; + in_table_header = true; + html += "\n\n"; + } else { + //just a new table row + html += "\n"; + } + continue; + } else if (in_table && char === "|") { + if (in_table_header) { + html_line += `\n`; + } else { + html_line += `\n`; + } + table_item = ""; + continue; + } else if (in_table && ((chars[i-1] === "\n" && char !== "|") || (in_blockquote && chars[i-3] === "\n" && char !== "|"))) { + in_table = false; + table_item = ""; + //table ends + html += "
        ${table_item}${table_item}
        \n"; + } else if (in_table) { + table_item += char; + continue; + } + //handle heading levels + //ensure headings are continuous and have after it ("#a##" or "##abc" are not a valid headings), and are at the beginning of the line + //ensure headings are possible in block quotes + if (chars.slice(i-heading_level-1, i) === "\n"+"#".repeat(heading_level) || (is_first_line && chars.slice(0, i) === "#".repeat(heading_level)) || (chars.slice(i-heading_level-3, i) === "\n> "+"#".repeat(heading_level) && in_blockquote) || (is_first_line && chars.slice(0, i) === "> "+"#".repeat(heading_level) && in_blockquote)) { + if (char === "#" && !in_heading && heading_level < 6) { + heading_level++; + continue; + } else if (heading_level > 0 && char === " " && !in_heading) { + in_heading = true; + html_line = ``; + header_num++; + continue; + } else if (char === "#" && heading_level === 6) { + warnings.push({ + type: "too-much-header", + message: "Header cannot be more than 6 levels", + line_number, + }) + } else if (heading_level > 0) { + //not a heading + html_line = "

        "+"#".repeat(heading_level); + heading_level = 0; + warnings.push({ + type: "heading-broken", + message: "Missing space after `#` for heading?", + line_number, + }); + } + } + //handle horizontal rules + //similar code as headings to ensure beginning of the line, continuous + if (chars.slice(i-horizontal_num-1, i) === "\n"+"-".repeat(horizontal_num) || (is_first_line && chars.slice(0, i) === "-".repeat(horizontal_num))) { + if (char === "-") { + horizontal_num++; + if (horizontal_num === 3 || (horizontal_num === 2 && chars[chars.length-1] === "-" && i === chars.length-2)) { + horizontal_rule = true; + html_line = "


        "; + } else if (horizontal_num < 3 && (chars[i+1] === "\n" || i === chars.length-2)) { + //if next is end or newline, but less than 3 '-'s, it is not a valid horizontal rule + html_line = "

        "+"-".repeat(horizontal_num); + } + continue; + } else if (horizontal_num > 0) { + //no longer a horizontal line + html_line = "

        "+"-".repeat(horizontal_num); + if (horizontal_num > 2) { + warnings.push({ + type: "horizontal-rule-broken", + message: "Horizontal rule broken", + line_number, + }); + } + } + } + //handle images + if (char === "!" && chars[i+1] === "[") { + continue; + } else if (char === "]" && chars[i+1] === "(" && image_alt !== undefined && image_src === undefined) { + continue; + } else if (char === "[" && chars[i-1] === "!" && image_alt === undefined && image_src === undefined) { + image_alt = ""; + continue; + } else if (char === "(" && chars[i-1] === "]" && image_alt !== undefined && image_src === undefined) { + image_src = ""; + continue; + } else if ((char === ")" || (chars[i+1] === ")" && i+1 === chars.length-1)) && image_src !== undefined) { + if (chars[i+1] === ")" && i+1 === chars.length-1) { + image_src += char; + } + if (image_alt === "") { + warnings.push({ + type: "missing-image-alt", + message: "Image is missing alt text, this is bad for accessibility", + line_number, + }); + } + html_line += `${image_alt}`; + was_image = true; + image_alt = undefined; + image_src = undefined; + continue; + } else if (image_alt !== undefined && image_src === undefined && !(char === "]" && chars[i+1] === "(")) { + image_alt += char; + continue; + } else if (image_src !== undefined) { + image_src += char; + continue; + } else if (was_image) { + was_image = false; + } + //handle links + if (char === "[") { + link_content = ""; + continue; + } else if (char === "]" && chars[i+1] === "(" && link_content !== undefined && link_href === undefined) { + continue; + } else if (char === "(" && chars[i-1] === "]" && link_content !== undefined && link_href === undefined) { + link_href = ""; + continue; + } else if ((char === ")" || (chars[i+1] === ")" && i+1 === chars.length-1)) && link_href !== undefined && link_content !== undefined) { + let before_link: number; + if (chars[i+1] === ")" && i+1 === chars.length-1) { + link_href += char; + before_link = i-link_href.length-link_content.length-3; + } else { + before_link = i-link_href.length-link_content.length-4; + } + if (chars[before_link] === "\n" || before_link === -1) { + html_line = "

        "; + } + if (link_content === "") { + warnings.push({ + type: "empty-link", + message: "Link missing text", + line_number, + }); + } + //":" includes protocols like http:// https:// wss:// and app uris + if (!link_href.includes(":") && !link_href.startsWith("./") && !link_href.startsWith("/")) { + warnings.push({ + type: "weird-href", + message: "Link href does not start with './' or '/' or contain ':', please double check it", + line_number, + }); + } + html_line += `${link_content}`; + was_link = true; + link_content = undefined; + link_href = undefined; + continue; + } else if (link_content !== undefined && link_href === undefined) { + link_content += char; + continue; + } else if (link_href !== undefined) { + link_href += char; + continue; + } else if (was_link) { + was_link = false; + } + //add beginning paragraph + if (i === 0 || chars[i-1] === "\n") { + html_line = "

        "+html_line; + } + //handle italics and bolds + if (char === "*" && asterisk_num < 2 && !in_asterisk) { + asterisk_num++; + if (asterisk_num === 1 && chars[i+1] !== "*") { + html_line += ""; + in_asterisk = true; + } else if (asterisk_num === 2) { + html_line += ""; + in_asterisk = true; + } + continue; + } else if (char === "*" && in_asterisk) { + asterisk_out_num++; + if (asterisk_out_num === asterisk_num) { + if (asterisk_num === 1) { + html_line += ""; + } else if (asterisk_num === 2) { + html_line += ""; + } + in_asterisk = false; + asterisk_num = 0; + asterisk_out_num = 0; + continue; + } else if (asterisk_out_num === 1 && chars[i+1] === "*") { + //implied that asterisk_num === 2 here due to previous if statement + continue; + } + } else if (char !== "*" && in_asterisk) { + asterisk_out_num = 0; + } + //handle superscripts + if (char === "^") { + if (in_superscript) { + in_superscript = false; + html_line += ""; + continue; + } else { + in_superscript = true; + html_line += ""; + continue; + } + } + // + if (end_add_char) { + html_line += char; + } + } + + return { + html, + warnings, + }; +} + +export function parse_md_to_html(md: string): string { + return parse_md_to_html_with_warnings(md).html; +} diff --git a/npm/package.json b/npm/package.json new file mode 100644 index 0000000..3207c79 --- /dev/null +++ b/npm/package.json @@ -0,0 +1,22 @@ +{ + "name": "makoto", + "version": "1.0.0", + "description": "Markdown to HTML parser", + "main": "index.js", + "types": "index.d.ts", + "repository": { + "type": "git", + "url": "git+https://github.com/jetstream0/Makoto-Markdown-to-HTML.git" + }, + "keywords": [ + "md", + "markdown", + "html" + ], + "author": "jetstream0/prussia", + "license": "MIT", + "bugs": { + "url": "https://github.com/jetstream0/Makoto-Markdown-to-HTML/issues" + }, + "homepage": "https://github.com/jetstream0/Makoto-Markdown-to-HTML#readme" +}