block quotes

and unordered lists draft
This commit is contained in:
jetstream0
2023-06-07 23:00:38 -07:00
parent d2b2600824
commit 75260e28a3
2 changed files with 115 additions and 19 deletions

View File

@@ -3,8 +3,25 @@ import { test_assert_equal, total_tests, failed_tests, passed_tests } from './en
/*
Quirks
- Only one newline between text is needed for a new paragraph, not two.
- Headings are given automatically generated ids ('header-0', 'header-1' etc) so url anchors (https://example.com/blog#header-1) are possible
- Will only put newlines after headings, paragraphs, and horizontal rules, all others will not be in final output (exception is it will not put a newline on the last line, and also it will leave in newlines in code blocks)
- Will only put newlines after headings, paragraphs, and horizontal rules, all others will not be in final output (exception is it will not put a newline on the last line, and also newlines in code blocks are preserved)
- Spaces at the beginning of the line are normally cut off with html, so this parser will replace spaces with the html entity for spaces (  ) at the beginning of the line in code blocks
- Lines in code blocks will be split with <br>s
- The three backticks indicating a beginning or end of a code MUST be on their own line
Invalid:
```burger```
Valid:
```
burger
```
- If a language for the code block is provided, the parser will add a css class "code-<language name>" to the resulting code block div (which btw has class code-block)
- All elements should be able to be used in block quotes (ok, not really a quirk). In code blocks, the code in the code block must also start with "> " of course
-
*/
/*
List of warning types
-
*/
@@ -42,8 +59,16 @@ test_assert_equal(parse_md_to_html("[a](b)\n[fake link](oops"), "<p><a href=\"b\
test_assert_equal(parse_md_to_html("`e\n\\`testing `console.log('*koala*');`"), "<p>`e</p>\n<p>`testing <code>console.log('*koala*');</code></p>", "code snippet test");
test_assert_equal(parse_md_to_html("## testing\n```markdown\n# title\n i like **cheeseburgers** and `code`\n```\n```"), "<h2 id=\"header-0\">testing</h2>\n<div class=\"code-block code-markdown\">\n# title\n i like **cheeseburgers** and `code`\n</div>\n<p><code></code>`</p>", "code block test");
test_assert_equal(parse_md_to_html("```\nif time == 420:\n weed()\n```"), "<div class=\"code-block\">\nif time == 420:<br>\n&nbsp;&nbsp;&nbsp;&nbsp;weed()<br>\n</div>", "code block test 1");
//todo: blockquotes, ordered lists, unordered lists, table, code block
test_assert_equal(parse_md_to_html("## testing\n```markdown\n# title\n i like **cheeseburgers** and `code`\n```\n```"), "<h2 id=\"header-0\">testing</h2>\n<div class=\"code-block code-markdown\">\n# title<br>\n&nbsp;&nbsp;i like **cheeseburgers** and `code`<br>\n</div>\n<p><code></code>`</p>", "code block test 2");
test_assert_equal(parse_md_to_html("test\n> test\n> ## TEST\n> **beach**\n> `wee`\n> # dd"), "<p>test</p>\n<blockquote>\n<p>test</p>\n<h2 id=\"header-0\">TEST</h2>\n<p><b>beach</b></p>\n<p><code>wee</code></p>\n<h1 id=\"header-1\">dd</h1>\n</blockquote>", "block quote test 1");
test_assert_equal(parse_md_to_html("> ```\n> alert('e')\n> ```"), "<blockquote>\n<div class=\"code-block\">\nalert('e')<br>\n</div>\n</blockquote>", "block quote test 2");
//todo: ordered lists, unordered lists, tables
console.log(parse_md_to_html("- burger\n- fries\n- pizza"));
console.log(`Total Passed: \x1B[32m${passed_tests}/${total_tests}\x1B[m\nTotal Failed: \x1B[31m${failed_tests}/${total_tests}\x1B[m`);

View File

@@ -34,11 +34,16 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
let in_code_block: boolean = false;
let first_line_code_block: boolean = false;
let code_block_lang: string | undefined = undefined;
let space_start: boolean = false;
let in_blockquote: boolean = false;
let in_unordered_list: boolean = false;
let ordered_list_num: number = 0;
//loop through characters
let chars: string = md;
for (let i=0; i < chars.length; i++) {
let char: string = chars[i];
let end_add_char: boolean = true;
//sanitize input
if (char === "<") {
char = "&lt;";
@@ -66,29 +71,31 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
//it can only be the first line once :)
is_first_line = false;
}
//preserving the newlines/linebreaks of the code block
if (in_code_block && char === "\n" && !first_line_code_block) {
html_line += "<br>\n";
space_start = true;
}
//if first line of code block, create the code block div
if (first_line_code_block) {
code_block_lang = code_block_lang!.toLowerCase().trim();
let known_langs: string[] = ["python", "py", "rust", "rs", "javascript", "js", "typescript", "ts", "java", "c", "cpp", "csharp", "html", "css", "markdown", "md", "brainfuck", "php", "bash", "perl", "sql", "ruby", "basic", "assembly", "asm", "wasm", "r", "go", "swift"]
if (!known_langs.includes(code_block_lang)) {
if (!known_langs.includes(code_block_lang) && code_block_lang !== "") {
warnings.push({
type: "unknown-language",
message: `Unknown language '${code_block_lang}' for code block`,
line_number,
});
}
html_line = `<div class="code-block code-${code_block_lang}">`;
if (code_block_lang === "") {
//if no code block language specified, don't put it as a css class obviously
html_line = `<div class="code-block">\n`;
} else {
html_line = `<div class="code-block code-${code_block_lang}">\n`;
}
code_block_lang = undefined;
first_line_code_block = false;
}
if (in_code_block && char === "\n") {
html_line += "\n";
}
//close code block div
if (in_code_block && i === chars.length-1) {
in_code_block = false;
html_line = "</div>";
}
//if image was never completed
if (image_alt !== undefined) {
if (!html_line.startsWith("<p>")) {
@@ -138,6 +145,12 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
//if last character
if (i === chars.length-1 && char !== "\n") {
let add_char: boolean = true;
//close code block div
if (in_code_block && i === chars.length-1) {
in_code_block = false;
add_char = false;
html_line = "</div>";
}
//if in code
if (in_code && char === "`") {
in_code = false;
@@ -177,6 +190,11 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
html += html_line;
if (html_line.startsWith("<p>")) {
html += "</p>\n";
} else if ((html_line.startsWith("<li>") || html_line.startsWith("<ul>")) && in_unordered_list) {
html += "</li>\n";
if (i === chars.length-1) {
html += "</ul>";
}
}
html_line = "";
horizontal_num = 0;
@@ -203,7 +221,14 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
}
heading_level = 0;
in_heading = false;
continue;
//continue;
}
//if in blockquote
if (in_blockquote && i === chars.length-1) {
if (html[html.length-1] !== "\n") {
html += "\n";
}
html += "</blockquote>";
}
heading_level = 0;
if (i === chars.length - 1) {
@@ -212,8 +237,23 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
}
continue;
}
//block quotes
if (char === " " && chars[i-1] === ">" && !in_blockquote && (chars[i-2] === "\n" || i === 1)) {
in_blockquote = true;
html += "<blockquote>\n";
continue;
} else if (in_blockquote && chars[i-1] === "\n" && (char !== "&gt;" || chars[i+1] !== " ")) {
html_line = "</blockquote>\n";
in_blockquote = false;
} else if (char === "&gt;" && chars[i+1] === " " && (chars[i-1] === "\n" || i === 0)) {
//do not add the '>' to the html
end_add_char = false;
} else if (char === " " && chars[i-1] === ">" && chars[i-2] === "\n") {
//do not add the ' ' in '> ' to the html
end_add_char = false;
}
//code blocks
if (char === "`" && chars[i+1] !== "`" && (chars.slice(i-3, i) === "\n``" || (i === 2 && chars.slice(i-2, i) === "``"))) {
if (char === "`" && chars[i+1] !== "`" && ((chars.slice(i-3, i) === "\n``" || (i === 2 && chars.slice(0, i) === "``")) || (in_blockquote && (chars.slice(i-5, i) === "\n> ``" || (i === 4 && chars.slice(0, i) === "> ``"))))) {
if (!in_code_block) {
//make sure there is ``` further on, that is not backslashed
let skip_next: boolean = false;
@@ -229,6 +269,12 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
} else if (chars.slice(adjusted_index-3, adjusted_index+1) === "\n```" && (adjusted_index === chars.length-1 || chars[adjusted_index+1] === "\n")) {
end_found = true;
break;
} else if (in_blockquote && chars.slice(adjusted_index-5, adjusted_index+1) === "\n> ```" && (adjusted_index === chars.length-1 || chars[adjusted_index+1] === "\n")) {
end_found = true;
break;
} else if (in_blockquote && chars[adjusted_index] === "\n" && (chars[adjusted_index+1] !== ">" || chars[adjusted_index+2] !== " ")) {
//blockquote ended without finding end
break;
}
}
if (end_found) {
@@ -257,9 +303,31 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
continue;
} else if (in_code_block) {
//do not render markdown inside code blocks... obviously
//preserve spaces at the beginning of lines
if (char === " " && space_start) {
html_line += "&nbsp;";
} else if (in_blockquote && ((char === " " && chars.slice(i-2, i) === "\n>") || (char === "&gt;" && chars[i-1] === "\n" && chars[i+1] === " "))) {
//do not add the blockquote syntax thing "> " to the codeblock
} else {
space_start = false;
html_line += char;
}
continue;
}
//handle unordered lists
if (char === " " && chars[i-1] === "-" && (chars[i-2] === "\n" || i === 1)) {
//it's a unordered list bullet point!!
if (!in_unordered_list) {
html_line = "<ul>\n<li>";
} else {
html_line = "<li>";
}
in_unordered_list = true;
continue;
} else if (in_unordered_list && ((chars[i-1] === "\n" && char !== "-") || (chars[i-2] === "\n" && char !== " "))) {
html_line += "</ul>\n";
in_unordered_list = false;
}
//handle code
if (char === "`" && !in_code) {
//make sure there is another ` in the line
@@ -305,13 +373,14 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
}
//handle heading levels
//ensure headings are continuous and have after it ("#a##" or "##abc" are not a valid headings), and are at the beginning of the line
if (chars.slice(i-heading_level-1, i) === "\n"+"#".repeat(heading_level) || (is_first_line && chars.slice(0, i) === "#".repeat(heading_level))) {
//ensure headings are possible in block quotes
if (chars.slice(i-heading_level-1, i) === "\n"+"#".repeat(heading_level) || (is_first_line && chars.slice(0, i) === "#".repeat(heading_level)) || (chars.slice(i-heading_level-3, i) === "\n> "+"#".repeat(heading_level) && in_blockquote) || (is_first_line && chars.slice(0, i) === "> "+"#".repeat(heading_level) && in_blockquote)) {
if (char === "#" && !in_heading && heading_level <= 6) {
heading_level++;
continue;
} else if (heading_level > 0 && char === " " && !in_heading) {
in_heading = true;
html_line += `<h${heading_level} id="header-${header_num}">`;
html_line = `<h${heading_level} id="header-${header_num}">`;
header_num++;
continue;
} else if (heading_level > 0) {
@@ -446,8 +515,10 @@ export function parse_md_to_html_with_warnings(md: string): ParseResult {
asterisk_out_num = 0;
}
//
if (end_add_char) {
html_line += char;
}
}
return {
html,