From d2b260082432681ac8f199db12dea17c0a73f23c Mon Sep 17 00:00:00 2001 From: jetstream0 <49297268+jetstream0@users.noreply.github.com> Date: Sun, 4 Jun 2023 23:24:23 -0700 Subject: [PATCH] links and code blocks and fixes --- endosulfan.ts | 10 +-- index.ts | 23 ++++-- makoto.ts | 211 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 211 insertions(+), 33 deletions(-) diff --git a/endosulfan.ts b/endosulfan.ts index c2f8125..236380f 100644 --- a/endosulfan.ts +++ b/endosulfan.ts @@ -36,7 +36,7 @@ export function test_assert_equal(first_item: any, second_item: any, test_name: if (!silent && !passed) { //log info for debugging purposes //log both items - console.log(`${test_name}:\n${first_item}\n${second_item}`); + console.log(`${test_name}:\n========\n${first_item}\n========\n${second_item}\n========`); //log differences (partially effective) //don't log differences if not string if (typeof first_item !== "string") return; @@ -48,9 +48,6 @@ export function test_assert_equal(first_item: any, second_item: any, test_name: let dispute_length: number = 0; for (let i=0; i < longer.length; i++) { if (first_item[i] === second_item[i-offset]) { - if (i === 9) { - console.log(offset) - } continue; } else { if (dispute_length > 0) { @@ -138,10 +135,7 @@ export type Warning = { line_number?: number, }; -export type WarningFunction = (input: any) => Warning[]; - -export function generate_warnings(input: any, warning_function: WarningFunction, ignore_types: string[]) { - let warnings: Warning[] = warning_function(input); +export function generate_warnings(warnings: Warning[], ignore_types: string[]) { let ignore_count: number = 0; for (let i=0; i < warnings.length; i++) { let warning: Warning = warnings[i]; diff --git a/index.ts b/index.ts index 8ca5f77..52fde3e 100644 --- a/index.ts +++ b/index.ts @@ -3,7 +3,8 @@ import { test_assert_equal, total_tests, failed_tests, passed_tests } from './en /* Quirks -- Will only put newlines after headings, paragraphs, and horizontal rules, all others will not be in final output (exception is it will not put a newline on the last line) +- Headings are given automatically generated ids ('header-0', 'header-1' etc) so url anchors (https://example.com/blog#header-1) are possible +- Will only put newlines after headings, paragraphs, and horizontal rules, all others will not be in final output (exception is it will not put a newline on the last line, and also it will leave in newlines in code blocks) - */ @@ -15,15 +16,15 @@ test_assert_equal(parse_md_to_html("a\n\n\nb\n"), "

a

\n

b

", "new lin test_assert_equal(parse_md_to_html("a\n\n\nb\n\n"), "

a

\n

b

", "new line test 3"); -test_assert_equal(parse_md_to_html("# testing\n## Heading#\n# Chee see\nlorem ipsum"), "

testing

\n

Heading#

\n

Chee see

\n

lorem ipsum

", "heading test 1"); +test_assert_equal(parse_md_to_html("# testing\n## Heading#\n# Chee see\nlorem ipsum"), "

testing

\n

Heading#

\n

Chee see

\n

lorem ipsum

", "heading test 1"); -test_assert_equal(parse_md_to_html("in the sam#e way# bricks don't\n# Yay\n#a# b"), "

in the sam#e way# bricks don't

\n

Yay

\n

#a# b

", "heading test 2"); +test_assert_equal(parse_md_to_html("in the sam#e way# bricks don't\n# Yay\n#a# b"), "

in the sam#e way# bricks don't

\n

Yay

\n

#a# b

", "heading test 2"); -test_assert_equal(parse_md_to_html("# "), "

<script>a<bc</script>

", "sanitize test"); +test_assert_equal(parse_md_to_html("# "), "

<script>a<bc</script>

", "sanitize test"); -test_assert_equal(parse_md_to_html("# tet offensive\n"), "

tet offensive

", "heading test 3"); +test_assert_equal(parse_md_to_html("# tet offensive\n"), "

tet offensive

", "heading test 3"); -test_assert_equal(parse_md_to_html("**test abc** *a*\n## **ch*ch**"), "

test abc a

\n

ch*ch

", "bold italic test 1"); +test_assert_equal(parse_md_to_html("**test abc** *a*\n## **ch*ch**"), "

test abc a

\n

ch*ch

", "bold italic test 1"); test_assert_equal(parse_md_to_html("****a*"), "

a*

", "bold italic test 2"); @@ -35,8 +36,14 @@ test_assert_equal(parse_md_to_html("asdf![alt text](/images/ming-dynasty.png)\n! test_assert_equal(parse_md_to_html("asdf![alt text(/images/ming-dynasty.png)\n![burgeerr](wee.pong\n)"), "

asdf![alt text(/images/ming-dynasty.png)

\n

![burgeerr](wee.pong

\n

)

", "invalid image test"); -test_assert_equal(parse_md_to_html("`e\ntesting `console.log('*koala*')`"), "

`e

\n

testing console.log('*koala*')

", "code snippet test"); +test_assert_equal(parse_md_to_html("Yo quiero [cheeseburger](https://wendys.org/burger).\n[Con cheerios.](/cheerios)"), "

Yo quiero cheeseburger.

\n

Con cheerios.

", "link test 1"); -//todo: links, blockquotes, ordered lists, unordered lists, table, code block +test_assert_equal(parse_md_to_html("[a](b)\n[fake link](oops"), "

a

\n

[fake link](oops

", "link test 2"); + +test_assert_equal(parse_md_to_html("`e\n\\`testing `console.log('*koala*');`"), "

`e

\n

`testing console.log('*koala*');

", "code snippet test"); + +test_assert_equal(parse_md_to_html("## testing\n```markdown\n# title\n i like **cheeseburgers** and `code`\n```\n```"), "

testing

\n
\n# title\n i like **cheeseburgers** and `code`\n
\n

`

", "code block test"); + +//todo: blockquotes, ordered lists, unordered lists, table, code block console.log(`Total Passed: \x1B[32m${passed_tests}/${total_tests}\x1B[m\nTotal Failed: \x1B[31m${failed_tests}/${total_tests}\x1B[m`); diff --git a/makoto.ts b/makoto.ts index dbdad79..cf51017 100644 --- a/makoto.ts +++ b/makoto.ts @@ -1,9 +1,17 @@ import type { Warning } from './endosulfan'; +export type ParseResult = { + html: string, + warnings: Warning[] +} + //some minor differences with markdown spec? -export function parse_md_to_html(md: string): string { +export function parse_md_to_html_with_warnings(md: string): ParseResult { let html: string = ""; let html_line: string = ""; + let warnings: Warning[] = []; + + let line_number: number = 1; //markdown parsing vars let is_first_line: boolean = true; @@ -19,13 +27,18 @@ export function parse_md_to_html(md: string): string { let was_image: boolean = false; let image_alt: string | undefined = undefined; let image_src: string | undefined = undefined; + let was_link: boolean = false; + let link_content: string | undefined = undefined; + let link_href: string | undefined = undefined; let in_code: boolean = false; + let in_code_block: boolean = false; + let first_line_code_block: boolean = false; + let code_block_lang: string | undefined = undefined; //loop through characters let chars: string = md; for (let i=0; i < chars.length; i++) { let char: string = chars[i]; - //console.log(char, asterisk_num, in_asterisk); //sanitize input if (char === "<") { char = "<"; @@ -43,7 +56,7 @@ export function parse_md_to_html(md: string): string { if (char === "\\" && chars[i+1] !== "\n") { backslashed = true; if (i === 0 || chars[i-1] === "\n") { - html_line += "

"; + html_line = "

"+html_line; } continue; } @@ -53,6 +66,29 @@ export function parse_md_to_html(md: string): string { //it can only be the first line once :) is_first_line = false; } + //if first line of code block, create the code block div + if (first_line_code_block) { + code_block_lang = code_block_lang!.toLowerCase().trim(); + let known_langs: string[] = ["python", "py", "rust", "rs", "javascript", "js", "typescript", "ts", "java", "c", "cpp", "csharp", "html", "css", "markdown", "md", "brainfuck", "php", "bash", "perl", "sql", "ruby", "basic", "assembly", "asm", "wasm", "r", "go", "swift"] + if (!known_langs.includes(code_block_lang)) { + warnings.push({ + type: "unknown-language", + message: `Unknown language '${code_block_lang}' for code block`, + line_number, + }); + } + html_line = `

`; + code_block_lang = undefined; + first_line_code_block = false; + } + if (in_code_block && char === "\n") { + html_line += "\n"; + } + //close code block div + if (in_code_block && i === chars.length-1) { + in_code_block = false; + html_line = "
"; + } //if image was never completed if (image_alt !== undefined) { if (!html_line.startsWith("

")) { @@ -61,15 +97,50 @@ export function parse_md_to_html(md: string): string { html_line += "!["+image_alt; if (image_src !== undefined) { html_line += "]("+image_src; + warnings.push({ + type: "image-incomplete", + message: "Image incomplete, missing `)`", + line_number, + }); + } else { + warnings.push({ + type: "image-incomplete", + message: "Image incomplete, missing `]` or `(`", + line_number, + }); } image_alt = undefined; image_src = undefined; } + //if link was never completed + if (link_content !== undefined) { + if (!html_line.startsWith("

")) { + html_line = "

"+html_line; + } + html_line += "["+link_content; + if (link_href !== undefined) { + html_line += "]("+link_href; + warnings.push({ + type: "link-incomplete", + message: "Link incomplete, missing `)`", + line_number, + }); + } else { + warnings.push({ + type: "link-incomplete", + message: "Link incomplete, missing `]` or `(`", + line_number, + }); + } + link_content = undefined; + link_href = undefined; + } //if last character if (i === chars.length-1 && char !== "\n") { let add_char: boolean = true; //if in code if (in_code && char === "`") { + in_code = false; html_line += ""; add_char = false; } @@ -78,7 +149,7 @@ export function parse_md_to_html(md: string): string { add_char = false; } //handle image just ending - if (was_image && char === ")") { + if ((was_image || was_link) && char === ")") { add_char = false; } //if previous character is also newline, there hasn't been opportunity to add a

, so add it! @@ -109,7 +180,8 @@ export function parse_md_to_html(md: string): string { } html_line = ""; horizontal_num = 0; - if (horizontal_rule || was_image) { + line_number++; + if (horizontal_rule || was_image || was_link) { if (i !== chars.length - 1 && html[html.length-1] !== "\n") { //only add new line if there isn't already one, and isn't last character html += "\n"; @@ -119,6 +191,7 @@ export function parse_md_to_html(md: string): string { } horizontal_rule = false; was_image = false; + was_link = false; continue; } //ending a header, line break not needed @@ -139,6 +212,54 @@ export function parse_md_to_html(md: string): string { } continue; } + //code blocks + if (char === "`" && chars[i+1] !== "`" && (chars.slice(i-3, i) === "\n``" || (i === 2 && chars.slice(i-2, i) === "``"))) { + if (!in_code_block) { + //make sure there is ``` further on, that is not backslashed + let skip_next: boolean = false; + let end_found: boolean = false; + for (let ii=1; ii < chars.length-i; ii++) { + let adjusted_index: number = i+ii; + if (skip_next) { + skip_next = false; + continue; + } + if (chars[adjusted_index] === "\\") { + skip_next = true; + } else if (chars.slice(adjusted_index-3, adjusted_index+1) === "\n```" && (adjusted_index === chars.length-1 || chars[adjusted_index+1] === "\n")) { + end_found = true; + break; + } + } + if (end_found) { + in_code = false; + in_code_block = true; + first_line_code_block = true; + code_block_lang = ""; + //at this point html_line would have two backticks (probably a actually) in it + html_line = ""; + continue; + } else { + warnings.push({ + type: "code-block-not-closed", + message: "Code block not closed, may be missing closing backticks?", + line_number, + }); + } + } else if (in_code_block && chars[i+1] === "\n") { // || i === chars.length-1 will be handled by a different part + in_code = false; + in_code_block = false; + html_line = "\n"; + continue; + } + } else if (first_line_code_block) { + code_block_lang += char; + continue; + } else if (in_code_block) { + //do not render markdown inside code blocks... obviously + html_line += char; + continue; + } //handle code if (char === "`" && !in_code) { //make sure there is another ` in the line @@ -162,9 +283,20 @@ export function parse_md_to_html(md: string): string { if (end_found) { in_code = true; html_line += ""; + //we have to repeat some code from later on and add a

+ if (i === 0 || chars[i-1] === "\n") { + html_line = "

"+html_line; + } continue; + } else { + warnings.push({ + type: "code-snippet-not-closed", + message: "Code snippet not closed, may be missing closing backtick?", + line_number, + }); } } else if (char === "`" && in_code) { + in_code = false; html_line += ""; continue; } else if (in_code) { @@ -179,12 +311,18 @@ export function parse_md_to_html(md: string): string { continue; } else if (heading_level > 0 && char === " " && !in_heading) { in_heading = true; - html_line += ``; + html_line += ``; header_num++; continue; } else if (heading_level > 0) { - html_line += "

"+"#".repeat(heading_level); + //not a heading + html_line = "

"+"#".repeat(heading_level); heading_level = 0; + warnings.push({ + type: "heading-broken", + message: "Missing space after `#` for heading?", + line_number, + }); } } //handle horizontal rules @@ -203,17 +341,22 @@ export function parse_md_to_html(md: string): string { } else if (horizontal_num > 0) { //no longer a horizontal line html_line = "

"+"-".repeat(horizontal_num); + warnings.push({ + type: "horizontal-rule-broken", + message: "Horizontal rule broken", + line_number, + }); } } //handle images if (char === "!" && chars[i+1] === "[") { continue; - } else if (char === "]" && chars[i+1] === "(" && image_alt !== undefined) { + } else if (char === "]" && chars[i+1] === "(" && image_alt !== undefined && image_src === undefined) { continue; } else if (char === "[" && chars[i-1] === "!" && image_alt === undefined && image_src === undefined) { image_alt = ""; continue; - } else if (char === "(" && chars[i-1] === "]" && image_alt !== undefined) { + } else if (char === "(" && chars[i-1] === "]" && image_alt !== undefined && image_src === undefined) { image_src = ""; continue; } else if ((char === ")" || (chars[i+1] === ")" && i+1 === chars.length-1)) && image_src !== undefined) { @@ -231,12 +374,46 @@ export function parse_md_to_html(md: string): string { } else if (image_src !== undefined) { image_src += char; continue; - } else { + } else if (was_image) { was_image = false; } + //handle links + if (char === "[") { + link_content = ""; + continue; + } else if (char === "]" && chars[i+1] === "(" && link_content !== undefined && link_href === undefined) { + continue; + } else if (char === "(" && chars[i-1] === "]" && link_content !== undefined && link_href === undefined) { + link_href = ""; + continue; + } else if ((char === ")" || (chars[i+1] === ")" && i+1 === chars.length-1)) && link_href !== undefined && link_content !== undefined) { + let before_link: number; + if (chars[i+1] === ")" && i+1 === chars.length-1) { + link_href += char; + before_link = i-link_href.length-link_content.length-3; + } else { + before_link = i-link_href.length-link_content.length-4; + } + if (chars[before_link] === "\n" || before_link === -1) { + html_line = "

"; + } + html_line += `${link_content}`; + was_link = true; + link_content = undefined; + link_href = undefined; + continue; + } else if (link_content !== undefined && link_href === undefined) { + link_content += char; + continue; + } else if (link_href !== undefined) { + link_href += char; + continue; + } else if (was_link) { + was_link = false; + } //add beginning paragraph if (i === 0 || chars[i-1] === "\n") { - html_line += "

"; + html_line = "

"+html_line; } //handle italics and bolds if (char === "*" && asterisk_num < 2 && !in_asterisk) { @@ -272,12 +449,12 @@ export function parse_md_to_html(md: string): string { html_line += char; } - return html; + return { + html, + warnings, + }; } -//WarningFunction to generate warnings and catch possible mistakes (eg: link not completed or possible space missing after #) -export function find_warnings(md: string): Warning[] { - let warnings: Warning[] = []; - // - return warnings; +export function parse_md_to_html(md: string): string { + return parse_md_to_html_with_warnings(md).html; }