draft
some basic markdown parsing and tests
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
*.js
|
||||
tsconfig.json
|
||||
159
endosulfan.ts
Normal file
159
endosulfan.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
|
||||
/** Number of assertions evaluated so far; incremented by every `test_assert` call. */
export let total_tests: number = 0;
/** Number of assertions whose condition was false. */
export let failed_tests: number = 0;
/** Number of assertions whose condition was true. */
export let passed_tests: number = 0;

/**
 * Records the outcome of one test and prints a coloured PASS/FAIL line.
 *
 * @param condition - Result of the check; `true` counts as a pass.
 * @param test_name - Label printed after the PASS/FAIL marker.
 * @returns `condition`, so callers can react to a failure.
 */
export function test_assert(condition: boolean, test_name: string): boolean {
  total_tests++;
  if (condition) {
    passed_tests++;
    //green marker
    console.log(`\x1B[32mTEST PASS\x1B[m ${test_name}`);
    return true;
  }
  failed_tests++;
  //red marker
  console.log(`\x1B[31mTEST FAIL\x1B[m ${test_name}`);
  return false;
}
|
||||
|
||||
/** Inclusive range of string indexes over which two compared strings disagree. */
type Difference = {
  start_index: number,
  end_index: number,
};

/**
 * Prints `text` with the characters in [start, end) highlighted, together with
 * up to two characters of unchanged context on either side.
 */
function log_highlighted_span(text: string, start: number, end: number): void {
  //clamp at 0: a negative slice start would count from the end of the string
  const before = text.slice(Math.max(0, start - 2), start);
  const changed = text.slice(start, end);
  const after = text.slice(end, end + 2);
  console.log(`${before}\x1B[30;44m${changed}\x1B[m${after}`);
}

/**
 * Logs the region in which two unequal strings differ.
 *
 * The region starts after the longest common prefix and ends before the longest
 * common suffix, so a substitution, an insertion and a deletion are all reported
 * as one span. For an insertion/deletion the span of the shorter string is empty.
 */
function log_first_difference(first: string, second: string): void {
  if (first === second) return;
  const shorter_length = Math.min(first.length, second.length);
  let prefix = 0;
  while (prefix < shorter_length && first[prefix] === second[prefix]) {
    prefix++;
  }
  //the suffix may not overlap the prefix, hence the `shorter_length - prefix` bound
  let suffix = 0;
  while (
    suffix < shorter_length - prefix &&
    first[first.length - 1 - suffix] === second[second.length - 1 - suffix]
  ) {
    suffix++;
  }
  const first_end = first.length - suffix;
  const second_end = second.length - suffix;
  //the strings are unequal, so at least one of the two spans is non-empty
  const difference: Difference = {
    start_index: prefix,
    end_index: Math.max(first_end, second_end) - 1,
  };
  if (difference.start_index === difference.end_index) {
    console.log(`Difference at index ${difference.start_index}:`);
  } else {
    //multi character difference
    console.log(`Difference at indexes ${difference.start_index} to ${difference.end_index}:`);
  }
  log_highlighted_span(first, prefix, first_end);
  log_highlighted_span(second, prefix, second_end);
}

/**
 * Asserts that two items are equal and records the result through `test_assert`.
 *
 * Items will probably be strings or numbers, but any object that is comparable
 * after a `JSON.stringify()` should be fine: objects (including arrays) are
 * compared through their JSON form.
 *
 * On failure (unless `silent`) both items are logged, and for strings the
 * differing region is highlighted as well.
 *
 * @param first_item - First value to compare.
 * @param second_item - Second value to compare.
 * @param test_name - Label passed on to `test_assert` and used in the failure log.
 * @param silent - When true, no debugging output is logged on failure.
 * @throws Error when the two items have different `typeof` results.
 */
export function test_assert_equal(first_item: any, second_item: any, test_name: string, silent?: boolean): void {
  if (typeof first_item !== typeof second_item) {
    throw Error("Cannot compare two items of different types!");
  }
  //if the items are objects (including arrays)
  if (typeof first_item === "object") {
    first_item = JSON.stringify(first_item);
    second_item = JSON.stringify(second_item);
  }
  const passed = test_assert(first_item === second_item, test_name);
  if (silent || passed) return;
  //log both items for debugging purposes
  console.log(`${test_name}:\n${first_item}\n${second_item}`);
  //don't log differences if not string
  if (typeof first_item !== "string") return;
  log_first_difference(first_item, second_item);
}
|
||||
|
||||
/** A potential problem reported about some input. */
export type Warning = {
  //category of the warning; compared against `ignore_types` in generate_warnings
  type: string,
  message: string,
  //line the warning refers to, when known
  line_number?: number,
};

/** Function that inspects an input and returns the warnings it found. */
export type WarningFunction = (input: any) => Warning[];

/**
 * Runs `warning_function` on `input` and prints every warning whose type is not
 * listed in `ignore_types`, followed by a summary line.
 *
 * @param input - Value handed unchanged to `warning_function`.
 * @param warning_function - Produces the warnings to print.
 * @param ignore_types - Warning types to suppress; suppressed warnings are still counted in the summary total.
 */
export function generate_warnings(input: any, warning_function: WarningFunction, ignore_types: string[]): void {
  const warnings = warning_function(input);
  let ignore_count = 0;
  for (const warning of warnings) {
    if (ignore_types.includes(warning.type)) {
      ignore_count++;
      continue;
    }
    //compare against undefined so that a line number of 0 is still reported
    if (warning.line_number !== undefined) {
      console.log(`\x1B[33mWarning at ${warning.line_number}:\x1B[m "${warning.message}" (type: ${warning.type})`);
    } else {
      //same layout as above: only the message is quoted
      console.log(`\x1B[33mWarning:\x1B[m "${warning.message}" (type: ${warning.type})`);
    }
  }
  console.log(`\x1B[33m${warnings.length} warnings (${ignore_count} suppressed)\x1B[m`);
}
|
||||
42
index.ts
Normal file
42
index.ts
Normal file
@@ -0,0 +1,42 @@
|
||||
import { parse_md_to_html } from './makoto';
|
||||
import { test_assert_equal, total_tests, failed_tests, passed_tests } from './endosulfan';
|
||||
|
||||
/*
|
||||
Quirks
|
||||
- Newlines are only emitted after headings, paragraphs, and horizontal rules; no other newlines appear in the final output (and no newline is emitted after the last line)
|
||||
-
|
||||
*/
|
||||
|
||||
//tests
//each call below renders a markdown snippet with parse_md_to_html and passes the result,
//the expected HTML and a test name to test_assert_equal
|
||||
|
||||
//paragraphs separated by several newlines, with zero, one and two trailing newlines
test_assert_equal(parse_md_to_html("a\n\n\nb"), "<p>a</p>\n<p>b</p>", "new line test 1");
|
||||
|
||||
test_assert_equal(parse_md_to_html("a\n\n\nb\n"), "<p>a</p>\n<p>b</p>", "new line test 2");
|
||||
|
||||
test_assert_equal(parse_md_to_html("a\n\n\nb\n\n"), "<p>a</p>\n<p>b</p>", "new line test 3");
|
||||
|
||||
//headings get sequential ids (header-0, header-1, ...); a '#' that is not at the start of a line stays literal
test_assert_equal(parse_md_to_html("# testing\n## Heading#\n# Chee see\nlorem ipsum"), "<h1 id='header-0'>testing</h1>\n<h2 id='header-1'>Heading#</h2>\n<h1 id='header-2'>Chee see</h1>\n<p>lorem ipsum</p>", "heading test 1");
|
||||
|
||||
//'#' characters without a following space do not start a heading
test_assert_equal(parse_md_to_html("in the sam#e way# bricks don't\n# Yay\n#a# b"), "<p>in the sam#e way# bricks don't</p>\n<h1 id='header-0'>Yay</h1>\n<p>#a# b</p>", "heading test 2");
|
||||
|
||||
//NOTE(review): as shown, the expected string contains raw '<' and '>' characters; for a sanitize test
//one would presumably expect escaped entities here — this line may have been entity-decoded, confirm against the original file
test_assert_equal(parse_md_to_html("# <script>a\<bc</script>"), "<h1 id='header-0'><script>a<bc</script></h1>", "sanitize test");
|
||||
|
||||
//heading followed by a trailing newline
test_assert_equal(parse_md_to_html("# tet offensive\n"), "<h1 id='header-0'>tet offensive</h1>", "heading test 3");
|
||||
|
||||
//bold (**) and italic (*) in a paragraph and inside a heading
test_assert_equal(parse_md_to_html("**test abc** *a*\n## **ch*ch**"), "<p><b>test abc</b> <i>a</i></p>\n<h2 id='header-0'><b>ch*ch</b></h2>", "bold italic test 1");
|
||||
|
||||
//four asterisks open and immediately close an empty bold
test_assert_equal(parse_md_to_html("****a*"), "<p><b></b>a*</p>", "bold italic test 2");
|
||||
|
||||
//three or more '-' on their own line give <hr>; shorter or interrupted runs stay paragraphs
test_assert_equal(parse_md_to_html("---\n--\n----\n--a-\n---"), "<hr>\n<p>--</p>\n<hr>\n<p>--a-</p>\n<hr>", "horizontal rule test");
|
||||
|
||||
//a backslash makes the following asterisk literal
test_assert_equal(parse_md_to_html("\\*\\*cheese\\*\\*\n*\\*cheese\\*\\*"), "<p>**cheese**</p>\n<p><i>*cheese*</i></p>", "backslash test");
|
||||
|
||||
//NOTE(review): the input below contains no image markdown although the expected output has two <img> tags;
//the input looks like it lost text in this copy — confirm against the original file
test_assert_equal(parse_md_to_html("asdf\n"), "<p>asdf<img src=\"/images/ming-dynasty.png\" alt=\"alt text\"></p>\n<img src=\"https://burger.com/burger.png\" alt=\"(burger!)\">", "image test");
|
||||
|
||||
//NOTE(review): named "invalid image test" but neither string contains any image markdown as shown — confirm against the original file
test_assert_equal(parse_md_to_html("asdf"), "<p>asdf</p>", "invalid image test");
|
||||
|
||||
//a lone backtick stays literal; a backtick pair on one line becomes <code> and its content is not parsed for '*'
test_assert_equal(parse_md_to_html("`e\ntesting `console.log('*koala*')`"), "<p>`e</p>\n<p>testing <code>console.log('*koala*')</code></p>", "code snippet test");
|
||||
|
||||
//todo: links, blockquotes, ordered lists, unordered lists, table, code block
|
||||
|
||||
//summary: passed (green) and failed (red) counts out of the total number of assertions
console.log(`Total Passed: \x1B[32m${passed_tests}/${total_tests}\x1B[m\nTotal Failed: \x1B[31m${failed_tests}/${total_tests}\x1B[m`);
|
||||
283
makoto.ts
Normal file
283
makoto.ts
Normal file
@@ -0,0 +1,283 @@
|
||||
import type { Warning } from './endosulfan';
|
||||
|
||||
//some minor differences with markdown spec?
|
||||
/**
 * Converts a markdown string to HTML by scanning it one character at a time.
 *
 * The visible code handles backslash escapes, inline code (backticks), headings ('#'),
 * horizontal rules ('-'), images, bold/italic ('*') and plain paragraphs. The HTML of the
 * current line is built up in `html_line` and appended to `html` when a newline or the
 * end of the text is reached.
 *
 * NOTE(review): this copy of the function is damaged (see the notes at the sanitize step
 * and at the unfinished-image step), so the comments only describe what is visible.
 *
 * @param md - Markdown source text.
 * @returns The generated HTML.
 */
export function parse_md_to_html(md: string): string {
|
||||
//finished output; completed lines are appended to it
let html: string = "";
|
||||
//HTML of the line currently being scanned; reset to "" after it has been appended to `html`
let html_line: string = "";
|
||||
|
||||
//markdown parsing vars
|
||||
//true until the first newline (or the end of the text) has been processed
let is_first_line: boolean = true;
|
||||
//set when a backslash was seen, so the next character is appended literally
let backslashed: boolean = false;
|
||||
//number of '#' counted at the start of the current line
let heading_level: number = 0;
|
||||
//true once the opening <hN> tag of the current line has been written
let in_heading: boolean = false;
|
||||
//running counter used for the id='header-N' attribute
let header_num: number = 0;
|
||||
//number of opening asterisks counted (1 = italic, 2 = bold)
let asterisk_num: number = 0;
|
||||
//number of consecutive closing asterisks counted while inside bold/italic
let asterisk_out_num: number = 0;
|
||||
//true while inside an open <i> or <b>
let in_asterisk: boolean = false;
|
||||
//number of '-' counted at the start of the current line
let horizontal_num: number = 0;
|
||||
//true once the current line has been turned into <hr>
let horizontal_rule: boolean = false;
|
||||
//true right after an <img> tag has been written
let was_image: boolean = false;
|
||||
//alt text collected after "![" ; undefined while not inside an image
let image_alt: string | undefined = undefined;
|
||||
//source collected after "](" ; undefined until the "(" of an image has been seen
let image_src: string | undefined = undefined;
|
||||
//set when an opening backtick with a matching closing backtick on the same line was found
let in_code: boolean = false;
|
||||
|
||||
//loop through characters
|
||||
let chars: string = md;
|
||||
for (let i=0; i < chars.length; i++) {
|
||||
let char: string = chars[i];
|
||||
//console.log(char, asterisk_num, in_asterisk);
|
||||
//sanitize input
|
||||
//NOTE(review): as written, both assignments below replace the character with itself, so nothing is escaped;
//presumably the replacements were HTML entities that were decoded in this copy — confirm against the original file
if (char === "<") {
|
||||
char = "<";
|
||||
} else if (char === ">") {
|
||||
char = ">";
|
||||
}
|
||||
//handle backslashes
|
||||
//the character after a backslash is appended as-is, unless it is the last character of the text,
//in which case it falls through to the end-of-text handling below
if (backslashed) {
|
||||
backslashed = false;
|
||||
if (i !== chars.length-1) {
|
||||
html_line += char;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
//a backslash directly before a newline is not treated as an escape
if (char === "\\" && chars[i+1] !== "\n") {
|
||||
backslashed = true;
|
||||
//a backslash at the start of a line still has to open the paragraph
if (i === 0 || chars[i-1] === "\n") {
|
||||
html_line += "<p>";
|
||||
}
|
||||
continue;
|
||||
}
|
||||
//end of text or newline
|
||||
if (char === "\n" || i === chars.length-1) {
|
||||
if (is_first_line) {
|
||||
//it can only be the first line once :)
|
||||
is_first_line = false;
|
||||
}
|
||||
//if image was never completed
|
||||
if (image_alt !== undefined) {
|
||||
if (!html_line.startsWith("<p>")) {
|
||||
html_line = "<p>"+html_line;
|
||||
}
|
||||
//NOTE(review): the statement below is cut off in this copy (unterminated string followed by a stray "{");
//whatever stood between the opening quote and the "{" that opens the add_char block is missing — restore from the original file
html_line += " {
|
||||
//whether the current (last) character should be appended to the line
let add_char: boolean = true;
|
||||
//if in code
|
||||
if (in_code && char === "`") {
|
||||
html_line += "</code>";
|
||||
add_char = false;
|
||||
}
|
||||
//if in horizontal rule
|
||||
if (horizontal_rule) {
|
||||
add_char = false;
|
||||
}
|
||||
//handle image just ending
|
||||
if (was_image && char === ")") {
|
||||
add_char = false;
|
||||
}
|
||||
//if previous character is also newline, there hasn't been opportunity to add a <p>, so add it!
|
||||
if (chars[i-1] === "\n") {
|
||||
html_line = "<p>";
|
||||
}
|
||||
//ending a bold/italic?
|
||||
if (in_asterisk && char === "*") {
|
||||
if (asterisk_num === 2 && chars[i-1] === "*") {
|
||||
html_line += "</b>";
|
||||
in_asterisk = false;
|
||||
asterisk_num = 0;
|
||||
add_char = false;
|
||||
} else if (asterisk_num === 1) {
|
||||
html_line += "</i>";
|
||||
in_asterisk = false;
|
||||
asterisk_num = 0;
|
||||
add_char = false;
|
||||
}
|
||||
}
|
||||
if (add_char) {
|
||||
html_line += char;
|
||||
}
|
||||
}
|
||||
//flush the finished line; lines that were opened with <p> get their closing tag and a newline
html += html_line;
|
||||
if (html_line.startsWith("<p>")) {
|
||||
html += "</p>\n";
|
||||
}
|
||||
html_line = "";
|
||||
horizontal_num = 0;
|
||||
if (horizontal_rule || was_image) {
|
||||
if (i !== chars.length - 1 && html[html.length-1] !== "\n") {
|
||||
//only add new line if there isn't already one, and isn't last character
|
||||
html += "\n";
|
||||
} else if (i === chars.length - 1) {
|
||||
//remove newline
|
||||
html = html.trim();
|
||||
}
|
||||
horizontal_rule = false;
|
||||
was_image = false;
|
||||
continue;
|
||||
}
|
||||
//ending a header, line break not needed
|
||||
if (in_heading) {
|
||||
html += `</h${heading_level}>\n`;
|
||||
if (i === chars.length - 1) {
|
||||
//remove newline
|
||||
html = html.trim();
|
||||
}
|
||||
heading_level = 0;
|
||||
in_heading = false;
|
||||
continue;
|
||||
}
|
||||
heading_level = 0;
|
||||
if (i === chars.length - 1) {
|
||||
//remove newline
|
||||
html = html.trim();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
//handle code
|
||||
if (char === "`" && !in_code) {
|
||||
//make sure there is another ` in the line
|
||||
//scan ahead: characters after a backslash are skipped, a newline ends the search without a match
let skip_next: boolean = false;
|
||||
let end_found: boolean = false;
|
||||
for (let ii=1; ii < chars.length-i; ii++) {
|
||||
if (skip_next) {
|
||||
skip_next = false;
|
||||
continue;
|
||||
}
|
||||
if (chars[i+ii] === "\\") {
|
||||
skip_next = true;
|
||||
} else if (chars[i+ii] === "\n") {
|
||||
end_found = false;
|
||||
break;
|
||||
} else if (chars[i+ii] === "`") {
|
||||
end_found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
//without a closing backtick the character falls through and is treated as ordinary text
if (end_found) {
|
||||
in_code = true;
|
||||
html_line += "<code>";
|
||||
continue;
|
||||
}
|
||||
//NOTE(review): in_code is not set back to false anywhere in the visible code — confirm whether a reset is missing
} else if (char === "`" && in_code) {
|
||||
html_line += "</code>";
|
||||
continue;
|
||||
} else if (in_code) {
|
||||
//inside code every character is copied without further markdown handling
html_line += char;
|
||||
continue;
|
||||
}
|
||||
//handle heading levels
|
||||
//ensure headings are continuous and have after it ("#a##" or "##abc" are not a valid headings), and are at the beginning of the line
|
||||
if (chars.slice(i-heading_level-1, i) === "\n"+"#".repeat(heading_level) || (is_first_line && chars.slice(0, i) === "#".repeat(heading_level))) {
|
||||
//NOTE(review): "<= 6" lets heading_level reach 7 — confirm the intended limit
if (char === "#" && !in_heading && heading_level <= 6) {
|
||||
heading_level++;
|
||||
continue;
|
||||
} else if (heading_level > 0 && char === " " && !in_heading) {
|
||||
//a space after the run of '#' opens the heading tag
in_heading = true;
|
||||
html_line += `<h${heading_level} id='header-${header_num}'>`;
|
||||
header_num++;
|
||||
continue;
|
||||
} else if (heading_level > 0) {
|
||||
//not a heading after all: start a paragraph and put the counted '#' back as text
html_line += "<p>"+"#".repeat(heading_level);
|
||||
heading_level = 0;
|
||||
}
|
||||
}
|
||||
//handle horizontal rules
|
||||
//similar code as headings to ensure beginning of the line, continuous
|
||||
if (chars.slice(i-horizontal_num-1, i) === "\n"+"-".repeat(horizontal_num) || (is_first_line && chars.slice(0, i) === "-".repeat(horizontal_num))) {
|
||||
if (char === "-") {
|
||||
horizontal_num++;
|
||||
//the third '-' makes the rule; the second alternative covers a rule whose third '-' is the last character of the text
if (horizontal_num === 3 || (horizontal_num === 2 && chars[chars.length-1] === "-" && i === chars.length-2)) {
|
||||
horizontal_rule = true;
|
||||
html_line = "<hr>";
|
||||
} else if (horizontal_num < 3 && (chars[i+1] === "\n" || i === chars.length-2)) {
|
||||
//if next is end or newline, but less than 3 '-'s, it is not a valid horizontal rule
|
||||
html_line = "<p>"+"-".repeat(horizontal_num);
|
||||
}
|
||||
continue;
|
||||
} else if (horizontal_num > 0) {
|
||||
//no longer a horizontal line
|
||||
html_line = "<p>"+"-".repeat(horizontal_num);
|
||||
}
|
||||
}
|
||||
//handle images
|
||||
//"!" before "[" and "]" before "(" are skipped; "[" starts collecting the alt text, "(" starts collecting the source
if (char === "!" && chars[i+1] === "[") {
|
||||
continue;
|
||||
} else if (char === "]" && chars[i+1] === "(" && image_alt !== undefined) {
|
||||
continue;
|
||||
} else if (char === "[" && chars[i-1] === "!" && image_alt === undefined && image_src === undefined) {
|
||||
image_alt = "";
|
||||
continue;
|
||||
} else if (char === "(" && chars[i-1] === "]" && image_alt !== undefined) {
|
||||
image_src = "";
|
||||
continue;
|
||||
} else if ((char === ")" || (chars[i+1] === ")" && i+1 === chars.length-1)) && image_src !== undefined) {
|
||||
//the image is complete at ")" — or one character early when that ")" is the last character of the text,
//in which case the current character still belongs to the source
if (chars[i+1] === ")" && i+1 === chars.length-1) {
|
||||
image_src += char;
|
||||
}
|
||||
html_line += `<img src="${image_src}" alt="${image_alt}">`;
|
||||
was_image = true;
|
||||
image_alt = undefined;
|
||||
image_src = undefined;
|
||||
continue;
|
||||
} else if (image_alt !== undefined && image_src === undefined && !(char === "]" && chars[i+1] === "(")) {
|
||||
image_alt += char;
|
||||
continue;
|
||||
} else if (image_src !== undefined) {
|
||||
image_src += char;
|
||||
continue;
|
||||
} else {
|
||||
was_image = false;
|
||||
}
|
||||
//add beginning paragraph
|
||||
if (i === 0 || chars[i-1] === "\n") {
|
||||
html_line += "<p>";
|
||||
}
|
||||
//handle italics and bolds
|
||||
//opening: one '*' not followed by another opens <i>, a second '*' opens <b>
if (char === "*" && asterisk_num < 2 && !in_asterisk) {
|
||||
asterisk_num++;
|
||||
if (asterisk_num === 1 && chars[i+1] !== "*") {
|
||||
html_line += "<i>";
|
||||
in_asterisk = true;
|
||||
} else if (asterisk_num === 2) {
|
||||
html_line += "<b>";
|
||||
in_asterisk = true;
|
||||
}
|
||||
continue;
|
||||
} else if (char === "*" && in_asterisk) {
|
||||
//closing: needs as many consecutive '*' as were used to open
asterisk_out_num++;
|
||||
if (asterisk_out_num === asterisk_num) {
|
||||
if (asterisk_num === 1) {
|
||||
html_line += "</i>";
|
||||
} else if (asterisk_num === 2) {
|
||||
html_line += "</b>";
|
||||
}
|
||||
in_asterisk = false;
|
||||
asterisk_num = 0;
|
||||
asterisk_out_num = 0;
|
||||
continue;
|
||||
} else if (asterisk_out_num === 1 && chars[i+1] === "*") {
|
||||
//implied that asterisk_num === 2 here due to previous if statement
|
||||
continue;
|
||||
}
|
||||
} else if (char !== "*" && in_asterisk) {
|
||||
//any other character interrupts a run of closing asterisks
asterisk_out_num = 0;
|
||||
}
|
||||
//
|
||||
//ordinary character: append to the current line
html_line += char;
|
||||
}
|
||||
|
||||
return html;
|
||||
}
|
||||
|
||||
/**
 * WarningFunction meant to generate warnings and catch possible mistakes
 * (eg: link not completed or possible space missing after #).
 *
 * No checks are implemented yet, so the result is always an empty list.
 *
 * @param md - Markdown source text to inspect (currently unused).
 * @returns A new, empty array of warnings.
 */
export function find_warnings(md: string): Warning[] {
  //todo: inspect `md` and collect warnings
  return [];
}
|
||||
Reference in New Issue
Block a user