/**
* Copyright © DiamondMVC 2019
* License: MIT (https://github.com/DiamondMVC/Diamond/blob/master/LICENSE)
* Author: Jacob Jensen (bausshf)
*/
module diamond.markdown.parser;

import std.array : replace, split, array;
import std.string : format, strip, indexOf, isNumeric, toLower;
import std.conv : to;
import std.algorithm : canFind, startsWith, count, map, filter;

import diamond.markdown.type;
import diamond.markdown.part;

/**
* Escapes double quotes so a value can be embedded in a double-quoted html attribute.
* Params:
*   value = The raw attribute value.
* Returns:
*   The value with every `"` replaced by `&quot;`.
*/
private string escapeAttribute(string value)
{
  return value.replace("\"", "&quot;");
}

/**
* Escapes the characters that would otherwise be interpreted as html inside a code block.
* Params:
*   line = A raw line of code.
* Returns:
*   The line with `&`, `<` and `>` replaced by their html entities.
*/
private string escapeCode(string line)
{
  // '&' must be replaced first, otherwise the entities produced below would be escaped again.
  return line.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
}

/**
* Gets the html tag for an inline content wrapper.
* Params:
*   wrapName = The name of the wrapper ("bold", "italic", "underline", "strike" or "inlineCode".)
*   closing =  True for the closing tag, false for the opening tag.
* Returns:
*   The html tag, or an empty string for an unknown wrapper name.
*/
private string inlineTag(string wrapName, bool closing)
{
  switch (wrapName)
  {
    case "bold": return closing ? "</strong>" : "<strong>";
    case "italic": return closing ? "</em>" : "<em>";
    case "underline": return closing ? "</span>" : "<span style=\"text-decoration: underline\">";
    case "strike": return closing ? "</del>" : "<del>";
    case "inlineCode": return closing ? "</code>" : "<code>";
    default: return "";
  }
}

/**
* Computes the 1-based indentation of a line.
* Params:
*   line = The unstripped line.
* Returns:
*   The number of leading spaces / tabs plus one.
*/
private size_t indentationOf(string line)
{
  size_t indentation = 1;

  foreach (c; line)
  {
    if (c != ' ' && c != '\t')
    {
      break;
    }

    indentation++;
  }

  return indentation;
}

/**
* Checks whether a stripped line is a horizontal rule.
* Params:
*   stripped = The stripped line.
* Returns:
*   True if the line consists solely of three or more '-', '*' or '_'.
*/
private bool isHorizontalRule(string stripped)
{
  static bool consistsOf(string text, char marker)
  {
    return text.length >= 3 && text.count(marker) == text.length;
  }

  return consistsOf(stripped, '-') || consistsOf(stripped, '*') || consistsOf(stripped, '_');
}

/**
* Validates a table separator line (ex. "|:---|---:|") and extracts the column alignments.
* Params:
*   separatorLine = The line following the table header.
*   alignments =    Receives one entry per column: "left", "right", "center" or "" (no alignment.)
* Returns:
*   True if every cell of the line is a valid separator cell, false otherwise.
*/
private bool parseTableSeparator(string separatorLine, out string[] alignments)
{
  auto cells = separatorLine.strip().split("|").filter!(e => e.length > 0).map!(e => e.strip()).array;

  if (!cells.length)
  {
    return false;
  }

  foreach (cell; cells)
  {
    auto dashes = cell.replace(":", "").strip();

    // A separator cell is at least three dashes, optionally surrounded by colons.
    if (dashes.length < 3 || cell.count('-') != dashes.length)
    {
      return false;
    }

    immutable alignLeft = cell[0] == ':';
    immutable alignRight = cell[$ - 1] == ':';

    if (alignLeft && alignRight)
    {
      alignments ~= "center";
    }
    else if (alignLeft)
    {
      alignments ~= "left";
    }
    else if (alignRight)
    {
      alignments ~= "right";
    }
    else
    {
      alignments ~= "";
    }
  }

  return true;
}

/**
* Parses markdown to html.
* Params:
*   markdown =     The markdown to parse.
*   customParser = A custom parser that can be used to customize certain markdown parts. (If it returns null, then it will skip to the default parser.)
* Returns:
*   A string equivalent to the parsed html from the markdown.
*/
string parseToHtml(string markdown, string delegate(MarkdownPart) customParser = null)
{
  string result;

  // Alignments collected from the table head, applied to the cells of each following row.
  string[] cellAlignment;
  size_t cellIndex;

  foreach (part; parse(markdown))
  {
    if (customParser)
    {
      auto customResult = customParser(part);

      if (customResult !is null)
      {
        result ~= customResult;
        continue;
      }
    }

    switch (part.type)
    {
      case MarkdownType.content:
        result ~= part.content;
        break;

      case MarkdownType.newline:
        result ~= "<br>\r\n";
        break;

      case MarkdownType.ulistStart:
        result ~= "<ul>\r\n";
        break;

      case MarkdownType.ulistEnd:
        result ~= "</ul>\r\n";
        break;

      case MarkdownType.olistStart:
        result ~= "<ol>\r\n";
        break;

      case MarkdownType.olistEnd:
        result ~= "</ol>\r\n";
        break;

      case MarkdownType.listItemStart:
        result ~= "<li>\r\n";
        break;

      case MarkdownType.listItemEnd:
        result ~= "</li>\r\n";
        break;

      case MarkdownType.link:
      {
        auto title = part.getMetadata("title");

        result ~= "<a href=\"%s\"%s>%s</a>".format
        (
          escapeAttribute(part.getMetadata("url")),
          title is null ? "" : " title=\"" ~ escapeAttribute(title) ~ "\"",
          part.content
        );
        break;
      }

      case MarkdownType.image:
      {
        auto title = part.getMetadata("title");

        result ~= "<img src=\"%s\" alt=\"%s\"%s>".format
        (
          escapeAttribute(part.getMetadata("url")),
          escapeAttribute(part.content),
          title is null ? "" : " title=\"" ~ escapeAttribute(title) ~ "\""
        );
        break;
      }

      case MarkdownType.blockQuoteStart:
        result ~= "<blockquote>\r\n";
        break;

      case MarkdownType.blockQuoteEnd:
        result ~= "</blockquote>\r\n";
        break;

      case MarkdownType.horizontal:
        result ~= "<hr>\r\n";
        break;

      case MarkdownType.tableStart:
        cellAlignment = null;
        result ~= "<table>\r\n";
        break;

      case MarkdownType.tableEnd:
        result ~= "</table>\r\n";
        break;

      case MarkdownType.tableRowStart:
        cellIndex = 0;
        result ~= "<tr>\r\n";
        break;

      case MarkdownType.tableRowEnd:
        result ~= "\r\n</tr>\r\n";
        break;

      case MarkdownType.tableHeadStart:
      {
        auto alignment = part.getMetadata("align");

        cellAlignment ~= alignment is null ? "" : alignment;
        cellIndex = 0;

        result ~= "<th>\r\n";
        break;
      }

      case MarkdownType.tableHeadEnd:
        result ~= "</th>\r\n";
        break;

      case MarkdownType.tableCellStart:
      {
        string alignment;

        if (cellIndex < cellAlignment.length)
        {
          alignment = cellAlignment[cellIndex];
          cellIndex++;
        }

        result ~= alignment.length ? "<td align=\"%s\">\r\n".format(alignment) : "<td>\r\n";
        break;
      }

      case MarkdownType.tableCellEnd:
        result ~= "</td>\r\n";
        break;

      case MarkdownType.codeStart:
      {
        auto language = part.content;

        if (language.length)
        {
          result ~= "<pre class=\"highlight highlight-source-%s\"><code>".format(language);
        }
        else
        {
          result ~= "<pre><code>";
        }
        break;
      }

      case MarkdownType.codeEnd:
        result ~= "</code></pre>\r\n";
        break;

      case MarkdownType.contentWrapStart:
        result ~= inlineTag(part.content, false);
        break;

      case MarkdownType.contentWrapEnd:
        result ~= inlineTag(part.content, true);
        break;

      case MarkdownType.header:
      {
        auto id = escapeAttribute(part.content.replace(" ", "-").toLower());

        result ~= "<h%d id=\"%s\">%s</h%d>\r\n".format(part.volume, id, part.content, part.volume);
        break;
      }

      default: break;
    }
  }

  return result ? result : "";
}

/**
* Parses markdown into parts.
* Params:
*   markdown = The markdown to parse.
* Returns:
*   An array of markdown parts.
*/
MarkdownPart[] parse(string markdown)
{
  enum char tab = '\t';
  enum char backSlash = '\\';

  MarkdownPart[] parts;

  auto lines = markdown.replace("\r", "").split("\n");

  // Inline wrapper states. They are kept across lines, so a wrapper opened on one line can be closed on a later line.
  bool bold = false;
  bool italic = false;
  bool underline = false;
  bool strike = false;
  bool inlineCode = false;

  // Indentation of every currently open list, innermost last.
  size_t[] ulistLevels;
  size_t[] olistLevels;

  bool code = false;
  bool quote = false;
  bool table = false;

  // Parses inline content (bold, italic, underline, strike, inline code and plain text) into parts.
  void parseContent(string content)
  {
    string buffer;

    // Emits the text collected so far as a content part.
    void flush()
    {
      if (buffer.length)
      {
        auto text = new MarkdownPart(MarkdownType.content);
        text.content = buffer;

        parts ~= text;
        buffer = null;
      }
    }

    // Flips a wrapper state and emits the matching start/end part.
    void toggle(ref bool state, string name)
    {
      flush();

      state = !state;

      auto wrap = new MarkdownPart(state ? MarkdownType.contentWrapStart : MarkdownType.contentWrapEnd);
      wrap.content = name;

      parts ~= wrap;
    }

    for (size_t j = 0; j < content.length; j++)
    {
      immutable char lastChar = j > 0 ? content[j - 1] : '\0';
      immutable char currentChar = content[j];
      immutable char nextChar = j + 1 < content.length ? content[j + 1] : '\0';

      immutable escaped = lastChar == backSlash;
      // A wrapper may only be opened at the start of the content or after whitespace; it may be closed anywhere.
      immutable atBoundary = j == 0 || lastChar == ' ' || lastChar == tab;

      if (!escaped && currentChar == '*' && nextChar == '*' && (bold || atBoundary))
      {
        toggle(bold, "bold");

        j++; // Skips the second '*'
      }
      else if (!escaped && currentChar == '*' && (italic || atBoundary))
      {
        toggle(italic, "italic");
      }
      else if (!escaped && currentChar == '_' && (underline || atBoundary))
      {
        toggle(underline, "underline");
      }
      else if (!escaped && currentChar == '~' && (strike || atBoundary))
      {
        toggle(strike, "strike");
      }
      else if (!escaped && currentChar == '`' && (inlineCode || atBoundary))
      {
        toggle(inlineCode, "inlineCode");
      }
      else if (currentChar != backSlash || escaped)
      {
        // A single back slash is an escape character and is dropped; an escaped back slash is kept.
        buffer ~= currentChar;
      }
    }

    flush();
  }

  // Opens / closes lists so the innermost open list matches the indentation of the current item.
  void adjustList(ref size_t[] levels, size_t indentation, MarkdownType startType, MarkdownType endType)
  {
    while (levels.length && levels[$ - 1] > indentation)
    {
      parts ~= new MarkdownPart(endType);
      levels = levels[0 .. $ - 1];
    }

    if (!levels.length || levels[$ - 1] < indentation)
    {
      levels ~= indentation;

      parts ~= new MarkdownPart(startType);
    }
  }

  // Emits a single list item with parsed inline content.
  void parseListItem(string itemContent)
  {
    parts ~= new MarkdownPart(MarkdownType.listItemStart);

    parseContent(itemContent);

    parts ~= new MarkdownPart(MarkdownType.listItemEnd);
  }

  // Closes every open list, block-quote and table.
  void closeOpenBlocks()
  {
    foreach (level; ulistLevels)
    {
      parts ~= new MarkdownPart(MarkdownType.ulistEnd);
    }

    ulistLevels = null;

    foreach (level; olistLevels)
    {
      parts ~= new MarkdownPart(MarkdownType.olistEnd);
    }

    olistLevels = null;

    if (quote)
    {
      parts ~= new MarkdownPart(MarkdownType.blockQuoteEnd);
      quote = false;
    }

    if (table)
    {
      parts ~= new MarkdownPart(MarkdownType.tableEnd);
      table = false;
    }
  }

  for (size_t i = 0; i < lines.length; i++)
  {
    string line = lines[i];
    immutable hasNextLine = i + 1 < lines.length;
    string nextLine = hasNextLine ? lines[i + 1] : null;

    if (line is null)
    {
      continue;
    }

    if (code)
    {
      if (line.strip() == "```")
      {
        parts ~= new MarkdownPart(MarkdownType.codeEnd);

        code = false;
      }
      else
      {
        auto part = new MarkdownPart(MarkdownType.content);
        part.content = escapeCode(line) ~ "\r\n";

        parts ~= part;
      }

      continue;
    }

    auto stripped = line.strip();
    auto nextStripped = nextLine.strip();

    // Blank (or whitespace-only) lines terminate lists, quotes and tables.
    if (!stripped.length)
    {
      closeOpenBlocks();

      parts ~= new MarkdownPart(MarkdownType.newline);
      continue;
    }

    string[] alignments;
    auto dotIndex = stripped.indexOf('.');

    // Block-quote
    if (line[0] == '>')
    {
      if (!quote)
      {
        parts ~= new MarkdownPart(MarkdownType.blockQuoteStart);
        quote = true;
      }

      parseContent(line[1 .. $].strip());
    }
    // Table (A new table requires a valid separator line right after the header.)
    else if
    (
      line.canFind('|') &&
      (table || (hasNextLine && nextLine.count('|') == line.count('|') && parseTableSeparator(nextLine, alignments)))
    )
    {
      auto entries = stripped.split("|").filter!(e => e.length > 0).array;

      if (!table)
      {
        parts ~= new MarkdownPart(MarkdownType.tableStart);
        parts ~= new MarkdownPart(MarkdownType.tableRowStart);

        foreach (index, entry; entries)
        {
          auto head = new MarkdownPart(MarkdownType.tableHeadStart);

          if (index < alignments.length)
          {
            head.setMetadata("align", alignments[index]);
          }

          parts ~= head;

          parseContent(entry.strip());

          parts ~= new MarkdownPart(MarkdownType.tableHeadEnd);
        }

        table = true;

        parts ~= new MarkdownPart(MarkdownType.tableRowEnd);

        i++; // Skips the separator line
      }
      else
      {
        parts ~= new MarkdownPart(MarkdownType.tableRowStart);

        foreach (entry; entries)
        {
          parts ~= new MarkdownPart(MarkdownType.tableCellStart);

          parseContent(entry.strip());

          parts ~= new MarkdownPart(MarkdownType.tableCellEnd);
        }

        parts ~= new MarkdownPart(MarkdownType.tableRowEnd);
      }

      continue; // Don't want new-lines in tables ...
    }
    // Horizontal rule
    else if (isHorizontalRule(stripped))
    {
      parts ~= new MarkdownPart(MarkdownType.horizontal);
      continue; // hr shouldn't have a new line
    }
    // Header
    else if (line[0] == '#')
    {
      // The header level is derived from the position of the first separator after the '#' characters.
      auto hIndex = stripped.indexOf(' ');
      auto headerStart = hIndex;

      if (hIndex == -1)
      {
        hIndex = stripped.indexOf(tab);
        headerStart = hIndex;

        if (hIndex == -1)
        {
          hIndex = 0;
        }
      }

      if (hIndex > 6)
      {
        hIndex = 6;
      }

      if (hIndex)
      {
        auto part = new MarkdownPart(MarkdownType.header);
        part.content = line[headerStart .. $].strip();
        part.volume = hIndex;

        parts ~= part;

        continue; // Headers shouldn't have a new line
      }

      // No separator after the '#' characters, so the line is regular content.
      parseContent(stripped);
    }
    // Header alt
    else if (nextStripped == "======" || nextStripped == "------")
    {
      auto part = new MarkdownPart(MarkdownType.header);
      part.content = stripped;
      part.volume = nextStripped == "======" ? 1 : 2;

      parts ~= part;

      i++; // Skips the underline

      continue; // Headers shouldn't have a new line
    }
    // Code
    else if (stripped.startsWith("```"))
    {
      auto part = new MarkdownPart(MarkdownType.codeStart);

      auto language = stripped[3 .. $].strip();

      if (language.length)
      {
        part.content = language;
      }

      parts ~= part;
      code = true;
      continue; // Code shouldn't have a new line
    }
    // unordered list
    else if (stripped.length > 2 && (stripped[0] == '*' || stripped[0] == '+' || stripped[0] == '-') && stripped[1] == ' ')
    {
      adjustList(ulistLevels, indentationOf(line), MarkdownType.ulistStart, MarkdownType.ulistEnd);

      parseListItem(stripped[1 .. $].strip());
      continue; // Don't want <br> after </li>
    }
    // ordered list
    else if (stripped.length > 3 && dotIndex > 0 && stripped[0 .. dotIndex].isNumeric)
    {
      adjustList(olistLevels, indentationOf(line), MarkdownType.olistStart, MarkdownType.olistEnd);

      parseListItem(stripped[dotIndex + 1 .. $].strip());
      continue; // Don't want <br> after </li>
    }
    // link
    else if (stripped[0] == '[' && line.canFind(']') && line.canFind('(') && stripped[$ - 1] == ')')
    {
      auto text = stripped[1 .. stripped.indexOf(']')];
      auto href = stripped[stripped.indexOf('(') + 1 .. $ - 1].strip();

      auto firstHrefSpace = href.indexOf(' ');
      auto url = firstHrefSpace == -1 ? href : href[0 .. firstHrefSpace];
      string title;

      if (firstHrefSpace > 0)
      {
        title = href[firstHrefSpace + 1 .. $].strip();

        // The title is written as "title" in markdown; the quotes are not part of it.
        if (title.length >= 2 && title[0] == '"' && title[$ - 1] == '"')
        {
          title = title[1 .. $ - 1];
        }
      }

      auto part = new MarkdownPart(MarkdownType.link);
      part.content = text;
      part.setMetadata("url", url.strip());

      if (title.length)
      {
        part.setMetadata("title", title.strip());
      }

      parts ~= part;
      continue; // We don't want <br> after <a></a>
    }
    // image
    else if (stripped.length >= 2 && stripped[0] == '!' && stripped[1] == '[' && line.canFind(']') && line.canFind('(') && stripped[$ - 1] == ')')
    {
      auto text = stripped[2 .. stripped.indexOf(']')];
      auto href = stripped[stripped.indexOf('(') + 1 .. $ - 1];

      auto quoteIndex = href.indexOf('"');

      auto url = quoteIndex == -1 ? href : href[0 .. quoteIndex];
      string title;

      // A title requires an opening quote and a distinct closing quote at the very end.
      if (quoteIndex > 0 && cast(size_t)quoteIndex + 1 < href.length && href[$ - 1] == '"')
      {
        title = href[quoteIndex + 1 .. $ - 1].strip();
      }

      auto part = new MarkdownPart(MarkdownType.image);
      part.content = text;
      part.setMetadata("url", url.strip());

      if (title.length)
      {
        part.setMetadata("title", title);
      }

      parts ~= part;
      continue; // We don't want <br> after <img>
    }
    // Content
    else
    {
      parseContent(stripped);
    }

    parts ~= new MarkdownPart(MarkdownType.newline);
  }

  closeOpenBlocks();

  return parts ? parts : [];
}