Add regex for title text version of links

* Add regex for title text version of links

Original parser matched:
  [name](http://link)

Add match for:
  [name](http://link "title text")

* Add tests and improve regexes
* Several new tests in test_maddy_linkparser.cpp (some with paths for future improvement, and one to ensure an overzealous future update doesn't disallow actually-used special characters like o-umlaut).
* URLs now ignore leading/trailing spaces.
* URLs now don't match on internal spaces or quotes.
* Small grammar fix in CONTRIBUTING.md
* Updated changelog.
This commit is contained in:
Lucian Smith
2025-04-20 09:45:27 -07:00
committed by GitHub
parent af59cd13d4
commit 2a00c9fb0b
6 changed files with 166 additions and 6 deletions

View File

@@ -11,3 +11,4 @@ Andrew Mettlach (dmmettlach@gmail.com)
Evan Klitzke (evan@eklitzke.org) Evan Klitzke (evan@eklitzke.org)
Albert Schwarzkopf (dev-maddy@quitesimple.org) Albert Schwarzkopf (dev-maddy@quitesimple.org)
Ivans Saponenko (ivans.saponenko+maddy@gmail.com) Ivans Saponenko (ivans.saponenko+maddy@gmail.com)
Lucian Smith (lpsmith@uw.edu)

View File

@@ -13,6 +13,10 @@ maddy uses [semver versioning](https://semver.org/).
* ![**REMOVED**](https://img.shields.io/badge/-REMOVED-%23900) for now removed features. * ![**REMOVED**](https://img.shields.io/badge/-REMOVED-%23900) for now removed features.
## Upcoming ## Upcoming
* ![**ADDED**](https://img.shields.io/badge/-ADDED-%23099) Correctly parse links with title text, e.g. `[link](http://example.com "example")`.
* ![**FIXED**](https://img.shields.io/badge/-FIXED-%23090) Do not create invalid URLs from links with spaces, e.g. `[link](/ABC/some file)`.
* ![**FIXED**](https://img.shields.io/badge/-FIXED-%23090) Do not create invalid HTML from links with quotes, e.g. `[link](/ABC/some"file)`.
## version 1.4.0 2025-03-28 ## version 1.4.0 2025-03-28

View File

@@ -16,4 +16,4 @@ improve the code? Then [create a GitHub issue](https://github.com/progsource/mad
* Explain what your PR is for - like providing a use-case or something similar. * Explain what your PR is for - like providing a use-case or something similar.
* Update documentation of the Markdown syntax if anything changed there. (`docs/definitions.md`) * Update documentation of the Markdown syntax if anything changed there. (`docs/definitions.md`)
* Add a changelog entry at "Upcoming" inside of `CHANGELOG.md` * Add a changelog entry at "Upcoming" inside of `CHANGELOG.md`
* Make sure, that the tests are successful and if you wrote a bugfix, to have a test, that highlights the issue. * Make sure that the tests are successful and if you wrote a bugfix, to have a test that highlights the issue.

View File

@@ -41,6 +41,14 @@ results in
<a href="http://example.com">Text of the link</a> <a href="http://example.com">Text of the link</a>
``` ```
```
[Text of the link](http://example.com "title text")
```
results in
```html
<a href="http://example.com" title="title text">Text of the link</a>
```
## Lists ## Lists
### unordered ### unordered

View File

@@ -40,10 +40,17 @@ public:
*/ */
void Parse(std::string& line) override void Parse(std::string& line) override
{ {
static std::regex re(R"(\[([^\]]*)\]\(([^)]*)\))"); // Match [name](http:://link "title text")
static std::string replacement = "<a href=\"$2\">$1</a>"; // NOTE: the 'no quote' bit at the beginning (^") is a hack for now:
// there should eventually be something that replaces it with '%22'.
static std::regex re(R"(\[([^\]]*)\]\( *([^)^ ^"]*) *\"([^\"]*)\" *\))");
static std::string replacement = "<a href=\"$2\" title=\"$3\">$1</a>";
line = std::regex_replace(line, re, replacement); line = std::regex_replace(line, re, replacement);
// Match [name](http:://link)
static std::regex re2(R"(\[([^\]]*)\]\( *([^)^ ^"]*) *\))");
static std::string replacement2 = "<a href=\"$2\">$1</a>";
line = std::regex_replace(line, re2, replacement2);
} }
}; // class LinkParser }; // class LinkParser

View File

@@ -37,14 +37,47 @@ TEST(MADDY_LINKPARSER, ItReplacesMarkdownWithLinks)
ASSERT_EQ(expected, text); ASSERT_EQ(expected, text);
} }
// Spaces between the URL and the closing parenthesis must not end up in
// the generated href.
TEST(MADDY_LINKPARSER, ItReplacesMarkdownWithSpacesAfterLink)
{
  const std::string wantedHtml =
    "Some text <a href=\"http://example.com\">Link Title</a> bla <a "
    "href=\"http://example.com\">Link Title</a>";
  std::string markdown =
    "Some text [Link Title](http://example.com ) bla [Link "
    "Title](http://example.com)";

  const auto parser = std::make_shared<maddy::LinkParser>();
  parser->Parse(markdown);

  ASSERT_EQ(wantedHtml, markdown);
}
// Real-world links contain characters outside the set that URLs are
// officially allowed to use; such links must still be converted.
TEST(MADDY_LINKPARSER, ItHandlesURLsWithOfficiallyIllegalCharacters)
{
  const std::string wantedHtml =
    "Wikipedia's <a "
    "href=\"https://en.wikipedia.org/wiki/Möbius_strip\">"
    "Möbius strip</a> link.";
  std::string markdown =
    "Wikipedia's [Möbius strip]"
    "(https://en.wikipedia.org/wiki/Möbius_strip) link.";

  const auto parser = std::make_shared<maddy::LinkParser>();
  parser->Parse(markdown);

  ASSERT_EQ(wantedHtml, markdown);
}
TEST( TEST(
MADDY_LINKPARSER, ItReplacesMarkdownProperlyEvenWithMultipleParenthesisInLine MADDY_LINKPARSER, ItReplacesMarkdownProperlyEvenWithMultipleParenthesisInLine
) )
{ {
std::string text = std::string text =
"(This is a [link](/ABC/some file) (the URL will include this).)"; "(This is a [link](/ABC/some_file) (the URL will not include this).)";
std::string expected = std::string expected =
"(This is a <a href=\"/ABC/some file\">link</a> (the URL will include " "(This is a <a href=\"/ABC/some_file\">link</a> (the URL will not include "
"this).)"; "this).)";
auto linkParser = std::make_shared<maddy::LinkParser>(); auto linkParser = std::make_shared<maddy::LinkParser>();
@@ -53,6 +86,99 @@ TEST(
ASSERT_EQ(expected, text); ASSERT_EQ(expected, text);
} }
// A URL with a space inside it is not valid, so the parser has to leave
// the markdown exactly as it found it.
TEST(MADDY_LINKPARSER, ItDoesntReplaceMarkdownWithSpaceInURL)
{
  std::string markdown = "This is an invalid [link](/ABC/some file)";
  // Snapshot taken before parsing: the line is expected to stay unchanged.
  const std::string untouched = markdown;

  const auto parser = std::make_shared<maddy::LinkParser>();
  parser->Parse(markdown);

  ASSERT_EQ(untouched, markdown);
}
// A quoted string after the URL becomes the anchor's title attribute.
TEST(MADDY_LINKPARSER, ItReplacesMarkdownWithTitleText)
{
  const std::string wantedHtml =
    "Link to <a href=\"http:://example.com\" title=\"title text\">name</a>";
  std::string markdown = "Link to [name](http:://example.com \"title text\")";

  const auto parser = std::make_shared<maddy::LinkParser>();
  parser->Parse(markdown);

  ASSERT_EQ(wantedHtml, markdown);
}
// Link with title text; going by the test name, the input is meant to
// carry additional spaces.
// NOTE(review): as written the input equals the one used in
// ItReplacesMarkdownWithTitleText - presumably a run of spaces was
// collapsed somewhere; confirm against the intended input.
TEST(MADDY_LINKPARSER, ItReplacesMarkdownWithSpacesWithTitleText)
{
  const std::string wantedHtml =
    "Link to <a href=\"http:://example.com\" title=\"title text\">name</a>";
  std::string markdown = "Link to [name](http:://example.com \"title text\")";

  const auto parser = std::make_shared<maddy::LinkParser>();
  parser->Parse(markdown);

  ASSERT_EQ(wantedHtml, markdown);
}
// Spaces between the closing quote of the title and the closing
// parenthesis are ignored.
TEST(MADDY_LINKPARSER, ItReplacesMarkdownWithMoreSpacesWithTitleText)
{
  const std::string wantedHtml =
    "Link to <a href=\"http:://example.com\" title=\"title text\">name</a>";
  std::string markdown =
    "Link to [name](http:://example.com \"title text\" )";

  const auto parser = std::make_shared<maddy::LinkParser>();
  parser->Parse(markdown);

  ASSERT_EQ(wantedHtml, markdown);
}
// Parentheses inside the quoted title must not terminate the link early.
TEST(MADDY_LINKPARSER, ItReplacesMarkdownWithParentheticalText)
{
  const std::string wantedHtml =
    "Link to <a href=\"http:://example.com\" title=\"title (text)\">name</a>";
  std::string markdown = "Link to [name](http:://example.com \"title (text)\")";

  const auto parser = std::make_shared<maddy::LinkParser>();
  parser->Parse(markdown);

  ASSERT_EQ(wantedHtml, markdown);
}
// A title part holding more than two quotation marks is not a valid
// link, so the markdown has to come back unchanged.
TEST(MADDY_LINKPARSER, ItDoesntReplaceMarkdownWithTooManyQuotes)
{
  std::string markdown =
    "This is an invalid [link](/ABC/some_file \"title \" text \")";
  // Snapshot taken before parsing: the line is expected to stay unchanged.
  const std::string untouched = markdown;

  const auto parser = std::make_shared<maddy::LinkParser>();
  parser->Parse(markdown);

  ASSERT_EQ(untouched, markdown);
}
// Pins the current behaviour for a URL that contains a quotation mark:
// the markdown is left untouched.
TEST(MADDY_LINKPARSER, ItDoesntReplaceMarkdownWithQuoteInLink)
{
  // This is actually legal markdown, but hard to parse with regexes;
  // see the disabled 'ItReplacesMarkdownWithQuoteInLink' test.
  //
  // For now, don't try to translate it; it would produce invalid HTML.
  // The eventually correct output (not asserted here) would be:
  //   Some text <a href="http://example.com/%22foo">Link Title</a> bla.
  std::string text = "Some text [Link Title](http://example.com/\"foo ) bla.";
  const std::string current_expected =
    "Some text [Link Title](http://example.com/\"foo ) bla.";

  auto linkParser = std::make_shared<maddy::LinkParser>();
  linkParser->Parse(text);

  ASSERT_EQ(current_expected, text);
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
class DISABLED_MADDY_LINKPARSER : public ::testing::Test class DISABLED_MADDY_LINKPARSER : public ::testing::Test
@@ -70,3 +196,17 @@ TEST_F(DISABLED_MADDY_LINKPARSER, ItReplacesNoImageMarkdownWithLinks)
ASSERT_EQ(expected, text); ASSERT_EQ(expected, text);
} }
// Desired future behaviour: a quotation mark inside the URL is emitted as
// '%22'. Declared with TEST_F because DISABLED_MADDY_LINKPARSER is a
// fixture class that another test in this suite already uses via TEST_F;
// GoogleTest does not allow mixing TEST and TEST_F in one test suite.
// NOTE(review): TEST_F also runs the fixture's SetUp()/TearDown(), whose
// bodies are not visible here - confirm they are harmless for this test.
TEST_F(DISABLED_MADDY_LINKPARSER, ItReplacesMarkdownWithQuoteInLink)
{
  // This is legal markdown, but hard to parse with regexes; dropping it
  // here for a future update.
  std::string text = "Some text [Link Title](http://example.com/\"foo ) bla.";
  const std::string expected =
    "Some text <a href=\"http://example.com/%22foo\">Link Title</a> bla.";

  auto linkParser = std::make_shared<maddy::LinkParser>();
  linkParser->Parse(text);

  ASSERT_EQ(expected, text);
}