Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,25 @@ The live preview connection between the local server and the browser now runs ov

 

#### [Extra spaces in CJK paragraph soft line breaks](https://quarkdown.com/wiki/localization#cjk-locales)

Soft line breaks within paragraphs (newlines in the source that are not preceded by two or more spaces or a backslash) no longer insert a space when the document language is a CJK locale (Chinese, Japanese, Korean) set via [`.doclang`](https://quarkdown.com/wiki/document-metadata).

For example, with `.doclang {zh}`:

```markdown
这是一个
中文段落
```

is now rendered as `这是一个中文段落` instead of `这是一个 中文段落`.

In all other languages, soft line breaks continue to render as a single space, following the CommonMark specification.

Thanks @CarmJos!

 

#### [Security] Fixed native content injection via string manipulation

Fixed an issue that allowed string->string functions to let unsanitized HTML through, even when the `native-content` permission was not granted.
Expand Down
7 changes: 7 additions & 0 deletions docs/localization.qd
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,10 @@ For instance, [typed boxes](box.qd) feature a localized title by default, such a
After that, assuming Canadian French is set in `.doclang`, the new entries will be available to the `.box` function.

Built-in table names and entries are listed in this page's [*Built-in localization*](#built-in-localization).

## CJK locales

When the document language is set to a CJK locale (Chinese, Japanese, Korean):

- Default paragraph spacing and indentation are affected.
- Soft line breaks within paragraphs render with no space between lines.
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.quarkdown.core.ast.base.inline

import com.quarkdown.core.ast.Node
import com.quarkdown.core.visitor.node.NodeVisitor

/**
 * Inline node for a soft line break: a newline inside a paragraph's source
 * that does not qualify as a hard line break, i.e. one that is not preceded
 * by two or more spaces or by a backslash.
 *
 * How a soft break is rendered is decided by the document language:
 * - CJK languages (Chinese, Japanese, Korean): no space is inserted;
 * - any other language: a space is inserted.
 */
object SoftBreak : Node {
    /**
     * Dispatches this node to [visitor].
     * @param visitor the visitor that handles this node
     * @return the value produced by the visitor for a soft break
     */
    override fun <T> accept(visitor: NodeVisitor<T>): T {
        return visitor.visit(this)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.quarkdown.core.ast.base.inline.Emphasis
import com.quarkdown.core.ast.base.inline.Image
import com.quarkdown.core.ast.base.inline.LineBreak
import com.quarkdown.core.ast.base.inline.Link
import com.quarkdown.core.ast.base.inline.SoftBreak
import com.quarkdown.core.ast.base.inline.Strong
import com.quarkdown.core.ast.base.inline.StrongEmphasis
import com.quarkdown.core.ast.base.inline.Text
Expand Down Expand Up @@ -103,6 +104,11 @@ class InlineAstBuilder : AstBuilder() {
* @see LineBreak
*/
fun lineBreak() = +LineBreak

/**
 * Applies this builder's unary plus operator to the [SoftBreak] singleton.
 * NOTE(review): the operator is declared outside this view; presumably it
 * appends the node to the content being built — confirm against `AstBuilder`.
 * @see SoftBreak
 */
fun softBreak() = +SoftBreak
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,17 @@ open class BaseMarkdownInlineTokenRegexPatterns {
}

/**
* A hard line break given by two or more spaces at the end of the line.
* A line break given by a newline that is not at the end of a paragraph.
* If preceded by two or more spaces or a backslash, the line break is a hard line break;
* otherwise it is a soft line break.
* @see LineBreakToken
*/
val lineBreak by lazy {
TokenRegexPattern(
name = "InlineLineBreak",
wrap = ::LineBreakToken,
regex =
"( {2,}|\\\\)\\R(?!\\s*$)",
"(?:( {2,}|\\\\))?\\R(?!\\s*$)",
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,14 @@ class CodeSpanToken(
}

/**
* A soft line break.
* A line break.
* Example:
* ```
* Line 1<space><space>
* Line 2
* ```
* @see com.quarkdown.core.ast.base.inline.LineBreak
* @see com.quarkdown.core.ast.base.inline.SoftBreak
*/
class LineBreakToken(
data: TokenData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,15 @@ interface Locale {
*/
val shortTag: String
}

/**
 * Lowercase language codes treated as CJK (Chinese, Japanese, Korean).
 * Lookups against this set are expected to be performed on lowercased input.
 */
private val CJK_LANGUAGE_CODES: Set<String> =
    setOf(
        "zh",
        "ja",
        "ko",
        "yue",
        "cmn",
        "wuu",
        "hak",
    )

/**
 * Tells whether this locale denotes a CJK (Chinese, Japanese, Korean) language,
 * in which case soft line breaks within paragraphs should not insert a space.
 *
 * The check is case-insensitive: the locale's `code` is tested first,
 * then its `shortTag`, each lowercased before being looked up in [CJK_LANGUAGE_CODES].
 *
 * @return `true` if either identifier is a known CJK language code,
 * `false` otherwise or if the locale is `null`
 */
fun Locale?.isCJK(): Boolean {
    // A missing locale is never CJK.
    if (this == null) return false
    if (code.lowercase() in CJK_LANGUAGE_CODES) return true
    return shortTag.lowercase() in CJK_LANGUAGE_CODES
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import com.quarkdown.core.ast.base.inline.ReferenceDefinitionFootnote
import com.quarkdown.core.ast.base.inline.ReferenceFootnote
import com.quarkdown.core.ast.base.inline.ReferenceImage
import com.quarkdown.core.ast.base.inline.ReferenceLink
import com.quarkdown.core.ast.base.inline.SoftBreak
import com.quarkdown.core.ast.base.inline.Strikethrough
import com.quarkdown.core.ast.base.inline.Strong
import com.quarkdown.core.ast.base.inline.StrongEmphasis
Expand Down Expand Up @@ -157,7 +158,10 @@ class InlineTokenParser(
return Comment
}

override fun visit(token: LineBreakToken): Node = LineBreak
/**
 * Turns a line break token into its node, deciding by the first character of the matched text:
 * a leading space or backslash yields a hard [LineBreak], anything else a [SoftBreak].
 * @param token the lexed line break token
 * @return [LineBreak] for a hard break, [SoftBreak] otherwise
 */
override fun visit(token: LineBreakToken): Node =
    when (token.data.text.first()) {
        ' ', '\\' -> LineBreak
        else -> SoftBreak
    }

override fun visit(token: LinkToken): LinkNode {
val groups = token.data.groups.iterator(consumeAmount = 2)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import com.quarkdown.core.ast.NestableNode
import com.quarkdown.core.ast.Node
import com.quarkdown.core.ast.base.inline.CriticalContent
import com.quarkdown.core.ast.base.inline.PlainTextNode
import com.quarkdown.core.ast.base.inline.SoftBreak
import com.quarkdown.core.ast.dsl.buildInline
import com.quarkdown.core.ast.quarkdown.inline.TextSymbol
import com.quarkdown.core.visitor.node.NodeVisitor
Expand Down Expand Up @@ -84,6 +85,7 @@ fun InlineContent.toPlainText(renderer: NodeVisitor<CharSequence>? = null): Stri
is CriticalContent if renderer != null -> builder.append(renderer.visit(it))
is TextSymbol if renderer != null -> builder.append(renderer.visit(it))
is PlainTextNode -> builder.append(it.text)
is SoftBreak -> builder.append('\n')
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import com.quarkdown.core.ast.base.inline.Link
import com.quarkdown.core.ast.base.inline.ReferenceFootnote
import com.quarkdown.core.ast.base.inline.ReferenceImage
import com.quarkdown.core.ast.base.inline.ReferenceLink
import com.quarkdown.core.ast.base.inline.SoftBreak
import com.quarkdown.core.ast.base.inline.Strikethrough
import com.quarkdown.core.ast.base.inline.Strong
import com.quarkdown.core.ast.base.inline.StrongEmphasis
Expand Down Expand Up @@ -109,6 +110,8 @@ interface NodeVisitor<T> {

fun visit(node: LineBreak): T

fun visit(node: SoftBreak): T

fun visit(node: CriticalContent): T

fun visit(node: Link): T
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,9 @@ class BlockParserTest {

private val TextNode.rawText: String
get() {
(children.singleOrNull() as? PlainTextNode)?.let {
return it.text
}
throw IllegalStateException("rawText requires a single PlainText node")
return children
.filterIsInstance<PlainTextNode>()
.joinToString("\n") { it.text }
}

/**
Expand Down Expand Up @@ -383,7 +382,7 @@ class BlockParserTest {
with(nodes.next()) {
val paragraph = children.first() as Paragraph
assertIs<ReferenceLink>(paragraph.children.first())
assertEquals("not a typed quote.", (paragraph.children[1] as Text).text.trimStart())
assertEquals("not a typed quote.", paragraph.text.toPlainText().trimStart())
assertNull(type)
}

Expand Down Expand Up @@ -446,12 +445,12 @@ class BlockParserTest {
with(nodes.next()) {
assertEquals("label", rawText)
assertEquals("https://google.com", url)
assertNodeEquals(listOf(Text("Multiline\ntitle")), title!!)
assertEquals("Multiline\ntitle", title!!.toPlainText())
}
with(nodes.next()) {
assertEquals("label", rawText)
assertEquals("https://google.com", url)
assertNodeEquals(listOf(Text("Line 1\nLine 2\nLine 3")), title!!)
assertEquals("Line 1\nLine 2\nLine 3", title!!.toPlainText())
}
with(nodes.next()) {
assertEquals("label", rawText)
Expand Down
65 changes: 55 additions & 10 deletions quarkdown-core/src/test/kotlin/com/quarkdown/core/LexerTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -254,10 +254,16 @@ class LexerTest {
assertIsNot<CommentToken>(tokens.next())
assertIs<CommentToken>(tokens.next())
assertIsNot<CommentToken>(tokens.next())
assertIsNot<CommentToken>(tokens.next())
assertIsNot<CommentToken>(tokens.next())
assertIs<CommentToken>(tokens.next())
assertIsNot<CommentToken>(tokens.next())
assertIsNot<CommentToken>(tokens.next())
assertIsNot<CommentToken>(tokens.next())
assertIs<CommentToken>(tokens.next())
assertIsNot<CommentToken>(tokens.next())
assertIsNot<CommentToken>(tokens.next())
assertIsNot<CommentToken>(tokens.next())
}

@Test
Expand Down Expand Up @@ -288,7 +294,7 @@ class LexerTest {
assertIs<EntityToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<EntityToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<EntityToken>(tokens.next())
assertIs<EntityToken>(tokens.next())
assertIs<EntityToken>(tokens.next())
Expand All @@ -301,21 +307,50 @@ class LexerTest {
val tokens = inlineLex(readSource("/lexing/inlinefunction.md"))
assertIs<FunctionCallToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<FunctionCallToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<FunctionCallToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<FunctionCallToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<FunctionCallToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<FunctionCallToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<StrongToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<FunctionCallToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<FunctionCallToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())

assertFalse(tokens.hasNext())
Expand All @@ -335,35 +370,42 @@ class LexerTest {
assertIs<LinkToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<StrongToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<StrongToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<EmphasisToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<StrongEmphasisToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<DiamondAutolinkToken>(tokens.next())
assertIs<UrlAutolinkToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<LinkToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LinkToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<ReferenceLinkToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<ReferenceLinkToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<ReferenceLinkToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<ImageToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<ReferenceImageToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<CommentToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<StrongToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<ReferenceLinkToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<InlineMathToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertIs<FunctionCallToken>(tokens.next())
Expand Down Expand Up @@ -393,6 +435,8 @@ class LexerTest {
assertIs<PlainTextToken>(tokens.next())
assertSymbolEquals(TextSymbolReplacement.TYPOGRAPHIC_RIGHT_APOSTROPHE)
assertIs<PlainTextToken>(tokens.next())
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertSymbolEquals(TextSymbolReplacement.DOUBLE_RIGHT_ARROW)
assertIs<PlainTextToken>(tokens.next())
assertSymbolEquals(TextSymbolReplacement.NOT_EQUAL)
Expand All @@ -408,7 +452,7 @@ class LexerTest {
assertSymbolEquals(TextSymbolReplacement.EN_DASH)
assertIs<PlainTextToken>(tokens.next())
assertSymbolEquals(TextSymbolReplacement.ELLIPSIS)
assertIs<PlainTextToken>(tokens.next()) // Soft line break
assertIs<LineBreakToken>(tokens.next()) // Soft line break
assertSymbolEquals(TextSymbolReplacement.TYPOGRAPHIC_LEFT_APOSTROPHE)
assertIs<PlainTextToken>(tokens.next())
assertSymbolEquals(TextSymbolReplacement.TYPOGRAPHIC_RIGHT_APOSTROPHE)
Expand All @@ -424,6 +468,7 @@ class LexerTest {
assertSymbolEquals(TextSymbolReplacement.TYPOGRAPHIC_RIGHT_QUOTATION_MARK)
assertIs<PlainTextToken>(tokens.next())
assertSymbolEquals(TextSymbolReplacement.TRADEMARK)
assertIs<LineBreakToken>(tokens.next())
assertIs<PlainTextToken>(tokens.next())
assertSymbolEquals(TextSymbolReplacement.TYPOGRAPHIC_LEFT_QUOTATION_MARK)
assertIs<PlainTextToken>(tokens.next())
Expand Down
Loading
Loading