From 580191fb172eccbbd12c2dfbccbd8346f38de91e Mon Sep 17 00:00:00 2001 From: Aya Morisawa Date: Sat, 22 Dec 2018 00:41:54 +0900 Subject: [PATCH] Improve MFM bracket matching Co-authored-by: syuilo --- src/mfm/parser.ts | 42 +++++++------------- src/prelude/array.ts | 6 +++ test/mfm.ts | 95 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 115 insertions(+), 28 deletions(-) diff --git a/src/mfm/parser.ts b/src/mfm/parser.ts index 885b7e01c..205d5ede1 100644 --- a/src/mfm/parser.ts +++ b/src/mfm/parser.ts @@ -1,7 +1,7 @@ import * as P from 'parsimmon'; import parseAcct from '../misc/acct/parse'; import { toUnicode } from 'punycode'; -import { takeWhile } from '../prelude/array'; +import { takeWhile, cumulativeSum } from '../prelude/array'; import { Tree } from '../prelude/tree'; import * as T from '../prelude/tree'; @@ -42,30 +42,18 @@ export function createTree(type: string, children: MfmForest, props: any): MfmTr return T.createTree({ type, props }, children); } -function getTrailingPosition(x: string): number { - const brackets = [ - ['(', ')'], - ['「', '」'], - ]; - const pendingBrackets = [] as any; - const end = x.split('').findIndex(char => { - const closeMatch = brackets.map(x => x[1]).indexOf(char); - const openMatch = brackets.map(x => x[0]).indexOf(char); - if (closeMatch != -1) { - if (pendingBrackets[closeMatch] > 0) { - pendingBrackets[closeMatch]--; - return false; - } else { - return true; - } - } else if (openMatch != -1) { - pendingBrackets[openMatch] = (pendingBrackets[openMatch] || 0) + 1; - return false; - } else { - return false; - } - }); - return end > 0 ? 
/**
 * Trims `s` down to its longest leading part in which brackets are balanced.
 *
 * Recognised brackets are `(` / `)` and `「` / `」`. All bracket kinds share a
 * single depth counter, so an "intersected" sequence such as `(「)」` is
 * treated as balanced.
 *
 * - If a closing bracket appears while no bracket is open, the string is cut
 *   right before that closing bracket.
 * - Otherwise the string is cut right after the last position at which the
 *   depth is zero, which drops any trailing part that still has an unclosed
 *   opening bracket.
 *
 * @param s Text to trim (e.g. a hashtag or URL candidate).
 * @returns The trimmed prefix of `s`; may be the empty string.
 */
export function removeOrphanedBrackets(s: string): string {
  const openBrackets = ['(', '「'];
  const closeBrackets = [')', '」'];

  // depths[i] is the bracket nesting depth after consuming s[i].
  const depths = cumulativeSum(s.split('').map(c => {
    if (openBrackets.includes(c)) return 1;
    if (closeBrackets.includes(c)) return -1;
    return 0;
  }));

  // A negative depth means a closing bracket with no matching opener.
  const firstOrphanedCloseBracket = depths.findIndex(depth => depth < 0);
  if (firstOrphanedCloseBracket !== -1) return s.slice(0, firstOrphanedCloseBracket);

  // No zero depth at all yields -1, which makes the result the empty string.
  const lastMatched = depths.lastIndexOf(0);
  return s.slice(0, lastMatched + 1);
}

/**
 * Computes the running (prefix) sums of `xs`.
 *
 * @param xs Numbers to accumulate; the array is not modified.
 * @returns A new array of the same length whose element `i` equals
 *          `xs[0] + ... + xs[i]`. An empty input yields an empty array.
 */
export function cumulativeSum(xs: readonly number[]): number[] {
  const ys = Array.from(xs); // work on a copy so the caller's array stays untouched
  for (let i = 1; i < ys.length; i++) ys[i] += ys[i - 1];
  return ys;
}
// Table-driven suite for removeOrphanedBrackets: each row is
// [test title, input string, expected trimmed output].
describe('removeOrphanedBrackets', () => {
  const cases: [string, string, string][] = [
    ['single (contained)', '(foo)', '(foo)'],
    ['single (head)', '(foo)bar', '(foo)bar'],
    ['single (tail)', 'foo(bar)', 'foo(bar)'],
    ['a', '(foo', ''],
    ['b', ')foo', ''],
    ['nested', 'foo(「(bar)」)', 'foo(「(bar)」)'],
    ['no brackets', 'foo', 'foo'],
    ['with foreign bracket (single)', 'foo(bar))', 'foo(bar)'],
    ['with foreign bracket (open)', 'foo(bar', 'foo'],
    ['with foreign bracket (close)', 'foo)bar', 'foo'],
    ['with foreign bracket (close and open)', 'foo)(bar', 'foo'],
    ['various bracket type', 'foo「(bar)」(', 'foo「(bar)」'],
    ['intersected', 'foo(「)」', 'foo(「)」'],
  ];

  // Register one mocha test per row, in the same order as the table.
  for (const [title, input, expected] of cases) {
    it(title, () => {
      assert.deepStrictEqual(removeOrphanedBrackets(input), expected);
    });
  }
});