chat filter: Ignore character accents for matching

This lets plain latin-character filters match messages with accents
and diacritics which are not easily typed on all keyboard layouts.

Co-authored-by: Jordan Atwood <jordan.atwood423@gmail.com>
This commit is contained in:
Adam
2021-11-13 09:50:23 -05:00
parent 67f7e7f488
commit 1362af414a
2 changed files with 46 additions and 4 deletions

View File

@@ -36,6 +36,7 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@@ -316,6 +317,9 @@ public class ChatFilterPlugin extends Plugin
{
String strippedMessage = jagexPrintableCharMatcher.retainFrom(message)
.replace('\u00A0', ' ');
String strippedAccents = StringUtils.stripAccents(strippedMessage);
assert strippedMessage.length() == strippedAccents.length();
if (username != null && shouldFilterByName(username))
{
switch (config.filterType())
@@ -332,16 +336,20 @@ public class ChatFilterPlugin extends Plugin
boolean filtered = false;
for (Pattern pattern : filteredPatterns)
{
Matcher m = pattern.matcher(strippedMessage);
Matcher m = pattern.matcher(strippedAccents);
StringBuffer sb = new StringBuffer();
StringBuilder sb = new StringBuilder();
int idx = 0;
while (m.find())
{
switch (config.filterType())
{
case CENSOR_WORDS:
m.appendReplacement(sb, StringUtils.repeat('*', m.group(0).length()));
MatchResult matchResult = m.toMatchResult();
sb.append(strippedMessage, idx, matchResult.start())
.append(StringUtils.repeat('*', matchResult.group().length()));
idx = m.end();
filtered = true;
break;
case CENSOR_MESSAGE:
@@ -350,9 +358,10 @@ public class ChatFilterPlugin extends Plugin
return null;
}
}
m.appendTail(sb);
sb.append(strippedMessage.substring(idx));
strippedMessage = sb.toString();
assert strippedMessage.length() == strippedAccents.length();
}
return filtered ? strippedMessage : message;
@@ -364,15 +373,18 @@ public class ChatFilterPlugin extends Plugin
filteredNamePatterns.clear();
Text.fromCSV(config.filteredWords()).stream()
.map(StringUtils::stripAccents)
.map(s -> Pattern.compile(Pattern.quote(s), Pattern.CASE_INSENSITIVE))
.forEach(filteredPatterns::add);
NEWLINE_SPLITTER.splitToList(config.filteredRegex()).stream()
.map(StringUtils::stripAccents)
.map(ChatFilterPlugin::compilePattern)
.filter(Objects::nonNull)
.forEach(filteredPatterns::add);
NEWLINE_SPLITTER.splitToList(config.filteredNames()).stream()
.map(StringUtils::stripAccents)
.map(ChatFilterPlugin::compilePattern)
.filter(Objects::nonNull)
.forEach(filteredNamePatterns::add);

View File

@@ -186,6 +186,36 @@ public class ChatFilterPluginTest
assertNull(chatFilterPlugin.censorMessage("Blue", "hello\u00A0osrs"));
}
@Test
public void testFilterUnicode()
{
	// An unaccented filter word is configured; the incoming message spells it with diacritics.
	final String accentedMessage = "plëäsë fïltërmë plügïn";
	final String expected = "plëäsë ******** plügïn";

	when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
	when(chatFilterConfig.filteredWords()).thenReturn("filterme");
	chatFilterPlugin.updateFilteredPatterns();

	// Only the matched word is starred out; the other accented words are expected back unchanged.
	final String censored = chatFilterPlugin.censorMessage("Blue", accentedMessage);
	assertEquals(expected, censored);
}
@Test
public void testUnicodeFiltersUnicode()
{
	// The filter word itself is written with diacritics, exactly as it appears in the message.
	final String accentedMessage = "plëäsë fïltërmë plügïn";
	final String expected = "****** fïltërmë plügïn";

	when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
	when(chatFilterConfig.filteredWords()).thenReturn("plëäsë");
	chatFilterPlugin.updateFilteredPatterns();

	// The accented filter word is censored; the remaining words are expected back unchanged.
	final String censored = chatFilterPlugin.censorMessage("Blue", accentedMessage);
	assertEquals(expected, censored);
}
@Test
public void testMixedUnicodeFiltersUnicode()
{
	// Two filter words are configured as CSV: one accented, one plain.
	final String accentedMessage = "plëäsë fïltërmë plügïn";
	final String expected = "****** ******** plügïn";

	when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
	when(chatFilterConfig.filteredWords()).thenReturn("plëäsë, filterme");
	chatFilterPlugin.updateFilteredPatterns();

	// Both configured words are censored in the same message; the last word is left alone.
	final String censored = chatFilterPlugin.censorMessage("Blue", accentedMessage);
	assertEquals(expected, censored);
}
@Test
public void testMessageFromFriendIsFiltered()
{