chat filter: Ignore character accents for matching

This lets plain Latin-character filters match messages containing accents
and diacritics, which are not easily typed on all keyboard layouts.

Co-authored-by: Jordan Atwood <jordan.atwood423@gmail.com>
This commit is contained in:
Adam
2021-11-13 09:50:23 -05:00
parent 67f7e7f488
commit 1362af414a
2 changed files with 46 additions and 4 deletions

View File

@@ -36,6 +36,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.regex.MatchResult;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException; import java.util.regex.PatternSyntaxException;
@@ -316,6 +317,9 @@ public class ChatFilterPlugin extends Plugin
{ {
String strippedMessage = jagexPrintableCharMatcher.retainFrom(message) String strippedMessage = jagexPrintableCharMatcher.retainFrom(message)
.replace('\u00A0', ' '); .replace('\u00A0', ' ');
String strippedAccents = StringUtils.stripAccents(strippedMessage);
assert strippedMessage.length() == strippedAccents.length();
if (username != null && shouldFilterByName(username)) if (username != null && shouldFilterByName(username))
{ {
switch (config.filterType()) switch (config.filterType())
@@ -332,16 +336,20 @@ public class ChatFilterPlugin extends Plugin
boolean filtered = false; boolean filtered = false;
for (Pattern pattern : filteredPatterns) for (Pattern pattern : filteredPatterns)
{ {
Matcher m = pattern.matcher(strippedMessage); Matcher m = pattern.matcher(strippedAccents);
StringBuffer sb = new StringBuffer(); StringBuilder sb = new StringBuilder();
int idx = 0;
while (m.find()) while (m.find())
{ {
switch (config.filterType()) switch (config.filterType())
{ {
case CENSOR_WORDS: case CENSOR_WORDS:
m.appendReplacement(sb, StringUtils.repeat('*', m.group(0).length())); MatchResult matchResult = m.toMatchResult();
sb.append(strippedMessage, idx, matchResult.start())
.append(StringUtils.repeat('*', matchResult.group().length()));
idx = m.end();
filtered = true; filtered = true;
break; break;
case CENSOR_MESSAGE: case CENSOR_MESSAGE:
@@ -350,9 +358,10 @@ public class ChatFilterPlugin extends Plugin
return null; return null;
} }
} }
m.appendTail(sb); sb.append(strippedMessage.substring(idx));
strippedMessage = sb.toString(); strippedMessage = sb.toString();
assert strippedMessage.length() == strippedAccents.length();
} }
return filtered ? strippedMessage : message; return filtered ? strippedMessage : message;
@@ -364,15 +373,18 @@ public class ChatFilterPlugin extends Plugin
filteredNamePatterns.clear(); filteredNamePatterns.clear();
Text.fromCSV(config.filteredWords()).stream() Text.fromCSV(config.filteredWords()).stream()
.map(StringUtils::stripAccents)
.map(s -> Pattern.compile(Pattern.quote(s), Pattern.CASE_INSENSITIVE)) .map(s -> Pattern.compile(Pattern.quote(s), Pattern.CASE_INSENSITIVE))
.forEach(filteredPatterns::add); .forEach(filteredPatterns::add);
NEWLINE_SPLITTER.splitToList(config.filteredRegex()).stream() NEWLINE_SPLITTER.splitToList(config.filteredRegex()).stream()
.map(StringUtils::stripAccents)
.map(ChatFilterPlugin::compilePattern) .map(ChatFilterPlugin::compilePattern)
.filter(Objects::nonNull) .filter(Objects::nonNull)
.forEach(filteredPatterns::add); .forEach(filteredPatterns::add);
NEWLINE_SPLITTER.splitToList(config.filteredNames()).stream() NEWLINE_SPLITTER.splitToList(config.filteredNames()).stream()
.map(StringUtils::stripAccents)
.map(ChatFilterPlugin::compilePattern) .map(ChatFilterPlugin::compilePattern)
.filter(Objects::nonNull) .filter(Objects::nonNull)
.forEach(filteredNamePatterns::add); .forEach(filteredNamePatterns::add);

View File

@@ -186,6 +186,36 @@ public class ChatFilterPluginTest
assertNull(chatFilterPlugin.censorMessage("Blue", "hello\u00A0osrs")); assertNull(chatFilterPlugin.censorMessage("Blue", "hello\u00A0osrs"));
} }
@Test
public void testFilterUnicode()
{
	// A plain, unaccented filter word should still censor its accented spelling in chat.
	final String chat = "plëäsë fïltërmë plügïn";

	when(chatFilterConfig.filteredWords()).thenReturn("filterme");
	when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
	chatFilterPlugin.updateFilteredPatterns();

	final String censored = chatFilterPlugin.censorMessage("Blue", chat);

	// Only the matched word is starred out; the remaining words keep their accented characters.
	assertEquals("plëäsë ******** plügïn", censored);
}
@Test
public void testUnicodeFiltersUnicode()
{
	// A filter word that is itself written with accents should censor the identical accented word.
	final String chat = "plëäsë fïltërmë plügïn";

	when(chatFilterConfig.filteredWords()).thenReturn("plëäsë");
	when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
	chatFilterPlugin.updateFilteredPatterns();

	final String censored = chatFilterPlugin.censorMessage("Blue", chat);

	// The first word is replaced by six asterisks; the rest of the message is left as typed.
	assertEquals("****** fïltërmë plügïn", censored);
}
@Test
public void testMixedUnicodeFiltersUnicode()
{
	// An accented and an unaccented filter word in one list should both censor their accented targets.
	final String chat = "plëäsë fïltërmë plügïn";

	when(chatFilterConfig.filteredWords()).thenReturn("plëäsë, filterme");
	when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
	chatFilterPlugin.updateFilteredPatterns();

	final String censored = chatFilterPlugin.censorMessage("Blue", chat);

	// Both filtered words are starred out; the final, unfiltered word keeps its accents.
	assertEquals("****** ******** plügïn", censored);
}
@Test @Test
public void testMessageFromFriendIsFiltered() public void testMessageFromFriendIsFiltered()
{ {