chat filter: Ignore character accents for matching
This lets plain Latin-character filters match messages containing accents and diacritics, which are not easily typed on all keyboard layouts. Co-authored-by: Jordan Atwood <jordan.atwood423@gmail.com>
This commit is contained in:
@@ -36,6 +36,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.regex.MatchResult;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.regex.PatternSyntaxException;
|
import java.util.regex.PatternSyntaxException;
|
||||||
@@ -316,6 +317,9 @@ public class ChatFilterPlugin extends Plugin
|
|||||||
{
|
{
|
||||||
String strippedMessage = jagexPrintableCharMatcher.retainFrom(message)
|
String strippedMessage = jagexPrintableCharMatcher.retainFrom(message)
|
||||||
.replace('\u00A0', ' ');
|
.replace('\u00A0', ' ');
|
||||||
|
String strippedAccents = StringUtils.stripAccents(strippedMessage);
|
||||||
|
assert strippedMessage.length() == strippedAccents.length();
|
||||||
|
|
||||||
if (username != null && shouldFilterByName(username))
|
if (username != null && shouldFilterByName(username))
|
||||||
{
|
{
|
||||||
switch (config.filterType())
|
switch (config.filterType())
|
||||||
@@ -332,16 +336,20 @@ public class ChatFilterPlugin extends Plugin
|
|||||||
boolean filtered = false;
|
boolean filtered = false;
|
||||||
for (Pattern pattern : filteredPatterns)
|
for (Pattern pattern : filteredPatterns)
|
||||||
{
|
{
|
||||||
Matcher m = pattern.matcher(strippedMessage);
|
Matcher m = pattern.matcher(strippedAccents);
|
||||||
|
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuilder sb = new StringBuilder();
|
||||||
|
int idx = 0;
|
||||||
|
|
||||||
while (m.find())
|
while (m.find())
|
||||||
{
|
{
|
||||||
switch (config.filterType())
|
switch (config.filterType())
|
||||||
{
|
{
|
||||||
case CENSOR_WORDS:
|
case CENSOR_WORDS:
|
||||||
m.appendReplacement(sb, StringUtils.repeat('*', m.group(0).length()));
|
MatchResult matchResult = m.toMatchResult();
|
||||||
|
sb.append(strippedMessage, idx, matchResult.start())
|
||||||
|
.append(StringUtils.repeat('*', matchResult.group().length()));
|
||||||
|
idx = m.end();
|
||||||
filtered = true;
|
filtered = true;
|
||||||
break;
|
break;
|
||||||
case CENSOR_MESSAGE:
|
case CENSOR_MESSAGE:
|
||||||
@@ -350,9 +358,10 @@ public class ChatFilterPlugin extends Plugin
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
m.appendTail(sb);
|
sb.append(strippedMessage.substring(idx));
|
||||||
|
|
||||||
strippedMessage = sb.toString();
|
strippedMessage = sb.toString();
|
||||||
|
assert strippedMessage.length() == strippedAccents.length();
|
||||||
}
|
}
|
||||||
|
|
||||||
return filtered ? strippedMessage : message;
|
return filtered ? strippedMessage : message;
|
||||||
@@ -364,15 +373,18 @@ public class ChatFilterPlugin extends Plugin
|
|||||||
filteredNamePatterns.clear();
|
filteredNamePatterns.clear();
|
||||||
|
|
||||||
Text.fromCSV(config.filteredWords()).stream()
|
Text.fromCSV(config.filteredWords()).stream()
|
||||||
|
.map(StringUtils::stripAccents)
|
||||||
.map(s -> Pattern.compile(Pattern.quote(s), Pattern.CASE_INSENSITIVE))
|
.map(s -> Pattern.compile(Pattern.quote(s), Pattern.CASE_INSENSITIVE))
|
||||||
.forEach(filteredPatterns::add);
|
.forEach(filteredPatterns::add);
|
||||||
|
|
||||||
NEWLINE_SPLITTER.splitToList(config.filteredRegex()).stream()
|
NEWLINE_SPLITTER.splitToList(config.filteredRegex()).stream()
|
||||||
|
.map(StringUtils::stripAccents)
|
||||||
.map(ChatFilterPlugin::compilePattern)
|
.map(ChatFilterPlugin::compilePattern)
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.forEach(filteredPatterns::add);
|
.forEach(filteredPatterns::add);
|
||||||
|
|
||||||
NEWLINE_SPLITTER.splitToList(config.filteredNames()).stream()
|
NEWLINE_SPLITTER.splitToList(config.filteredNames()).stream()
|
||||||
|
.map(StringUtils::stripAccents)
|
||||||
.map(ChatFilterPlugin::compilePattern)
|
.map(ChatFilterPlugin::compilePattern)
|
||||||
.filter(Objects::nonNull)
|
.filter(Objects::nonNull)
|
||||||
.forEach(filteredNamePatterns::add);
|
.forEach(filteredNamePatterns::add);
|
||||||
|
|||||||
@@ -186,6 +186,36 @@ public class ChatFilterPluginTest
|
|||||||
assertNull(chatFilterPlugin.censorMessage("Blue", "hello\u00A0osrs"));
|
assertNull(chatFilterPlugin.censorMessage("Blue", "hello\u00A0osrs"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFilterUnicode()
|
||||||
|
{
|
||||||
|
when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
|
||||||
|
when(chatFilterConfig.filteredWords()).thenReturn("filterme");
|
||||||
|
|
||||||
|
chatFilterPlugin.updateFilteredPatterns();
|
||||||
|
assertEquals("plëäsë ******** plügïn", chatFilterPlugin.censorMessage("Blue", "plëäsë fïltërmë plügïn"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUnicodeFiltersUnicode()
|
||||||
|
{
|
||||||
|
when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
|
||||||
|
when(chatFilterConfig.filteredWords()).thenReturn("plëäsë");
|
||||||
|
|
||||||
|
chatFilterPlugin.updateFilteredPatterns();
|
||||||
|
assertEquals("****** fïltërmë plügïn", chatFilterPlugin.censorMessage("Blue", "plëäsë fïltërmë plügïn"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMixedUnicodeFiltersUnicode()
|
||||||
|
{
|
||||||
|
when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
|
||||||
|
when(chatFilterConfig.filteredWords()).thenReturn("plëäsë, filterme");
|
||||||
|
|
||||||
|
chatFilterPlugin.updateFilteredPatterns();
|
||||||
|
assertEquals("****** ******** plügïn", chatFilterPlugin.censorMessage("Blue", "plëäsë fïltërmë plügïn"));
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMessageFromFriendIsFiltered()
|
public void testMessageFromFriendIsFiltered()
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user