From aa3f2ee50da3c0dd3e36518f9856e9db9091f8ac Mon Sep 17 00:00:00 2001
From: M66B
Date: Sat, 30 Apr 2022 11:02:05 +0200
Subject: [PATCH] Require 10 words for language detection

---
 app/src/main/java/eu/faircode/email/TextHelper.java | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/app/src/main/java/eu/faircode/email/TextHelper.java b/app/src/main/java/eu/faircode/email/TextHelper.java
index ae8ca6f198..d9c5362ac6 100644
--- a/app/src/main/java/eu/faircode/email/TextHelper.java
+++ b/app/src/main/java/eu/faircode/email/TextHelper.java
@@ -60,6 +60,7 @@ import javax.xml.transform.stream.StreamResult;
 import javax.xml.transform.stream.StreamSource;
 
 public class TextHelper {
+    private static final int MIN_WORDS = 10;
     private static final int MAX_DETECT_SAMPLE_SIZE = 8192;
     private static final float MIN_DETECT_PROBABILITY = 0.80f;
     private static final String TRANSLITERATOR = "Any-Latin; Latin-ASCII";
@@ -82,6 +83,9 @@ public class TextHelper {
         if (TextUtils.isEmpty(text))
             return null;
 
+        if (text.split("\\s+").length < MIN_WORDS)
+            return null;
+
         byte[] octets = text.getBytes();
         byte[] sample;
         if (octets.length < MAX_DETECT_SAMPLE_SIZE)