mirror of
https://github.com/M66B/FairEmail.git
synced 2026-04-05 00:23:09 +02:00
Use ref charset and language for charset detection
This commit is contained in:
@@ -437,7 +437,6 @@ public class ActivityEML extends ActivityBase {
|
||||
Object content = part.getContent();
|
||||
if (content instanceof String) {
|
||||
String text = (String) content;
|
||||
Charset detected = CharsetHelper.detect(text);
|
||||
|
||||
String charset;
|
||||
try {
|
||||
@@ -450,6 +449,7 @@ public class ActivityEML extends ActivityBase {
|
||||
charset = StandardCharsets.ISO_8859_1.name();
|
||||
|
||||
Charset cs = Charset.forName(charset);
|
||||
Charset detected = CharsetHelper.detect(text, cs);
|
||||
boolean isUtf8 = CharsetHelper.isUTF8(text.getBytes(cs));
|
||||
boolean isW1252 = !Objects.equals(text, CharsetHelper.utf8toW1252(text));
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ public class CharsetHelper {
|
||||
}
|
||||
}
|
||||
|
||||
private static native DetectResult jni_detect_charset(byte[] octets);
|
||||
private static native DetectResult jni_detect_charset(byte[] octets, String ref, String lang);
|
||||
|
||||
static boolean isUTF8(String text) {
|
||||
// Get extended ASCII characters
|
||||
@@ -159,7 +159,10 @@ public class CharsetHelper {
|
||||
}
|
||||
}
|
||||
|
||||
public static Charset detect(String text) {
|
||||
public static Charset detect(String text, Charset ref) {
|
||||
if (text == null)
|
||||
return null;
|
||||
|
||||
try {
|
||||
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
|
||||
|
||||
@@ -172,7 +175,9 @@ public class CharsetHelper {
|
||||
}
|
||||
|
||||
Log.i("compact_enc_det sample=" + sample.length);
|
||||
DetectResult detected = jni_detect_charset(sample);
|
||||
DetectResult detected = jni_detect_charset(sample,
|
||||
ref == null ? null : ref.name(),
|
||||
Locale.getDefault().getLanguage());
|
||||
|
||||
if (TextUtils.isEmpty(detected.charset)) {
|
||||
Log.e("compact_enc_det result=" + detected);
|
||||
@@ -185,7 +190,7 @@ public class CharsetHelper {
|
||||
Log.e("compact_enc_det result=" + detected + " chinese=" + chinese);
|
||||
if (!chinese)
|
||||
return null;
|
||||
} else // GBK, Big5, ISO-2022-JP, HZ-GB-2312, Shift_JIS
|
||||
} else // GBK, Big5, ISO-2022-JP, HZ-GB-2312, Shift_JIS, x-binaryenc
|
||||
Log.e("compact_enc_det result=" + detected);
|
||||
|
||||
return Charset.forName(detected.charset);
|
||||
|
||||
@@ -1840,7 +1840,7 @@ public class MessageHelper {
|
||||
if (header.trim().startsWith("=?"))
|
||||
return header;
|
||||
|
||||
Charset detected = CharsetHelper.detect(header);
|
||||
Charset detected = CharsetHelper.detect(header, StandardCharsets.ISO_8859_1);
|
||||
if (detected == null && CharsetHelper.isUTF8(header))
|
||||
detected = StandardCharsets.UTF_8;
|
||||
if (detected == null ||
|
||||
@@ -2928,7 +2928,7 @@ public class MessageHelper {
|
||||
Log.i("Charset upgrade=UTF8");
|
||||
result = new String(result.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
|
||||
} else {
|
||||
Charset detected = CharsetHelper.detect(result);
|
||||
Charset detected = CharsetHelper.detect(result, StandardCharsets.ISO_8859_1);
|
||||
if (detected == null) {
|
||||
if (CharsetHelper.isUTF8(result)) {
|
||||
Log.i("Charset plain=UTF8");
|
||||
@@ -2984,7 +2984,7 @@ public class MessageHelper {
|
||||
// Fix incorrect UTF16
|
||||
try {
|
||||
if (CHARSET16.contains(cs)) {
|
||||
Charset detected = CharsetHelper.detect(result);
|
||||
Charset detected = CharsetHelper.detect(result, cs);
|
||||
if (!CHARSET16.contains(detected))
|
||||
Log.w(new Throwable("Charset=" + cs + " detected=" + detected));
|
||||
if (StandardCharsets.US_ASCII.equals(detected) ||
|
||||
@@ -3034,7 +3034,7 @@ public class MessageHelper {
|
||||
break;
|
||||
}
|
||||
|
||||
Charset detected = CharsetHelper.detect(result);
|
||||
Charset detected = CharsetHelper.detect(result, c);
|
||||
if (c.equals(detected))
|
||||
break;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user