mirror of
https://github.com/M66B/FairEmail.git
synced 2026-03-28 11:55:20 +01:00
Improved charset detection
This commit is contained in:
@@ -26,9 +26,7 @@ import java.nio.charset.StandardCharsets;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
|
||||
class CharsetHelper {
|
||||
private static UniversalDetector detector = new UniversalDetector();
|
||||
|
||||
private static final int SAMPLE_SIZE = 2 * 1024;
|
||||
private static final int SAMPLE_SIZE = 1024;
|
||||
|
||||
static boolean isUTF8(String text) {
|
||||
// Get extended ASCII characters
|
||||
@@ -124,9 +122,14 @@ class CharsetHelper {
|
||||
|
||||
static Charset detect(String text) {
|
||||
try {
|
||||
byte[] sample = text.getBytes(StandardCharsets.ISO_8859_1);
|
||||
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
|
||||
|
||||
detector.handleData(sample, 0, Math.min(SAMPLE_SIZE, sample.length));
|
||||
int offset = 0;
|
||||
UniversalDetector detector = new UniversalDetector();
|
||||
while (offset < octets.length && !detector.isDone()) {
|
||||
detector.handleData(octets, offset, Math.min(SAMPLE_SIZE, octets.length - offset));
|
||||
offset += SAMPLE_SIZE;
|
||||
}
|
||||
detector.dataEnd();
|
||||
|
||||
String detected = detector.getDetectedCharset();
|
||||
@@ -137,8 +140,6 @@ class CharsetHelper {
|
||||
} catch (Throwable ex) {
|
||||
Log.w(ex);
|
||||
return null;
|
||||
} finally {
|
||||
detector.reset();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user