compact_enc_det proof of concept

This commit is contained in:
M66B
2020-10-14 20:54:28 +02:00
parent aeff1a39a0
commit 68bfa6c9a7
29 changed files with 21735 additions and 0 deletions

View File

@@ -28,6 +28,12 @@ import java.nio.charset.UnsupportedCharsetException;
class CharsetHelper {
private static final int SAMPLE_SIZE = 1024;
// Load the native "compact_enc_det" library once, when this class is initialized,
// so that the native method(s) declared in this class can be resolved.
// System.loadLibrary throws UnsatisfiedLinkError if the library cannot be found;
// that error is not caught here, so it would surface during class initialization.
static {
System.loadLibrary("compact_enc_det");
}
// Native entry point implemented in the "compact_enc_det" library (JNI).
// Takes the raw bytes to inspect and returns a String; presumably the name of the
// detected character encoding -- TODO confirm against the native implementation.
// NOTE(review): behaviour for null/empty input and whether the result can be null
// is defined by the native side and is not visible from this file.
private static native String jni_detect(byte[] chars);
static boolean isUTF8(String text) {
// Get extended ASCII characters
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
@@ -123,6 +129,7 @@ class CharsetHelper {
static Charset detect(String text) {
try {
byte[] octets = text.getBytes(StandardCharsets.ISO_8859_1);
Log.i("compact_enc_det=" + jni_detect(octets));
int offset = 0;
UniversalDetector detector = new UniversalDetector();