
Getting Started with TensorFlow Lite / Object Detection on Android

1. Object Detection on Android

In this article we use "TensorFlow Lite" to perform object detection on Android. Whatever the device's rear camera captures is detected in real time, and each detected object is enclosed in a rectangle.


2. Versions

・compileSdkVersion 29
・minSdkVersion 26
・targetSdkVersion 29
・tensorflow-lite:0.0.0-nightly

3. Adding the Dependencies

Add the "CameraX" and "TensorFlow Lite" dependencies to "build.gradle (Module: app)". The aaptOptions block keeps .tflite files uncompressed in the APK so the model can be memory-mapped later (see loadModel()).

android {
    <<omitted>>

    aaptOptions {
        noCompress "tflite"
    }
    compileOptions {
        sourceCompatibility = '1.8'
        targetCompatibility = '1.8'
    }
}
dependencies {
    <<omitted>>

    // CameraX
    def camerax_version = '1.0.0-alpha06'
    implementation "androidx.camera:camera-core:${camerax_version}"
    implementation "androidx.camera:camera-camera2:${camerax_version}"

    // TensorFlow Lite
    implementation('org.tensorflow:tensorflow-lite:0.0.0-nightly') { changing = true }
    implementation('org.tensorflow:tensorflow-lite-gpu:0.0.0-nightly') { changing = true }
    implementation('org.tensorflow:tensorflow-lite-support:0.0.0-nightly') { changing = true }
}

4. Configuring the Manifest

Add the "CAMERA" permission.

<uses-permission android:name="android.permission.CAMERA" />
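For reference, a minimal sketch of where this line sits in "AndroidManifest.xml" (the package name matches this article's code; everything else is the standard project template):

<manifest xmlns:android="http://schemas.android.com/apk/res/android"
    package="net.npaka.objectdetectionex">

    <!-- Needed to open the rear camera -->
    <uses-permission android:name="android.permission.CAMERA" />

    <application>
        <!-- activity declarations omitted -->
    </application>
</manifest>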

5. Preparing the Assets

Add the model and label file downloaded from the "TensorFlow Lite Object Detection Android Demo" page to the project's "app/src/main/assets" folder (a quick way to check that they were bundled is sketched after the list).

・detect.tflite
・labelmap.txt
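As a quick sanity check, you can log the bundled assets, e.g. at the top of onCreate(). This is my own sketch and uses only the standard AssetManager API:

// The output should include detect.tflite and labelmap.txt
try {
    for (String name : getAssets().list("")) {
        android.util.Log.d("Assets", name);
    }
} catch (java.io.IOException e) {
    e.printStackTrace();
}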

6. Setting Up the Layout

Add a "TextureView" and an "ImageView" to "activity_main.xml". The 640px values on the ImageView are only initial placeholders; both views are resized at runtime in startCamera().

<?xml version="1.0" encoding="utf-8"?>
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <RelativeLayout android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:background="#FFFFFF">
        <TextureView
            android:id="@+id/texture_view"
            android:layout_width="match_parent"
            android:layout_height="match_parent"/>

        <ImageView
            android:id="@+id/image_view"
            android:layout_width="640px"
            android:layout_height="640px"
            android:layout_margin="16dp"
            android:visibility="visible"
            app:srcCompat="@mipmap/ic_launcher" />
    </RelativeLayout>
</androidx.constraintlayout.widget.ConstraintLayout>

7. Building the UI

We build the UI that drives the object detection.
It performs the following:

・Requesting the camera permission
・Previewing and analyzing the camera feed
・Passing each frame as a Bitmap to ObjectDetectionInterpriter for inference (explained later)

◎ MainActivity.java

package net.npaka.objectdetectionex;

import androidx.appcompat.app.AppCompatActivity;
import androidx.camera.core.CameraX;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageAnalysisConfig;
import androidx.camera.core.Preview;
import androidx.camera.core.PreviewConfig;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

import android.Manifest;
import android.content.pm.PackageManager;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.graphics.ImageFormat;
import android.graphics.Matrix;
import android.graphics.Point;
import android.graphics.Rect;
import android.graphics.YuvImage;
import android.media.Image;
import android.os.Bundle;
import android.view.TextureView;
import android.view.ViewGroup;
import android.view.Window;
import android.widget.ImageView;
import android.widget.RelativeLayout;
import android.widget.Toast;

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.util.concurrent.Executors;

// MainActivity
public class MainActivity extends AppCompatActivity {
    // Constants
    private final int REQUEST_CODE_PERMISSIONS = 101;
    private final String[] REQUIRED_PERMISSIONS = new String[]{
        Manifest.permission.CAMERA};

    // UI
    private TextureView textureView;
    private ImageView imageView;

    // Inference
    private ObjectDetectionInterpriter interpriter;

    // Called on creation
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        // UI
        this.textureView = findViewById(R.id.texture_view);
        this.imageView = findViewById(R.id.image_view);

        // Inference
        this.interpriter = new ObjectDetectionInterpriter(this);

        // Check permissions
        if (allPermissionsGranted()) {
            this.textureView.post(() -> startCamera());
        } else {
            ActivityCompat.requestPermissions(this,
                REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS);
        }
    }

    // Receive the result of the permission request
    @Override
    public void onRequestPermissionsResult(int requestCode,
        String[] permissions, int[] grantResults) {
        if (requestCode == REQUEST_CODE_PERMISSIONS) {
            if (allPermissionsGranted()) {
                startCamera();
            } else {
                Toast.makeText(this, "Camera permission was not granted.",
                    Toast.LENGTH_SHORT).show();
                finish();
            }
        }
    }

    // Whether all permissions have been granted
    private boolean allPermissionsGranted() {
        for (String permission : REQUIRED_PERMISSIONS) {
            if (ContextCompat.checkSelfPermission(this, permission)
                != PackageManager.PERMISSION_GRANTED) {
                return false;
            }
        }
        return true;
    }

    // Start the camera
    private void startCamera() {
        // Preview
        PreviewConfig previewConfig = new PreviewConfig.Builder().build();
        Preview preview = new Preview(previewConfig);
        preview.setOnPreviewOutputUpdateListener(
            output -> {
                // Re-attach the TextureView so the new SurfaceTexture takes effect
                ViewGroup parent = (ViewGroup)this.textureView.getParent();
                parent.removeView(this.textureView);
                parent.addView(this.textureView, 0);

                // Attach the SurfaceTexture to the TextureView
                this.textureView.setSurfaceTexture(output.getSurfaceTexture());

                // Adjust the layout: a 4:3 preview fitted to the screen width, square overlay
                Point point = new Point();
                getWindowManager().getDefaultDisplay().getSize(point);
                int w = point.x;
                int h = point.x * 4 / 3;
                RelativeLayout.LayoutParams params = new RelativeLayout.LayoutParams(w, h);
                params.addRule(RelativeLayout.CENTER_IN_PARENT);
                textureView.setLayoutParams(params);
                params = new RelativeLayout.LayoutParams(w, w);
                params.addRule(RelativeLayout.CENTER_IN_PARENT);
                imageView.setLayoutParams(params);
            });

        // Image analysis
        ImageAnalysisConfig config = new ImageAnalysisConfig.Builder()
            .setImageReaderMode(ImageAnalysis.ImageReaderMode.ACQUIRE_LATEST_IMAGE)
            .build();
        ImageAnalysis imageAnalysis = new ImageAnalysis(config);
        imageAnalysis.setAnalyzer(Executors.newSingleThreadExecutor(),
            (image, rotationDegrees) -> {
                // Inference
                Bitmap bitmap = imageToBitmap(image.getImage(), rotationDegrees);
                Bitmap result = this.interpriter.predict(bitmap);
                bitmap.recycle();
 
                // Display the result
                this.imageView.post(() -> {
                    imageView.setImageBitmap(result);
                });
            });

        // Bind preview and analysis to the lifecycle
        CameraX.bindToLifecycle(this, preview, imageAnalysis);
    }

    // Image → Bitmap
    private Bitmap imageToBitmap(Image image, int rotationDegrees) {
        byte[] data = imageToByteArray(image);
        Bitmap bitmap = BitmapFactory.decodeByteArray(data, 0, data.length);
        if (rotationDegrees == 0) {
            return bitmap;
        } else {
            return rotateBitmap(bitmap, rotationDegrees);
        } 
    }

    // Rotate the Bitmap
    private Bitmap rotateBitmap(Bitmap bitmap, int rotationDegrees) {
        Matrix mat = new Matrix();
        mat.postRotate(rotationDegrees);
        return Bitmap.createBitmap(bitmap, 0, 0,
            bitmap.getWidth(), bitmap.getHeight(), mat, true);
    }

    // Image → JPEG byte array
    private byte[] imageToByteArray(Image image) {
        byte[] data = null;
        if (image.getFormat() == ImageFormat.JPEG) {
            Image.Plane[] planes = image.getPlanes();
            ByteBuffer buffer = planes[0].getBuffer();
            data = new byte[buffer.capacity()];
            buffer.get(data);
            return data;
        } else if (image.getFormat() == ImageFormat.YUV_420_888) {
            data = NV21toJPEG(YUV_420_888toNV21(image),
                image.getWidth(), image.getHeight());
        }
        return data;
    }

    // YUV_420_888 → NV21
    private byte[] YUV_420_888toNV21(Image image) {
        byte[] nv21;
        ByteBuffer yBuffer = image.getPlanes()[0].getBuffer();
        ByteBuffer uBuffer = image.getPlanes()[1].getBuffer();
        ByteBuffer vBuffer = image.getPlanes()[2].getBuffer();
        int ySize = yBuffer.remaining();
        int uSize = uBuffer.remaining();
        int vSize = vBuffer.remaining();
        nv21 = new byte[ySize + uSize + vSize];
        yBuffer.get(nv21, 0, ySize);
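        // NV21 layout is the full Y plane followed by interleaved VU bytes,
        // so the V plane is copied before U (assumes tightly packed planes)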
        vBuffer.get(nv21, ySize, vSize);
        uBuffer.get(nv21, ySize + vSize, uSize);
        return nv21;
    }

    // NV21 → JPEG
    private byte[] NV21toJPEG(byte[] nv21, int width, int height) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        YuvImage yuv = new YuvImage(nv21, ImageFormat.NV21, width, height, null);
        yuv.compressToJpeg(new Rect(0, 0, width, height), 100, out);
        return out.toByteArray();
    }
}

8. Object Detection

This class receives a Bitmap and returns a Bitmap with the detection results drawn on it.
By changing the parameter constants, it can be adapted to other object detection models as well.
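For orientation, the call sequence from MainActivity above boils down to just two calls (no new API; this only restates how the article's classes fit together):

// Create once; loads detect.tflite and labelmap.txt from assets
ObjectDetectionInterpriter interpriter = new ObjectDetectionInterpriter(this); // "this" is an Activity/Context

// For each camera frame: run inference and display the overlay
Bitmap result = interpriter.predict(bitmap); // bitmap: the current camera frame
imageView.setImageBitmap(result);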

◎ ObjectDetectionInterpriter.java

package net.npaka.objectdetectionex;

import android.content.Context;
import android.content.res.AssetFileDescriptor;
import android.graphics.Bitmap;
import android.graphics.Canvas;
import android.graphics.Color;
import android.graphics.Paint;
import android.graphics.PorterDuff;
import android.graphics.Rect;
import android.graphics.RectF;

import org.tensorflow.lite.Interpreter;
import org.tensorflow.lite.gpu.GpuDelegate;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Object detection interpreter
public class ObjectDetectionInterpriter {
    // Parameter constants
    private static final int BATCH_SIZE = 1; // batch size
    private static final int INPUT_PIXELS = 3; // channels per pixel (RGB)
    private static final int INPUT_SIZE = 300; // input width/height
    private static final boolean IS_QUANTIZED = true; // quantized model
    private static final int NUM_DETECTIONS = 10; // max number of detections
    private static final float IMAGE_MEAN = 128.0f; // normalization for float models
    private static final float IMAGE_STD = 128.0f;

    // System
    private Context context;
    private Interpreter interpreter;
    private List<String> labels;
    private int[] imageBuffer = new int[INPUT_SIZE * INPUT_SIZE];

    // Input
    private ByteBuffer inBuffer;
    private Bitmap inBitmap;
    private Canvas inCanvas;
    private Rect inBitmapSrc = new Rect();
    private Rect inBitmapDst = new Rect(0, 0, INPUT_SIZE, INPUT_SIZE);

    // Output
    private float[][][] outLocations;
    private float[][] outClasses;
    private float[][] outScores;
    private float[] numDetections;
    private Bitmap outBitmap;
    private Canvas outCanvas;
    private Paint outPaint;

    // Constructor
    public ObjectDetectionInterpriter(Context context) {
        this.context = context;

        // Load the model
        MappedByteBuffer model = loadModel("detect.tflite");

        // Load the labels
        this.labels = loadLabel("labelmap.txt");

        // Create the interpreter
        Interpreter.Options options = new Interpreter.Options();
        //options.setUseNNAPI(true); // NNAPI delegate (alternative to GPU)
        options.addDelegate(new GpuDelegate()); // GPU delegate
        options.setNumThreads(1); // number of threads
        this.interpreter = new Interpreter(model, options);

        // Initialize the input
        this.inBitmap = Bitmap.createBitmap(
            INPUT_SIZE, INPUT_SIZE, Bitmap.Config.ARGB_8888);
        this.inCanvas = new Canvas(inBitmap);
        int numBytesPerChannel = IS_QUANTIZED ? 1 : 4;
        this.inBuffer = ByteBuffer.allocateDirect(
            BATCH_SIZE * INPUT_SIZE * INPUT_SIZE * INPUT_PIXELS * numBytesPerChannel);
        this.inBuffer.order(ByteOrder.nativeOrder());

        // Initialize the outputs: boxes as [ymin, xmin, ymax, xmax] normalized
        // to 0-1, plus class indices, scores, and the number of detections
        this.outLocations = new float[1][NUM_DETECTIONS][4];
        this.outClasses = new float[1][NUM_DETECTIONS];
        this.outScores = new float[1][NUM_DETECTIONS];
        this.numDetections = new float[1];
        this.outBitmap = Bitmap.createBitmap(
            INPUT_SIZE, INPUT_SIZE, Bitmap.Config.ARGB_8888);
        this.outCanvas = new Canvas(outBitmap);
        this.outPaint = new Paint();
    }

    // Load the model (memory-mapped, which is why the asset must be uncompressed)
    private MappedByteBuffer loadModel(String modelPath) {
        try {
            AssetFileDescriptor fd = this.context.getAssets().openFd(modelPath);
            FileInputStream in = new FileInputStream(fd.getFileDescriptor());
            FileChannel fileChannel = in.getChannel();
            return fileChannel.map(FileChannel.MapMode.READ_ONLY,
                fd.getStartOffset(), fd.getDeclaredLength());
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    // Load the labels
    private List<String> loadLabel(String labelPath) {
        try {
            List<String> labels = new ArrayList<>();
            BufferedReader reader = new BufferedReader(new InputStreamReader(
                this.context.getAssets().open(labelPath)));
            String line;
            while ((line = reader.readLine()) != null) {
                labels.add(line);
            }
            reader.close();
            return labels;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    // Inference
    public Bitmap predict(Bitmap bitmap) {
        // Build the input image: center-crop the frame to a square, scaled to INPUT_SIZE
        int minSize = Math.min(bitmap.getWidth(), bitmap.getHeight());
        int dx = (bitmap.getWidth()-minSize)/2;
        int dy = (bitmap.getHeight()-minSize)/2;
        this.inBitmapSrc.set(dx, dy, dx+minSize, dy+minSize);
        inCanvas.drawBitmap(bitmap, this.inBitmapSrc, this.inBitmapDst, null);

        // Fill the input buffer
        bmpToInBuffer(inBitmap);

        // Run inference
        Object[] inputArray = {inBuffer};
        Map<Integer, Object> outputMap = new HashMap<>();
        outputMap.put(0, this.outLocations);
        outputMap.put(1, this.outClasses);
        outputMap.put(2, this.outScores);
        outputMap.put(3, numDetections);
        this.interpreter.runForMultipleInputsOutputs(inputArray, outputMap);

        // Collect the results
        int numDetectionsOutput = Math.min(NUM_DETECTIONS, (int)numDetections[0]);
        ArrayList<Recognition> recognitions = new ArrayList<>(numDetectionsOutput);
        for (int i = 0; i < numDetectionsOutput; ++i) {
            RectF detection = new RectF(
                outLocations[0][i][1] * INPUT_SIZE,
                outLocations[0][i][0] * INPUT_SIZE,
                outLocations[0][i][3] * INPUT_SIZE,
                outLocations[0][i][2] * INPUT_SIZE);
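            // Class indices are shifted by one: the first line of the demo's
            // labelmap.txt is a background placeholder ("???")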
            int labelOffset = 1;
            recognitions.add(new Recognition(""+i,
                labels.get((int) outClasses[0][i]+labelOffset), outScores[0][i], detection));
        }

        // Draw the output image: a box and label for each detection above 0.5 confidence
        this.outCanvas.drawColor(Color.TRANSPARENT, PorterDuff.Mode.CLEAR);
        this.outPaint.setTextSize(12);
        this.outPaint.setAntiAlias(true);
        for (Recognition recognition : recognitions) {
            if (recognition.confidence > 0.5f) {
                RectF p = recognition.location;
                this.outPaint.setStyle(Paint.Style.STROKE);
                this.outPaint.setColor(Color.BLUE);
                this.outCanvas.drawRect(p, this.outPaint);
                this.outPaint.setStyle(Paint.Style.FILL);
                this.outCanvas.drawRect(new RectF(p.left, p.top-16, p.right, p.top), this.outPaint);
                this.outPaint.setColor(Color.WHITE);
                float w = this.outPaint.measureText(recognition.title);
                this.outCanvas.drawText(recognition.title, p.left+(p.width()-w)/2, p.top-4, this.outPaint);
            }
        }
        return this.outBitmap;
    }

    // Bitmap → input buffer
    private void bmpToInBuffer(Bitmap bitmap) {
        this.inBuffer.rewind();
        bitmap.getPixels(this.imageBuffer, 0, bitmap.getWidth(),
            0, 0, bitmap.getWidth(), bitmap.getHeight());
        int pixel = 0;
        for (int i = 0; i < INPUT_SIZE; ++i) {
            for (int j = 0; j < INPUT_SIZE; ++j) {
                int pixelValue = imageBuffer[pixel++];
                if (IS_QUANTIZED) {
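                    // Quantized model: feed raw RGB bytes in [0, 255]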
                    inBuffer.put((byte)((pixelValue >> 16) & 0xFF));
                    inBuffer.put((byte)((pixelValue >> 8) & 0xFF));
                    inBuffer.put((byte)(pixelValue & 0xFF));
                } else {
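                    // Float model: normalize each channel via (x - IMAGE_MEAN) / IMAGE_STD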
                    inBuffer.putFloat((((pixelValue >> 16) & 0xFF)-IMAGE_MEAN)/IMAGE_STD);
                    inBuffer.putFloat((((pixelValue >> 8) & 0xFF)-IMAGE_MEAN)/IMAGE_STD);
                    inBuffer.putFloat(((pixelValue & 0xFF)-IMAGE_MEAN)/IMAGE_STD);
                }
            }
        }
    }
}

◎ Recognition.java

package net.npaka.objectdetectionex;

import android.graphics.RectF;

// A single detection result
public class Recognition {
    public String id; // ID
    public String title; // label text
    public Float confidence; // confidence score
    public RectF location; // bounding box (in model input coordinates)

    // Constructor
    public Recognition(String id, String title, Float confidence, RectF location) {
        this.id = id;
        this.title = title;
        this.confidence = confidence;
        this.location = location;
    }
}

