kesu yotei

shigeki kurita

2024年8月16日 01:00

bikoudesu kesu yotei

import torch

import cv2

import numpy as np

from PIL import Image

from torchvision.transforms import Compose, Resize, ToTensor, Normalize

from ultralytics import YOLO

# Load the MiDaS DPT_Large monocular depth model from PyTorch Hub.
midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large")

# Use the preprocessing pipeline published together with the model instead
# of a hand-built one: per the MiDaS hub documentation, `dpt_transform` is
# the transform intended for the DPT models. It takes an RGB image array and
# returns an already-batched tensor, so no extra unsqueeze is needed below.
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms").dpt_transform

# Put the depth model into evaluation mode (inference only).
midas.eval()

# Load the YOLOv8n segmentation model.
model = YOLO("./weights/yolov8n-seg.pt")

# Read the target image. cv2.imread returns None rather than raising when
# the file is missing or unreadable, so fail here with a clear message.
image_path = "./target/0001.jpg"
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Run object detection / segmentation on the image as loaded by OpenCV.
results = model(image)

# OpenCV loads images as BGR; convert to RGB before the depth preprocessing.
input_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
input_image = midas_transforms(input_image)

# Depth estimation; gradients are not needed for inference.
with torch.no_grad():
    depth_map = midas(input_image).squeeze().cpu().numpy()

# Resize the depth map to the original image size (cv2.resize takes
# (width, height)) so it can be indexed with image pixel coordinates.
depth_map = cv2.resize(depth_map, (image.shape[1], image.shape[0]))

# Camera focal length (should come from camera calibration). 500 is only a
# placeholder example value. Presumably expressed in pixels, since it
# divides pixel extents below -- confirm against the calibration output.
focal_length = 500.0

# Only detections whose confidence is strictly greater than this are used.
confidence_threshold = 0.5

# Mapping from class id to class name, taken from the model.
class_names = model.names

# Set to True once at least one detection has been measured and drawn.
objects_detected = False

# NOTE(review): depth_map is produced by MiDaS, which is documented as
# predicting relative (inverse) depth rather than metric distance. The
# values printed below as metres are therefore only meaningful after the
# depth has been calibrated to real-world units -- confirm before relying
# on them.

# Estimate the height and width of every sufficiently confident detection.
for result in results:
    # Nothing to measure without both boxes and segmentation masks.
    if result.boxes is None or result.masks is None or len(result.boxes) == 0:
        continue

    # `masks.xy` gives each mask outline in original-image pixel coordinates.
    # The raw `masks.data` tensors are at the network's inference resolution,
    # so rectangles derived from them do not line up with `image` or
    # `depth_map` unless the two sizes happen to match.
    for box, polygon in zip(result.boxes, result.masks.xy):
        confidence = box.conf.item()  # one-element tensor -> Python float
        if confidence <= confidence_threshold:
            continue
        if len(polygon) == 0:
            # No outline was produced for this detection.
            continue

        # Rasterise the outline into a binary mask with the image's size.
        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask, [np.round(polygon).astype(np.int32)], 255)

        # Bounding rectangle of the mask, in image pixel coordinates.
        x, y, w, h = cv2.boundingRect(mask)
        if w == 0 or h == 0:
            # Empty mask: averaging over it would yield NaN.
            continue

        # Average the depth over the mask pixels only, so that background
        # inside the bounding rectangle does not skew the estimate.
        avg_depth = float(np.mean(depth_map[mask > 0]))

        # Pinhole-camera relation: size = pixel extent * depth / focal length.
        real_height = (h * avg_depth) / focal_length
        real_width = (w * avg_depth) / focal_length

        print(f'物体の現実世界での高さ: {real_height:.2f} メートル')
        print(f'物体の現実世界での横幅: {real_width:.2f} メートル')

        # Look up the class name for this detection.
        class_id = int(box.cls)
        class_name = class_names[class_id]

        # Draw the bounding rectangle on the image.
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Write the label just above the rectangle, clamped so that it stays
        # inside the image when the object touches the top edge.
        text = f'{class_name}: H={real_height:.2f} m, W={real_width:.2f} m, Confidence: {confidence:.2f}'
        cv2.putText(image, text, (x, max(y - 10, 15)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        objects_detected = True

# When nothing was measured, stamp a notice onto the image instead.
if not objects_detected:
    notice_origin = (10, 30)  # bottom-left corner of the text, in pixels
    notice_colour = (0, 0, 255)  # red in OpenCV's BGR channel order
    cv2.putText(
        image,
        'No objects detected',
        notice_origin,
        cv2.FONT_HERSHEY_SIMPLEX,
        1,
        notice_colour,
        2,
    )

# Show the annotated image and block until a key is pressed, then close
# every OpenCV window.
window_title = "Detected Image"
cv2.imshow(window_title, image)
cv2.waitKey(0)
cv2.destroyAllWindows()

この記事が気に入ったらサポートをしてみませんか？