あいいうえ

import cv2
import numpy as np
import os
import glob
import boto3
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt
from io import BytesIO

# Configuration constants (the values below look like placeholders).
S3_BUCKET_NAME = 'your-s3-bucket'
VIDEO_KEY = 'path/to/video.mp4'
TEACHER_DATA_PREFIX = 'path/to/teacher/data/'
OUTPUT_PREFIX = 'path/to/output/'

# Module-wide S3 client shared by the download/upload helpers.
s3 = boto3.client('s3')

# Local scratch directories for intermediate results; created eagerly at
# import time so the pipeline steps can write into them directly.
local_temp_dir = '/tmp'
frame_features_dir = os.path.join(local_temp_dir, 'frame_features')
teacher_features_dir = os.path.join(local_temp_dir, 'teacher_features')
match_dir = os.path.join(local_temp_dir, 'matches')
for _scratch_dir in (frame_features_dir, teacher_features_dir, match_dir):
    os.makedirs(_scratch_dir, exist_ok=True)
del _scratch_dir  # do not leave the loop variable in the module namespace

# ORB feature detector shared by the frame and teacher extraction steps.
orb = cv2.ORB_create()

# Download the video file from S3
def download_video_from_s3(bucket_name, key, download_path):
    """Fetch one S3 object and write it to a local file.

    Args:
        bucket_name: Name of the S3 bucket that holds the object.
        key: Key of the object inside the bucket.
        download_path: Local filesystem path the object is written to.
    """
    s3.download_file(Bucket=bucket_name, Key=key, Filename=download_path)

# Download the teacher (reference) images from S3
def download_teacher_data_from_s3(bucket_name, prefix, download_dir):
    """Mirror every ``.jpg`` object under an S3 prefix into a local directory.

    The part of each key after ``prefix`` is reproduced as a relative path
    below ``download_dir``; intermediate directories are created as needed.
    Objects whose key does not end in ``.jpg`` are ignored.

    Args:
        bucket_name: Name of the S3 bucket to list.
        prefix: Key prefix under which the teacher images are stored.
        download_dir: Local directory that receives the downloaded files.
    """
    paginator = s3.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # list_objects_v2 leaves out 'Contents' altogether when nothing
        # matches the prefix, so indexing it directly would raise KeyError.
        for obj in page.get('Contents', []):
            key = obj['Key']
            if not key.endswith('.jpg'):
                continue
            local_path = os.path.join(download_dir, os.path.relpath(key, prefix))
            os.makedirs(os.path.dirname(local_path), exist_ok=True)
            s3.download_file(bucket_name, key, local_path)

# Upload feature files to S3
def upload_features_to_s3(bucket_name, prefix, local_dir):
    """Upload every file below ``local_dir`` to S3 under ``prefix``.

    The directory tree is walked recursively and each file's path relative
    to ``local_dir`` is appended to ``prefix`` to form the object key.

    Args:
        bucket_name: Destination S3 bucket.
        prefix: Key prefix prepended to each relative file path.
        local_dir: Local directory whose contents are uploaded.
    """
    for root, _, files in os.walk(local_dir):
        for filename in files:
            local_path = os.path.join(root, filename)
            relative_path = os.path.relpath(local_path, local_dir)
            # S3 keys use '/' as the separator; os.path.join would emit
            # backslashes on Windows, so normalise the local separator.
            s3_key = os.path.join(prefix, relative_path).replace(os.sep, '/')
            s3.upload_file(local_path, bucket_name, s3_key)

# Step 1: extract frames from the video and save their features
def extract_frame_features(video_path, output_dir):
    """Compute ORB descriptors for each video frame and save them as ``.npy``.

    Frames are read sequentially, converted to grayscale and passed to the
    module-level ``orb`` detector. Descriptors of frame ``i`` are stored as
    ``frame_{i}.npy`` in ``output_dir``. A frame for which the detector
    returns no descriptors produces no file but is still counted, so the
    saved indices may contain gaps.

    Args:
        video_path: Path of the video file to read.
        output_dir: Existing directory that receives the descriptor files.

    Returns:
        The number of frames read from the video.
    """
    cap = cv2.VideoCapture(video_path)
    frame_count = 0

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            _, descriptors = orb.detectAndCompute(gray, None)

            if descriptors is not None:
                np.save(os.path.join(output_dir, f'frame_{frame_count}.npy'), descriptors)
            frame_count += 1
    finally:
        # Release the capture handle even if decoding or saving fails.
        cap.release()

    return frame_count

# Step 2: compute and save the features of the teacher data
def extract_teacher_features(teacher_data_path, output_dir):
    """Build one stacked ORB descriptor file per teacher-image folder.

    Every sub-directory of ``teacher_data_path`` is treated as one group.
    The descriptors of all ``*.jpg`` images directly inside the folder are
    stacked vertically and saved as ``{folder}_features.npy`` in
    ``output_dir``. Non-directory entries, unreadable images, images without
    descriptors and folders that yield no descriptors at all are skipped.

    Args:
        teacher_data_path: Directory containing one sub-folder per group.
        output_dir: Existing directory that receives the feature files.
    """
    for folder in os.listdir(teacher_data_path):
        folder_path = os.path.join(teacher_data_path, folder)
        if not os.path.isdir(folder_path):
            continue

        all_descriptors = []
        # Sorted so the row order of the stacked array is reproducible.
        for image_path in sorted(glob.glob(os.path.join(folder_path, '*.jpg'))):
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # imread signals an unreadable/corrupt file with None;
                # passing that on to detectAndCompute would raise.
                continue

            _, descriptors = orb.detectAndCompute(image, None)
            if descriptors is not None:
                all_descriptors.append(descriptors)

        if all_descriptors:
            stacked = np.vstack(all_descriptors)
            np.save(os.path.join(output_dir, f'{folder}_features.npy'), stacked)

# Step 3: compare each frame's features with every teacher feature set and
# record the best-matching one
def compare_features(frame_count, frame_features_dir, teacher_features_dir, match_dir):
    """Score every frame against all teacher feature sets.

    For each frame index in ``range(frame_count)`` the descriptors stored in
    ``frame_{i}.npy`` are matched (brute force, Hamming norm, cross-check)
    against each ``.npy`` file in ``teacher_features_dir``. The similarity is
    the number of matches; the best teacher file name and its score are
    written to ``frame_{i}_match.txt`` in ``match_dir``. All best scores are
    also saved as ``similarity_scores.npy``.

    Args:
        frame_count: Number of frame indices to evaluate.
        frame_features_dir: Directory holding the ``frame_{i}.npy`` files.
        teacher_features_dir: Directory holding teacher descriptor files.
        match_dir: Existing directory that receives the result files.

    Returns:
        A list with the best similarity per frame. The value is ``0`` for a
        frame that has no descriptor file and ``-1`` when there was no
        teacher feature file to compare against.
    """
    bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

    # Load each teacher descriptor file once instead of once per frame.
    # Only regular .npy files are considered so that stray sub-directories
    # or images in the same directory do not break np.load.
    teacher_sets = []
    for name in sorted(os.listdir(teacher_features_dir)):
        teacher_path = os.path.join(teacher_features_dir, name)
        if name.endswith('.npy') and os.path.isfile(teacher_path):
            teacher_sets.append((name, np.load(teacher_path)))

    similarity_scores = []

    for i in range(frame_count):
        frame_path = os.path.join(frame_features_dir, f'frame_{i}.npy')
        best_similarity = -1
        best_folder = None

        if os.path.isfile(frame_path):
            frame_features = np.load(frame_path)
            for folder, teacher_features in teacher_sets:
                # The match count is the score; the order of the matches is
                # irrelevant, so they are not sorted.
                similarity = len(bf.match(frame_features, teacher_features))
                if similarity > best_similarity:
                    best_similarity = similarity
                    best_folder = folder
        else:
            # No file means no descriptors were saved for this frame, so
            # nothing can match.
            best_similarity = 0

        similarity_scores.append(best_similarity)
        with open(os.path.join(match_dir, f'frame_{i}_match.txt'), 'w') as f:
            f.write(f'{best_folder}: {best_similarity}\n')

    np.save(os.path.join(match_dir, 'similarity_scores.npy'), similarity_scores)
    return similarity_scores

# Step 4: plot how the similarity evolves over the frames and save the chart
def plot_similarity_scores(similarity_scores, output_path):
    """Draw the per-frame similarity as a line chart, save it and show it.

    The chart is written to ``similarity_scores_plot.png`` inside
    ``output_path`` before being displayed.

    Args:
        similarity_scores: Sequence of scores, one per frame index.
        output_path: Directory in which the PNG file is stored.
    """
    image_file = os.path.join(output_path, 'similarity_scores_plot.png')

    plt.plot(similarity_scores)
    plt.title('Similarity Scores Over Frames')
    plt.xlabel('Frame Index')
    plt.ylabel('Similarity Score')

    plt.savefig(image_file)
    plt.show()

# Main processing
def main():
    """Run the whole pipeline: download, extract, compare, plot, upload."""
    local_video_path = os.path.join(local_temp_dir, 'video.mp4')

    # Raw teacher images get their own directory so that the feature
    # directory only ever contains the generated .npy files. Mixing both
    # would put image sub-folders into the feature listing and re-upload
    # the raw images as "features".
    teacher_data_dir = os.path.join(local_temp_dir, 'teacher_data')
    os.makedirs(teacher_data_dir, exist_ok=True)

    # Download the video file from S3
    download_video_from_s3(S3_BUCKET_NAME, VIDEO_KEY, local_video_path)

    # Download the teacher data from S3
    download_teacher_data_from_s3(S3_BUCKET_NAME, TEACHER_DATA_PREFIX, teacher_data_dir)

    # Extract and save the frame features
    frame_count = extract_frame_features(local_video_path, frame_features_dir)

    # Extract and save the teacher-data features
    extract_teacher_features(teacher_data_dir, teacher_features_dir)

    # Compare frame features against teacher features
    similarity_scores = compare_features(frame_count, frame_features_dir, teacher_features_dir, match_dir)

    # Plot the similarity over time
    plot_similarity_scores(similarity_scores, match_dir)

    # Upload the features and the match results to S3
    upload_features_to_s3(S3_BUCKET_NAME, os.path.join(OUTPUT_PREFIX, 'frame_features'), frame_features_dir)
    upload_features_to_s3(S3_BUCKET_NAME, os.path.join(OUTPUT_PREFIX, 'teacher_features'), teacher_features_dir)
    upload_features_to_s3(S3_BUCKET_NAME, os.path.join(OUTPUT_PREFIX, 'matches'), match_dir)


if __name__ == '__main__':
    main()

この記事が気に入ったらサポートをしてみませんか?