Touchless Mouse: Source Code

Explanation

The code is posted below.
If you uncomment the commented-out parts, the pose of each detected tag is drawn on the frame. This is useful for checking how well tags are being recognized, but it makes processing heavier, so I recommend leaving it commented out during normal use.
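For reference, the script depends on OpenCV, pupil-apriltags, SciPy, and PyAutoGUI. Assuming the standard PyPI package names, an install line like the following should cover them:

pip install opencv-python pupil-apriltags scipy pyautogui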

The cursor is not moved to an absolute position within the camera's field of view; instead, it moves by the difference from the tag's previously detected position.
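As a minimal sketch of the idea (an illustrative helper, not code taken from the script itself):

def delta_from(prev, center):
    # Relative movement: only the change from the previous detection
    # matters, not where the tag sits within the camera frame.
    if prev is None:
        return (0, 0), center  # first detection: no movement yet
    dx = center[0] - prev[0]
    dy = center[1] - prev[1]
    return (dx, dy), center

Each frame would call (dx, dy), prev = delta_from(prev, tag.center) and hand the deltas to pyautogui.move().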

The parameters you can tune are the following:

gain_x = 3/2
gain_y = 5/2

move_threshold = 1/200
minimize_gain = 1/5
minimize_frames = 5

support_times = 10

gain_x and gain_y set the multiplier applied to the tag's actual movement (in pixels) when moving the cursor.
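For example, with the defaults, a tag movement that maps to 10 px on screen becomes 10 × 3/2 = 15 px of horizontal cursor travel and 10 × 5/2 = 25 px of vertical travel.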

move_threshold is the threshold below which cursor movement is damped. If the movement stays under this threshold for minimize_frames consecutive frames, the actual movement is multiplied by minimize_gain.
For example, on a 1920x1080 display, observing five movements within roughly 9x5 pixels scales all subsequent movement by 1/5.
This keeps hand tremor out of the cursor when you are trying to make precise movements.
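Reduced to one axis, the damping works roughly like this (a simplified sketch; the actual loop below keeps separate x/y windows and also checks the window's average before damping):

def damp(move, history, threshold_px, minimize_gain=1/5, minimize_frames=5):
    # Collect consecutive small deltas; any large jump clears the window.
    if abs(move) < threshold_px:
        history.append(move)
    else:
        history.clear()
    # Enough small deltas in a row: treat the motion as tremor and damp it.
    if len(history) > minimize_frames:
        history.pop(0)
        move *= minimize_gain
    return move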

support_times is the number of frames over which the previous movement is carried forward when the tag cannot be detected. The default is 10, so up to 10 frames are bridged.
At 30 fps, 10 frames means you can lose sight of the tag for about 0.3 seconds without the cursor stopping.
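In miniature, the carry-over looks like this (a simplified sketch; the real loop also manages click state, and state would start as {"lost": 0, "last": (0, 0)}):

def coast(visible, delta, state, support_times=10):
    # Bridge short detection gaps by replaying the last known delta.
    if visible:
        state["lost"] = 0
        state["last"] = delta
        return delta
    state["lost"] += 1
    if state["lost"] <= support_times:
        return state["last"]  # briefly lost: keep the cursor moving
    return (0, 0)              # lost for too long: stop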

# import numpy as np  # needed only if the draw_pose debug helper below is enabled
import cv2
from pupil_apriltags import Detector
from scipy.spatial.transform import Rotation
import math
import pyautogui
import threading
import time

# Debug helper: draws the estimated pose axes of each detected tag on the
# preview frame. Uncomment together with the numpy import above; the
# cv2.aruco module requires the opencv-contrib-python package.
# def draw_pose(overlay, corners, camera_params, tag_size, z_sign=1):
#     opoints = np.array([  # note: opoints is unused below; leftover from another drawing method
#         -2, -2, 0,
#         2, -2, 0,
#         2, 2, 0,
#         2, -2, -4 * z_sign,
#     ]).reshape(-1, 1, 3) * 0.5 * tag_size
#     fx, fy, cx, cy = camera_params
#     K = np.array([fx, 0, cx, 0, fy, cy, 0, 0, 1]).reshape(3, 3)
#     dcoeffs = np.zeros(5)
#
#     rvec, tvec, _objPoints = cv2.aruco.estimatePoseSingleMarkers(corners, 0.05, K, dcoeffs)
#     cv2.aruco.drawAxis(overlay, K, dcoeffs, rvec, tvec, 0.03)

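# Move or drag the cursor by (x, y). Runs on a worker thread so that
# pyautogui's built-in pause does not stall the capture loop.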
def mouse_move(x, y, drag):
  if drag == 1:
      pyautogui.drag(x, y)
  else:
      pyautogui.move(x, y)

display_width, display_height = pyautogui.size()

cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

camera_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
camera_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

pyautogui.FAILSAFE = False  # disable the fail-safe that raises an exception when the cursor hits a screen corner

at_detector = Detector(families='tag36h11',
                     nthreads=4,
                     quad_decimate=1.0,
                     quad_sigma=0.0,
                     refine_edges=1,
                     decode_sharpening=5.0,
                     debug=0)

dx = None  # previous tag position in screen coordinates; None forces a re-sync
dy = None
move_x = 0  # most recent frame-to-frame movement
move_y = 0

move_lost_count = 0   # frames since tag 0 (cursor) was last seen
left_lost_count = 0   # frames since tag 1 (left click) was last seen
right_lost_count = 0  # frames since tag 2 (right click) was last seen

gain_x = 3/2
gain_y = 5/2

mouse_state = {"moving": 0, "left_click": 0, "right_click": 0}  # state carried across frames

move_list_x = []
move_list_y = []
move_threshold = 1/200
minimize_gain = 1/5
minimize_frames = 5

support_times = 10

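# Main loop: tag 0 drives the cursor (tilting it scrolls or clicks),
# tag 1 holds the left button for dragging, and tag 2 issues a right click.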
while True:
  ret, frame = cap.read()
  if not ret:  # skip the iteration if the camera frame could not be grabbed
      continue
  gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
  tags = at_detector.detect(gray, estimate_tag_pose=True, camera_params=[600, 600, camera_width/2, camera_height/2], tag_size=0.02)

  moving = 0
  left_click = 0   # set to 1 when tag 1 is seen this frame
  right_click = 0  # set to 1 when tag 2 is seen this frame

  if len(tags) > 0:
      for tag in tags:
          if tag.tag_id == 0:
              # draw_pose(frame, [np.array(tag.corners)], [600, 600, camera_width/2, camera_height/2], 0.03, z_sign=1)
              moving = 1
              mouse_state["moving"] = 1
              move_lost_count = 0
              left_lost_count = 0
              right_lost_count = 0
              rot = Rotation.from_matrix(tag.pose_R)  # tag orientation from the pose estimate
              rad = rot.as_euler('XYZ').tolist()      # intrinsic Euler angles in radians
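              # Pitch (rotation about X) past 45 degrees scrolls; roll
              # (rotation about Z) past 45 degrees presses or releases buttons.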
              if math.pi/4 < rad[0] < math.pi/2:
                  print("scroll_up")
                  pyautogui.scroll(-10)
                  break
              elif -math.pi/2 < rad[0] < -math.pi/4:
                  print("scroll_down")
                  pyautogui.scroll(10)
                  break
              elif math.pi/4 < rad[2] < math.pi*3/4:
                  if mouse_state["left_click"] == 0:
                      mouse_state["left_click"] = 1
                      print("left_mouse_down")
                      pyautogui.mouseDown(button='left')
                      dx = None
                      dy = None
              elif -math.pi*3/4 < rad[2] < -math.pi/4:
                  if mouse_state["right_click"] == 0:
                      mouse_state["right_click"] = 1
                      print("right_click")
                      pyautogui.click(button='right')
                      dx = None
                      dy = None
              else:
                  if mouse_state["left_click"] == 1:
                      mouse_state["left_click"] = 0
                      print("left_mouse_up")
                      pyautogui.mouseUp(button='left')
                      dx = None
                      dy = None
                  if mouse_state["right_click"] == 1:
                      mouse_state["right_click"] = 0
                      dx = None
                      dy = None

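              # Map the tag center into screen coordinates (x mirrored) and
              # move by the change since the previous frame.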
              x = round((camera_width - tag.center[0]) * (display_width/camera_width))
              y = round(tag.center[1] * (display_height/camera_height))

              if dx is None and dy is None:
                  dx = x
                  dy = y
              move_x = dx - x
              move_y = dy - y
              dx = x
              dy = y

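              # Tremor damping: track consecutive small deltas and, once more than
              # minimize_frames have a small average, scale movement by minimize_gain.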
              if abs(move_x) < display_width * move_threshold and abs(move_y) < display_height * move_threshold:
                  move_list_x.append(move_x)
                  move_list_y.append(move_y)
              else:
                  move_list_x = []
                  move_list_y = []

              if len(move_list_x) > minimize_frames and len(move_list_y) > minimize_frames:
                  move_list_x.pop(0)
                  move_list_y.pop(0)
                  if abs(sum(move_list_x) / len(move_list_x)) < display_width * move_threshold and abs(sum(move_list_y) / len(move_list_y)) < display_height * move_threshold:
                      move_x = move_x * minimize_gain
                      move_y = move_y * minimize_gain

              thread = threading.Thread(target=mouse_move, args=(-round(move_x*gain_x), -round(move_y*gain_y), mouse_state["left_click"],))
              thread.start()

          elif tag.tag_id == 1:
              left_click = 1
              left_lost_count = 0
              if mouse_state["left_click"] == 0:
                  mouse_state["left_click"] = 1
                  print("left_mouse_down")
                  pyautogui.mouseDown(button='left')

          elif tag.tag_id == 2:
              right_click = 1
              right_lost_count = 0
              if mouse_state["right_click"] == 0:
                  mouse_state["right_click"] = 1
                  print("right_click")
                  pyautogui.click(button='right')

      if left_click == 0:
          if mouse_state["left_click"] == 1:
              left_lost_count += 1
              if left_lost_count == support_times:
                  mouse_state["left_click"] = 0
                  print("left_mouse_up")
                  pyautogui.mouseUp(button='left')

      if right_click == 0:
          if mouse_state["right_click"] == 1:
              right_lost_count += 1
              if right_lost_count == support_times:
                  mouse_state["right_click"] = 0


  else:
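      # No tags visible this frame: release held buttons after support_times
      # misses and coast the cursor on the last delta for the same number of frames.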
      dx = None
      dy = None
      if mouse_state["left_click"] == 1:
          left_lost_count += 1
          if left_lost_count == support_times:
              mouse_state["left_click"] = 0
              print("left_mouse_up")
              pyautogui.mouseUp(button='left')

      if mouse_state["right_click"] == 1:
          right_lost_count += 1
          if right_lost_count == support_times:
              mouse_state["right_click"] = 0

      if mouse_state["moving"] == 1:
          move_lost_count += 1
          if move_lost_count <= support_times:
              print("support")
              thread = threading.Thread(target=mouse_move, args=(-round(move_x*gain_x), -round(move_y*gain_y), mouse_state["left_click"],))
              thread.start()

          elif move_lost_count > support_times:
              mouse_state["moving"] = 0
              print("lost")

  cv2.imshow('frame', gray)  # if draw_pose is enabled, show frame instead: the axes are drawn on the color image
  if cv2.waitKey(1) & 0xFF == ord('q'):
      break

cap.release()
cv2.destroyAllWindows()
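To quit, give the preview window focus and press the q key.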


The source code is also available here; I may update it from time to time.

AprilTag

Copyright (C) 2013-2016, The Regents of The University of Michigan. All rights reserved.

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

OpenCV

Copyright (C) 2000-2019, Intel Corporation, all rights reserved.
Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
Copyright (C) 2009-2016, NVIDIA Corporation, all rights reserved.
Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
Copyright (C) 2015-2016, OpenCV Foundation, all rights reserved.
Copyright (C) 2015-2016, Itseez Inc., all rights reserved.
Copyright (C) 2019-2020, Xperience AI, all rights reserved.

Third party copyrights are property of their respective owners.

