Shell Script Magazine

Python Odds and Ends (published in Vol. 88)

Author: Jun Iio

In this series, the author, who has been using Python for a long time without ever truly mastering it, aims to grow into a full-fledged "snake charmer" by handling all sorts of everyday tasks in Python. Please watch over the process kindly, and let's learn together. In this 18th installment, we use the face detection features of MediaPipe, a machine learning framework provided by Google, to build a just-for-fun program.
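Before looking at the face mesh, here is a minimal sketch of MediaPipe's simpler face detection solution, assuming the legacy mp.solutions API and a still image (the file names photo.jpg and detected.jpg are placeholders):

import cv2
import mediapipe as mp

# Detect faces in a still image and draw bounding boxes on them
image = cv2.imread('photo.jpg')
with mp.solutions.face_detection.FaceDetection(
    min_detection_confidence=0.5) as detector:
  # MediaPipe expects RGB, while OpenCV loads images as BGR
  results = detector.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
  if results.detections:
    for detection in results.detections:
      mp.solutions.drawing_utils.draw_detection(image, detection)
cv2.imwrite('detected.jpg', image)

Figure 1 follows the same pattern but uses the FaceMesh solution, which returns hundreds of detailed landmarks rather than a single bounding box.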


Figure 1: Code for face landmark detection

import cv2
import mediapipe as mp

spec = mp.solutions.drawing_utils.DrawingSpec(thickness=1,
                                              circle_radius=1)
cap = cv2.VideoCapture(0)
with mp.solutions.face_mesh.FaceMesh(
    min_detection_confidence=0.5, 
    min_tracking_confidence=0.5) as face_mesh:
  while True:
    success, image = cap.read()
    if not success:
      print("Warning: No camera image")
      continue
    # Mirror the frame and convert BGR to RGB, which MediaPipe expects
    image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
    # Mark the frame read-only so MediaPipe can process it without copying
    image.flags.writeable = False
    results = face_mesh.process(image)
    # Annotate the face mesh
    image.flags.writeable = True
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    if results.multi_face_landmarks:
      for landmarks in results.multi_face_landmarks:
        mp.solutions.drawing_utils.draw_landmarks(
            image=image, landmark_list=landmarks,
            connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=spec,
            connection_drawing_spec=spec)
    cv2.imshow('MediaPipe FaceMesh', image)
    if cv2.waitKey(5) & 0xFF == 27:   # exit on the ESC key
      break
cap.release()
cv2.destroyAllWindows()
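Each landmark that FaceMesh returns has x and y coordinates normalized to the range 0-1, so they must be scaled by the frame size before use. As a minimal sketch (the nose tip is commonly cited as landmark index 1), marking a single landmark inside the loop above could look like this:

# Draw the nose tip on the annotated frame; `landmarks` and `image`
# are the variables from the inner loop of Figure 1
h, w = image.shape[:2]
tip = landmarks.landmark[1]               # normalized coordinates
x, y = int(tip.x * w), int(tip.y * h)     # scale to pixel coordinates
cv2.circle(image, (x, y), 3, (0, 0, 255), -1)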

Figure 4: Code for the "face-in-hole" cutout app

import cv2
import numpy as np
from PIL import Image, ImageDraw
import mediapipe as mp

# Resize an image to the given height, preserving the aspect ratio
def scaleToHeight(img, height):
  h, w = img.shape[:2]
  width = round(w * (height / h))
  dst = cv2.resize(img, dsize=(width, height))
  return dst
# Convert an OpenCV image to a PIL RGBA image
# (type is a cv2 color-conversion code)
def convertToRGBA(src, type):
  return Image.fromarray(cv2.cvtColor(src, type))\
              .convert('RGBA')
# Black out the regions to the left and right of the overlay panel
def trimOutside(base, over, loc):
  drw = ImageDraw.Draw(base)
  drw.rectangle([(0, 0), (loc[0]-1, over.size[1]-1)],
                fill=(0, 0, 0))
  drw.rectangle([(loc[0]+over.size[0], 0), 
                 (base.size[0]-1,over.size[1]-1)],
                fill=(0, 0, 0))
# Alpha-composite an overlay panel onto the camera frame
def overlayImage(src, overlay, location):
  # convert images to PIL format
  pil_src     = convertToRGBA(src,     cv2.COLOR_BGR2RGB)
  pil_overlay = convertToRGBA(overlay, cv2.COLOR_BGRA2RGBA)
  # compose the two images
  pil_tmp = Image.new('RGBA', pil_src.size, (0, 0, 0, 0))
  pil_tmp.paste(pil_overlay, location, pil_overlay)
  trimOutside(pil_tmp, pil_overlay, location)
  result_image = Image.alpha_composite(pil_src, pil_tmp)
  # convert result to OpenCV format
  return cv2.cvtColor(np.asarray(result_image),
                      cv2.COLOR_RGBA2BGRA)
# Advance the countdown timer, drawing the countdown and flash effects
def decrementTimer(timer, image, p_idx):
  h, w = image.shape[:2]
  if timer < 0:
    # Countdown finished: switch to the next panel and restart
    p_idx = (p_idx + 1) % len(panels)
    return TIMER_INITIAL_VALUE, p_idx
  elif timer == 30:
    # Save a copy of the current frame, then flash the screen white
    global still
    still = image.copy()
    cv2.rectangle(image, (0, 0), (w, h), (255, 255, 255),
                  thickness=-1)
    return timer - 1, p_idx
  elif timer < 30:
    # Keep showing the saved still while the timer runs out
    image[:] = still
    return timer - 1, p_idx
  # Draw the countdown digit (3, 2, 1); it dims as each second elapses
  d, r = timer // 30, timer % 30
  c = 255 / 60 * r + 128
  cv2.putText(image, org=(int(w/2-100), int(h/2+100)), 
              text=str(d),
              fontFace=cv2.FONT_HERSHEY_DUPLEX,
              fontScale=10.0, color=(c, c, c),
              thickness=30,
              lineType=cv2.LINE_AA)
  return timer - 1, p_idx
# prepare the face-in-hole panels
panels = []
panels.append(cv2.imread('img1.png', cv2.IMREAD_UNCHANGED))
panels.append(cv2.imread('img2.png', cv2.IMREAD_UNCHANGED))
panels.append(cv2.imread('img3.png', cv2.IMREAD_UNCHANGED))
# capture from a camera
cap = cv2.VideoCapture(0)
ret, frame = cap.read()
# rescale the panels to the camera frame height
height, width = frame.shape[:2]
panels = [scaleToHeight(p, height) for p in panels]
p_idx = 0
panel = panels[p_idx]
p_height, p_width = panel.shape[:2]
# timing counter: 119 frames gives a 3-2-1 countdown at roughly 30 fps
TIMER_INITIAL_VALUE = 119
timer = TIMER_INITIAL_VALUE
with mp.solutions.face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5) as face_mesh:
  while cap.isOpened():
    success, image = cap.read()
    if not success:
      print("Ignoring empty camera frame.")
      continue
    image = cv2.flip(image, 1)
    location = ((width-p_width)//2, 0)
    image = overlayImage(image, panel, location)
    # The composited frame is BGRA, so convert it to RGB for MediaPipe
    image2 = cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
    results = face_mesh.process(image2)
    if results.multi_face_landmarks is not None:   # a face is visible
      timer, p_idx = decrementTimer(timer, image, p_idx)
      panel = panels[p_idx]
      p_height, p_width = panel.shape[:2]
    else:
      timer = TIMER_INITIAL_VALUE          # reset timer
    # trim the displayed image to the panel area
    image = image[0:p_height,
                  location[0]:location[0]+p_width]
    cv2.imshow('Virtual Face-in-Hole Cutout', image)
    if cv2.waitKey(1) & 0xFF == ord('q'):   # exit on the 'q' key
      break
cap.release()
cv2.destroyAllWindows()
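A note on the timing: decrementTimer starts from TIMER_INITIAL_VALUE (119) and decreases once per frame, so at roughly 30 fps the overlaid digit counts down 3, 2, 1 over about three seconds. When the timer reaches 30, the current frame is saved and the screen flashes white; the saved still is then displayed for the remaining 30 frames, and once the timer goes below zero the app moves on to the next panel. If you also want to keep the commemorative photo as a file, a minimal sketch (the file name is arbitrary) is to write the still to disk at the moment of the flash:

# Inside decrementTimer, right after `still = image.copy()`:
cv2.imwrite('snapshot_%d.png' % p_idx, still)   # save the frozen frame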