Author: Jun Iio (飯尾 淳)
In this series, the author, who has "used Python for a long time without ever quite mastering it," aims to grow into a full-fledged "snake charmer" by handling all sorts of everyday tasks in Python. Please watch over the journey kindly, and let's learn together. In this 18th installment, we try our hand at a just-for-fun program using the face-detection capabilities of MediaPipe, a machine learning framework provided by Google.
Shell Script Magazine (シェルスクリプトマガジン) Vol. 88 can be purchased at the link below.
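Both libraries install from PyPI (for example, pip install mediapipe opencv-python). Before the webcam loop in Figure 1, here is a minimal sketch, not part of the article's listings, that applies the same FaceMesh solution to a single still image; the file name face.jpg is only a placeholder.

import cv2
import mediapipe as mp

# Load a test photo (placeholder file name). OpenCV reads BGR,
# while MediaPipe expects RGB, so convert before processing.
image = cv2.imread('face.jpg')
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# static_image_mode=True runs detection on every call instead of
# tracking landmarks across video frames.
with mp.solutions.face_mesh.FaceMesh(static_image_mode=True,
                                     max_num_faces=1) as face_mesh:
    results = face_mesh.process(rgb)

if results.multi_face_landmarks:
    n = len(results.multi_face_landmarks[0].landmark)
    print(f'Found a face with {n} landmarks')  # 468 for the basic mesh
else:
    print('No face found')

Running this on a portrait photo should report 468 landmarks, the size of the basic mesh; enabling refine_landmarks, as the Figure 4 code does, adds iris landmarks for a total of 478.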
Figure 1: Code that performs face landmark detection
import cv2
import mediapipe as mp

# Drawing style for the landmarks and their connections
spec = mp.solutions.drawing_utils.DrawingSpec(thickness=1, circle_radius=1)

cap = cv2.VideoCapture(0)
with mp.solutions.face_mesh.FaceMesh(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:
    while True:
        success, image = cap.read()
        if not success:
            print("Warning: No camera image")
            continue
        # Mirror the frame and convert BGR -> RGB for MediaPipe
        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
        image.flags.writeable = False  # pass by reference for speed
        results = face_mesh.process(image)

        # Annotate the face mesh
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.multi_face_landmarks:
            for landmarks in results.multi_face_landmarks:
                mp.solutions.drawing_utils.draw_landmarks(
                    image=image,
                    landmark_list=landmarks,
                    connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
                    landmark_drawing_spec=spec,
                    connection_drawing_spec=spec)
        cv2.imshow('MediaPipe FaceMesh', image)
        if cv2.waitKey(5) & 0xFF == 27:  # quit on the Esc key
            break
cap.release()
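The landmarks that face_mesh.process() returns are normalized to the range [0, 1], so they must be scaled by the frame dimensions before they can be used as pixel positions. As a sketch of going beyond drawing the whole mesh, the hypothetical helper below (not part of the article's code) converts one landmark to pixels and marks landmark index 1, commonly identified as the tip of the nose in the FaceMesh topology; it is meant to slot into the Figure 1 loop after process() has run.

def landmarkToPixels(landmark, image):
    # MediaPipe coordinates are normalized to [0, 1];
    # scale by the frame size to get pixel positions
    h, w = image.shape[:2]
    return int(landmark.x * w), int(landmark.y * h)

# Hypothetical usage inside the Figure 1 loop, after process():
if results.multi_face_landmarks:
    nose = results.multi_face_landmarks[0].landmark[1]  # nose tip
    x, y = landmarkToPixels(nose, image)
    cv2.circle(image, (x, y), 5, (0, 0, 255), -1)  # red dot on the nose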
Figure 4: Code for the "face-in-hole" (kao-hame) app
import cv2
import numpy as np
from PIL import Image, ImageDraw
import mediapipe as mp

def scaleToHeight(img, height):
    # Resize img to the given height, keeping its aspect ratio
    h, w = img.shape[:2]
    width = round(w * (height / h))
    dst = cv2.resize(img, dsize=(width, height))
    return dst

def convertToRGBA(src, code):
    # Convert an OpenCV image to a PIL RGBA image
    return Image.fromarray(cv2.cvtColor(src, code)).convert('RGBA')

def trimOutside(base, over, loc):
    # Black out the areas to the left and right of the overlay
    drw = ImageDraw.Draw(base)
    drw.rectangle([(0, 0), (loc[0]-1, over.size[1]-1)],
                  fill=(0, 0, 0))
    drw.rectangle([(loc[0]+over.size[0], 0),
                   (base.size[0]-1, over.size[1]-1)],
                  fill=(0, 0, 0))

def overlayImage(src, overlay, location):
    # convert images to PIL format
    pil_src = convertToRGBA(src, cv2.COLOR_BGR2RGB)
    pil_overlay = convertToRGBA(overlay, cv2.COLOR_BGRA2RGBA)
    # compose the two images
    pil_tmp = Image.new('RGBA', pil_src.size, (0, 0, 0, 0))
    pil_tmp.paste(pil_overlay, location, pil_overlay)
    trimOutside(pil_tmp, pil_overlay, location)
    result_image = Image.alpha_composite(pil_src, pil_tmp)
    # convert the result back to OpenCV format
    return cv2.cvtColor(np.asarray(result_image), cv2.COLOR_RGBA2BGRA)

def decrementTimer(timer, image, p_idx):
    h, w = image.shape[:2]
    if timer < 0:
        # countdown finished: switch to the next panel and restart
        p_idx = (p_idx + 1) % len(panels)
        return TIMER_INITIAL_VALUE, p_idx
    elif timer == 30:
        # "shutter" moment: keep a copy of the frame and flash white
        global still
        still = image.copy()
        cv2.rectangle(image, (0, 0), (w, h), (255, 255, 255),
                      thickness=-1)
        return timer - 1, p_idx
    elif timer < 30:
        # hold the captured still for about a second (in place, so the
        # caller's frame is actually replaced)
        image[:] = still
        return timer - 1, p_idx
    # draw the countdown digit (3, 2, 1), fading within each second
    d, r = timer // 30, timer % 30
    c = 255 / 60 * r + 128
    cv2.putText(image, org=(int(w/2-100), int(h/2+100)),
                text=str(d), fontFace=cv2.FONT_HERSHEY_DUPLEX,
                fontScale=10.0, color=(c, c, c), thickness=30,
                lineType=cv2.LINE_AA)
    return timer - 1, p_idx

# prepare the kao_hame panels
panels = []
panels.append(cv2.imread('img1.png', cv2.IMREAD_UNCHANGED))
panels.append(cv2.imread('img2.png', cv2.IMREAD_UNCHANGED))
panels.append(cv2.imread('img3.png', cv2.IMREAD_UNCHANGED))

# capture from a camera
cap = cv2.VideoCapture(0)
ret, frame = cap.read()

# rescale the kao_hame images to the camera frame height
height, width = frame.shape[:2]
for i in range(len(panels)):
    panels[i] = scaleToHeight(panels[i], height)
p_idx = 0
panel = panels[p_idx]
p_height, p_width = panel.shape[:2]

# timing counter
TIMER_INITIAL_VALUE = 119
timer = TIMER_INITIAL_VALUE

with mp.solutions.face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            continue
        image = cv2.flip(image, 1)
        # paste the panel onto the mirrored frame, centered horizontally
        location = ((width - p_width) // 2, 0)
        image = overlayImage(image, panel, location)
        image2 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(image2)
        if results.multi_face_landmarks is not None:
            # a face is in the hole: advance the countdown
            timer, p_idx = decrementTimer(timer, image, p_idx)
            panel = panels[p_idx]
            p_height, p_width = panel.shape[:2]
        else:
            timer = TIMER_INITIAL_VALUE  # reset timer
        # trim the image down to the panel area
        image = image[0:p_height, location[0]:location[0]+p_width]
        cv2.imshow('Virtual Face-in-Hole Cutout', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
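The compositing helpers can also be exercised without a camera. The following standalone sketch is not from the article: it fabricates a gray stand-in for a camera frame and a red panel with a transparent hole (in place of img1.png), then composites them, assuming overlayImage and its helpers from Figure 4 are already defined in the session.

import cv2
import numpy as np

# Fabricated test data: a gray 640x480 "camera frame" in BGR, and a
# red 200x200 panel in BGRA with a transparent circular hole
frame = np.full((480, 640, 3), 128, dtype=np.uint8)
panel = np.zeros((200, 200, 4), dtype=np.uint8)
panel[..., 2] = 255                                  # red (BGR order)
panel[..., 3] = 255                                  # fully opaque
cv2.circle(panel, (100, 100), 60, (0, 0, 0, 0), -1)  # punch the hole

# Composite the panel onto the frame, centered horizontally, and save
location = ((frame.shape[1] - panel.shape[1]) // 2, 0)
result = overlayImage(frame, panel, location)
cv2.imwrite('test_overlay.png', result)

Opening test_overlay.png should show the gray frame peeking through the hole and below the panel, while the strips to the panel's left and right are blacked out by trimOutside; in the real app the panel is scaled to the full frame height, and the main loop crops exactly that blacked-out region away.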