Author: Jun Iio (飯尾 淳)
In this series, the author, who has "used Python for a long time without ever quite mastering it," aims to grow into a full-fledged "snake charmer" by handling all sorts of everyday tasks in Python. Please watch over the journey kindly, and let's learn together. In this 18th installment, we try our hand at a just-for-fun program using the face-detection capabilities of MediaPipe, a machine learning framework provided by Google.
Shell Script Magazine (シェルスクリプトマガジン) Vol. 88 can be purchased at the link below.
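Both libraries install from PyPI (for example, pip install mediapipe opencv-python). Before the webcam loop in Figure 1, here is a minimal sketch, not part of the article's listings, that applies the same FaceMesh solution to a single still image; the file name face.jpg is only a placeholder.

import cv2
import mediapipe as mp

# Load a test photo (placeholder file name). OpenCV reads BGR,
# while MediaPipe expects RGB, so convert before processing.
image = cv2.imread('face.jpg')
rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# static_image_mode=True runs detection on every call instead of
# tracking landmarks across video frames.
with mp.solutions.face_mesh.FaceMesh(static_image_mode=True,
                                     max_num_faces=1) as face_mesh:
    results = face_mesh.process(rgb)

if results.multi_face_landmarks:
    n = len(results.multi_face_landmarks[0].landmark)
    print(f'Found a face with {n} landmarks')  # 468 for the basic mesh
else:
    print('No face found')

Running this on a portrait photo should report 468 landmarks, the size of the basic mesh; enabling refine_landmarks, as the Figure 4 code does, adds iris landmarks for a total of 478.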
Figure 1: Code that performs face landmark detection
import cv2
import mediapipe as mp

# Drawing style for the landmarks and their connections
spec = mp.solutions.drawing_utils.DrawingSpec(thickness=1, circle_radius=1)

cap = cv2.VideoCapture(0)
with mp.solutions.face_mesh.FaceMesh(
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:
    while True:
        success, image = cap.read()
        if not success:
            print("Warning: No camera image")
            continue
        # Mirror the frame and convert BGR -> RGB for MediaPipe
        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
        image.flags.writeable = False  # pass by reference for speed
        results = face_mesh.process(image)

        # Annotate the face mesh
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        if results.multi_face_landmarks:
            for landmarks in results.multi_face_landmarks:
                mp.solutions.drawing_utils.draw_landmarks(
                    image=image,
                    landmark_list=landmarks,
                    connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
                    landmark_drawing_spec=spec,
                    connection_drawing_spec=spec)
        cv2.imshow('MediaPipe FaceMesh', image)
        if cv2.waitKey(5) & 0xFF == 27:  # quit on the Esc key
            break
cap.release()
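The landmarks that face_mesh.process() returns are normalized to the range [0, 1], so they must be scaled by the frame dimensions before they can be used as pixel positions. As a sketch of going beyond drawing the whole mesh, the hypothetical helper below (not part of the article's code) converts one landmark to pixels and marks landmark index 1, commonly identified as the tip of the nose in the FaceMesh topology; it is meant to slot into the Figure 1 loop after process() has run.

def landmarkToPixels(landmark, image):
    # MediaPipe coordinates are normalized to [0, 1];
    # scale by the frame size to get pixel positions
    h, w = image.shape[:2]
    return int(landmark.x * w), int(landmark.y * h)

# Hypothetical usage inside the Figure 1 loop, after process():
if results.multi_face_landmarks:
    nose = results.multi_face_landmarks[0].landmark[1]  # nose tip
    x, y = landmarkToPixels(nose, image)
    cv2.circle(image, (x, y), 5, (0, 0, 255), -1)  # red dot on the nose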
Figure 4: Code for the "face-in-hole" (kao-hame) app
import cv2
import numpy as np
from PIL import Image, ImageDraw
import mediapipe as mp

def scaleToHeight(img, height):
    # Resize img to the given height, keeping its aspect ratio
    h, w = img.shape[:2]
    width = round(w * (height / h))
    dst = cv2.resize(img, dsize=(width, height))
    return dst

def convertToRGBA(src, code):
    # Convert an OpenCV image to a PIL RGBA image
    return Image.fromarray(cv2.cvtColor(src, code)).convert('RGBA')

def trimOutside(base, over, loc):
    # Black out the areas to the left and right of the overlay
    drw = ImageDraw.Draw(base)
    drw.rectangle([(0, 0), (loc[0]-1, over.size[1]-1)],
                  fill=(0, 0, 0))
    drw.rectangle([(loc[0]+over.size[0], 0),
                   (base.size[0]-1, over.size[1]-1)],
                  fill=(0, 0, 0))

def overlayImage(src, overlay, location):
    # convert images to PIL format
    pil_src = convertToRGBA(src, cv2.COLOR_BGR2RGB)
    pil_overlay = convertToRGBA(overlay, cv2.COLOR_BGRA2RGBA)
    # compose the two images
    pil_tmp = Image.new('RGBA', pil_src.size, (0, 0, 0, 0))
    pil_tmp.paste(pil_overlay, location, pil_overlay)
    trimOutside(pil_tmp, pil_overlay, location)
    result_image = Image.alpha_composite(pil_src, pil_tmp)
    # convert the result back to OpenCV format
    return cv2.cvtColor(np.asarray(result_image), cv2.COLOR_RGBA2BGRA)

def decrementTimer(timer, image, p_idx):
    h, w = image.shape[:2]
    if timer < 0:
        # countdown finished: switch to the next panel and restart
        p_idx = (p_idx + 1) % len(panels)
        return TIMER_INITIAL_VALUE, p_idx
    elif timer == 30:
        # "shutter" moment: keep a copy of the frame and flash white
        global still
        still = image.copy()
        cv2.rectangle(image, (0, 0), (w, h), (255, 255, 255),
                      thickness=-1)
        return timer - 1, p_idx
    elif timer < 30:
        # hold the captured still for about a second (in place, so the
        # caller's frame is actually replaced)
        image[:] = still
        return timer - 1, p_idx
    # draw the countdown digit (3, 2, 1), fading within each second
    d, r = timer // 30, timer % 30
    c = 255 / 60 * r + 128
    cv2.putText(image, org=(int(w/2-100), int(h/2+100)),
                text=str(d), fontFace=cv2.FONT_HERSHEY_DUPLEX,
                fontScale=10.0, color=(c, c, c), thickness=30,
                lineType=cv2.LINE_AA)
    return timer - 1, p_idx

# prepare the kao_hame panels
panels = []
panels.append(cv2.imread('img1.png', cv2.IMREAD_UNCHANGED))
panels.append(cv2.imread('img2.png', cv2.IMREAD_UNCHANGED))
panels.append(cv2.imread('img3.png', cv2.IMREAD_UNCHANGED))

# capture from a camera
cap = cv2.VideoCapture(0)
ret, frame = cap.read()

# rescale the kao_hame images to the camera frame height
height, width = frame.shape[:2]
for i in range(len(panels)):
    panels[i] = scaleToHeight(panels[i], height)
p_idx = 0
panel = panels[p_idx]
p_height, p_width = panel.shape[:2]

# timing counter
TIMER_INITIAL_VALUE = 119
timer = TIMER_INITIAL_VALUE

with mp.solutions.face_mesh.FaceMesh(
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
        min_tracking_confidence=0.5) as face_mesh:
    while cap.isOpened():
        success, image = cap.read()
        if not success:
            print("Ignoring empty camera frame.")
            continue
        image = cv2.flip(image, 1)
        # paste the panel onto the mirrored frame, centered horizontally
        location = ((width - p_width) // 2, 0)
        image = overlayImage(image, panel, location)
        image2 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(image2)
        if results.multi_face_landmarks is not None:
            # a face is in the hole: advance the countdown
            timer, p_idx = decrementTimer(timer, image, p_idx)
            panel = panels[p_idx]
            p_height, p_width = panel.shape[:2]
        else:
            timer = TIMER_INITIAL_VALUE  # reset timer
        # trim the image down to the panel area
        image = image[0:p_height, location[0]:location[0]+p_width]
        cv2.imshow('Virtual Face-in-Hole Cutout', image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
cap.release()
cv2.destroyAllWindows()
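The compositing helpers can also be exercised without a camera. The following standalone sketch is not from the article: it fabricates a gray stand-in for a camera frame and a red panel with a transparent hole (in place of img1.png), then composites them, assuming overlayImage and its helpers from Figure 4 are already defined in the session.

import cv2
import numpy as np

# Fabricated test data: a gray 640x480 "camera frame" in BGR, and a
# red 200x200 panel in BGRA with a transparent circular hole
frame = np.full((480, 640, 3), 128, dtype=np.uint8)
panel = np.zeros((200, 200, 4), dtype=np.uint8)
panel[..., 2] = 255                                  # red (BGR order)
panel[..., 3] = 255                                  # fully opaque
cv2.circle(panel, (100, 100), 60, (0, 0, 0, 0), -1)  # punch the hole

# Composite the panel onto the frame, centered horizontally, and save
location = ((frame.shape[1] - panel.shape[1]) // 2, 0)
result = overlayImage(frame, panel, location)
cv2.imwrite('test_overlay.png', result)

Opening test_overlay.png should show the gray frame peeking through the hole and below the panel, while the strips to the panel's left and right are blacked out by trimOutside; in the real app the panel is scaled to the full frame height, and the main loop crops exactly that blacked-out region away.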