Python：映像から特定の人物の顔の検出をしてみた

今日は、dlib（C++で作成された機械学習ライブラリ）をベースにしたface_recognitionというPythonライブラリを使用して、複数の人が映った映像から特定の人物が表示されているときに、その人の顔を検出するPythonプログラムを作成してみました。

dlibについては以下のサイトを参照してください。
http://dlib.net/

face_recognitionについては以下のサイトを参照してください。
https://face-recognition.readthedocs.io/en/latest/face_recognition.html

準備

事前にVisual Studio Communityをインストールしておく必要があります。以下のサイトからダウンロードして、インストールしてください。

https://visualstudio.microsoft.com/ja/vs/community/

※インストールの際、「C++によるデスクトップ開発」をチェックすることを忘れないでください。

Python側で必要となるモジュールはOpenCVとdlib、cmake、face_recognitionです。以下のコマンドでインストールしてください。

pip install opencv-python
pip install opencv-contrib-python
pip install dlib
pip install cmake
pip install face_recognition

pip install opencv-python

pip install opencv-contrib-python

pip install dlib

pip install cmake

pip install face_recognition

実行した様子

プログラムのソースコードを解説する前に、実行した様子を示します。

プログラムを起動すると以下のようなウィンドウが表示されます。「映像読み込み」ボタンと「顔写真読み込み」ボタンそれぞれを押して、複数の人物が映った動画ファイルと特定の人物が映った画像ファイルを選択します。ここではTEDの映像と、イーロン・マスクの顔写真を読み込んでいます。

今回使った映像

今回使った画像

「検出開始」というボタンを押すと処理が始まります。イーロン・マスクが映っている場面では、このように顔が認識されています。

他の人の場合は、顔の検出はされません。

それではプログラムの中身を紹介していきます。

プログラムの内容

ソースコードはこちらです。

import numpy as np
import cv2
import os
from tkinter import *
import tkinter as tk
import tkinter.filedialog as fd
import tkinter.ttk as ttk
from tkinter import messagebox
import MyUtility as my
import face_recognition
import sys

###############################################################################

class FaceMovieDetect():
    """Tkinter GUI that marks one known person's face in a video.

    The user picks a video file and a reference photo of a single person.
    Pressing the start button plays the video in an OpenCV window and draws
    a green rectangle plus a label around every detected face that matches
    the reference photo.  The label is the photo's file name (without
    extension, upper-cased), so the photo should be named after the person.
    """

    # Frames are shrunk by this integer factor before face detection to keep
    # processing light; detected boxes are multiplied back by the same factor.
    _SHRINK_FACTOR = 4

    def run(self):
        """Enter the Tk main loop."""
        self.root.mainloop()

    def __init__(self):
        """Build the main window: two path fields, their buttons, start and exit."""
        # Flags telling whether a video / a photo has been chosen yet.
        self.b_mov = False
        self.b_pct = False
        # Kept for compatibility with the original class; not used by the
        # methods visible here.
        self.dirname_mov = ""
        self.filename_mov = ""
        self.basename_mov = ""

        self.root = tk.Tk()
        self.root.geometry("480x150")

        # Read-only entries that only display the chosen paths.
        self.entry_mov_filepath = tk.Entry(self.root, width=50, state="readonly")
        self.entry_mov_filepath.grid(row=1, column=0, padx=5, pady=5)
        self.button_movie = tk.Button(self.root, text="映像読み込み", width=20, command=self.open_moviefile)
        self.button_movie.grid(row=1, column=1, padx=5, pady=5)

        self.entry_pct_filepath = tk.Entry(self.root, width=50, state="readonly")
        self.entry_pct_filepath.grid(row=2, column=0, padx=5, pady=5)
        self.button_pct = tk.Button(self.root, text="顔写真読み込み", width=20, command=self.open_pictfile)
        self.button_pct.grid(row=2, column=1, padx=5, pady=5)

        self.sep = ttk.Separator(self.root, orient="horizontal", style="blue.TSeparator")
        self.sep.grid(row=3, column=0, columnspan=2, sticky="ew")

        # The start button stays disabled until both files have been chosen.
        self.button_mov = tk.Button(self.root, text="検出開始", width=20, command=self.detect_face_in_movie, state=tk.DISABLED)
        self.button_mov.grid(row=4, column=1, padx=5, pady=5)

        self.button_exit = tk.Button(self.root, text="終了", width=20, command=self.exit_program)
        self.button_exit.grid(row=7, column=1, padx=5, pady=5)

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _stem(filepath):
        """Return the file name of *filepath* without directory and extension."""
        return os.path.splitext(os.path.basename(filepath))[0]

    @staticmethod
    def _scale_box(face_loc, factor):
        """Turn a (top, right, bottom, left) box into (x1, y1, x2, y2) scaled by *factor*."""
        top, right, bottom, left = face_loc
        return left * factor, top * factor, right * factor, bottom * factor

    @staticmethod
    def _show_path(entry, filepath):
        """Replace the text of a read-only entry with *filepath*."""
        # A read-only entry rejects edits, so unlock it just for the update.
        entry.configure(state='normal')
        entry.delete(0, tk.END)
        entry.insert('end', filepath)
        entry.configure(state='readonly')

    def _enable_start_if_ready(self):
        """Enable the start button once both a video and a photo are selected."""
        if self.b_mov and self.b_pct:
            self.button_mov.configure(state=tk.NORMAL)

    # ------------------------------------------------------------------
    # Button callbacks
    # ------------------------------------------------------------------

    def open_moviefile(self):
        """Ask for an .mp4 file and show its path; does nothing on cancel."""
        filepath = my.ask_input_filename("動画ファイルを選んでください", types=[('', '*.mp4')])
        if not filepath:
            return
        self._show_path(self.entry_mov_filepath, filepath)
        self.b_mov = True
        self._enable_start_if_ready()

    def open_pictfile(self):
        """Ask for a .jpg/.png photo and show its path; does nothing on cancel."""
        # Tk expects the patterns of one file type separated by whitespace;
        # the former "*.jpg | *.png" added a stray "|" pattern.
        filepath = my.ask_input_filename("顔写真ファイルを選んでください", types=[('', '*.jpg *.png')])
        if not filepath:
            return
        self._show_path(self.entry_pct_filepath, filepath)
        self.b_pct = True
        self._enable_start_if_ready()

    def exit_program(self):
        """Stop the Tk main loop and terminate the process."""
        self.root.quit()
        # sys.exit() instead of the interactive-only exit() helper, which is
        # injected by the site module and is not guaranteed to exist.
        sys.exit()

    # ------------------------------------------------------------------
    # Detection
    # ------------------------------------------------------------------

    @staticmethod
    def _encode_reference_face(filepath):
        """Return the encoding of the first face found in the photo, or None.

        None is returned when the file cannot be decoded as an image or when
        no face is found in it.
        """
        # Read the bytes with NumPy and decode in memory: cv2.imread cannot
        # open paths that contain Japanese characters.
        raw = np.fromfile(filepath, np.uint8)
        bgr = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if bgr is None:
            return None
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        encodings = face_recognition.face_encodings(rgb)
        return encodings[0] if encodings else None

    def _play_with_detection(self, cap, pict_encode, name):
        """Show every frame of *cap*, marking faces that match *pict_encode*.

        Returns when the video ends or the user presses "q" in the preview
        window.
        NOTE(review): this loop runs inside a Tk callback, so the main window
        presumably does not process events until it returns.
        """
        shrink = self._SHRINK_FACTOR
        ratio = 1.0 / shrink
        while True:
            ret, img = cap.read()
            if not ret:
                break

            # Detect on a shrunken RGB copy; draw on the original BGR frame.
            small = cv2.resize(img, (0, 0), None, ratio, ratio)
            small = cv2.cvtColor(small, cv2.COLOR_BGR2RGB)

            locations = face_recognition.face_locations(small)
            encodings = face_recognition.face_encodings(small, locations)

            for encoding, location in zip(encodings, locations):
                matches = face_recognition.compare_faces([pict_encode], encoding)
                distances = face_recognition.face_distance([pict_encode], encoding)
                # With a single reference face the index is always 0; argmin
                # is kept so several known faces can be supported later.
                best = np.argmin(distances)
                if matches[best]:
                    x1, y1, x2, y2 = self._scale_box(location, shrink)
                    cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    # NOTE(review): Hershey fonts presumably cannot render
                    # non-ASCII names - confirm with a Japanese file name.
                    cv2.putText(img, name, (x1+6, y2-6), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

            cv2.imshow("img", img)

            # Pressing "q" aborts playback.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    def detect_face_in_movie(self):
        """Play the selected video and mark the person from the selected photo.

        Problems are reported through message boxes instead of exceptions.
        The video capture and the preview window are always released.
        """
        try:
            # Read the chosen paths back from the entries.
            self.filepath_mov = self.entry_mov_filepath.get()
            self.filepath_pct = self.entry_pct_filepath.get()
            self.basename_pct = self._stem(self.filepath_pct)
            name = self.basename_pct.upper()

            pict_encode = self._encode_reference_face(self.filepath_pct)
            if pict_encode is None:
                messagebox.showerror("エラー", "顔写真を読み込めないか、顔を検出できませんでした。")
                return

            cap = cv2.VideoCapture(self.filepath_mov)
            try:
                if not cap.isOpened():
                    messagebox.showerror("エラー", "動画ファイルを開けませんでした。")
                    return
                self._play_with_detection(cap, pict_encode, name)
            finally:
                cap.release()
                cv2.destroyAllWindows()

            messagebox.showinfo("終了", "処理が終わりました。")

        except Exception:
            # Last-resort boundary of a GUI callback: log details to the
            # console and tell the user something went wrong.
            print(sys.exc_info())
            messagebox.showerror("エラー", "エラーが発生しました。")



###############################################################################
#
#   以下、メイン処理
#
###############################################################################

if __name__ == "__main__":
    # Build the window and hand control to the Tk event loop.
    FaceMovieDetect().run()

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

import numpy as np

import cv2

import os

from tkinter import *

import tkinter as tk

import tkinter.filedialog as fd

import tkinter.ttk as ttk

from tkinter import messagebox

import MyUtility as my

import face_recognition

import sys

###############################################################################

class FaceMovieDetect():

def run(self):

self.root.mainloop()

def __init__(self):

self.b_mov = False

self.b_pct = False

self.dirname_mov = ""

self.filename_mov = ""

self.basename_mov = ""

self.root = tk.Tk()

self.root.geometry("480x150")

self.entry_mov_filepath = tk.Entry(self.root, text="", width=50, state="readonly")

self.entry_mov_filepath.grid(row=1, column=0, padx=5, pady=5)

self.button_movie = tk.Button(self.root, text="映像読み込み", width=20, command=self.open_moviefile)

self.button_movie.grid(row=1, column=1, padx=5, pady=5)

self.entry_pct_filepath = tk.Entry(self.root, text="", width=50, state="readonly")

self.entry_pct_filepath.grid(row=2, column=0, padx=5, pady=5)

self.button_pct = tk.Button(self.root, text="顔写真読み込み", width=20, command=self.open_pictfile)

self.button_pct.grid(row=2, column=1, padx=5, pady=5)

self.sep = ttk.Separator(self.root, orient="horizontal", style="blue.TSeparator")

self.sep.grid(row=3, column=0, columnspan=2, sticky="ew")

self.button_mov = tk.Button(self.root, text="検出開始", width=20, command=self.detect_face_in_movie, state=tk.DISABLED)

self.button_mov.grid(row=4, column=1, padx=5, pady=5)

self.button_exit = tk.Button(self.root, text="終了", width=20, command=self.exit_program)

self.button_exit.grid(row=7, column=1, padx=5, pady=5)

###############################################################################

def open_moviefile(self):

filepath = my.ask_input_filename("動画ファイルを選んでください", types=[('', '*.mp4')])

if filepath:

self.entry_mov_filepath.configure(state='normal')

self.entry_mov_filepath.delete(0, tk.END)

self.entry_mov_filepath.insert('end', filepath)

self.entry_mov_filepath.configure(state='readonly')

self.b_mov = True

if self.b_mov and self.b_pct:

self.button_mov.configure(state=tk.NORMAL)

###############################################################################

def open_pictfile(self):

filepath = my.ask_input_filename("顔写真ファイルを選んでください", types=[('', '*.jpg | *.png')])

if filepath:

self.entry_pct_filepath.configure(state='normal')

self.entry_pct_filepath.delete(0, tk.END)

self.entry_pct_filepath.insert('end', filepath)

self.entry_pct_filepath.configure(state='readonly')

self.b_pct = True

if self.b_mov and self.b_pct:

self.button_mov.configure(state=tk.NORMAL)

###############################################################################

def exit_program(self):

self.root.quit()

exit()

###############################################################################

def detect_face_in_movie(self):

try:

# パスを取得する

self.filepath_mov = self.entry_mov_filepath.get()

self.filepath_pct = self.entry_pct_filepath.get()

self.basename_pct = os.path.basename(self.filepath_pct)

self.basename_pct = os.path.splitext(self.basename_pct)[0] # 拡張子を除いたファイル名を取得する

name = self.basename_pct.upper()

# 顔をエンコードする

pict_np = np.fromfile(self.filepath_pct, np.uint8) # OpenCVは日本語を含むファイルを開けないのでNumpyを使って開く

pict_img = cv2.imdecode(pict_np, cv2.IMREAD_COLOR)

pict_img = cv2.cvtColor(pict_img, cv2.COLOR_BGR2RGB)

pict_encode = face_recognition.face_encodings(pict_img)[0]

# 動画の切り出し

cap = cv2.VideoCapture(self.filepath_mov)

while True:

ret, img = cap.read()

if ret is False:

break

imgS = cv2.resize(img, (0, 0), None, 0.25, 0.25) # 処理を軽くするため、サイズを4分の1にする

imgS = cv2.cvtColor(imgS, cv2.COLOR_BGR2RGB)

# 現在のフレームから顔をすべて検出する

facesCurFrame = face_recognition.face_locations(imgS)

encodesCurFrame = face_recognition.face_encodings(imgS, facesCurFrame)

for encodeFace, faceLoc in zip(encodesCurFrame, facesCurFrame):

matches = face_recognition.compare_faces([pict_encode], encodeFace)

facedist = face_recognition.face_distance([pict_encode], encodeFace)

# 距離が最小のものがマッチしていたら顔の周りに矩形を描画する

matchIndex = np.argmin(facedist)

if matches[matchIndex]:

y1, x2, y2, x1 = faceLoc

y1, x2, y2, x1 = y1*4, x2*4, y2*4, x1*4 # 1/4にしていたのでサイズを元に戻す

cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.putText(img, name, (x1+6, y2-6), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

cv2.imshow("img", img)

# qを押したら中断する

if cv2.waitKey(1) & 0xFF == ord('q'):

break

messagebox.showinfo("終了", "処理が終わりました。")

except:

print(sys.exc_info())

messagebox.showinfo("エラー", "エラーが発生しました。")

###############################################################################

# 以下、メイン処理

###############################################################################

if __name__ == "__main__":

app = FaceMovieDetect()

app.run()

あとファイルやフォルダを選択するダイアログ関連の機能をまとめた以下のスクリプト（MyUtility.py）を読み込んでいます。

import tkinter as tk
import tkinter.filedialog as fd

def ask_input_filename(msg=None, types=None):
    """Show an "open file" dialog and return the selected path.

    Args:
        msg: Dialog title; ``None`` leaves the title to tkinter.
        types: Sequence of ``(label, pattern)`` tuples forwarded as
            ``filetypes``.  Defaults to a single "all files" entry.

    Returns:
        The value returned by ``tkinter.filedialog.askopenfilename``
        (falsy when the user cancels).
    """
    if types is None:
        types = [('', '*.*')]
    # A hidden throw-away root keeps an empty Tk window from appearing.
    rt = tk.Tk()
    rt.withdraw()
    try:
        return fd.askopenfilename(title=msg, filetypes=types)
    finally:
        # Destroy the hidden root even if the dialog raises.
        rt.destroy()


def ask_input_filenames(msg=None, types=None):
    """Show an "open files" dialog that allows selecting several files.

    Args:
        msg: Dialog title; ``None`` leaves the title to tkinter.
        types: Sequence of ``(label, pattern)`` tuples forwarded as
            ``filetypes``.  Defaults to a single "all files" entry.

    Returns:
        The value returned by ``tkinter.filedialog.askopenfilenames``
        (falsy when the user cancels).
    """
    if types is None:
        types = [('', '*.*')]
    # A hidden throw-away root keeps an empty Tk window from appearing.
    rt = tk.Tk()
    rt.withdraw()
    try:
        return fd.askopenfilenames(title=msg, filetypes=types)
    finally:
        # Destroy the hidden root even if the dialog raises.
        rt.destroy()


def ask_directory(msg=None, initial_dir="./"):
    """Show a "choose directory" dialog and return the selected directory.

    Args:
        msg: Dialog title; ``None`` leaves the title to tkinter.
            (Previously this argument was accepted but never used.)
        initial_dir: Directory the dialog starts in.

    Returns:
        The value returned by ``tkinter.filedialog.askdirectory``
        (falsy when the user cancels).
    """
    # A hidden throw-away root keeps an empty Tk window from appearing.
    rt = tk.Tk()
    rt.withdraw()
    try:
        return fd.askdirectory(title=msg, initialdir=initial_dir)
    finally:
        # Destroy the hidden root even if the dialog raises.
        rt.destroy()


def ask_output_filename(msg=None, types=None, deftype='.txt'):
    """Show a "save as" dialog and return the chosen output path.

    Args:
        msg: Dialog title; ``None`` leaves the title to tkinter.
        types: Sequence of ``(label, pattern)`` tuples forwarded as
            ``filetypes``.  Defaults to a single "all files" entry.
        deftype: Forwarded as ``defaultextension``.

    Returns:
        The value returned by ``tkinter.filedialog.asksaveasfilename``
        (falsy when the user cancels).
    """
    if types is None:
        types = [('', '*.*')]
    # A hidden throw-away root keeps an empty Tk window from appearing.
    rt = tk.Tk()
    rt.withdraw()
    try:
        return fd.asksaveasfilename(title=msg, filetypes=types, defaultextension=deftype)
    finally:
        # Destroy the hidden root even if the dialog raises.
        rt.destroy()

import tkinter as tk

import tkinter.filedialog as fd

def ask_input_filename(msg = None, types = [('', '*.*')]):

"""

入力用ファイル名の設定

"""

rt = tk.Tk()

rt.withdraw()

filename = fd.askopenfilename(title = msg, filetypes = types)

rt.destroy()

return filename

def ask_input_filenames(msg = None, types = [('', '*.*')]):

"""

入力用ファイル名の設定

"""

rt = tk.Tk()

rt.withdraw()

filenames = fd.askopenfilenames(title = msg, filetypes = types)

rt.destroy()

return filenames

def ask_directory(msg = None, initial_dir = "./"):

rt = tk.Tk()

rt.withdraw()

dir = fd.askdirectory(initialdir = initial_dir)

rt.destroy()

return dir

def ask_output_filename(msg = None, types = [('', '*.*')], deftype = '.txt'):

"""

出力用ファイル名の設定

"""

rt = tk.Tk()

rt.withdraw()

filename = fd.asksaveasfilename(title = msg, filetypes = types, defaultextension = deftype)

rt.destroy()

return filename

プログラムの解説

FaceMovieDetectクラスについて説明します。

__init__関数	初期化関数。GUIの設定を行っています。
open_moviefile関数	動画ファイルを選択するダイアログを表示する。
open_pictfile関数	顔写真ファイルを選択するダイアログを表示する。
detect_face_in_movie関数	映像から特定の人物を検出する（これがメインの処理）

メインの処理をしているdetect_face_in_movie関数について見ていきましょう。

# パスを取得する
self.filepath_mov = self.entry_mov_filepath.get()
self.filepath_pct = self.entry_pct_filepath.get()
self.basename_pct = os.path.basename(self.filepath_pct)
self.basename_pct = os.path.splitext(self.basename_pct)[0] # 拡張子を除いたファイル名を取得する
name = self.basename_pct.upper()

# パスを取得する

self.filepath_mov = self.entry_mov_filepath.get()

self.filepath_pct = self.entry_pct_filepath.get()

self.basename_pct = os.path.basename(self.filepath_pct)

self.basename_pct = os.path.splitext(self.basename_pct)[0] # 拡張子を除いたファイル名を取得する

name = self.basename_pct.upper()

まずは動画ファイルと顔写真ファイルのパスを取得しています。また、顔写真のファイル名（拡張子を除いて大文字にしたもの）を、その人の顔が検出されたときに映像に表示するので、「Elon Musk.png」のように顔写真のファイルにはその人の名前を付けておいてください。

# 顔をエンコードする
pict_np = np.fromfile(self.filepath_pct, np.uint8) # OpenCVは日本語を含むファイルを開けないのでNumpyを使って開く
pict_img = cv2.imdecode(pict_np, cv2.IMREAD_COLOR)
pict_img = cv2.cvtColor(pict_img, cv2.COLOR_BGR2RGB)
pict_encode = face_recognition.face_encodings(pict_img)[0]

100

101

# 顔をエンコードする

pict_np = np.fromfile(self.filepath_pct, np.uint8) # OpenCVは日本語を含むファイルを開けないのでNumpyを使って開く

pict_img = cv2.imdecode(pict_np, cv2.IMREAD_COLOR)

pict_img = cv2.cvtColor(pict_img, cv2.COLOR_BGR2RGB)

pict_encode = face_recognition.face_encodings(pict_img)[0]

Numpyで顔写真のデータを開き、OpenCVでその画像を読み込んだ後、face_recognitionのface_encodings関数で顔の特徴点を抽出しています。
公式サイトによるface_encodings関数の説明は以下の通りで、引数として顔が映った画像（face_image）を取ります。他にも顔が映っている場所の情報（known_face_locations）などを指定できるようです。戻り値は顔の特徴抽出点のリストです（１つの顔につき128個のデータで、複数の顔を検出できるため「128個のデータ×顔の数分」のリストになっている）。

# 動画の切り出し
cap = cv2.VideoCapture(self.filepath_mov)
while True:
    ret, img = cap.read()
    if ret is False:
        break

    imgS = cv2.resize(img, (0, 0), None, 0.25, 0.25)  # 処理を軽くするため、サイズを4分の1にする
    imgS = cv2.cvtColor(imgS, cv2.COLOR_BGR2RGB)

103

104

105

106

107

108

109

110

111

# 動画の切り出し

cap = cv2.VideoCapture(self.filepath_mov)

while True:

ret, img = cap.read()

if ret is False:

break

imgS = cv2.resize(img, (0, 0), None, 0.25, 0.25) # 処理を軽くするため、サイズを4分の1にする

imgS = cv2.cvtColor(imgS, cv2.COLOR_BGR2RGB)

映像から1フレーム読み込んだ後、処理を軽くするために画像のサイズを4分の1にしています。また色をBGRからRGBに変換しています。

# 現在のフレームから顔をすべて検出する
facesCurFrame = face_recognition.face_locations(imgS)
encodesCurFrame = face_recognition.face_encodings(imgS, facesCurFrame)

113

114

115

# 現在のフレームから顔をすべて検出する

facesCurFrame = face_recognition.face_locations(imgS)

encodesCurFrame = face_recognition.face_encodings(imgS, facesCurFrame)

フレームの情報からface_recognitionのface_locations関数で顔の位置を特定しています。その後、face_recognitionのface_encodings関数で検出された顔の特徴点をエンコードしています。

face_locations関数の仕様は以下の通りです。

for encodeFace, faceLoc in zip(encodesCurFrame, facesCurFrame):
    matches = face_recognition.compare_faces([pict_encode], encodeFace)
    facedist = face_recognition.face_distance([pict_encode], encodeFace)
    # 距離が最小のものがマッチしていたら顔の周りに矩形を描画する
    matchIndex = np.argmin(facedist)
    if matches[matchIndex]:
        y1, x2, y2, x1 = faceLoc
        y1, x2, y2, x1 = y1*4, x2*4, y2*4, x1*4  # 1/4にしていたのでサイズを元に戻す
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(img, name, (x1+6, y2-6), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

117

118

119

120

121

122

123

124

125

126

for encodeFace, faceLoc in zip(encodesCurFrame, facesCurFrame):

matches = face_recognition.compare_faces([pict_encode], encodeFace)

facedist = face_recognition.face_distance([pict_encode], encodeFace)

# 距離が最小のものがマッチしていたら顔の周りに矩形を描画する

matchIndex = np.argmin(facedist)

if matches[matchIndex]:

y1, x2, y2, x1 = faceLoc

y1, x2, y2, x1 = y1*4, x2*4, y2*4, x1*4 # 1/4にしていたのでサイズを元に戻す

cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)

cv2.putText(img, name, (x1+6, y2-6), cv2.FONT_HERSHEY_COMPLEX, 1, (255, 255, 255), 2)

検出された顔（動画フレーム）をforループで１つずつ取り出して、face_recognitionのcompare_faces関数で比較対象の顔（静止画）と比べています。
また検出された顔（動画フレーム）と元の画像（静止画）がどのくらい近いか（似ているか）を表す数値（距離）をface_recognitionのface_distance関数で計算しています。

その後、face_distance関数の戻り値facedistが最小値となる要素をnp.argmin関数で見つけ、それを変数matchIndexに入れています。ただ、今回のプログラムでは１つの顔しか渡していないので、要素は１つしかなく、matchIndexの値は必ず0になります。このように一見不要な処理をしているのは、今後、複数の人を同時に検出することを考えているためです。

compare_faces関数の戻り値のうち、face_distanceが最小となる結果がマッチ（True）していた場合、検出された顔の周囲に緑色の矩形を描画し、人物の名前（静止画のファイル名）を表示する処理をしています。

compare_faces関数とface_distance関数の仕様は以下の通りです。compare_faces関数による顔の判定は、距離（face_distance）の値が0.6以下だと同一人物の顔だと判断されるようです。これはtoleranceの値を変えることで変更でき、より小さい数字にすれば元の画像（静止画）とかなり似ていなければ同一人物だと判定されなくなります。

さいごに

他にも日本の芸能人の映像で試してみましたが、美人には顔に共通要素があるようで、誤認識されるケースも多くありました。例えば、長澤まさみを新垣結衣と誤認識する（またはその逆）ことがありました。今後は同時に複数の人物を区別するプログラムを作ってみようと思います。

Python：映像から特定の人物の顔の検出をしてみた（複数人）

投稿タグ: プログラミング

MENU

準備

実行した様子

プログラムの内容

プログラムの解説

さいごに

関連記事

コメントを残すコメントをキャンセル

カテゴリー

アーカイブ

MENU

Python：映像から特定の人物の顔の検出をしてみた

準備

実行した様子

プログラムの内容

プログラムの解説

さいごに

関連記事

コメントを残す コメントをキャンセル

カテゴリー

アーカイブ

タグ

コメントを残すコメントをキャンセル