無所遁形——快把你的口罩戴上(口罩識別)
人臉識別,是基于人的臉部特征信息進行身份識別的一種生物識別技術。用攝像機或攝像頭采集含有人臉的圖像或視頻流,并自動在圖像中檢測和跟蹤人臉,進而對檢測到的人臉進行臉部識別的一系列相關技術,通常也叫做人像識別、面部識別。
疫情當下,學校封校,教室上網課,食堂就餐等等環境,口罩佩戴依舊十分有意義,單靠人員監測效率太過低下,筆者就在考慮能否讓計算機完成相關工作,就查閱了相關資料,在開源訓練集的基礎上,設計了本款口罩識別。
圖片:
視頻:
口罩識別案例
配置環境:
windows10 系統
pyCharm
Anaconda環境下的python3.7
tensorflow 1.15.0
cuda10.0
整體流程:
相信小伙伴們已經迫不及待了,上代碼走起!
from tkinter import *
from tkinter.filedialog import askdirectory
from tkinter.messagebox import showinfo
import cv2
import numpy as np
from PIL import Image, ImageTk
from tkinter import ttk
import pygame
import time
import tensorflow_infer as flow
# Initialise the pygame mixer that the (currently disabled) voice prompt uses.
pygame.mixer.init(frequency=16000, size=-16, channels=2, buffer=4096)
# Forward slashes are used instead of backslashes: '\h' and '\c' are invalid
# escape sequences in a normal string literal, and '/' is accepted as a path
# separator on Windows as well as on other platforms.
# Haar cascade for frontal faces.
detector = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
# Cascade used by the cascade-based page to mark masks.
mask_detector = cv2.CascadeClassifier('xml/cascade.xml')
class GUI:
    """Tkinter front end for the mask-detection demo.

    The first page offers two detection modes (deep-learning inference through
    ``flow.inference`` and a Haar-cascade based one) plus a quit button.  Each
    detection page shows the live camera picture in a label that is refreshed
    through ``root.after``.
    """

    def __init__(self):
        self.camera = None  # cv2.VideoCapture while a video page is shown
        self._after_id = None  # id of the pending ``after`` callback, if any
        self.root = Tk()
        self.root.title('maskdetection')
        self.root.geometry('%dx%d' % (800, 600))
        self.createFirstPage()
        self.root.mainloop()

    def createFirstPage(self):
        """Build the start page: title, background picture and mode buttons."""
        self.page1 = Frame(self.root)
        self.page1.pack()
        Label(self.page1, text='口罩追蹤系統', font=('粗體', 20)).pack()
        # Any reasonably small picture can serve as the background.  A missing
        # or unreadable file must not prevent the window from opening.
        try:
            photo = ImageTk.PhotoImage(image=Image.open("14.jpg"))
        except OSError:
            photo = None
        if photo is None:
            self.data1 = Label(self.page1)
        else:
            self.data1 = Label(self.page1, width=780, image=photo)
            # Keep a reference, otherwise Tk drops the image when it is
            # garbage collected.
            self.data1.image = photo
        self.data1.pack(padx=5, pady=5)
        self.button11 = Button(self.page1, width=18, height=2, text="深度學習算法", bg='red', font=("宋", 12),
                               relief='raise', command=self.createSecondPage1)
        self.button11.pack(side=LEFT, padx=25, pady=10)
        # NOTE(review): the original packed ``self.button13`` without ever
        # creating it (AttributeError at start-up).  It is reconstructed here
        # as the entry point to the otherwise unreachable cascade page; the
        # caption and colour are placeholders - confirm against the project.
        self.button13 = Button(self.page1, width=18, height=2, text="級聯分類器算法", bg='green', font=("宋", 12),
                               relief='raise', command=self.createSecondPage)
        self.button13.pack(side=LEFT, padx=25, pady=10)
        self.button14 = Button(self.page1, width=18, height=2, text="退出系統", bg='gray', font=("宋", 12),
                               relief='raise', command=self.quitMain)
        self.button14.pack(side=LEFT, padx=25, pady=10)

    def _open_video_page(self, loop):
        """Open the camera, build the video page and start ``loop`` on it."""
        # Make sure a previous capture / refresh loop is not left running.
        self._stop_video()
        self.camera = cv2.VideoCapture(0)
        self.page1.pack_forget()
        self.page2 = Frame(self.root)
        self.page2.pack()
        Label(self.page2, text='實時追蹤口罩佩戴情況', font=('粗體', 20)).pack()
        self.data2 = Label(self.page2)
        self.data2.pack(padx=5, pady=5)
        self.button21 = Button(self.page2, width=18, height=2, text="返回", bg='gray', font=("宋", 12),
                               relief='raise', command=self.backFirst)
        self.button21.pack(padx=25, pady=10)
        loop(self.data2)

    def _stop_video(self):
        """Cancel the pending refresh callback and release the camera."""
        after_id = getattr(self, '_after_id', None)
        root = getattr(self, 'root', None)
        if after_id is not None and root is not None:
            root.after_cancel(after_id)
        self._after_id = None
        camera = getattr(self, 'camera', None)
        if camera is not None:
            camera.release()
        self.camera = None

    def _show_frame(self, panela, rgba_frame):
        """Display an RGBA numpy frame in the label ``panela``."""
        imgtk = ImageTk.PhotoImage(image=Image.fromarray(rgba_frame))
        panela.imgtk = imgtk  # keep a reference so the image is not collected
        panela.config(image=imgtk)

    def _slogan_short(self):
        """Play the short "please wear a mask" prompt (currently not called).

        NOTE(review): the original nested helper read an undefined global
        ``file_slogan_short`` and slept inside the UI thread.  The path is
        presumably the one exposed by the ``flow`` module - confirm.
        """
        pygame.mixer.music.load(flow.file_slogan_short)
        print("------------請您戴好口罩")
        # ``music.play`` returns immediately, so the Tk loop is not blocked.
        pygame.mixer.music.play()

    def createSecondPage1(self):
        """Show the live page driven by the deep-learning detector."""
        self._open_video_page(self.video_loop1)

    def video_loop1(self, panela):
        """Grab one frame, run ``flow.inference`` on it and schedule the next."""
        if self.camera is None:
            return
        success, img = self.camera.read()  # read a picture from the camera
        if success:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            num, c, img = flow.inference(img, conf_thresh=0.5, iou_thresh=0.4, target_shape=(260, 260),
                                         draw_result=True, show_result=False)
            # Voice prompt hook (disabled in the original as well):
            # self._slogan_short()
            # The frame is already RGB here, so only the alpha channel is
            # added; the original swapped channels twice to the same effect.
            self._show_frame(panela, cv2.cvtColor(img, cv2.COLOR_RGB2RGBA))
        self._after_id = self.root.after(1, lambda: self.video_loop1(panela))

    def select_path(self):
        """Ask the user for a directory and remember it in ``self.pash_``."""
        self.pash_ = askdirectory()

    def createSecondPage(self):
        """Show the live page driven by the cascade classifiers."""
        self._open_video_page(self.video_loop)

    def video_loop(self, panela):
        """Grab one frame, mark cascade detections and schedule the next."""
        if self.camera is None:
            return
        success, img = self.camera.read()  # read a picture from the camera
        if success:
            faces = detector.detectMultiScale(img, 1.1, 3)
            # As in the original, masks are only searched for when at least
            # one face was found, but the whole-frame search now runs once
            # per frame instead of once per face.
            if len(faces) > 0:
                for (x2, y2, w2, h2) in mask_detector.detectMultiScale(img, 1.1, 5):
                    cv2.putText(img, 'mask', (x2 - 2, y2 - 2),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255))
                    # arguments: image, top-left, bottom-right, colour, thickness
                    cv2.rectangle(img, (x2, y2), (x2 + w2, y2 + h2), (0, 0, 255), 2)
            # convert the colour order from BGR to RGBA for PIL / Tk
            self._show_frame(panela, cv2.cvtColor(img, cv2.COLOR_BGR2RGBA))
        self._after_id = self.root.after(1, lambda: self.video_loop(panela))

    def backFirst(self):
        """Leave the video page and return to the start page."""
        # Stop the refresh loop and release the camera before tearing down
        # the widgets the loop draws into.
        self._stop_video()
        self.page2.pack_forget()
        self.page2.destroy()  # a fresh frame is built on every entry
        self.page1.pack()
        cv2.destroyAllWindows()

    def backMain(self):
        """Return to the start page from ``page3`` (if such a page exists)."""
        self.root.geometry('900x600')
        # ``page3`` is not created anywhere in the visible code.
        page3 = getattr(self, 'page3', None)
        if page3 is not None:
            page3.pack_forget()
        self.page1.pack()

    def quitMain(self):
        """Release resources and terminate the program with exit code 0."""
        # Imported locally: ``from tkinter import *`` is not guaranteed to
        # expose ``sys``.
        import sys
        self._stop_video()
        sys.exit(0)
if __name__ == '__main__':
    # Constructing the GUI builds the window and enters the Tk main loop.
    demo = GUI()
插播一句,深度學習的項目目前完全開源,大家可以先體驗體驗:
https://demo.aizoo.com/face-mask-detection.html
深度學習(DL, Deep Learning)是機器學習(ML, Machine Learning)領域中一個新的研究方向,它被引入機器學習使其更接近于最初的目標——人工智能(AI, Artificial Intelligence)。
深度學習是學習樣本數據的內在規律和表示層次,這些學習過程中獲得的信息對諸如文字,圖像和聲音等數據的解釋有很大的幫助。它的最終目標是讓機器能夠像人一樣具有分析學習能力,能夠識別文字、圖像和聲音等數據。深度學習是一個復雜的機器學習算法,在語音和圖像識別方面取得的效果,遠遠超過先前相關技術。
深度學習在搜索技術,數據挖掘,機器學習,機器翻譯,自然語言處理,多媒體學習,語音,推薦和個性化技術,以及其他相關領域都取得了很多成果。深度學習使機器模仿視聽和思考等人類的活動,解決了很多復雜的模式識別難題,使得人工智能相關技術取得了很大進步。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import cv2
# Smoke test: open the camera and detect / track faces.
# Build the face detector from the Haar cascade XML file.  A forward slash is
# used because '\h' is an invalid escape sequence and '/' also works on Windows.
detector = cv2.CascadeClassifier('haarcascades/haarcascade_frontalface_default.xml')
# Get a handle on camera number 0.
cap = cv2.VideoCapture(0)
try:
    while True:
        # ``read`` returns a success flag and the captured image.
        ret, img = cap.read()
        if not ret:
            # No frame (camera missing or disconnected): stop instead of
            # passing ``None`` to cvtColor.
            break
        # Convert to a grey-scale image.
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Get the face coordinates.
        faces = detector.detectMultiScale(gray, 1.1, 3)
        for (x, y, w, h) in faces:
            # arguments: image, top-left, bottom-right, colour, thickness
            cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
        cv2.imshow('Mask', img)
        # ``waitKey`` also pumps the window events; 'q' or Esc leaves the
        # loop so that the clean-up below is actually reached.
        key = cv2.waitKey(3) & 0xFF
        if key in (ord('q'), 27):
            break
finally:
    cap.release()
    cv2.destroyAllWindows()
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# -*- coding:utf-8 -*-
import cv2
import time
import argparse
import pygame
import numpy as np
from PIL import Image
from tensorflow.keras.models import model_from_json
from utils.anchor_generator import generate_anchors
from utils.anchor_decode import decode_bbox
from utils.nms import single_class_non_max_suppression
from load_model.tensorflow_loader import load_tf_model, tf_inference
# Alternative model location, kept for reference:
# sess, graph = load_tf_model('FaceMaskDetection-master\models\face_mask_detection.pb')
# Load the frozen detection model once at import time; ``sess`` and ``graph``
# are passed to ``tf_inference`` by ``inference`` below.
sess, graph = load_tf_model('models/face_mask_detection.pb')
# anchor configuration (one entry per feature map; passed to generate_anchors)
feature_map_sizes = [[33, 33], [17, 17], [9, 9], [5, 5], [3, 3]]
anchor_sizes = [[0.04, 0.056], [0.08, 0.11], [0.16, 0.22], [0.32, 0.45], [0.64, 0.72]]
# NOTE: list multiplication makes all five entries refer to the same inner list.
anchor_ratios = [[1, 0.62, 0.42]] * 5
# Paths of the audio prompt files (presumably played through pygame's mixer).
file_slogan = r'video/slogan.mp3'
file_slogan_short = r'video/slogan_short.mp3'
# Initialise the pygame mixer at import time.
pygame.mixer.init(frequency=16000, size=-16, channels=2, buffer=4096)
# generate anchors
anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)
# Used for inference: the batch size is 1 and the model output shape is
# [1, N, 4], so the anchors are expanded to [1, anchor_num, 4].
anchors_exp = np.expand_dims(anchors, axis=0)
# Maps the predicted class index to its label.
id2class = {0: 'Mask', 1: 'NoMask'}
def inference(image, conf_thresh=0.5, iou_thresh=0.4, target_shape=(160, 160), draw_result=True, show_result=True):
    """Run the mask detector on one image and optionally draw the result.

    :param image: 3-D numpy image array; it is annotated in place when
        ``draw_result`` is true.
    :param conf_thresh: minimum classification probability for a box to be kept.
    :param iou_thresh: IoU threshold handed to the non-maximum suppression.
    :param target_shape: size the image is resized to before it is fed to
        the model.
    :param draw_result: whether to draw the bounding boxes and labels into
        ``image``.
    :param show_result: whether to display the image through PIL afterwards.
    :return: the tuple ``(n, id2class, image)``.
    """
    # NOTE(review): the original computed ``n = 0; n = n + 1``, i.e. the first
    # element is the constant 1, and the second element is the whole
    # ``id2class`` mapping rather than a per-frame label.  Both are kept
    # unchanged because callers unpack three values.
    n = 1
    height, width, _ = image.shape
    image_resized = cv2.resize(image, target_shape)
    image_np = image_resized / 255.0  # normalise to 0~1
    image_exp = np.expand_dims(image_np, axis=0)
    y_bboxes_output, y_cls_output = tf_inference(sess, graph, image_exp)

    # remove the batch dimension, for batch is always 1 for inference.
    y_bboxes = decode_bbox(anchors_exp, y_bboxes_output)[0]
    y_cls = y_cls_output[0]
    # To speed things up, do single-class NMS instead of multi-class NMS:
    # every box is represented by its best class score.
    bbox_max_scores = np.max(y_cls, axis=1)
    bbox_max_score_classes = np.argmax(y_cls, axis=1)

    # keep_idxs are the boxes that survive NMS.
    keep_idxs = single_class_non_max_suppression(y_bboxes, bbox_max_scores, conf_thresh=conf_thresh,
                                                 iou_thresh=iou_thresh)

    if draw_result:
        for idx in keep_idxs:
            conf = float(bbox_max_scores[idx])
            class_id = int(bbox_max_score_classes[idx])
            bbox = y_bboxes[idx]
            # Scale the box to pixel coordinates (bbox values are multiplied
            # by the image size) and clip it to the image boundary.
            xmin = max(0, int(bbox[0] * width))
            ymin = max(0, int(bbox[1] * height))
            xmax = min(int(bbox[2] * width), width)
            ymax = min(int(bbox[3] * height), height)
            # Class 0 ('Mask') and the other class get different colours.
            color = (0, 255, 0) if class_id == 0 else (255, 0, 0)
            cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, 2)
            cv2.putText(image, "%s: %.2f" % (id2class[class_id], conf), (xmin + 2, ymin - 2),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, color)

    if show_result:
        Image.fromarray(image).show()
    return n, id2class, image
# Read from a camera or a local video path and process every frame.
def run_on_video(video_path, output_video_name, conf_thresh):
    """Run ``inference`` on every frame of a video source and display it.

    :param video_path: argument for ``cv2.VideoCapture`` (a file path or a
        camera index).
    :param output_video_name: currently unused; writing the annotated video
        is disabled.
    :param conf_thresh: confidence threshold forwarded to ``inference``.
    :raises ValueError: if the video source cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    # Check the source before querying any of its properties.
    if not cap.isOpened():
        cap.release()
        raise ValueError("Video open failed.")
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    idx = 0
    try:
        while True:
            start_stamp = time.time()
            status, img_raw = cap.read()
            if not status:
                # End of stream or read failure: stop before touching the
                # frame (the original converted it first and crashed here).
                break
            img_raw = cv2.cvtColor(img_raw, cv2.COLOR_BGR2RGB)
            read_frame_stamp = time.time()
            inference(img_raw,
                      conf_thresh,
                      iou_thresh=0.5,
                      target_shape=(260, 260),
                      draw_result=True,
                      show_result=False)
            # The frame is RGB at this point; reverse the channels for imshow.
            cv2.imshow('image', img_raw[:, :, ::-1])
            cv2.waitKey(1)
            inference_stamp = time.time()
            # Writing the frame is disabled, so the "write time" is ~0.
            write_frame_stamp = time.time()
            idx += 1
            print("%d of %d" % (idx, total_frames))
            print("read_frame:%f, infer time:%f, write time:%f" % (read_frame_stamp - start_stamp,
                                                                   inference_stamp - read_frame_stamp,
                                                                   write_frame_stamp - inference_stamp))
    finally:
        cap.release()
'''
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Face Mask Detection")
parser.add_argument('--img-mode', type=int, default=0,
help='set 1 to run on image, 0 to run on video.') # 這里設置為1:檢測圖片;還是設置為0:視頻文件(實時圖像數據)檢測
parser.add_argument('--img-path', type=str, help='path to your image.')
parser.add_argument('--video-path', type=str, default='0', help='path to your video, `0` means to use camera.')
# parser.add_argument('--hdf5', type=str, help='keras hdf5 file')
args = parser.parse_args()
if args.img_mode:
imgPath = args.img_path
# img = cv2.imread("imgPath")
img = cv2.imread(imgPath)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
inference(img, show_result=True, target_shape=(260, 260))
else:
video_path = args.video_path
if args.video_path == '0':
video_path = 0
run_on_video(video_path, '', conf_thresh=0.5)
'''
由于代碼過多無法詳細展開,如有疑問歡迎大家在評論區留言,共同探討問題。
代碼源碼地址:
基于tenforflow的口罩識別項目-Python文檔類資源-CSDN下載
本項目僅供學習參考,如有侵權告知立刪
機器學習 深度學習
版權聲明:本文內容由網絡用戶投稿,版權歸原作者所有,本站不擁有其著作權,亦不承擔相應法律責任。如果您發現本站中有涉嫌抄襲或描述失實的內容,請聯系我們jiasou666@gmail.com 處理,核實后本網站將在24小時內刪除侵權內容。
版權聲明:本文內容由網絡用戶投稿,版權歸原作者所有,本站不擁有其著作權,亦不承擔相應法律責任。如果您發現本站中有涉嫌抄襲或描述失實的內容,請聯系我們jiasou666@gmail.com 處理,核實后本網站將在24小時內刪除侵權內容。