Hi everyone, I'm 小F~
MediaPipe is an open-source framework from Google for building machine learning applications that process streaming data.
It is a graph-based processing pipeline designed to work with many kinds of data sources, such as video, audio, sensor readings, and other time-series data, and it does so by abstracting each perception model into a module and wiring those modules together into a maintainable graph.
Project page:
https://github.com/google/mediapipe
Today I'll show you how to use MediaPipe for pose recognition!
Combined with a camera, this technique lets us recognize what a person is doing and then act on it.
For example: controlling the computer's volume, counting push-ups, or correcting sitting posture.
/ 01 /
Installing the dependencies
The Python version used here is 3.9.7.
The following dependencies need to be installed.
mediapipe==0.9.2.1
numpy==1.23.5
opencv-python==4.7.0.72
Install them with pip; once the environment is set up, we can look at what pose recognition can do.
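By the way, if you want to double-check that the environment is good to go, a quick optional sanity check (my own small addition, not required by the tutorial) is to import the three packages and print their versions:

# Optional sanity check: the imports should succeed and the versions
# should match the ones pinned above.
import cv2
import numpy
import mediapipe

print(mediapipe.__version__)   # expected: 0.9.2.1
print(numpy.__version__)       # expected: 1.23.5
print(cv2.__version__)         # expected: 4.7.0.72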
There are three kinds of estimation, covering the full body, the face, and the hands.
/ 02 /
Full-body pose estimation
First up is body pose estimation, which tracks one person at a time.
It overlays 33 landmark points on the person's body.
The full code is below.
import os
import time
import cv2 as cv
import mediapipe as mp


class BodyPoseDetect:
    def __init__(self, static_image=False, complexity=1, smooth_lm=True, segmentation=False, smooth_sm=True, detect_conf=0.5, track_conf=0.5):
        self.mp_body = mp.solutions.pose
        self.mp_draw = mp.solutions.drawing_utils
        self.body = self.mp_body.Pose(static_image, complexity, smooth_lm, segmentation, smooth_sm, detect_conf, track_conf)

    def detect_landmarks(self, img, disp=True):
        img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # MediaPipe expects RGB input
        results = self.body.process(img_rgb)
        detected_landmarks = results.pose_landmarks

        if detected_landmarks:
            if disp:
                self.mp_draw.draw_landmarks(img, detected_landmarks, self.mp_body.POSE_CONNECTIONS)
        return detected_landmarks, img

    def get_info(self, detected_landmarks, img_dims):
        lm_list = []
        if not detected_landmarks:
            return lm_list

        height, width = img_dims
        for id, b_landmark in enumerate(detected_landmarks.landmark):
            cord_x, cord_y = int(b_landmark.x * width), int(b_landmark.y * height)
            lm_list.append([id, cord_x, cord_y])
        return lm_list


def main(path, is_image):
    if is_image:
        detector = BodyPoseDetect(static_image=True)
        ori_img = cv.imread(path)

        img = ori_img.copy()
        landmarks, output_img = detector.detect_landmarks(img)
        info_landmarks = detector.get_info(landmarks, img.shape[:2])
        # print(info_landmarks[3])

        cv.imshow("Original", ori_img)
        cv.imshow("Detection", output_img)
        cv.waitKey(0)

    else:
        detector = BodyPoseDetect()
        cap = cv.VideoCapture(path)

        prev_time = time.time()
        cur_time = 0

        frame_width = int(cap.get(3))    # frame width
        frame_height = int(cap.get(4))   # frame height
        out = cv.VideoWriter('output.avi', cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (frame_width, frame_height))  # save the annotated video

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Video Over")
                break

            img = frame.copy()
            landmarks, output_img = detector.detect_landmarks(img)
            info_landmarks = detector.get_info(landmarks, img.shape[:2])

            cur_time = time.time()
            fps = 1 / (cur_time - prev_time)
            prev_time = cur_time
            cv.putText(output_img, f'FPS: {str(int(fps))}', (10, 70), cv.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 50, 170), 2)

            cv.namedWindow('Original', cv.WINDOW_NORMAL)   # make the window resizable
            cv.resizeWindow('Original', 580, 330)          # resize the window
            cv.namedWindow('Detection', cv.WINDOW_NORMAL)
            cv.resizeWindow('Detection', 580, 330)

            out.write(output_img)
            cv.imshow("Original", frame)
            cv.imshow("Detection", output_img)

            if cv.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        out.release()  # release the VideoWriter as well

    cv.destroyAllWindows()


if __name__ == "__main__":
    # is_image = True
    # media_path = '.\\Data\\Images\\running.jpg'
    is_image = False
    media_path = '.\\Data\\Videos\\basketball.mp4'

    if os.path.exists(os.path.join(os.getcwd(), media_path)):
        main(media_path, is_image)
    else:
        print("Invalid Path")
After running the code, the result looks like this.
The original image is on the left, the detection result on the right.
In the code, the is_image flag indicates whether the source is an image (True) or a video (False),
and media_path is the path to the source file.
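A quick note on the data you get back: get_info returns a list of [id, x, y] entries in pixel coordinates, indexed by MediaPipe's 33 pose landmark IDs (0 is the nose, 11 and 12 are the shoulders, and so on). As a small illustrative sketch of my own (not part of the script above), this is how you could pick out a single joint once lm_list has been computed:

# Illustrative only: read one joint from the list returned by get_info().
# Index 0 is the nose in MediaPipe's 33-point pose model.
lm_list = detector.get_info(landmarks, img.shape[:2])
if lm_list:  # empty when no person was detected
    nose_id, nose_x, nose_y = lm_list[0]
    print(f'Nose at pixel ({nose_x}, {nose_y})')
    cv.circle(output_img, (nose_x, nose_y), 8, (0, 0, 255), cv.FILLED)  # mark it on the output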
We can also look at how detection performs on video, shown below.
The results are pretty good.
/ 03 /
Face detection and tracking
The second is the face: MediaPipe draws a mesh over the face to detect and track it.
The full code is below.
import os
import time
import cv2 as cv
import mediapipe as mp


class FaceDetect:
    def __init__(self, static_image=False, max_faces=1, refine=False, detect_conf=0.5, track_conf=0.5):
        self.draw_utils = mp.solutions.drawing_utils
        self.draw_spec = self.draw_utils.DrawingSpec(color=[0, 255, 0], thickness=1, circle_radius=2)
        self.mp_face_track = mp.solutions.face_mesh
        self.face_track = self.mp_face_track.FaceMesh(static_image, max_faces, refine, detect_conf, track_conf)

    def detect_mesh(self, img, disp=True):
        img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # MediaPipe expects RGB input
        results = self.face_track.process(img_rgb)
        detected_landmarks = results.multi_face_landmarks

        if detected_landmarks:
            if disp:
                for f_landmarks in detected_landmarks:
                    self.draw_utils.draw_landmarks(img, f_landmarks, self.mp_face_track.FACEMESH_CONTOURS, self.draw_spec, self.draw_spec)
        return detected_landmarks, img

    def get_info(self, detected_landmarks, img_dims):
        landmarks_info = []
        img_height, img_width = img_dims
        for _, face in enumerate(detected_landmarks):
            mesh_info = []
            for id, landmarks in enumerate(face.landmark):
                x, y = int(landmarks.x * img_width), int(landmarks.y * img_height)
                mesh_info.append((id, x, y))
            landmarks_info.append(mesh_info)
        return landmarks_info


def main(path, is_image=True):
    print(path)
    if is_image:
        detector = FaceDetect()
        ori_img = cv.imread(path)

        img = ori_img.copy()
        landmarks, output = detector.detect_mesh(img)
        if landmarks:
            mesh_info = detector.get_info(landmarks, img.shape[:2])
            # print(mesh_info)

        cv.imshow("Result", output)
        cv.waitKey(0)

    else:
        detector = FaceDetect(static_image=False)
        cap = cv.VideoCapture(path)

        curr_time = 0
        prev_time = time.time()

        frame_width = int(cap.get(3))    # frame width
        frame_height = int(cap.get(4))   # frame height
        out = cv.VideoWriter('output.avi', cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (frame_width, frame_height))  # save the annotated video

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Video Over")
                break

            img = frame.copy()
            landmarks, output = detector.detect_mesh(img)
            if landmarks:
                mesh_info = detector.get_info(landmarks, img.shape[:2])
                # print(len(mesh_info))

            curr_time = time.time()
            fps = 1 / (curr_time - prev_time)
            prev_time = curr_time
            cv.putText(output, f'FPS: {str(int(fps))}', (10, 70), cv.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 50, 170), 2)

            cv.namedWindow('Result', cv.WINDOW_NORMAL)   # make the window resizable
            cv.resizeWindow('Result', 580, 330)          # resize the window

            out.write(output)
            cv.imshow("Result", output)

            if cv.waitKey(20) & 0xFF == ord('q'):
                break

        cap.release()
        out.release()  # release the VideoWriter as well

    cv.destroyAllWindows()


if __name__ == "__main__":
    # is_image = True
    # media_path = '.\\Data\\Images\\human_2.jpg'
    is_image = False
    media_path = '.\\Data\\Videos\\humans_3.mp4'

    if os.path.exists(os.path.join(os.getcwd(), media_path)):
        main(media_path, is_image)
    else:
        print("Invalid Path")
The result looks like this.
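For reference, each detected face comes back as 468 mesh points (478 if refine is set to True, which adds the iris landmarks). If you want to use the data rather than just draw it, here is a small sketch of my own built on get_info (treat the nose-tip index as an assumption about the face-mesh topology):

# Illustrative only: inspect the mesh data returned by FaceDetect.get_info().
mesh_info = detector.get_info(landmarks, img.shape[:2])
for face_points in mesh_info:
    print(f'{len(face_points)} landmarks on this face')  # 468, or 478 with refine=True
    _, nose_x, nose_y = face_points[1]  # landmark 1 sits near the tip of the nose
    cv.circle(output, (nose_x, nose_y), 3, (0, 0, 255), cv.FILLED)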
/ 04 /
Hand tracking and detection
Last is the hands: it can track two hands at once and overlay the corresponding landmark points on each hand.
The full code is below.
import os
import time
import cv2 as cv
import mediapipe as mp


class HandPoseDetect:
    def __init__(self, static_image=False, max_hands=2, complexity=1, detect_conf=0.5, track_conf=0.5):
        self.mp_hands = mp.solutions.hands
        self.mp_draw = mp.solutions.drawing_utils
        self.hands = self.mp_hands.Hands(static_image, max_hands, complexity, detect_conf, track_conf)

    def detect_landmarks(self, img, disp=True):
        img_rgb = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # MediaPipe expects RGB input
        results = self.hands.process(img_rgb)
        detected_landmarks = results.multi_hand_landmarks

        if detected_landmarks:
            if disp:
                for h_landmark in detected_landmarks:
                    self.mp_draw.draw_landmarks(img, h_landmark, self.mp_hands.HAND_CONNECTIONS)
        return detected_landmarks, img

    def get_info(self, detected_landmarks, img_dims, hand_no=1):
        lm_list = []
        if not detected_landmarks:
            return lm_list

        if hand_no > 2:
            print('[WARNING] Provided hand number is greater than max number 2')
            print('[WARNING] Calculating information for hand 2')
            hand_no = 2
        elif hand_no < 1:
            print('[WARNING] Provided hand number is less than min number 1')
            print('[WARNING] Calculating information for hand 1')
            hand_no = 1

        if len(detected_landmarks) < 2:
            hand_no = 0
        else:
            hand_no -= 1

        height, width = img_dims
        for id, h_landmarks in enumerate(detected_landmarks[hand_no].landmark):
            cord_x, cord_y = int(h_landmarks.x * width), int(h_landmarks.y * height)
            lm_list.append([id, cord_x, cord_y])
        return lm_list


def main(path, is_image=True):
    if is_image:
        detector = HandPoseDetect(static_image=True)
        ori_img = cv.imread(path)

        img = ori_img.copy()
        landmarks, output_img = detector.detect_landmarks(img)
        info_landmarks = detector.get_info(landmarks, img.shape[:2], 2)
        # print(info_landmarks)

        cv.imshow("Landmarks", output_img)
        cv.waitKey(0)

    else:
        detector = HandPoseDetect()
        cap = cv.VideoCapture(path)

        prev_time = time.time()
        cur_time = 0

        frame_width = int(cap.get(3))    # frame width
        frame_height = int(cap.get(4))   # frame height
        out = cv.VideoWriter('output.avi', cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10, (frame_width, frame_height))  # save the annotated video

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Video Over")
                break

            img = frame.copy()
            landmarks, output_img = detector.detect_landmarks(img)
            info_landmarks = detector.get_info(landmarks, img.shape[:2], 2)
            # print(info_landmarks)

            cur_time = time.time()
            fps = 1 / (cur_time - prev_time)
            prev_time = cur_time
            cv.putText(output_img, f'FPS: {str(int(fps))}', (10, 70), cv.FONT_HERSHEY_COMPLEX_SMALL, 2, (0, 50, 170), 2)

            cv.namedWindow('Original', cv.WINDOW_NORMAL)   # make the window resizable
            cv.resizeWindow('Original', 580, 330)          # resize the window
            cv.namedWindow('Detection', cv.WINDOW_NORMAL)
            cv.resizeWindow('Detection', 580, 330)

            out.write(output_img)
            cv.imshow("Detection", output_img)
            cv.imshow("Original", frame)

            if cv.waitKey(1) & 0xFF == ord('q'):
                break

        cap.release()
        out.release()  # release the VideoWriter as well

    cv.destroyAllWindows()


if __name__ == "__main__":
    is_image = False
    media_path = '.\\Data\\Videos\\piano_playing.mp4'

    if os.path.exists(os.path.join(os.getcwd(), media_path)):
        main(media_path, is_image)
    else:
        print("Invalid Path")
The result is shown below.
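Each detected hand is described by 21 landmarks, numbered from the wrist (0) out to the fingertips (4 is the thumb tip, 8 is the index fingertip, and so on). A small sketch of my own showing how to use the list returned by get_info:

# Illustrative only: highlight the index fingertip of hand 1.
lm_list = detector.get_info(landmarks, img.shape[:2], hand_no=1)
if lm_list:  # empty when no hand was detected
    _, tip_x, tip_y = lm_list[8]  # 8 = INDEX_FINGER_TIP in MediaPipe's hand model
    cv.circle(output_img, (tip_x, tip_y), 10, (255, 0, 255), cv.FILLED)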
/ 05 /
Summary
That covers part of what MediaPipe can do for pose recognition.
Of course, MediaPipe's other detection features can be used to build other interesting things as well.
For example, pairing the camera with hand-gesture recognition to control the computer's volume; a rough sketch of the idea follows, and the rest is left for you to explore.
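The core of that volume gesture is simply measuring the distance between the thumb tip (landmark 4) and the index fingertip (landmark 8) and mapping it onto a volume range. Here is a rough sketch of that measurement built on the HandPoseDetect class above; the 30 and 200 pixel bounds are arbitrary assumptions to tune for your camera, and the actual system volume call depends on your OS, so it is left out:

import math

# Rough sketch: turn the thumb-index pinch distance into a 0-100 "volume" value.
lm_list = detector.get_info(landmarks, img.shape[:2], hand_no=1)
if lm_list:
    _, thumb_x, thumb_y = lm_list[4]   # thumb tip
    _, index_x, index_y = lm_list[8]   # index fingertip
    pinch = math.hypot(index_x - thumb_x, index_y - thumb_y)
    volume = min(max((pinch - 30) / (200 - 30) * 100, 0), 100)
    print(f'pinch distance: {pinch:.0f}px  ->  volume: {volume:.0f}%')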
All related files and code have been uploaded; reply 【姿態(tài)識別】 to the official account to get them.
If this was helpful, how about giving it a like?
···  END  ···