From d81021e2ee1b5c5febe1502df313a2b7ab35ee14 Mon Sep 17 00:00:00 2001 From: kicap Date: Sat, 26 Aug 2023 03:14:09 +0800 Subject: [PATCH] first commit --- .gitignore | 2 ++ main.py | 84 ++++++++++++++++++++++++++++++++++++++++++++ main2.py | 50 ++++++++++++++++++++++++++ requirements.txt | 30 ++++++++++++++++ runtime.txt | 1 + templates/index.html | 10 ++++++ 6 files changed, 177 insertions(+) create mode 100644 .gitignore create mode 100644 main.py create mode 100644 main2.py create mode 100644 requirements.txt create mode 100644 runtime.txt create mode 100644 templates/index.html diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ab7fe1b --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +# ignore folder env +env/ \ No newline at end of file diff --git a/main.py b/main.py new file mode 100644 index 0000000..094cc13 --- /dev/null +++ b/main.py @@ -0,0 +1,84 @@ +import cv2 +import mediapipe as mp +import os +import numpy as np + +# Create the assets directory if it doesn't exist +if not os.path.exists('assets'): + os.makedirs('assets') + +# Load the pose detection model +with mp.solutions.pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose: + # Get the index of the last saved pose dataset image + count = len([name for name in os.listdir('assets') if name.endswith('.jpg')]) + # Capture frames from the webcam + cap = cv2.VideoCapture(0) + + # Load saved pose images and store their landmarks and filenames + saved_landmarks = [] + saved_filenames = [] + for i in range(count): + filename = f'assets/pose_{i}.jpg' + image = cv2.imread(filename) + if image is not None: + image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + results = pose.process(image_rgb) + if results.pose_landmarks: + landmarks = np.array([[landmark.x, landmark.y, landmark.z] for landmark in results.pose_landmarks.landmark]) + saved_landmarks.append(landmarks) + saved_filenames.append(filename) + + while True: + ret, frame = cap.read() + if not ret: + break + # 
Convert the image to RGB + image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + # Process the image and find the landmarks + results = pose.process(image) + # Draw the landmarks on the image + image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + mp.solutions.drawing_utils.draw_landmarks(image, results.pose_landmarks, mp.solutions.pose.POSE_CONNECTIONS) + + # Compare the pose with saved pose dataset images + highest_similarity = -1 + most_similar_filename = "" + if results.pose_landmarks: + detected_landmarks = np.array([[landmark.x, landmark.y, landmark.z] for landmark in results.pose_landmarks.landmark]) + + for i, saved_landmark in enumerate(saved_landmarks): + # Calculate cosine similarity between the landmarks + similarity = np.dot(detected_landmarks.flatten(), saved_landmark.flatten()) / (np.linalg.norm(detected_landmarks) * np.linalg.norm(saved_landmark)) + + if similarity > highest_similarity: + highest_similarity = similarity + most_similar_filename = saved_filenames[i] + + # Calculate similarity percentage + similarity_percentage = round(highest_similarity * 100, 2) + + # Display the most similar filename and similarity percentage if similarity is above 94.6% + if similarity_percentage > 94.6: + text = f"Most Similar: {most_similar_filename} - Similarity: {similarity_percentage}%" + cv2.putText(image, text, (10, image.shape[0] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2) + + # Display the image + cv2.imshow('Pose Detection', image) + + # Save the image if the 'q' key is pressed + if cv2.waitKey(1) & 0xFF == ord('q'): + filename = f'assets/pose_{count}.jpg' + # Check if the file already exists and increment the count if it does + while os.path.exists(filename): + count += 1 + filename = f'assets/pose_{count}.jpg' + cv2.imwrite(filename, image) + print(f'Saved pose dataset image: {filename}') + count += 1 + # Exit the loop if the 'ESC' key is pressed + if cv2.waitKey(1) == 27: + break + + # Release the capture and destroy the window + cap.release() + 
cv2.destroyAllWindows() diff --git a/main2.py b/main2.py new file mode 100644 index 0000000..1ca2582 --- /dev/null +++ b/main2.py @@ -0,0 +1,50 @@ +import cv2 +import mediapipe as mp +import os +import time + +# Create the assets directory if it doesn't exist +if not os.path.exists('assets'): + os.makedirs('assets') + +# Get the index of the last saved pose dataset image +count = len([name for name in os.listdir('assets') if name.endswith('.jpg')]) + +with mp.solutions.pose.Pose(min_detection_confidence=0.5, min_tracking_confidence=0.5) as pose: + cap = cv2.VideoCapture(0) + start_time = time.time() + while True: + ret, frame = cap.read() + if not ret: + break + # Convert the image to RGB + image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + # To improve performance, optionally mark the image as not writeable to pass by reference. + image.flags.writeable = False + # Process the image and find the landmarks + results = pose.process(image) + # Draw the landmarks on the image + image.flags.writeable = True + image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) + mp.solutions.drawing_utils.draw_landmarks(image, results.pose_landmarks, mp.solutions.pose.POSE_CONNECTIONS) + # Display the image in a window + cv2.imshow('Pose Detection', image) + # Save the image and close the window if 5 seconds have passed + if time.time() - start_time > 5: + filename = f'assets/pose_{count}.jpg' + # Check if the file already exists and increment the count if it does + while os.path.exists(filename): + count += 1 + filename = f'assets/pose_{count}.jpg' + cv2.imwrite(filename, image) + print(f'Saved pose dataset image: {filename}') + count += 1 + # Close the window + cv2.destroyAllWindows() + break + # Exit the loop if the 'ESC' key is pressed + if cv2.waitKey(1) == 27: + break + # Release the capture and destroy the window + cap.release() + cv2.destroyAllWindows() \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..aa9442d --- /dev/null 
+++ b/requirements.txt @@ -0,0 +1,30 @@ +absl-py==1.4.0 +attrs==23.1.0 +blinker==1.6.2 +cffi==1.15.1 +click==8.1.7 +contourpy==1.1.0 +cycler==0.11.0 +Flask==2.3.3 +flatbuffers==23.5.26 +fonttools==4.42.1 +importlib-metadata==6.8.0 +importlib-resources==6.0.1 +itsdangerous==2.1.2 +Jinja2==3.1.2 +kiwisolver==1.4.5 +MarkupSafe==2.1.3 +matplotlib==3.7.2 +mediapipe==0.10.3 +numpy==1.25.2 +opencv-contrib-python==4.8.0.76 +packaging==23.1 +Pillow==10.0.0 +protobuf==3.20.3 +pycparser==2.21 +pyparsing==3.0.9 +python-dateutil==2.8.2 +six==1.16.0 +sounddevice==0.4.6 +Werkzeug==2.3.7 +zipp==3.16.2 diff --git a/runtime.txt b/runtime.txt new file mode 100644 index 0000000..aa61900 --- /dev/null +++ b/runtime.txt @@ -0,0 +1 @@ +python-3.9.10 \ No newline at end of file diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..971f6c1 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,10 @@ + + + + Pose Detection + + +

Pose Detection

+ + + \ No newline at end of file