hand-detection/app2.py

176 lines
6.6 KiB
Python

from flask import Flask, render_template, Response , request, jsonify, send_file
import cv2
import mediapipe as mp
import numpy as np
import json
import os
# Flask application. Static files are served from ``assets`` and templates
# are looked up in ``templates``.
app = Flask(
    __name__,
    static_folder='assets',
    template_folder='templates',
)

# Load the dataset once at import time. generate_frames() indexes it with the
# position of a hand landmark and reads the "image_path", "name" and
# "keterangan" keys of the selected entry.
# The encoding is given explicitly: JSON text is UTF-8, and relying on the
# platform's default encoding can garble or reject non-ASCII text.
with open('dataset.json', 'r', encoding='utf-8') as f:
    all_data = json.load(f)

# State shared between the frame generator (which writes it) and the
# /get_data route (which reads it). The initial values are the tutorial
# placeholder that is shown while no point is selected.
the_image_path = "gambar/tutorial.jpg"
the_name = None
the_keterangan = "Posisikan satu tangan kepada menunjuk dan tangan lainnya kepada terbuka dan tunjukkan ke arah titik tangan"

# Initialize MediaPipe hand detection (video mode, at most two hands).
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
    static_image_mode=False,
    max_num_hands=2,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)

# Initialize MediaPipe drawing utilities
mp_drawing = mp.solutions.drawing_utils

# Initialize camera capture on device index 0. This runs at import time.
cap = cv2.VideoCapture(0)

# Landmark names, indexed by landmark position, e.g. "HandLandmark.WRIST".
# They are built from ``.name`` instead of ``str(member)`` because str() of an
# IntEnum member returns the bare integer on Python 3.11+.
# NOTE(review): assumes mp_hands.HandLandmark is an IntEnum - confirm against
# the installed MediaPipe version.
landmark_names = [
    f'{type(landmark).__name__}.{landmark.name}'
    for landmark in mp_hands.HandLandmark
]
# Placeholder content published through the shared module state whenever no
# point on the open palm is currently selected.
_TUTORIAL_IMAGE_PATH = "gambar/tutorial.jpg"
_TUTORIAL_KETERANGAN = "Posisikan satu tangan kepada menunjuk dan tangan lainnya kepada terbuka dan tunjukkan ke arah titik tangan"


def _reset_to_tutorial():
    """Point the shared state back at the tutorial placeholder."""
    global the_image_path, the_name, the_keterangan
    the_image_path = _TUTORIAL_IMAGE_PATH
    the_name = None
    the_keterangan = _TUTORIAL_KETERANGAN


def _select_closest_point(frame, finger_tip, palm_landmarks):
    """Publish the dataset entry of the palm landmark nearest to ``finger_tip``.

    The nearest landmark is found by Euclidean distance on the landmarks'
    ``x``/``y`` attributes. Its position is used as the index into
    ``all_data`` and its name is drawn on ``frame`` at the fingertip.
    """
    global the_image_path, the_name, the_keterangan
    distances = [np.sqrt((landmark.x - finger_tip.x) ** 2 +
                         (landmark.y - finger_tip.y) ** 2)
                 for landmark in palm_landmarks]
    if not distances:
        return

    closest_idx = int(np.argmin(distances))
    entry = all_data[closest_idx]
    the_image_path = entry["image_path"]
    the_name = entry["name"]
    the_keterangan = entry["keterangan"]

    h, w, _ = frame.shape
    cv2.putText(frame, f'Closest Landmark: {landmark_names[closest_idx]}',
                (int(finger_tip.x * w), int(finger_tip.y * h)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)


def generate_frames(stat):
    """Read camera frames, detect hand gestures and stream annotated JPEGs.

    For every frame read from ``cap`` the detected hands are labelled
    "Pointing" or "Open Palm" according to ``is_pointing_gesture``. When a
    pointing hand and an open palm are both present, the open-palm landmark
    closest to the pointing hand's index fingertip selects the dataset entry
    that is written to the module-level ``the_image_path`` / ``the_name`` /
    ``the_keterangan``. When no hand is detected, or a pointing hand has no
    open palm to point at, that state is reset to the tutorial placeholder.

    All hands are classified before the lookup is made, so the result does
    not depend on the order in which the hands are reported.

    Args:
        stat: Streaming mode. Frames are yielded only when this equals
            ``'camera'``; for any other value the loop keeps processing
            frames and updating the shared state but yields nothing.

    Yields:
        One ``multipart/x-mixed-replace`` part (boundary ``frame``) per
        encoded JPEG frame. The generator ends when the camera read fails.
    """
    wrist_id = mp_hands.HandLandmark.WRIST
    index_tip_id = mp_hands.HandLandmark.INDEX_FINGER_TIP

    while True:
        # Read frame from camera
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input, OpenCV delivers BGR.
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            h, w, _ = frame.shape
            pointing_hands = []  # (landmarks, wrist pixel position) per pointing hand
            open_palm = None

            # Pass 1: classify, label and draw every detected hand.
            for hand_landmarks in results.multi_hand_landmarks:
                landmarks = hand_landmarks.landmark
                wrist = landmarks[wrist_id]
                cx, cy = int(wrist.x * w), int(wrist.y * h)

                if is_pointing_gesture(landmarks):
                    gesture = "Pointing"
                    pointing_hands.append((landmarks, (cx, cy)))
                else:
                    gesture = "Open Palm"
                    open_palm = landmarks

                # Place the gesture label just above the wrist.
                cv2.putText(frame, gesture, (cx, cy - 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2,
                            cv2.LINE_AA)
                mp_drawing.draw_landmarks(frame, hand_landmarks,
                                          mp_hands.HAND_CONNECTIONS)

            # Pass 2: resolve what each pointing hand is pointing at, now
            # that the open palm (if any) is known.
            for landmarks, (cx, cy) in pointing_hands:
                if open_palm is not None:
                    _select_closest_point(frame, landmarks[index_tip_id],
                                          open_palm)
                else:
                    _reset_to_tutorial()
                    cv2.putText(frame, 'No open palm detected', (cx, cy - 50),
                                cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2,
                                cv2.LINE_AA)
        else:
            _reset_to_tutorial()

        # Convert the frame to JPEG bytes; skip frames that fail to encode.
        encoded_ok, buffer = cv2.imencode('.jpg', frame)
        if not encoded_ok:
            continue
        jpeg_bytes = buffer.tobytes()

        if stat == 'camera':
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + jpeg_bytes + b'\r\n')
@app.route('/')
def index():
    """Serve the main page by rendering the ``index2.html`` template."""
    page = render_template('index2.html')
    return page
@app.route('/video_feed')
def video_feed():
    """Stream the output of ``generate_frames`` as a multipart response.

    The ``stat`` query parameter (default ``'camera'``) is forwarded to
    ``generate_frames`` unchanged.
    """
    mode = request.args.get('stat', 'camera')
    frame_stream = generate_frames(mode)
    return Response(
        frame_stream,
        mimetype='multipart/x-mixed-replace; boundary=frame',
    )
@app.route('/get_data', methods=['GET'])
def get_data():
    """Return the currently selected entry as JSON.

    The response carries ``name``, ``keterangan`` and ``image_path`` when all
    three module-level values are truthy; otherwise it is an empty JSON
    object.
    """
    payload = {}
    if the_image_path and the_name and the_keterangan:
        payload = {
            "name": the_name,
            "keterangan": the_keterangan,
            "image_path": the_image_path,
        }
    return jsonify(payload)
@app.route('/show_image')
def show_image():
    """Send the image named by the ``image_path`` query parameter.

    ``image_path`` comes straight from the request, so it is treated as
    untrusted. It is resolved against the current working directory and
    served only when the resolved path stays inside that directory, is a
    regular file, and has an image file extension. Anything else is answered
    with ``404`` and the text "Image not found".

    Returns:
        The file response (sent with mimetype ``image/jpeg``), or a
        ``("Image not found", 404)`` tuple.
    """
    allowed_suffixes = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp')
    not_found = ("Image not found", 404)

    image_path = request.args.get('image_path')
    if not image_path:
        return not_found

    base_dir = os.path.realpath(os.getcwd())
    try:
        # realpath collapses ".." segments and symlinks. An absolute
        # image_path replaces base_dir in the join and is then rejected by
        # the containment check below unless it lies inside base_dir.
        target = os.path.realpath(os.path.join(base_dir, image_path))
        inside_base = os.path.commonpath([base_dir, target]) == base_dir
    except ValueError:
        # Raised for malformed paths (e.g. embedded NUL) or paths on another
        # drive; neither can name a servable image.
        return not_found

    if not inside_base:
        return not_found
    if not target.lower().endswith(allowed_suffixes):
        return not_found
    if not os.path.isfile(target):
        return not_found

    # Pass the resolved absolute path so the file that was checked is the
    # file that gets sent.
    return send_file(target, mimetype='image/jpeg')
def is_pointing_gesture(landmarks):
    """Return True when the middle, ring or pinky fingertip is above its PIP joint.

    "Above" means a smaller ``y`` value; elsewhere in this file ``y`` is
    scaled by the frame height to get a pixel row.

    NOTE(review): despite the name, the index finger is not inspected, and
    the function returns True when at least one of the other three fingers
    is NOT bent. An index-finger check was previously computed here but never
    used. The return value is kept exactly as before because
    generate_frames() relies on it; confirm whether the intended rule is
    "index extended and the other fingers bent".

    Args:
        landmarks: Sequence of hand landmarks indexable by
            ``mp_hands.HandLandmark`` members, each exposing a ``y``
            attribute.

    Returns:
        bool: True if any of the three checked fingertips has a smaller
        ``y`` than the PIP joint of the same finger, otherwise False.
    """
    tip_pip_pairs = (
        (mp_hands.HandLandmark.MIDDLE_FINGER_TIP, mp_hands.HandLandmark.MIDDLE_FINGER_PIP),
        (mp_hands.HandLandmark.RING_FINGER_TIP, mp_hands.HandLandmark.RING_FINGER_PIP),
        (mp_hands.HandLandmark.PINKY_TIP, mp_hands.HandLandmark.PINKY_PIP),
    )
    return any(landmarks[tip].y < landmarks[pip].y for tip, pip in tip_pip_pairs)
# Start Flask's built-in server when this file is executed as a script.
# NOTE(review): debug=True turns on Flask's debug mode - confirm this entry
# point is only used for local development.
if __name__ == '__main__':
    app.run(debug=True)