first commit
3 .gitignore vendored Normal file
@@ -0,0 +1,3 @@
/env
yolo*
115 d3.py Normal file
@@ -0,0 +1,115 @@
import torch
import cv2
import pandas as pd
import os
import string
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play
import tkinter as tk
from threading import Thread

# Load YOLOv5 model from GitHub
model = torch.hub.load('ultralytics/yolov5', 'yolov5n')

# Initialize webcam capture
cap = cv2.VideoCapture(0)  # Use 0 for the default webcam, or specify a different index if needed

# Global variable for summary text
summary_text = ''

# Play a pre-recorded word if available, otherwise fall back to text-to-speech
def play_sound_or_tts(word, text_dir='suaraku', speed=1):
    filename = os.path.join(text_dir, f"{word}.wav")
    print(filename)

    if os.path.exists(filename):
        sound = AudioSegment.from_file(filename)
        sound = sound.speedup(playback_speed=speed)
        return sound
    else:
        tts = gTTS(word)
        tts.save('temp.mp3')
        sound = AudioSegment.from_file('temp.mp3')
        sound = sound.speedup(playback_speed=speed)
        os.remove('temp.mp3')
        return sound


def play_full_text(text, text_dir='suaraku', speed=1.0):
    # Strip punctuation, then speak the text word by word
    translator = str.maketrans('', '', string.punctuation)
    words = text.translate(translator).split()

    combined_sound = AudioSegment.silent(duration=0)

    for word in words:
        print(word)
        sound = play_sound_or_tts(word, text_dir, speed)
        combined_sound += sound

    play(combined_sound)


def on_button_press():
    global summary_text
    play_button.config(text="Loading...")
    play_thread = Thread(target=play_audio_and_update_button)
    play_thread.start()


def play_audio_and_update_button():
    global summary_text
    play_full_text(summary_text, 'suaraku', 1.1)
    play_button.config(text="Play Summary")


def process_frame():
    global summary_text
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame")
        return

    result = model(frame)
    data_frame = result.pandas().xyxy[0]

    # Keep detections with confidence above 0.4
    data_frame = data_frame[data_frame['confidence'] > 0.4]

    label_counts = data_frame['name'].value_counts()
    indexes = data_frame.index
    for index in indexes:
        x1 = int(data_frame['xmin'][index])
        y1 = int(data_frame['ymin'][index])
        x2 = int(data_frame['xmax'][index])
        y2 = int(data_frame['ymax'][index])
        label = data_frame['name'][index]
        conf = data_frame['confidence'][index]
        text = label + ' ' + str(conf.round(decimals=2))
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 2)
        cv2.putText(frame, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)

    if label_counts.empty:
        summary_text = 'No objects detected.'
    else:
        summary_text = 'There are: '
        for label, count in label_counts.items():
            summary_text += f'{count} {label}, '
        summary_text = summary_text[:-2]  # Remove the trailing comma and space
        summary_text += ' detected.'

    cv2.putText(frame, summary_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imshow('Webcam Feed', frame)
    cv2.waitKey(1)
    root.after(10, process_frame)


# GUI setup
root = tk.Tk()
root.title("Object Detection with Sound Playback")

play_button = tk.Button(root, text="Play Summary", command=on_button_press)
play_button.pack()

# Start the webcam processing
root.after(10, process_frame)

root.mainloop()

cap.release()
cv2.destroyAllWindows()
45 detect1.py Normal file
@@ -0,0 +1,45 @@
import torch
import cv2
import pandas as pd

# Download the model from GitHub
model = torch.hub.load('ultralytics/yolov5', 'yolov5n')

img = cv2.imread('car.jpg')
img = cv2.resize(img, (1000, 650))

# Perform detection on the image
result = model(img)
print('result: ', result)

# Convert the detection result to a pandas data frame
data_frame = result.pandas().xyxy[0]
print('data_frame:')
print(data_frame)

# Get the counts of each label
label_counts = data_frame['name'].value_counts()
print('Label counts:')
print(label_counts)

# Get the indexes of all of the rows
indexes = data_frame.index
for index in indexes:
    # Find the coordinates of the top-left corner of the bounding box
    x1 = int(data_frame['xmin'][index])
    y1 = int(data_frame['ymin'][index])
    # Find the coordinates of the bottom-right corner of the bounding box
    x2 = int(data_frame['xmax'][index])
    y2 = int(data_frame['ymax'][index])

    # Find the label name
    label = data_frame['name'][index]
    # Find the confidence score of the model
    conf = data_frame['confidence'][index]
    text = label + ' ' + str(conf.round(decimals=2))

    cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2)
    cv2.putText(img, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)

cv2.imshow('IMAGE', img)
cv2.waitKey(0)
107 detect2.py Normal file
@@ -0,0 +1,107 @@
import torch
import cv2
import pandas as pd
import os
import string
from gtts import gTTS
from pydub import AudioSegment
from pydub.playback import play
import pygame
import tkinter as tk
from threading import Thread

# Load YOLOv5 model from GitHub
model = torch.hub.load('ultralytics/yolov5', 'yolov5n')

# Initialize webcam capture
cap = cv2.VideoCapture(0)  # Use 0 for the default webcam, or specify a different index if needed

# Global variable for summary text
summary_text = ''

# Play a pre-recorded word if available, otherwise fall back to text-to-speech
def play_sound_or_tts(word, text_dir='', speed=1):
    filename = os.path.join(text_dir, f"{word}.wav")
    print(filename)

    if os.path.exists(filename):
        sound = AudioSegment.from_file(filename)
        sound = sound.speedup(playback_speed=speed)
        return sound
    else:
        tts = gTTS(word)
        tts.save('temp.mp3')
        sound = AudioSegment.from_file('temp.mp3')
        sound = sound.speedup(playback_speed=speed)
        os.remove('temp.mp3')
        return sound


def play_full_text(text, text_dir='', speed=1.5):
    # Strip punctuation, then speak the text word by word
    translator = str.maketrans('', '', string.punctuation)
    words = text.translate(translator).split()

    combined_sound = AudioSegment.silent(duration=0)

    for word in words:
        print(word)
        sound = play_sound_or_tts(word, text_dir, speed)
        combined_sound += sound

    play(combined_sound)


def on_button_press():
    global summary_text
    play_thread = Thread(target=play_full_text, args=(summary_text, '', 1.1))
    play_thread.start()


def process_frame():
    global summary_text
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame")
        return

    result = model(frame)
    data_frame = result.pandas().xyxy[0]

    # Keep detections with confidence above 0.5
    data_frame = data_frame[data_frame['confidence'] > 0.5]

    label_counts = data_frame['name'].value_counts()
    indexes = data_frame.index
    for index in indexes:
        x1 = int(data_frame['xmin'][index])
        y1 = int(data_frame['ymin'][index])
        x2 = int(data_frame['xmax'][index])
        y2 = int(data_frame['ymax'][index])
        label = data_frame['name'][index]
        conf = data_frame['confidence'][index]
        text = label + ' ' + str(conf.round(decimals=2))
        cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 255, 0), 2)
        cv2.putText(frame, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)

    summary_text = 'There are: '
    for label, count in label_counts.items():
        summary_text += f'{count} {label}, '
    summary_text = summary_text[:-2]  # Remove the trailing comma and space
    summary_text += ' detected.'

    cv2.putText(frame, summary_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imshow('Webcam Feed', frame)
    cv2.waitKey(1)
    root.after(10, process_frame)


# GUI setup
root = tk.Tk()
root.title("Object Detection with Sound Playback")

play_button = tk.Button(root, text="Play Summary", command=on_button_press)
play_button.pack()

# Start the webcam processing
root.after(10, process_frame)

root.mainloop()

cap.release()
cv2.destroyAllWindows()
31 play.py Normal file
@@ -0,0 +1,31 @@
import os
from gtts import gTTS
import pygame
from pydub import AudioSegment
from pydub.playback import play


def play_sound_or_tts(filename, text):
    # Check if the sound file exists
    if os.path.exists(filename):
        # Play the sound file using pygame
        pygame.mixer.init()
        pygame.mixer.music.load(filename)
        pygame.mixer.music.play()
        while pygame.mixer.music.get_busy():  # Wait for the sound to finish playing
            continue
    else:
        # Convert text to speech using gTTS
        tts = gTTS(text)
        tts.save('temp.mp3')  # Save the generated speech to a temporary file

        # Load the temporary file into an AudioSegment
        sound = AudioSegment.from_file('temp.mp3')

        # Play the sound using pydub
        play(sound)

        # Remove the temporary file after playing
        os.remove('temp.mp3')


# Example usage
play_sound_or_tts('dog.wav', 'dog')
BIN suaraku/.DS_Store vendored Normal file (binary file not shown)
BIN suaraku/1.wav Normal file (binary file not shown)
BIN suaraku/10.wav Normal file (binary file not shown)
BIN suaraku/2.wav Normal file (binary file not shown)
BIN suaraku/3.wav Normal file (binary file not shown)
BIN suaraku/4.wav Normal file (binary file not shown)
BIN suaraku/5.wav Normal file (binary file not shown)
BIN suaraku/6.wav Normal file (binary file not shown)
BIN suaraku/7.wav Normal file (binary file not shown)
BIN suaraku/are.wav Normal file (binary file not shown)
BIN suaraku/book.wav Normal file (binary file not shown)
BIN suaraku/car.wav Normal file (binary file not shown)
BIN suaraku/cat.wav Normal file (binary file not shown)
BIN suaraku/detected.wav Normal file (binary file not shown)
BIN suaraku/dog.wav Normal file (binary file not shown)
BIN suaraku/no.wav Normal file (binary file not shown)
BIN suaraku/object.wav Normal file (binary file not shown)
BIN suaraku/smartphone.wav Normal file (binary file not shown)
BIN suaraku/there.wav Normal file (binary file not shown)
BIN suaraku/tv.wav Normal file (binary file not shown)
BIN suaraku/two.wav Normal file (binary file not shown)