Files
bacaan_tulisan_arab/base/api/interface.py
2022-03-21 00:07:28 +08:00

95 lines
3.2 KiB
Python

from haystackmapper import haystackmapper
from haystackreducer import haystackreducer
from haystack import haystack
from needlestorage import needlestorage
from wavsound import wavsound
from multiprocessing import Pool, Process, Manager
from calltomapper import calltomapper
import time
import os
def _prompt_for_existing_file(prompt):
    """Ask the user for a path until the answer names an existing file."""
    while True:
        path = raw_input(prompt)
        if os.path.isfile(path):
            return path


def _load_haystacks(rootdir):
    """Return one haystack per file found anywhere under *rootdir*.

    Each file is opened through ``wavsound`` and its ``get_data()`` result is
    handed to ``haystack`` together with the "subdir/file" path.
    """
    haystacks = []
    for subdir, __, files in os.walk(rootdir):
        for filename in files:
            path = subdir + "/" + filename
            haystacks.append(haystack(path, wavsound(path).get_data()))
    return haystacks


def _format_match_line(key, hits, total):
    """Format one result row: the key, padding to 25 columns, and the % match.

    The percentage uses float arithmetic.  The previous int/int expression
    truncated to 0 under Python 2 before being multiplied by 100, so every
    partial match was reported as 0.00 %.
    """
    label = str(key)
    percent = 100.0 * int(hits) / total
    # Joining with single spaces reproduces the layout the Python 2 print
    # statement produced for these five comma-separated items.
    return " ".join([
        label,
        ": ",
        (25 - len(label)) * " ",
        "{0:.2f}".format(percent),
        "% match",
    ])


def run():
    """Run an interactive search of the wav database under ``db``.

    Prompts for three inputs: the query .wav file (re-asked until the path
    exists), the number of partitions, and the number of partition samples.
    One mapper process is started per needle of the query; their emissions
    are reduced with ``haystackreducer`` and printed as a "% match" table,
    followed by the elapsed search time in seconds.

    Raises:
        ValueError: if the partition or sample answer is not an integer.
    """
    query = _prompt_for_existing_file(
        "Submit .wav file to search against database (Example: button.wav): ")

    # Instantiate the Wavsound object for the query file
    query_wavsound = wavsound(query)
    print("\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n")
    partition = raw_input("Set number of partitions of the query from 1 to " + str(len(query_wavsound.get_data()) // 3) + ": ")
    samples = raw_input("Set number of samples of partitions from 1 to " + partition + " (Recommend < 50): ")

    # Parse the numeric answers before the database is loaded so that a typo
    # fails immediately instead of after reading every file under db/.
    partition_count = int(partition)
    sample_count = int(samples)

    # Database look up directory
    haystacks = _load_haystacks('db')

    query_needle_factory = needlestorage(query_wavsound, partition_count, sample_count)
    haystackmap = haystackmapper(haystacks)
    needles = query_needle_factory.get_needles()
    len_needles = len(needles)

    manager = Manager()
    # Map processes emit key-value pairs to emissions
    return_emissions = manager.dict()
    # Job is a list of processes
    jobs = []

    print("Number of Needles:  " + str(len_needles))

    # Database query time
    start_time = time.time()

    # Distribute processes using multiprocessor: one process per needle,
    # numbered from 0 in needle order.
    for pnum, needle in enumerate(needles):
        worker = Process(
            target=calltomapper,
            args=(haystackmap, needle, pnum, len_needles, return_emissions),
        )
        jobs.append(worker)
        worker.start()

    for proc in jobs:
        proc.join()

    # Flatten return_emissions into a single list.  extend() is linear,
    # whereas sum(lists, []) copies the accumulated list on every step.
    emissions_list = []
    for emitted in return_emissions.values():
        emissions_list.extend(emitted)

    print("Search Result:")
    result_dict = haystackreducer(emissions_list)

    # Tabulate % match (original author's note: wav files with 0% match are
    # excluded from the result)
    for key in result_dict:
        print(_format_match_line(key, result_dict[key], len_needles))

    # Show search time
    timelapse_parallel = time.time() - start_time
    print(str(timelapse_parallel) + " seconds")
# Script entry point: print the version banner, then start the interactive
# search.  Nothing runs when this module is merely imported.
if __name__ == '__main__':
    print(".WAV Search Engine Version 1 (Only Python Ver 2.X.X.)")
    run()