Added changes for checking similarity
118  base/api/dbtest.py  Normal file
@@ -0,0 +1,118 @@
from .haystackmapper import mapper
from .haystackreducer import haystackreducer
from .haystack import haystack
from .needlestorage import needlestorage
from .wavsound import wavsound
from multiprocessing import Pool, Process, Manager
from .calltomapper import calltomapper
import time
import profile
import re

""" dbtest is a simulation module to measure the time complexity of
a database search applied to a virtual database """

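# A reading of the flow below (inferred from the code, not an authoritative spec):
#   map step:    each needle (a short slice of the query sound) is compared against
#                each split database in its own Process via calltomapper, which is
#                assumed to record its matches ("emissions") in the shared Manager
#                dict under its pnum key
#   reduce step: haystackreducer flattens the collected emissions and scores them
#                against the keynames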

def test():
    button_wavsound = wavsound('button.wav')

    haystackss = []  # the database, split into a list of smaller databases
    keynames = []
    db_size = 300  # set the database size
    num_split_db = 2  # set the number of split databases
    size_split_db = int(db_size / num_split_db)

    for i in range(num_split_db):
        haystackss.append([])

    for i in range(db_size):
        split_db_key = i // size_split_db  # which split database this record belongs to
        keynames.append(i)
        haystackss[split_db_key].append(haystack(i, button_wavsound.get_data()))

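    # Worked example of the split above: with db_size = 300 and num_split_db = 2,
    # size_split_db is 150, so records 0..149 land in haystackss[0] and
    # records 150..299 in haystackss[1].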
    # haystacks.append(haystack("7", [1, 2, 3, 4, 5]))

    button_needle_factory = needlestorage(button_wavsound, 1000, 50)
    emissions = []

print("USING MAP PROCESS and Manager")
|
||||
|
||||
needles = button_needle_factory.get_needles()
|
||||
print(needles[0])
|
||||
|
||||
    manager = Manager()
    return_emissions = manager.dict()  # shared dict collecting each process's emissions
    jobs = []
    pnum = 0

    # the number of needles, not the size of each needle
    len_needles = len(needles)
    print("Number of Needles: ", len_needles)
    start_time = time.time()

    for needle in needles:
        for haystacks in haystackss:
            p = Process(target=calltomapper,
                        args=(haystacks, needle, pnum, len_needles * num_split_db, return_emissions))
            jobs.append(p)
            p.start()
            pnum += 1
    print(time.time() - start_time)

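    # Fan-out accounting: one Process is spawned per (needle, split database) pair,
    # i.e. len_needles * num_split_db processes in total, which matches the value
    # passed to calltomapper above (assumed there to be the expected result count).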
    for proc in jobs:
        proc.join()  # wait for each process to end completely

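    # All processes were started before this loop, so the map work overlaps;
    # joining in submission order still guarantees every process has finished
    # before the reduce step below reads return_emissions.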
    print(time.time() - start_time)
    emissions_list = sum(return_emissions.values(), [])
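    # The sum(..., []) call above is a plain-Python flatten of the per-process
    # emission lists, e.g. sum([[1], [2, 3]], []) == [1, 2, 3];
    # itertools.chain.from_iterable would do the same with less copying.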
print("Reduce Result:")
|
||||
print(haystackreducer(emissions_list,keynames))
|
||||
print("Done")
|
||||
print(time.time() - start_time)
|
||||

    """
    This is a Pool implementation of parallel processing; it has been
    commented out because it was slower than the Process method.

    print(button_wavsound)
    print("Utilizing MapReduce Pattern")

    pool = Pool(2)  # on a quad-core machine this can be set to 4
    print(button_needle_factory.get_needles())
    emissions = pool.map(mapper, button_needle_factory.get_needles())
    print(emissions)
    print(haystackreducer(sum(emissions, []), keynames))

    emissions = []
    """
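    # (A sketch of how Pool could be fed the same inputs as the Process path,
    # assuming mapper(haystacks, needle) as imported above; an illustration,
    # not the author's method:
    #
    #     from functools import partial
    #     for haystacks in haystackss:
    #         emissions += sum(pool.map(partial(mapper, haystacks), needles), [])
    # )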

    """ The algorithm below is a serial method with no optimization """
    """
    print("Long Way")
    start_long_time = time.time()
    # haystackmap.clear_emission()

    i = 10000  # cautionary protection against an accidental infinite loop

    while i > 0:
        needle = button_needle_factory.pop_unused_needle()
        if needle == []:
            break
        emissions += mapper(haystacks, needle)
        i -= 1
        print("Total So Far: ", len(emissions))

    print("Final:", haystackreducer(emissions, keynames))
    timelapse_serial = time.time() - start_long_time
    print(db_size + 1, timelapse_parallel, timelapse_serial)

    with open('output.txt', 'a') as outputfile:
        outputfile.write(str(db_size + 1) + ' ' + str(timelapse_parallel) + ' ' + str(timelapse_serial) + '\n')
    """

if __name__ == '__main__':
    test()
    profile.run('re.compile("mapper")')  # note: this profiles compiling the regex, not test()