added changes on checking similarity

2022-03-21 00:07:28 +08:00
parent 638f0a9aee
commit 9da68fbc38
172 changed files with 99465 additions and 49 deletions
--- a/base/api/init.py
+++ b/base/api/init.py
--- a/base/api/pycache/init.cpython-39.pyc
+++ b/base/api/pycache/init.cpython-39.pyc
--- a/base/api/pycache/calltomapper.cpython-39.pyc
+++ b/base/api/pycache/calltomapper.cpython-39.pyc
--- a/base/api/pycache/calltoreducer.cpython-39.pyc
+++ b/base/api/pycache/calltoreducer.cpython-39.pyc
--- a/base/api/pycache/check_similarity.cpython-39.pyc
+++ b/base/api/pycache/check_similarity.cpython-39.pyc
--- a/base/api/pycache/haystack.cpython-39.pyc
+++ b/base/api/pycache/haystack.cpython-39.pyc
--- a/base/api/pycache/haystackmapper.cpython-39.pyc
+++ b/base/api/pycache/haystackmapper.cpython-39.pyc
--- a/base/api/pycache/haystackreducer.cpython-39.pyc
+++ b/base/api/pycache/haystackreducer.cpython-39.pyc
--- a/base/api/pycache/interfaceCLI.cpython-39.pyc
+++ b/base/api/pycache/interfaceCLI.cpython-39.pyc
--- a/base/api/pycache/librosa_run.cpython-39.pyc
+++ b/base/api/pycache/librosa_run.cpython-39.pyc
--- a/base/api/pycache/needlestorage.cpython-39.pyc
+++ b/base/api/pycache/needlestorage.cpython-39.pyc
--- a/base/api/pycache/run.cpython-39.pyc
+++ b/base/api/pycache/run.cpython-39.pyc
--- a/base/api/pycache/serializers.cpython-39.pyc
+++ b/base/api/pycache/serializers.cpython-39.pyc
--- a/base/api/pycache/urls.cpython-39.pyc
+++ b/base/api/pycache/urls.cpython-39.pyc
--- a/base/api/pycache/views.cpython-39.pyc
+++ b/base/api/pycache/views.cpython-39.pyc
--- a/base/api/pycache/wavsound.cpython-39.pyc
+++ b/base/api/pycache/wavsound.cpython-39.pyc
--- a/base/api/calltomapper.py
+++ b/base/api/calltomapper.py
@ -0,0 +1,10 @@
+from .haystackmapper import mapper
+
+def calltomapper(haystacks, needle, processnumber, totalprocesses, return_emissions):
+    """Run one map task and add its emissions to the shared collection.
+
+    calltomapper is the target function of a worker process started by the
+    interface; the search itself is delegated to haystackmapper.mapper.
+
+    Args:
+        haystacks: haystack objects (one repository split) to search.
+        needle: sample subsequence of the query to look for.
+        processnumber: index of this worker; only used in the log line.
+        totalprocesses: total number of workers; accepted for interface
+            compatibility and not used here.
+        return_emissions: shared collection that is extended in place with
+            ``+=`` (run.py passes a multiprocessing Manager list).
+    """
+    # Do the work first so the log line below is truthful: it used to be
+    # printed before the mapper had even started.
+    return_emissions += mapper(haystacks, needle)
+
+    print("Finished with process ", processnumber, " word(sample) size of ",len(needle))
--- a/base/api/calltoreducer.py
+++ b/base/api/calltoreducer.py
@ -0,0 +1,5 @@
+from .haystack import haystack
+from .haystackreducer import haystackreducer
+
+def calltoreducer(emissions, key, join):
+    """Reduce the emissions of one key and publish the tally under that key.
+
+    The value returned by haystackreducer(emissions) is stored in ``join``
+    at ``key``; the function itself returns nothing.
+    """
+    tally = haystackreducer(emissions)
+    join[key] = tally
--- a/base/api/check_similarity.py
+++ b/base/api/check_similarity.py
@ -0,0 +1,30 @@
+import librosa
+import librosa.display as display
+import math
+
+def cosine_similarity1(v1,v2):
+    """Return the cosine similarity of v1 and v2: (v1 . v2) / (||v1|| * ||v2||).
+
+    The vectors are compared element by element over their common prefix,
+    so a length mismatch no longer raises IndexError when v2 is the shorter
+    one. If either vector has zero norm (including empty input) the
+    similarity is undefined and 0.0 is returned instead of raising
+    ZeroDivisionError.
+    """
+    sumxx = sumxy = sumyy = 0
+    for x, y in zip(v1, v2):
+        sumxx += x * x
+        sumyy += y * y
+        sumxy += x * y
+    denominator = math.sqrt(sumxx * sumyy)
+    if denominator == 0:
+        return 0.0
+    return sumxy / denominator
+  
+def check_similarity(audio_path, audio_path2):
+    """Return the cosine similarity of two audio files' dB spectrograms.
+
+    Each file is loaded with librosa.load, transformed with librosa.stft,
+    converted to decibels with librosa.amplitude_to_db and flattened; the
+    two flattened arrays are then compared with cosine_similarity1.
+    """
+    flattened = []
+    for source in (audio_path, audio_path2):
+        samples, _rate = librosa.load(source)
+        spectrum = librosa.stft(samples)
+        decibels = librosa.amplitude_to_db(abs(spectrum))
+        flattened.append(decibels.flatten())
+    first, second = flattened
+    return cosine_similarity1(first, second)
--- a/base/api/dbtest.py
+++ b/base/api/dbtest.py
@ -0,0 +1,118 @@
+from .haystackmapper import mapper
+from .haystackreducer import haystackreducer
+from .haystack import haystack
+from .needlestorage import needlestorage
+from .wavsound import wavsound
+from multiprocessing import Pool, Process, Manager
+from .calltomapper import calltomapper
+import time
+import profile
+import re
+
+""" dbtest is a simulation module to measure time complexity of 
+database search applied to a virtual database"""
+
+def test():
+    """Time a parallel map/reduce search over a simulated repository.
+
+    The repository holds ``db_size`` entries that all carry button.wav's
+    data, spread over ``num_split_db`` splits. One worker process is
+    started per (needle, split) pair, and the elapsed wall-clock time is
+    printed after start-up, after all workers joined and after the reduce
+    step.
+    """
+    button_wavsound = wavsound('button.wav')
+
+    db_size = 300     # Set Database Size
+    num_split_db = 2  # Set number of split databases
+    size_split_db = int(db_size/num_split_db)
+
+    # split database into list of smaller databases
+    haystackss = [[] for _ in range(num_split_db)]
+    for i in range(db_size):
+        # Clamp so a db_size that is not a multiple of num_split_db cannot
+        # index past the last split.
+        split_db_key = min(num_split_db - 1, int(i / size_split_db))
+        haystackss[split_db_key].append(haystack(i,button_wavsound.get_data()))
+
+    button_needle_factory = needlestorage(button_wavsound,1000,50)
+
+    print("USING MAP PROCESS and Manager")
+
+    needles = button_needle_factory.get_needles()
+    print(needles[0])
+
+    manager = Manager()
+    # calltomapper extends this collection with "+=", which a Manager list
+    # supports and a Manager dict does not.
+    return_emissions = manager.list()
+    jobs = []
+    pnum = 0
+
+    # number of needles not size of each needle
+    len_needles = len(needles)
+    print ("Number of Needles: ",len_needles)
+    start_time = time.time()
+
+    for needle in needles:
+        for haystacks in haystackss:
+            p = Process(target=calltomapper, args=(haystacks,needle,pnum,len_needles*num_split_db,return_emissions))
+            jobs.append(p)
+            p.start()
+            pnum += 1
+    print(time.time() - start_time)
+
+    for proc in jobs:
+        proc.join() # wait for each process to end completely
+
+    print(time.time() - start_time)
+    # Copy the shared list into a plain list before reducing.
+    emissions_list = list(return_emissions)
+    print("Reduce Result:")
+    # haystackreducer takes the emissions only (it returns their count).
+    print(haystackreducer(emissions_list))
+    print("Done")
+    print(time.time() - start_time)
+
+    # NOTE: the Pool-based and serial variants that used to follow here as
+    # string-literal blocks were dead code written against the former
+    # haystackmap.mapper API and are no longer carried along.
+# Script entry point: run the timing simulation, then profile a single
+# re.compile("mapper") call (the profile.run statement does not cover test()).
+if __name__ == '__main__': 
+    test()
+    profile.run('re.compile("mapper")')
--- a/base/api/haystack.py
+++ b/base/api/haystack.py
@ -0,0 +1,19 @@
+
+class haystack:
+    """A single repository entry: a name plus its sequence of integers.
+
+    The length of the data is measured once, at construction time.
+    """
+
+    def __init__(self, name, data):
+        self.name, self.data = name, data
+        self.length = len(self.data)
+
+    def get_name(self):
+        """Return the entry's name."""
+        return self.name
+
+    def get_data(self):
+        """Return the stored data sequence."""
+        return self.data
+
+    def get_length(self):
+        """Return the data length recorded at construction."""
+        return self.length
--- a/base/api/haystackmapper.py
+++ b/base/api/haystackmapper.py
@ -0,0 +1,65 @@
+from .haystack import haystack
+
+
+# class haystackmapper:
+    
+#     """
+#     haystackmapper is a class responsible for map process, 
+#     it emits key-value pair if a needle can be found in the haystack
+#     (key: haystack name, value: 1 represents a match)
+#     """
+    
+#     emission = []
+#     def __init__ (self, haystacks):
+#         self.haystacks = haystacks
+def mapper (stack, needle):
+    """Emit ``[name, 1]`` for every haystack in which ``needle`` is found.
+
+    Each haystack is scanned offset by offset. At an offset the needle is
+    compared against as many samples as are still available (the tail of a
+    haystack is compared against a truncated needle). A needle sample whose
+    absolute value is at most 100 always matches; a larger one matches when
+    the haystack sample deviates from it by no more than 5 percent.
+    Scanning of a haystack stops at its first match, or as soon as at most
+    one comparable sample is left.
+
+    Args:
+        stack: iterable of objects offering get_length(), get_data() and
+            get_name().
+        needle: indexable sequence of numbers.
+
+    Returns:
+        A list with one ``[name, 1]`` pair per matching haystack, in the
+        order of ``stack``.
+    """
+    len_needle = len(needle)
+    emission = []
+    for entry in stack:
+        len_haystack = entry.get_length()
+        data = entry.get_data()
+        for start in range(len_haystack):
+            # number of samples that can still be compared from this offset
+            len_sub = min(len_needle, len_haystack - start)
+            if len_sub <= 1:
+                # too little left to compare: give up on this haystack
+                break
+            for offset in range(len_sub):
+                expected = needle[offset]
+                # allow slight variation, and ignore quiet needle samples
+                if abs(expected) > 100 and abs(data[start + offset] - expected) > abs(0.05*expected):
+                    break
+            else:
+                # no sample disagreed: record one match and move on
+                emission.append([entry.get_name(), 1])
+                break
+
+    return emission
+
--- a/base/api/haystackreducer.py
+++ b/base/api/haystackreducer.py
@ -0,0 +1,8 @@
+def haystackreducer(emissions):
+    """
+        haystackreducer reduces the emissions gathered from the map
+        processes to a single tally: the number of emitted key-value
+        pairs it was given.
+    """
+    tally = len(emissions)
+    return tally
+    
--- a/base/api/interface.py
+++ b/base/api/interface.py
@ -0,0 +1,95 @@
+from haystackmapper import haystackmapper 
+from haystackreducer import haystackreducer
+from haystack import haystack
+from needlestorage import needlestorage
+from wavsound import wavsound
+from multiprocessing import Pool, Process, Manager
+from calltomapper import calltomapper
+import time
+import os
+
+def run():
+    
+    """ run runs the database search taking three user inputs, the query wav file,
+    number of partitions, and number of partition samples.
+
+    The inputs are read interactively, every file below the 'db' directory
+    is loaded as a haystack, one process per needle is started, and the
+    per-file match percentages plus the elapsed time are printed.
+
+    NOTE(review): this is Python 2 code (raw_input, print statements). """
+    
+    good_file = 0
+    
+    # keep asking until the entered path names an existing file
+    while (good_file == 0):
+        query     = raw_input("Submit .wav file to search against database (Example: button.wav): ")
+        if (os.path.isfile(query)):
+            good_file = 1
+            
+    #Instantiate Wavsound objects from the wav files
+    t_wavsounds = {}
+    query_wavsound = wavsound(query)    
+    print("\n**Higher number of partitions increases false positive rates, \nwhile lower number of partitions increases false negative rates\n")
+    partition = raw_input("Set number of partitions of the query from 1 to " + str(int(len(query_wavsound.get_data())/3))+": ")
+    samples   = raw_input("Set number of samples of partitions from 1 to " + partition + " (Recommend < 50): ")
+    
+    # Database Structure
+    haystacks = []
+    
+    # Database look up directory
+    rootdir    = 'db'
+    
+    # every file found below rootdir becomes one haystack keyed by its path
+    for subdir, __, files in os.walk(rootdir):
+        for file in files:
+            # for debug print (subdir+"/"+file)
+            t_wavsounds[subdir+"/"+file] = wavsound(subdir+"/"+file)
+            # for debug print(t_wavsounds[subdir+"/"+file])
+            haystacks.append(haystack(subdir+"/"+file,t_wavsounds[subdir+"/"+file].get_data()))
+            
+    query_needle_factory = needlestorage(query_wavsound,int(partition),int(samples))
+    
+    
+    # NOTE(review): haystackmapper.py in this commit only defines the
+    # function mapper (the class is commented out) - confirm this still runs.
+    haystackmap = haystackmapper(haystacks)
+    
+    needles = query_needle_factory.get_needles()
+    len_needles = len(needles)
+    len_needle = len(needles[0]) # size is the same for all needles
+    
+    manager = Manager()
+    
+    # Map processes emit key-value pairs to emissions
+    # NOTE(review): calltomapper in this commit extends its last argument
+    # with "+="; verify that this works with a Manager dict.
+    return_emissions = manager.dict()    
+    
+    # Job is a list of processes
+    jobs = []
+    
+    # Process number
+    pnum = 0
+    
+    print "Number of Needles: ", len(needles)
+    
+    # Database query time
+    start_time = time.time()
+    
+    #Distribute processes using multiprocessor
+    for needle in needles:
+        p = Process(target=calltomapper, args=(haystackmap,needle,pnum,len_needles,return_emissions))
+        jobs.append(p)
+        p.start()
+        pnum += 1
+    
+    # wait for all map processes to finish
+    for proc in jobs:
+        proc.join() 
+    
+    # flatten return_emissions into a list
+    emissions_list = sum(return_emissions.values(),[])
+    
+    print "Search Result:"    
+
+    # NOTE(review): haystackreducer in this commit returns len(emissions),
+    # an int, while the loop below iterates the result like a dict.
+    result_dict = haystackreducer(emissions_list)
+    
+    # Tabulate % match (wav files with 0% match are excluded from the result)
+    for key in result_dict:
+        print str(key),": ",(25-len(str(key)))*" ",str("{0:.2f}".format(int(result_dict[key])/len(needles)*100)),"% match"
+    
+    # Show search time
+    timelapse_parallel = time.time() - start_time   
+    print timelapse_parallel, "seconds"
+    
+# Script entry point: print the banner, then start the interactive search.
+if __name__ == '__main__': 
+    print ".WAV Search Engine Version 1 (Only Python Ver 2.X.X.)"
+    run()
--- a/base/api/interfaceCLI.py
+++ b/base/api/interfaceCLI.py
@ -0,0 +1,50 @@
+from .run import *
+from .calltoreducer import calltoreducer
+def cek_simlilarity2(recorded_sound, url_bacaan, samplelength=5, samples=3, max_split=2):
+    """Search a repository for a recorded sound and return a match percentage.
+
+    Args:
+        recorded_sound: path of the query .wav file.
+        url_bacaan: repository directory handed to run() as its root.
+        samplelength: word size (sample length) passed to run().
+        samples: number of samples passed to run().
+        max_split: maximum number of repository splits passed to run().
+
+    Returns:
+        The percentage string of the last entry returned by run(), followed
+        by "%" (for example "66.66666666666666%"), or "" when run() reports
+        no match at all.
+    """
+    # Kept from the original flow: the query is opened before the search.
+    # NOTE(review): presumably this makes an unreadable file fail early - confirm.
+    wavsound(recorded_sound)
+
+    result_lst = run(recorded_sound, int(samplelength), samples, url_bacaan, max_split)
+
+    # NOTE(review): run() sorts its result by match count in descending
+    # order, so the last entry is the weakest match - confirm that this is
+    # the value the caller wants.
+    keluaran = ""
+    for pair in result_lst:
+        keluaran = pair[1] + "%"
+    return keluaran
--- a/base/api/interfaceGUI.py
+++ b/base/api/interfaceGUI.py
@ -0,0 +1,196 @@
+import tkinter as tk
+from run import *
+from tkinter import ttk
+
+# application is a GUI interface of run.py
+
+class application:
+    """Tkinter front end for run(): query entry, parameter menu, result box
+    and two waveform canvases (query on top, best match below).
+
+    os, time, wavsound and run are expected to come from the module's
+    ``from run import *``.
+    """
+    
+    def __init__(self):
+        # search parameters, adjustable through the menu
+        self.samples = 12
+        self.samplelength = 80
+        self.max_split = 2
+        self.root = tk.Tk()
+        self.root.wm_title("Audio Search Engine")
+        # show_enclosure / show_file_entry create the frames that the
+        # later show_* calls pack their widgets into
+        self.show_enclosure()
+        self.show_file_entry()
+        self.show_buttons()
+        self.show_result()
+        self.show_canvas()
+        self.show_menu()
+        self.root.iconbitmap("img/logo.ico")      
+        # mainloop() blocks until the window is closed, so the assignment
+        # below only runs afterwards
+        self.root.mainloop()
+        self.filename=""
+    
+    # show right enclosure frame
+    def show_enclosure(self):
+        self.group_enclosure =tk.LabelFrame(self.root, text="", padx=5, pady=5, background="white")
+        self.group_enclosure.pack(side="right")
+    
+    # show audio wavform canvas
+    def show_canvas (self):
+        self.group_canvas = tk.LabelFrame(self.group_enclosure, text="Top(Query) Bottom(Best Match)", padx=5, pady=5)
+        self.group_canvas.pack(side='top')
+        
+        self.canvas_query = tk.Canvas(self.group_canvas, width = 200, height = 100, bg = 'black')
+        self.canvas_result = tk.Canvas(self.group_canvas, width = 200, height = 100, bg = 'black')
+        # pack the canvas into a frame/form
+        
+        # dashed red line marks the vertical middle (y = 50) of each canvas
+        self.canvas_query.pack(expand = 'yes', fill = 'both') 
+        self.canvas_query.create_line(0, 50, 199.9, 50, fill="red", dash=(4, 4))
+        
+        self.canvas_result.pack(expand = 'yes', fill = 'both', side='bottom') 
+        self.canvas_result.create_line(0, 50, 199.9, 50, fill="red", dash=(4, 4))   
+                
+        #self.photo = tk.PhotoImage(file = 'img/search.png',  width = 70, height = 70 )
+        #self.canvas.create_image(5, 5, image=self.photo, anchor="nw")  
+    
+    # paint both canvases black again and redraw the dashed middle lines
+    def clear_canvas(self):
+        self.canvas_query.create_rectangle(0, 0, 200, 100, fill="black")
+        self.canvas_result.create_rectangle(0, 0, 200, 100, fill="black")
+        self.canvas_query.create_line(0, 50, 199.9, 50, fill="red", dash=(4, 4))
+        self.canvas_result.create_line(0, 50, 199.9, 50, fill="red", dash=(4, 4))
+        
+    # draw_wavform draws waveform of wav data to a target canvas
+    # ("query" or "result"; any other target draws nothing)
+    def draw_wavform (self, wavdata, color, target):
+        # scale x to the 200 px canvas width and the largest sample to 20 px
+        # NOTE(review): both divisions fail for empty data or max(wavdata) == 0
+        unit_x=200/len(wavdata)
+        unit_y=20/max(wavdata)
+        for i in range(len(wavdata)-1):   
+            y_1 = wavdata[i]*unit_y + 50
+            x_1 = i*unit_x
+            y_2 = wavdata[i+1]*unit_y + 50
+            x_2 = (i+1)*unit_x      
+            if target == "query":
+                self.canvas_query.create_line(x_1, y_1, x_2, y_2, fill=color, width=0.5)
+            elif target == "result":
+                self.canvas_result.create_line(x_1, y_1, x_2, y_2, fill=color, width=0.5)
+    
+    # refresh parameter labels
+    def refresh_parameters (self):
+        newtext="Number of Audio Samples to Compare: " + str(self.samples) + " Repository Maximum Split Paramter: " + str(self.max_split) + " Word Length: " + str(self.samplelength )
+        self.label_result.config(text=newtext)       
+    
+    # increase number of samples (words or needles)   
+    def sample_up (self):
+        self.samples += 1
+        self.refresh_parameters()    
+        
+    # decrease number of samples (words or needles)                    
+    def sample_down (self):
+        if self.samples > 0:
+            self.samples -= 1
+            self.refresh_parameters()        
+            
+    # increase samplelength (word size)    
+    def word_up (self):
+        self.samplelength += 5
+        self.refresh_parameters()
+
+    # decrease samplelength (word size)
+    def word_down (self):
+        if self.samplelength > 5:
+            self.samplelength -= 5  
+            self.refresh_parameters()
+    
+    # show_menu shows the menu at the top
+    def show_menu (self): 
+        self.menu = tk.Menu(self.root)
+        self.menu.add_command(label="Search", command=self.on_button_click)
+        self.menu.add_command(label="Precision +", command=self.sample_up)
+        self.menu.add_command(label="Precision -", command=self.sample_down)
+        self.menu.add_command(label="Word Size +", command=self.word_up)
+        self.menu.add_command(label="Word Size -", command=self.word_down)        
+        self.menu.add_command(label="Quit", command=self.quit)
+        self.root.config (menu=self.menu)
+        
+    # add style (optional)
+    def add_config (self):
+        self.style = ttk.Style()
+        #self.style.configure(... enter here ...)        
+
+    # show textbox that holds the result of the search
+    def show_result (self):
+        self.group_result = tk.LabelFrame(self.root, text="", padx=5, pady=5)
+        self.group_result.pack(padx=10, pady=15, side = 'left')
+        self.label_result = tk.Label(self.group_result,  text="Number of Audio Samples to Compare: " + str(self.samples) + " Repository Maximum Split Paramter: " + str(self.max_split) + " Word Length: " + str(self.samplelength ))
+        self.label_result.pack()        
+        self.text_result = tk.Text(self.group_result, height="20")
+        self.text_result.configure(background='black', foreground='cyan')
+        self.text_result.pack()
+    
+    # show file entry input box
+    def show_file_entry(self):
+        # GROUP ENTRY
+        
+        self.group_entry = tk.LabelFrame(self.group_enclosure, text="", padx=5, pady=5)
+        self.group_entry.pack(padx=10, pady=10, side = 'bottom')
+        
+        
+        # query file name entry, pre-filled with a default
+        self.label_file = tk.Label(self.group_entry, text="Query Filename")
+        self.label_file.pack(padx=58)
+        self.entry_file = tk.Entry(self.group_entry, bd =5, relief="flat")
+        self.entry_file.pack()
+        self.entry_file.insert(0, "voicequery.wav")
+        # repository directory entry, pre-filled with a default
+        self.label_db = tk.Label(self.group_entry, text="Path to repository")
+        self.label_db.pack()
+        self.entry_db = tk.Entry(self.group_entry, bd =5,  relief="flat")
+        self.entry_db.insert(0, "db_voice")
+        self.entry_db.pack()        
+        
+    # show search button below the file entry
+    def show_buttons (self):
+        root = self.root
+        tk.Button(self.group_entry, padx=15, text="Search", command=self.on_button_click).pack()
+        
+    # on click event for the button: run the search for the entered file
+    # and show the result text plus both waveforms; does nothing when the
+    # entered query file does not exist
+    def on_button_click(self):
+        
+        self.filename = self.entry_file.get()
+        self.clear_canvas()
+        if (os.path.isfile(self.filename)):  
+            query_wavsound = wavsound(self.filename)            
+            self.dbroot = self.entry_db.get()
+            samples = self.samples
+            # computed but not used below
+            partition = int(len(query_wavsound.get_data())/self.samplelength)
+            max_split = self.max_split
+            
+            # repository query time
+            start_time = time.time()        
+            
+            result_lst = run(self.filename, self.samplelength, samples, self.dbroot, max_split)
+            
+            # output
+            output = "Search Result: \n" 
+            
+            # Tabulate % match (wav files with 0% match are excluded from the result)
+            for pair in result_lst:
+                    output += pair[0] + " : " + (40-len(pair[0]))*" " + pair[1] + "% match" + "\n"
+                        
+            # Show search time
+            timelapse_parallel = time.time() - start_time   
+            output = output + str(timelapse_parallel) + "seconds"            
+
+            # newest result is inserted at the top of the text box
+            self.text_result.insert('1.0', output + "\n" )
+            
+            self.draw_wavform(query_wavsound.get_data(),"cyan","query")
+            
+            # Third whitespace-separated token of the output, i.e. the first
+            # result's file name when there is one.
+            # NOTE(review): with an empty result, or a path containing
+            # whitespace, this token is not a valid file name.
+            top_match_wavsoundfile = output.split()[2]
+            print( output.split())
+            print(top_match_wavsoundfile)
+            top_match_wavsound = wavsound(top_match_wavsoundfile)      
+            
+            self.draw_wavform(top_match_wavsound.get_data(),"white","result")
+            
+    # quit application       
+    def quit(self):
+        self.root.destroy()
+        
+# Script entry point: constructing the application builds the window and
+# enters the Tk main loop.
+if __name__ == '__main__': 
+    my_app = application()
+
+
+
+
+
+
+
--- a/base/api/librosa_run.py
+++ b/base/api/librosa_run.py
@ -0,0 +1,72 @@
+import librosa
+import librosa.display as display
+import matplotlib.pyplot as plt
+import os
+# from numpy import array
+from datetime import datetime
+
+from numpy.linalg import norm
+from dtw import dtw
+
+# import math
+
+import warnings
+warnings.filterwarnings("ignore")
+
+def dot(A,B): 
+    """Return the dot product of A and B over their common prefix."""
+    total = 0
+    for a, b in zip(A, B):
+        total += a * b
+    return total
+
+def cosine_similarity(a,b):
+    """Return the cosine similarity dot(a, b) / (|a| * |b|).
+
+    When either vector has zero norm the similarity is undefined; 0.0 is
+    returned in that case instead of dividing by zero.
+    """
+    norm_product = (dot(a,a) **.5) * (dot(b,b) ** .5)
+    if norm_product == 0:
+        return 0.0
+    return dot(a,b) / norm_product
+
+def fungsi_librosa(sound_url1,sound_url2):
+    """Compare two audio files through their MFCC features.
+
+    A waveform plot of each file is saved below
+    ``static/created_image/<YYYY-MM-DD>/`` as ``spec<HH-MM-SS>.png`` and
+    ``spec1<HH-MM-SS>.png``.
+
+    Args:
+        sound_url1: path of the first audio file.
+        sound_url2: path of the second audio file.
+
+    Returns:
+        A dict with the keys 'dist' (first value returned by dtw for the
+        two MFCC sequences), 'cosine_similaritynya' (cosine similarity of
+        the flattened MFCC matrices), 'path' (directory the plots were
+        written to) and 'hour_min_sec' (time stamp used in the plot names).
+    """
+    print(sound_url1)
+    print(sound_url2)
+
+    y1, sr1 = librosa.load(sound_url1)
+    y2, sr2 = librosa.load(sound_url2)
+
+    # Pass y/sr by keyword: recent librosa releases no longer accept them
+    # positionally.
+    mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
+    mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
+
+    today_date = datetime.now().strftime("%Y-%m-%d")
+    path_nya = 'static/created_image/' + today_date + '/'
+    # exist_ok avoids the check-then-create race between concurrent requests
+    os.makedirs(path_nya, exist_ok=True)
+
+    hour_min_sec = datetime.now().strftime("%H-%M-%S")
+
+    for signal, rate, prefix in ((y1, sr1, 'spec'), (y2, sr2, 'spec1')):
+        figure = plt.figure(figsize=(14, 5))
+        display.waveshow(signal, sr=rate)
+        plt.savefig(path_nya + prefix + hour_min_sec + '.png')
+        # pyplot keeps every figure alive until it is closed explicitly
+        plt.close(figure)
+
+    # only the first returned value is needed; the warp path gets its own
+    # name instead of overwriting the image directory variable
+    dist, _cost, _acc_cost, _warp_path = dtw(mfcc1.T, mfcc2.T, dist=lambda x, y: norm(x - y, ord=1))
+
+    # flatten the MFCC matrices row by row
+    array1 = [val for nums in mfcc1 for val in nums]
+    array2 = [val for nums in mfcc2 for val in nums]
+
+    cosine_similaritynya = cosine_similarity(array1, array2)
+
+    return {'dist': dist, 'cosine_similaritynya': cosine_similaritynya, 'path': path_nya, 'hour_min_sec': hour_min_sec}
+    
--- a/base/api/mapreducetest.py
+++ b/base/api/mapreducetest.py
@ -0,0 +1,64 @@
+from haystackmapper import haystackmapper 
+from haystackreducer import haystackreducer
+from haystack import haystack
+from multiprocessing import Pool, Process,  Manager
+from simplfunction import simplefunction
+from calltomapper import calltomapper
+
+# Manual smoke test of the map-reduce pieces: a Pool run, plain Processes,
+# Processes sharing a Manager dict, and finally a serial run.
+# NOTE(review): written against a haystackmapper class with a .mapper
+# method; haystackmapper.py in this commit only defines the function mapper.
+if __name__ == '__main__':  
+    
+    """Map reduce test is a simple testing module to check functionality
+    of map-reduce implementation"""
+    
+    # four small in-memory haystacks used by every variant below
+    haystacks = []
+    haystacks.append(haystack("0",[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
+    haystacks.append(haystack("1",[3, 2, 2, 3, 4, 3, 6, 7, 8, 9]))
+    haystacks.append(haystack("2",[1, 6, -1, 0, 4, 0, 6, 7, 8, 9]))
+    haystacks.append(haystack("3",[3, 3, 3]))
+    
+    haystackmap = haystackmapper(haystacks)
+    
+    emissions= []
+    
+    print("USING MAP POOL") 
+    pool = Pool(2) # if it is a quad-core machine it can be set to 4
+    emissions = pool.map(haystackmap.mapper, [[2],[3]])
+    print(emissions) 
+    # pool.map returns one list per needle; sum(..., []) flattens them
+    print(haystackreducer(sum(emissions,[])))
+    emissions= []
+    
+    print("USING MAP PROCESS")
+    # three processes are started, but only the last one is joined
+    p = Process(target=simplefunction, args=(1,2))
+    p.start()
+    p = Process(target=simplefunction, args=(1,3))
+    p.start()
+    p = Process(target=simplefunction, args=(1,4))
+    p.start()
+    p.join()
+    
+    print("USING MAP PROCESS WITH MANAGER")
+    needles = [2, 3]
+    manager = Manager()
+    return_emissions = manager.dict()    
+    jobs = []
+    pnum = 0
+    for needle in needles:#distributive not iteration
+        # NOTE(review): four arguments are passed here, while calltomapper
+        # in this commit takes five (totalprocesses is missing).
+        p = Process(target=calltomapper, args=(haystackmap,[needle],pnum,return_emissions))
+        jobs.append(p)
+        p.start()
+        pnum += 1
+    
+    for proc in jobs:
+        proc.join()   
+    
+    emissions_list = sum(return_emissions.values(),[])
+    print(haystackreducer(emissions_list )) 
+    
+    print("Long Way (Serial Method)")  
+    
+    needles = [2, 3]
+    haystackmap = haystackmapper(haystacks)
+    for needle in needles:#distributive not iteration
+        #print (needle)
+        emissions = emissions + haystackmap.mapper([ needle ])
+    print(haystackreducer(emissions))
--- a/base/api/needlestorage.py
+++ b/base/api/needlestorage.py
@ -0,0 +1,35 @@
+from .wavsound import wavsound
+
+class needlestorage:
+
+    """needlestorage generates needles (subsequence samples) from the data
+    of a wavsound object.
+
+    Only every 16th data value is kept. At most ``limit`` needles of up to
+    ``sample_length`` values each are cut out at evenly spaced start
+    positions, so the samples are picked deterministically and spread
+    across the file. A needle that starts near the end of the data may be
+    shorter than ``sample_length``.
+    """
+
+    # Class-level default kept for backward compatibility; every instance
+    # binds its own list in __init__, so this one is never mutated.
+    needles = []
+
+    def __init__(self, wavsound, sample_length, limit):
+        """Cut the needles out of ``wavsound.get_data()``.
+
+        Args:
+            wavsound: object offering get_data().
+            sample_length: number of values per needle.
+            limit: maximum number of needles; 0 or less yields none.
+        """
+        self.needles = []
+        data = wavsound.get_data()[::16] # keep every 16th value
+        if limit <= 0:
+            # nothing requested; also avoids dividing by zero below
+            return
+        raw_gap = int(len(data)/limit)-sample_length
+        print(raw_gap, "is skip value")
+        # gap between the starting positions of two consecutive needles
+        # (i.e. opposite of degree of overlap); never less than 1
+        skips_per_needle = max(1, raw_gap)
+        for start in range(0, len(data), skips_per_needle):
+            if len(self.needles) >= limit:
+                break
+            self.needles.append(data[start:start+sample_length])
+
+    def pop_unused_needle(self):
+        """Remove and return the first needle, or [] when none is left."""
+        if not self.needles:
+            return []
+        return self.needles.pop(0)
+
+    def get_needles(self):
+        """Return the list of needles."""
+        return self.needles
+
+    def clear_needles(self):
+        """Replace the needle list by a new empty list."""
+        self.needles = []
--- a/base/api/run.py
+++ b/base/api/run.py
@ -0,0 +1,109 @@
+# from haystackmapper import haystackmapper 
+from .haystackreducer import haystackreducer
+from .haystack import haystack
+from .needlestorage import needlestorage
+from .wavsound import wavsound
+from multiprocessing import Pool, Process, Manager
+from .calltomapper import calltomapper
+from .calltoreducer import calltoreducer
+from operator import itemgetter
+import time
+import os
+
+def run(query, sample_length, samples, rootdir, max_split):
+    """Search the repository below ``rootdir`` for the query wav file.
+
+    Args:
+        query: path of the query wav file.
+        sample_length: length of each needle cut out of the query.
+        samples: number of needles to cut out of the query.
+        rootdir: directory that is walked recursively; every file found is
+            loaded as one repository entry.
+        max_split: number of repository splits; every needle is searched
+            in every non-empty split by a separate process.
+
+    Returns:
+        A list of ``[file key, percentage]`` pairs (both strings), sorted
+        by number of matching needles in descending order. Files without
+        any match are left out.
+    """
+    query_wavsound = wavsound(query)
+
+    # number of repository entries that go into one split before the next
+    # split is used
+    db_size_per_split = 100
+
+    # split repository into list of smaller repositories
+    haystackss = [[] for _ in range(max_split)]
+    key_names = []
+
+    # Read Files in the DB
+    counter = 0
+    for subdir, __, files in os.walk(rootdir):
+        for file in files:
+            key = subdir+"/"+file
+            key_names.append(key)
+            # The last valid split index is max_split - 1; clamping to
+            # max_split raised IndexError once the repository outgrew
+            # max_split * db_size_per_split entries.
+            split_db_key = min(max_split - 1, int(counter / db_size_per_split))
+            haystackss[split_db_key].append(haystack(key, wavsound(key).get_data()[::16]))
+            counter += 1
+
+    # Get segments of the query data as needles
+    needles = needlestorage(query_wavsound,sample_length,int(samples)).get_needles()
+
+    # MAP --------------------------------------------------
+
+    # Manager to keep track of all map and reduce results
+    manager = Manager()
+
+    # Map processes emit key-value pairs to emissions
+    return_emissions = manager.list()
+
+    jobs = []
+    pnum = 0
+
+    # Distribute processes: one per (needle, non-empty split) pair
+    len_needles = len(needles)
+    for needle in needles:
+        for haystacks in haystackss:
+            if haystacks != []:
+                p = Process(target=calltomapper, args=(haystacks,needle,pnum,len_needles*len(haystackss),return_emissions))
+                jobs.append(p)
+                p.start()
+                pnum += 1
+
+    for proc in jobs:
+        proc.join()
+
+    # SHUFFLE/REDUCE ------------------------------------------
+
+    # Take one local snapshot instead of scanning the shared list through
+    # its proxy once per key.
+    emitted = list(return_emissions)
+
+    jobs = []
+    result_dict = manager.dict()
+
+    for key in key_names:
+        key_list = [1 for x in emitted if x[0] == key]
+        print (key, key_list)
+        q = Process(target=calltoreducer, args=(key_list, key, result_dict))
+        jobs.append(q)
+        q.start()
+
+    for proc in jobs:
+        proc.join()
+
+    result_lst = []
+    print(len(needles), "is length of needles")
+    for key, value in sorted(result_dict.items(), key=itemgetter(1), reverse=True):
+        if value > 0:
+            # share of needles that matched this file, in percent
+            result_lst.append([str(key), str((int(value)/len(needles)*100))])
+
+    return result_lst
--- a/base/api/run.pyc
+++ b/base/api/run.pyc
--- a/base/api/serializers.py
+++ b/base/api/serializers.py
@ -0,0 +1,8 @@
+from rest_framework.serializers import ModelSerializer
+from base.models import tb_bacaan
+
+
+class tb_bacaanSerializer(ModelSerializer):
+    """ModelSerializer bound to the tb_bacaan model, exposing all of its fields."""
+    class Meta:
+        # Model whose instances this serializer converts.
+        model = tb_bacaan
+        # '__all__' asks ModelSerializer to include every field of the model.
+        fields = '__all__'
--- a/base/api/urls.py
+++ b/base/api/urls.py
@ -0,0 +1,8 @@
+from django.urls import path
+from . import views
+
+# Route table for this app: each entry maps a URL pattern to a function in views.py.
+# NOTE(review): the prefix these patterns are mounted under is configured elsewhere.
+urlpatterns = [
+    # Root: views.getRoutes (route listing on GET, recording upload on POST).
+    path('', views.getRoutes),
+    # Collection endpoint: views.getBacaans returns every tb_bacaan row.
+    path('bacaans/', views.getBacaans),
+    # Detail endpoint: pk is captured as a string and passed to views.getBacaan.
+    path('bacaans/<str:pk>/', views.getBacaan),
+]
--- a/base/api/views.py
+++ b/base/api/views.py
@ -0,0 +1,64 @@
+import os
+from rest_framework.decorators import api_view
+from rest_framework.response import Response
+from base.models import tb_bacaan
+from .serializers import tb_bacaanSerializer
+from .check_similarity import check_similarity
+from .interfaceCLI import cek_simlilarity2
+
+from .librosa_run import fungsi_librosa
+
+@api_view(['GET', 'POST'])
+def getRoutes(request):
+  
+  if request.method == 'GET':
+    
+    routes = [
+      'GET /api',
+      'POST /api/bacaan',
+      
+      
+    ]
+    return Response(routes)
+  if request.method == 'POST':
+    url_bacaan = request.POST['url_bacaan']
+    nama = request.FILES['sound'].name
+    nama = nama.replace('Z', '').replace('.wav', '').replace('.', '').replace('-', ' ').replace(':', ' ').replace('T', ' ')
+    path = 'static/uploaded/'
+    # print(bool(request.FILES.get('hehe', False))) # check if file is empty , true is not empty . false is empty
+    handle_uploaded_file(request.FILES['sound'],url_bacaan,nama)
+    # check_similarity(path+nama+'.wav', 'static/audio/'+url_bacaan)
+    # similarity = check_similarity(path+nama+'.wav', 'static/audio/'+url_bacaan)
+    librosa_run = fungsi_librosa(path+nama+'.wav', 'static/audio/'+url_bacaan)
+    # remove athe last 5 char on url_bacaan
+    url_bacaan =  url_bacaan[:-5]
+    # print(url_bacaan)
+    # similarity2 = cek_simlilarity2(path+nama+'.wav', 'static/audio/'+url_bacaan)
+    dataall = {'data' : 'sini data'}
+    #add librosa_run dictinary to dataall
+    dataall.update(librosa_run)   
+    return Response(dataall)
+
+
+def handle_uploaded_file(f,url_bacaan,nama):
+    path = 'static/uploaded/' # this is the path to the folder where you want to save the file
+    isExist = os.path.exists(path) # check if the folder exist
+    if not isExist: # if not exist then create the folder
+      os.mkdir(path) # create the folder
+    with open(path+nama+'.wav', 'wb+') as destination:
+        for chunk in f.chunks():
+            destination.write(chunk)
+            
+    
+
+@api_view(['GET'])
+def getBacaans(request):
+  bacaans = tb_bacaan.objects.all()
+  serializer = tb_bacaanSerializer(bacaans, many=True)
+  return Response(serializer.data)
+
+@api_view(['GET'])
+def getBacaan(request,pk):
+  bacaan = tb_bacaan.objects.get(id=pk)
+  serializer = tb_bacaanSerializer(bacaan)
+  return Response(serializer.data)
--- a/base/api/wavread.py
+++ b/base/api/wavread.py
@ -0,0 +1,12 @@
+from .wavsound import wavsound
+
+"""wavread is a testing module to test the functionality of wavsound"""
+
+# NOTE: everything below is module-level, so it runs whenever this module is
+# executed or imported. The 'db/...' paths are relative to the working directory.
+
+# Load the first sample file and print it; wavsound.__repr__ renders the
+# samples as a space-separated string.
+button_wavsound = wavsound('db/button.wav')
+print(button_wavsound)
+
+# Same for the second sample file.
+beep_wavsound = wavsound('db/buttonresampled.wav')
+print(beep_wavsound)
+# Number of values read from the first file.
+print (len(button_wavsound.get_data()))
+# Value counts of both files side by side, for comparison.
+print (len(beep_wavsound.get_data()),len(button_wavsound.get_data()))
+# First of 100 equal-length chunks of the first file.
+print(button_wavsound.get_chunk(0,100))
--- a/base/api/wavsound.py
+++ b/base/api/wavsound.py
@ -0,0 +1,53 @@
+import wave, struct
+
+class wavsound:
+    
+    """wavsound object collects data from a wav file and stores it
+    as a list of integers """
+    
+    def __init__(self, wav_file):
+        self.data = []# set it as a local variable
+        if wav_file == '':
+             return
+        #print(wav_file)
+        waveFile = wave.open(wav_file, 'r')
+        
+        length = waveFile.getnframes()
+        for i in range(0,length):
+            waveData = waveFile.readframes(1)
+            
+            try:
+                data = struct.unpack("<h", waveData)
+            except:
+                data = struct.unpack("<L", waveData)
+            # data = struct.unpack("<h", waveData)
+            # data = struct.unpack("<L", waveData)
+            self.data.append(int(data[0]))
+            
+    # return data    
+    def get_data(self):
+        return self.data
+    
+    # set data from a list of int
+    def set_data(self, data):
+        self.data = data  
+    
+    # copy data from other wavsound
+    def copy_from(self, other_wavsound):
+        self.data = other_wavsound.get_data()    
+    
+    # Get a small chunk of wavsound
+    def get_chunk (self, section_no, num_chunk):
+        new_wavsound = wavsound('')
+        data = self.get_data()
+        #print(len(self.get_data()))
+        length = int(len(self.get_data())/num_chunk)
+        #print(length)
+        new_wavsound.set_data(data[(length*section_no):(length*(section_no+1))])
+        return new_wavsound
+    def __repr__(self):
+        strr = ''
+        for i in self.data:
+            strr = strr + " " + str(i)
+        return strr
+