added changes on checking similarity
This commit is contained in:
65
base/api/haystackmapper.py
Normal file
65
base/api/haystackmapper.py
Normal file
@ -0,0 +1,65 @@
|
||||
from .haystack import haystack
|
||||
|
||||
|
||||
# class haystackmapper:
|
||||
|
||||
# """
|
||||
# haystackmapper is a class responsible for map process,
|
||||
# it emits key-value pair if a needle can be found in the haystack
|
||||
# (key: haystack name, value: 1 represents a match)
|
||||
# """
|
||||
|
||||
# emission = []
|
||||
# def __init__ (self, haystacks):
|
||||
# self.haystacks = haystacks
|
||||
def _window_matches(window, needle, count, min_magnitude, tolerance):
    """Return True when the first *count* items of *window* track *needle*.

    A needle value whose magnitude is at most *min_magnitude* is not
    compared at all.  Any other value fails the window when the window item
    differs from it by more than ``abs(tolerance * needle_value)``.
    """
    # Index explicitly (rather than zip) so that exactly `count` positions
    # are compared, as dictated by the caller's length bookkeeping.
    for k in range(count):
        expected = needle[k]
        if abs(expected) <= min_magnitude:
            continue
        # Debugging aid: print needle, window and
        # abs(window[k] - expected) / expected here to inspect a comparison.
        if abs(window[k] - expected) > abs(tolerance * expected):
            return False
    return True


def mapper(stack, needle, *, min_magnitude=100, tolerance=0.05):
    """Map step: emit ``[name, 1]`` for every haystack that contains *needle*.

    Each haystack is scanned with a sliding window.  At every start position
    the window is compared element-wise against the needle, allowing a
    relative deviation of *tolerance* on needle values whose magnitude
    exceeds *min_magnitude*.  The first matching position produces one
    emission for that haystack and ends its scan.

    Behaviour worth knowing:

    * Near the end of a haystack the window is shorter than the needle; only
      the leading part of the needle is compared there, so a truncated tail
      that agrees with the needle's prefix counts as a match.
    * A window of length 1 or less is never accepted and stops the scan of
      that haystack, so a needle with fewer than two items never matches.

    Args:
        stack: Iterable of objects exposing ``get_length()``, ``get_data()``
            and ``get_name()``.  ``get_data()`` is assumed to return a
            sliceable sequence of numbers whose length agrees with
            ``get_length()`` -- TODO confirm against the haystack class.
        needle: Sequence of numbers to look for.
        min_magnitude: Needle values with ``abs(value) <= min_magnitude`` are
            ignored during comparison.  Defaults to 100.
        tolerance: Allowed relative deviation for compared values.
            Defaults to 0.05 (5 %).

    Returns:
        A list of ``[haystack_name, 1]`` pairs, one per matching haystack, in
        the order the haystacks were supplied.
    """
    len_needle = len(needle)
    emission = []

    for candidate in stack:
        len_haystack = candidate.get_length()
        data = candidate.get_data()

        for start in range(len_haystack):
            # Number of positions that can still be compared from `start`.
            len_sub = min(len_needle, len_haystack - start)
            if len_sub <= 1:
                # Too little left to compare; later starts are shorter still.
                break

            window = data[start:start + len_needle]
            if _window_matches(window, needle, len_sub,
                               min_magnitude, tolerance):
                emission.append([candidate.get_name(), 1])
                break

    return emission
|
||||
|
||||
Reference in New Issue
Block a user