-
Notifications
You must be signed in to change notification settings - Fork 1
/
query-shoe.conf
151 lines (150 loc) · 5.91 KB
/
query-shoe.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# Input stage: periodically polls the YouTube Data API v3 search endpoint and
# emits each JSON response as an event tagged with type "query-shoe".
input {
http_poller {
# One endpoint, registered under the label "localhost" (the label is only a
# name for this URL entry; the request goes to www.googleapis.com).
# Query parameters visible in the URL: q=shoe%7Cshoes (i.e. shoe|shoes),
# order=date, relevanceLanguage=en, maxResults=20, type=video, and a `fields`
# filter limiting each item to id plus selected snippet attributes.
# NOTE(review): the API key is embedded in the URL in plain text. Consider
# supplying it via Logstash environment-variable substitution or the keystore
# and rotating this key, since it is stored with the config.
urls => {
"localhost" => "https://www.googleapis.com/youtube/v3/search?part=snippet&order=date&q=shoe%7Cshoes&relevanceLanguage=en&maxResults=20&fields=items(id,snippet(title,description,channelId,channelTitle,publishedAt,liveBroadcastContent))&type=video&key=AIzaSyDRU6vwN5ZhZWaZAVFRBO9JzWSc2C83UyU"
}
# The filter and output stages select events on this type value.
type => "query-shoe"
# Per the http_poller plugin documentation both values below are in seconds.
request_timeout => 120
# NOTE(review): newer releases of the http_poller plugin replace `interval`
# with `schedule` -- confirm against the installed plugin version.
interval => 1600
}}
# Filter stage for "query-shoe" events:
#   1. seed default values for the fields the scoring script writes,
#   2. split the response so that every element of `items` becomes its own event,
#   3. score the item text against positive / negative keyword lists,
#   4. attach a random UUID and a keyed fingerprint of the item.
filter {
  if [type] == "query-shoe" {
    # Defaults; the ruby filter below overwrites them with event.set.
    mutate {
      add_field => { "negative_terms" => "" }
      add_field => { "words" => "" }
      add_field => { "term" => "" }
      add_field => { "high_priority" => "no" }
      add_field => { "positive_terms" => "" }
      add_field => { "score" => 0 }
    }

    # One event per search result.
    split {
      field => "items"
    }

    ruby {
      init => "require 'json'"
      code => "
        # Keyword scoring of a single search result.
        #
        # Fields written:
        #   term           - true when at least one positive keyword was found
        #   terms_score    - distinct positive keywords minus negative keyword hits
        #   positive_terms - space separated distinct positive keywords found
        #   negative_terms - space separated negative keywords found (with repeats)
        #   high_priority  - 'yes' when a first-person word is directly followed,
        #                    among the matched keywords, by a supplementary keyword
        #   words          - the token list (only when more than one keyword matched)
        #   score          - final score, see the bottom of the script

        # Tokenise the stringified item: every non-alphanumeric character is a
        # separator and all tokens are lower-cased. The whole item is stringified,
        # so presumably its field names are tokenised along with the values.
        tokens = event.get('items').to_s.gsub(/[^0-9A-Za-z]/, ' ').downcase.split(' ')

        # Keywords that may follow a first-person word to flag high priority.
        supp_words = %w[
          engineer improved enhanced engineered engineering innovate innovation innovated
          laces ankle arch foot feet wear design designed heel tested test outsole sole
          puff shank waist upper lower throat seat patented invent invented tongue feather
          modified modify lining going completed muscles muscle motion length width health
          fitness vamp welt insole eyelet toe toes leather material calf leg pattern quarter
          patent resist resistant weather develop developed built for comfort stability
          grip lacing round flat curve fit tech technology latest lace running run
          idea create creation created
        ]
        # Positive keywords: the supplementary list plus two extra words.
        words = supp_words + %w[my new]
        # Negative keywords. 'shopping' and 'bryant' are separate entries; a
        # missing comma previously fused them into one word that never matched.
        negate_words = %w[
          nike asics brooks shop shopping bryant collection deal collect jordan curry
          lonzo lebron durant lavar subscribe reebok adidas northface newbalance review
          sale offer live cheap ebay discount amazon buy top best how free
        ]
        # First-person words, lower-case because the tokens are lower-cased.
        # NOTE(review): only positions of positive keywords are examined below,
        # so of these words only 'my' can currently trigger high_priority.
        unique_words = %w[my i me we]

        matched = []
        positive_terms = ''
        negative_terms = ''
        scorekeeper = 0

        tokens.each do |token|
          # A positive keyword scores once, however often it occurs.
          if words.include?(token) && !matched.include?(token)
            matched.push(token)
            positive_terms.concat(token + ' ')
            scorekeeper += 1
          end
          # A negative keyword is penalised on every occurrence.
          if negate_words.include?(token)
            scorekeeper -= 1
            negative_terms.concat(token + ' ')
          end
        end

        # Set unconditionally so the fields exist even when there are no tokens.
        event.set('term', !matched.empty?)
        event.set('terms_score', scorekeeper)

        if matched.size > 1
          # Ascending token indexes at which any matched keyword occurs.
          positions = tokens.each_index.select { |i| matched.include?(tokens[i]) }
          total_diff = 0.0
          positions.each_cons(2) do |current, following|
            if unique_words.include?(tokens[current]) && supp_words.include?(tokens[following])
              event.set('high_priority', 'yes')
            end
            # Keywords that sit far apart reduce the score logarithmically;
            # adjacent keywords (distance 1) cost nothing.
            total_diff += Math.log10(following - current)
          end
          nlp_score = scorekeeper - total_diff.round(2)
          event.set('words', tokens.to_s)
        elsif matched.size == 1
          # A single keyword yields a fixed score; negative hits are ignored here.
          nlp_score = 1.0
        else
          nlp_score = 0.0
        end

        event.set('score', nlp_score)
        event.set('positive_terms', positive_terms)
        event.set('negative_terms', negative_terms)"
    }

    # Random identifier stored in the @uuid field.
    uuid {
      target => "@uuid"
      overwrite => true
    }

    # Keyed SHA1 fingerprint of the item; the output stage uses it as document_id.
    fingerprint {
      source => ["items"]
      target => "fingerprint"
      key => "78787878"
      method => "SHA1"
      concatenate_sources => true
    }
  }
}
# Output stage: index scored "query-shoe" events into Elasticsearch.
output {
  if [type] == "query-shoe" {
    elasticsearch {
      # Daily index, named after the event timestamp.
      index => "pipeline-shoe-%{+YYYY.MM.dd}"
      # The document id is taken from the event's fingerprint field.
      document_id => "%{fingerprint}"
      hosts => ["localhost:9200"]
      # Disabled options kept for reference.
      #user => "elastic"
      #password => "ChangemE456"
      #action => "update"
    }
  }
}