-
Notifications
You must be signed in to change notification settings - Fork 3
/
main.py
117 lines (98 loc) · 4.45 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import webapp2, jinja2, os, itertools
import bugretriever
from lexrank import bugreport_tokenizer, lexrank
from lexrank.extractive_summary import Sentence
import logging, traceback
# Jinja2 environment used by the request handlers below; templates are looked
# up in the directory that contains this module.
jinja_environment = jinja2.Environment(
    loader=jinja2.FileSystemLoader(
        os.path.dirname(__file__)
    )
)
def handle_404(request, response, exception):
    """Error handler registered for HTTP 404.

    Logs the exception, writes a short apology to the response and sets the
    response status to 404. The ``request`` argument is not used.
    """
    logging.exception(exception)
    body, status = 'Oops! I could swear this page was here!', 404
    response.write(body)
    response.set_status(status)
def handle_500(request, response, exception):
    """Error handler registered for HTTP 500.

    Logs the exception, writes a generic error message to the response and
    sets the response status to 500. The ``request`` argument is not used.
    """
    logging.exception(exception)
    body, status = 'A server error occurred!', 500
    response.write(body)
    response.set_status(status)
### user can select section of bug and ask to expand on that
### this should show snippets of the top n most related portions of the
### bug just beside it. user can click on snippet, which will scroll
### the page to the comment and set the selected sentence as relevant in the summary.
### this should allow the user to 'build' the summary as he goes along.
### generating summary:
# - retrieving bug;
# - finding sentences;
# - summarizing;
# - also retrieve the similarity matrix from lexrank algorithm,
# since we will use this to locate relevant sentences;
def get_bug(project, bug_id):
    """Fetch a bug report by delegating to ``bugretriever.retrieve``.

    Both arguments are passed through unchanged and the retriever's result
    is returned as-is.
    """
    bug = bugretriever.retrieve(project, bug_id)
    return bug
def split_sentences(bug):
    """Turn every comment of ``bug`` into a flat list of ``Sentence`` objects.

    Each comment's ``text`` is split with
    ``bugreport_tokenizer.split_sentences``. Every resulting sentence is
    wrapped in a ``Sentence`` whose first argument is the pair
    ``(comment index, sentence index within that comment)``, followed by the
    sentence itself and ``bug.id``.
    """
    sentences = []
    for comment_idx, comment in enumerate(bug.comments):
        pieces = split_sent_to_html(
            bugreport_tokenizer.split_sentences(comment.text))
        for sent_idx, piece in enumerate(pieces):
            sentences.append(Sentence((comment_idx, sent_idx), piece, bug.id))
    return sentences
def split_sent_to_html(sents):
    """Return the items of ``sents`` as a new list.

    No transformation is applied to the individual items; the iterable is
    only materialized.
    """
    materialized = list(sents)
    return materialized
def thread_to_sentences(thread):
    """Flatten the messages of ``thread`` into a list of ``Sentence`` objects.

    Each entry of a message's ``sentences`` is wrapped in a ``Sentence``
    built from the pair ``(message index, sentence index within that
    message)``, the sentence itself and ``thread.id``.
    """
    collected = []
    for msg_idx, message in enumerate(thread.messages):
        for sent_idx, sentence in enumerate(message.sentences):
            collected.append(Sentence((msg_idx, sent_idx), sentence, thread.id))
    return collected
class HomeHandler(webapp2.RequestHandler):
    """Serves the landing page."""

    def get(self):
        """Render ``index.html`` with an empty context and write it out."""
        page = jinja_environment.get_template('index.html').render({})
        self.response.out.write(page)
class MainHandler(webapp2.RequestHandler):
    """Serves the summarized view of a single bug report.

    A request without the ``load`` query parameter renders the
    ``loading_bug.html`` placeholder page. A request with ``load`` retrieves
    the bug, summarizes it with LexRank and renders ``bug_report.html``.
    """

    # Projects accepted by ``get`` (compared case-insensitively).
    _SUPPORTED_PROJECTS = ('debian', 'mozilla')

    # Markup written when the bug cannot be retrieved.
    _BUG_NOT_FOUND_HTML = (
        '\n'
        '<div class="container">\n'
        '<div class="loading-status alert" style="width:200px;">Bug not found</div>\n'
        '</div>')

    def get(self, project, bug_id):
        """Handle ``GET /<project>/<bug_id>``.

        Args:
            project: project name captured from the URL.
            bug_id: bug number captured from the URL, as a string.
        """
        if project.lower() not in self._SUPPORTED_PROJECTS:
            self.response.out.write('Project %s is not yet supported.' % project)
            return

        if 'load' not in self.request.GET:
            # No ``load`` parameter: only render the placeholder page.
            template_values = {
                'title': '%s %s' % (project, bug_id),
                'project': project
            }
            template = jinja_environment.get_template('loading_bug.html')
            self.response.out.write(template.render(template_values))
            return

        try:
            # int() also rejects an empty id, which the URL pattern allows.
            bug = get_bug(project, int(bug_id))
        except Exception:
            # Any retrieval failure is reported to the user as "not found";
            # the traceback is kept in the log for diagnosis.
            logging.warning(traceback.format_exc())
            self.response.out.write(self._BUG_NOT_FOUND_HTML)
            return

        sents = split_sentences(bug)
        summarizer = lexrank.LexrankSummarizer()
        summary, _ = summarizer.summarize(
            sents, target_wc_perc=0.25, title=bug.title)
        in_summary = set(s.id for s in summary)

        # Replace each comment's text with a list of sentence dicts. Every
        # comment gets a list, even when no sentence was extracted from it;
        # otherwise its text would stay a plain string and the
        # ``sent['included']`` lookup below would raise TypeError.
        sentences_by_comment = [[] for _ in bug.comments]
        for s in sents:
            # s.id[0] is the index of the comment the sentence came from.
            sentences_by_comment[s.id[0]].append(
                {'text': s.text, 'included': s.id in in_summary})
        for comment, comment_sents in zip(bug.comments, sentences_by_comment):
            comment.text = comment_sents

        template_values = {
            'bug': bug,
            # Numbers of the comments that contribute at least one sentence
            # to the summary.
            'non_empty_comments': set(
                c.number for c in bug.comments
                if any(sent['included'] for sent in c.text))
        }
        template = jinja_environment.get_template('bug_report.html')
        self.response.out.write(template.render(template_values))
# URL routing. Patterns are raw strings so that regex escapes such as ``\-``
# are not interpreted as (invalid) string escapes, and the dot in
# ``index.html`` is escaped so it only matches a literal ".".
# NOTE(review): debug=True looks like a development setting -- confirm it is
# intended for the deployed application.
app = webapp2.WSGIApplication(
    [
        (r'/([a-zA-Z\-_]*)/([0-9]*)', MainHandler),
        (r'/(?:index\.html)?', HomeHandler),
    ],
    debug=True)

# Custom pages for "not found" and "internal server error" responses.
app.error_handlers[404] = handle_404
app.error_handlers[500] = handle_500