-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
76 lines (64 loc) · 1.95 KB
/
app.py
File metadata and controls
76 lines (64 loc) · 1.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from flask import Flask, redirect, url_for, request ,render_template
from nltk.corpus import stopwords
import PyPDF2,os
from nltk.tokenize import word_tokenize, sent_tokenize
# Flask application object; the @app.route decorators in this module register their views on it.
app = Flask(__name__)
@app.route('/', methods=["GET", "POST"])
@app.route('/home', methods=["GET", "POST"])
def load_page():
    """Render the summarizer page and, on POST, summarize the submitted input.

    A POST may carry pasted text in the ``inputText`` form field and/or an
    uploaded PDF in the ``file`` part. Pasted text takes precedence; the
    upload is only read when the text field is empty.

    Returns:
        A redirect back to the same URL when neither text nor a file part
        was sent (or the uploaded file name is unusable), otherwise the
        rendered ``index.html`` template with ``summary`` filled in.
    """
    summary = ''
    if request.method == 'POST':
        # Use .get() rather than [] so a missing field does not abort the
        # request with a 400 before the checks below get a chance to run.
        textvalue = request.form.get('inputText', '')
        file = request.files.get('file')

        if textvalue == "" and file is None:
            return redirect(request.url)

        if textvalue == "" and file.filename != "":
            # The file name is client-controlled: keep only its last path
            # component so it cannot escape the static/ directory.
            safe_name = os.path.basename(file.filename.replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                return redirect(request.url)
            file_path = 'static/' + safe_name
            file.save(file_path)
            textvalue = extract_text(file_path)

        summary = text_summarizer(textvalue)
    return render_template('index.html', summary=summary)
def extract_text(file):
    """Return the text of the first page of the PDF stored at ``file``.

    Args:
        file: Path of the PDF file to open.

    Returns:
        The text extracted from page 0, or an empty string when the
        document reports zero pages. Later pages are not read.
    """
    # NOTE(review): PdfFileReader / numPages / getPage / extractText are the
    # legacy PyPDF2 names -- confirm the pinned PyPDF2 version still has them.
    # The context manager closes the handle even if PyPDF2 raises while parsing.
    with open(file, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfFileReader(pdf_file)
        page_count = pdf_reader.numPages
        print(page_count)
        if page_count == 0:
            # Nothing to extract; avoid an index error on getPage(0).
            return ''
        return pdf_reader.getPage(0).extractText()
def text_summarizer(input):
    """Build an extractive summary of ``input`` from its highest-scoring sentences.

    Every token that is not an English stop word is counted (case-insensitively)
    across the whole text. A sentence's score is the sum of the counts of the
    distinct tokens it contains. Sentences whose score exceeds 1.2 times the
    (truncated) average score of all scored sentences are kept, in their
    original order.

    Args:
        input: The text to summarize. The name shadows the builtin but is
            kept so existing callers are unaffected.

    Returns:
        The selected sentences concatenated, each preceded by a single
        space; an empty string when no sentence can be scored.
    """
    stop_words = set(stopwords.words("english"))
    sentences = sent_tokenize(input)

    # Tokenize each sentence once; the same lower-cased tokens feed both the
    # global frequency table and the per-sentence scores.
    sentence_tokens = [
        [token.lower() for token in word_tokenize(sen)] for sen in sentences
    ]

    freq = {}
    for tokens in sentence_tokens:
        for token in tokens:
            if token not in stop_words:
                freq[token] = freq.get(token, 0) + 1

    # Match on whole tokens, not substrings, so e.g. "art" does not score
    # inside "start". Each distinct token contributes its count once.
    # Scores are kept per position so a repeated sentence is not double-counted.
    scores = [
        sum(freq.get(token, 0) for token in set(tokens))
        for tokens in sentence_tokens
    ]

    # Sentences with no countable token are left out of the average.
    scored = [score for score in scores if score > 0]
    if not scored:
        return ''

    average = int(sum(scored) / len(scored))
    threshold = 1.2 * average
    return ''.join(
        " " + sen for sen, score in zip(sentences, scores) if score > threshold
    )
# Start the app with Flask's built-in server only when this file is run directly.
# NOTE(review): debug=True is hard-coded -- confirm this entry point is never used for a production deployment.
if __name__ == '__main__':
    app.run(debug=True)