-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsqlToHtml.py
More file actions
35 lines (25 loc) · 779 Bytes
/
sqlToHtml.py
File metadata and controls
35 lines (25 loc) · 779 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
import MySQLdb
# Module-level MySQL connection; it is opened as soon as this file is
# imported or run, and main() reads from it.
# NOTE(review): host, user and password are hard-coded in source —
# consider loading them from configuration or the environment.
db = MySQLdb.connect(
    host="pow-db.clfpwrv3fbfn.us-west-2.rds.amazonaws.com",
    port=4200,
    user="cedro",
    passwd="password",
    db="powdb",
)

# Directory that receives one output file per RawHTML row.
outdir = "./crawled_files"
def main():
    """Dump every RawHTML row to a file and write a word list.

    Runs ``SELECT * from RawHTML`` on the module-level ``db`` connection.
    For each returned row, column 1 supplies the output file name (the
    text after its last '/') and column 2 supplies the text written to
    that file inside ``outdir``.  Every space-separated token of every
    row's text is then written, one per line and in row order, to
    ``./dictionary.txt`` (the file is created or overwritten).
    """
    cursor = db.cursor()
    try:
        cursor.execute("SELECT * from RawHTML")
        rows = cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()

    # open() does not create missing parent directories, so make sure the
    # output directory exists before writing into it.
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    words = []
    for row in rows:
        # [-1] instead of [1]: when the value contains no '/', rsplit
        # returns a single-element list and index 1 would raise IndexError.
        htmlName = row[1].rsplit('/', 1)[-1]
        htmlText = row[2]
        htmlFilePath = os.path.join(outdir, htmlName)
        # The with-block closes (and flushes) each file as soon as it is
        # written instead of leaking one handle per row.
        with open(htmlFilePath, 'w') as outfile:
            outfile.write(htmlText)
        words.extend(htmlText.split(' '))

    # The file must be opened for writing; in 'r' mode every write fails.
    with open("./dictionary.txt", 'w') as wordfile:
        wordfile.writelines(word + "\n" for word in words)
# Run the export only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()