-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathScrapper.py
More file actions
78 lines (65 loc) · 1.98 KB
/
Scrapper.py
File metadata and controls
78 lines (65 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import requests
import lxml
import bs4
import io
import time,datetime
from urllib.error import HTTPError,URLError
from requests.exceptions import ConnectionError
import json
def _find_nested(card, outer_class, *inner_args, **inner_kwargs):
    """Return the first tag matching the inner query inside ``outer_class``.

    Returns None when either the outer container or the inner tag is missing,
    so callers never have to catch AttributeError on a None result.
    """
    outer = card.find(class_=outer_class)
    if outer is None:
        return None
    return outer.find(*inner_args, **inner_kwargs)


def _text_or_none(tag):
    """Return ``tag.text``, or None when the tag was not found."""
    return tag.text if tag is not None else None


def get_news(category, timeout=10):
    """Fetch the inshorts.com page for ``category`` and extract its news cards.

    Args:
        category: Path segment appended to ``https://inshorts.com/en/read/``.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            fails instead of blocking forever.

    Returns:
        A dict with keys ``category``, ``success`` and ``data``. ``data`` is a
        list of dicts with keys ``title``, ``author``, ``content`` and
        ``news_link``; any field that cannot be located in a card is None.
        On failure ``success`` is False and an ``error`` message is added
        (the request exception text, or ``'Invalid Category'`` when the page
        contains no elements with class ``news-card``).
    """
    result = {
        'category': category,
        'success': True,
        'data': []
    }
    url = 'https://inshorts.com/en/read/' + category
    try:
        print(url)
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        result['success'] = False
        # Python 3 exceptions have no ``.message`` attribute; str(e) is the
        # portable way to get the description.
        result['error'] = str(e)
        return result

    soup = bs4.BeautifulSoup(response.text, 'lxml')
    cards = soup.find_all(class_='news-card')
    if not cards:
        result['success'] = False
        result['error'] = 'Invalid Category'
        return result

    for card in cards:
        link_tag = _find_nested(card, "read-more", 'a')
        entry = {
            'title': _text_or_none(_find_nested(card, "news-card-title", 'a')),
            'author': _text_or_none(
                _find_nested(card, "news-card-author-time", class_='author')
            ),
            'content': _text_or_none(
                _find_nested(card, "news-card-content", 'div')
            ),
            'news_link': link_tag.get('href') if link_tag is not None else None,
        }
        result['data'].append(entry)
    return result
print("hello")
# NOTE: this demo runs whenever the module is executed or imported and
# performs a live HTTP request through get_news.
sports_news = get_news("sports")
# Iterate the scraped entries directly instead of indexing by position, and
# avoid rebinding the builtin name ``dict`` at module level.
for entry in sports_news['data']:
    print(entry)
    print("######################")