-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraping_nse.py
More file actions
240 lines (202 loc) · 8.61 KB
/
scraping_nse.py
File metadata and controls
240 lines (202 loc) · 8.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# Importing necessary modules for system operations, timing, email handling, error tracing
import os
import time
import smtplib
import traceback
# Email MIME formatting for HTML emails
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
# Selenium modules for web automation
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Automatically handles ChromeDriver installation
from webdriver_manager.chrome import ChromeDriverManager
# Load environment variables from a .env file
from dotenv import load_dotenv
# Load variables from a .env file into the environment up front, so the
# os.getenv() lookups that build the email configuration below can see them
load_dotenv()
# Load email configuration from environment variables.
# SMTP_PORT falls back to 587 (the STARTTLS submission port) when it is unset
# or blank; int(None) / int("") would otherwise abort the script at import time.
email_config = {
    "smtp_server": os.getenv("SMTP_SERVER"),
    "smtp_port": int(os.getenv("SMTP_PORT") or 587),
    "sender_email": os.getenv("SENDER_EMAIL"),
    "sender_password": os.getenv("SENDER_PASSWORD"),
    "receiver_email": os.getenv("RECEIVER_EMAIL"),
}
def sending_mail(sender, receiver_email, subject, body, smtp_server, smtp_port, password):
    """
    Send an HTML email to the provided recipient(s) over SMTP with STARTTLS.

    Args:
        sender: Address used for the "From" header and for the SMTP login.
        receiver_email: Comma-separated string or iterable of addresses.
            None, empty and whitespace-only entries are ignored.
        subject: Subject line of the message.
        body: HTML markup used as the message body.
        smtp_server: Host name of the SMTP server.
        smtp_port: Port of the SMTP server.
        password: Password used together with ``sender`` to log in.

    Returns:
        None. Nothing is sent when no usable recipient remains. Errors raised
        while connecting or sending are caught and reported on stdout instead
        of being propagated.
    """
    # Normalise every accepted input shape into a clean list of addresses
    if receiver_email is None:
        candidates = []
    elif isinstance(receiver_email, str):
        candidates = receiver_email.split(",")
    else:
        candidates = receiver_email
    receiver_list = [addr.strip() for addr in candidates if addr and addr.strip()]

    # Without recipients there is nothing to send; avoid opening a connection
    if not receiver_list:
        print("No recipient address provided. Email not sent.")
        return

    # Create MIME email message
    msg = MIMEMultipart("alternative")
    msg["Subject"] = subject
    msg["From"] = sender
    msg["To"] = ",".join(receiver_list)
    msg.attach(MIMEText(body, "html"))

    # Send the email using SMTP; delivery is best-effort, so any failure is
    # reported rather than raised to the caller
    try:
        with smtplib.SMTP(smtp_server, smtp_port) as server:
            server.starttls()  # Secure the connection before authenticating
            server.login(sender, password)
            server.sendmail(sender, receiver_list, msg.as_string())
        print("Email sent successfully to: ", ", ".join(receiver_list))
    except Exception as e:
        print(f"Failed to send an email: {e}")
def _extract_table(driver, tab_href, table_id, required_indices, settle_seconds):
    """
    Open one tab of the gainers/losers page and read its table.

    Args:
        driver: Selenium WebDriver positioned on the gainers/losers page.
        tab_href: ``href`` value of the tab link to click (e.g. "#gainers").
        table_id: ``id`` attribute of the table shown by that tab.
        required_indices: Column positions to keep from each row.
        settle_seconds: Fixed pause after the click before reading the table.

    Returns:
        Tuple ``(headers, records)``: the header texts of the selected columns
        and one dict per data row mapping each header to its cell text.

    Raises:
        Selenium timeout errors if the tab never becomes clickable, and
        IndexError if the table has fewer header cells than required.
    """
    tab = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.XPATH, f"//a[@href='{tab_href}']"))
    )
    tab.click()
    time.sleep(settle_seconds)

    # Extract headers for the selected columns only
    header_cells = driver.find_elements(By.XPATH, f"//table[@id='{table_id}']/thead/tr/th")
    headers = [header_cells[i].text.strip() for i in required_indices]

    # Extract rows, skipping any row too short to hold every selected column
    min_cells = max(required_indices) + 1
    records = []
    for row in driver.find_elements(By.XPATH, f"//table[@id='{table_id}']/tbody/tr"):
        cells = row.find_elements(By.TAG_NAME, "td")
        if len(cells) < min_cells:
            continue
        records.append(
            {header: cells[idx].text.strip() for header, idx in zip(headers, required_indices)}
        )
    return headers, records


def scrape_market():
    """
    Scrapes top 5 gainers and losers from NSE website using Selenium.

    Returns:
        Dictionary with "gainers" and "losers" entries, each holding the
        selected column "headers" and up to five "rows" (dicts keyed by
        header). If anything fails while scraping, the traceback is printed
        and both entries come back with empty headers and rows.
    """
    options = Options()
    # Headless mode can be enabled if needed
    # options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("user-agent=Mozilla/5.0")

    # Launching the Chrome browser
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        print("Loading NSE homepage...")
        driver.get("https://www.nseindia.com")
        time.sleep(5)  # Wait for page to load

        # Scroll down slightly to make elements visible
        print("Scrolling down a bit...")
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight / 3);")
        time.sleep(5)

        # Wait until market snapshot table is present
        print("Waiting for Market Snapshot table...")
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "nse_table"))
        )

        # Click on 'View More' to access detailed gainers/losers data
        print("Clicking 'View more' button...")
        view_more = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.LINK_TEXT, "View More"))
        )
        view_more.click()

        # Wait for new tab to open and switch to it
        print("Waiting for the new tab to open")
        WebDriverWait(driver, 10).until(lambda d: len(d.window_handles) > 1)
        driver.switch_to.window(driver.window_handles[-1])
        print("Succesfully switched to new tab")

        # Ensure the new page has loaded completely
        print("Waiting for redirect to top-gainers-losers page...")
        WebDriverWait(driver, 10).until(EC.url_contains("/market-data/top-gainers-losers"))
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "topGainerTable")))
        time.sleep(5)

        # Select indices for SYMBOL, LTP, and %CHNG
        required_indices = [0, 5, 6]

        # Gainers Section
        print("Extracting Gainers Table...")
        gainer_headers, gainers_data = _extract_table(
            driver, "#gainers", "topgainer-Table", required_indices, settle_seconds=5
        )

        # Losers Section
        print("Extracting Loosers Table")
        loser_headers, loser_data = _extract_table(
            driver, "#losers", "toplosers-Table", required_indices, settle_seconds=2
        )

        # Return top 5 gainers and losers
        return {
            "gainers": {"headers": gainer_headers, "rows": gainers_data[:5]},
            "losers": {"headers": loser_headers, "rows": loser_data[:5]},
        }
    except Exception:
        # Scraping is best-effort: report the failure and hand back an empty
        # result so the caller can decide not to send an email
        traceback.print_exc()
        return {
            "gainers": {"headers": [], "rows": []},
            "losers": {"headers": [], "rows": []},
        }
    finally:
        driver.quit()  # Ensure browser is closed
def format_data_as_html(data):
    """
    Converts market snapshot data (top 5 gainers/losers) into HTML tables.

    Args:
        data: Dictionary with "gainers" and "losers" entries, each holding a
            "headers" list and a "rows" list of dicts keyed by those headers.

    Returns:
        A single HTML string containing the gainers section followed by the
        losers section. Header and cell text is HTML-escaped because it comes
        from a scraped page (e.g. the symbol "M&M" must become "M&amp;M").

    Raises:
        KeyError: If a row lacks one of the listed headers.
    """
    # Imported locally under a distinct name so the block stays self-contained
    from html import escape

    def table_section(title, headers, rows):
        # Build the pieces in a list and join once instead of repeated +=
        header_cells = "".join(f"<th>{escape(str(h))}</th>" for h in headers)
        parts = [
            f"<h2>{title}</h2>",
            "<table border='1' cellspacing='0' cellpadding='5'>",
            f"<tr>{header_cells}</tr>",
        ]
        for row in rows:
            cells = "".join(f"<td>{escape(str(row[h]))}</td>" for h in headers)
            parts.append(f"<tr>{cells}</tr>")
        parts.append("</table><br>")
        return "".join(parts)

    gainers_html = table_section("Top 5 Gainers", data["gainers"]["headers"], data["gainers"]["rows"])
    losers_html = table_section("Top 5 Losers", data["losers"]["headers"], data["losers"]["rows"])
    return gainers_html + losers_html
def main():
    """
    Run the whole pipeline: scrape NSE, echo the results, and email them as HTML.

    Stops without sending anything when either the gainers or the losers
    table came back empty.
    """
    data = scrape_market()

    # Both tables must contain rows; otherwise report and bail out early
    if not (data["gainers"]["rows"] and data["losers"]["rows"]):
        print("No data fetched. Email not sent.")
        return

    # Echo each table to the console under its heading
    for heading, section in (("Top Gainers:", "gainers"), ("Top Losers:", "losers")):
        print(heading)
        for entry in data[section]["rows"]:
            print(entry)

    # Render the HTML body first, then hand everything to the mailer
    message_html = format_data_as_html(data)
    sending_mail(
        sender=email_config["sender_email"],
        receiver_email=email_config["receiver_email"],
        subject="📈 NSE Top 5 Gainers and Losers Today",
        body=message_html,
        smtp_server=email_config["smtp_server"],
        smtp_port=email_config["smtp_port"],
        password=email_config["sender_password"],
    )


# Run the pipeline only when executed as a script, not when imported
if __name__ == "__main__":
    main()