-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcontent.js
More file actions
323 lines (278 loc) · 9.9 KB
/
content.js
File metadata and controls
323 lines (278 loc) · 9.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
// Content script: injected into web pages to pull content out of them.
// Security: every log call in this file is gated on DEBUG, which is off in production.
const DEBUG = false;

/** Forwards its arguments to console.log, but only while DEBUG is enabled. */
const log = (...args) => {
  if (DEBUG) {
    console.log(...args);
  }
};

/** Forwards its arguments to console.error, but only while DEBUG is enabled. */
const logError = (...args) => {
  if (DEBUG) {
    console.error(...args);
  }
};

// The only contentType values a getContent request is allowed to carry.
const VALID_CONTENT_TYPES = ['selection', 'full-page', 'url-only'];
/**
 * Message router for requests delivered to this content script.
 *
 * Supported actions:
 *  - 'ping'       -> replies { status: 'ready' } so the sender can tell the script is loaded.
 *  - 'getContent' -> validates request.contentType against VALID_CONTENT_TYPES and replies
 *                    with the result of extractContent(), or null on invalid type / failure.
 *  - 'printPage'  -> replies asynchronously with the result of printPageToPdf().
 *
 * Returns true for handled actions (required for the asynchronous printPage reply so the
 * message channel stays open) and false when no handler matched.
 */
chrome.runtime.onMessage.addListener((request, sender, sendResponse) => {
  log('Content script received message:', request);

  // Respond to ping to confirm content script is loaded
  if (request.action === 'ping') {
    sendResponse({ status: 'ready' });
    return true;
  }

  if (request.action === 'getContent') {
    try {
      // Validate contentType before processing
      if (!request.contentType || !VALID_CONTENT_TYPES.includes(request.contentType)) {
        logError('Invalid content type:', request.contentType);
        sendResponse(null);
        return true;
      }
      const content = extractContent(request.contentType);
      log('Extracted content:', content);
      sendResponse(content);
    } catch (error) {
      logError('Error extracting content:', error);
      sendResponse(null);
    }
    return true;
  }

  if (request.action === 'printPage') {
    log('Content script received printPage message');
    printPageToPdf()
      .then((result) => {
        log('Sending PDF result back to background script');
        sendResponse(result);
      })
      .catch((error) => {
        // Routed through the DEBUG-gated logger like every other branch, so nothing
        // reaches the page console in production.
        logError('Error in printPageToPdf:', error);
        // A rejection value is not guaranteed to be an Error instance.
        const message = error instanceof Error ? error.message : String(error);
        sendResponse({ success: false, error: message });
      });
    return true; // Keep message channel open for async response
  }

  return false; // No handler matched
});
/**
 * Dispatch to the extractor that matches the requested content type.
 * @param {('selection'|'full-page'|'url-only')} contentType - Which kind of content to pull
 * @returns {{html: string, text: string}|null} Whatever the chosen extractor returns
 */
function extractContent(contentType) {
  if (contentType === 'full-page') {
    return getFullPageContent();
  }
  if (contentType === 'url-only') {
    return getUrlOnly();
  }
  // 'selection' lands here, and so does any unrecognized value
  // (callers validate first, so the latter should not happen).
  return getSelectedContent();
}
/**
 * Capture what the user currently has highlighted on the page.
 * @returns {{html: string, text: string}|null} Sanitized markup plus plain text of the
 *   selection, or null when there is no selection or it contains only whitespace
 */
function getSelectedContent() {
  const selection = window.getSelection();
  const hasText = Boolean(selection) && selection.toString().trim() !== '';
  if (!hasText) {
    return null;
  }

  // Copy the first range's nodes into a detached wrapper so they can be serialized.
  const firstRange = selection.getRangeAt(0);
  const wrapper = document.createElement('div');
  wrapper.appendChild(firstRange.cloneContents());

  const html = cleanHtml(wrapper.innerHTML);
  const text = selection.toString();
  return { html, text };
}
/**
 * Extract the page's primary content, preferring a dedicated main-content container
 * and falling back to the whole body.
 * @returns {{html: string, text: string}} Sanitized markup and trimmed plain text
 */
function getFullPageContent() {
  // Candidate containers, highest priority first.
  const candidateSelectors = ['main', 'article', '[role="main"]', '.content', '#content'];
  let root = null;
  for (const selector of candidateSelectors) {
    root = document.querySelector(selector);
    if (root) {
      break;
    }
  }

  // Work on a deep copy so the live page is never modified.
  const clone = (root || document.body).cloneNode(true);

  // Strip scripts, styling, page chrome and embedded frames from the copy.
  const unwantedTags = ['script', 'style', 'nav', 'header', 'footer', 'iframe'];
  for (const tag of unwantedTags) {
    for (const el of clone.querySelectorAll(tag)) {
      el.remove();
    }
  }

  const html = cleanHtml(clone.innerHTML);
  const text = clone.textContent.trim();
  return { html, text };
}
/**
 * Build a minimal reference to the current page: just its title and address.
 * @returns {{html: string, text: string}} A one-line link in HTML form and in plain text
 */
function getUrlOnly() {
  const pageUrl = window.location.href;
  const pageTitle = document.title;

  // Security: both values are page-controlled, so escape them before embedding in markup.
  const safeUrl = escapeHtml(pageUrl);
  const safeTitle = escapeHtml(pageTitle);

  return {
    html: `<p>Page: <a href="${safeUrl}">${safeTitle}</a></p>`,
    text: `${pageTitle}: ${pageUrl}`,
  };
}
/**
 * Clean and sanitize HTML content.
 *
 * Steps, in order:
 *  1. Parse the markup into a detached <div>.
 *  2. Remove elements that can execute code, load active content or collect input.
 *  3. On every remaining element, drop all attributes except href/src/alt/title and
 *     validate href/src by the protocol they actually resolve to.
 *  4. Remove elements that have no text and contain no image.
 *
 * @param {string} html - Raw HTML to clean
 * @returns {string} Cleaned HTML
 */
function cleanHtml(html) {
  const div = document.createElement('div');
  div.innerHTML = html;

  // Security: Remove ALL potentially dangerous elements.
  // 'noscript' is included because, with scripting enabled, its content is parsed as raw
  // text: markup inside it would skip the attribute pass below and be serialized verbatim.
  const dangerousTags = [
    'script', 'style', 'iframe', 'object', 'embed',
    'link', 'meta', 'base', 'form', 'input', 'button',
    'textarea', 'select', 'applet', 'audio', 'video',
    'noscript'
  ];
  dangerousTags.forEach((tag) => {
    div.querySelectorAll(tag).forEach((el) => el.remove());
  });

  // Whitelist ONLY truly safe attributes. Event handlers (onclick, onload, ...) are not
  // on the list, so they are removed together with everything else.
  const safeAttrs = ['href', 'src', 'alt', 'title'];
  const baseUrl = window.location.href;

  div.querySelectorAll('*').forEach((el) => {
    Array.from(el.attributes).forEach((attr) => {
      if (!safeAttrs.includes(attr.name)) {
        el.removeAttribute(attr.name);
      }
    });
    if (el.hasAttribute('href')) {
      sanitizeHrefAttribute(el, baseUrl);
    }
    if (el.hasAttribute('src')) {
      sanitizeSrcAttribute(el, baseUrl);
    }
  });

  // Remove empty elements (except images)
  div.querySelectorAll('*').forEach((el) => {
    if (el.textContent.trim() === '' && !el.querySelector('img') && el.tagName !== 'IMG') {
      el.remove();
    }
  });

  return div.innerHTML;
}

/**
 * Validate one element's href: keep anchors and http(s) links verbatim, make other
 * links absolute, and drop links that resolve to a script-capable or local protocol.
 * @param {Element} el - Element that has an href attribute
 * @param {string} baseUrl - URL used to resolve relative links
 */
function sanitizeHrefAttribute(el, baseUrl) {
  const href = el.getAttribute('href');
  if (href.startsWith('#') || href.startsWith('http://') || href.startsWith('https://')) {
    return;
  }

  const blockedProtocols = ['javascript:', 'data:', 'vbscript:', 'file:'];
  try {
    // Decide on the protocol the URL parser reports rather than on a string prefix:
    // the parser strips tabs, newlines and leading control characters, so an input
    // such as "java\tscript:..." passes a prefix check yet resolves to javascript:.
    const resolved = new URL(href, baseUrl);
    if (blockedProtocols.includes(resolved.protocol)) {
      el.removeAttribute('href');
    } else {
      el.setAttribute('href', resolved.href);
    }
  } catch {
    el.removeAttribute('href');
  }
}

/**
 * Validate one element's src: only http(s) URLs and data:image/ URIs survive.
 * Relative sources are made absolute first and then held to the same rule.
 * @param {Element} el - Element that has a src attribute
 * @param {string} baseUrl - URL used to resolve relative sources
 */
function sanitizeSrcAttribute(el, baseUrl) {
  const src = el.getAttribute('src');
  const lowerSrc = src.toLowerCase().trim();
  if (
    lowerSrc.startsWith('http://') ||
    lowerSrc.startsWith('https://') ||
    lowerSrc.startsWith('data:image/')
  ) {
    return; // Already in an allowed form; keep as is
  }

  try {
    const resolved = new URL(src, baseUrl);
    if (resolved.protocol === 'http:' || resolved.protocol === 'https:') {
      el.setAttribute('src', resolved.href);
    } else {
      el.removeAttribute('src');
    }
  } catch {
    el.removeAttribute('src');
  }
}
/**
 * Escape HTML to prevent XSS.
 *
 * Replaces the five characters that are significant in HTML text and quoted attribute
 * values (& < > " ') with their entity equivalents in a single pass, so an ampersand
 * produced by one replacement is never escaped a second time.
 *
 * @param {string} text - Text to escape
 * @returns {string} Escaped text; '' for any falsy input
 */
function escapeHtml(text) {
  if (!text) return '';
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
}
// Tunables for turning the rendered page into a PDF.
const PDF_CONFIG = {
  // --- Canvas capture ---
  // Render scale passed to the canvas capture (1 = original size).
  CANVAS_SCALE: 1,
  // JPEG compression quality, from 0.0 to 1.0.
  JPEG_QUALITY: 0.8,
  // Allow cross-origin images.
  USE_CORS: true,
  // Security: do not allow a tainted canvas.
  ALLOW_TAINT: false,
  // Default background: white.
  BACKGROUND_COLOR: '#ffffff',

  // --- PDF document ---
  // Page orientation (portrait/landscape).
  ORIENTATION: 'portrait',
  // Measurement units.
  UNIT: 'mm',
  // Paper format.
  FORMAT: 'a4',

  // --- Layout ---
  // Page margins in millimeters.
  MARGIN_MM: 10,
};
/**
 * Generate a PDF from the current page using html2canvas and jsPDF.
 *
 * The page body is rendered to one tall canvas, encoded as a JPEG, and placed on as many
 * PDF pages as its scaled height requires; each page shows the same image shifted upward
 * by one printable page height.
 *
 * This function never rejects: every failure is caught and reported in the result object.
 *
 * @returns {Promise<{success: true, pdfData: string}|{success: false, error: string}>}
 *   On success, pdfData is the PDF as a data-URL string; on failure, error is the message.
 */
async function printPageToPdf() {
  log('printPageToPdf: Starting PDF generation...');
  try {
    // Libraries are now bundled, so they should be available immediately
    if (typeof html2canvas === 'undefined') {
      throw new Error('html2canvas library not available');
    }
    if (typeof jspdf === 'undefined') {
      throw new Error('jsPDF library not available');
    }
    log('Libraries confirmed available');

    log('Capturing page with html2canvas...');
    const canvas = await html2canvas(document.body, {
      scale: PDF_CONFIG.CANVAS_SCALE,
      useCORS: PDF_CONFIG.USE_CORS,
      logging: DEBUG,
      windowWidth: document.documentElement.scrollWidth,
      windowHeight: document.documentElement.scrollHeight,
      allowTaint: PDF_CONFIG.ALLOW_TAINT,
      backgroundColor: PDF_CONFIG.BACKGROUND_COLOR
    });
    log(`Canvas captured: ${canvas.width}x${canvas.height}px`);

    // A zero-sized capture would make the aspect-ratio math below produce NaN or 0 and
    // yield a meaningless PDF that is still reported as a success.
    if (!(canvas.width > 0) || !(canvas.height > 0)) {
      throw new Error('Captured canvas is empty');
    }

    const imgData = canvas.toDataURL('image/jpeg', PDF_CONFIG.JPEG_QUALITY);
    log(`Image data created, size: ${Math.round(imgData.length / 1024)} KB`);

    const pdf = new jspdf.jsPDF({
      orientation: PDF_CONFIG.ORIENTATION,
      unit: PDF_CONFIG.UNIT,
      format: PDF_CONFIG.FORMAT
    });
    const pdfWidth = pdf.internal.pageSize.getWidth();
    const pdfHeight = pdf.internal.pageSize.getHeight();
    const marginDouble = PDF_CONFIG.MARGIN_MM * 2;

    // Fit the image to the printable width and keep the canvas aspect ratio.
    const imgWidth = pdfWidth - marginDouble;
    const imgHeight = (canvas.height * imgWidth) / canvas.width;
    const printableHeight = pdfHeight - marginDouble;

    let heightLeft = imgHeight;
    let position = PDF_CONFIG.MARGIN_MM;
    pdf.addImage(imgData, 'JPEG', PDF_CONFIG.MARGIN_MM, position, imgWidth, imgHeight);
    heightLeft -= printableHeight;

    while (heightLeft > 0) {
      // Negative offset: slide the full image up so the next slice starts at the top margin.
      position = heightLeft - imgHeight + PDF_CONFIG.MARGIN_MM;
      pdf.addPage();
      pdf.addImage(imgData, 'JPEG', PDF_CONFIG.MARGIN_MM, position, imgWidth, imgHeight);
      heightLeft -= printableHeight;
    }

    const pdfData = pdf.output('dataurlstring');
    log(`PDF generated successfully, size: ${Math.round(pdfData.length / 1024)} KB`);
    return {
      pdfData: pdfData,
      success: true
    };
  } catch (error) {
    // Goes through the DEBUG-gated logger so nothing is written to the page console in
    // production; the caller still receives the message in the result.
    logError('PDF generation failed:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : String(error)
    };
  }
}