-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcontentExtr.html
More file actions
276 lines (245 loc) · 13.5 KB
/
contentExtr.html
File metadata and controls
276 lines (245 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
<!-- v2.3 -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>ContentExtr</title>
<link href="https://minisoft.it/icons/fix.png" rel="shortcut icon" type="image/x-icon" />
<link href="https://minisoft.it/icons/fix.png" rel="apple-touch-icon" />
<script src="https://cdn.tailwindcss.com"></script>
<script>
// Theme overrides assigned to the global `tailwind` object provided by the CDN script loaded just above.
// The colour keys below become utility classes used in this page (e.g. bg-primary, text-primary, border-primary).
tailwind.config = {
theme: {
extend: {
colors: {
primary: '#006E81',
// NOTE(review): same hex value as `primary`, so hover:bg-primary-dark / hover:text-primary-dark
// cannot produce a visible colour change — confirm whether a darker shade was intended.
'primary-dark': '#006E81',
// NOTE(review): no `secondary` utility class appears in the markup of this file — confirm it is still needed.
secondary: '#814000',
}
}
}
}
</script>
</head>
<body class="bg-gray-100 flex flex-col min-h-screen">
<header class="bg-white border-b border-gray-200">
<div class="container mx-auto px-4 py-6 max-w-4xl">
<h1 class="text-3xl font-bold text-center text-gray-800">Content_Extr</h1>
<p class="text-gray-600 text-center mt-1">HTML Element Extractor</p>
</div>
</header>
<main class="container mx-auto px-4 py-8 max-w-4xl">
<!-- Instructions Section -->
<div class="bg-white rounded-lg shadow-md p-6 mb-8">
<h2 class="text-xl font-semibold text-gray-800 mb-4">Instructions</h2>
<div class="bg-slate-50 border-l-4 border-primary p-4 rounded-r">
<ol class="list-decimal pl-5 space-y-1">
<li>Paste your HTML content in the text area provided</li>
<li>Enter the attribute name you want to search for (e.g., class, id, data-attribute)</li>
<li>Enter the attribute value to match (e.g., "container", "nav-menu")</li>
<li>Click "Extract Content" to find all matching elements</li>
<li>View the extracted content, including inner text, inner HTML, and outer HTML</li>
<li>Use the "Copy All Texts" button to copy the results to your clipboard</li>
</ol>
</div>
</div>
<!-- Content -->
<div class="bg-white rounded-lg shadow-md p-6 mb-8">
  <!-- Each label is tied to its control with for/id so the control has an accessible name
       and clicking the label focuses the field. The ids are read by the inline script. -->
  <div class="mb-4">
    <label for="htmlInput" class="block text-sm font-medium text-gray-700 mb-2">Paste your HTML</label>
    <textarea id="htmlInput" class="w-full h-40 p-3 border border-gray-300 rounded-lg resize-y" placeholder="Paste your HTML here"></textarea>
  </div>
  <div class="grid grid-cols-1 md:grid-cols-2 gap-4 mb-4">
    <div>
      <label for="attributeName" class="block text-sm font-medium text-gray-700 mb-2">Attribute Name</label>
      <input type="text" id="attributeName" class="w-full p-3 border border-gray-300 rounded-lg" placeholder="e.g. class, id, data-*">
    </div>
    <div>
      <label for="attributeValue" class="block text-sm font-medium text-gray-700 mb-2">Attribute Value</label>
      <input type="text" id="attributeValue" class="w-full p-3 border border-gray-300 rounded-lg" placeholder="Value to match">
    </div>
  </div>
  <!-- Radio group: fieldset/legend gives the two options a shared, announced group name. -->
  <fieldset class="mb-4">
    <legend class="block text-sm font-medium text-gray-700 mb-2">Extraction Mode</legend>
    <div class="flex flex-wrap gap-4">
      <label class="inline-flex items-center cursor-pointer">
        <input type="radio" name="extractionMode" value="text" checked class="w-4 h-4 text-primary" onchange="toggleAttributeExtract()">
        <span class="ml-2 text-gray-700">Extract Text Content</span>
      </label>
      <label class="inline-flex items-center cursor-pointer">
        <input type="radio" name="extractionMode" value="attribute" class="w-4 h-4 text-primary" onchange="toggleAttributeExtract()">
        <span class="ml-2 text-gray-700">Extract Attribute Value</span>
      </label>
    </div>
  </fieldset>
  <!-- Shown only while the "attribute" mode is selected (toggled by toggleAttributeExtract). -->
  <div id="extractAttributeContainer" class="mb-4 hidden">
    <label for="extractAttribute" class="block text-sm font-medium text-gray-700 mb-2">Attribute to Extract</label>
    <input type="text" id="extractAttribute" class="w-full p-3 border border-gray-300 rounded-lg" placeholder="e.g. href, src, data-id">
  </div>
  <!-- Explicit type="button": the default type is "submit", which would submit any form this block is later wrapped in. -->
  <button type="button" onclick="extractContent()" class="bg-primary text-white px-4 py-2 rounded-lg hover:bg-primary-dark transition duration-200">
    Extract Content
  </button>
</div>
<div class="bg-white rounded-lg shadow-md p-6">
<h2 class="text-xl font-semibold text-gray-800 mb-4">Results</h2>
<div id="results" class="w-full min-h-40 p-4 bg-gray-50 rounded-lg border border-gray-200">
<!-- Results will be displayed here -->
</div>
</div>
</main>
<footer class="bg-white border-t border-gray-200 mt-auto">
<div class="container mx-auto px-4 py-6 max-w-4xl">
<p class="text-gray-600 text-center">© <a href="https://minisoft.it/" class="text-primary hover:text-primary-dark">Minisoft</a> — All rights reserved</p>
</div>
</footer>
<script>
// Values collected by the most recent extractContent() run (reset at the start of each run);
// read by copyExtractedValues() when the user copies the results.
let extractedValues = [];
/**
 * Show the "Attribute to Extract" field only while the "attribute" extraction
 * mode radio is the checked one; hide it for every other mode.
 */
function toggleAttributeExtract() {
  const checkedRadio = document.querySelector('input[name="extractionMode"]:checked');
  const attributeModeActive = checkedRadio.value === 'attribute';
  // Second argument forces the class: present when the field must be hidden.
  document
    .getElementById('extractAttributeContainer')
    .classList.toggle('hidden', !attributeModeActive);
}
/**
 * Parse the pasted HTML, select elements with the attribute filter and render
 * the matches into #results.
 *
 * Modes (taken from the checked "extractionMode" radio):
 *  - "attribute": lists the value of the "Attribute to Extract" on every match.
 *    The name/value filter is optional; without a filter name, every element
 *    that carries the attribute to extract is selected.
 *  - any other value (text mode): requires both filter name and value and shows
 *    the text, inner HTML and outer HTML of every match.
 *
 * Side effects: resets and refills the global `extractedValues` and replaces
 * the content of #results.
 *
 * Everything that originates from the pasted document or from an exception is
 * passed through escapeHtml() before it is assigned to innerHTML, and the
 * user-typed filter parts are escaped before they are placed in a selector.
 */
function extractContent() {
  const htmlInput = document.getElementById('htmlInput').value;
  const attributeName = document.getElementById('attributeName').value.trim();
  const attributeValue = document.getElementById('attributeValue').value.trim();
  const extractionMode = document.querySelector('input[name="extractionMode"]:checked').value;
  const resultsDiv = document.getElementById('results');
  extractedValues = [];

  // Replace the results with a single red status line.
  const showError = (message) => {
    resultsDiv.innerHTML = `<p class="text-red-500">${escapeHtml(message)}</p>`;
  };

  // Build `[name]` or `[name="value"]`. The name is escaped as a CSS identifier
  // and the value as a CSS string, so characters such as `"`, `\` or `:` typed
  // by the user match literally instead of breaking or altering the selector.
  const attributeSelector = (name, value) => {
    const safeName = CSS.escape(name);
    if (value === undefined) {
      return `[${safeName}]`;
    }
    const safeValue = value.replace(/["\\]/g, '\\$&');
    return `[${safeName}="${safeValue}"]`;
  };

  try {
    // Parse into a detached document so the pasted markup never touches this page.
    const parser = new DOMParser();
    const doc = parser.parseFromString(htmlInput, 'text/html');
    let elements;

    if (extractionMode === 'attribute') {
      const extractAttribute = document.getElementById('extractAttribute').value.trim();
      if (!extractAttribute) {
        showError('Please enter an attribute name to extract.');
        return;
      }

      // Build selector based on provided filters
      if (attributeName && attributeValue) {
        elements = doc.querySelectorAll(attributeSelector(attributeName, attributeValue));
      } else if (attributeName) {
        elements = doc.querySelectorAll(attributeSelector(attributeName));
      } else {
        // No filter - select all elements with the attribute to extract
        elements = doc.querySelectorAll(attributeSelector(extractAttribute));
      }

      if (elements.length === 0) {
        showError('No matching elements found.');
        return;
      }

      let output = '<div class="space-y-4">';
      elements.forEach((element, index) => {
        const attrValue = element.getAttribute(extractAttribute);
        if (attrValue !== null) {
          extractedValues.push(attrValue);
          output += `
            <div class="border-b border-gray-200 pb-2">
              <p class="text-gray-800"><span class="font-medium text-gray-500">${index + 1}.</span> <span class="extracted-value">${escapeHtml(attrValue)}</span></p>
            </div>
          `;
        } else {
          // The element matched the filter but does not carry the requested attribute.
          output += `
            <div class="border-b border-gray-200 pb-2">
              <p class="text-gray-400"><span class="font-medium">${index + 1}.</span> <em>(attribute not found)</em></p>
            </div>
          `;
        }
      });
      output += '</div>';
      output += `<p class="mt-4 text-sm text-gray-600">Found ${extractedValues.length} values from ${elements.length} elements</p>`;
      output += '<button onclick="copyExtractedValues()" class="mt-4 bg-primary hover:bg-primary-dark text-white px-4 py-2 rounded-lg transition duration-200">Copy All Values</button>';
      resultsDiv.innerHTML = output;
    } else {
      // Text extraction requires both the attribute name and value.
      if (!attributeName || !attributeValue) {
        showError('Please enter both attribute name and value for text extraction.');
        return;
      }

      elements = doc.querySelectorAll(attributeSelector(attributeName, attributeValue));
      if (elements.length === 0) {
        showError('No matching elements found.');
        return;
      }

      let output = '<div class="space-y-4">';
      elements.forEach((element, index) => {
        extractedValues.push(element.textContent);
        // textContent is decoded text: it may contain "<" or "&" and must be
        // escaped like the two HTML views below before going into innerHTML.
        output += `
          <div class="border-b border-gray-200 pb-4">
            <p class="font-semibold mb-2 text-gray-800">Match ${index + 1}</p>
            <p class="mb-2"><span class="font-medium text-gray-700">Inner Text</span> ${escapeHtml(element.textContent)}</p>
            <p class="mb-2"><span class="font-medium text-gray-700">Inner HTML</span></p>
            <pre class="bg-gray-100 p-3 rounded-lg overflow-x-auto text-sm">${escapeHtml(element.innerHTML)}</pre>
            <p class="mb-2"><span class="font-medium text-gray-700">Outer HTML</span></p>
            <pre class="bg-gray-100 p-3 rounded-lg overflow-x-auto text-sm">${escapeHtml(element.outerHTML)}</pre>
          </div>
        `;
      });
      output += '</div>';
      output += '<button onclick="copyAllText()" class="mt-4 bg-primary hover:bg-primary-dark text-white px-4 py-2 rounded-lg transition duration-200">Copy All Texts</button>';
      resultsDiv.innerHTML = output;
    }
  } catch (error) {
    // Exception messages can echo user input (e.g. a rejected selector), so escape them too.
    showError(`Error: ${error.message}`);
  }
}
/**
 * Copy every value stored in the global `extractedValues` to the clipboard,
 * one value per line. Alerts when there is nothing to copy or when the
 * clipboard write fails; shows a notification on success.
 */
async function copyExtractedValues() {
  if (!extractedValues.length) {
    alert('No values to copy');
    return;
  }

  const clipboardText = extractedValues.join('\n');
  try {
    await navigator.clipboard.writeText(clipboardText);
    showNotification('All values copied!');
  } catch (copyError) {
    alert('Failed to copy values');
  }
}
/**
 * Copy the text of every match from the last text-mode extraction to the
 * clipboard, one match per line.
 *
 * The texts come from the global `extractedValues`, which extractContent()
 * fills with each match's textContent. Reading them from there, rather than
 * from the rendered result paragraphs, keeps UI labels ("Match 1",
 * "Inner Text", "Inner HTML", "Outer HTML") out of the copied text.
 * Whitespace-only texts are skipped.
 */
async function copyAllText() {
  const texts = extractedValues
    .map((text) => text.trim())
    .filter((text) => text.length > 0);

  if (texts.length === 0) {
    alert('No texts to copy');
    return;
  }

  try {
    await navigator.clipboard.writeText(texts.join('\n'));
    showNotification('All texts copied!');
  } catch (err) {
    alert('Failed to copy all texts');
  }
}
/**
 * Show a transient green message in the bottom-right corner of the page.
 * The element is faded out after 3000 ms and removed from the body 500 ms
 * later, matching the 500 ms opacity transition set in its class list.
 *
 * @param {string} message Text to display (assigned via textContent).
 */
function showNotification(message) {
  const toast = document.createElement('div');
  toast.textContent = message;
  toast.className = 'fixed bottom-4 right-4 bg-green-500 text-white px-4 py-2 rounded-lg shadow-lg transition-opacity duration-500';
  document.body.appendChild(toast);

  const detachToast = () => {
    document.body.removeChild(toast);
  };
  const fadeOutToast = () => {
    toast.classList.add('opacity-0');
    // Wait for the fade to finish before taking the element out of the DOM.
    setTimeout(detachToast, 500);
  };

  setTimeout(fadeOutToast, 3000);
}
/**
 * Escape the five HTML-significant characters (& < > " ') so a value can be
 * interpolated into markup that is assigned to innerHTML.
 *
 * @param {*} unsafe Value to escape; null/undefined become '', anything else
 *                   is converted with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(unsafe) {
  const text = unsafe == null ? '' : String(unsafe);
  // "&" is replaced first; otherwise the ampersands of the entities produced
  // by the later replacements would themselves be escaped.
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#039;");
}
</script>
</body>
</html>