You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
- Add error handling to BaseScraperStrategy
- Improve HTML processing with additional selectors and link extraction
- Update CLI with ignoreErrors option
- Add tests for BaseScraperStrategy
/**
 * Feeds a small HTML document to `HtmlProcessor.process` and checks which
 * pieces of text appear in the processed `content`:
 *   - paragraph text is expected to be present,
 *   - text that only occurs inside `<nav>` is expected to be absent,
 *   - text that only occurs inside `<script>` is expected to be absent.
 */
it("should remove unwanted tags and keep text from allowed tags", async () => {
  const processor = new HtmlProcessor();
  const rawContent: RawContent = {
    content:
      "<html><head><title>Test</title></head><body><nav><ul><li><a href=\"/home\">Home</a></li></ul></nav><p>This text should remain.</p><script>alert('This should be removed');</script></body></html>",
    mimeType: "text/html",
    source: "https://example.com",
  };

  const result = await processor.process(rawContent);

  // The <p> body must be carried through to the output.
  expect(result.content).toContain("This text should remain.");
  // "Home" occurs only as the link label inside <nav>.
  expect(result.content).not.toContain("Home");
  // This phrase occurs only inside the inline <script>.
  expect(result.content).not.toContain("This should be removed");
});
115
+
116
+
describe("Code block language detection",()=>{
117
+
constprocessor=newHtmlProcessor();
118
+
119
+
it("should detect language from highlight-source-<language> on a parent",async()=>{
0 commit comments