Vulnerability-Finder/test_run.py at main · roycrisses/Vulnerability-Finder · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env python3
"""
Quick test runner — bypasses the full pipeline.
Directly tests: config loader → safety → crawler → tech detector → report gen.
Uses very tight limits (5 pages, 15s timeout) for a fast smoke test.
"""
import asyncio
import json
import logging
from pathlib import Path

import structlog

# Console logging for this script: each event gets an ISO timestamp and its
# level name, then is rendered by structlog's ConsoleRenderer. The filtering
# bound logger is built with logging.INFO as its minimum level.
structlog.configure(
    processors=[
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.add_log_level,
        structlog.dev.ConsoleRenderer(),
    ],
    wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
)

log = structlog.get_logger()

# Host handed to the safety preflight, the crawler and the scan planner
# in run_test().
TARGET = "hackthissite.org"


def _format_task_detail(task) -> str:
    """Return the ' key=value' suffix for the non-empty options of a plan task."""
    opts = task.get("options", {})
    # Only these three option keys are shown, in this fixed order, and only
    # when they hold a truthy value.
    return "".join(
        f" {key}={opts[key]}"
        for key in ("severity", "tags", "flags")
        if opts.get(key)
    )


def _print_report_preview(target, crawl_data, tech, plan) -> None:
    """Print a human-readable summary of the crawl, tech stack and scan plan."""
    banner = "=" * 60
    urls = crawl_data["urls"]
    js_endpoints = crawl_data["js_endpoints"]
    forms = crawl_data["forms"]

    print("\n" + banner)
    print(f"  QUICK TEST RESULTS — {target}")
    print(banner)
    print(f"\n📄 Pages crawled   : {len(urls)}")
    print(f"🔗 JS API endpoints: {len(js_endpoints)}")
    print(f"📝 Forms found     : {len(forms)}")
    print(f"🌐 Subdomains seen : {crawl_data['subdomains']}")
    print("\n🔧 Tech Stack Detected:")
    for entry in tech:
        print(f"   [{entry['category'].upper():10s}] {entry['id']}")

    if js_endpoints:
        print(f"\n🔌 API Endpoints from JS ({len(js_endpoints)} found):")
        # Cap the listing so a noisy site does not flood the terminal.
        for endpoint in js_endpoints[:20]:
            print(f"   {endpoint}")

    if forms:
        print(f"\n📋 Forms ({len(forms)} found):")
        for form in forms[:5]:
            print(f"   [{form.get('method','?').upper()}] {form.get('action','?')}")
            for inp in form.get("inputs", [])[:4]:
                print(f"      input: name={inp.get('name','?')} type={inp.get('type','?')}")

    print("\n📡 URLs discovered:")
    for url in urls[:15]:
        print(f"   {url}")

    print("\n🗺️  Scan Plan (would run if tools were installed):")
    for position, task in enumerate(plan, 1):
        print(f"   {position}. {task['tool'].upper()}{_format_task_detail(task)}")

    print("\n" + banner)
    print("✅ Crawler + Tech Detection working correctly!")
    print("   Install nuclei/nmap/ffuf/httpx or use Docker for full scan.")
    print(banner + "\n")


def _save_result(result) -> Path:
    """Write *result* as indented JSON to reports/test_crawl_result.json and return the path."""
    out_dir = Path("reports")
    out_dir.mkdir(exist_ok=True)
    out_path = out_dir / "test_crawl_result.json"
    # Explicit encoding keeps the file independent of the platform default.
    out_path.write_text(json.dumps(result, indent=2), encoding="utf-8")
    return out_path


async def run_test() -> None:
    """Run the quick smoke test against TARGET.

    Steps, in order: load ``config.yaml`` and tighten the crawler limits,
    run the safety preflight, build the rate limiters, crawl TARGET, detect
    the tech stack, build the "quick"-profile scan plan, print a report
    preview, and save the collected data as JSON under ``reports/``.

    Project modules are imported inside the function, right before the step
    that uses them, exactly as in the original script.

    Raises:
        Whatever the project modules raise; nothing is caught here.
        NOTE(review): presumably ``preflight_checks`` raises when the target
        is not allowed — confirm against orchestrator.safety.
    """
    # 1. Load config
    from orchestrator.config_loader import load_config
    config = load_config(Path("config.yaml"))

    # Override for speed
    crawler_cfg = config["crawler"]
    crawler_cfg["max_pages"] = 5
    crawler_cfg["max_depth"] = 1
    crawler_cfg["timeout_sec"] = 15
    crawler_cfg["extract_js_endpoints"] = True

    log.info("config_ready", user_agent=config["meta"]["user_agent"])

    # 2. Safety check (the return value is not needed by this script)
    from orchestrator.safety import preflight_checks
    preflight_checks(config, [TARGET])
    log.info("safety_passed")

    # 3. Build rate limiter
    from orchestrator.rate_limiter import build_rate_limiters
    limiters = build_rate_limiters(config)
    log.info("rate_limiter_ready")

    # 4. Crawl
    # Log the configured limit rather than repeating the literal, so the two
    # cannot drift apart.
    log.info("crawling_start", target=TARGET, max_pages=crawler_cfg["max_pages"])
    from crawler.engine import Crawler
    crawler = Crawler(config, limiters["http"])

    crawl_data = await crawler.crawl(TARGET)

    log.info(
        "crawl_done",
        pages=len(crawl_data["urls"]),
        js_endpoints=len(crawl_data["js_endpoints"]),
        forms=len(crawl_data["forms"]),
        subdomains=crawl_data["subdomains"],
        tech_hints=crawl_data["tech_hints"],
    )

    # 5. Tech detect
    from crawler.tech_detector import TechDetector
    tech = TechDetector().detect(crawl_data)
    log.info("tech_detected", tech_stack=tech)

    # 6. Smart filter plan
    from scanner.smart_filter import SmartFilter
    profile = config["profiles"]["quick"]
    plan = SmartFilter(config, profile).plan(TARGET, tech, crawl_data)
    log.info("scan_plan", tasks=[t["tool"] for t in plan])

    # 7. Print report preview (no tools needed)
    _print_report_preview(TARGET, crawl_data, tech, plan)

    # 8. Save JSON result
    result = {
        "target": TARGET,
        "urls": crawl_data["urls"],
        "js_endpoints": crawl_data["js_endpoints"],
        "forms": crawl_data["forms"],
        "subdomains": crawl_data["subdomains"],
        "tech_hints": crawl_data["tech_hints"],
        "tech_stack": tech,
        "scan_plan": plan,
    }
    out_path = _save_result(result)
    log.info("json_saved", path=str(out_path))


if __name__ == "__main__":
    # Script entry point: only when executed directly (not on import),
    # hand the run_test() coroutine to asyncio.run.
    asyncio.run(run_test())