Skip to content

Commit 90edb22

Browse files
Merge pull request #298 from KolevVelyan/fix-eu-parsing
Fix `.eu` parsing
2 parents 61af95f + b792305 commit 90edb22

2 files changed

Lines changed: 136 additions & 15 deletions

File tree

test/test_parser.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,117 @@ def test_cm_parse(self):
898898
}
899899
self._parse_and_compare("icp.cm", data, expected_results)
900900

901+
def test_eu_parse(self):
# Feeds a captured EURid port-43 response for eurid.eu to the parser and
# compares the parsed attributes with expected_results below.
902+
data = """
903+
% The WHOIS service offered by EURid and the access to the records
904+
% in the EURid WHOIS database are provided for information purposes
905+
% only. It allows persons to check whether a specific domain name
906+
% is still available or not and to obtain information related to
907+
% the registration records of existing domain names.
908+
%
909+
% EURid cannot, under any circumstances, be held liable in case the
910+
% stored information would prove to be wrong, incomplete or not
911+
% accurate in any sense.
912+
%
913+
% By submitting a query, you agree not to use the information made
914+
% available to:
915+
%
916+
% - allow, enable or otherwise support the transmission of unsolicited,
917+
% commercial advertising or other solicitations whether via email or
918+
% otherwise;
919+
% - target advertising in any possible way;
920+
% - cause nuisance in any possible way by sending messages to registrants,
921+
% whether by automated, electronic processes capable of enabling
922+
% high volumes or by other possible means.
923+
%
924+
% Without prejudice to the above, it is explicitly forbidden to extract,
925+
% copy and/or use or re-utilise in any form and by any means
926+
% (electronically or not) the whole or a quantitatively or qualitatively
927+
% substantial part of the contents of the WHOIS database without prior
928+
% and explicit permission by EURid, nor in any attempt hereof, to apply
929+
% automated, electronic processes to EURid (or its systems).
930+
%
931+
% You agree that any reproduction and/or transmission of data for
932+
% commercial purposes will always be considered as the extraction of a
933+
% substantial part of the content of the WHOIS database.
934+
%
935+
% By submitting the query, you agree to abide by this policy and accept
936+
% that EURid can take measures to limit the use of its WHOIS services
937+
% to protect the privacy of its registrants or the integrity
938+
% of the database.
939+
%
940+
% The EURid WHOIS service on port 43 (textual WHOIS) never discloses
941+
% any information concerning the registrant.
942+
% Registrant and on-site contact information can be obtained through use of the
943+
% web-based WHOIS service available from the EURid website www.eurid.eu
944+
%
945+
% WHOIS eurid.eu
946+
Domain: eurid.eu
947+
Script: LATIN
948+
949+
Registrant:
950+
NOT DISCLOSED!
951+
Visit www.eurid.eu for the web-based WHOIS.
952+
953+
Technical:
954+
Organisation: EURid vzw
955+
Language: en
956+
Email: tech@eurid.eu
957+
958+
Registrar:
959+
Name: EURid vzw
960+
Website: https://www.eurid.eu
961+
962+
Name servers:
963+
ns3.eurid.eu (185.36.4.253)
964+
ns3.eurid.eu (2001:67c:9c:3937::253)
965+
nsx.eurid.eu (185.151.141.1)
966+
nsx.eurid.eu (2a02:568:fe00::6575)
967+
ns1.eurid.eu (2001:67c:9c:3937::252)
968+
ns1.eurid.eu (185.36.4.252)
969+
ns2.eurid.eu (2001:67c:40:3937::252)
970+
ns2.eurid.eu (185.36.6.252)
971+
ns4.eurid.eu (2001:67c:40:3937::253)
972+
ns4.eurid.eu (185.36.6.253)
973+
nsp.netnod.se
974+
975+
Keys:
976+
flags:KSK protocol:3 algorithm:RSA_SHA256 pubKey:AwEAAcOQldGtC33GLx8s335UscKMPlWjDXCqbhR2QyAYcfS4CZS6YHg3A1Zz/K3VurTZF68aSaRkNupZuEgt4jozE3v4+t+2qOfiATvoOCrf74hWduBPwk9Go0z7FVlDkok1/qMQmqOtih8TFP85b+w6F/uyLMZS1JowMDUzRurmHJVoT4lW9+OCdrhuQFK9vU24Y8BmacoRy6mWBCFlysizlOIodwmquOf5A+3Nz0B3TLCK4fIYJYVxCUVlpRJ7uaBS+GLD7afuxkEesReYHgPWZFSDMbXk9Ugh+qUi8tEKKFls9TM3lK9BPBcciXUhI1bRJSHftqcNpMmLqg/79SwoWGc=
977+
978+
Please visit www.eurid.eu for more info.
979+
"""
980+
981+
# The fixture has no "Reseller:" section, so the three reseller_* fields
# are expected to be None.
expected_results = {
982+
"domain_name": "eurid.eu",
983+
"script": "LATIN",
984+
"reseller_org": None,
985+
"reseller_lang": None,
986+
"reseller_email": None,
987+
"tech_org": "EURid vzw",
988+
"tech_lang": "en",
989+
"tech_email": "tech@eurid.eu",
990+
"registrar": "EURid vzw",
991+
"registrar_url": "https://www.eurid.eu",
992+
# One entry per line listed under "Name servers:", in fixture order, with
# the parenthesised address kept as part of the entry.
"name_servers": [
993+
"ns3.eurid.eu (185.36.4.253)",
994+
"ns3.eurid.eu (2001:67c:9c:3937::253)",
995+
"nsx.eurid.eu (185.151.141.1)",
996+
"nsx.eurid.eu (2a02:568:fe00::6575)",
997+
"ns1.eurid.eu (2001:67c:9c:3937::252)",
998+
"ns1.eurid.eu (185.36.4.252)",
999+
"ns2.eurid.eu (2001:67c:40:3937::252)",
1000+
"ns2.eurid.eu (185.36.6.252)",
1001+
"ns4.eurid.eu (2001:67c:40:3937::253)",
1002+
"ns4.eurid.eu (185.36.6.253)",
1003+
"nsp.netnod.se",
1004+
],
1005+
# The four dnssec_* values all come from the single line under "Keys:".
"dnssec_flags": "KSK",
1006+
"dnssec_protocol": "3",
1007+
"dnssec_algorithm": "RSA_SHA256",
1008+
"dnssec_pubkey": "AwEAAcOQldGtC33GLx8s335UscKMPlWjDXCqbhR2QyAYcfS4CZS6YHg3A1Zz/K3VurTZF68aSaRkNupZuEgt4jozE3v4+t+2qOfiATvoOCrf74hWduBPwk9Go0z7FVlDkok1/qMQmqOtih8TFP85b+w6F/uyLMZS1JowMDUzRurmHJVoT4lW9+OCdrhuQFK9vU24Y8BmacoRy6mWBCFlysizlOIodwmquOf5A+3Nz0B3TLCK4fIYJYVxCUVlpRJ7uaBS+GLD7afuxkEesReYHgPWZFSDMbXk9Ugh+qUi8tEKKFls9TM3lK9BPBcciXUhI1bRJSHftqcNpMmLqg/79SwoWGc=",
1009+
}
1010+
self._parse_and_compare("eurid.eu", data, expected_results)
1011+
9011012

9021013
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
9031014
unittest.main()

whois/parser.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,26 +1090,36 @@ def __init__(self, domain: str, text: str):
10901090
class WhoisEu(WhoisEntry):
    """Whois parser for .eu domains.

    EURid responses are laid out as titled sections (``Technical:``,
    ``Registrar:``, ``Name servers:``, ``Keys:`` ...), each followed by its
    own fields.  Several sections reuse the same field labels, so every
    section-scoped pattern below starts at its section title and is not
    allowed to read past the next section title.

    The patterns rely on ``^`` matching at the start of each line, i.e. they
    are expected to be applied with ``re.MULTILINE``.
    """

    # Titles that open a new section.  Regex metacharacters are escaped so
    # that "On-site(s)" and the dots of the closing line match literally and
    # no capture group leaks into the field patterns.  "Technical" is listed
    # so that a lookup in one section cannot run on into a Technical section.
    _SECTION_BOUNDARY = (
        r"(?:Registrar|On-site\(s\)|Reseller|Registrant|Technical|Name servers|Keys"
        r"|Please visit www\.eurid\.eu for more information\.)"
    )

    # Lazily walks through the text of one section.  It refuses to consume a
    # newline that is directly followed by another section title.  Nothing is
    # matched between the title's colon and this fragment on purpose: a
    # greedy ``\s*`` there could swallow that newline and defeat the guard
    # when a section is empty.
    _SECTION_BODY = rf"(?:(?!\n{_SECTION_BOUNDARY}\s*:)[\s\S])*?"

    # Field values are separated from their label by horizontal whitespace
    # only ([ \t]*), so an empty field never captures the following line.
    regex: dict[str, str] = {
        "domain_name": r"Domain:\s*([^\n\r]+)",
        "script": r"Script:\s*([^\n\r]+)",
        "reseller_org": rf"Reseller:{_SECTION_BODY}^\s*Organisation:[ \t]*([^\n\r]+)",
        "reseller_lang": rf"Reseller:{_SECTION_BODY}^\s*Language:[ \t]*([^\n\r]+)",
        "reseller_email": rf"Reseller:{_SECTION_BODY}^\s*Email:[ \t]*([^\n\r]+)",
        "tech_org": rf"Technical:{_SECTION_BODY}^\s*Organisation:[ \t]*([^\n\r]+)",
        "tech_lang": rf"Technical:{_SECTION_BODY}^\s*Language:[ \t]*([^\n\r]+)",
        "tech_email": rf"Technical:{_SECTION_BODY}^\s*Email:[ \t]*([^\n\r]+)",
        "registrar": rf"Registrar:{_SECTION_BODY}^\s*Name:[ \t]*([^\n\r]+)",
        "registrar_url": rf"Registrar:{_SECTION_BODY}^\s*Website:[ \t]*([^\n\r]+)",
        # Captures the run of non-blank lines after the title as one string;
        # _preprocess() turns it into a list.
        "name_servers": rf"Name servers:\s*((?:^(?!\s*$)(?!^{_SECTION_BOUNDARY}\s*:).+\n)+)",
        # The key attributes are "label:value" tokens inside the Keys section.
        "dnssec_flags": rf"Keys:{_SECTION_BODY}flags:\s*([^\s]+)",
        "dnssec_protocol": rf"Keys:{_SECTION_BODY}protocol:\s*([0-9]+)",
        "dnssec_algorithm": rf"Keys:{_SECTION_BODY}algorithm:\s*([A-Z0-9_]+)",
        "dnssec_pubkey": rf"Keys:{_SECTION_BODY}pubKey:\s*([A-Za-z0-9+/=]+)",
    }

    def __init__(self, domain: str, text: str):
        """Parse the raw whois ``text`` returned for ``domain``.

        Raises:
            WhoisDomainNotFoundError: if the response, ignoring surrounding
                whitespace, ends with ``Status: AVAILABLE``.
        """
        if text.strip().endswith("Status: AVAILABLE"):
            raise WhoisDomainNotFoundError(text)
        WhoisEntry.__init__(self, domain, text, self.regex)

    def _preprocess(self, attr, value):
        """Split the captured name-server block into one entry per line.

        For ``name_servers`` a string value is returned as a list of its
        non-blank lines with surrounding whitespace removed.  Every other
        attribute is returned unchanged.
        """
        # NOTE(review): other attributes are returned as-is without going
        # through any base-class preprocessing - confirm this is intended.
        if attr == "name_servers" and isinstance(value, str):
            stripped_lines = (line.strip() for line in value.splitlines())
            return [line for line in stripped_lines if line]
        return value
11131123

11141124

11151125
class WhoisEe(WhoisEntry):

0 commit comments

Comments
 (0)