44
55from ausbills .models import BillMeta , Bill , PdfUrl , UrlStr
66from ausbills .util import BillExtractor , BillListExtractor
7- from ausbills .types import BillProgress , Parliament , BillTypes , ChamberProgress , Timestamp
7+ from ausbills .types import (
8+ BillProgress ,
9+ Parliament ,
10+ BillTypes ,
11+ ChamberProgress ,
12+ Timestamp ,
13+ )
814from ausbills .util .consts import *
915
# Root of the ACT legislation register. Relative hrefs scraped from its pages
# are appended to this value, so it must not contain stray whitespace or end
# with a trailing slash.
BASE_URL = "https://legislation.act.gov.au"
1117
1218
class ACTBillList(BillListExtractor):
    """Collect summary records for the current bills on the ACT register.

    Constructing an instance downloads the register's "current bills" results
    page and converts every row of its results table into a dict, which is
    stored on ``self._bill_list``.
    """

    def __init__(self):
        """Download the current-bills results page and parse its table body."""
        results_page = self._download_html(
            "https://legislation.act.gov.au/results?category=cBil&status=Current&action=browse"
        )
        bill_volume = results_page.find(
            "table", {"id": "results-table-bill"}
        ).find("tbody")
        self._bill_list = self._get_bill_list(bill_volume)

    def _get_bill_list(self, bill_volume):
        """Turn each direct ``<tr>`` child of the results table into a dict.

        Args:
            bill_volume: The ``<tbody>`` element of the bill results table.

        Returns:
            A list with one dict per row, keyed by ``TITLE``, ``URL``,
            ``BILL_TYPE``, ``INTRO_ASSEMBLY``, ``PASSED_ASSEMBLY``, ``PASSED``,
            ``CHAMBER_PROGRESS`` and ``ID``.
        """
        bill_list = []
        for row in bill_volume.find_all("tr", recursive=False):
            # Look the cells up once per row instead of re-scanning the row
            # for every column.
            cells = row.find_all("td")
            intro_cell, title_cell, type_cell = cells[0], cells[1], cells[2]
            status_cell = cells[-1]

            bill_title = title_cell.text.strip()
            bill_url = BASE_URL + title_cell.find("a")["href"]
            # The first eight characters of the sort key are parsed as YYYYMMDD.
            bill_intro = self._get_timestamp(
                intro_cell["data-order"][:8], "%Y%m%d"
            )

            # Decide this per row. A flag carried over from an earlier row
            # would mark every bill after the first passed one as passed too.
            has_passed = status_cell["data-order"] == "passed"
            if has_passed:
                # The second child node of the status cell is parsed as the
                # passing date.
                passed_date = self._get_timestamp(
                    status_cell.contents[1], "%d %B %Y"
                )
                chamber_progress = ChamberProgress.THIRD_READING.value
            else:
                passed_date = None
                # Bills will always remain in the first house in a unicameral
                # parliament.
                chamber_progress = ChamberProgress.FIRST_READING.value

            prog_dict = {
                BillProgress.FIRST.value: True,
                BillProgress.ASSENTED.value: has_passed,
            }

            bill_type = self._parse_type(type_cell.text)
            # The id is the five characters before the final character of the
            # bill URL.
            bill_id = bill_url[-6:-1]

            bill_list.append(
                {
                    TITLE: bill_title,
                    URL: bill_url,
                    BILL_TYPE: bill_type,
                    INTRO_ASSEMBLY: bill_intro,
                    PASSED_ASSEMBLY: passed_date,
                    PASSED: prog_dict,
                    CHAMBER_PROGRESS: chamber_progress,
                    ID: bill_id,
                }
            )
        return bill_list

    def _parse_type(self, type_string):
        """Map the register's bill-type code to a ``BillTypes`` value.

        Args:
            type_string: Text of the type column; surrounding whitespace is
                ignored.

        Returns:
            ``BillTypes.GOVERNMENT.value`` for ``"GOV"``,
            ``BillTypes.PRIVATE_MEMBER.value`` for ``"PMB"``, and ``None`` for
            any other code.
        """
        type_code = type_string.strip()
        if type_code == "GOV":
            return BillTypes.GOVERNMENT.value
        if type_code == "PMB":
            return BillTypes.PRIVATE_MEMBER.value
        return None
6481
6582
@@ -86,76 +103,92 @@ class ACTBillObject(BillExtractor):
86103
87104 def __init__ (self , bill_meta : BillMetaACT ):
88105 self .bill_soup = self ._download_html (bill_meta .link )
89- self .bill_meta_list = self .bill_soup .find ('dl' ).find_all ('dd' )
106+ self .bill_meta_list = self .bill_soup .find ("dl" ).find_all ("dd" )
90107 self .url = bill_meta .link
91- if ( len (self .bill_meta_list ) is None ) :
108+ if len (self .bill_meta_list ) is None :
92109 raise self .ExtractorError (
93- f'Could not find extra bill metadata:\n \n { self .bill_meta_list } ' )
110+ f"Could not find extra bill metadata:\n \n { self .bill_meta_list } "
111+ )
94112
95113 def __str__ (self ):
96114 return f"<Bill | URL: '{ self .url } '>"
97115
98116 def __repr__ (self ):
99- return ( ' <{}.{} : {} object at {}>' .format (
117+ return " <{}.{} : {} object at {}>" .format (
100118 self .__class__ .__module__ ,
101119 self .__class__ .__name__ ,
102- self .url .split ('/b/' )[- 1 ].replace ('/' , '' ),
103- hex (id (self ))))
120+ self .url .split ("/b/" )[- 1 ].replace ("/" , "" ),
121+ hex (id (self )),
122+ )
104123
105124 def _get_sponsor (self ):
106125 return self .bill_meta_list [1 ].text .strip ()
107126
def _get_text_links(self):
    """Collect the PDF download links from the bill-text table.

    The table is the first ``<table>`` following the ``<h3>`` whose
    ``tabindex`` attribute is ``"0"``.

    Returns:
        A list with one dict per table row, holding ``"__time"`` (timestamp
        parsed from the row's second cell), ``"__id"`` (row position) and
        ``"url"`` (absolute PDF link).
    """
    table = (
        self.bill_soup.find("h3", {"tabindex": "0"})
        .findNext("table")
        .find("tbody")
    )
    urls = []
    for index, entry in enumerate(table.find_all("tr")):
        # Read the date from the current row. Looking it up on the whole
        # table would hand every entry the first row's date.
        date_key = entry.find_all("td")[1]["data-order"][:8]
        pdf_anchor = entry.find("a", {"class": "button download pdf"})
        urls.append(
            {
                "__time": self._get_timestamp(date_key, "%Y%m%d"),
                "__id": index,
                "url": BASE_URL + pdf_anchor["href"],
            }
        )
    return urls
124146
def _get_em_links(self):
    """Collect the PDF download links from the explanatory-material table.

    The table is the first ``<table>`` following the first ``<h3>`` that has
    no ``tabindex`` attribute.

    Returns:
        A list with one dict per table row, holding ``"__time"`` (timestamp
        parsed from the row's second cell), ``"__id"`` (row position),
        ``"url"`` (absolute PDF link) and ``"house"`` (always
        ``BillProgress.FIRST.value``).
    """
    table = (
        self.bill_soup.find("h3", {"tabindex": None})
        .findNext("table")
        .find("tbody")
    )
    urls = []
    for index, row in enumerate(table.find_all("tr")):
        # Read the date from the current row. Looking it up on the whole
        # table would hand every entry the first row's date.
        date_key = row.find_all("td")[1]["data-order"][:8]
        pdf_anchor = row.find("a", {"class": "button download pdf"})
        urls.append(
            {
                "__time": self._get_timestamp(date_key, "%Y%m%d"),
                "__id": index,
                "url": BASE_URL + pdf_anchor["href"],
                "house": BillProgress.FIRST.value,
            }
        )
    return urls
141168
142169 def _get_scrutiny_link (self ):
143- notes_col = self .bill_soup .find (
144- 'h3' , {'tabindex' : '0' }).findNext (
145- 'table' ).find ('tbody' ).find ('td' , {'class' : 'notes' })
146- if (notes_col is not None ):
147- for a in notes_col .find_all ('a' ):
148- if (a .contents [0 ] == 'Scrutiny Committee report' ):
149- return a ['href' ]
170+ notes_col = (
171+ self .bill_soup .find ("h3" , {"tabindex" : "0" })
172+ .findNext ("table" )
173+ .find ("tbody" )
174+ .find ("td" , {"class" : "notes" })
175+ )
176+ if notes_col is not None :
177+ for a in notes_col .find_all ("a" ):
178+ if a .contents [0 ] == "Scrutiny Committee report" :
179+ return a ["href" ]
150180
151181 def _get_speech_link (self ):
152- notes_col = self .bill_soup .find (
153- 'h3' , {'tabindex' : '0' }).findNext (
154- 'table' ).find ('tbody' ).find ('td' , {'class' : 'notes' })
155- if (notes_col is not None ):
156- for a in notes_col .find_all ('a' ):
157- if (a .contents [0 ] == 'Presentation speech' ):
158- return a ['href' ]
182+ notes_col = (
183+ self .bill_soup .find ("h3" , {"tabindex" : "0" })
184+ .findNext ("table" )
185+ .find ("tbody" )
186+ .find ("td" , {"class" : "notes" })
187+ )
188+ if notes_col is not None :
189+ for a in notes_col .find_all ("a" ):
190+ if a .contents [0 ] == "Presentation speech" :
191+ return a ["href" ]
159192
160193
161194def get_bills_metadata () -> List [BillMetaACT ]:
@@ -171,20 +204,22 @@ def get_bills_metadata() -> List[BillMetaACT]:
171204 passed_assembly = bill_dict [PASSED_ASSEMBLY ],
172205 intro_assembly = bill_dict [INTRO_ASSEMBLY ],
173206 id = bill_dict [ID ],
174- chamber_progress = bill_dict [CHAMBER_PROGRESS ]
207+ chamber_progress = bill_dict [CHAMBER_PROGRESS ],
175208 )
176209 _bill_meta_list .append (bill_meta )
177- return ( _bill_meta_list )
210+ return _bill_meta_list
178211
179212
def get_bill(bill_meta: BillMetaACT) -> BillACT:
    """Build the full ``BillACT`` record for one bill.

    The bill's page is scraped through ``ACTBillObject`` and the scraped
    fields are combined with the metadata that was already collected.

    Args:
        bill_meta: Metadata for the bill; it is passed to ``ACTBillObject``
            and to ``dataclasses.asdict``.

    Returns:
        A ``BillACT`` holding every field of ``bill_meta`` plus the sponsor,
        text links, explanatory-material links, introduction speech link and
        scrutiny report link.
    """
    scraper = ACTBillObject(bill_meta)
    # Copy the metadata we already have into a separate dict so the new
    # record does not share state with ``bill_meta``.
    meta_fields = dataclasses.asdict(bill_meta)
    return BillACT(
        **meta_fields,
        sponsor=scraper._get_sponsor(),
        bill_text_links=scraper._get_text_links(),
        bill_em_links=scraper._get_em_links(),
        intro_speech=scraper._get_speech_link(),
        scrutiny_report=scraper._get_scrutiny_link(),
    )
0 commit comments