Skip to content
This repository was archived by the owner on Apr 14, 2026. It is now read-only.

Commit 4f37823

Browse files
committed
Formatting and README changes
1 parent 03e2408 commit 4f37823

19 files changed

Lines changed: 975 additions & 734 deletions

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# Aus Bills
1+
# ausbills
22

3-
🇦🇺 This is a package is for obtaining parliament bills for Australian governments.
3+
🇦🇺 This is a package for obtaining parliament bills for Australian parliaments.
44

5-
## Install via pip3
5+
## Install via pip
66

77
```
8-
pip3 install ausbills
8+
pip install ausbills
99
```
1010

1111
---
@@ -18,11 +18,11 @@ Most parliaments host their bills online in 2 forms:
1818

1919
1. A table with a small amount of data about each bill
2020

21-
![ACT bill list](.github/img/ACT_bill_list.PNG)
21+
![ACT bill list](.github/img/ACT_bill_list.PNG)
2222

2323
2. Individual pages with all the info for a given bill.
2424

25-
![ACT bill](.github/img/ACT_bill.PNG)
25+
![ACT bill](.github/img/ACT_bill.PNG)
2626

2727
`ausbills` gives you access to all of this information.
2828

ausbills/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
from ausbills.log import get_logger
22

3-
log = get_logger('ausbills')
3+
log = get_logger("ausbills")

ausbills/json_encoder.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@
1616
class AusBillsJsonEncoder(json.JSONEncoder):
1717
def default(self, obj):
1818
if isinstance(obj, Maybe):
19-
return {"$nothing": None} if \
20-
obj.is_nothing() else {"$just": obj.value}
19+
return {"$nothing": None} if obj.is_nothing() else {"$just": obj.value}
2120
if isinstance(obj, Tag):
2221
return {"$bs4.tag": obj.encode()}
2322
if isinstance(obj, bytes):
@@ -28,8 +27,9 @@ def default(self, obj):
2827
return {"$house": obj.value}
2928
if isinstance(obj, datetime):
3029
return {"$dateIso8601": obj.isoformat()}
31-
log.warning("Got something of unexpected type"
32-
"({}\n\nObj: {}\n\ndir: {}"
33-
.format(type(obj), str(obj), dir(obj)))
30+
log.warning(
31+
"Got something of unexpected type"
32+
"({}\n\nObj: {}\n\ndir: {}".format(type(obj), str(obj), dir(obj))
33+
)
3434

3535
return json_d(self, obj)

ausbills/models.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
from typing import Dict, List, NewType
33
import json
44

5-
PdfUrl = NewType('PdfUrl', str)
6-
UrlStr = NewType('UrlStr', str)
5+
PdfUrl = NewType("PdfUrl", str)
6+
UrlStr = NewType("UrlStr", str)
77

88

99
@dataclass
@@ -22,7 +22,7 @@ class Bill:
2222
bill_text_links: List[Dict]
2323

2424
def asDict(self) -> dict:
25-
return(self.__dict__)
25+
return self.__dict__
2626

2727
def asJson(self) -> str:
28-
return(json.dumps(self.asDict(), indent=2))
28+
return json.dumps(self.asDict(), indent=2)

ausbills/parliament/act.py

Lines changed: 113 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -4,62 +4,79 @@
44

55
from ausbills.models import BillMeta, Bill, PdfUrl, UrlStr
66
from ausbills.util import BillExtractor, BillListExtractor
7-
from ausbills.types import BillProgress, Parliament, BillTypes, ChamberProgress, Timestamp
7+
from ausbills.types import (
8+
BillProgress,
9+
Parliament,
10+
BillTypes,
11+
ChamberProgress,
12+
Timestamp,
13+
)
814
from ausbills.util.consts import *
915

10-
BASE_URL = 'https://legislation.act.gov.au'
16+
BASE_URL = "https://legislation.act.gov.au"
1117

1218

1319
class ACTBillList(BillListExtractor):
1420
def __init__(self):
15-
bill_volume = self._download_html(
16-
'https://legislation.act.gov.au/results?category=cBil&status=Current&action=browse').find(
17-
'table', {'id': 'results-table-bill'}).find('tbody')
21+
bill_volume = (
22+
self._download_html(
23+
"https://legislation.act.gov.au/results?category=cBil&status=Current&action=browse"
24+
)
25+
.find("table", {"id": "results-table-bill"})
26+
.find("tbody")
27+
)
1828
self._bill_list = self._get_bill_list(bill_volume)
1929

2030
def _get_bill_list(self, bill_volume):
2131
bill_list = []
2232
has_passed = False
23-
for row in bill_volume.find_all('tr', recursive=False):
24-
bill_intro_date = row.find('td')['data-order']
25-
__title_col = row.find_all('td')[1]
33+
for row in bill_volume.find_all("tr", recursive=False):
34+
bill_intro_date = row.find("td")["data-order"]
35+
__title_col = row.find_all("td")[1]
2636
bill_title = __title_col.text.strip()
27-
bill_url = BASE_URL + __title_col.find('a')['href']
28-
bill_intro = self._get_timestamp(bill_intro_date[:8], '%Y%m%d')
37+
bill_url = BASE_URL + __title_col.find("a")["href"]
38+
bill_intro = self._get_timestamp(bill_intro_date[:8], "%Y%m%d")
2939

30-
__status_col = row.find_all('td')[-1]
31-
if __status_col['data-order'] == 'passed':
40+
__status_col = row.find_all("td")[-1]
41+
if __status_col["data-order"] == "passed":
3242
has_passed = True
33-
passed_date = self._get_timestamp(
34-
__status_col.contents[1], '%d %B %Y')
43+
passed_date = self._get_timestamp(__status_col.contents[1], "%d %B %Y")
3544
else:
3645
passed_date = None
37-
bill_type = self._parse_type(row.find_all('td')[2].text)
46+
bill_type = self._parse_type(row.find_all("td")[2].text)
3847
bill_id = bill_url[-6:-1]
3948

4049
if has_passed:
41-
prog_dict = {BillProgress.FIRST.value: True, BillProgress.ASSENTED.value: True}
50+
prog_dict = {
51+
BillProgress.FIRST.value: True,
52+
BillProgress.ASSENTED.value: True,
53+
}
4254
chamber_progress = ChamberProgress.THIRD_READING.value
43-
else:
44-
prog_dict = {BillProgress.FIRST.value: True, BillProgress.ASSENTED.value: False} # Bills will always remain in the first house in a unicameral parliament
55+
else:
56+
prog_dict = {
57+
BillProgress.FIRST.value: True,
58+
BillProgress.ASSENTED.value: False,
59+
} # Bills will always remain in the first house in a unicameral parliament
4560
chamber_progress = ChamberProgress.FIRST_READING.value
4661

47-
bill_list.append({
48-
TITLE: bill_title,
49-
URL: bill_url,
50-
BILL_TYPE: bill_type,
51-
INTRO_ASSEMBLY: bill_intro,
52-
PASSED_ASSEMBLY: passed_date,
53-
PASSED: prog_dict,
54-
CHAMBER_PROGRESS: chamber_progress,
55-
ID: bill_id,
56-
})
62+
bill_list.append(
63+
{
64+
TITLE: bill_title,
65+
URL: bill_url,
66+
BILL_TYPE: bill_type,
67+
INTRO_ASSEMBLY: bill_intro,
68+
PASSED_ASSEMBLY: passed_date,
69+
PASSED: prog_dict,
70+
CHAMBER_PROGRESS: chamber_progress,
71+
ID: bill_id,
72+
}
73+
)
5774
return bill_list
5875

5976
def _parse_type(self, type_string):
60-
if type_string == 'GOV':
77+
if type_string == "GOV":
6178
return BillTypes.GOVERNMENT.value
62-
elif type_string == 'PMB':
79+
elif type_string == "PMB":
6380
return BillTypes.PRIVATE_MEMBER.value
6481

6582

@@ -86,76 +103,92 @@ class ACTBillObject(BillExtractor):
86103

87104
def __init__(self, bill_meta: BillMetaACT):
88105
self.bill_soup = self._download_html(bill_meta.link)
89-
self.bill_meta_list = self.bill_soup.find('dl').find_all('dd')
106+
self.bill_meta_list = self.bill_soup.find("dl").find_all("dd")
90107
self.url = bill_meta.link
91-
if(len(self.bill_meta_list) is None):
108+
if len(self.bill_meta_list) is None:
92109
raise self.ExtractorError(
93-
f'Could not find extra bill metadata:\n\n{self.bill_meta_list}')
110+
f"Could not find extra bill metadata:\n\n{self.bill_meta_list}"
111+
)
94112

95113
def __str__(self):
96114
return f"<Bill | URL: '{self.url}'>"
97115

98116
def __repr__(self):
99-
return ('<{}.{} : {} object at {}>'.format(
117+
return "<{}.{} : {} object at {}>".format(
100118
self.__class__.__module__,
101119
self.__class__.__name__,
102-
self.url.split('/b/')[-1].replace('/', ''),
103-
hex(id(self))))
120+
self.url.split("/b/")[-1].replace("/", ""),
121+
hex(id(self)),
122+
)
104123

105124
def _get_sponsor(self):
106125
return self.bill_meta_list[1].text.strip()
107126

108127
def _get_text_links(self):
109128
urls = []
110-
table = self.bill_soup.find(
111-
'h3', {'tabindex': '0'}).findNext('table').find('tbody')
112-
for index, entry in enumerate(table.find_all('tr')):
129+
table = (
130+
self.bill_soup.find("h3", {"tabindex": "0"}).findNext("table").find("tbody")
131+
)
132+
for index, entry in enumerate(table.find_all("tr")):
113133
time = self._get_timestamp(
114-
table.find_all('td')[1]['data-order'][:8], '%Y%m%d')
115-
url = BASE_URL + entry.find(
116-
'a', {'class': 'button download pdf'})['href']
117-
118-
urls.append({
119-
'__time': time,
120-
'__id': index,
121-
'url': url,
122-
})
134+
table.find_all("td")[1]["data-order"][:8], "%Y%m%d"
135+
)
136+
url = BASE_URL + entry.find("a", {"class": "button download pdf"})["href"]
137+
138+
urls.append(
139+
{
140+
"__time": time,
141+
"__id": index,
142+
"url": url,
143+
}
144+
)
123145
return urls
124146

125147
def _get_em_links(self):
126148
urls = []
127-
table = self.bill_soup.find(
128-
'h3', {'tabindex': None}).findNext('table').find('tbody')
129-
for index, row in enumerate(table.find_all('tr')):
149+
table = (
150+
self.bill_soup.find("h3", {"tabindex": None})
151+
.findNext("table")
152+
.find("tbody")
153+
)
154+
for index, row in enumerate(table.find_all("tr")):
130155
time = self._get_timestamp(
131-
table.find_all('td')[1]['data-order'][:8], '%Y%m%d')
132-
url = BASE_URL + row.find(
133-
'a', {'class': 'button download pdf'})['href']
134-
urls.append({
135-
'__time': time,
136-
'__id': index,
137-
'url': url,
138-
'house': BillProgress.FIRST.value,
139-
})
156+
table.find_all("td")[1]["data-order"][:8], "%Y%m%d"
157+
)
158+
url = BASE_URL + row.find("a", {"class": "button download pdf"})["href"]
159+
urls.append(
160+
{
161+
"__time": time,
162+
"__id": index,
163+
"url": url,
164+
"house": BillProgress.FIRST.value,
165+
}
166+
)
140167
return urls
141168

142169
def _get_scrutiny_link(self):
143-
notes_col = self.bill_soup.find(
144-
'h3', {'tabindex': '0'}).findNext(
145-
'table').find('tbody').find('td', {'class': 'notes'})
146-
if(notes_col is not None):
147-
for a in notes_col.find_all('a'):
148-
if(a.contents[0] == 'Scrutiny Committee report'):
149-
return a['href']
170+
notes_col = (
171+
self.bill_soup.find("h3", {"tabindex": "0"})
172+
.findNext("table")
173+
.find("tbody")
174+
.find("td", {"class": "notes"})
175+
)
176+
if notes_col is not None:
177+
for a in notes_col.find_all("a"):
178+
if a.contents[0] == "Scrutiny Committee report":
179+
return a["href"]
150180

151181
def _get_speech_link(self):
152-
notes_col = self.bill_soup.find(
153-
'h3', {'tabindex': '0'}).findNext(
154-
'table').find('tbody').find('td', {'class': 'notes'})
155-
if(notes_col is not None):
156-
for a in notes_col.find_all('a'):
157-
if(a.contents[0] == 'Presentation speech'):
158-
return a['href']
182+
notes_col = (
183+
self.bill_soup.find("h3", {"tabindex": "0"})
184+
.findNext("table")
185+
.find("tbody")
186+
.find("td", {"class": "notes"})
187+
)
188+
if notes_col is not None:
189+
for a in notes_col.find_all("a"):
190+
if a.contents[0] == "Presentation speech":
191+
return a["href"]
159192

160193

161194
def get_bills_metadata() -> List[BillMetaACT]:
@@ -171,20 +204,22 @@ def get_bills_metadata() -> List[BillMetaACT]:
171204
passed_assembly=bill_dict[PASSED_ASSEMBLY],
172205
intro_assembly=bill_dict[INTRO_ASSEMBLY],
173206
id=bill_dict[ID],
174-
chamber_progress=bill_dict[CHAMBER_PROGRESS]
207+
chamber_progress=bill_dict[CHAMBER_PROGRESS],
175208
)
176209
_bill_meta_list.append(bill_meta)
177-
return(_bill_meta_list)
210+
return _bill_meta_list
178211

179212

180213
def get_bill(bill_meta: BillMetaACT) -> BillACT:
181214
act_helper = ACTBillObject(bill_meta)
182215
bill_act = BillACT(
183-
**dataclasses.asdict(bill_meta), # Copy metadata we already got as separate instance.
216+
**dataclasses.asdict(
217+
bill_meta
218+
), # Copy metadata we already got as separate instance.
184219
sponsor=act_helper._get_sponsor(),
185220
bill_text_links=act_helper._get_text_links(),
186221
bill_em_links=act_helper._get_em_links(),
187222
intro_speech=act_helper._get_speech_link(),
188-
scrutiny_report=act_helper._get_scrutiny_link()
223+
scrutiny_report=act_helper._get_scrutiny_link(),
189224
)
190225
return bill_act

0 commit comments

Comments
 (0)