Skip to content

Commit 4be53dd

Browse files
committed
fix? double-counted usage
1 parent 3c24a6b commit 4be53dd

1 file changed

Lines changed: 44 additions & 32 deletions

File tree

osf/metrics/es8_metrics.py

Lines changed: 44 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,17 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
9090
https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html
9191
https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html
9292
'''
93+
UNIQUE_TOGETHER_FIELDS = (
94+
'platform_iri',
95+
'sessionhour_id',
96+
'action_labels',
97+
# include some non-field properties for more complex logic to
98+
# slightly better approximate `counter:Double-Click Filtering`
99+
# and allow for multiple pages describing the same item_iri
100+
'_page_url_or_osfid', # non-field property
101+
'_timestamp_date', # non-field property
102+
'_timestamp_30sec_window', # non-field property
103+
)
93104

94105
# inherited fields:
95106
# timestamp: datetime.datetime
@@ -125,14 +136,41 @@ def record(cls, **kwargs):
125136
kwargs['user_is_authenticated'] = bool(kwargs.get('user_id'))
126137
return super().record(**kwargs)
127138

139+
@property
140+
def _page_url_or_osfid(self):
141+
# for UNIQUE_TOGETHER_FIELDS
142+
return (
143+
self.pageview_info.page_url
144+
if self.pageview_info is not None and self.pageview_info.page_url is not None
145+
else self.item_osfid
146+
)
147+
148+
@property
149+
def _timestamp_date(self):
150+
# for UNIQUE_TOGETHER_FIELDS
151+
return self.timestamp.date()
152+
153+
@property
154+
def _timestamp_30sec_window(self):
155+
# for UNIQUE_TOGETHER_FIELDS
156+
# slice the day into an array of 30-second windows,
157+
# find this timestamp's windowslice index
158+
day_start = datetime.datetime(
159+
self.timestamp.year,
160+
self.timestamp.month,
161+
self.timestamp.day,
162+
tzinfo=self.timestamp.tzinfo,
163+
)
164+
time_in_seconds = (self.timestamp - day_start).total_seconds()
165+
return int(time_in_seconds / 30) # 30-second windows
166+
128167
@functools.cached_property
129168
def _osfid_referent(self):
130169
# for use by autofill methods, if needed
131170
_osfguid = osfdb.Guid.load(self.item_osfid)
132171
return _osfguid.referent if _osfguid else None
133172

134173
def clean(self):
135-
super().clean()
136174
self._autofill_platform_iri()
137175
self._autofill_item_iri_and_osfid()
138176
self._autofill_item_public()
@@ -141,6 +179,8 @@ def clean(self):
141179
self._autofill_within_iris()
142180
self._autofill_pageview()
143181
self._autofill_database_iri()
182+
self._clean_action_labels()
183+
super().clean()
144184

145185
def _autofill_platform_iri(self):
146186
if self.platform_iri is None:
@@ -214,37 +254,9 @@ def _autofill_database_iri(self):
214254
else:
215255
self.database_iri = _provider.get_semantic_iri()
216256

217-
def _get_unique_together_values(self):
218-
"""get "unique together" values for "ON CONFLICT UPDATE" behavior
219-
220-
override djelme.BaseDjelmeRecord._get_unique_together_values
221-
for more complex logic than UNIQUE_TOGETHER_FIELDS
222-
to slightly better approximate `counter:Double-Click Filtering`
223-
"""
224-
# note: copied from osf.metrics.counted_usage._fill_document_id
225-
target_identifier = (
226-
self.pageview_info.page_url
227-
if self.pageview_info is not None and self.pageview_info.page_url is not None
228-
else self.item_osfid
229-
)
230-
# slice the day into an array of 30-second windows,
231-
# find this timestamp's windowslice index
232-
day_start = datetime.datetime(
233-
self.timestamp.year,
234-
self.timestamp.month,
235-
self.timestamp.day,
236-
tzinfo=self.timestamp.tzinfo,
237-
)
238-
time_in_seconds = (self.timestamp - day_start).total_seconds()
239-
time_window = int(time_in_seconds / 30) # 30-second windows
240-
return ( # unique-together values:
241-
self.platform_iri,
242-
target_identifier,
243-
self.sessionhour_id,
244-
self.timestamp.date(),
245-
time_window,
246-
','.join(sorted(self.action_labels)),
247-
)
257+
def _clean_action_labels(self):
258+
if self.action_labels:
259+
self.action_labels = sorted(self.action_labels)
248260

249261

250262
class RegistriesModerationMetricsEs8(djelme.EventRecord):

0 commit comments

Comments
 (0)