Skip to content

Commit dce545d

Browse files
authored
Merge pull request #11702 from bodintsov/feature/save-data-metrics-both-old-and-new
[ENG-9704] Feature/save data metrics both old and new
2 parents ce85704 + c4fa5f7 commit dce545d

24 files changed

Lines changed: 627 additions & 160 deletions

addons/base/views.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from framework.flask import redirect
3535
from framework.sentry import log_exception
3636
from framework.transactions.handlers import no_auto_transaction
37+
from osf.metrics.es8_metrics import OsfCountedUsageRecord
3738
from website import settings
3839
from addons.base import signals as file_signals
3940
from addons.base.utils import format_last_known_metadata, get_mfr_url
@@ -691,6 +692,17 @@ def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node):
691692
version=fileversion.identifier,
692693
path=file_node.path,
693694
)
695+
OsfCountedUsageRecord.record(
696+
user_id=getattr(user, '_id', None),
697+
item_osfid=resource._id,
698+
action_labels=[
699+
OsfCountedUsageRecord.ActionLabel.VIEW.value,
700+
OsfCountedUsageRecord.ActionLabel.WEB.value,
701+
],
702+
# HACK: we don't have the user request, so fabricate a one-off session id
703+
# (this means no double-click filtering and inflated "unique" view counts)
704+
client_session_id=str(uuid.uuid4()),
705+
)
694706
except es_exceptions.ConnectionError:
695707
log_exception()
696708

@@ -718,6 +730,16 @@ def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node)
718730
version=fileversion.identifier,
719731
path=file_node.path,
720732
)
733+
OsfCountedUsageRecord.record(
734+
user_id=getattr(user, '_id', None),
735+
item_osfid=resource._id,
736+
action_labels=[
737+
OsfCountedUsageRecord.ActionLabel.DOWNLOAD.value,
738+
],
739+
# HACK: we don't have the user request, so fabricate a one-off session id
740+
# (this means no double-click filtering and inflated "unique" download counts)
741+
client_session_id=str(uuid.uuid4()),
742+
)
721743
except es_exceptions.ConnectionError:
722744
log_exception()
723745

api/metrics/serializers.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,10 @@
66
from api.base.serializers import BaseAPISerializer
77
from api.base.utils import absolute_reverse
88
from osf.metrics.counted_usage import CountedAuthUsage, PageviewInfo
9+
from osf.metrics.es8_metrics import (
10+
OsfCountedUsageRecord,
11+
PageviewInfo as PageviewInfoEs8,
12+
)
913
from website import settings as website_settings
1014

1115
logger = logging.getLogger(__name__)
@@ -42,7 +46,7 @@ class PageviewInfoSerializer(ser.Serializer):
4246

4347

4448
class CountedAuthUsageSerializer(ser.Serializer):
45-
item_guid = ser.CharField(max_length=255, required=False)
49+
item_guid = ser.CharField(max_length=255, required=True)
4650
client_session_id = ser.CharField(max_length=255, required=False)
4751
provider_id = ser.CharField(max_length=255, required=False)
4852

@@ -64,8 +68,21 @@ def validate(self, data):
6468

6569
def create(self, validated_data):
6670
pageview_info = None
71+
pageview_info_es8 = None
6772
if pageview_info_data := validated_data.get('pageview_info'):
6873
pageview_info = PageviewInfo(**pageview_info_data)
74+
pageview_info_es8 = PageviewInfoEs8(**pageview_info_data)
75+
OsfCountedUsageRecord.record(
76+
item_osfid=validated_data['item_guid'],
77+
action_labels=validated_data.get('action_labels'),
78+
provider_id=validated_data.get('provider_id'),
79+
pageview_info=pageview_info_es8,
80+
# used to create a COUNTER session-hour id, not stored:
81+
client_session_id=validated_data.get('client_session_id'),
82+
user_id=self.context.get('user_id'),
83+
request_host=self.context.get('request_host'),
84+
request_useragent=self.context.get('request_useragent'),
85+
)
6986
return CountedAuthUsage.record(
7087
platform_iri=website_settings.DOMAIN,
7188
provider_id=validated_data.get('provider_id'),

api/metrics/views.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,14 @@ class CountedAuthUsageView(JSONAPIBaseView):
394394
serializer_class = CountedAuthUsageSerializer
395395

396396
def post(self, request, *args, **kwargs):
397-
serializer = self.serializer_class(data=request.data)
397+
serializer = self.serializer_class(
398+
data=request.data,
399+
context={
400+
'user_id': request.user._id if request.user.is_authenticated else None,
401+
'request_host': request.get_host(),
402+
'request_useragent': request.META.get('HTTP_USER_AGENT', ''),
403+
},
404+
)
398405
serializer.is_valid(raise_exception=True)
399406
session_id, user_is_authenticated = self._get_session_id(
400407
request,
@@ -404,6 +411,8 @@ def post(self, request, *args, **kwargs):
404411
return HttpResponse(status=201)
405412

406413
def _get_session_id(self, request, client_session_id=None):
414+
# NOTE: to remove after osfmetrics 6to8 migration -- logic moved to djelme
415+
407416
# get a session id as described in the COUNTER code of practice:
408417
# https://cop5.projectcounter.org/en/5.0.2/07-processing/03-counting-unique-items.html
409418
# -- different from the "login session" tracked by `osf.models.Session` (which

api_tests/metrics/test_counted_usage.py

Lines changed: 49 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
# UserFactory,
1212
)
1313
from api_tests.utils import create_test_file
14+
from elasticsearch_metrics.tests.util import djelme_test_backends
1415

1516

1617
COUNTED_USAGE_URL = '/_/metrics/events/counted_usage/'
@@ -69,6 +70,12 @@ def test_required_attributes(self, app, attrs):
6970

7071
@pytest.mark.django_db
7172
class TestComputedFields:
73+
74+
@pytest.fixture(autouse=True)
75+
def _real_elastic(self):
76+
with djelme_test_backends():
77+
yield
78+
7279
@pytest.fixture(autouse=True)
7380
def mock_domain(self):
7481
domain = 'http://example.foo/'
@@ -81,15 +88,22 @@ def mock_now(self):
8188
with mock.patch('django.utils.timezone.now', return_value=timestamp):
8289
yield timestamp
8390

91+
@pytest.fixture
92+
def preprint(self, request):
93+
return PreprintFactory(
94+
is_public=True,
95+
is_published=True,
96+
)
97+
8498
@pytest.fixture()
8599
def user(self):
86100
with mock.patch('osf.models.base.generate_guid', return_value='guidy'):
87101
return AuthUserFactory()
88102

89-
def test_by_client_session_id(self, app, mock_save, user):
103+
def test_by_client_session_id(self, app, mock_save, user, preprint):
90104
payload = counted_usage_payload(
91105
client_session_id='hello',
92-
item_guid='zyxwv',
106+
item_guid=preprint._id,
93107
action_labels=['view', 'api'],
94108
pageview_info={'page_url': 'http://example.foo/blahblah/blee'},
95109
)
@@ -104,7 +118,7 @@ def test_by_client_session_id(self, app, mock_save, user):
104118
expected_doc_id='3239044c7462dd318edd0522a0ed7d84b9c6502ef16cb40dfcae6c1f456d57a2',
105119
expected_attrs={
106120
'platform_iri': 'http://example.foo/',
107-
'item_guid': 'zyxwv',
121+
'item_guid': preprint._id,
108122
# session_id: sha256(b'hello|1981-01-01').hexdigest()
109123
'session_id': '5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34',
110124
'action_labels': ['view', 'api'],
@@ -116,10 +130,10 @@ def test_by_client_session_id(self, app, mock_save, user):
116130
},
117131
)
118132

119-
def test_by_client_session_id_anon(self, app, mock_save):
133+
def test_by_client_session_id_anon(self, app, mock_save, preprint):
120134
payload = counted_usage_payload(
121135
client_session_id='hello',
122-
item_guid='zyxwv',
136+
item_guid=preprint._id,
123137
action_labels=['view', 'web'],
124138
pageview_info={
125139
'page_url': 'http://example.foo/bliz/',
@@ -137,7 +151,7 @@ def test_by_client_session_id_anon(self, app, mock_save):
137151
expected_doc_id='d01759e963893f9dc9b2ccf016a5ef29135673779802b5578f31449543677e82',
138152
expected_attrs={
139153
'platform_iri': 'http://example.foo/',
140-
'item_guid': 'zyxwv',
154+
'item_guid': preprint._id,
141155
# session_id: sha256(b'hello|1981-01-01').hexdigest()
142156
'session_id': '5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34',
143157
'action_labels': ['view', 'web'],
@@ -151,9 +165,9 @@ def test_by_client_session_id_anon(self, app, mock_save):
151165
},
152166
)
153167

154-
def test_by_user_auth(self, app, mock_save, user):
168+
def test_by_user_auth(self, app, mock_save, user, preprint):
155169
payload = counted_usage_payload(
156-
item_guid='yxwvu',
170+
item_guid=preprint._id,
157171
action_labels=['view', 'web'],
158172
pageview_info={
159173
'page_url': 'http://osf.io/mst3k',
@@ -171,7 +185,7 @@ def test_by_user_auth(self, app, mock_save, user):
171185
expected_doc_id='7b8bc27c6d90fb45aa5bbd02deceba9f7384ed61b9a6e7253317c262020b94c2',
172186
expected_attrs={
173187
'platform_iri': 'http://example.foo/',
174-
'item_guid': 'yxwvu',
188+
'item_guid': preprint._id,
175189
# session_id: sha256(b'guidy|1981-01-01|0').hexdigest()
176190
'session_id': 'ec768abb16c3411570af99b9d635c2c32d1ca31d1b25eec8ee73759e7242e74a',
177191
'action_labels': ['view', 'web'],
@@ -185,10 +199,14 @@ def test_by_user_auth(self, app, mock_save, user):
185199
},
186200
)
187201

188-
def test_by_useragent_header(self, app, mock_save):
202+
def test_by_useragent_header(self, app, mock_save, preprint):
189203
payload = counted_usage_payload(
190-
item_guid='yxwvu',
204+
item_guid=preprint._id,
191205
action_labels=['view', 'api'],
206+
pageview_info={
207+
'page_url': 'http://example.foo/bliz/',
208+
'referer_url': 'http://elsewhere.baz/index.php',
209+
},
192210
)
193211
headers = {
194212
'User-Agent': 'haha',
@@ -198,21 +216,33 @@ def test_by_useragent_header(self, app, mock_save):
198216
assert_saved_with(
199217
mock_save,
200218
# doc_id: sha256(b'http://example.foo/|yxwvu|97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a|1981-01-01|3|api,view').hexdigest()
201-
expected_doc_id='d669528b30f443ffe506e183537af9624ef290090e90a200ecce7b7ca19c77f7',
219+
expected_doc_id='6d7549df6734bb955eb832c6316ffae46c2959c95b5817ab4fcb341dbc875c23',
202220
expected_attrs={
203221
'platform_iri': 'http://example.foo/',
204-
'item_guid': 'yxwvu',
222+
'item_guid': preprint._id,
205223
# session_id: sha256(b'localhost:80|haha|1981-01-01|0').hexdigest()
206224
'session_id': '97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a',
207225
'action_labels': ['view', 'api'],
208-
'pageview_info': None,
226+
'pageview_info': {
227+
'page_url': 'http://example.foo/bliz/',
228+
'page_path': '/bliz',
229+
'referer_url': 'http://elsewhere.baz/index.php',
230+
'referer_domain': 'elsewhere.baz',
231+
'hour_of_day': 0,
232+
},
209233
},
210234
)
211235

212236

213237
@pytest.mark.parametrize('item_public', [True, False])
214238
@pytest.mark.django_db
215239
class TestGuidFields:
240+
241+
@pytest.fixture(autouse=True)
242+
def _real_elastic(self):
243+
with djelme_test_backends():
244+
yield
245+
216246
@pytest.fixture
217247
def preprint(self, item_public):
218248
return PreprintFactory(
@@ -257,7 +287,7 @@ def test_preprint_file(self, app, mock_save, preprint, item_public):
257287
item_guid=preprint._id,
258288
action_labels=['view', 'web'],
259289
)
260-
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
290+
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
261291
assert resp.status_code == 201
262292
assert_saved_with(
263293
mock_save,
@@ -276,7 +306,7 @@ def test_preprint_file(self, app, mock_save, preprint, item_public):
276306
item_guid=preprint.primary_file.get_guid(create=True)._id,
277307
action_labels=['view', 'web'],
278308
)
279-
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
309+
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
280310
assert resp.status_code == 201
281311
assert_saved_with(
282312
mock_save,
@@ -295,7 +325,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil
295325
item_guid=child_reg_file_guid,
296326
action_labels=['view', 'web'],
297327
)
298-
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
328+
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
299329
assert resp.status_code == 201
300330
assert_saved_with(
301331
mock_save,
@@ -318,7 +348,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil
318348
item_guid=child_reg._id,
319349
action_labels=['view', 'web'],
320350
)
321-
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
351+
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
322352
assert resp.status_code == 201
323353
assert_saved_with(
324354
mock_save,
@@ -340,7 +370,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil
340370
item_guid=parent_reg._id,
341371
action_labels=['view', 'web'],
342372
)
343-
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
373+
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
344374
assert resp.status_code == 201
345375
assert_saved_with(
346376
mock_save,

osf/management/commands/monthly_reporters_go.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ def monthly_reporter_do(reporter_key: str, yearmonth: str, report_kwargs: dict):
8585
framework.sentry.log_exception(exc)
8686
return
8787

88-
_report = _reporter.report(**report_kwargs)
89-
if _report is not None:
88+
_reports = _reporter.report(**report_kwargs)
89+
for _report in _reports:
9090
_report.report_yearmonth = _reporter.yearmonth
9191
_report.save()
9292
_followup_task = _reporter.followup_task(_report)

osf/metrics/es8_metrics.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,28 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
111111
class Meta:
112112
timeseries_index_timedepth = MONTHLY
113113

114+
class ActionLabel(enum.Enum):
115+
SEARCH = 'search' # counter:Search
116+
VIEW = 'view' # counter:Investigation
117+
DOWNLOAD = 'download' # counter:Request
118+
WEB = 'web' # counter:Regular (aka "pageview")
119+
API = 'api' # counter:TDM (aka "non-web api usage")
120+
121+
@classmethod
122+
def record(cls, **kwargs):
123+
# autofill `user_is_authenticated` before `user_id` discarded (couldn't in `clean`)
124+
if 'user_is_authenticated' not in kwargs:
125+
kwargs['user_is_authenticated'] = bool(kwargs.get('user_id'))
126+
return super().record(**kwargs)
127+
114128
@functools.cached_property
115129
def _osfid_referent(self):
116130
# for use by autofill methods, if needed
117131
return osfdb.Guid.load(self.item_osfid)
118132

119133
def clean(self):
120134
super().clean()
135+
self._autofill_platform_iri()
121136
self._autofill_item_iri_and_osfid()
122137
self._autofill_item_public()
123138
self._autofill_item_type()
@@ -126,6 +141,10 @@ def clean(self):
126141
self._autofill_pageview()
127142
self._autofill_database_iri()
128143

144+
def _autofill_platform_iri(self):
145+
if self.platform_iri is None:
146+
self.platform_iri = website_settings.DOMAIN
147+
129148
def _autofill_item_iri_and_osfid(self):
130149
if self.item_osfid and not self.item_iri:
131150
self.item_iri = osf_iri(self.item_osfid)
@@ -227,14 +246,6 @@ def _get_unique_together_values(self):
227246
)
228247

229248

230-
class ActionLabel(enum.Enum):
231-
SEARCH = 'search' # counter:Search
232-
VIEW = 'view' # counter:Investigation
233-
DOWNLOAD = 'download' # counter:Request
234-
WEB = 'web' # counter:Regular (aka "pageview")
235-
API = 'api' # counter:TDM (aka "non-web api usage")
236-
237-
238249
class RegistriesModerationMetricsEs8(djelme.EventRecord):
239250
registration_id: str
240251
provider_id: str

0 commit comments

Comments
 (0)