Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions addons/base/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from framework.flask import redirect
from framework.sentry import log_exception
from framework.transactions.handlers import no_auto_transaction
from osf.metrics.es8_metrics import OsfCountedUsageRecord
from website import settings
from addons.base import signals as file_signals
from addons.base.utils import format_last_known_metadata, get_mfr_url
Expand Down Expand Up @@ -691,6 +692,17 @@ def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node):
version=fileversion.identifier,
path=file_node.path,
)
OsfCountedUsageRecord.record(
user_id=getattr(user, '_id', None),
item_osfid=resource._id,
action_labels=[
OsfCountedUsageRecord.ActionLabel.VIEW.value,
OsfCountedUsageRecord.ActionLabel.WEB.value,
],
# HACK: we don't have the user request, so fabricate a one-off session id
# (this means no double-click filtering and inflated "unique" view counts)
client_session_id=str(uuid.uuid4()),
)
except es_exceptions.ConnectionError:
log_exception()

Expand Down Expand Up @@ -718,6 +730,16 @@ def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node)
version=fileversion.identifier,
path=file_node.path,
)
OsfCountedUsageRecord.record(
user_id=getattr(user, '_id', None),
item_osfid=resource._id,
action_labels=[
OsfCountedUsageRecord.ActionLabel.DOWNLOAD.value,
],
# HACK: we don't have the user request, so fabricate a one-off session id
# (this means no double-click filtering and inflated "unique" download counts)
client_session_id=str(uuid.uuid4()),
)
except es_exceptions.ConnectionError:
log_exception()

Expand Down
19 changes: 18 additions & 1 deletion api/metrics/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from api.base.serializers import BaseAPISerializer
from api.base.utils import absolute_reverse
from osf.metrics.counted_usage import CountedAuthUsage, PageviewInfo
from osf.metrics.es8_metrics import (
OsfCountedUsageRecord,
PageviewInfo as PageviewInfoEs8,
)
from website import settings as website_settings

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -42,7 +46,7 @@ class PageviewInfoSerializer(ser.Serializer):


class CountedAuthUsageSerializer(ser.Serializer):
item_guid = ser.CharField(max_length=255, required=False)
item_guid = ser.CharField(max_length=255, required=True)
client_session_id = ser.CharField(max_length=255, required=False)
provider_id = ser.CharField(max_length=255, required=False)

Expand All @@ -64,8 +68,21 @@ def validate(self, data):

def create(self, validated_data):
pageview_info = None
pageview_info_es8 = None
if pageview_info_data := validated_data.get('pageview_info'):
pageview_info = PageviewInfo(**pageview_info_data)
pageview_info_es8 = PageviewInfoEs8(**pageview_info_data)
OsfCountedUsageRecord.record(
item_osfid=validated_data['item_guid'],
action_labels=validated_data.get('action_labels'),
provider_id=validated_data.get('provider_id'),
pageview_info=pageview_info_es8,
# used to create a COUNTER session-hour id, not stored:
client_session_id=validated_data.get('client_session_id'),
user_id=self.context.get('user_id'),
request_host=self.context.get('request_host'),
request_useragent=self.context.get('request_useragent'),
)
return CountedAuthUsage.record(
platform_iri=website_settings.DOMAIN,
provider_id=validated_data.get('provider_id'),
Expand Down
11 changes: 10 additions & 1 deletion api/metrics/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,14 @@ class CountedAuthUsageView(JSONAPIBaseView):
serializer_class = CountedAuthUsageSerializer

def post(self, request, *args, **kwargs):
serializer = self.serializer_class(data=request.data)
serializer = self.serializer_class(
data=request.data,
context={
'user_id': request.user._id if request.user.is_authenticated else None,
'request_host': request.get_host(),
'request_useragent': request.META.get('HTTP_USER_AGENT', ''),
},
)
serializer.is_valid(raise_exception=True)
session_id, user_is_authenticated = self._get_session_id(
request,
Expand All @@ -404,6 +411,8 @@ def post(self, request, *args, **kwargs):
return HttpResponse(status=201)

def _get_session_id(self, request, client_session_id=None):
# NOTE: to remove after osfmetrics 6to8 migration -- logic moved to djelme

# get a session id as described in the COUNTER code of practice:
# https://cop5.projectcounter.org/en/5.0.2/07-processing/03-counting-unique-items.html
# -- different from the "login session" tracked by `osf.models.Session` (which
Expand Down
68 changes: 49 additions & 19 deletions api_tests/metrics/test_counted_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# UserFactory,
)
from api_tests.utils import create_test_file
from elasticsearch_metrics.tests.util import djelme_test_backends


COUNTED_USAGE_URL = '/_/metrics/events/counted_usage/'
Expand Down Expand Up @@ -69,6 +70,12 @@ def test_required_attributes(self, app, attrs):

@pytest.mark.django_db
class TestComputedFields:

@pytest.fixture(autouse=True)
def _real_elastic(self):
    """Keep the ``djelme_test_backends()`` context open around each test."""
    _backends = djelme_test_backends()
    with _backends:
        # the test body runs here, while the context manager is entered
        yield

@pytest.fixture(autouse=True)
def mock_domain(self):
domain = 'http://example.foo/'
Expand All @@ -81,15 +88,22 @@ def mock_now(self):
with mock.patch('django.utils.timezone.now', return_value=timestamp):
yield timestamp

@pytest.fixture
def preprint(self, request):
    """Build a preprint via ``PreprintFactory`` flagged public and published."""
    # `request` is accepted but not read anywhere in this body
    _flags = {'is_public': True, 'is_published': True}
    return PreprintFactory(**_flags)

@pytest.fixture()
def user(self):
    """Create an ``AuthUserFactory`` user while ``generate_guid`` returns 'guidy'."""
    # patch only for the duration of the factory call
    _fixed_guid = mock.patch('osf.models.base.generate_guid', return_value='guidy')
    with _fixed_guid:
        return AuthUserFactory()

def test_by_client_session_id(self, app, mock_save, user):
def test_by_client_session_id(self, app, mock_save, user, preprint):
payload = counted_usage_payload(
client_session_id='hello',
item_guid='zyxwv',
item_guid=preprint._id,
action_labels=['view', 'api'],
pageview_info={'page_url': 'http://example.foo/blahblah/blee'},
)
Expand All @@ -104,7 +118,7 @@ def test_by_client_session_id(self, app, mock_save, user):
expected_doc_id='3239044c7462dd318edd0522a0ed7d84b9c6502ef16cb40dfcae6c1f456d57a2',
expected_attrs={
'platform_iri': 'http://example.foo/',
'item_guid': 'zyxwv',
'item_guid': preprint._id,
# session_id: sha256(b'hello|1981-01-01').hexdigest()
'session_id': '5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34',
'action_labels': ['view', 'api'],
Expand All @@ -116,10 +130,10 @@ def test_by_client_session_id(self, app, mock_save, user):
},
)

def test_by_client_session_id_anon(self, app, mock_save):
def test_by_client_session_id_anon(self, app, mock_save, preprint):
payload = counted_usage_payload(
client_session_id='hello',
item_guid='zyxwv',
item_guid=preprint._id,
action_labels=['view', 'web'],
pageview_info={
'page_url': 'http://example.foo/bliz/',
Expand All @@ -137,7 +151,7 @@ def test_by_client_session_id_anon(self, app, mock_save):
expected_doc_id='d01759e963893f9dc9b2ccf016a5ef29135673779802b5578f31449543677e82',
expected_attrs={
'platform_iri': 'http://example.foo/',
'item_guid': 'zyxwv',
'item_guid': preprint._id,
# session_id: sha256(b'hello|1981-01-01').hexdigest()
'session_id': '5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34',
'action_labels': ['view', 'web'],
Expand All @@ -151,9 +165,9 @@ def test_by_client_session_id_anon(self, app, mock_save):
},
)

def test_by_user_auth(self, app, mock_save, user):
def test_by_user_auth(self, app, mock_save, user, preprint):
payload = counted_usage_payload(
item_guid='yxwvu',
item_guid=preprint._id,
action_labels=['view', 'web'],
pageview_info={
'page_url': 'http://osf.io/mst3k',
Expand All @@ -171,7 +185,7 @@ def test_by_user_auth(self, app, mock_save, user):
expected_doc_id='7b8bc27c6d90fb45aa5bbd02deceba9f7384ed61b9a6e7253317c262020b94c2',
expected_attrs={
'platform_iri': 'http://example.foo/',
'item_guid': 'yxwvu',
'item_guid': preprint._id,
# session_id: sha256(b'guidy|1981-01-01|0').hexdigest()
'session_id': 'ec768abb16c3411570af99b9d635c2c32d1ca31d1b25eec8ee73759e7242e74a',
'action_labels': ['view', 'web'],
Expand All @@ -185,10 +199,14 @@ def test_by_user_auth(self, app, mock_save, user):
},
)

def test_by_useragent_header(self, app, mock_save):
def test_by_useragent_header(self, app, mock_save, preprint):
payload = counted_usage_payload(
item_guid='yxwvu',
item_guid=preprint._id,
action_labels=['view', 'api'],
pageview_info={
'page_url': 'http://example.foo/bliz/',
'referer_url': 'http://elsewhere.baz/index.php',
},
)
headers = {
'User-Agent': 'haha',
Expand All @@ -198,21 +216,33 @@ def test_by_useragent_header(self, app, mock_save):
assert_saved_with(
mock_save,
# doc_id: sha256(b'http://example.foo/|yxwvu|97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a|1981-01-01|3|api,view').hexdigest()
expected_doc_id='d669528b30f443ffe506e183537af9624ef290090e90a200ecce7b7ca19c77f7',
expected_doc_id='6d7549df6734bb955eb832c6316ffae46c2959c95b5817ab4fcb341dbc875c23',
expected_attrs={
'platform_iri': 'http://example.foo/',
'item_guid': 'yxwvu',
'item_guid': preprint._id,
# session_id: sha256(b'localhost:80|haha|1981-01-01|0').hexdigest()
'session_id': '97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a',
'action_labels': ['view', 'api'],
'pageview_info': None,
'pageview_info': {
'page_url': 'http://example.foo/bliz/',
'page_path': '/bliz',
'referer_url': 'http://elsewhere.baz/index.php',
'referer_domain': 'elsewhere.baz',
'hour_of_day': 0,
},
},
)


@pytest.mark.parametrize('item_public', [True, False])
@pytest.mark.django_db
class TestGuidFields:

@pytest.fixture(autouse=True)
def _real_elastic(self):
    """Keep the ``djelme_test_backends()`` context open around each test."""
    _backends = djelme_test_backends()
    with _backends:
        # the test body runs here, while the context manager is entered
        yield

@pytest.fixture
def preprint(self, item_public):
return PreprintFactory(
Expand Down Expand Up @@ -257,7 +287,7 @@ def test_preprint_file(self, app, mock_save, preprint, item_public):
item_guid=preprint._id,
action_labels=['view', 'web'],
)
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
assert resp.status_code == 201
assert_saved_with(
mock_save,
Expand All @@ -276,7 +306,7 @@ def test_preprint_file(self, app, mock_save, preprint, item_public):
item_guid=preprint.primary_file.get_guid(create=True)._id,
action_labels=['view', 'web'],
)
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
assert resp.status_code == 201
assert_saved_with(
mock_save,
Expand All @@ -295,7 +325,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil
item_guid=child_reg_file_guid,
action_labels=['view', 'web'],
)
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
assert resp.status_code == 201
assert_saved_with(
mock_save,
Expand All @@ -318,7 +348,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil
item_guid=child_reg._id,
action_labels=['view', 'web'],
)
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
assert resp.status_code == 201
assert_saved_with(
mock_save,
Expand All @@ -340,7 +370,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil
item_guid=parent_reg._id,
action_labels=['view', 'web'],
)
resp = app.post_json_api(COUNTED_USAGE_URL, payload)
resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'})
assert resp.status_code == 201
assert_saved_with(
mock_save,
Expand Down
4 changes: 2 additions & 2 deletions osf/management/commands/monthly_reporters_go.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ def monthly_reporter_do(reporter_key: str, yearmonth: str, report_kwargs: dict):
framework.sentry.log_exception(exc)
return

_report = _reporter.report(**report_kwargs)
if _report is not None:
_reports = _reporter.report(**report_kwargs)
for _report in _reports:
_report.report_yearmonth = _reporter.yearmonth
_report.save()
_followup_task = _reporter.followup_task(_report)
Expand Down
27 changes: 19 additions & 8 deletions osf/metrics/es8_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,28 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord):
class Meta:
timeseries_index_timedepth = MONTHLY

@enum.unique
class ActionLabel(enum.Enum):
    """String labels describing how an item was used, with their COUNTER terms.

    ``@enum.unique`` makes an accidentally duplicated value raise at class
    creation instead of silently becoming an alias of an existing label.
    """

    SEARCH = 'search'  # counter:Search
    VIEW = 'view'  # counter:Investigation
    DOWNLOAD = 'download'  # counter:Request
    WEB = 'web'  # counter:Regular (aka "pageview")
    API = 'api'  # counter:TDM (aka "non-web api usage")

@classmethod
def record(cls, **kwargs):
    """Record a usage event, defaulting ``user_is_authenticated`` from ``user_id``.

    An explicitly passed ``user_is_authenticated`` is left as given;
    otherwise it is set to the truthiness of ``user_id``. Every keyword
    argument is then forwarded to the parent class's ``record``.
    """
    # filled in here rather than in `clean` because `user_id` is
    # discarded before `clean` could use it
    kwargs.setdefault('user_is_authenticated', bool(kwargs.get('user_id')))
    return super().record(**kwargs)

@functools.cached_property
def _osfid_referent(self):
    """Return ``osfdb.Guid.load(self.item_osfid)``, cached per instance.

    Provided for the autofill methods to use, if needed.
    """
    # NOTE(review): this returns whatever `Guid.load` returns; confirm
    # whether callers expect the Guid itself or the object it refers to
    _osfid = self.item_osfid
    return osfdb.Guid.load(_osfid)

def clean(self):
super().clean()
self._autofill_platform_iri()
self._autofill_item_iri_and_osfid()
self._autofill_item_public()
self._autofill_item_type()
Expand All @@ -126,6 +141,10 @@ def clean(self):
self._autofill_pageview()
self._autofill_database_iri()

def _autofill_platform_iri(self):
    """Default ``platform_iri`` to ``website_settings.DOMAIN`` when it is None."""
    if self.platform_iri is not None:
        return  # a value was already provided; leave it alone
    self.platform_iri = website_settings.DOMAIN

def _autofill_item_iri_and_osfid(self):
if self.item_osfid and not self.item_iri:
self.item_iri = osf_iri(self.item_osfid)
Expand Down Expand Up @@ -227,14 +246,6 @@ def _get_unique_together_values(self):
)


class ActionLabel(enum.Enum):
SEARCH = 'search' # counter:Search
VIEW = 'view' # counter:Investigation
DOWNLOAD = 'download' # counter:Request
WEB = 'web' # counter:Regular (aka "pageview")
API = 'api' # counter:TDM (aka "non-web api usage")


class RegistriesModerationMetricsEs8(djelme.EventRecord):
registration_id: str
provider_id: str
Expand Down
Loading
Loading