diff --git a/addons/base/views.py b/addons/base/views.py index ebcd662966b..1e4a8433469 100644 --- a/addons/base/views.py +++ b/addons/base/views.py @@ -34,6 +34,7 @@ from framework.flask import redirect from framework.sentry import log_exception from framework.transactions.handlers import no_auto_transaction +from osf.metrics.es8_metrics import OsfCountedUsageRecord from website import settings from addons.base import signals as file_signals from addons.base.utils import format_last_known_metadata, get_mfr_url @@ -691,6 +692,17 @@ def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node): version=fileversion.identifier, path=file_node.path, ) + OsfCountedUsageRecord.record( + user_id=getattr(user, '_id', None), + item_osfid=resource._id, + action_labels=[ + OsfCountedUsageRecord.ActionLabel.VIEW.value, + OsfCountedUsageRecord.ActionLabel.WEB.value, + ], + # HACK: we don't have the user request, so fabricate a one-off session id + # (this means no double-click filtering and inflated "unique" view counts) + client_session_id=str(uuid.uuid4()), + ) except es_exceptions.ConnectionError: log_exception() @@ -718,6 +730,16 @@ def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node) version=fileversion.identifier, path=file_node.path, ) + OsfCountedUsageRecord.record( + user_id=getattr(user, '_id', None), + item_osfid=resource._id, + action_labels=[ + OsfCountedUsageRecord.ActionLabel.DOWNLOAD.value, + ], + # HACK: we don't have the user request, so fabricate a one-off session id + # (this means no double-click filtering and inflated "unique" download counts) + client_session_id=str(uuid.uuid4()), + ) except es_exceptions.ConnectionError: log_exception() diff --git a/api/metrics/serializers.py b/api/metrics/serializers.py index 5bbde293505..1e867b43981 100644 --- a/api/metrics/serializers.py +++ b/api/metrics/serializers.py @@ -6,6 +6,10 @@ from api.base.serializers import BaseAPISerializer from api.base.utils import 
absolute_reverse from osf.metrics.counted_usage import CountedAuthUsage, PageviewInfo +from osf.metrics.es8_metrics import ( + OsfCountedUsageRecord, + PageviewInfo as PageviewInfoEs8, +) from website import settings as website_settings logger = logging.getLogger(__name__) @@ -42,7 +46,7 @@ class PageviewInfoSerializer(ser.Serializer): class CountedAuthUsageSerializer(ser.Serializer): - item_guid = ser.CharField(max_length=255, required=False) + item_guid = ser.CharField(max_length=255, required=True) client_session_id = ser.CharField(max_length=255, required=False) provider_id = ser.CharField(max_length=255, required=False) @@ -64,8 +68,21 @@ def validate(self, data): def create(self, validated_data): pageview_info = None + pageview_info_es8 = None if pageview_info_data := validated_data.get('pageview_info'): pageview_info = PageviewInfo(**pageview_info_data) + pageview_info_es8 = PageviewInfoEs8(**pageview_info_data) + OsfCountedUsageRecord.record( + item_osfid=validated_data['item_guid'], + action_labels=validated_data.get('action_labels'), + provider_id=validated_data.get('provider_id'), + pageview_info=pageview_info_es8, + # used to create a COUNTER session-hour id, not stored: + client_session_id=validated_data.get('client_session_id'), + user_id=self.context.get('user_id'), + request_host=self.context.get('request_host'), + request_useragent=self.context.get('request_useragent'), + ) return CountedAuthUsage.record( platform_iri=website_settings.DOMAIN, provider_id=validated_data.get('provider_id'), diff --git a/api/metrics/views.py b/api/metrics/views.py index 69c44027ec9..508f649ee13 100644 --- a/api/metrics/views.py +++ b/api/metrics/views.py @@ -394,7 +394,14 @@ class CountedAuthUsageView(JSONAPIBaseView): serializer_class = CountedAuthUsageSerializer def post(self, request, *args, **kwargs): - serializer = self.serializer_class(data=request.data) + serializer = self.serializer_class( + data=request.data, + context={ + 'user_id': request.user._id if 
request.user.is_authenticated else None, + 'request_host': request.get_host(), + 'request_useragent': request.META.get('HTTP_USER_AGENT', ''), + }, + ) serializer.is_valid(raise_exception=True) session_id, user_is_authenticated = self._get_session_id( request, @@ -404,6 +411,8 @@ def post(self, request, *args, **kwargs): return HttpResponse(status=201) def _get_session_id(self, request, client_session_id=None): + # NOTE: to remove after osfmetrics 6to8 migration -- logic moved to djelme + # get a session id as described in the COUNTER code of practice: # https://cop5.projectcounter.org/en/5.0.2/07-processing/03-counting-unique-items.html # -- different from the "login session" tracked by `osf.models.Session` (which diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py index e2cb7040037..04c15a5b516 100644 --- a/api_tests/metrics/test_counted_usage.py +++ b/api_tests/metrics/test_counted_usage.py @@ -11,6 +11,7 @@ # UserFactory, ) from api_tests.utils import create_test_file +from elasticsearch_metrics.tests.util import djelme_test_backends COUNTED_USAGE_URL = '/_/metrics/events/counted_usage/' @@ -69,6 +70,12 @@ def test_required_attributes(self, app, attrs): @pytest.mark.django_db class TestComputedFields: + + @pytest.fixture(autouse=True) + def _real_elastic(self): + with djelme_test_backends(): + yield + @pytest.fixture(autouse=True) def mock_domain(self): domain = 'http://example.foo/' @@ -81,15 +88,22 @@ def mock_now(self): with mock.patch('django.utils.timezone.now', return_value=timestamp): yield timestamp + @pytest.fixture + def preprint(self, request): + return PreprintFactory( + is_public=True, + is_published=True, + ) + @pytest.fixture() def user(self): with mock.patch('osf.models.base.generate_guid', return_value='guidy'): return AuthUserFactory() - def test_by_client_session_id(self, app, mock_save, user): + def test_by_client_session_id(self, app, mock_save, user, preprint): payload = counted_usage_payload( 
client_session_id='hello', - item_guid='zyxwv', + item_guid=preprint._id, action_labels=['view', 'api'], pageview_info={'page_url': 'http://example.foo/blahblah/blee'}, ) @@ -104,7 +118,7 @@ def test_by_client_session_id(self, app, mock_save, user): expected_doc_id='3239044c7462dd318edd0522a0ed7d84b9c6502ef16cb40dfcae6c1f456d57a2', expected_attrs={ 'platform_iri': 'http://example.foo/', - 'item_guid': 'zyxwv', + 'item_guid': preprint._id, # session_id: sha256(b'hello|1981-01-01').hexdigest() 'session_id': '5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34', 'action_labels': ['view', 'api'], @@ -116,10 +130,10 @@ def test_by_client_session_id(self, app, mock_save, user): }, ) - def test_by_client_session_id_anon(self, app, mock_save): + def test_by_client_session_id_anon(self, app, mock_save, preprint): payload = counted_usage_payload( client_session_id='hello', - item_guid='zyxwv', + item_guid=preprint._id, action_labels=['view', 'web'], pageview_info={ 'page_url': 'http://example.foo/bliz/', @@ -137,7 +151,7 @@ def test_by_client_session_id_anon(self, app, mock_save): expected_doc_id='d01759e963893f9dc9b2ccf016a5ef29135673779802b5578f31449543677e82', expected_attrs={ 'platform_iri': 'http://example.foo/', - 'item_guid': 'zyxwv', + 'item_guid': preprint._id, # session_id: sha256(b'hello|1981-01-01').hexdigest() 'session_id': '5b7c8b0a740a5b23712258a9d1164d2af008df02a8e3d339f16ead1d19595b34', 'action_labels': ['view', 'web'], @@ -151,9 +165,9 @@ def test_by_client_session_id_anon(self, app, mock_save): }, ) - def test_by_user_auth(self, app, mock_save, user): + def test_by_user_auth(self, app, mock_save, user, preprint): payload = counted_usage_payload( - item_guid='yxwvu', + item_guid=preprint._id, action_labels=['view', 'web'], pageview_info={ 'page_url': 'http://osf.io/mst3k', @@ -171,7 +185,7 @@ def test_by_user_auth(self, app, mock_save, user): expected_doc_id='7b8bc27c6d90fb45aa5bbd02deceba9f7384ed61b9a6e7253317c262020b94c2', expected_attrs={ 
'platform_iri': 'http://example.foo/', - 'item_guid': 'yxwvu', + 'item_guid': preprint._id, # session_id: sha256(b'guidy|1981-01-01|0').hexdigest() 'session_id': 'ec768abb16c3411570af99b9d635c2c32d1ca31d1b25eec8ee73759e7242e74a', 'action_labels': ['view', 'web'], @@ -185,10 +199,14 @@ def test_by_user_auth(self, app, mock_save, user): }, ) - def test_by_useragent_header(self, app, mock_save): + def test_by_useragent_header(self, app, mock_save, preprint): payload = counted_usage_payload( - item_guid='yxwvu', + item_guid=preprint._id, action_labels=['view', 'api'], + pageview_info={ + 'page_url': 'http://example.foo/bliz/', + 'referer_url': 'http://elsewhere.baz/index.php', + }, ) headers = { 'User-Agent': 'haha', @@ -198,14 +216,20 @@ def test_by_useragent_header(self, app, mock_save): assert_saved_with( mock_save, # doc_id: sha256(b'http://example.foo/|yxwvu|97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a|1981-01-01|3|api,view').hexdigest() - expected_doc_id='d669528b30f443ffe506e183537af9624ef290090e90a200ecce7b7ca19c77f7', + expected_doc_id='6d7549df6734bb955eb832c6316ffae46c2959c95b5817ab4fcb341dbc875c23', expected_attrs={ 'platform_iri': 'http://example.foo/', - 'item_guid': 'yxwvu', + 'item_guid': preprint._id, # session_id: sha256(b'localhost:80|haha|1981-01-01|0').hexdigest() 'session_id': '97098dd3f7cd26053c0d0264d1c84eaeea8e08d2c55ca34017ffbe53c749ba5a', 'action_labels': ['view', 'api'], - 'pageview_info': None, + 'pageview_info': { + 'page_url': 'http://example.foo/bliz/', + 'page_path': '/bliz', + 'referer_url': 'http://elsewhere.baz/index.php', + 'referer_domain': 'elsewhere.baz', + 'hour_of_day': 0, + }, }, ) @@ -213,6 +237,12 @@ def test_by_useragent_header(self, app, mock_save): @pytest.mark.parametrize('item_public', [True, False]) @pytest.mark.django_db class TestGuidFields: + + @pytest.fixture(autouse=True) + def _real_elastic(self): + with djelme_test_backends(): + yield + @pytest.fixture def preprint(self, item_public): return 
PreprintFactory( @@ -257,7 +287,7 @@ def test_preprint_file(self, app, mock_save, preprint, item_public): item_guid=preprint._id, action_labels=['view', 'web'], ) - resp = app.post_json_api(COUNTED_USAGE_URL, payload) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'}) assert resp.status_code == 201 assert_saved_with( mock_save, @@ -276,7 +306,7 @@ def test_preprint_file(self, app, mock_save, preprint, item_public): item_guid=preprint.primary_file.get_guid(create=True)._id, action_labels=['view', 'web'], ) - resp = app.post_json_api(COUNTED_USAGE_URL, payload) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'}) assert resp.status_code == 201 assert_saved_with( mock_save, @@ -295,7 +325,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil item_guid=child_reg_file_guid, action_labels=['view', 'web'], ) - resp = app.post_json_api(COUNTED_USAGE_URL, payload) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'}) assert resp.status_code == 201 assert_saved_with( mock_save, @@ -318,7 +348,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil item_guid=child_reg._id, action_labels=['view', 'web'], ) - resp = app.post_json_api(COUNTED_USAGE_URL, payload) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'}) assert resp.status_code == 201 assert_saved_with( mock_save, @@ -340,7 +370,7 @@ def test_child_registration_file(self, app, mock_save, child_reg_file_guid, chil item_guid=parent_reg._id, action_labels=['view', 'web'], ) - resp = app.post_json_api(COUNTED_USAGE_URL, payload) + resp = app.post_json_api(COUNTED_USAGE_URL, payload, headers={'User-Agent': 'blarg'}) assert resp.status_code == 201 assert_saved_with( mock_save, diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py index 218b45da1df..6e2b1c9bc72 100644 
--- a/osf/management/commands/monthly_reporters_go.py +++ b/osf/management/commands/monthly_reporters_go.py @@ -85,8 +85,8 @@ def monthly_reporter_do(reporter_key: str, yearmonth: str, report_kwargs: dict): framework.sentry.log_exception(exc) return - _report = _reporter.report(**report_kwargs) - if _report is not None: + _reports = _reporter.report(**report_kwargs) + for _report in _reports: _report.report_yearmonth = _reporter.yearmonth _report.save() _followup_task = _reporter.followup_task(_report) diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py index 7118ab3cc28..2ab6ec022ec 100644 --- a/osf/metrics/es8_metrics.py +++ b/osf/metrics/es8_metrics.py @@ -111,6 +111,20 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord): class Meta: timeseries_index_timedepth = MONTHLY + class ActionLabel(enum.Enum): + SEARCH = 'search' # counter:Search + VIEW = 'view' # counter:Investigation + DOWNLOAD = 'download' # counter:Request + WEB = 'web' # counter:Regular (aka "pageview") + API = 'api' # counter:TDM (aka "non-web api usage") + + @classmethod + def record(cls, **kwargs): + # autofill `user_is_authenticated` before `user_id` discarded (couldn't in `clean`) + if 'user_is_authenticated' not in kwargs: + kwargs['user_is_authenticated'] = bool(kwargs.get('user_id')) + return super().record(**kwargs) + @functools.cached_property def _osfid_referent(self): # for use by autofill methods, if needed @@ -118,6 +132,7 @@ def _osfid_referent(self): def clean(self): super().clean() + self._autofill_platform_iri() self._autofill_item_iri_and_osfid() self._autofill_item_public() self._autofill_item_type() @@ -126,6 +141,10 @@ def clean(self): self._autofill_pageview() self._autofill_database_iri() + def _autofill_platform_iri(self): + if self.platform_iri is None: + self.platform_iri = website_settings.DOMAIN + def _autofill_item_iri_and_osfid(self): if self.item_osfid and not self.item_iri: self.item_iri = osf_iri(self.item_osfid) @@ -227,14 +246,6 @@ def 
_get_unique_together_values(self): ) -class ActionLabel(enum.Enum): - SEARCH = 'search' # counter:Search - VIEW = 'view' # counter:Investigation - DOWNLOAD = 'download' # counter:Request - WEB = 'web' # counter:Regular (aka "pageview") - API = 'api' # counter:TDM (aka "non-web api usage") - - class RegistriesModerationMetricsEs8(djelme.EventRecord): registration_id: str provider_id: str diff --git a/osf/metrics/reporters/download_count.py b/osf/metrics/reporters/download_count.py index f772722dc31..bbc6d0cb655 100644 --- a/osf/metrics/reporters/download_count.py +++ b/osf/metrics/reporters/download_count.py @@ -1,14 +1,22 @@ from osf.models import PageCounter from osf.metrics.reports import DownloadCountReport +from osf.metrics.es8_metrics import DownloadCountReportEs8 +from osf.metrics.utils import cycle_coverage_date from ._base import DailyReporter class DownloadCountReporter(DailyReporter): def report(self, date): download_count = int(PageCounter.get_all_downloads_on_date(date) or 0) - return [ - DownloadCountReport( - daily_file_downloads=download_count, - report_date=date, - ), - ] + reports = [] + report_es8 = DownloadCountReportEs8( + cycle_coverage=cycle_coverage_date(date), + daily_file_downloads=download_count, + ) + reports.append(report_es8) + report = DownloadCountReport( + daily_file_downloads=report_es8.daily_file_downloads, + report_date=date, + ) + reports.append(report) + return reports diff --git a/osf/metrics/reporters/institution_summary.py b/osf/metrics/reporters/institution_summary.py index 892e337aec4..1a6cfbbbca7 100644 --- a/osf/metrics/reporters/institution_summary.py +++ b/osf/metrics/reporters/institution_summary.py @@ -9,9 +9,15 @@ RegistrationRunningTotals, ) from osf.models import Institution +from osf.metrics.es8_metrics import ( + InstitutionSummaryReportEs8, + RunningTotal as RunningTotalEs8, + NodeRunningTotals as NodeRunningTotalsEs8, + RegistrationRunningTotals as RegistrationRunningTotalsEs8 +) +from osf.metrics.utils import 
cycle_coverage_date from ._base import DailyReporter - logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -39,16 +45,15 @@ def report(self, date): created__date__lte=date, type='osf.registration', ) - - report = InstitutionSummaryReport( - report_date=date, + report_es8 = InstitutionSummaryReportEs8( + cycle_coverage=cycle_coverage_date(date), institution_id=institution._id, institution_name=institution.name, - users=RunningTotal( + users=RunningTotalEs8( total=institution.get_institution_users().filter(is_active=True).count(), total_daily=institution.get_institution_users().filter(date_confirmed__date=date).count(), ), - nodes=NodeRunningTotals( + nodes=NodeRunningTotalsEs8( total=node_qs.count(), public=node_qs.filter(public_query).count(), private=node_qs.filter(private_query).count(), @@ -58,7 +63,7 @@ def report(self, date): private_daily=node_qs.filter(private_query & daily_query).count(), ), # Projects use get_roots to remove children - projects=NodeRunningTotals( + projects=NodeRunningTotalsEs8( total=node_qs.get_roots().count(), public=node_qs.filter(public_query).get_roots().count(), private=node_qs.filter(private_query).get_roots().count(), @@ -67,7 +72,7 @@ def report(self, date): public_daily=node_qs.filter(public_query & daily_query).get_roots().count(), private_daily=node_qs.filter(private_query & daily_query).get_roots().count(), ), - registered_nodes=RegistrationRunningTotals( + registered_nodes=RegistrationRunningTotalsEs8( total=registration_qs.count(), public=registration_qs.filter(public_query).count(), embargoed=registration_qs.filter(private_query).count(), @@ -78,7 +83,7 @@ def report(self, date): embargoed_daily=registration_qs.filter(private_query & daily_query).count(), embargoed_v2_daily=registration_qs.filter(private_query & daily_query & embargo_v2_query).count(), ), - registered_projects=RegistrationRunningTotals( + registered_projects=RegistrationRunningTotalsEs8( 
total=registration_qs.get_roots().count(), public=registration_qs.filter(public_query).get_roots().count(), embargoed=registration_qs.filter(private_query).get_roots().count(), @@ -87,7 +92,60 @@ def report(self, date): total_daily=registration_qs.filter(daily_query).get_roots().count(), public_daily=registration_qs.filter(public_query & daily_query).get_roots().count(), embargoed_daily=registration_qs.filter(private_query & daily_query).get_roots().count(), - embargoed_v2_daily=registration_qs.filter(private_query & daily_query & embargo_v2_query).get_roots().count(), + embargoed_v2_daily=registration_qs.filter( + private_query & daily_query & embargo_v2_query).get_roots().count(), + ), + ) + reports.append(report_es8) + + report = InstitutionSummaryReport( + report_date=date, + institution_id=institution._id, + institution_name=institution.name, + users=RunningTotal( + total=report_es8.users.total, + total_daily=report_es8.users.total_daily, + ), + nodes=NodeRunningTotals( + total=report_es8.nodes.total, + public=report_es8.nodes.public, + private=report_es8.nodes.private, + + total_daily=report_es8.nodes.total_daily, + public_daily=report_es8.nodes.public_daily, + private_daily=report_es8.nodes.private_daily, + ), + # Projects use get_roots to remove children + projects=NodeRunningTotals( + total=report_es8.projects.total, + public=report_es8.projects.public, + private=report_es8.projects.private, + + total_daily=report_es8.projects.total_daily, + public_daily=report_es8.projects.public_daily, + private_daily=report_es8.projects.private_daily, + ), + registered_nodes=RegistrationRunningTotals( + total=report_es8.registered_nodes.total, + public=report_es8.registered_nodes.public, + embargoed=report_es8.registered_nodes.embargoed, + embargoed_v2=report_es8.registered_nodes.embargoed_v2, + + total_daily=report_es8.registered_nodes.total_daily, + public_daily=report_es8.registered_nodes.public_daily, + embargoed_daily=report_es8.registered_nodes.embargoed_daily, + 
embargoed_v2_daily=report_es8.registered_nodes.embargoed_v2_daily, + ), + registered_projects=RegistrationRunningTotals( + total=report_es8.registered_projects.total, + public=report_es8.registered_projects.public, + embargoed=report_es8.registered_projects.embargoed, + embargoed_v2=report_es8.registered_projects.embargoed_v2, + + total_daily=report_es8.registered_projects.total_daily, + public_daily=report_es8.registered_projects.public_daily, + embargoed_daily=report_es8.registered_projects.embargoed_daily, + embargoed_v2_daily=report_es8.registered_projects.embargoed_v2_daily, ), ) diff --git a/osf/metrics/reporters/institution_summary_monthly.py b/osf/metrics/reporters/institution_summary_monthly.py index 4748860db32..1f9afaaf7f7 100644 --- a/osf/metrics/reporters/institution_summary_monthly.py +++ b/osf/metrics/reporters/institution_summary_monthly.py @@ -5,9 +5,10 @@ from osf.models.spam import SpamStatus from addons.osfstorage.models import OsfStorageFile from osf.metrics.reports import InstitutionMonthlySummaryReport +from osf.metrics.es8_metrics import InstitutionMonthlySummaryReportEs8 +from osf.metrics.utils import cycle_coverage_yearmonth from ._base import MonthlyReporter - class InstitutionalSummaryMonthlyReporter(MonthlyReporter): """Generate an InstitutionMonthlySummaryReport for each institution.""" @@ -20,7 +21,8 @@ def iter_report_kwargs(self, continue_after: dict | None = None): def report(self, **report_kwargs): _institution = Institution.objects.get(pk=report_kwargs['institution_pk']) - return self.generate_report(_institution) + reports = self.generate_report(_institution) + return reports def generate_report(self, institution): node_queryset = institution.nodes.filter( @@ -31,8 +33,9 @@ def generate_report(self, institution): ) preprint_queryset = self.get_published_preprints(institution, self.yearmonth) - - return InstitutionMonthlySummaryReport( + reports = [] + report_es8 = InstitutionMonthlySummaryReportEs8( + 
cycle_coverage=cycle_coverage_yearmonth(self.yearmonth), institution_id=institution._id, user_count=institution.get_institution_users().count(), private_project_count=self._get_count(node_queryset, 'osf.node', is_public=False), @@ -45,6 +48,23 @@ def generate_report(self, institution): monthly_logged_in_user_count=self.get_monthly_logged_in_user_count(institution, self.yearmonth), monthly_active_user_count=self.get_monthly_active_user_count(institution, self.yearmonth), ) + reports.append(report_es8) + + report = InstitutionMonthlySummaryReport( + institution_id=report_es8.institution_id, + user_count=report_es8.user_count, + private_project_count=report_es8.private_project_count, + public_project_count=report_es8.public_project_count, + public_registration_count=report_es8.public_registration_count, + embargoed_registration_count=report_es8.embargoed_registration_count, + published_preprint_count=report_es8.published_preprint_count, + storage_byte_count=report_es8.storage_byte_count, + public_file_count=report_es8.public_file_count, + monthly_logged_in_user_count=report_es8.monthly_logged_in_user_count, + monthly_active_user_count=report_es8.monthly_active_user_count, + ) + reports.append(report) + return reports def _get_count(self, node_queryset, node_type, is_public): return node_queryset.filter(type=node_type, is_public=is_public, root_id=F('pk')).count() diff --git a/osf/metrics/reporters/institutional_users.py b/osf/metrics/reporters/institutional_users.py index 512472a3d96..ae36e59196c 100644 --- a/osf/metrics/reporters/institutional_users.py +++ b/osf/metrics/reporters/institutional_users.py @@ -1,4 +1,5 @@ import dataclasses +from typing import List from django.contrib.contenttypes.models import ContentType from django.db.models import Q, F, Sum @@ -7,7 +8,8 @@ from osf.models.spam import SpamStatus from addons.osfstorage.models import OsfStorageFile from osf.metrics.reports import InstitutionalUserReport -from osf.metrics.utils import YearMonth +from 
osf.metrics.utils import YearMonth, cycle_coverage_yearmonth +from osf.metrics.es8_metrics import InstitutionalUserReportEs8 from ._base import MonthlyReporter @@ -38,7 +40,8 @@ def report(self, **report_kwargs): _institution = osfdb.Institution.objects.get(pk=report_kwargs['institution_pk']) _user = osfdb.OSFUser.objects.get(pk=report_kwargs['user_pk']) _helper = _InstiUserReportHelper(_institution, _user, self.yearmonth) - return _helper.report + _report = next(r for r in _helper.reports if isinstance(r, InstitutionalUserReport)) + return _report # helper @@ -47,11 +50,13 @@ class _InstiUserReportHelper: institution: osfdb.Institution user: osfdb.OSFUser yearmonth: YearMonth - report: InstitutionalUserReport = dataclasses.field(init=False) + reports: List[InstitutionalUserReport | InstitutionalUserReportEs8] = dataclasses.field(init=False) def __post_init__(self): _affiliation = self.user.get_institution_affiliation(self.institution._id) - self.report = InstitutionalUserReport( + self.reports = [] + report_es8 = InstitutionalUserReportEs8( + cycle_coverage=cycle_coverage_yearmonth(self.yearmonth), institution_id=self.institution._id, user_id=self.user._id, user_name=self.user.fullname, @@ -72,6 +77,25 @@ def __post_init__(self): published_preprint_count=self._published_preprint_queryset().count(), storage_byte_count=self._storage_byte_count(), ) + self.reports.append(report_es8) + report = InstitutionalUserReport( + institution_id=report_es8.institution_id, + user_id=report_es8.user_id, + user_name=report_es8.user_name, + department_name=report_es8.department_name, + month_last_login=report_es8.month_last_login, + month_last_active=report_es8.month_last_active, + account_creation_date=report_es8.account_creation_date, + orcid_id=report_es8.orcid_id, + public_project_count=report_es8.public_project_count, + private_project_count=report_es8.private_project_count, + public_registration_count=report_es8.public_registration_count, + 
embargoed_registration_count=report_es8.embargoed_registration_count, + public_file_count=report_es8.public_file_count, + published_preprint_count=report_es8.published_preprint_count, + storage_byte_count=report_es8.storage_byte_count, + ) + self.reports.append(report) @property def before_datetime(self): diff --git a/osf/metrics/reporters/new_user_domain.py b/osf/metrics/reporters/new_user_domain.py index ec13aad860f..a19abaeb22f 100644 --- a/osf/metrics/reporters/new_user_domain.py +++ b/osf/metrics/reporters/new_user_domain.py @@ -3,6 +3,8 @@ from osf.models import OSFUser from osf.metrics.reports import NewUserDomainReport +from osf.metrics.es8_metrics import NewUserDomainReportEs8 +from osf.metrics.utils import cycle_coverage_date from ._base import DailyReporter logger = logging.getLogger(__name__) @@ -20,11 +22,19 @@ def report(self, date): email.split('@')[-1] for email in new_user_emails ) - return [ - NewUserDomainReport( - report_date=date, + reports = [] + for domain_name, count in domain_names.items(): + report_es8 = NewUserDomainReportEs8( + cycle_coverage=cycle_coverage_date(date), domain_name=domain_name, new_user_count=count, ) - for domain_name, count in domain_names.items() - ] + reports.append(report_es8) + + report = NewUserDomainReport( + report_date=date, + domain_name=report_es8.domain_name, + new_user_count=report_es8.new_user_count, + ) + reports.append(report) + return reports diff --git a/osf/metrics/reporters/node_count.py b/osf/metrics/reporters/node_count.py index 0a4120ca1f9..8e9842ae78e 100644 --- a/osf/metrics/reporters/node_count.py +++ b/osf/metrics/reporters/node_count.py @@ -7,9 +7,14 @@ NodeRunningTotals, RegistrationRunningTotals, ) +from osf.metrics.es8_metrics import ( + NodeSummaryReportEs8, + NodeRunningTotals as NodeRunningTotalsEs8, + RegistrationRunningTotals as RegistrationRunningTotalsEs8 +) +from osf.metrics.utils import cycle_coverage_date from ._base import DailyReporter - logger = logging.getLogger(__name__) 
logging.basicConfig(level=logging.INFO) @@ -35,11 +40,11 @@ def report(self, date): embargo_v2_query = Q(root__embargo__end_date__date__gt=date) exclude_spam = ~Q(spam_status__in=[SpamStatus.SPAM, SpamStatus.FLAGGED]) - - report = NodeSummaryReport( - report_date=date, + reports = [] + report_es8 = NodeSummaryReportEs8( + cycle_coverage=cycle_coverage_date(date), # Nodes - the number of projects and components - nodes=NodeRunningTotals( + nodes=NodeRunningTotalsEs8( total=node_qs.count(), total_excluding_spam=node_qs.filter(exclude_spam).count(), public=node_qs.filter(public_query).count(), @@ -50,7 +55,7 @@ def report(self, date): private_daily=node_qs.filter(private_query & created_today_query).count(), ), # Projects - the number of top-level only projects - projects=NodeRunningTotals( + projects=NodeRunningTotalsEs8( total=node_qs.get_roots().count(), total_excluding_spam=node_qs.get_roots().filter(exclude_spam).count(), public=node_qs.filter(public_query).get_roots().count(), @@ -61,7 +66,7 @@ def report(self, date): private_daily=node_qs.filter(private_query & created_today_query).get_roots().count(), ), # Registered Nodes - the number of registered projects and components - registered_nodes=RegistrationRunningTotals( + registered_nodes=RegistrationRunningTotalsEs8( total=registration_qs.count(), public=registration_qs.filter(public_query).count(), embargoed=registration_qs.filter(private_query).count(), @@ -75,7 +80,7 @@ def report(self, date): ), # Registered Projects - the number of registered top level projects - registered_projects=RegistrationRunningTotals( + registered_projects=RegistrationRunningTotalsEs8( total=registration_qs.get_roots().count(), public=registration_qs.filter(public_query).get_roots().count(), embargoed=registration_qs.filter(private_query).get_roots().count(), @@ -88,5 +93,58 @@ def report(self, date): withdrawn_daily=registration_qs.filter(retracted_query & retracted_today_query).get_roots().count(), ), ) + 
reports.append(report_es8) + report = NodeSummaryReport( + report_date=date, + # Nodes - the number of projects and components + nodes=NodeRunningTotals( + total=report_es8.nodes.total, + total_excluding_spam=report_es8.nodes.total_excluding_spam, + public=report_es8.nodes.public, + private=report_es8.nodes.private, + total_daily=report_es8.nodes.total_daily, + total_daily_excluding_spam=report_es8.nodes.total_daily_excluding_spam, + public_daily=report_es8.nodes.public_daily, + private_daily=report_es8.nodes.private_daily, + ), + # Projects - the number of top-level only projects + projects=NodeRunningTotals( + total=report_es8.projects.total, + total_excluding_spam=report_es8.projects.total_excluding_spam, + public=report_es8.projects.public, + private=report_es8.projects.private, + total_daily=report_es8.projects.total_daily, + total_daily_excluding_spam=report_es8.projects.total_daily_excluding_spam, + public_daily=report_es8.projects.public_daily, + private_daily=report_es8.projects.private_daily, + ), + # Registered Nodes - the number of registered projects and components + registered_nodes=RegistrationRunningTotals( + total=report_es8.registered_nodes.total, + public=report_es8.registered_nodes.public, + embargoed=report_es8.registered_nodes.embargoed, + embargoed_v2=report_es8.registered_nodes.embargoed_v2, + withdrawn=report_es8.registered_nodes.withdrawn, + total_daily=report_es8.registered_nodes.total_daily, + public_daily=report_es8.registered_nodes.public_daily, + embargoed_daily=report_es8.registered_nodes.embargoed_daily, + embargoed_v2_daily=report_es8.registered_nodes.embargoed_v2_daily, + withdrawn_daily=report_es8.registered_nodes.withdrawn_daily, + ), + # Registered Projects - the number of registered top level projects + registered_projects=RegistrationRunningTotals( + total=report_es8.registered_projects.total, + public=report_es8.registered_projects.public, + embargoed=report_es8.registered_projects.embargoed, + 
embargoed_v2=report_es8.registered_projects.embargoed_v2, + withdrawn=report_es8.registered_projects.withdrawn, + total_daily=report_es8.registered_projects.total_daily, + public_daily=report_es8.registered_projects.public_daily, + embargoed_daily=report_es8.registered_projects.embargoed_daily, + embargoed_v2_daily=report_es8.registered_projects.embargoed_v2_daily, + withdrawn_daily=report_es8.registered_projects.withdrawn_daily, + ), + ) + reports.append(report) - return [report] + return reports diff --git a/osf/metrics/reporters/osfstorage_file_count.py b/osf/metrics/reporters/osfstorage_file_count.py index 2f35e1e81fd..f93ed180ebb 100644 --- a/osf/metrics/reporters/osfstorage_file_count.py +++ b/osf/metrics/reporters/osfstorage_file_count.py @@ -4,9 +4,13 @@ from osf.metrics.reports import OsfstorageFileCountReport, FileRunningTotals from osf.models import AbstractNode, Preprint +from osf.metrics.es8_metrics import ( + OsfstorageFileCountReportEs8, + FileRunningTotals as FileRunningTotalsEs8 +) +from osf.metrics.utils import cycle_coverage_date from ._base import DailyReporter - logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -32,9 +36,11 @@ def report(self, date): daily_query = Q(created__date=date) - report = OsfstorageFileCountReport( - report_date=date, - files=FileRunningTotals( + reports = [] + + report_es8 = OsfstorageFileCountReportEs8( + cycle_coverage=cycle_coverage_date(date), + files=FileRunningTotalsEs8( total=file_qs.count(), public=file_qs.filter(public_query).count(), private=file_qs.filter(private_query).count(), @@ -43,5 +49,19 @@ def report(self, date): private_daily=file_qs.filter(private_query & daily_query).count(), ), ) + reports.append(report_es8) + + report = OsfstorageFileCountReport( + report_date=date, + files=FileRunningTotals( + total=report_es8.files.total, + public=report_es8.files.public, + private=report_es8.files.private, + total_daily=report_es8.files.total_daily, + 
public_daily=report_es8.files.public_daily, + private_daily=report_es8.files.private_daily, + ), + ) + reports.append(report) - return [report] + return reports diff --git a/osf/metrics/reporters/preprint_count.py b/osf/metrics/reporters/preprint_count.py index 23f68bc7736..7827f0ef40c 100644 --- a/osf/metrics/reporters/preprint_count.py +++ b/osf/metrics/reporters/preprint_count.py @@ -3,6 +3,8 @@ from osf.metrics import PreprintSummaryReport from website import settings +from osf.metrics.es8_metrics import PreprintSummaryReportEs8 +from osf.metrics.utils import cycle_coverage_date from ._base import DailyReporter logger = logging.getLogger(__name__) @@ -48,13 +50,20 @@ def report(self, date): for preprint_provider in PreprintProvider.objects.all(): elastic_query = get_elastic_query(date, preprint_provider) resp = requests.post(f'{settings.SHARE_URL}api/v2/search/creativeworks/_search', json=elastic_query).json() - reports.append( - PreprintSummaryReport( - report_date=date, - provider_key=preprint_provider._id, - preprint_count=resp['hits']['total'], - ) + + report_es8 = PreprintSummaryReportEs8( + cycle_coverage=cycle_coverage_date(date), + provider_key=preprint_provider._id, + preprint_count=resp['hits']['total'], + ) + reports.append(report_es8) + + report = PreprintSummaryReport( + report_date=date, + provider_key=report_es8.provider_key, + preprint_count=report_es8.preprint_count, ) + reports.append(report) logger.info('{} Preprints counted for the provider {}'.format(resp['hits']['total'], preprint_provider.name)) return reports diff --git a/osf/metrics/reporters/private_spam_metrics.py b/osf/metrics/reporters/private_spam_metrics.py index 40f259af325..49850605cd0 100644 --- a/osf/metrics/reporters/private_spam_metrics.py +++ b/osf/metrics/reporters/private_spam_metrics.py @@ -1,8 +1,11 @@ from osf.metrics.reports import PrivateSpamMetricsReport from osf.external.oopspam.client import OOPSpamClient from osf.external.askismet.client import AkismetClient 
+from osf.metrics.es8_metrics import PrivateSpamMetricsReportEs8 +from osf.metrics.utils import cycle_coverage_yearmonth from ._base import MonthlyReporter + class PrivateSpamMetricsReporter(MonthlyReporter): report_name = 'Private Spam Metrics' @@ -13,8 +16,10 @@ def report(self): oopspam_client = OOPSpamClient() akismet_client = AkismetClient() - report = PrivateSpamMetricsReport( - report_yearmonth=str(self.yearmonth), + reports = [] + + report_es8 = PrivateSpamMetricsReportEs8( + cycle_coverage=cycle_coverage_yearmonth(self.yearmonth), node_oopspam_flagged=oopspam_client.get_flagged_count(target_month, next_month, category='node'), node_oopspam_hammed=oopspam_client.get_hammed_count(target_month, next_month, category='node'), node_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='node'), @@ -24,5 +29,19 @@ def report(self): preprint_akismet_flagged=akismet_client.get_flagged_count(target_month, next_month, category='preprint'), preprint_akismet_hammed=akismet_client.get_hammed_count(target_month, next_month, category='preprint') ) + reports.append(report_es8) + + report = PrivateSpamMetricsReport( + report_yearmonth=str(self.yearmonth), + node_oopspam_flagged=report_es8.node_oopspam_flagged, + node_oopspam_hammed=report_es8.node_oopspam_hammed, + node_akismet_flagged=report_es8.node_akismet_flagged, + node_akismet_hammed=report_es8.node_akismet_hammed, + preprint_oopspam_flagged=report_es8.preprint_oopspam_flagged, + preprint_oopspam_hammed=report_es8.preprint_oopspam_hammed, + preprint_akismet_flagged=report_es8.preprint_akismet_flagged, + preprint_akismet_hammed=report_es8.preprint_akismet_hammed, + ) + reports.append(report) - return report + return reports diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py index 7df405d385f..d9b0dd0734c 100644 --- a/osf/metrics/reporters/public_item_usage.py +++ b/osf/metrics/reporters/public_item_usage.py @@ -3,6 +3,9 @@ import typing 
import waffle + +from osf.metrics.es8_metrics import PublicItemUsageReportEs8 + if typing.TYPE_CHECKING: import elasticsearch6_dsl as edsl @@ -18,7 +21,7 @@ PreprintView, ) from osf.metrics.reports import PublicItemUsageReport -from osf.metrics.utils import YearMonth +from osf.metrics.utils import YearMonth, cycle_coverage_yearmonth from osf import models as osfdb from website import settings as website_settings from ._base import MonthlyReporter @@ -61,16 +64,17 @@ def report(self, **report_kwargs): if _guid is None or _guid.referent is None: raise _SkipItem _obj = _guid.referent - _report = self._init_report(_obj) - self._fill_report_counts(_report, _obj) - if not any(( - _report.view_count, - _report.view_session_count, - _report.download_count, - _report.download_session_count, - )): - raise _SkipItem - return _report + _reports = self._init_report(_obj) + for _report in _reports: + self._fill_report_counts(_report, _obj) + if not any(( + _report.view_count, + _report.view_session_count, + _report.download_count, + _report.download_session_count, + )): + raise _SkipItem + return _reports except _SkipItem: return None @@ -131,16 +135,27 @@ def _preprintdownload_osfids(self, after_osfid: str | None) -> typing.Iterator[s ) return _iter_composite_bucket_keys(_search, 'agg_osfid', 'osfid', after=after_osfid) - def _init_report(self, osf_obj) -> PublicItemUsageReport: + def _init_report(self, osf_obj) -> typing.List[PublicItemUsageReport | PublicItemUsageReportEs8]: if not _is_item_public(osf_obj): raise _SkipItem - return PublicItemUsageReport( + reports = [] + report_es8 = PublicItemUsageReportEs8( + cycle_coverage=cycle_coverage_yearmonth(self.yearmonth), item_osfid=osf_obj._id, item_type=[get_item_type(osf_obj)], provider_id=[get_provider_id(osf_obj)], platform_iri=[website_settings.DOMAIN], + ) + reports.append(report_es8) + report = PublicItemUsageReport( + item_osfid=report_es8.item_osfid, + item_type=report_es8.item_type, + provider_id=report_es8.provider_id, 
+ platform_iri=report_es8.platform_iri, # leave counts null; will be set if there's data ) + reports.append(report) + return reports def _fill_report_counts(self, report, osf_obj): if ( @@ -154,31 +169,43 @@ def _fill_report_counts(self, report, osf_obj): ( report.view_count, report.view_session_count, - ) = self._countedusage_view_counts(osf_obj) + ) = self._countedusage_view_counts(osf_obj, cumulative=False) ( report.download_count, report.download_session_count, - ) = self._countedusage_download_counts(osf_obj) + ) = self._countedusage_download_counts(osf_obj, cumulative=False) + + ( + report.cumulative_view_count, + report.cumulative_view_session_count, + ) = self._countedusage_view_counts(osf_obj, cumulative=True) - def _base_usage_search(self): + ( + report.cumulative_download_count, + report.cumulative_download_session_count, + ) = self._countedusage_download_counts(osf_obj, cumulative=True) + + def _base_usage_search(self, cumulative: bool = False): + timestamp_filter = { + 'lt': self.yearmonth.month_end(), + } + if not cumulative: + timestamp_filter['gte'] = self.yearmonth.month_start() return ( CountedAuthUsage.search() .filter('term', item_public=True) - .filter('range', timestamp={ - 'gte': self.yearmonth.month_start(), - 'lt': self.yearmonth.month_end(), - }) + .filter('range', timestamp=timestamp_filter) .extra(size=0) # only aggregations, no hits ) - def _countedusage_view_counts(self, osf_obj) -> tuple[int, int]: + def _countedusage_view_counts(self, osf_obj, cumulative: bool = False) -> tuple[int, int]: '''compute view_session_count separately to avoid double-counting (the same session may be represented in both the composite agg on `item_guid` and that on `surrounding_guids`) ''' _search = ( - self._base_usage_search() + self._base_usage_search(cumulative=cumulative) .query( 'bool', filter=[ @@ -206,10 +233,10 @@ def _countedusage_view_counts(self, osf_obj) -> tuple[int, int]: ) return (_view_count, _view_session_count) - def 
_countedusage_download_counts(self, osf_obj) -> tuple[int, int]: + def _countedusage_download_counts(self, osf_obj, cumulative: bool = False) -> tuple[int, int]: '''aggregate downloads on each osfid (not including components/files)''' _search = ( - self._base_usage_search() + self._base_usage_search(cumulative=cumulative) .filter('term', item_guid=osf_obj._id) .filter('term', action_labels=CountedAuthUsage.ActionLabel.DOWNLOAD.value) ) diff --git a/osf/metrics/reporters/spam_count.py b/osf/metrics/reporters/spam_count.py index 319381fe899..23c74697a54 100644 --- a/osf/metrics/reporters/spam_count.py +++ b/osf/metrics/reporters/spam_count.py @@ -1,9 +1,11 @@ from osf.models import OSFUser from osf.metrics.reports import SpamSummaryReport -from ._base import MonthlyReporter from osf.models import PreprintLog, NodeLog from osf.models.spam import SpamStatus +from osf.metrics.es8_metrics import SpamSummaryReportEs8 +from osf.metrics.utils import cycle_coverage_yearmonth +from ._base import MonthlyReporter class SpamCountReporter(MonthlyReporter): @@ -11,9 +13,9 @@ def report(self, **report_kwargs): assert not report_kwargs target_month = self.yearmonth.month_start() next_month = self.yearmonth.month_end() - - return SpamSummaryReport( - # Node Log entries + reports = [] + report_es8 = SpamSummaryReportEs8( + cycle_coverage=cycle_coverage_yearmonth(self.yearmonth), node_confirmed_spam=NodeLog.objects.filter( action=NodeLog.CONFIRM_SPAM, created__gt=target_month, @@ -79,3 +81,23 @@ def report(self, **report_kwargs): created__lt=next_month, ).count() ) + reports.append(report_es8) + report = SpamSummaryReport( + # Node Log entries + node_confirmed_spam=report_es8.node_confirmed_spam, + node_confirmed_ham=report_es8.node_confirmed_ham, + node_flagged=report_es8.node_flagged, + # Registration Log entries + registration_confirmed_spam=report_es8.registration_confirmed_spam, + registration_confirmed_ham=report_es8.registration_confirmed_ham, + 
registration_flagged=report_es8.registration_flagged, + # Preprint Log entries + preprint_confirmed_spam=report_es8.preprint_confirmed_spam, + preprint_confirmed_ham=report_es8.preprint_confirmed_ham, + preprint_flagged=report_es8.preprint_flagged, + # New Users marked as Spam/Ham + user_marked_as_spam=report_es8.user_marked_as_spam, + user_marked_as_ham=report_es8.user_marked_as_ham, + ) + reports.append(report) + return reports diff --git a/osf/metrics/reporters/storage_addon_usage.py b/osf/metrics/reporters/storage_addon_usage.py index 704254795f0..f630f8809ba 100644 --- a/osf/metrics/reporters/storage_addon_usage.py +++ b/osf/metrics/reporters/storage_addon_usage.py @@ -13,6 +13,12 @@ from osf.metrics.reports import StorageAddonUsage, RunningTotal, UsageByStorageAddon from osf.models import SpamStatus, Tag from website import settings +from osf.metrics.es8_metrics import ( + StorageAddonUsageEs8, + UsageByStorageAddon as UsageByStorageAddonEs8, + RunningTotal as RunningTotalEs8 +) +from osf.metrics.utils import cycle_coverage_date from ._base import DailyReporter logger = logging.getLogger(__name__) @@ -125,45 +131,84 @@ def report(self, date): if 'storage' in addon_config.categories } + usage_by_addon_es8 = [] usage_by_addon = [] for short_name, addon_config in storage_addon_configs.items(): user_counts = storage_addon_user_counts(date, addon_config.get_model('UserSettings')) node_counts = storage_addon_node_counts(date, addon_config.get_model('NodeSettings')) - - usage_by_addon.append( - UsageByStorageAddon( - addon_shortname=short_name, - enabled_usersettings=RunningTotal( - total=user_counts.get('enabled_total', 0), - total_daily=user_counts.get('enabled_daily', 0), - ), - deleted_usersettings=RunningTotal( - total=user_counts.get('deleted_total', 0), - total_daily=user_counts.get('deleted_daily', 0), - ), - linked_usersettings=RunningTotal( - total=user_counts.get('linked_total', 0), - total_daily=user_counts.get('linked_daily', 0), - ), - 
usersetting_links=RunningTotal( - total=user_counts.get('link_count_total', 0), - total_daily=user_counts.get('link_count_daily', 0), - ), - connected_nodesettings=RunningTotal( - total=node_counts.get('connected_total', 0), - total_daily=node_counts.get('connected_daily', 0), - ), - disconnected_nodesettings=RunningTotal( - total=node_counts.get('disconnected_total', 0), - total_daily=node_counts.get('disconnected_daily', 0), - ), - deleted_nodesettings=RunningTotal( - total=node_counts.get('deleted_total', 0), - total_daily=node_counts.get('deleted_daily', 0), - ), + usage_by_storage_addon_es_8 = UsageByStorageAddonEs8( + addon_shortname=short_name, + enabled_usersettings=RunningTotalEs8( + total=user_counts.get('enabled_total', 0), + total_daily=user_counts.get('enabled_daily', 0), + ), + deleted_usersettings=RunningTotalEs8( + total=user_counts.get('deleted_total', 0), + total_daily=user_counts.get('deleted_daily', 0), + ), + linked_usersettings=RunningTotalEs8( + total=user_counts.get('linked_total', 0), + total_daily=user_counts.get('linked_daily', 0), + ), + usersetting_links=RunningTotalEs8( + total=user_counts.get('link_count_total', 0), + total_daily=user_counts.get('link_count_daily', 0), + ), + connected_nodesettings=RunningTotalEs8( + total=node_counts.get('connected_total', 0), + total_daily=node_counts.get('connected_daily', 0), + ), + disconnected_nodesettings=RunningTotalEs8( + total=node_counts.get('disconnected_total', 0), + total_daily=node_counts.get('disconnected_daily', 0), + ), + deleted_nodesettings=RunningTotalEs8( + total=node_counts.get('deleted_total', 0), + total_daily=node_counts.get('deleted_daily', 0), + ), + ) + usage_by_addon_es8.append(usage_by_storage_addon_es_8) + usage_by_storage_addon = UsageByStorageAddon( + addon_shortname=usage_by_storage_addon_es_8.addon_shortname, + enabled_usersettings=RunningTotal( + total=usage_by_storage_addon_es_8.enabled_usersettings.total, +
total_daily=usage_by_storage_addon_es_8.enabled_usersettings.total_daily, + ), + deleted_usersettings=RunningTotal( + total=usage_by_storage_addon_es_8.deleted_usersettings.total, + total_daily=usage_by_storage_addon_es_8.deleted_usersettings.total_daily, + ), + linked_usersettings=RunningTotal( + total=usage_by_storage_addon_es_8.linked_usersettings.total, + total_daily=usage_by_storage_addon_es_8.linked_usersettings.total_daily, + ), + usersetting_links=RunningTotal( + total=usage_by_storage_addon_es_8.usersetting_links.total, + total_daily=usage_by_storage_addon_es_8.usersetting_links.total_daily, + ), + connected_nodesettings=RunningTotal( + total=usage_by_storage_addon_es_8.connected_nodesettings.total, + total_daily=usage_by_storage_addon_es_8.connected_nodesettings.total_daily, + ), + disconnected_nodesettings=RunningTotal( + total=usage_by_storage_addon_es_8.disconnected_nodesettings.total, + total_daily=usage_by_storage_addon_es_8.disconnected_nodesettings.total_daily, + ), + deleted_nodesettings=RunningTotal( + total=usage_by_storage_addon_es_8.deleted_nodesettings.total, + total_daily=usage_by_storage_addon_es_8.deleted_nodesettings.total_daily, ) ) - return [StorageAddonUsage( + usage_by_addon.append(usage_by_storage_addon) + reports = [] + report_es8 = StorageAddonUsageEs8( + cycle_coverage=cycle_coverage_date(date), + usage_by_addon=usage_by_addon_es8, + ) + reports.append(report_es8) + report = StorageAddonUsage( report_date=date, usage_by_addon=usage_by_addon, - )] + ) + reports.append(report) + return reports diff --git a/osf/metrics/reporters/user_count.py b/osf/metrics/reporters/user_count.py index e0a61c7bb10..089fcb63f47 100644 --- a/osf/metrics/reporters/user_count.py +++ b/osf/metrics/reporters/user_count.py @@ -1,14 +1,17 @@ from osf.models import OSFUser from osf.metrics import UserSummaryReport +from osf.metrics.es8_metrics import UserSummaryReportEs8 +from osf.metrics.utils import cycle_coverage_date from ._base import DailyReporter class
UserCountReporter(DailyReporter): def report(self, report_date): - report = UserSummaryReport( - report_date=report_date, + reports = [] + report_es8 = UserSummaryReportEs8( + cycle_coverage=cycle_coverage_date(report_date), active=OSFUser.objects.filter(is_active=True, date_confirmed__date__lte=report_date).count(), deactivated=OSFUser.objects.filter(date_disabled__isnull=False, date_disabled__date__lte=report_date).count(), merged=OSFUser.objects.filter(date_registered__date__lte=report_date, merged_by__isnull=False).count(), @@ -16,5 +19,16 @@ def report(self, report_date): new_users_with_institution_daily=OSFUser.objects.filter(is_active=True, date_confirmed__date=report_date, institutionaffiliation__isnull=False).count(), unconfirmed=OSFUser.objects.filter(date_registered__date__lte=report_date, date_confirmed__isnull=True).count(), ) + reports.append(report_es8) + report = UserSummaryReport( + report_date=report_date, + active=report_es8.active, + deactivated=report_es8.deactivated, + merged=report_es8.merged, + new_users_daily=report_es8.new_users_daily, + new_users_with_institution_daily=report_es8.new_users_with_institution_daily, + unconfirmed=report_es8.unconfirmed, + ) + reports.append(report) - return [report] + return reports diff --git a/osf/metrics/utils.py b/osf/metrics/utils.py index 973b8bf1ef3..c5d49f293cf 100644 --- a/osf/metrics/utils.py +++ b/osf/metrics/utils.py @@ -6,6 +6,28 @@ from hashlib import sha256 from typing import ClassVar +from elasticsearch_metrics.util.timeparts import format_timeparts + + +def cycle_coverage_date(given_date: datetime.date) -> str: + """ + >>> cycle_coverage_date(datetime.date(1234, 5, 6)) + '1234.5.6' + >>> cycle_coverage_date(datetime.datetime(7654, 3, 2, 1)) + '7654.3.2' + """ + return format_timeparts(given_date, 3) + + +def cycle_coverage_yearmonth(given_ym: YearMonth | datetime.date) -> str: + """ + >>> cycle_coverage_yearmonth(YearMonth(2222, 33)) + '2222.33' + >>> 
cycle_coverage_yearmonth(datetime.date(1234, 5, 6)) + '1234.5' + """ + return format_timeparts((given_ym.year, given_ym.month), 2) + def stable_key(*key_parts): """hash function for use in osf.metrics diff --git a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py index 05baa4d38e7..f40b5dacec6 100644 --- a/osf_tests/metrics/reporters/test_institutional_summary_reporter.py +++ b/osf_tests/metrics/reporters/test_institutional_summary_reporter.py @@ -3,6 +3,7 @@ import logging from django.test import TestCase from osf.metrics.reporters import InstitutionalSummaryMonthlyReporter +from osf.metrics.reports import InstitutionMonthlySummaryReport from osf.metrics.utils import YearMonth from osf_tests.factories import ( InstitutionFactory, @@ -79,10 +80,10 @@ def _create_active_user(cls, institution, date_confirmed): def test_report_generation(self): reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) - reports = list_monthly_reports(reporter) - self.assertEqual(len(reports), 1) + reports_raw = list_monthly_reports(reporter) + self.assertEqual(len(reports_raw[0]), 2) - report = reports[0] + report = next(r for r in reports_raw[0] if isinstance(r, InstitutionMonthlySummaryReport)) self.assertEqual(report.institution_id, self._institution._id) self.assertEqual(report.user_count, 2) # _logged_in_user and _active_user self.assertEqual(report.public_project_count, 1) @@ -115,7 +116,8 @@ def test_report_generation_multiple_institutions(self): # Run the reporter for the current month (February 2018) reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) - reports = list_monthly_reports(reporter) + reports_raw = list_monthly_reports(reporter) + reports = [item for sublist in reports_raw for item in sublist if isinstance(item, InstitutionMonthlySummaryReport)] self.assertEqual(len(reports), 3) # Reports for self._institution, institution2, institution3 # Extract reports by 
institution @@ -264,7 +266,8 @@ def test_high_counts_multiple_institutions(self): if enable_benchmarking: reporter_start_time = time.time() reporter = InstitutionalSummaryMonthlyReporter(self._yearmonth) - reports = list_monthly_reports(reporter) + reports_raw = list_monthly_reports(reporter) + reports = [item for sublist in reports_raw for item in sublist if isinstance(item, InstitutionMonthlySummaryReport)] assert len(reports) == additional_institution_count + 1 if enable_benchmarking: diff --git a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py index 69bd266285a..082b330afd8 100644 --- a/osf_tests/metrics/reporters/test_public_item_usage_reporter.py +++ b/osf_tests/metrics/reporters/test_public_item_usage_reporter.py @@ -174,8 +174,10 @@ def test_no_data(self, ym_empty): def test_reporter(self, ym_empty, ym_sparse, ym_busy, sparse_month_usage, busy_month_item0, busy_month_item1, busy_month_item2, item0): _empty = list_monthly_reports(PublicItemUsageReporter(ym_empty)) - _sparse = list_monthly_reports(PublicItemUsageReporter(ym_sparse)) - _busy = list_monthly_reports(PublicItemUsageReporter(ym_busy)) + _sparse_raw = list_monthly_reports(PublicItemUsageReporter(ym_sparse)) + _sparse = [item for sublist in _sparse_raw for item in sublist if isinstance(item, PublicItemUsageReport)] + _busy_raw = list_monthly_reports(PublicItemUsageReporter(ym_busy)) + _busy = [item for sublist in _busy_raw for item in sublist if isinstance(item, PublicItemUsageReport)] # empty month: assert _empty == [] diff --git a/osf_tests/metrics/test_spam_count_reporter.py b/osf_tests/metrics/test_spam_count_reporter.py index 0e7ba6956bf..448a8136f7a 100644 --- a/osf_tests/metrics/test_spam_count_reporter.py +++ b/osf_tests/metrics/test_spam_count_reporter.py @@ -1,6 +1,7 @@ import pytest from datetime import datetime from osf.metrics.reporters.private_spam_metrics import PrivateSpamMetricsReporter +from 
osf.metrics.reports import PrivateSpamMetricsReport from osf.metrics.utils import YearMonth from osf_tests.factories import NodeLogFactory, NodeFactory from unittest.mock import patch @@ -30,7 +31,8 @@ def test_private_spam_metrics_reporter(): mock_akismet_get_hammed_count.return_value = 10 reporter = PrivateSpamMetricsReporter(report_yearmonth) - report = reporter.report() + reports_raw = reporter.report() + report = next(r for r in reports_raw if isinstance(r, PrivateSpamMetricsReport)) assert report.node_oopspam_flagged == 10, f"Expected 10, got {report.node_oopspam_flagged}" assert report.node_oopspam_hammed == 5, f"Expected 5, got {report.node_oopspam_hammed}" diff --git a/osf_tests/metrics/test_utils.py b/osf_tests/metrics/test_utils.py index a9d312f2331..47f16be6404 100644 --- a/osf_tests/metrics/test_utils.py +++ b/osf_tests/metrics/test_utils.py @@ -1,15 +1,20 @@ -from datetime import date +import datetime import pytest -from osf.metrics.utils import stable_key +from osf.metrics.utils import ( + stable_key, + cycle_coverage_date, + cycle_coverage_yearmonth, + YearMonth, +) class TestStableKey: @pytest.mark.parametrize('args, expected_key', [ (['foo'], '2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae'), - ([date(1953, 7, 2)], '3943be98daa91031ee7d0e0765472ce1b4a50a21f8c6dcd31047d530a50ada93'), - (['floo', 'blar', date(3049, 2, 2)], '853cef24d58fa8cd69b20d7dfbcdbd33f20ccda1a14f57e25e43c2533504b64f'), + ([datetime.date(1953, 7, 2)], '3943be98daa91031ee7d0e0765472ce1b4a50a21f8c6dcd31047d530a50ada93'), + (['floo', 'blar', datetime.date(3049, 2, 2)], '853cef24d58fa8cd69b20d7dfbcdbd33f20ccda1a14f57e25e43c2533504b64f'), ([1, 2, 7.3], '6ab892f8109fd23b03ab24aebc4e343ed2a058d9a72f750bf90ba051627d233e'), ]) def test_successes(self, args, expected_key): @@ -24,3 +29,13 @@ def test_successes(self, args, expected_key): def test_value_errors(self, args): with pytest.raises(ValueError): stable_key(*args) + + +def test_cycle_coverage_date(): + assert 
cycle_coverage_date(datetime.date(1234, 5, 6)) == '1234.5.6' + assert cycle_coverage_date(datetime.datetime(7654, 3, 2, 1)) == '7654.3.2' + + +def test_cycle_coverage_yearmonth(): + assert cycle_coverage_yearmonth(YearMonth(2222, 33)) == '2222.33' + assert cycle_coverage_yearmonth(datetime.date(1234, 5, 6)) == '1234.5'