Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18235.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add caching support to media endpoints.
69 changes: 62 additions & 7 deletions synapse/media/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,12 +224,7 @@ def _quote(x: str) -> str:

request.setHeader(b"Content-Disposition", disposition.encode("ascii"))

# cache for at least a day.
# XXX: we might want to turn this off for data we don't want to
# recommend caching as it's sensitive or private - or at least
# select private. don't bother setting Expires as all our
# clients are smart enough to be happy with Cache-Control
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
_add_cache_headers(request)

if file_size is not None:
request.setHeader(b"Content-Length", b"%d" % (file_size,))
Expand All @@ -240,6 +235,26 @@ def _quote(x: str) -> str:
request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")


def _add_cache_headers(request: Request) -> None:
"""Adds the appropriate cache headers to the response"""

# Cache for at least a day.
Comment thread
erikjohnston marked this conversation as resolved.
Outdated
#
# We set this to "public,s-maxage=0,proxy-revalidate" to allow CDNs to cache
# the media, so long as they "revalidate" the media on every request. By
# revalidate, we mean send the request to Synapse with a `If-None-Match`
# header, to which Synapse can either respond with a 304 if the user is
# authenticated/authorized, or a 401/403 if they're not.
request.setHeader(
b"Cache-Control", b"public,max-age=86400,s-maxage=0,proxy-revalidate"
)

# Set an ETag header to allow requesters to use it in requests to check if
# the cache is still valid. Since media is immutable (though may be
# deleted), we just set this to a constant.
request.setHeader(b"ETag", b"1")


# separators as defined in RFC2616. SP and HT are handled separately.
# see _can_encode_filename_as_token.
_FILENAME_SEPARATOR_CHARS = {
Expand Down Expand Up @@ -336,13 +351,15 @@ def _quote(x: str) -> str:

from synapse.media.media_storage import MultipartFileConsumer

_add_cache_headers(request)

# note that currently the json_object is just {}, this will change when linked media
# is implemented
multipart_consumer = MultipartFileConsumer(
clock,
request,
media_type,
{},
{}, # Note: if we change this we need to change the returned ETag.
disposition,
media_length,
)
Expand Down Expand Up @@ -419,6 +436,44 @@ async def respond_with_responder(
finish_request(request)


def respond_with_304(request: SynapseRequest) -> None:
request.setResponseCode(304)

# could alternatively use request.notifyFinish() and flip a flag when
# the Deferred fires, but since the flag is RIGHT THERE it seems like
# a waste.
if request._disconnected:
logger.warning(
"Not sending response to request %s, already disconnected.", request
)
return None

_add_cache_headers(request)

request.finish()


def check_for_cached_entry_and_respond(request: SynapseRequest) -> bool:
"""Check if the request has a conditional header that allows us to return a
304 Not Modified response, and if it has return a 304 response.
Comment thread
erikjohnston marked this conversation as resolved.
Outdated

# This handles clients and intermediary proxies caching media.
Comment thread
erikjohnston marked this conversation as resolved.
Outdated

Returns True if we have responded."""

# We've checked the user has access to the media, so we now check if it
# is a "conditional request" and we can just return a `304 Not Modified`
# response. Since media is immutable (though may be deleted), we just
# check this is the expected constant.
etag = request.getHeader("If-None-Match")
if etag == "1":
Comment thread
erikjohnston marked this conversation as resolved.
Outdated
# Return a `304 Not modified`.
respond_with_304(request)
return True

return False


class Responder(ABC):
"""Represents a response that can be streamed to the requester.

Expand Down
17 changes: 17 additions & 0 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
FileInfo,
Responder,
ThumbnailInfo,
check_for_cached_entry_and_respond,
get_filename_from_headers,
respond_404,
respond_with_multipart_responder,
Expand Down Expand Up @@ -459,6 +460,11 @@ async def get_local_media(

self.mark_recently_accessed(None, media_id)

# Once we've checked auth we can return early if the media is cached on
# the client
if check_for_cached_entry_and_respond(request):
return

media_type = media_info.media_type
if not media_type:
media_type = "application/octet-stream"
Expand Down Expand Up @@ -538,6 +544,17 @@ async def get_remote_media(
allow_authenticated,
)

# Check if the media is cached on the client, if so return 304. We need
# to do this after we have fetched remote media, as we need it to do the
# auth.
if check_for_cached_entry_and_respond(request):
# We always need to use the responder.
if responder:
with responder:
pass

return

# We deliberately stream the file outside the lock
if responder and media_info:
upload_name = name if name else media_info.upload_name
Expand Down
19 changes: 19 additions & 0 deletions synapse/media/thumbnailer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from synapse.media._base import (
FileInfo,
ThumbnailInfo,
check_for_cached_entry_and_respond,
respond_404,
respond_with_file,
respond_with_multipart_responder,
Expand Down Expand Up @@ -294,6 +295,11 @@ async def respond_local_thumbnail(
if media_info.authenticated:
raise NotFoundError()

# Once we've checked auth we can return early if the media is cached on
# the client
if check_for_cached_entry_and_respond(request):
return

thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
await self._select_and_respond_with_thumbnail(
request,
Expand Down Expand Up @@ -334,6 +340,11 @@ async def select_or_generate_local_thumbnail(
if media_info.authenticated:
raise NotFoundError()

# Once we've checked auth we can return early if the media is cached on
# the client
if check_for_cached_entry_and_respond(request):
return

thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
for info in thumbnail_infos:
t_w = info.width == desired_width
Expand Down Expand Up @@ -431,6 +442,10 @@ async def select_or_generate_remote_thumbnail(
respond_404(request)
return

# Check if the media is cached on the client, if so return 304.
if check_for_cached_entry_and_respond(request):
return

thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
Expand Down Expand Up @@ -510,6 +525,10 @@ async def respond_remote_thumbnail(
if media_info.authenticated:
raise NotFoundError()

# Check if the media is cached on the client, if so return 304.
if check_for_cached_entry_and_respond(request):
return

thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
Expand Down
39 changes: 39 additions & 0 deletions tests/federation/test_federation_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,45 @@ def test_file_download(self) -> None:
found_file = any(SMALL_PNG in field for field in stripped_bytes)
self.assertTrue(found_file)

def test_federation_etag(self) -> None:
"""Test that federation ETags work"""

content = io.BytesIO(b"file_to_stream")
content_uri = self.get_success(
self.media_repo.create_content(
"text/plain",
"test_upload",
content,
46,
UserID.from_string("@user_id:whatever.org"),
)
)

channel = self.make_signed_federation_request(
"GET",
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
)
self.pump()
self.assertEqual(200, channel.code)

# We expect exactly one ETag header.
etags = channel.headers.getRawHeaders("ETag")
self.assertIsNotNone(etags)
assert etags is not None # For mypy
self.assertEqual(len(etags), 1)
etag = etags[0]

# Refetching with the etag should result in 304 and empty body.
channel = self.make_signed_federation_request(
"GET",
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
custom_headers=[("If-None-Match", etag)],
)
self.pump()
self.assertEqual(channel.code, 304)
self.assertEqual(channel.is_finished(), True)
self.assertNotIn("body", channel.result)


class FederationThumbnailTest(unittest.FederatingHomeserverTestCase):
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
Expand Down
110 changes: 110 additions & 0 deletions tests/rest/client/test_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -2676,3 +2676,113 @@ def test_authenticated_media(self) -> None:
access_token=self.tok,
)
self.assertEqual(channel10.code, 200)

def test_authenticated_media_etag(self) -> None:
"""Test that ETag work correctly with authenticated media over client
Comment thread
erikjohnston marked this conversation as resolved.
Outdated
APIs"""

# upload some local media with authentication on
channel = self.make_request(
"POST",
"_matrix/media/v3/upload?filename=test_png_upload",
SMALL_PNG,
self.tok,
shorthand=False,
content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
)
self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri")
assert res is not None
uri = res.split("mxc://")[1]

# Check standard media endpoint
self._check_caching(f"/download/{uri}")

# check thumbnails as well
params = "?width=32&height=32&method=crop"
self._check_caching(f"/thumbnail/{uri}{params}")

# Inject a piece of remote media.
file_id = "abcdefg12345"
file_info = FileInfo(server_name="lonelyIsland", file_id=file_id)

media_storage = self.hs.get_media_repository().media_storage

ctx = media_storage.store_into_file(file_info)
(f, fname) = self.get_success(ctx.__aenter__())
f.write(SMALL_PNG)
self.get_success(ctx.__aexit__(None, None, None))

# we write the authenticated status when storing media, so this should pick up
# config and authenticate the media
self.get_success(
self.store.store_cached_remote_media(
origin="lonelyIsland",
media_id="52",
media_type="image/png",
media_length=1,
time_now_ms=self.clock.time_msec(),
upload_name="remote_test.png",
filesystem_id=file_id,
)
)

# ensure we have thumbnails for the non-dynamic code path
if self.extra_config == {"dynamic_thumbnails": False}:
self.get_success(
self.repo._generate_thumbnails(
"lonelyIsland", "52", file_id, "image/png"
)
)

self._check_caching("/download/lonelyIsland/52")

params = "?width=32&height=32&method=crop"
self._check_caching(f"/thumbnail/lonelyIsland/52{params}")

def _check_caching(self, path: str) -> None:
"""
Checks that:
1. fetching the path returns an ETag header
2. refetching with the ETag returns a 304 without a body
3. refetching with the ETag but through unauthenticated endpoint
returns 404
"""

# Request media over authenticated endpoint, should be found
channel1 = self.make_request(
"GET",
f"/_matrix/client/v1/media{path}",
access_token=self.tok,
shorthand=False,
)
self.assertEqual(channel1.code, 200)

# Should have a single ETag field
etags = channel1.headers.getRawHeaders("ETag")
self.assertIsNotNone(etags)
assert etags is not None # For mypy
self.assertEqual(len(etags), 1)
etag = etags[0]

# Refetching with the etag should result in 304 and empty body.
channel2 = self.make_request(
"GET",
f"/_matrix/client/v1/media{path}",
access_token=self.tok,
shorthand=False,
custom_headers=[("If-None-Match", etag)],
)
self.assertEqual(channel2.code, 304)
self.assertEqual(channel2.is_finished(), True)
self.assertNotIn("body", channel2.result)

# Refetching with the etag but no access token should result in 404.
channel3 = self.make_request(
"GET",
f"/_matrix/media/r0{path}",
shorthand=False,
custom_headers=[("If-None-Match", etag)],
)
self.assertEqual(channel3.code, 404)