Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/18235.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add caching support to media endpoints.
74 changes: 67 additions & 7 deletions synapse/media/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@
# Maximum allowed timeout_ms for download and thumbnail requests
MAXIMUM_ALLOWED_MAX_TIMEOUT_MS = 60_000

# The ETag header value to use for immutable media. This can be anything.
_IMMUTABLE_ETAG = "1"


def respond_404(request: SynapseRequest) -> None:
assert request.path is not None
Expand Down Expand Up @@ -224,12 +227,7 @@ def _quote(x: str) -> str:

request.setHeader(b"Content-Disposition", disposition.encode("ascii"))

# cache for at least a day.
# XXX: we might want to turn this off for data we don't want to
# recommend caching as it's sensitive or private - or at least
# select private. don't bother setting Expires as all our
# clients are smart enough to be happy with Cache-Control
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
_add_cache_headers(request)

if file_size is not None:
request.setHeader(b"Content-Length", b"%d" % (file_size,))
Expand All @@ -240,6 +238,26 @@ def _quote(x: str) -> str:
request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")


def _add_cache_headers(request: Request) -> None:
"""Adds the appropriate cache headers to the response"""

# Cache on the client for at least a day.
#
# We set this to "public,s-maxage=0,proxy-revalidate" to allow CDNs to cache
# the media, so long as they "revalidate" the media on every request. By
# revalidate, we mean send the request to Synapse with a `If-None-Match`
# header, to which Synapse can either respond with a 304 if the user is
# authenticated/authorized, or a 401/403 if they're not.
request.setHeader(
b"Cache-Control", b"public,max-age=86400,s-maxage=0,proxy-revalidate"
)

# Set an ETag header to allow requesters to use it in requests to check if
# the cache is still valid. Since media is immutable (though may be
# deleted), we just set this to a constant.
request.setHeader(b"ETag", _IMMUTABLE_ETAG)


# separators as defined in RFC2616. SP and HT are handled separately.
# see _can_encode_filename_as_token.
_FILENAME_SEPARATOR_CHARS = {
Expand Down Expand Up @@ -336,13 +354,15 @@ def _quote(x: str) -> str:

from synapse.media.media_storage import MultipartFileConsumer

_add_cache_headers(request)

# note that currently the json_object is just {}, this will change when linked media
# is implemented
multipart_consumer = MultipartFileConsumer(
clock,
request,
media_type,
{},
{}, # Note: if we change this we need to change the returned ETag.
disposition,
media_length,
)
Expand Down Expand Up @@ -419,6 +439,46 @@ async def respond_with_responder(
finish_request(request)


def respond_with_304(request: SynapseRequest) -> None:
request.setResponseCode(304)

# could alternatively use request.notifyFinish() and flip a flag when
# the Deferred fires, but since the flag is RIGHT THERE it seems like
# a waste.
if request._disconnected:
logger.warning(
"Not sending response to request %s, already disconnected.", request
)
return None

_add_cache_headers(request)

request.finish()


def check_for_cached_entry_and_respond(request: SynapseRequest) -> bool:
"""Check if the request has a conditional header that allows us to return a
304 Not Modified response, and if it does, return a 304 response.

This handles clients and intermediary proxies caching media.
This method assumes that the user has already been
authorised to request the media.

Returns True if we have responded."""

# We've checked the user has access to the media, so we now check if it
# is a "conditional request" and we can just return a `304 Not Modified`
# response. Since media is immutable (though may be deleted), we just
# check this is the expected constant.
etag = request.getHeader("If-None-Match")
if etag == _IMMUTABLE_ETAG:
# Return a `304 Not modified`.
respond_with_304(request)
return True

return False


class Responder(ABC):
"""Represents a response that can be streamed to the requester.

Expand Down
17 changes: 17 additions & 0 deletions synapse/media/media_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
FileInfo,
Responder,
ThumbnailInfo,
check_for_cached_entry_and_respond,
get_filename_from_headers,
respond_404,
respond_with_multipart_responder,
Expand Down Expand Up @@ -459,6 +460,11 @@ async def get_local_media(

self.mark_recently_accessed(None, media_id)

# Once we've checked auth we can return early if the media is cached on
# the client
if check_for_cached_entry_and_respond(request):
return

media_type = media_info.media_type
if not media_type:
media_type = "application/octet-stream"
Expand Down Expand Up @@ -538,6 +544,17 @@ async def get_remote_media(
allow_authenticated,
)

# Check if the media is cached on the client, if so return 304. We need
# to do this after we have fetched remote media, as we need it to do the
# auth.
if check_for_cached_entry_and_respond(request):
# We always need to use the responder.
if responder:
with responder:
pass

return

# We deliberately stream the file outside the lock
if responder and media_info:
upload_name = name if name else media_info.upload_name
Expand Down
19 changes: 19 additions & 0 deletions synapse/media/thumbnailer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from synapse.media._base import (
FileInfo,
ThumbnailInfo,
check_for_cached_entry_and_respond,
respond_404,
respond_with_file,
respond_with_multipart_responder,
Expand Down Expand Up @@ -294,6 +295,11 @@ async def respond_local_thumbnail(
if media_info.authenticated:
raise NotFoundError()

# Once we've checked auth we can return early if the media is cached on
# the client
if check_for_cached_entry_and_respond(request):
return

thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
await self._select_and_respond_with_thumbnail(
request,
Expand Down Expand Up @@ -334,6 +340,11 @@ async def select_or_generate_local_thumbnail(
if media_info.authenticated:
raise NotFoundError()

# Once we've checked auth we can return early if the media is cached on
# the client
if check_for_cached_entry_and_respond(request):
return

thumbnail_infos = await self.store.get_local_media_thumbnails(media_id)
for info in thumbnail_infos:
t_w = info.width == desired_width
Expand Down Expand Up @@ -431,6 +442,10 @@ async def select_or_generate_remote_thumbnail(
respond_404(request)
return

# Check if the media is cached on the client, if so return 304.
if check_for_cached_entry_and_respond(request):
return

thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
Expand Down Expand Up @@ -510,6 +525,10 @@ async def respond_remote_thumbnail(
if media_info.authenticated:
raise NotFoundError()

# Check if the media is cached on the client, if so return 304.
if check_for_cached_entry_and_respond(request):
return

thumbnail_infos = await self.store.get_remote_media_thumbnails(
server_name, media_id
)
Expand Down
39 changes: 39 additions & 0 deletions tests/federation/test_federation_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,45 @@ def test_file_download(self) -> None:
found_file = any(SMALL_PNG in field for field in stripped_bytes)
self.assertTrue(found_file)

def test_federation_etag(self) -> None:
"""Test that federation ETags work"""

content = io.BytesIO(b"file_to_stream")
content_uri = self.get_success(
self.media_repo.create_content(
"text/plain",
"test_upload",
content,
46,
UserID.from_string("@user_id:whatever.org"),
)
)

channel = self.make_signed_federation_request(
"GET",
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
)
self.pump()
self.assertEqual(200, channel.code)

# We expect exactly one ETag header.
etags = channel.headers.getRawHeaders("ETag")
self.assertIsNotNone(etags)
assert etags is not None # For mypy
self.assertEqual(len(etags), 1)
etag = etags[0]

# Refetching with the etag should result in 304 and empty body.
channel = self.make_signed_federation_request(
"GET",
f"/_matrix/federation/v1/media/download/{content_uri.media_id}",
custom_headers=[("If-None-Match", etag)],
)
self.pump()
self.assertEqual(channel.code, 304)
self.assertEqual(channel.is_finished(), True)
self.assertNotIn("body", channel.result)


class FederationThumbnailTest(unittest.FederatingHomeserverTestCase):
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
Expand Down
110 changes: 110 additions & 0 deletions tests/rest/client/test_media.py
Original file line number Diff line number Diff line change
Expand Up @@ -2676,3 +2676,113 @@ def test_authenticated_media(self) -> None:
access_token=self.tok,
)
self.assertEqual(channel10.code, 200)

def test_authenticated_media_etag(self) -> None:
"""Test that ETag works correctly with authenticated media over client
APIs"""

# upload some local media with authentication on
channel = self.make_request(
"POST",
"_matrix/media/v3/upload?filename=test_png_upload",
SMALL_PNG,
self.tok,
shorthand=False,
content_type=b"image/png",
custom_headers=[("Content-Length", str(67))],
)
self.assertEqual(channel.code, 200)
res = channel.json_body.get("content_uri")
assert res is not None
uri = res.split("mxc://")[1]

# Check standard media endpoint
self._check_caching(f"/download/{uri}")

# check thumbnails as well
params = "?width=32&height=32&method=crop"
self._check_caching(f"/thumbnail/{uri}{params}")

# Inject a piece of remote media.
file_id = "abcdefg12345"
file_info = FileInfo(server_name="lonelyIsland", file_id=file_id)

media_storage = self.hs.get_media_repository().media_storage

ctx = media_storage.store_into_file(file_info)
(f, fname) = self.get_success(ctx.__aenter__())
f.write(SMALL_PNG)
self.get_success(ctx.__aexit__(None, None, None))

# we write the authenticated status when storing media, so this should pick up
# config and authenticate the media
self.get_success(
self.store.store_cached_remote_media(
origin="lonelyIsland",
media_id="52",
media_type="image/png",
media_length=1,
time_now_ms=self.clock.time_msec(),
upload_name="remote_test.png",
filesystem_id=file_id,
)
)

# ensure we have thumbnails for the non-dynamic code path
if self.extra_config == {"dynamic_thumbnails": False}:
self.get_success(
self.repo._generate_thumbnails(
"lonelyIsland", "52", file_id, "image/png"
)
)

self._check_caching("/download/lonelyIsland/52")

params = "?width=32&height=32&method=crop"
self._check_caching(f"/thumbnail/lonelyIsland/52{params}")

def _check_caching(self, path: str) -> None:
"""
Checks that:
1. fetching the path returns an ETag header
2. refetching with the ETag returns a 304 without a body
3. refetching with the ETag but through unauthenticated endpoint
returns 404
"""

# Request media over authenticated endpoint, should be found
channel1 = self.make_request(
"GET",
f"/_matrix/client/v1/media{path}",
access_token=self.tok,
shorthand=False,
)
self.assertEqual(channel1.code, 200)

# Should have a single ETag field
etags = channel1.headers.getRawHeaders("ETag")
self.assertIsNotNone(etags)
assert etags is not None # For mypy
self.assertEqual(len(etags), 1)
etag = etags[0]

# Refetching with the etag should result in 304 and empty body.
channel2 = self.make_request(
"GET",
f"/_matrix/client/v1/media{path}",
access_token=self.tok,
shorthand=False,
custom_headers=[("If-None-Match", etag)],
)
self.assertEqual(channel2.code, 304)
self.assertEqual(channel2.is_finished(), True)
self.assertNotIn("body", channel2.result)

# Refetching with the etag but no access token should result in 404.
channel3 = self.make_request(
"GET",
f"/_matrix/media/r0{path}",
shorthand=False,
custom_headers=[("If-None-Match", etag)],
)
self.assertEqual(channel3.code, 404)