Azure-Samples · mattgotteiner · Sep 25, 2023 · Sep 5, 2023 · Sep 6, 2023 · Sep 7, 2023
diff --git a/LoginAndAclSetup.md b/LoginAndAclSetup.md
diff --git a/README.md b/README.md
@@ -17,6 +17,7 @@
 - [Enabling optional features](#enabling-optional-features)
   - [Enabling Application Insights](#enabling-application-insights)
   - [Enabling authentication](#enabling-authentication)
+  - [Enabling login and document level access control](#enabling-login-and-document-level-access-control)
 - [Using the app](#using-the-app)
 - [Running locally](#running-locally)
 - [Productionizing](#productionizing)
@@ -215,6 +216,10 @@ By default, the deployed Azure web app will have no authentication or access res
 
 To then limit access to a specific set of users or groups, you can follow the steps from [Restrict your Azure AD app to a set of users](https://learn.microsoft.com/azure/active-directory/develop/howto-restrict-your-app-to-a-set-of-users) by changing "Assignment Required?" option under the Enterprise Application, and then assigning users/groups access.  Users not granted explicit access will receive the error message -AADSTS50105: Your administrator has configured the application <app_name> to block users unless they are specifically granted ('assigned') access to the application.-
 
+### Enabling login and document level access control
+
+By default, the deployed Azure web app allows users to chat with all your indexed data. You can enable an optional login system using Azure Active Directory to restrict access to indexed data based on the logged-in user. To enable the optional login and document-level access control system, follow [this guide](./LoginAndAclSetup.md).
+
 ## Running locally
 
 You can only run locally **after** having successfully run the `azd up` command. If you haven't yet, follow the steps in [Azure deployment](#azure-deployment) above.

diff --git a/app/backend/app.py b/app/backend/app.py
@@ -30,12 +30,15 @@
 from approaches.readdecomposeask import ReadDecomposeAsk
 from approaches.readretrieveread import ReadRetrieveReadApproach
 from approaches.retrievethenread import RetrieveThenReadApproach
+from core.authentication import AuthenticationHelper
 
 CONFIG_OPENAI_TOKEN = "openai_token"
 CONFIG_CREDENTIAL = "azure_credential"
 CONFIG_ASK_APPROACHES = "ask_approaches"
 CONFIG_CHAT_APPROACHES = "chat_approaches"
 CONFIG_BLOB_CONTAINER_CLIENT = "blob_container_client"
+CONFIG_AUTH_CLIENT = "auth_client"
+CONFIG_SEARCH_CLIENT = "search_client"
 
 bp = Blueprint("routes", __name__, static_folder="static")
 
@@ -45,6 +48,13 @@ async def index():
     return await bp.send_static_file("index.html")
 
 
+# Serves an intentionally empty page: an empty page is recommended as the target for the login redirect to work.
+# See https://github.com/AzureAD/microsoft-authentication-library-for-js/blob/dev/lib/msal-browser/docs/initialization.md#redirecturi-considerations for more information
+@bp.route("/redirect")
+async def redirect():
+    return ""
+
+
 @bp.route("/favicon.ico")
 async def favicon():
     return await bp.send_static_file("favicon.ico")
@@ -78,6 +88,8 @@ async def ask():
     if not request.is_json:
         return jsonify({"error": "request must be json"}), 415
     request_json = await request.get_json()
+    auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
+    auth_claims = await auth_helper.get_auth_claims_if_enabled(request.headers)
     approach = request_json["approach"]
     try:
         impl = current_app.config[CONFIG_ASK_APPROACHES].get(approach)
@@ -86,7 +98,7 @@ async def ask():
         # Workaround for: https://github.com/openai/openai-python/issues/371
         async with aiohttp.ClientSession() as s:
             openai.aiosession.set(s)
-            r = await impl.run(request_json["question"], request_json.get("overrides") or {})
+            r = await impl.run(request_json["question"], request_json.get("overrides") or {}, auth_claims)
         return jsonify(r)
     except Exception as e:
         logging.exception("Exception in /ask")
@@ -98,6 +110,8 @@ async def chat():
     if not request.is_json:
         return jsonify({"error": "request must be json"}), 415
     request_json = await request.get_json()
+    auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
+    auth_claims = await auth_helper.get_auth_claims_if_enabled(request.headers)
     approach = request_json["approach"]
     try:
         impl = current_app.config[CONFIG_CHAT_APPROACHES].get(approach)
@@ -106,7 +120,9 @@ async def chat():
         # Workaround for: https://github.com/openai/openai-python/issues/371
         async with aiohttp.ClientSession() as s:
             openai.aiosession.set(s)
-            r = await impl.run_without_streaming(request_json["history"], request_json.get("overrides", {}))
+            r = await impl.run_without_streaming(
+                request_json["history"], request_json.get("overrides", {}), auth_claims
+            )
         return jsonify(r)
     except Exception as e:
         logging.exception("Exception in /chat")
@@ -123,12 +139,16 @@ async def chat_stream():
     if not request.is_json:
         return jsonify({"error": "request must be json"}), 415
     request_json = await request.get_json()
+    auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
+    auth_claims = await auth_helper.get_auth_claims_if_enabled(request.headers)
     approach = request_json["approach"]
     try:
         impl = current_app.config[CONFIG_CHAT_APPROACHES].get(approach)
         if not impl:
             return jsonify({"error": "unknown approach"}), 400
-        response_generator = impl.run_with_streaming(request_json["history"], request_json.get("overrides", {}))
+        response_generator = impl.run_with_streaming(
+            request_json["history"], request_json.get("overrides", {}), auth_claims
+        )
         response = await make_response(format_as_ndjson(response_generator))
         response.timeout = None  # type: ignore
         return response
@@ -137,6 +157,13 @@ async def chat_stream():
         return jsonify({"error": str(e)}), 500
 
 
+# Send MSAL.js settings to the client UI. Declared async like the other routes so it is not offloaded to a worker thread.
+@bp.route("/auth_setup", methods=["GET"])
+async def auth_setup():
+    auth_helper = current_app.config[CONFIG_AUTH_CLIENT]
+    return jsonify(auth_helper.get_auth_setup_for_client())
+
+
 @bp.before_request
 async def ensure_openai_token():
     if openai.api_type != "azure_ad":
@@ -168,6 +195,12 @@ async def setup_clients():
     # Used only with non-Azure OpenAI deployments
     OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
     OPENAI_ORGANIZATION = os.getenv("OPENAI_ORGANIZATION")
+    AZURE_USE_AUTHENTICATION = os.getenv("AZURE_USE_AUTHENTICATION", "").lower() == "true"
+    AZURE_SERVER_APP_ID = os.getenv("AZURE_SERVER_APP_ID")
+    AZURE_SERVER_APP_SECRET = os.getenv("AZURE_SERVER_APP_SECRET")
+    AZURE_CLIENT_APP_ID = os.getenv("AZURE_CLIENT_APP_ID")
+    AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID")
+    TOKEN_CACHE_PATH = os.getenv("TOKEN_CACHE_PATH")
 
     KB_FIELDS_CONTENT = os.getenv("KB_FIELDS_CONTENT", "content")
     KB_FIELDS_SOURCEPAGE = os.getenv("KB_FIELDS_SOURCEPAGE", "sourcepage")
@@ -178,6 +211,16 @@ async def setup_clients():
     # If you encounter a blocking error during a DefaultAzureCredential resolution, you can exclude the problematic credential by using a parameter (ex. exclude_shared_token_cache_credential=True)
     azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
 
+    # Set up authentication helper
+    auth_helper = AuthenticationHelper(
+        use_authentication=AZURE_USE_AUTHENTICATION,
+        server_app_id=AZURE_SERVER_APP_ID,
+        server_app_secret=AZURE_SERVER_APP_SECRET,
+        client_app_id=AZURE_CLIENT_APP_ID,
+        tenant_id=AZURE_TENANT_ID,
+        token_cache_path=TOKEN_CACHE_PATH,
+    )
+
     # Set up clients for Cognitive Search and Storage
     search_client = SearchClient(
         endpoint=f"https://{AZURE_SEARCH_SERVICE}.search.windows.net",
@@ -204,7 +247,9 @@ async def setup_clients():
         openai.organization = OPENAI_ORGANIZATION
 
     current_app.config[CONFIG_CREDENTIAL] = azure_credential
+    current_app.config[CONFIG_SEARCH_CLIENT] = search_client
     current_app.config[CONFIG_BLOB_CONTAINER_CLIENT] = blob_container_client
+    current_app.config[CONFIG_AUTH_CLIENT] = auth_helper
 
     # Various approaches to integrate GPT and external knowledge, most applications will use a single one of these patterns
     # or some derivative, here we include several for exploration purposes

diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py
@@ -1,8 +1,22 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
+from core.authentication import AuthenticationHelper
 
-class AskApproach(ABC):
+
+class Approach(ABC):
+    # Shared base for the ask and chat approaches: builds the search filter from request overrides and auth claims.
+    def build_filter(self, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> "str | None":
+        """Join the category-exclusion and security filters with " and "; return None when neither applies."""
+        exclude_category = overrides.get("exclude_category") or None
+        security_filter = AuthenticationHelper.build_security_filters(overrides, auth_claims)
+        # Single quotes are doubled so the category value cannot terminate the quoted filter literal early.
+        category_filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
+        filters = [f for f in (category_filter, security_filter) if f]
+        return " and ".join(filters) if filters else None
+
+
+class AskApproach(Approach):
     @abstractmethod
-    async def run(self, q: str, overrides: dict[str, Any]) -> dict[str, Any]:
+    async def run(self, q: str, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> dict[str, Any]:
         ...
diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py
@@ -5,12 +5,13 @@
 from azure.search.documents.aio import SearchClient
 from azure.search.documents.models import QueryType
 
+from approaches.approach import Approach
 from core.messagebuilder import MessageBuilder
 from core.modelhelper import get_token_limit
 from text import nonewlines
 
 
-class ChatReadRetrieveReadApproach:
+class ChatReadRetrieveReadApproach(Approach):
     # Chat roles
     SYSTEM = "system"
     USER = "user"
@@ -73,14 +74,17 @@ def __init__(
         self.chatgpt_token_limit = get_token_limit(chatgpt_model)
 
     async def run_until_final_call(
-        self, history: list[dict[str, str]], overrides: dict[str, Any], should_stream: bool = False
+        self,
+        history: list[dict[str, str]],
+        overrides: dict[str, Any],
+        auth_claims: dict[str, Any],
+        should_stream: bool = False,
     ) -> tuple:
         has_text = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         has_vector = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
         use_semantic_captions = True if overrides.get("semantic_captions") and has_text else False
-        top = overrides.get("top") or 3
-        exclude_category = overrides.get("exclude_category") or None
-        filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
+        top = overrides.get("top", 3)
+        filter = self.build_filter(overrides, auth_claims)
 
         user_query_request = "Generate search query for: " + history[-1]["user"]
 
@@ -195,10 +199,8 @@ async def run_until_final_call(
             system_message,
             self.chatgpt_model,
             history,
-            # Model does not handle lengthy system messages well.
-            # Moved sources to latest user conversation to solve follow up questions prompt.
             history[-1]["user"] + "\n\nSources:\n" + content,
-            max_tokens=self.chatgpt_token_limit,
+            max_tokens=self.chatgpt_token_limit,  # Note on the preceding argument: the model does not handle lengthy system messages well, so sources are moved to the latest user message to solve the follow-up questions prompt.
         )
         msg_to_display = "\n\n".join([str(message) for message in messages])
 
@@ -219,17 +221,23 @@ async def run_until_final_call(
         )
         return (extra_info, chat_coroutine)
 
-    async def run_without_streaming(self, history: list[dict[str, str]], overrides: dict[str, Any]) -> dict[str, Any]:
-        extra_info, chat_coroutine = await self.run_until_final_call(history, overrides, should_stream=False)
+    async def run_without_streaming(
+        self, history: list[dict[str, str]], overrides: dict[str, Any], auth_claims: dict[str, Any]
+    ) -> dict[str, Any]:
+        extra_info, chat_coroutine = await self.run_until_final_call(
+            history, overrides, auth_claims, should_stream=False
+        )
         chat_resp = await chat_coroutine
         chat_content = chat_resp.choices[0].message.content
         extra_info["answer"] = chat_content
         return extra_info
 
     async def run_with_streaming(
-        self, history: list[dict[str, str]], overrides: dict[str, Any]
+        self, history: list[dict[str, str]], overrides: dict[str, Any], auth_claims: dict[str, Any]
     ) -> AsyncGenerator[dict, None]:
-        extra_info, chat_coroutine = await self.run_until_final_call(history, overrides, should_stream=True)
+        extra_info, chat_coroutine = await self.run_until_final_call(
+            history, overrides, auth_claims, should_stream=True
+        )
         yield extra_info
         async for event in await chat_coroutine:
             # "2023-07-01-preview" API version has a bug where first response has empty choices
@@ -247,8 +255,7 @@ def get_messages_from_history(
     ) -> list:
         message_builder = MessageBuilder(system_prompt, model_id)
 
-        # Add examples to show the chat what responses we want.
-        # It will try to mimic any responses and make sure they match the rules laid out in the system message.
+        # Add examples to show the chat what responses we want. It will try to mimic any responses and make sure they match the rules laid out in the system message.
         for shot in few_shots:
             message_builder.append_message(shot.get("role"), shot.get("content"))
 

diff --git a/app/backend/approaches/readdecomposeask.py b/app/backend/approaches/readdecomposeask.py
@@ -37,13 +37,14 @@ def __init__(
         self.content_field = content_field
         self.openai_host = openai_host
 
-    async def search(self, query_text: str, overrides: dict[str, Any]) -> tuple[list[str], str]:
+    async def search(
+        self, query_text: str, overrides: dict[str, Any], auth_claims: dict[str, Any]
+    ) -> tuple[list[str], str]:
         has_text = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         has_vector = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
         use_semantic_captions = True if overrides.get("semantic_captions") and has_text else False
-        top = overrides.get("top") or 3
-        exclude_category = overrides.get("exclude_category") or None
-        filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
+        top = overrides.get("top", 3)
+        filter = self.build_filter(overrides, auth_claims)
 
         # If retrieval mode includes vectors, compute an embedding for the query
         if has_vector:
@@ -109,12 +110,12 @@ async def lookup(self, q: str) -> Optional[str]:
             return "\n".join([d["content"] async for d in r])
         return None
 
-    async def run(self, q: str, overrides: dict[str, Any]) -> dict[str, Any]:
+    async def run(self, q: str, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> dict[str, Any]:
         search_results = None
 
         async def search_and_store(q: str) -> Any:
             nonlocal search_results
-            search_results, content = await self.search(q, overrides)
+            search_results, content = await self.search(q, overrides, auth_claims)
             return content
 
         # Use to capture thought process during iterations

diff --git a/app/backend/approaches/readretrieveread.py b/app/backend/approaches/readretrieveread.py
@@ -68,13 +68,12 @@ def __init__(
         self.content_field = content_field
         self.openai_host = openai_host
 
-    async def retrieve(self, query_text: str, overrides: dict[str, Any]) -> Any:
+    async def retrieve(self, query_text: str, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> Any:
         has_text = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         has_vector = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
         use_semantic_captions = True if overrides.get("semantic_captions") and has_text else False
-        top = overrides.get("top") or 3
-        exclude_category = overrides.get("exclude_category") or None
-        filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
+        top = overrides.get("top", 3)
+        filter = self.build_filter(overrides, auth_claims)
 
         # If retrieval mode includes vectors, compute an embedding for the query
         if has_vector:
@@ -122,12 +121,12 @@ async def retrieve(self, query_text: str, overrides: dict[str, Any]) -> Any:
         content = "\n".join(results)
         return results, content
 
-    async def run(self, q: str, overrides: dict[str, Any]) -> dict[str, Any]:
+    async def run(self, q: str, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> dict[str, Any]:
         retrieve_results = None
 
         async def retrieve_and_store(q: str) -> Any:
             nonlocal retrieve_results
-            retrieve_results, content = await self.retrieve(q, overrides)
+            retrieve_results, content = await self.retrieve(q, overrides, auth_claims)
             return content
 
         # Use to capture thought process during iterations

diff --git a/app/backend/approaches/retrievethenread.py b/app/backend/approaches/retrievethenread.py
@@ -57,13 +57,12 @@ def __init__(
         self.sourcepage_field = sourcepage_field
         self.content_field = content_field
 
-    async def run(self, q: str, overrides: dict[str, Any]) -> dict[str, Any]:
+    async def run(self, q: str, overrides: dict[str, Any], auth_claims: dict[str, Any]) -> dict[str, Any]:
         has_text = overrides.get("retrieval_mode") in ["text", "hybrid", None]
         has_vector = overrides.get("retrieval_mode") in ["vectors", "hybrid", None]
         use_semantic_captions = True if overrides.get("semantic_captions") and has_text else False
-        top = overrides.get("top") or 3
-        exclude_category = overrides.get("exclude_category") or None
-        filter = "category ne '{}'".format(exclude_category.replace("'", "''")) if exclude_category else None
+        top = overrides.get("top", 3)
+        filter = self.build_filter(overrides, auth_claims)
 
         # If retrieval mode includes vectors, compute an embedding for the query
         if has_vector: