-
Notifications
You must be signed in to change notification settings - Fork 15.3k
Expand file tree
/
Copy pathagentic_rag_embeddinggemma.py
More file actions
140 lines (123 loc) · 4.55 KB
/
agentic_rag_embeddinggemma.py
File metadata and controls
140 lines (123 loc) · 4.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import streamlit as st
from agno.agent import Agent
from agno.knowledge.embedder.ollama import OllamaEmbedder
from agno.knowledge.knowledge import Knowledge
from agno.models.ollama import Ollama
from agno.vectordb.lancedb import LanceDb, SearchType
# Configure the Streamlit page; must run before any other st.* call.
_PAGE_CONFIG = {
    "page_title": "Agentic RAG with Google's EmbeddingGemma",
    "page_icon": "🔥",
    "layout": "wide",
}
st.set_page_config(**_PAGE_CONFIG)
@st.cache_resource
def load_knowledge_base():
    """Build the Knowledge store (cached once per process by st.cache_resource).

    Vectors live in a local LanceDB table ("recipes" under tmp/lancedb);
    embeddings come from EmbeddingGemma served locally by Ollama.
    """
    vector_store = LanceDb(
        table_name="recipes",
        uri="tmp/lancedb",
        search_type=SearchType.vector,
        embedder=OllamaEmbedder(id="embeddinggemma:latest", dimensions=768),
    )
    return Knowledge(vector_db=vector_store)
# Session-state bookkeeping: `urls` is everything the user added;
# `urls_loaded` is the subset already ingested into the knowledge base.
for _key, _default in (('urls', []), ('urls_loaded', set())):
    if _key not in st.session_state:
        st.session_state[_key] = _default

kb = load_knowledge_base()

# On each rerun, ingest any URL that has not been loaded yet (once per URL).
for _url in st.session_state.urls:
    if _url in st.session_state.urls_loaded:
        continue
    kb.add_content(url=_url)
    st.session_state.urls_loaded.add(_url)
# System prompt fragments steering the model toward grounded, structured answers.
_INSTRUCTIONS = [
    "Search the knowledge base for relevant information and base your answers on it.",
    "Be clear, and generate well-structured answers.",
    "Use clear headings, bullet points, or numbered lists where appropriate.",
]

# RAG agent: Llama 3.2 (via Ollama) with retrieval over `kb` enabled.
agent = Agent(
    model=Ollama(id="llama3.2:latest"),
    knowledge=kb,
    instructions=_INSTRUCTIONS,
    search_knowledge=True,
    debug_mode=False,
    markdown=True,
)
# Sidebar: logos plus the UI for adding PDF URLs to the knowledge base.
with st.sidebar:
    col1, col2, col3 = st.columns(3)
    with col1:
        st.image("google.png")
    with col2:
        st.image("ollama.png")
    with col3:
        st.image("agno.png")
    st.header("🌐 Add Knowledge Sources")
    new_url = st.text_input(
        "Add URL",
        placeholder="https://example.com/sample.pdf",
        help="Enter a PDF URL to add to the knowledge base",
    )
    if st.button("➕ Add URL", type="primary"):
        # Strip whitespace so a pasted URL with stray spaces doesn't create
        # a bogus duplicate entry that can never match an existing one.
        new_url = new_url.strip() if new_url else ""
        if not new_url:
            st.error("Please enter a URL")
        elif new_url in st.session_state.urls:
            st.warning("This URL has already been added.")
        else:
            with st.spinner("📥 Adding new URL..."):
                try:
                    kb.add_content(url=new_url)
                except Exception as e:
                    # Record the URL only after a successful ingest; a failed
                    # fetch must not leave a phantom entry or crash the app.
                    st.error(f"Failed to add URL: {e}")
                else:
                    st.session_state.urls.append(new_url)
                    st.session_state.urls_loaded.add(new_url)
                    st.success(f"✅ Added: {new_url}")
                    st.rerun()
    # Display current URLs
    if st.session_state.urls:
        st.subheader("📚 Current Knowledge Sources")
        for i, url in enumerate(st.session_state.urls, 1):
            st.markdown(f"{i}. {url}")
# Main header and intro copy.
st.title("🔥 Agentic RAG with EmbeddingGemma (100% local)")
_INTRO = """
This app demonstrates an agentic RAG system using local models via [Ollama](https://ollama.com/):
- **EmbeddingGemma** for creating vector embeddings
- **LanceDB** as the local vector database
Add PDF URLs in the sidebar to start and ask questions about the content.
"""
st.markdown(_INTRO)
# Question input + streamed answer rendering.
query = st.text_input("Enter your question:")

if st.button("🚀 Get Answer", type="primary"):
    if not query:
        st.error("Please enter a question")
    else:
        st.markdown("### 💡 Answer")
        with st.spinner("🔍 Searching knowledge and generating answer..."):
            try:
                placeholder = st.empty()
                answer_so_far = ""
                for chunk in agent.run(query, stream=True):
                    # Some stream events carry no text; skip those.
                    if chunk.content is not None:
                        answer_so_far += chunk.content
                        placeholder.markdown(answer_so_far)
            except Exception as e:
                st.error(f"Error: {e}")
# Collapsible explainer describing the app's architecture.
_HOW_IT_WORKS = """
**This app uses the Agno framework to create an intelligent Q&A system:**
1. **Knowledge Loading**: PDF URLs are processed and stored in LanceDB vector database
2. **EmbeddingGemma as Embedder**: EmbeddingGemma generates local embeddings for semantic search
3. **Llama 3.2**: The Llama 3.2 model generates answers based on retrieved context
**Key Components:**
- `EmbeddingGemma` as the embedder
- `LanceDB` as the vector database
- `Knowledge`: Manages document loading from PDF URLs
- `OllamaEmbedder`: Uses EmbeddingGemma for embeddings
- `Agno Agent`: Orchestrates everything to answer questions
"""
with st.expander("📖 How This Works"):
    st.markdown(_HOW_IT_WORKS)