Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tools/chatgpt/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ This tool leverages OpenAI's ChatGPT API to generate responses based on user-pro
Users can upload context data in various formats and ask questions or execute prompts related to that data.
The tool then uploads the data to an OpenAI server and processes it using the selected ChatGPT model, returning an AI-generated response tailored to the context provided.

To utilize this tool, users need to input their OpenAI API key in the user preferences. To obtain an API key, visit https://platform.openai.com/account/api-keys.
To utilize this tool, users need to input their OpenAI API key in the credentials section. To obtain an API key, visit https://platform.openai.com/account/api-keys.

Make sure to set up the payment method in your OpenAI account to use the API key in here: https://platform.openai.com/settings/organization/billing/

Expand Down
19 changes: 0 additions & 19 deletions tools/chatgpt/README.rst

This file was deleted.

275 changes: 190 additions & 85 deletions tools/chatgpt/chatgpt.py
Original file line number Diff line number Diff line change
@@ -1,96 +1,201 @@
from __future__ import annotations

import base64
import json
import os
import sys
from collections.abc import Iterable, Sequence
from dataclasses import dataclass
from typing import cast, ClassVar, TypeAlias

from openai import AuthenticationError, OpenAI
from openai.types.chat import ChatCompletion
from openai.types.chat.chat_completion_content_part_image_param import (
ChatCompletionContentPartImageParam,
ImageURL,
)
from openai.types.chat.chat_completion_content_part_param import (
ChatCompletionContentPartParam,
)
from openai.types.chat.chat_completion_content_part_text_param import (
ChatCompletionContentPartTextParam,
)
from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
from openai.types.chat.chat_completion_user_message_param import (
ChatCompletionUserMessageParam,
)

MessageContentItem: TypeAlias = ChatCompletionContentPartParam
ContextFile: TypeAlias = tuple[str, str]


@dataclass(frozen=True)
class MessageBuilder:
    """Assembles the content parts of one user message from a question plus context files."""

    question: str
    context_files: Sequence[ContextFile]

    # Extension -> MIME type used in the data URL; unrecognized extensions
    # fall back to image/jpeg below.
    _MEDIA_TYPE_MAP: ClassVar[dict[str, str]] = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".gif": "image/gif",
        ".webp": "image/webp",
    }

    # Hard cap on the raw size of an image file (20 MiB).
    _MAX_IMAGE_BYTES: ClassVar[int] = 20 * 1024 * 1024

    def build(self) -> list[MessageContentItem]:
        """Construct the completion request payload."""
        parts: list[MessageContentItem] = [{"type": "text", "text": self.question}]

        for file_path, kind in self.context_files:
            if kind == "image":
                parts.append(self._build_image_content(file_path))
            else:
                parts.append(self._build_text_content(file_path))

        return parts

    def _build_image_content(self, path: str) -> ChatCompletionContentPartImageParam:
        """Encode an image context file for model consumption.

        Raises ValueError when the file is larger than the 20 MiB cap.
        """
        if os.path.getsize(path) > self._MAX_IMAGE_BYTES:
            raise ValueError(
                f"File {path} exceeds the 20MB limit and will not be processed."
            )

        extension = os.path.splitext(path)[1]
        media_type = self._MEDIA_TYPE_MAP.get(extension.lower(), "image/jpeg")
        with open(path, "rb") as handle:
            encoded = base64.standard_b64encode(handle.read()).decode("utf-8")

        return ChatCompletionContentPartImageParam(
            type="image_url",
            image_url=ImageURL(url=f"data:{media_type};base64,{encoded}", detail="auto"),
        )

    def _build_text_content(self, path: str) -> ChatCompletionContentPartTextParam:
        """Read a text context file and wrap it in a templated message.

        Raises ValueError (chained from OSError) when the file cannot be read.
        """
        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as handle:
                body = handle.read()
        except OSError as exc:
            raise ValueError(f"Error reading file {path}: {exc}") from exc

        return ChatCompletionContentPartTextParam(
            type="text",
            text=f"--- Content of {os.path.basename(path)} ---\n{body}\n",
        )


def parse_context_files(raw: str) -> list[ContextFile]:
    """Parse and validate the JSON encoded context file descriptors.

    Expects a JSON array of two-element string arrays: [[path, type], ...].

    Returns a list of (path, type) tuples.
    Raises ValueError on malformed JSON, a non-list payload, or an entry
    that is not a pair of strings.
    """
    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise ValueError("Invalid JSON payload for context files.") from exc

    if not isinstance(decoded, list):
        raise ValueError("Context files payload must be a list.")

    parsed: list[ContextFile] = []
    for entry in decoded:
        # json.loads yields lists (not tuples), so validate shape explicitly.
        if (
            isinstance(entry, list)
            and len(entry) == 2
            and isinstance(entry[0], str)
            and isinstance(entry[1], str)
        ):
            parsed.append((entry[0], entry[1]))
        else:
            raise ValueError(
                "Each context file entry must be a pair of strings [path, type]."
            )

    return parsed


def build_messages(
    question: str, context_files: Sequence[ContextFile]
) -> list[MessageContentItem]:
    """Helper to hide the dataclass implementation detail from callers."""
    builder = MessageBuilder(question=question, context_files=context_files)
    return builder.build()


def call_chat_completion(
    client: OpenAI, model: str, messages: Iterable[MessageContentItem]
) -> ChatCompletion:
    """Request a chat completion using the given client.

    Wraps the prepared content parts in a single user-role message and sends
    one chat.completions request. API errors (auth, network, quota)
    propagate to the caller.
    """
    user_message = ChatCompletionUserMessageParam(role="user", content=list(messages))
    payload: list[ChatCompletionMessageParam] = [
        cast(ChatCompletionMessageParam, user_message)
    ]
    return client.chat.completions.create(
        model=model,
        messages=payload,
    )

def main(argv: Sequence[str]) -> int:
    """Run the ChatGPT CLI end to end.

    argv layout: [prog, context_files_json, question, model].
    Reads the API key from the OPENAI_API_KEY environment variable, builds
    the message payload, calls the chat completion API, and writes the
    response text to output.md.

    Returns a process exit code: 0 on success, 1 on any failure.
    """
    if len(argv) < 4:
        print("Usage: chatgpt.py <context_files_json> <question> <model>")
        return 1

    try:
        context_files = parse_context_files(argv[1])
    except ValueError as exc:
        print(str(exc))
        return 1

    question = argv[2]
    model = argv[3]
    # The caller encodes newlines as "__cn__" so they survive argv quoting.
    question = question.replace("__cn__", "\n")

    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        print("OpenAI API key is not provided in credentials!")
        return 1

    client = OpenAI(api_key=openai_api_key)

    try:
        message_content = build_messages(question, context_files)
        response = call_chat_completion(client, model, message_content)
    except AuthenticationError as exc:
        print(f"Authentication error: {exc}")
        return 1
    except ValueError as exc:
        print(str(exc))
        return 1
    except Exception as exc:  # noqa: BLE001 - keep reporting unexpected OpenAI errors
        print(f"An error occurred: {exc}")
        return 1

    # Treat both "no choices" and "empty message content" as the same
    # no-output condition instead of duplicating the check.
    content = (
        getattr(response.choices[0].message, "content", None)
        if response.choices
        else None
    )
    if not content:
        print(
            "No output was generated!\nPlease ensure that your OpenAI account has sufficient credits.\n"
            "You can check your balance here: https://platform.openai.com/settings/organization/billing"
        )
        return 1

    print(
        f"Successfully generated response for:\n{question[:100]}{'...' if len(question) > 100 else ''}"
    )
    with open("output.md", "w", encoding="utf-8") as file_handle:
        file_handle.write(content)
    return 0


if __name__ == "__main__":
    # Propagate main()'s return value to the shell as the process exit code.
    raise SystemExit(main(sys.argv))
Loading
Loading