Spaces:
Sleeping
Sleeping
Commit
·
4a9c4cd
1
Parent(s):
0078b66
1st launch attempt at rag openai chatbot
Browse files- app.py +136 -0
- chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/data_level0.bin +3 -0
- chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/header.bin +3 -0
- chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/index_metadata.pickle +3 -0
- chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/length.bin +3 -0
- chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/link_lists.bin +3 -0
- chromadb_storage_openai_upgrade/chroma.sqlite3 +3 -0
- requirements.txt +13 -0
app.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os  # NOTE(review): not used below — confirm before removing
import openai
import chromadb
import numpy as np
from dotenv import load_dotenv
import gradio as gr
import logging  # NOTE(review): imported but never configured or used — confirm before removing

# Load environment variables (optional if you want to keep the dotenv usage)
load_dotenv()
| 11 |
+
|
| 12 |
+
# Define OpenAI-based model
class OpenAIChatbot:
    """Thin wrapper around the OpenAI SDK for chat completions and embeddings.

    A dedicated ``openai.OpenAI`` client is created per instance instead of
    assigning the process-global ``openai.api_key`` before every request:
    the Gradio UI lets each user supply their own key, and mutating module
    state per call is a race waiting to happen.
    """

    def __init__(self, api_key):
        """Store model names and build a client bound to *api_key*."""
        self.embedding_model = "text-embedding-3-large"  # OpenAI model with 3072 dimensions
        self.chat_model = "gpt-4o"
        self.api_key = api_key  # kept for backward compatibility with existing callers
        self.client = openai.OpenAI(api_key=api_key)

    def get_response(self, prompt):
        """Get a response from OpenAI GPT-4 model.

        Returns the assistant's message content, or a fixed error string if
        the API call fails (callers rely on always getting a string back).
        """
        try:
            response = self.client.chat.completions.create(
                model=self.chat_model,
                messages=[
                    {"role": "system", "content": "You are a helpful AI assistant."},
                    {"role": "user", "content": prompt}
                ]
            )
            # Correctly access the message content in the response
            return response.choices[0].message.content
        except Exception as e:
            print(f"Error generating response: {e}")
            return "Error: Unable to generate a response."

    def text_to_embedding(self, text):
        """Convert text to embedding using OpenAI embedding model.

        Returns a 1-D numpy array (3072 floats for text-embedding-3-large),
        or ``None`` when the API call fails.
        """
        try:
            response = self.client.embeddings.create(
                model=self.embedding_model,
                input=text
            )
            # Access the embedding using the 'data' attribute
            embedding = np.array(response.data[0].embedding)
            print(f"Generated embedding for text: {text}")
            return embedding
        except Exception as e:
            print(f"Error generating embedding: {e}")
            return None
| 51 |
+
|
| 52 |
+
# Modify LocalEmbeddingStore to ensure correct dimensionality (3072) in ChromaDB
class LocalEmbeddingStore:
    """Persistent ChromaDB-backed vector store for the chatbot's documents."""

    def __init__(self, storage_dir="./chromadb_storage_openai_upgrade", embedding_dim=3072):
        """Open (or create) the persistent collection.

        Args:
            storage_dir: Directory holding the ChromaDB persistent storage.
            embedding_dim: Expected dimensionality of query embeddings.
                Defaults to 3072 (text-embedding-3-large), matching the
                vectors already persisted in this collection.
        """
        # Use ChromaDB client with persistent storage
        self.client = chromadb.PersistentClient(path=storage_dir)
        self.collection_name = "chatbot_docs"
        self.embedding_dim = embedding_dim

        # Get the collection without adding new embeddings
        self.collection = self.client.get_or_create_collection(name=self.collection_name)

    def search_embedding(self, query_embedding, num_results=3):
        """Search for the most relevant documents based on embedding similarity.

        Args:
            query_embedding: 1-D array-like of length ``self.embedding_dim``.
            num_results: Maximum number of hits to return.

        Returns:
            Tuple ``(documents, distances)`` as returned by ChromaDB's
            ``Collection.query`` (one list per query).

        Raises:
            ValueError: If the query embedding has the wrong dimensionality.
        """
        query_embedding = np.asarray(query_embedding)
        if query_embedding.shape[0] != self.embedding_dim:
            raise ValueError(
                f"Query embedding dimensionality must be {self.embedding_dim}."
            )

        print(f"Query embedding: {query_embedding}")  # Debugging: log the query embedding
        results = self.collection.query(
            query_embeddings=[query_embedding.tolist()],  # Chroma expects plain lists
            n_results=num_results
        )
        print(f"Search results: {results}")  # Debugging: inspect raw results
        return results['documents'], results['distances']
| 74 |
+
|
| 75 |
+
# Modify RAGSystem to integrate ChromaDB search
class RAGSystem:
    """Retrieval-augmented generation: look up context in the embedding
    store, then prompt the chat model with that context prepended."""

    def __init__(self, openai_client, embedding_store):
        # openai_client: object exposing text_to_embedding() and get_response()
        # embedding_store: object exposing search_embedding()
        self.openai_client = openai_client
        self.embedding_store = embedding_store

    def get_most_relevant_document(self, query_embedding, similarity_threshold=0.7):
        """Retrieve the most relevant document based on cosine similarity."""
        docs, distances = self.embedding_store.search_embedding(query_embedding)
        # Check if the results are empty or have low relevance
        # NOTE(review): Chroma's query() returns *distances* (smaller = closer
        # under its default L2 space), but this test treats the value as a
        # similarity and rejects when it is small — which would discard the
        # BEST matches. Confirm the collection's distance metric before
        # trusting this filter; if it really is a distance, the comparison
        # should be `> threshold` (with a distance-scaled threshold).
        if not docs or not distances or distances[0][0] < similarity_threshold:
            print("No relevant documents found or similarity is too low.")
            return None, None  # Return None if no relevant documents found
        # NOTE(review): docs[0] is the list of documents for the first query,
        # not a single document id — downstream code formats it as an "id".
        return docs[0], distances[0][0]  # Return the most relevant document and the first distance value

    def chat_with_rag(self, user_input):
        """Handle the RAG process."""
        # Embed the user's question; text_to_embedding returns None on failure.
        query_embedding = self.openai_client.text_to_embedding(user_input)
        if query_embedding is None or query_embedding.size == 0:
            return "Failed to generate embeddings."

        context_document_id, similarity_score = self.get_most_relevant_document(query_embedding)
        if not context_document_id:
            return "No relevant documents found."

        # Assuming metadata retrieval works
        context_metadata = f"Metadata for {context_document_id}"  # Placeholder, implement as needed

        # Build the final prompt: retrieved context first, then the question.
        prompt = f"""Context (similarity score {similarity_score:.2f}):
{context_metadata}

User: {user_input}
AI:"""
        return self.openai_client.get_response(prompt)
| 109 |
+
|
| 110 |
+
# Gradio UI
def chat_ui(user_input, api_key, chat_history):
    """Handle one chat turn and return the updated history.

    Args:
        user_input: Text typed by the user.
        api_key: OpenAI API key from the password textbox.
        chat_history: List of (user, ai) tuples held by the gr.Chatbot.

    Returns:
        The updated ``chat_history`` list. The output component is a
        ``gr.Chatbot``, so this function must ALWAYS return the tuple
        list — never a bare string.
    """
    # Guard empty input first, before building any clients.
    if not user_input.strip():
        return chat_history

    if not api_key.strip():
        # Bug fix: previously returned a bare string, which gr.Chatbot
        # cannot render; surface the error inside the history instead.
        chat_history.append((user_input, "Please provide your OpenAI API key before proceeding."))
        return chat_history

    # Initialize OpenAIChatbot with the user's API key
    chatbot = OpenAIChatbot(api_key)
    embedding_store = LocalEmbeddingStore(storage_dir="./chromadb_storage_openai_upgrade")
    rag_system = RAGSystem(openai_client=chatbot, embedding_store=embedding_store)

    ai_response = rag_system.chat_with_rag(user_input)
    chat_history.append((user_input, ai_response))
    return chat_history
| 126 |
+
|
| 127 |
+
# Gradio interface
with gr.Blocks() as demo:
    # Password-type textbox so the key is masked in the browser.
    api_key_input = gr.Textbox(label="Enter your OpenAI API Key", placeholder="API Key here...", type="password")
    chat_history = gr.Chatbot(label="OpenAI Chatbot with RAG", elem_id="chatbox")
    user_input = gr.Textbox(placeholder="Enter your prompt here...")
    submit_button = gr.Button("Submit")
    # chat_ui(user_input, api_key, history) -> updated history rendered back
    # into the same Chatbot component.
    submit_button.click(chat_ui, inputs=[user_input, api_key_input, chat_history], outputs=chat_history)

if __name__ == "__main__":
    demo.launch()
|
chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/data_level0.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fdb1852b1f33ec1f06620483cd9e9d769c843b4adf1363f384efb44adee359d
|
| 3 |
+
size 12428000
|
chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/header.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d262a9eef1210b3dc49c38d785567356b10abbbd2fb6d37eca142c5b84b5c26
|
| 3 |
+
size 100
|
chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/index_metadata.pickle
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25f02c7038ec98303a0cb01efa8c999074d64d11ce28277128cf076afb8005f5
|
| 3 |
+
size 46767
|
chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/length.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:077b99a97702a239d38650d7bc68f98136901063b4cf7df1e8601ffe6fbac674
|
| 3 |
+
size 4000
|
chromadb_storage_openai_upgrade/b567b3ad-af02-467e-8f86-862c4b7a61d1/link_lists.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5be490c12afb0e518d3f633b8ebe724a648b6309a86c1654727144bbb7f5c7fa
|
| 3 |
+
size 8420
|
chromadb_storage_openai_upgrade/chroma.sqlite3
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43cc6200827205a8da0155b1bd590a4916a7e9124eb0d555a06f2ac40c3d4e27
|
| 3 |
+
size 47734784
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
python-dotenv
|
| 2 |
+
gradio
|
| 3 |
+
groq
openai
|
| 4 |
+
chromadb
|
| 5 |
+
langchain
|
| 6 |
+
pymupdf
|
| 7 |
+
numpy
|
| 8 |
+
fpdf
|
| 9 |
+
pygments
|
| 10 |
+
weasyprint
|
| 11 |
+
langchain-community
|
| 12 |
+
transformers
|
| 13 |
+
sentence-transformers
|