s4um1l commited on
Commit
209e402
·
1 Parent(s): 5fa73da

assignment checkpoint

Browse files
.env.example ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # OpenAI API key - required for embedding and generation
2
+ OPENAI_API_KEY=your_openai_api_key_here
Dockerfile CHANGED
@@ -1,31 +1,73 @@
 
1
 
2
- # Get a distribution that has uv already installed
3
- FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  # Add user - this is the user that will run the app
6
- # If you do not set user, the app will run as root (undesirable)
7
  RUN useradd -m -u 1000 user
8
- USER user
9
 
10
- # Set the home directory and path
11
- ENV HOME=/home/user \
12
- PATH=/home/user/.local/bin:$PATH
 
 
 
 
 
 
 
 
13
 
 
 
 
 
14
  ENV UVICORN_WS_PROTOCOL=websockets
15
 
16
 
17
- # Set the working directory
18
- WORKDIR $HOME/app
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- # Copy the app to the container
21
- COPY --chown=user . $HOME/app
22
 
23
- # Install the dependencies
24
- # RUN uv sync --frozen
25
- RUN uv sync
26
 
27
- # Expose the port
28
  EXPOSE 7860
29
 
30
- # Run the app
31
- CMD ["uv", "run", "chainlit", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
 
1
FROM ghcr.io/astral-sh/uv:python3.10-bookworm-slim AS builder

# Build the virtual environment at the same absolute path it occupies in the
# final image. Entry-point scripts inside a venv embed the venv's absolute
# path in their shebang line, so a venv copied to a different location is left
# with broken console scripts (only `python -m ...` keeps working).
WORKDIR /home/user/app

# Copy project files for dependency installation
COPY pyproject.toml README.md ./

# Create a virtual environment and install dependencies using uv sync
RUN uv venv /home/user/app/.venv && \
    . /home/user/app/.venv/bin/activate && \
    uv sync

# Verify installations with the virtual environment
RUN . /home/user/app/.venv/bin/activate && \
    python -c "import numpy; print(f'NumPy version: {numpy.__version__}')" && \
    python -c "import pandas; print(f'Pandas version: {pandas.__version__}')" && \
    python -c "import uvicorn; print(f'Uvicorn version: {uvicorn.__version__}')"

# Second stage for the final image
FROM ghcr.io/astral-sh/uv:python3.10-bookworm-slim

# Add user - this is the user that will run the app
RUN useradd -m -u 1000 user

# Copy the virtual environment from the builder stage (same path in both stages)
COPY --from=builder /home/user/app/.venv /home/user/app/.venv

# Install Node.js for building the frontend.
# `curl -f` makes a failed download abort the build instead of piping an HTTP
# error page into bash.
RUN apt-get update && apt-get install -y \
        curl \
        gnupg \
    && curl -fsSL https://deb.nodesource.com/setup_18.x | bash - \
    && apt-get install -y nodejs \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Set up user environment; the venv's bin directory goes first on PATH so
# `python` resolves to the interpreter with the installed dependencies
ENV HOME=/home/user \
    PATH=/home/user/app/.venv/bin:$PATH

ENV UVICORN_WS_PROTOCOL=websockets

# Verify dependencies are available in the final image
RUN python -c "import numpy; print(f'NumPy version: {numpy.__version__}')" && \
    python -c "import pandas; print(f'Pandas version: {pandas.__version__}')" && \
    python -c "import uvicorn; print(f'Uvicorn version: {uvicorn.__version__}')"

# Copy frontend code and build it
COPY --chown=user frontend /home/user/app/frontend
USER user
WORKDIR /home/user/app/frontend
RUN npm install && npm run build

# Copy backend code
WORKDIR /home/user/app
COPY --chown=user backend /home/user/app/backend

# Copy aimakerspace module
COPY --chown=user aimakerspace /home/user/app/aimakerspace

# Set the working directory to the backend folder
WORKDIR /home/user/app/backend

# Expose port for FastAPI on Hugging Face
EXPOSE 7860

# Start the FastAPI server
CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -168,6 +168,10 @@ Simply put, this downloads the file as a temp file, we load it in with `TextFile
168
 
169
  Why do we want to support streaming? What about streaming is important, or useful?
170
 
 
 
 
 
171
  ### On Chat Start:
172
 
173
  The next scope is where "the magic happens". On Chat Start is when a user begins a chat session. This will happen whenever a user opens a new chat window, or refreshes an existing chat window.
@@ -210,6 +214,20 @@ Now, we'll save that into our user session!
210
 
211
  Why are we using User Session here? What about Python makes us need to use this? Why not just store everything in a global variable?
212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
  ### On Message
214
 
215
  First, we load our chain from the user session:
@@ -334,11 +352,147 @@ Upload a PDF file of the recent DeepSeek-R1 paper and ask the following question
334
  3. What is this paper about?
335
 
336
  Does this application pass your vibe check? Are there any immediate pitfalls you're noticing?
 
 
 
 
 
337
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  ## 🚧 CHALLENGE MODE 🚧
339
 
340
  For the challenge mode, please instead create a simple FastAPI backend with a simple React (or any other JS framework) frontend.
341
 
342
  You can use the same prompt templates and RAG pipeline as we did here - but you'll need to modify the code to work with FastAPI and React.
343
 
344
- Deploy this application to Hugging Face Spaces!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  Why do we want to support streaming? What about streaming is important, or useful?
170
 
171
+ ```
172
+ Improved user experience: streaming reduces perceived response time and makes the application feel interactive.
173
+ ```
174
+
175
  ### On Chat Start:
176
 
177
  The next scope is where "the magic happens". On Chat Start is when a user begins a chat session. This will happen whenever a user opens a new chat window, or refreshes an existing chat window.
 
214
 
215
  Why are we using User Session here? What about Python makes us need to use this? Why not just store everything in a global variable?
216
 
217
+ ```
218
+ User Session - > The user session is designed to persist data in memory through the life cycle of a chat session. Each user session is unique to a user and a given chat session.
219
+ It gives each user:
220
+ - Their own chat history
221
+ - Their own state (e.g., selected options, intermediate results, etc.)
222
+ - Their own context
223
+
224
+ What about Python makes us need to use this? Why not just store everything in a global variable?
225
+ Python Global Variables
226
+ - Shared across all users: A global variable in Python is shared across all threads/processes unless isolated manually.
227
+ - Not thread-safe: In multi-threaded setups, concurrent access to globals can lead to race conditions.
228
+ - Stateless in serverless setups: If you're running Chainlit on something like a cloud function or serverless platform, globals may reset between requests.
229
+ ```
230
+
231
  ### On Message
232
 
233
  First, we load our chain from the user session:
 
352
  3. What is this paper about?
353
 
354
  Does this application pass your vibe check? Are there any immediate pitfalls you're noticing?
355
+ ```
356
+ The application fails the vibe check: when we ask "What is this paper about?" it responds "I don't know the answer".
357
+ If we log the retrieved context, we see that it retrieves mostly author names (reference entries and the contributor list) and hence doesn't give the LLM good context.
358
+ Fixing retrieval should be the key to answer these questions.
359
+
360
 
361
+ ```
362
+ ---
363
+ ```
364
+ here is example retrieval
365
+ 2025-04-14 12:48:19 - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
366
+ Context: flect deeply on the boundaries of the unknown, 2024a. URL https://qwenlm
367
+ .github.io/blog/qwq-32b-preview/ .
368
+ Qwen. Qwen2.5: A party of foundation models, 2024b. URL https://qwenlm.github.io/b
369
+ log/qwen2.5 .
370
+ D. Rein, B. L. Hou, A. C. Stickland, J. Petty, R. Y. Pang, J. Dirani, J. Michael, and S. R. Bowman.
371
+ GPQA: A graduate-level google-proof q&a benchmark. arXiv preprint arXiv:2311.12022 , 2023.
372
+ Z. Shao, P . Wang, Q. Zhu, R. Xu, J. Song, M. Zhang, Y. Li, Y. Wu, and D. Guo. Deepseekmath:
373
+ Pushing the limits of mathematical reasoning in open language models. arXiv preprint
374
+ arXiv:2402.03300, 2024.
375
+ D. Silver, T. Hubert, J. Schrittwieser, I. Antonoglou, M. Lai, A. Guez, M. Lanctot, L. Sifre,
376
+ D. Kumaran, T. Graepel, T. P . Lillicrap, K. Simonyan, and D. Hassabis. Mastering chess and
377
+ shogi by self-play with a general reinforcement learning algorithm. CoRR , abs/1712.01815,
378
+ 2017a. URL http://arxiv.org/abs/1712.01815 .
379
+ 18
380
+ D. Silver, J. Schrittwieser, K. Simonyan, I. Antonoglou, A. Huang, A. Guez, T
381
+ 3-06747-5.
382
+ J. Uesato, N. Kushman, R. Kumar, F. Song, N. Siegel, L. Wang, A. Creswell, G. Irving, and
383
+ I. Higgins. Solving math word problems with process-and outcome-based feedback. arXiv
384
+ preprint arXiv:2211.14275, 2022.
385
+ P . Wang, L. Li, Z. Shao, R. Xu, D. Dai, Y. Li, D. Chen, Y. Wu, and Z. Sui. Math-shepherd: A label-
386
+ free step-by-step verifier for llms in mathematical reasoning. arXiv preprint arXiv:2312.08935 ,
387
+ 2023.
388
+ X. Wang, J. Wei, D. Schuurmans, Q. Le, E. Chi, S. Narang, A. Chowdhery, and D. Zhou.
389
+ Self-consistency improves chain of thought reasoning in language models. arXiv preprint
390
+ arXiv:2203.11171, 2022.
391
+ Y. Wang, X. Ma, G. Zhang, Y. Ni, A. Chandra, S. Guo, W. Ren, A. Arulraj, X. He, Z. Jiang, T. Li,
392
+ M. Ku, K. Wang, A. Zhuang, R. Fan, X. Yue, and W. Chen. Mmlu-pro: A more robust and
393
+ challenging multi-task language understanding benchmark. CoRR , abs/2406.01574, 2024.
394
+ URL https://doi.org/10.48550/arXiv.2406.01574 .
395
+ C. S. Xia, Y. Deng, S. Dunn, and L. Zhang. Agentless: Demystifyin
396
+
397
+ challenging multi-task language understanding benchmark. CoRR , abs/2406.01574, 2024.
398
+ URL https://doi.org/10.48550/arXiv.2406.01574 .
399
+ C. S. Xia, Y. Deng, S. Dunn, and L. Zhang. Agentless: Demystifying llm-based software
400
+ engineering agents. arXiv preprint, 2024.
401
+ H. Xin, Z. Z. Ren, J. Song, Z. Shao, W. Zhao, H. Wang, B. Liu, L. Zhang, X. Lu, Q. Du, W. Gao,
402
+ Q. Zhu, D. Yang, Z. Gou, Z. F. Wu, F. Luo, and C. Ruan. Deepseek-prover-v1.5: Harnessing
403
+ proof assistant feedback for reinforcement learning and monte-carlo tree search, 2024. URL
404
+ https://arxiv.org/abs/2408.08152 .
405
+ J. Zhou, T. Lu, S. Mishra, S. Brahma, S. Basu, Y. Luan, D. Zhou, and L. Hou. Instruction-following
406
+ evaluation for large language models. arXiv preprint arXiv:2311.07911, 2023.
407
+ 19
408
+ Appendix
409
+ A. Contributions and Acknowledgments
410
+ Core Contributors
411
+ Daya Guo
412
+ Dejian Yang
413
+ Haowei Zhang
414
+ Junxiao Song
415
+ Ruoyu Zhang
416
+ Runxin Xu
417
+ Qihao Zhu
418
+ Shirong Ma
419
+ Peiyi Wang
420
+ Xiao Bi
421
+ Xiaokang Zhang
422
+ Xingkai Yu
423
+ Yu Wu
424
+ Z.F. Wu
425
+ Zhibin Gou
426
+ Zhihong Shao
427
+ Zhuoshu Li
428
+
429
+ md .
430
+ Anthropic. Claude 3.5 sonnet, 2024. URL https://www.anthropic.com/news/claude-3
431
+ -5-sonnet .
432
+ M. Chen, J. Tworek, H. Jun, Q. Yuan, H. P . de Oliveira Pinto, J. Kaplan, H. Edwards, Y. Burda,
433
+ N. Joseph, G. Brockman, A. Ray, R. Puri, G. Krueger, M. Petrov, H. Khlaaf, G. Sastry, P . Mishkin,
434
+ B. Chan, S. Gray, N. Ryder, M. Pavlov, A. Power, L. Kaiser, M. Bavarian, C. Winter, P . Tillet,
435
+ F. P . Such, D. Cummings, M. Plappert, F. Chantzis, E. Barnes, A. Herbert-Voss, W. H. Guss,
436
+ A. Nichol, A. Paino, N. Tezak, J. Tang, I. Babuschkin, S. Balaji, S. Jain, W. Saunders, C. Hesse,
437
+ A. N. Carr, J. Leike, J. Achiam, V . Misra, E. Morikawa, A. Radford, M. Knight, M. Brundage,
438
+ M. Murati, K. Mayer, P . Welinder, B. McGrew, D. Amodei, S. McCandlish, I. Sutskever, and
439
+ W. Zaremba. Evaluating large language models trained on code. CoRR , abs/2107.03374, 2021.
440
+ URL https://arxiv.org/abs/2107.03374 .
441
+ A. Dubey, A. Jauhri, A. Pandey, A. Kadian, A. Al-Dahle, A. Letman, A. Mathur, A. Schelten,
442
+ A. Yang, A. Fan, e
443
+ ```
444
  ## 🚧 CHALLENGE MODE 🚧
445
 
446
  For the challenge mode, please instead create a simple FastAPI backend with a simple React (or any other JS framework) frontend.
447
 
448
  You can use the same prompt templates and RAG pipeline as we did here - but you'll need to modify the code to work with FastAPI and React.
449
 
450
+ Deploy this application to Hugging Face Spaces!
451
+
452
+ # FastAPI & React RAG Chat Application
453
+
454
+ A document question-answering application built with FastAPI, React, and a Pythonic RAG (Retrieval Augmented Generation) implementation.
455
+
456
+ ## Features
457
+
458
+ - Upload PDF and text files
459
+ - Chat with your documents
460
+ - Modern React UI with Chakra UI components
461
+ - Pythonic RAG implementation with OpenAI models
462
+ - Dockerized for easy deployment
463
+ - Uses UV for Python package management
464
+
465
+
466
+ ## Environment Variables
467
+
468
+ Create a `.env` file with the following variables:
469
+
470
+ ```
471
+ OPENAI_API_KEY=your_openai_api_key_here
472
+ ```
473
+
474
+ ## Credits
475
+
476
+ Based on the Pythonic RAG implementation from the AI Makerspace course.
477
+
478
+ ## Docker Deployment
479
+
480
+ ### Building the Docker Image
481
+
482
+ Build the Docker image with:
483
+
484
+ ```bash
485
+ docker build -t rag-app .
486
+ ```
487
+
488
+ ### Running the Container
489
+
490
+ Run the container:
491
+
492
+ ```bash
493
+ docker run -p 7860:7860 --env OPENAI_API_KEY=your_key_here rag-app
494
+ ```
495
+
496
+ Replace `your_key_here` with your actual OpenAI API key.
497
+
498
+ Access the application at http://localhost:7860
aimakerspace/openai_utils/__init__.py CHANGED
@@ -0,0 +1 @@
 
 
1
+ # OpenAI utilities module
aimakerspace/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Utils module for AI Makerspace
aimakerspace/utils/session_manager.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ import logging
3
+
4
# Configure logging
logger = logging.getLogger(__name__)


class SessionManager:
    """
    Manages user sessions for processing and query operations.

    Sessions live in a plain in-memory dict keyed by a UUID string. A session
    value is the string "processing", the string "failed", or any other object,
    which is reported as "ready" (the callers store a RAG pipeline object).
    """

    def __init__(self):
        """Initialize the session manager with an empty sessions dictionary"""
        self.sessions = {}
        logger.info("SessionManager initialized")

    def create_session(self, initial_value="processing"):
        """Create a new session with a UUID and return the session ID"""
        session_id = str(uuid.uuid4())
        self.sessions[session_id] = initial_value
        logger.info(f"Created session: {session_id} with initial value: {initial_value}")
        return session_id

    def update_session(self, session_id, value):
        """Update the session with the given ID to the provided value"""
        self.sessions[session_id] = value
        logger.info(f"Updated session: {session_id}")

    def session_exists(self, session_id):
        """Check if a session with the given ID exists"""
        return session_id in self.sessions

    def get_session(self, session_id):
        """Get the session data for the given ID (None if it does not exist)"""
        return self.sessions.get(session_id)

    @staticmethod
    def _status_of(session_data):
        """Map a stored session value to "processing", "failed" or "ready"."""
        if session_data == "processing":
            return "processing"
        if session_data == "failed":
            return "failed"
        # Anything else is treated as a finished pipeline object
        return "ready"

    @staticmethod
    def _count_documents(session_data):
        """Return the size of the session's vector store, or 0 if it cannot be found.

        Looks at ``session_data.vector_db_retriever`` and prefers its
        ``documents`` attribute, falling back to ``vectors``.
        """
        if not hasattr(session_data, 'vector_db_retriever'):
            return 0
        retriever = session_data.vector_db_retriever
        for attribute in ('documents', 'vectors'):
            if hasattr(retriever, attribute):
                return len(getattr(retriever, attribute))
        return 0

    def get_sessions_summary(self):
        """Get a summary of all sessions.

        Returns:
            dict with ``session_count``, a per-session ``sessions`` mapping
            (``status`` for every session, plus ``document_count`` for ready
            ones) and ``memory_usage`` (currently just the session count).
        """
        session_info = {}

        for session_id, session_data in self.sessions.items():
            status = self._status_of(session_data)
            entry = {"status": status}
            if status == "ready":
                entry["document_count"] = self._count_documents(session_data)
            session_info[session_id] = entry

        total = len(self.sessions)
        return {
            "session_count": total,
            "sessions": session_info,
            "memory_usage": {
                "session_count": total
            }
        }
app.py CHANGED
@@ -36,7 +36,7 @@ class RetrievalAugmentedQAPipeline:
36
  context_prompt = ""
37
  for context in context_list:
38
  context_prompt += context[0] + "\n"
39
-
40
  formatted_system_prompt = system_role_prompt.create_message()
41
 
42
  formatted_user_prompt = user_role_prompt.create_message(question=user_query, context=context_prompt)
 
36
  context_prompt = ""
37
  for context in context_list:
38
  context_prompt += context[0] + "\n"
39
+ print(f"Context: {context_prompt}")
40
  formatted_system_prompt = system_role_prompt.create_message()
41
 
42
  formatted_user_prompt = user_role_prompt.create_message(question=user_query, context=context_prompt)
backend/main.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.staticfiles import StaticFiles
4
+ from fastapi.responses import FileResponse
5
+ from pydantic import BaseModel
6
+ import uvicorn
7
+ import os
8
+ import tempfile
9
+ import shutil
10
+ from typing import List, Optional, Dict, Any
11
+ import pathlib
12
+ import asyncio
13
+ import logging
14
+ import time
15
+ import traceback
16
+ import uuid
17
+
18
+ # Configure logging
19
+ logging.basicConfig(level=logging.INFO,
20
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
21
+ logger = logging.getLogger(__name__)
22
+
23
# Load environment variables BEFORE importing rag: rag.py creates its OpenAI
# client at import time from OPENAI_API_KEY, so a key that lives only in .env
# must already be in the environment by then.
from dotenv import load_dotenv
load_dotenv()

# Add the parent directory of this file's folder to the path so the local
# aimakerspace package can be imported
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Import our RAG components
from rag import RetrievalAugmentedQAPipeline, process_file, setup_vector_db

# Import from local aimakerspace module
from aimakerspace.utils.session_manager import SessionManager
36
+
37
app = FastAPI()

# Configure CORS - allow all origins explicitly for development.
# Credentials are disabled: a wildcard origin must not be combined with
# credentialed requests, and none of the endpoints here rely on cookies or
# HTTP auth (sessions are identified by a session_id in the request itself).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # This will allow all origins
    allow_credentials=False,
    allow_methods=["*"],  # Allow all methods
    allow_headers=["*"],  # Allow all headers
    expose_headers=["*"]
)

# Initialize session manager
session_manager = SessionManager()
51
+
52
# Request body accepted by POST /query/
class QueryRequest(BaseModel):
    session_id: str  # ID handed out by POST /upload/
    query: str       # question to ask about the uploaded document


# Response body returned by POST /query/
class QueryResponse(BaseModel):
    response: str    # full generated answer
    session_id: str  # echoed back from the request


# Upload size cap enforced by the upload endpoint - adjust as needed
FILE_SIZE_LIMIT = 10 * 1024 ** 2  # 10MB
62
+
63
async def process_file_background(temp_path: str, filename: str, session_id: str):
    """Process an uploaded file in the background and set up the RAG pipeline.

    Extracts text chunks from the file, builds the vector database, wraps it in
    a RetrievalAugmentedQAPipeline and stores that pipeline in the session. On
    any failure the session value is set to "failed". The temporary file is
    always removed before returning.

    Args:
        temp_path: Path of the temporary copy of the upload; deleted here.
        filename: Original name of the uploaded file (passed to process_file).
        session_id: Session to update with the pipeline or the "failed" marker.
    """
    start_time = time.time()

    # Set max processing time (5 minutes)
    max_processing_time = 300  # seconds

    try:
        logger.info(f"Background processing started for file: {filename} (session: {session_id})")

        # Process the file
        logger.info(f"Starting text extraction for file: {filename}")
        try:
            # process_file is a plain (blocking) function; run it in a worker
            # thread so other requests are still served while it runs.
            texts = await asyncio.to_thread(process_file, temp_path, filename)
            logger.info(f"Processed file into {len(texts)} text chunks (took {time.time() - start_time:.2f}s)")

            # Check if processing is taking too long already
            if time.time() - start_time > max_processing_time / 2:
                logger.warning(f"Text extraction took more than half the allowed time. Limiting chunks...")
                # Limit to a smaller number if extraction took a long time
                max_chunks = 50
                if len(texts) > max_chunks:
                    logger.warning(f"Limiting text chunks from {len(texts)} to {max_chunks}")
                    texts = texts[:max_chunks]
        except Exception as e:
            logger.error(f"Error during text extraction: {str(e)}")
            logger.error(traceback.format_exc())
            session_manager.update_session(session_id, "failed")
            return

        # Setup vector database
        logger.info(f"Starting vector DB creation for {len(texts)} chunks")
        embedding_start = time.time()

        try:
            # Whatever is left of the overall budget; never negative
            remaining = max(max_processing_time - (time.time() - start_time), 0)
            vector_db = await asyncio.wait_for(setup_vector_db(texts), timeout=remaining)

            # Get document count - check if documents property is available
            if hasattr(vector_db, 'documents'):
                doc_count = len(vector_db.documents)
            else:
                # If using the original VectorDatabase implementation that uses vectors dict
                doc_count = len(vector_db.vectors) if hasattr(vector_db, 'vectors') else 0

            logger.info(f"Created vector database with {doc_count} documents (took {time.time() - embedding_start:.2f}s)")

            # Create RAG pipeline
            logger.info(f"Creating RAG pipeline for session {session_id}")
            rag_pipeline = RetrievalAugmentedQAPipeline(vector_db_retriever=vector_db)

            # Store pipeline in session manager
            session_manager.update_session(session_id, rag_pipeline)
            logger.info(f"Updated session {session_id} with processed pipeline (total time: {time.time() - start_time:.2f}s)")

        except asyncio.TimeoutError:
            logger.error(f"Vector database creation timed out after {time.time() - embedding_start:.2f}s")
            session_manager.update_session(session_id, "failed")
        except Exception as e:
            logger.error(f"Error in vector database creation: {str(e)}")
            logger.error(traceback.format_exc())
            session_manager.update_session(session_id, "failed")

    except Exception as e:
        logger.error(f"Error in background processing for session {session_id}: {str(e)}")
        logger.error(traceback.format_exc())  # Log the full error traceback
        # Mark the session as failed rather than removing it
        session_manager.update_session(session_id, "failed")

    finally:
        # Single cleanup point: runs on success, on early return and on error
        try:
            if os.path.exists(temp_path):
                os.unlink(temp_path)
                logger.info(f"Removed temporary file: {temp_path}")
        except Exception as cleanup_error:
            logger.error(f"Error cleaning up temp file: {str(cleanup_error)}")
149
+
150
@app.post("/upload/")
async def upload_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Accept a document upload and start processing it in the background.

    The upload is read in 1MB chunks and rejected as soon as it exceeds
    FILE_SIZE_LIMIT, then written to a temporary file that the background task
    consumes and deletes.

    Returns:
        dict with the new ``session_id`` and a status ``message``.

    Raises:
        HTTPException 400: the upload carries no filename.
        HTTPException 413: the upload is larger than FILE_SIZE_LIMIT.
        HTTPException 500: any other failure while storing the upload.
    """
    temp_path = None
    try:
        logger.info(f"Received upload request for file: {file.filename}")

        if not file.filename:
            raise HTTPException(status_code=400, detail="Uploaded file has no filename")

        chunk_size = 1024 * 1024  # 1MB chunks for reading
        contents = bytearray()

        # Read file in chunks and enforce the limit while reading, so an
        # oversized upload is never fully buffered in memory
        while True:
            chunk = await file.read(chunk_size)
            if not chunk:
                break
            contents.extend(chunk)
            if len(contents) > FILE_SIZE_LIMIT:
                logger.warning(
                    f"File too large: read {len(contents)/1024/1024:.2f}MB so far, "
                    f"exceeds limit of {FILE_SIZE_LIMIT/1024/1024}MB"
                )
                raise HTTPException(
                    status_code=413,
                    detail=f"File too large. Maximum size is {FILE_SIZE_LIMIT/1024/1024}MB"
                )

        logger.info(f"File size: {len(contents)/1024/1024:.2f}MB")

        # Create a temporary file. splitext only ever yields the final
        # extension (or ""), so a client-supplied name cannot smuggle path
        # separators into the temp file name.
        suffix = os.path.splitext(file.filename)[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file.write(contents)
            temp_path = temp_file.name
        logger.info(f"Created temporary file at: {temp_path}")

        # Generate session ID and create session
        session_id = session_manager.create_session("processing")
        logger.info(f"Created session ID: {session_id}")

        # Process file in background
        background_tasks.add_task(
            process_file_background,
            temp_path,
            file.filename,
            session_id
        )

        return {"session_id": session_id, "message": "File uploaded and processing started"}

    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below
        raise
    except Exception as e:
        logger.error(f"Error processing upload: {str(e)}")
        logger.error(traceback.format_exc())  # Log the full error traceback
        # The background task never got the file, so remove it here
        if temp_path and os.path.exists(temp_path):
            try:
                os.unlink(temp_path)
            except OSError as cleanup_error:
                logger.error(f"Error cleaning up temp file: {str(cleanup_error)}")
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")
207
+
208
@app.post("/query/", response_model=QueryResponse)
async def process_query(request: QueryRequest):
    """Answer a question using the pipeline stored for the request's session.

    Raises:
        HTTPException 404: unknown session.
        HTTPException 409: the document is still being processed.
        HTTPException 500: processing failed earlier, or the query itself failed.
    """
    sid = request.session_id
    logger.info(f"Received query request for session: {sid}")

    # Unknown session
    if not session_manager.session_exists(sid):
        logger.warning(f"Session not found: {sid}")
        raise HTTPException(status_code=404, detail="Session not found. Please upload a document first.")

    pipeline = session_manager.get_session(sid)

    # The stored value is a status marker until the pipeline is ready
    if pipeline == "processing":
        logger.info(f"Document still processing for session: {sid}")
        raise HTTPException(status_code=409, detail="Document is still being processed. Please try again in a moment.")

    if pipeline == "failed":
        logger.error(f"Processing failed for session: {sid}")
        raise HTTPException(status_code=500, detail="Document processing failed. Please try uploading again.")

    try:
        logger.info(f"Processing query: '{request.query}' for session: {sid}")

        started_at = time.time()
        result = await pipeline.arun_pipeline(request.query)

        # The pipeline streams its answer; gather the pieces into one string
        response_text = ""
        async for piece in result["response"]:
            response_text += piece

        elapsed = time.time() - started_at
        logger.info(f"Generated response of length: {len(response_text)} (took {elapsed:.2f}s)")

        return {"response": response_text, "session_id": sid}

    except Exception as exc:
        logger.error(f"Error processing query for session {sid}: {str(exc)}")
        logger.error(traceback.format_exc())  # Log the full error traceback
        raise HTTPException(status_code=500, detail=f"Error processing query: {str(exc)}")
254
+
255
@app.get("/health")
def health_check():
    """Liveness probe: always reports a healthy status."""
    return dict(status="healthy")
258
+
259
@app.get("/test")
def test_endpoint():
    """Connectivity check: returns a fixed confirmation message."""
    return dict(message="Backend is accessible")
262
+
263
@app.get("/session/{session_id}/status")
async def session_status(session_id: str):
    """Report whether a session exists and how far its processing has got.

    Returns a dict with ``exists`` (bool) and ``status``, one of
    "not_found", "processing", "failed" or "ready".
    """
    logger.info(f"Checking status for session: {session_id}")

    if not session_manager.session_exists(session_id):
        logger.warning(f"Session not found: {session_id}")
        return {"exists": False, "status": "not_found"}

    stored = session_manager.get_session(session_id)

    # The two marker strings are statuses; any other value is a ready pipeline
    if stored == "processing":
        status = "processing"
        logger.info(f"Session {session_id} is still processing")
    elif stored == "failed":
        status = "failed"
        logger.error(f"Session {session_id} processing failed")
    else:
        status = "ready"
        logger.info(f"Session {session_id} is ready")

    return {"exists": True, "status": status}
284
+
285
@app.get("/debug/sessions")
async def debug_sessions():
    """Return debug information about all sessions - for diagnostic use only"""
    # NOTE(review): the summary is keyed by session ID and this route has no
    # access check - confirm it should be reachable in a public deployment.
    logger.info("Accessed debug sessions endpoint")
    return session_manager.get_sessions_summary()
294
+
295
# For Hugging Face Spaces deployment, serve the static files from the React build
frontend_path = pathlib.Path(__file__).parent.parent / "frontend" / "build"
if frontend_path.exists():
    # Routes are matched in registration order and a mount at "/" matches
    # every path, so the explicit index route has to be registered before the
    # mount or it can never be reached.
    @app.get("/", include_in_schema=False)
    async def serve_frontend():
        """Serve the React application's entry page."""
        return FileResponse(str(frontend_path / "index.html"))

    # Everything not handled by an API route above falls through to the build output
    app.mount("/", StaticFiles(directory=str(frontend_path), html=True), name="frontend")

if __name__ == "__main__":
    # Local development entry point
    uvicorn.run("main:app", host="0.0.0.0", port=8000)
backend/pyproject.toml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "rag-backend"
version = "0.1.0"
description = "FastAPI backend for RAG chat application"
requires-python = ">=3.9"
license = { text = "MIT" }
dependencies = [
    "fastapi==0.104.1",
    "uvicorn==0.23.2",
    "python-multipart==0.0.6",
    "pydantic==2.4.2",
    "openai>=1.0.0",
    "python-dotenv==1.0.0",
    "numpy>=1.20.0",
    "pandas>=1.3.0",
    "scikit-learn>=1.0.0",
    "tiktoken>=0.5.0",
    "PyPDF2>=3.0.0",
]

[project.optional-dependencies]
dev = [
    "pytest",
    "black",
]

# ini-style pytest options live under [tool.pytest.ini_options]; this table is
# read by every pytest release that supports pyproject.toml configuration
[tool.pytest.ini_options]
testpaths = ["tests"]

[tool.black]
line-length = 88
target-version = ["py39"]
backend/rag.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict, Any
3
+ import tempfile
4
+ import shutil
5
+ import logging
6
+ import time
7
+ import traceback
8
+ import asyncio
9
+
10
+ # Configure logging
11
+ logging.basicConfig(level=logging.INFO,
12
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # Make sure aimakerspace is in the path
16
+ import sys
17
+ sys.path.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), ""))
18
+
19
+ # Import from local aimakerspace module
20
+ from aimakerspace.text_utils import CharacterTextSplitter, TextFileLoader, PDFLoader
21
+ from aimakerspace.vectordatabase import VectorDatabase
22
+ from aimakerspace.openai_utils.embedding import EmbeddingModel
23
+ from openai import OpenAI
24
+
25
+ # Initialize OpenAI client
26
+ client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
27
+ logger.info(f"Initialized OpenAI client with API key: {'valid key' if os.getenv('OPENAI_API_KEY') else 'API KEY MISSING!'}")
28
+
29
class RetrievalAugmentedQAPipeline:
    """Retrieval-augmented question answering over a vector database.

    The pipeline retrieves the text chunks most similar to the user's query,
    places them in a system prompt, and streams the chat completion back to
    the caller.
    """

    def __init__(self, vector_db_retriever: VectorDatabase) -> None:
        """Store the vector database used for similarity search."""
        self.vector_db_retriever = vector_db_retriever

    async def arun_pipeline(self, user_query: str):
        """Run the RAG pipeline for ``user_query``.

        Returns:
            A dict ``{"response": <async generator of str>}``. The value is an
            async generator on BOTH the success and the failure path, so the
            caller can always consume it with ``async for``.
        """
        try:
            # 1. Retrieve relevant documents
            logger.info(f"RAG Pipeline: Retrieving documents for query: '{user_query}'")
            relevant_docs = self.vector_db_retriever.search_by_text(user_query, k=4)

            if not relevant_docs:
                logger.warning("No relevant documents found in vector database")
                documents_context = "No relevant information found in the document."
            else:
                logger.info(f"Found {len(relevant_docs)} relevant document chunks")
                # Each result is indexed as (text, score); keep only the text.
                documents_context = "\n\n".join([doc[0] for doc in relevant_docs])

            # Debug similarity scores
            doc_scores = [f"{i+1}. Score: {doc[1]:.4f}" for i, doc in enumerate(relevant_docs)]
            logger.info(f"Document similarity scores: {', '.join(doc_scores) if doc_scores else 'No documents'}")

            # 2. Create messaging payload
            system_prompt = (
                "You are a helpful AI assistant that answers questions based on the provided document context.\n"
                "If the answer is not in the context, say that you don't know based on the available information.\n"
                "Use the following document extracts to answer the user's question:\n"
                "\n"
                f"{documents_context}"
            )
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_query},
            ]

            # 3. Call LLM and stream the output
            async def generate_response():
                try:
                    logger.info("Initiating streaming completion from OpenAI")
                    # The module-level client is synchronous. Run its blocking
                    # network calls in a worker thread so the event loop stays
                    # free to serve other requests while we stream.
                    stream = await asyncio.to_thread(
                        client.chat.completions.create,
                        model="gpt-3.5-turbo",
                        messages=messages,
                        temperature=0.2,
                        stream=True,
                    )

                    chunk_iter = iter(stream)
                    exhausted = object()  # sentinel: StopIteration cannot cross a thread future
                    while True:
                        chunk = await asyncio.to_thread(next, chunk_iter, exhausted)
                        if chunk is exhausted:
                            break
                        # Some stream events carry no choices; skip them.
                        if not chunk.choices:
                            continue
                        content = chunk.choices[0].delta.content
                        if content:
                            yield content
                except Exception as e:
                    logger.error(f"Error generating stream: {str(e)}")
                    yield f"\n\nI apologize, but I encountered an error while generating a response: {str(e)}"

            return {
                "response": generate_response()
            }

        except Exception as e:
            logger.error(f"Error in RAG pipeline: {str(e)}")
            logger.error(traceback.format_exc())

            # Build the message now: ``e`` is unbound once the except block ends.
            error_message = f"I apologize, but an error occurred: {str(e)}"

            async def error_response():
                yield error_message

            return {
                "response": error_response()
            }
93
+
94
def process_file(file_path: str, file_name: str) -> List[str]:
    """Load an uploaded ``.txt`` or ``.pdf`` file and split it into text chunks.

    Args:
        file_path: Location of the uploaded file on disk.
        file_name: Original file name; only its extension selects the loader.

    Returns:
        The list of text chunks. For an unsupported extension, an empty
        document, or any exception, a single-element list holding a
        human-readable message is returned instead (nothing is raised).
    """
    logger.info(f"Processing file: {file_name} at path: {file_path}")

    try:
        # Pick the loader from the (case-insensitive) file extension.
        lowered_name = file_name.lower()
        if lowered_name.endswith('.txt'):
            logger.info(f"Using TextFileLoader for {file_name}")
            loader = TextFileLoader(file_path)
        elif lowered_name.endswith('.pdf'):
            logger.info(f"Using PDFLoader for {file_name}")
            loader = PDFLoader(file_path)
        else:
            logger.warning(f"Unsupported file type: {file_name}")
            return ["Unsupported file format. Please upload a .txt or .pdf file."]
        loader.load()

        # Guard clause: bail out when the loader produced nothing.
        documents = loader.documents
        if not documents:
            logger.warning("No document content loaded")
            return ["No content found in the document"]
        logger.info(f"Loaded document with {len(documents[0])} characters")

        # Split the loaded text into overlapping chunks.
        splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        chunks = splitter.split_texts(documents)

        logger.info(f"Split document into {len(chunks)} chunks")
        return chunks

    except Exception as e:
        logger.error(f"Error processing file: {str(e)}")
        logger.error(traceback.format_exc())
        return [f"Error processing file: {str(e)}"]
131
+
132
async def setup_vector_db(texts: List[str]) -> VectorDatabase:
    """Build a vector database from text chunks.

    Args:
        texts: The text chunks to embed and index.

    Returns:
        The populated database. If building fails, a fallback database that
        holds a single apology entry is returned instead of raising.
    """
    logger.info(f"Setting up vector database with {len(texts)} text chunks")

    embedder = EmbeddingModel()
    database = VectorDatabase(embedding_model=embedder)

    try:
        await database.abuild_from_list(texts)

        # Keep the raw chunks on the database object as well.
        database.documents = texts

        logger.info(f"Vector database built with {len(texts)} documents")
        return database
    except Exception as e:
        logger.error(f"Error setting up vector database: {str(e)}")
        logger.error(traceback.format_exc())

        # Fallback: a database containing only an error message.
        # NOTE(review): 1536 is presumably the embedding dimension of the
        # default model - confirm. An all-zero vector may also behave oddly
        # under cosine similarity; verify against VectorDatabase.
        apology = "I'm sorry, but there was an error processing the document."
        placeholder_db = VectorDatabase(embedding_model=embedder)
        placeholder_db.insert(apology, [0.0] * 1536)
        placeholder_db.documents = [apology]
        return placeholder_db
frontend/package.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "rag-chat-frontend",
3
+ "version": "0.1.0",
4
+ "private": true,
5
+ "dependencies": {
6
+ "@chakra-ui/react": "^2.8.0",
7
+ "@emotion/react": "^11.11.1",
8
+ "@emotion/styled": "^11.11.0",
9
+ "axios": "^1.5.0",
10
+ "framer-motion": "^10.16.4",
11
+ "react": "^18.2.0",
12
+ "react-dom": "^18.2.0",
13
+ "react-dropzone": "^14.2.3",
14
+ "react-icons": "^4.11.0",
15
+ "react-markdown": "^8.0.7",
16
+ "react-scripts": "5.0.1"
17
+ },
18
+ "scripts": {
19
+ "start": "react-scripts start",
20
+ "build": "react-scripts build",
21
+ "test": "react-scripts test",
22
+ "eject": "react-scripts eject"
23
+ },
24
+ "eslintConfig": {
25
+ "extends": [
26
+ "react-app",
27
+ "react-app/jest"
28
+ ]
29
+ },
30
+ "browserslist": {
31
+ "production": [
32
+ ">0.2%",
33
+ "not dead",
34
+ "not op_mini all"
35
+ ],
36
+ "development": [
37
+ "last 1 chrome version",
38
+ "last 1 firefox version",
39
+ "last 1 safari version"
40
+ ]
41
+ }
42
+ }
frontend/public/favicon.ico ADDED
frontend/public/index.html ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
6
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
7
+ <meta name="theme-color" content="#000000" />
8
+ <meta
9
+ name="description"
10
+ content="Chat with your documents using RAG technology"
11
+ />
12
+ <link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
13
+ <!--
14
+ manifest.json provides metadata used when your web app is installed on a
15
+ user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
16
+ -->
17
+ <link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
18
+ <!--
19
+ Notice the use of %PUBLIC_URL% in the tags above.
20
+ It will be replaced with the URL of the `public` folder during the build.
21
+ Only files inside the `public` folder can be referenced from the HTML.
22
+
23
+ Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
24
+ work correctly both with client-side routing and a non-root public URL.
25
+ Learn how to configure a non-root public URL by running `npm run build`.
26
+ -->
27
+ <title>Document Chat</title>
28
+ </head>
29
+ <body>
30
+ <noscript>You need to enable JavaScript to run this app.</noscript>
31
+ <div id="root"></div>
32
+ <!--
33
+ This HTML file is a template.
34
+ If you open it directly in the browser, you will see an empty page.
35
+
36
+ You can add webfonts, meta tags, or analytics to this file.
37
+ The build step will place the bundled scripts into the <body> tag.
38
+
39
+ To begin the development, run `npm start` or `yarn start`.
40
+ To create a production bundle, use `npm run build` or `yarn build`.
41
+ -->
42
+ </body>
43
+ </html>
frontend/public/manifest.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "short_name": "Document Chat",
3
+ "name": "Chat with Your Documents",
4
+ "icons": [
5
+ {
6
+ "src": "favicon.ico",
7
+ "sizes": "64x64 32x32 24x24 16x16",
8
+ "type": "image/x-icon"
9
+ }
10
+ ],
11
+ "start_url": ".",
12
+ "display": "standalone",
13
+ "theme_color": "#000000",
14
+ "background_color": "#ffffff"
15
+ }
frontend/public/robots.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # https://www.robotstxt.org/robotstxt.html
2
+ User-agent: *
3
+ Disallow:
frontend/src/App.js ADDED
@@ -0,0 +1,582 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import React, { useState, useRef } from 'react';
2
+ import {
3
+ ChakraProvider,
4
+ Box,
5
+ VStack,
6
+ HStack,
7
+ Text,
8
+ Input,
9
+ Button,
10
+ Flex,
11
+ Heading,
12
+ Container,
13
+ useToast,
14
+ Divider,
15
+ Progress,
16
+ extendTheme,
17
+ Image
18
+ } from '@chakra-ui/react';
19
+ import axios from 'axios';
20
+ import { useDropzone } from 'react-dropzone';
21
+ import { FiSend, FiUpload } from 'react-icons/fi';
22
+ import ReactMarkdown from 'react-markdown';
23
+
24
// Star Wars theme
// Palette constants are named once and reused in the theme definition below.
const STAR_WARS_YELLOW = '#ffe81f';
const EMPIRE_RED = '#ff0000';
const REBEL_BLUE = '#4bd5ee';

const themeColors = {
  // Shades 100-500 all share the signature yellow; 600-900 darken it.
  brand: {
    100: STAR_WARS_YELLOW,
    200: STAR_WARS_YELLOW,
    300: STAR_WARS_YELLOW,
    400: STAR_WARS_YELLOW,
    500: STAR_WARS_YELLOW,
    600: '#d6c119',
    700: '#a99a14',
    800: '#7c710f',
    900: '#4f480a',
  },
  imperial: { 500: EMPIRE_RED },
  rebel: { 500: REBEL_BLUE },
  dark: { 500: '#000000' }, // Dark side
  light: { 500: '#ffffff' }, // Light side
  space: {
    100: '#05050f',
    500: '#0a0a1f',
    900: '#000005',
  },
};

const themeFonts = {
  heading: "'Star Jedi', 'Roboto', sans-serif",
  body: "'Roboto', sans-serif",
};

// Page-wide defaults: deep-space background with light text.
const themeStyles = {
  global: {
    body: {
      bg: 'space.500',
      color: 'light.500',
    },
  },
};

const starWarsTheme = extendTheme({
  colors: themeColors,
  fonts: themeFonts,
  styles: themeStyles,
});
69
+
70
// API URL - derived from the browser's current location.
//
// Resolution rules:
//   1. Development build (npm start): the backend runs on localhost:8000.
//   2. Page served on port 7860 (container run the Hugging Face way): the
//      backend serves the frontend itself, so use the same origin.
//   3. Page served over HTTPS on the default port (hosted behind a TLS proxy):
//      also same origin - an explicit :8000 is not reachable there.
//   4. Anything else (e.g. docker-compose): same host, backend port 8000.
//
// The page's own protocol is always reused. A hard-coded "http://" would be
// blocked by browsers as mixed content whenever the page is loaded via HTTPS.
const getAPIURL = () => {
  if (process.env.NODE_ENV === 'development') {
    return 'http://localhost:8000';
  }

  const { protocol, hostname, port } = window.location;

  if (port === '7860') {
    return `${protocol}//${hostname}:${port}`;
  }

  if (protocol === 'https:' && port === '') {
    return `${protocol}//${hostname}`;
  }

  return `${protocol}//${hostname}:8000`;
};
87
+
88
+ const API_URL = process.env.REACT_APP_API_URL || getAPIURL();
89
+
90
+ // Debug log
91
+ console.log('Using API URL:', API_URL);
92
+ console.log('Environment:', process.env.NODE_ENV);
93
+ console.log('Window location:', window.location.hostname);
94
+
95
// Global axios configuration: a generous timeout plus verbose error logging.
axios.defaults.timeout = 120000; // 120 seconds

// Log as much detail as is available about a failed request.
const logAxiosError = (error) => {
  console.error('Axios error:', error);

  const { response, request } = error;

  if (response) {
    // The server answered, but with a status code outside the 2xx range.
    console.error('Error response data:', response.data);
    console.error('Error response status:', response.status);
    console.error('Error response headers:', response.headers);
    return;
  }

  if (request) {
    // The request went out but no response came back.
    console.error('Error request:', request);
    if (error.code === 'ECONNABORTED') {
      console.error('Request timed out after', axios.defaults.timeout, 'ms');
    }
    return;
  }

  // The request could not even be set up.
  console.error('Error message:', error.message);
};

axios.interceptors.response.use(
  (response) => response,
  (error) => {
    logAxiosError(error);
    // Re-reject so callers still see the failure.
    return Promise.reject(error);
  }
);
121
+
122
+ function ChatMessage({ message, isUser }) {
123
+ return (
124
+ <Box
125
+ bg={isUser ? 'rebel.500' : 'imperial.500'}
126
+ p={3}
127
+ borderRadius="md"
128
+ borderWidth="1px"
129
+ borderColor={isUser ? 'brand.500' : 'dark.500'}
130
+ alignSelf={isUser ? 'flex-end' : 'flex-start'}
131
+ maxW="80%"
132
+ boxShadow="0 0 5px"
133
+ color={isUser ? 'dark.500' : 'light.500'}
134
+ >
135
+ <Text fontWeight="bold" fontSize="sm" mb={1}>
136
+ {isUser ? 'Rebel Commander' : 'Jedi Archives'}
137
+ </Text>
138
+ <ReactMarkdown>{message}</ReactMarkdown>
139
+ </Box>
140
+ );
141
+ }
142
+
143
+ function FileUploader({ onFileUpload }) {
144
+ const toast = useToast();
145
+ const [isUploading, setIsUploading] = useState(false);
146
+ const [uploadProgress, setUploadProgress] = useState(0);
147
+ const [processingStatus, setProcessingStatus] = useState(null);
148
+
149
+ const { getRootProps, getInputProps } = useDropzone({
150
+ maxFiles: 1,
151
+ maxSize: 5 * 1024 * 1024, // 5MB max size
152
+ accept: {
153
+ 'text/plain': ['.txt'],
154
+ 'application/pdf': ['.pdf']
155
+ },
156
+ onDropRejected: (rejectedFiles) => {
157
+ toast({
158
+ title: 'Transmission rejected',
159
+ description: rejectedFiles[0]?.errors[0]?.message || 'File rejected by the Empire',
160
+ status: 'error',
161
+ duration: 5000,
162
+ isClosable: true,
163
+ });
164
+ },
165
+ onDrop: async (acceptedFiles) => {
166
+ if (acceptedFiles.length === 0) return;
167
+
168
+ setIsUploading(true);
169
+ setUploadProgress(0);
170
+ const file = acceptedFiles[0];
171
+
172
+ // Check file size
173
+ if (file.size > 5 * 1024 * 1024) {
174
+ toast({
175
+ title: 'File too large for hyperdrive',
176
+ description: 'Maximum file size is 5MB - even the Death Star plans were smaller',
177
+ status: 'error',
178
+ duration: 5000,
179
+ isClosable: true,
180
+ });
181
+ setIsUploading(false);
182
+ return;
183
+ }
184
+
185
+ const formData = new FormData();
186
+ formData.append('file', file);
187
+
188
+ try {
189
+ // Either use the API_URL or direct backend based on environment
190
+ const uploadUrl = `${API_URL}/upload/`;
191
+ console.log('Uploading file to:', uploadUrl);
192
+
193
+ const response = await axios.post(uploadUrl, formData, {
194
+ headers: {
195
+ 'Content-Type': 'multipart/form-data',
196
+ },
197
+ onUploadProgress: (progressEvent) => {
198
+ const percentCompleted = Math.round((progressEvent.loaded * 100) / progressEvent.total);
199
+ setUploadProgress(percentCompleted);
200
+ }
201
+ });
202
+
203
+ console.log('Upload response:', response.data);
204
+ setProcessingStatus('starting');
205
+
206
+ // Start polling for document processing status
207
+ const sessionId = response.data.session_id;
208
+ const pollStatus = async () => {
209
+ try {
210
+ const statusUrl = `${API_URL}/session/${sessionId}/status`;
211
+ console.log('Checking status at:', statusUrl);
212
+
213
+ const statusResponse = await axios.get(statusUrl);
214
+ console.log('Status response:', statusResponse.data);
215
+
216
+ if (statusResponse.data.status === 'ready') {
217
+ setProcessingStatus('complete');
218
+ onFileUpload(sessionId, file.name);
219
+ return;
220
+ } else if (statusResponse.data.status === 'failed') {
221
+ setProcessingStatus('failed');
222
+ toast({
223
+ title: 'Processing failed',
224
+ description: 'There was a disturbance in the Force. Please try again with a different file.',
225
+ status: 'error',
226
+ duration: 7000,
227
+ isClosable: true,
228
+ });
229
+ setIsUploading(false);
230
+ return;
231
+ }
232
+
233
+ // Still processing, continue polling
234
+ setProcessingStatus('processing');
235
+ setTimeout(pollStatus, 3000);
236
+ } catch (error) {
237
+ console.error('Error checking status:', error);
238
+
239
+ // Continue polling if there are non-critical errors
240
+ if (error.code === 'ECONNABORTED') {
241
+ // Request timed out
242
+ toast({
243
+ title: 'Status check timed out',
244
+ description: 'Your document is being processed by the Jedi Council. Please be patient, this may take time.',
245
+ status: 'warning',
246
+ duration: 7000,
247
+ isClosable: true,
248
+ });
249
+ setProcessingStatus('timeout');
250
+ // Keep polling, but with a longer delay
251
+ setTimeout(pollStatus, 10000);
252
+ } else {
253
+ // Other errors, but still try to continue polling
254
+ setTimeout(pollStatus, 5000);
255
+ }
256
+ }
257
+ };
258
+
259
+ // Start polling
260
+ setTimeout(pollStatus, 1000);
261
+
262
+ } catch (error) {
263
+ console.error('Error uploading file:', error);
264
+ setProcessingStatus(null);
265
+
266
+ let errorMessage = 'Network error - the Death Star has jammed our comms';
267
+
268
+ if (error.response) {
269
+ errorMessage = error.response.data?.detail || `Imperial error (${error.response.status})`;
270
+ } else if (error.code === 'ECONNABORTED') {
271
+ errorMessage = 'Request timed out. Even the Millennium Falcon would struggle with this file.';
272
+ }
273
+
274
+ toast({
275
+ title: 'Upload failed',
276
+ description: errorMessage,
277
+ status: 'error',
278
+ duration: 5000,
279
+ isClosable: true,
280
+ });
281
+
282
+ setIsUploading(false);
283
+ }
284
+ }
285
+ });
286
+
287
+ // Status message based on current processing state
288
+ const getStatusMessage = () => {
289
+ switch(processingStatus) {
290
+ case 'starting':
291
+ return 'Initiating hyperspace jump...';
292
+ case 'processing':
293
+ return 'The Force is analyzing your document... This may take several minutes.';
294
+ case 'timeout':
295
+ return 'Document processing is taking longer than expected. Patience, young Padawan...';
296
+ case 'failed':
297
+ return 'Document processing failed. The dark side clouded this document.';
298
+ case 'complete':
299
+ return 'Your document has joined the Jedi Archives!';
300
+ default:
301
+ return '';
302
+ }
303
+ };
304
+
305
+ return (
306
+ <Box
307
+ {...getRootProps()}
308
+ border="2px dashed"
309
+ borderColor="brand.500"
310
+ borderRadius="md"
311
+ p={10}
312
+ textAlign="center"
313
+ cursor="pointer"
314
+ bg="space.100"
315
+ _hover={{ bg: 'space.900', borderColor: 'rebel.500' }}
316
+ >
317
+ <input {...getInputProps()} />
318
+ <VStack spacing={2}>
319
+ <FiUpload size={30} color="#ffe81f" />
320
+ <Text>Drop a holocron (PDF or text file) here, or click to select</Text>
321
+ <Text fontSize="sm" color="brand.500">
322
+ Max file size: 5MB - suitable for Death Star plans
323
+ </Text>
324
+ {isUploading && (
325
+ <>
326
+ <Text color="brand.500">Uploading to the Jedi Archives...</Text>
327
+ <Progress
328
+ value={uploadProgress}
329
+ size="sm"
330
+ colorScheme="yellow"
331
+ width="100%"
332
+ borderRadius="md"
333
+ />
334
+ {processingStatus && (
335
+ <Text
336
+ color={processingStatus === 'failed' ? 'imperial.500' : 'brand.500'}
337
+ fontSize="sm"
338
+ mt={2}
339
+ >
340
+ {getStatusMessage()}
341
+ </Text>
342
+ )}
343
+ </>
344
+ )}
345
+ </VStack>
346
+ </Box>
347
+ );
348
+ }
349
+
350
+ function App() {
351
+ const [sessionId, setSessionId] = useState(null);
352
+ const [fileName, setFileName] = useState(null);
353
+ const [messages, setMessages] = useState([]);
354
+ const [inputText, setInputText] = useState('');
355
+ const [isProcessing, setIsProcessing] = useState(false);
356
+ const [isDocProcessing, setIsDocProcessing] = useState(false);
357
+ const messagesEndRef = useRef(null);
358
+ const toast = useToast();
359
+
360
+ const handleFileUpload = (newSessionId, name) => {
361
+ setSessionId(newSessionId);
362
+ setFileName(name);
363
+ setIsDocProcessing(true);
364
+ setMessages([
365
+ { text: `Processing ${name}. May the Force be with you...`, isUser: false }
366
+ ]);
367
+
368
+ // Poll for document processing status
369
+ const checkStatus = async () => {
370
+ try {
371
+ const response = await axios.get(`${API_URL}/session/${newSessionId}/status`);
372
+ console.log('Status response:', response.data);
373
+
374
+ if (response.data.status === 'ready') {
375
+ setIsDocProcessing(false);
376
+ setMessages([
377
+ { text: `"${name}" has been added to the Jedi Archives. What knowledge do you seek?`, isUser: false }
378
+ ]);
379
+ return;
380
+ }
381
+
382
+ // Continue polling if still processing
383
+ if (response.data.status === 'processing') {
384
+ setTimeout(checkStatus, 2000);
385
+ }
386
+ } catch (error) {
387
+ console.error('Error checking status:', error);
388
+ // Continue polling even if there's an error
389
+ setTimeout(checkStatus, 3000);
390
+ }
391
+ };
392
+
393
+ checkStatus();
394
+ };
395
+
396
+ const handleSendMessage = async () => {
397
+ if (!inputText.trim() || !sessionId || isDocProcessing) return;
398
+
399
+ const userMessage = inputText;
400
+ setInputText('');
401
+ setMessages(prev => [...prev, { text: userMessage, isUser: true }]);
402
+ setIsProcessing(true);
403
+
404
+ try {
405
+ // Either use the API_URL or direct backend based on environment
406
+ const queryUrl = `${API_URL}/query/`;
407
+ console.log('Sending query to:', queryUrl);
408
+
409
+ const response = await axios.post(queryUrl, {
410
+ session_id: sessionId,
411
+ query: userMessage
412
+ });
413
+
414
+ console.log('Query response:', response.data);
415
+ setMessages(prev => [...prev, { text: response.data.response, isUser: false }]);
416
+ } catch (error) {
417
+ console.error('Error sending message:', error);
418
+
419
+ // Handle specific errors
420
+ if (error.response?.status === 409) {
421
+ // Document still processing
422
+ toast({
423
+ title: 'Document still processing',
424
+ description: 'The Jedi Council is still analyzing this document. Please wait a moment and try again.',
425
+ status: 'warning',
426
+ duration: 5000,
427
+ isClosable: true,
428
+ });
429
+
430
+ setMessages(prev => [...prev, {
431
+ text: "The Jedi Council is still analyzing this document. Patience, young Padawan.",
432
+ isUser: false
433
+ }]);
434
+ } else {
435
+ // General error
436
+ toast({
437
+ title: 'Error',
438
+ description: error.response?.data?.detail || 'A disturbance in the Force - make sure the backend is operational',
439
+ status: 'error',
440
+ duration: 5000,
441
+ isClosable: true,
442
+ });
443
+
444
+ setMessages(prev => [...prev, {
445
+ text: "I find your lack of network connectivity disturbing. Please try again.",
446
+ isUser: false
447
+ }]);
448
+ }
449
+ } finally {
450
+ setIsProcessing(false);
451
+ }
452
+ };
453
+
454
+ // Scroll to the bottom of messages
455
+ React.useEffect(() => {
456
+ messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' });
457
+ }, [messages]);
458
+
459
+ // Handle Enter key press
460
+ const handleKeyPress = (e) => {
461
+ if (e.key === 'Enter' && !e.shiftKey) {
462
+ e.preventDefault();
463
+ handleSendMessage();
464
+ }
465
+ };
466
+
467
+ return (
468
+ <ChakraProvider theme={starWarsTheme}>
469
+ <Box bg="space.500" minH="100vh" py={8}>
470
+ <Container maxW="container.lg">
471
+ <VStack spacing={6} align="stretch" h="100vh">
472
+ <Box textAlign="center" mb={4}>
473
+ <Heading
474
+ as="h1"
475
+ size="xl"
476
+ color="brand.500"
477
+ textShadow="0 0 10px #ffe81f"
478
+ letterSpacing="2px"
479
+ >
480
+ Jedi Archives Chat
481
+ </Heading>
482
+ <Text color="light.500" mt={2}>The galaxy's knowledge at your fingertips</Text>
483
+ </Box>
484
+
485
+ {!sessionId ? (
486
+ <FileUploader onFileUpload={handleFileUpload} />
487
+ ) : (
488
+ <>
489
+ <Flex justify="space-between" align="center">
490
+ <Text fontWeight="bold" color="brand.500">
491
+ Current holocron: {fileName} {isDocProcessing && "(Jedi Council analyzing...)"}
492
+ </Text>
493
+ <Button
494
+ size="sm"
495
+ colorScheme="yellow"
496
+ variant="outline"
497
+ onClick={() => {
498
+ setSessionId(null);
499
+ setFileName(null);
500
+ setMessages([]);
501
+ setIsDocProcessing(false);
502
+ }}
503
+ >
504
+ Access different holocron
505
+ </Button>
506
+ </Flex>
507
+
508
+ <Divider borderColor="brand.500" />
509
+
510
+ <Box
511
+ flex="1"
512
+ overflowY="auto"
513
+ p={4}
514
+ bg="space.100"
515
+ borderRadius="md"
516
+ borderWidth="1px"
517
+ borderColor="brand.500"
518
+ boxShadow="0 0 15px #ffe81f22"
519
+ minH="300px"
520
+ >
521
+ <VStack spacing={4} align="stretch">
522
+ {messages.map((msg, idx) => (
523
+ <ChatMessage
524
+ key={idx}
525
+ message={msg.text}
526
+ isUser={msg.isUser}
527
+ />
528
+ ))}
529
+ {isDocProcessing && (
530
+ <Box textAlign="center" p={4}>
531
+ <Progress
532
+ size="xs"
533
+ isIndeterminate
534
+ colorScheme="yellow"
535
+ width="80%"
536
+ mx="auto"
537
+ />
538
+ <Text mt={2} color="brand.500">
539
+ The Force is strong with this document... Processing in progress
540
+ </Text>
541
+ </Box>
542
+ )}
543
+ <div ref={messagesEndRef} />
544
+ </VStack>
545
+ </Box>
546
+
547
+ <HStack>
548
+ <Input
549
+ placeholder={isDocProcessing
550
+ ? "Waiting for the Jedi Council to complete analysis..."
551
+ : "What knowledge do you seek from the holocron?"}
552
+ value={inputText}
553
+ onChange={(e) => setInputText(e.target.value)}
554
+ onKeyPress={handleKeyPress}
555
+ disabled={isProcessing || isDocProcessing}
556
+ bg="space.100"
557
+ color="light.500"
558
+ borderColor="brand.500"
559
+ _hover={{ borderColor: "rebel.500" }}
560
+ _focus={{ borderColor: "rebel.500", boxShadow: "0 0 0 1px #4bd5ee" }}
561
+ />
562
+ <Button
563
+ colorScheme="yellow"
564
+ isLoading={isProcessing}
565
+ onClick={handleSendMessage}
566
+ disabled={!inputText.trim() || isProcessing || isDocProcessing}
567
+ leftIcon={<FiSend />}
568
+ _hover={{ bg: "rebel.500", color: "dark.500" }}
569
+ >
570
+ Send
571
+ </Button>
572
+ </HStack>
573
+ </>
574
+ )}
575
+ </VStack>
576
+ </Container>
577
+ </Box>
578
+ </ChakraProvider>
579
+ );
580
+ }
581
+
582
+ export default App;
frontend/src/index.js ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react';
2
+ import ReactDOM from 'react-dom/client';
3
+ import App from './App';
4
+
5
+ const root = ReactDOM.createRoot(document.getElementById('root'));
6
+ root.render(
7
+ <React.StrictMode>
8
+ <App />
9
+ </React.StrictMode>
10
+ );
pyproject.toml CHANGED
@@ -1,14 +1,34 @@
1
  [project]
2
- name = "aie5-deploypythonicrag"
3
  version = "0.1.0"
4
- description = "Simple Pythonic RAG App"
5
  readme = "README.md"
6
- requires-python = ">=3.13"
7
  dependencies = [
8
- "chainlit==2.0.4",
9
- "numpy==2.2.2",
10
- "openai==1.59.9",
11
- "pydantic==2.10.1",
12
- "pypdf2==3.0.1",
13
- "websockets==14.2",
 
 
 
 
 
 
 
14
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  [project]
2
+ name = "rag-application"
3
  version = "0.1.0"
4
+ description = "Retrieval Augmented Generation App with FastAPI backend and React frontend"
5
  readme = "README.md"
6
+ requires-python = ">=3.10"
7
  dependencies = [
8
+ "fastapi>=0.115.3",
9
+ "uvicorn>=0.25.0",
10
+ "python-multipart>=0.0.6",
11
+ "pydantic>=2.4.2",
12
+ "openai>=1.0.0",
13
+ "python-dotenv>=1.0.0",
14
+ "numpy>=1.20.0",
15
+ "pandas>=1.3.0",
16
+ "scikit-learn>=1.0.0",
17
+ "tiktoken>=0.5.0",
18
+ "PyPDF2>=3.0.0",
19
+ "chainlit==2.5.5",
20
+ "websockets>=11.0.0",
21
  ]
22
+
23
+ [project.optional-dependencies]
24
+ dev = [
25
+ "pytest",
26
+ "black",
27
+ ]
28
+
29
+ [tool.pytest]
30
+ testpaths = ["tests"]
31
+
32
+ [tool.black]
33
+ line-length = 88
34
+ target-version = ["py310"]
uv.lock CHANGED
The diff for this file is too large to render. See raw diff