tfrere committed on
Commit
d88a570
·
1 Parent(s): ccdd344

improve upload validation

Browse files
backend/lighteval_task/lighteval_task.py CHANGED
@@ -218,10 +218,11 @@ def process_judge_response_yourbench(response):
218
  class JudgeLLMYourBench(JudgeLLM):
219
  def __init__(self):
220
  super().__init__(
221
- judge_model_name="gpt-4o-2024-08-06",
222
  template=get_judge_prompt,
223
  process_judge_response=process_judge_response_yourbench,
224
- judge_backend="openai",
 
225
  short_judge_name="yourbench_judge",
226
  )
227
 
 
218
  class JudgeLLMYourBench(JudgeLLM):
219
  def __init__(self):
220
  super().__init__(
221
+ judge_model_name="Qwen/QwQ-32B",
222
  template=get_judge_prompt,
223
  process_judge_response=process_judge_response_yourbench,
224
+ judge_backend="inference-providers",
225
+ hf_provider="novita",
226
  short_judge_name="yourbench_judge",
227
  )
228
 
backend/pyproject.toml CHANGED
@@ -23,6 +23,8 @@ dependencies = [
23
  "fastapi>=0.110.0",
24
  "uvicorn>=0.29.0",
25
  "pydantic>=2.6.0",
 
 
26
  ]
27
 
28
  [build-system]
 
23
  "fastapi>=0.110.0",
24
  "uvicorn>=0.29.0",
25
  "pydantic>=2.6.0",
26
+ "PyPDF2>=3.0.0",
27
+ "beautifulsoup4>=4.12.0",
28
  ]
29
 
30
  [build-system]
backend/routes/upload.py CHANGED
@@ -1,7 +1,9 @@
1
- from fastapi import APIRouter, UploadFile, File
2
  import os
3
  import shutil
4
  import uuid
 
 
5
 
6
  router = APIRouter(tags=["files"])
7
 
@@ -12,6 +14,43 @@ session_files = {}
12
  UPLOAD_ROOT = "uploaded_files"
13
  os.makedirs(UPLOAD_ROOT, exist_ok=True)
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  # Initialize session files dictionary with pre-calculated documents
16
  precalculated_docs = ["the-bitter-lesson", "hurricane-faq", "pokemon-guide"]
17
 
@@ -48,7 +87,7 @@ async def upload_file(file: UploadFile = File(...)):
48
  """
49
  # Check that the file is a PDF, TXT, HTML or MD
50
  if not file.filename.endswith(('.pdf', '.txt', '.html', '.md')):
51
- return {"error": "Only PDF, TXT, HTML and MD files are accepted"}
52
 
53
  # Get the file extension
54
  file_extension = os.path.splitext(file.filename)[1].lower()
@@ -71,11 +110,23 @@ async def upload_file(file: UploadFile = File(...)):
71
  with open(file_path, "wb") as buffer:
72
  shutil.copyfileobj(file.file, buffer)
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  # Store file path for later use
75
  session_files[session_id] = file_path
76
 
77
- # Débogage pour vérifier l'état des session_files
78
- print(f"DEBUG UPLOAD: File uploaded with session_id: {session_id}")
79
- print(f"DEBUG UPLOAD: Current session_files: {session_files}")
80
-
81
  return {"filename": standardized_filename, "status": "uploaded", "session_id": session_id}
 
1
+ from fastapi import APIRouter, UploadFile, File, HTTPException
2
  import os
3
  import shutil
4
  import uuid
5
+ from bs4 import BeautifulSoup
6
+ from PyPDF2 import PdfReader
7
 
8
  router = APIRouter(tags=["files"])
9
 
 
14
  UPLOAD_ROOT = "uploaded_files"
15
  os.makedirs(UPLOAD_ROOT, exist_ok=True)
16
 
17
def validate_pdf(file_path: str) -> bool:
    """Return True if *file_path* parses as a PDF with at least one page.

    Args:
        file_path: Path of the file on disk to check.

    Returns:
        True when ``PdfReader`` can open the file and it reports one or more
        pages; False when the file cannot be read or parsed, or has no pages.
    """
    try:
        reader = PdfReader(file_path)
        # Check that the PDF has at least one page.
        return len(reader.pages) > 0
    except Exception:
        # Parsing an arbitrary upload can fail in many ways, so the catch is
        # deliberately broad. Unlike a bare ``except:`` it still lets
        # KeyboardInterrupt and SystemExit propagate.
        return False
25
+
26
def validate_markdown(file_path: str) -> bool:
    """Return True if *file_path* is UTF-8 text that looks like Markdown.

    This is a heuristic, not a parser: the file passes when it contains at
    least one character commonly used in Markdown syntax.

    Args:
        file_path: Path of the file on disk to check.

    Returns:
        True when the file decodes as UTF-8 and contains at least one of the
        marker characters; False when it is empty, has no marker, or cannot
        be read or decoded.
    """
    markers = ('#', '-', '*', '`', '[', '>')
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError covers
        # UnicodeDecodeError (content is not UTF-8) and malformed paths.
        return False
    # An empty file contains no marker, so no separate emptiness check is needed.
    return any(marker in content for marker in markers)
35
+
36
def validate_html(file_path: str) -> bool:
    """Return True if *file_path* is UTF-8 text containing at least one HTML tag.

    ``html.parser`` is lenient and accepts essentially any decodable text, so
    a successful parse alone says nothing about the content. The file is
    therefore only accepted when the parsed document contains a tag, which
    also rejects empty files.

    Args:
        file_path: Path of the file on disk to check.

    Returns:
        True when the file decodes as UTF-8 and parses to a document with at
        least one tag; False otherwise, including on read or decode errors.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f.read(), 'html.parser')
        # find(True) matches any tag; None means the document has no markup.
        return soup.find(True) is not None
    except Exception:
        # Broad on purpose at this validation boundary, but unlike a bare
        # ``except:`` it lets KeyboardInterrupt and SystemExit propagate.
        return False
44
+
45
def validate_txt(file_path: str) -> bool:
    """Return True if *file_path* is UTF-8 text with non-whitespace content.

    Args:
        file_path: Path of the file on disk to check.

    Returns:
        True when the file decodes as UTF-8 and contains at least one
        non-whitespace character; False when it is empty, whitespace-only,
        or cannot be read or decoded.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError covers
        # UnicodeDecodeError (content is not UTF-8) and malformed paths.
        return False
    return len(content.strip()) > 0
53
+
54
  # Initialize session files dictionary with pre-calculated documents
55
  precalculated_docs = ["the-bitter-lesson", "hurricane-faq", "pokemon-guide"]
56
 
 
87
  """
88
  # Check that the file is a PDF, TXT, HTML or MD
89
  if not file.filename.endswith(('.pdf', '.txt', '.html', '.md')):
90
+ raise HTTPException(status_code=400, detail="Only PDF, TXT, HTML and MD files are accepted")
91
 
92
  # Get the file extension
93
  file_extension = os.path.splitext(file.filename)[1].lower()
 
110
  with open(file_path, "wb") as buffer:
111
  shutil.copyfileobj(file.file, buffer)
112
 
113
+ # Validate the file according to its type
114
+ is_valid = False
115
+ if file_extension == '.pdf':
116
+ is_valid = validate_pdf(file_path)
117
+ elif file_extension == '.md':
118
+ is_valid = validate_markdown(file_path)
119
+ elif file_extension == '.html':
120
+ is_valid = validate_html(file_path)
121
+ elif file_extension == '.txt':
122
+ is_valid = validate_txt(file_path)
123
+
124
+ if not is_valid:
125
+ # Remove the invalid file
126
+ os.remove(file_path)
127
+ raise HTTPException(status_code=400, detail=f"Invalid {file_extension[1:].upper()} file")
128
+
129
  # Store file path for later use
130
  session_files[session_id] = file_path
131
 
 
 
 
 
132
  return {"filename": standardized_filename, "status": "uploaded", "session_id": session_id}
frontend/src/components/BenchmarkCreateForm.jsx CHANGED
@@ -143,7 +143,7 @@ function BenchmarkCreateForm({ onStartGeneration }) {
143
  if (response.ok) {
144
  setUploadStatus({
145
  success: true,
146
- message: `File ${result.filename} uploaded successfully`,
147
  });
148
  setOpenSnackbar(true);
149
  setSessionId(result.session_id);
@@ -151,7 +151,7 @@ function BenchmarkCreateForm({ onStartGeneration }) {
151
  } else {
152
  setUploadStatus({
153
  success: false,
154
- message: result.error || "Upload failed",
155
  });
156
  setOpenSnackbar(true);
157
  }
 
143
  if (response.ok) {
144
  setUploadStatus({
145
  success: true,
146
+ message: "File uploaded successfully",
147
  });
148
  setOpenSnackbar(true);
149
  setSessionId(result.session_id);
 
151
  } else {
152
  setUploadStatus({
153
  success: false,
154
+ message: result.detail || "Upload failed",
155
  });
156
  setOpenSnackbar(true);
157
  }