Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
improve upload validation
Browse files
backend/lighteval_task/lighteval_task.py
CHANGED
@@ -218,10 +218,11 @@ def process_judge_response_yourbench(response):
|
|
218 |
class JudgeLLMYourBench(JudgeLLM):
|
219 |
def __init__(self):
|
220 |
super().__init__(
|
221 |
-
judge_model_name="
|
222 |
template=get_judge_prompt,
|
223 |
process_judge_response=process_judge_response_yourbench,
|
224 |
-
judge_backend="
|
|
|
225 |
short_judge_name="yourbench_judge",
|
226 |
)
|
227 |
|
|
|
218 |
class JudgeLLMYourBench(JudgeLLM):
|
219 |
def __init__(self):
|
220 |
super().__init__(
|
221 |
+
judge_model_name="Qwen/QwQ-32B",
|
222 |
template=get_judge_prompt,
|
223 |
process_judge_response=process_judge_response_yourbench,
|
224 |
+
judge_backend="inference-providers",
|
225 |
+
hf_provider="novita",
|
226 |
short_judge_name="yourbench_judge",
|
227 |
)
|
228 |
|
backend/pyproject.toml
CHANGED
@@ -23,6 +23,8 @@ dependencies = [
|
|
23 |
"fastapi>=0.110.0",
|
24 |
"uvicorn>=0.29.0",
|
25 |
"pydantic>=2.6.0",
|
|
|
|
|
26 |
]
|
27 |
|
28 |
[build-system]
|
|
|
23 |
"fastapi>=0.110.0",
|
24 |
"uvicorn>=0.29.0",
|
25 |
"pydantic>=2.6.0",
|
26 |
+
"PyPDF2>=3.0.0",
|
27 |
+
"beautifulsoup4>=4.12.0",
|
28 |
]
|
29 |
|
30 |
[build-system]
|
backend/routes/upload.py
CHANGED
@@ -1,7 +1,9 @@
|
|
1 |
-
from fastapi import APIRouter, UploadFile, File
|
2 |
import os
|
3 |
import shutil
|
4 |
import uuid
|
|
|
|
|
5 |
|
6 |
router = APIRouter(tags=["files"])
|
7 |
|
@@ -12,6 +14,43 @@ session_files = {}
|
|
12 |
UPLOAD_ROOT = "uploaded_files"
|
13 |
os.makedirs(UPLOAD_ROOT, exist_ok=True)
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
# Initialize session files dictionary with pre-calculated documents
|
16 |
precalculated_docs = ["the-bitter-lesson", "hurricane-faq", "pokemon-guide"]
|
17 |
|
@@ -48,7 +87,7 @@ async def upload_file(file: UploadFile = File(...)):
|
|
48 |
"""
|
49 |
# Vérifier si le fichier est un PDF, TXT, HTML ou MD
|
50 |
if not file.filename.endswith(('.pdf', '.txt', '.html', '.md')):
|
51 |
-
|
52 |
|
53 |
# Get the file extension
|
54 |
file_extension = os.path.splitext(file.filename)[1].lower()
|
@@ -71,11 +110,23 @@ async def upload_file(file: UploadFile = File(...)):
|
|
71 |
with open(file_path, "wb") as buffer:
|
72 |
shutil.copyfileobj(file.file, buffer)
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
# Store file path for later use
|
75 |
session_files[session_id] = file_path
|
76 |
|
77 |
-
# Débogage pour vérifier l'état des session_files
|
78 |
-
print(f"DEBUG UPLOAD: File uploaded with session_id: {session_id}")
|
79 |
-
print(f"DEBUG UPLOAD: Current session_files: {session_files}")
|
80 |
-
|
81 |
return {"filename": standardized_filename, "status": "uploaded", "session_id": session_id}
|
|
|
1 |
+
from fastapi import APIRouter, UploadFile, File, HTTPException
|
2 |
import os
|
3 |
import shutil
|
4 |
import uuid
|
5 |
+
from bs4 import BeautifulSoup
|
6 |
+
from PyPDF2 import PdfReader
|
7 |
|
8 |
router = APIRouter(tags=["files"])
|
9 |
|
|
|
14 |
UPLOAD_ROOT = "uploaded_files"
|
15 |
os.makedirs(UPLOAD_ROOT, exist_ok=True)
|
16 |
|
17 |
+
def validate_pdf(file_path: str) -> bool:
    """Return True if the file at *file_path* can be read as a PDF.

    The file is considered valid when PyPDF2's ``PdfReader`` opens it
    without raising and reports at least one page.

    Args:
        file_path: Path of the uploaded file on disk.

    Returns:
        True if the PDF could be parsed and has one or more pages,
        False otherwise (including when the file cannot be opened).
    """
    try:
        reader = PdfReader(file_path)
        # A PDF with zero pages is treated as invalid.
        return len(reader.pages) > 0
    except Exception:
        # Malformed input can make the parser raise a wide range of error
        # types, so catch broadly here -- but not with a bare ``except``,
        # which would also swallow KeyboardInterrupt and SystemExit.
        return False
|
25 |
+
|
26 |
+
def validate_markdown(file_path: str) -> bool:
    """Return True if the file looks like a Markdown document.

    This is a heuristic check: the file must decode as UTF-8 and contain
    at least one character commonly used by Markdown syntax
    (``#``, ``-``, ``*``, `````, ``[`` or ``>``).

    Args:
        file_path: Path of the uploaded file on disk.

    Returns:
        True if the file is readable UTF-8 text containing at least one of
        the marker characters, False otherwise (empty file, undecodable
        content, or unreadable path).
    """
    markdown_markers = ('#', '-', '*', '`', '[', '>')
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers
        # UnicodeDecodeError (non UTF-8 bytes) and invalid path strings.
        return False
    # An empty file contains no marker, so no separate length check is needed.
    return any(marker in content for marker in markdown_markers)
|
35 |
+
|
36 |
+
def validate_html(file_path: str) -> bool:
    """Return True if the file can be read and parsed as HTML.

    The file is decoded as UTF-8 and handed to BeautifulSoup with the
    ``html.parser`` backend. The parsed tree is not inspected, so this
    check only confirms that the file is readable UTF-8 text that the
    parser accepts without raising.
    NOTE(review): html.parser is presumably lenient with malformed markup,
    so plain text is likely to pass as well -- confirm whether a stricter
    check (e.g. requiring at least one tag) is wanted.

    Args:
        file_path: Path of the uploaded file on disk.

    Returns:
        True if the file was read and parsed without error, False otherwise.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            markup = f.read()
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers
        # UnicodeDecodeError (non UTF-8 bytes) and invalid path strings.
        return False

    try:
        BeautifulSoup(markup, 'html.parser')
    except Exception:
        # Any parser failure means "invalid"; avoid a bare ``except`` so
        # KeyboardInterrupt and SystemExit still propagate.
        return False
    return True
|
44 |
+
|
45 |
+
def validate_txt(file_path: str) -> bool:
    """Return True if the file is non-blank UTF-8 text.

    Args:
        file_path: Path of the uploaded file on disk.

    Returns:
        True if the file decodes as UTF-8 and contains at least one
        non-whitespace character, False otherwise (blank file, undecodable
        content, or unreadable path).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers
        # UnicodeDecodeError (non UTF-8 bytes) and invalid path strings.
        return False
    # Whitespace-only files are rejected.
    return bool(content.strip())
|
53 |
+
|
54 |
# Initialize session files dictionary with pre-calculated documents
|
55 |
precalculated_docs = ["the-bitter-lesson", "hurricane-faq", "pokemon-guide"]
|
56 |
|
|
|
87 |
"""
|
88 |
# Vérifier si le fichier est un PDF, TXT, HTML ou MD
|
89 |
if not file.filename.endswith(('.pdf', '.txt', '.html', '.md')):
|
90 |
+
raise HTTPException(status_code=400, detail="Only PDF, TXT, HTML and MD files are accepted")
|
91 |
|
92 |
# Get the file extension
|
93 |
file_extension = os.path.splitext(file.filename)[1].lower()
|
|
|
110 |
with open(file_path, "wb") as buffer:
|
111 |
shutil.copyfileobj(file.file, buffer)
|
112 |
|
113 |
+
# Validate the file according to its type
|
114 |
+
is_valid = False
|
115 |
+
if file_extension == '.pdf':
|
116 |
+
is_valid = validate_pdf(file_path)
|
117 |
+
elif file_extension == '.md':
|
118 |
+
is_valid = validate_markdown(file_path)
|
119 |
+
elif file_extension == '.html':
|
120 |
+
is_valid = validate_html(file_path)
|
121 |
+
elif file_extension == '.txt':
|
122 |
+
is_valid = validate_txt(file_path)
|
123 |
+
|
124 |
+
if not is_valid:
|
125 |
+
# Delete the invalid file
|
126 |
+
os.remove(file_path)
|
127 |
+
raise HTTPException(status_code=400, detail=f"Invalid {file_extension[1:].upper()} file")
|
128 |
+
|
129 |
# Store file path for later use
|
130 |
session_files[session_id] = file_path
|
131 |
|
|
|
|
|
|
|
|
|
132 |
return {"filename": standardized_filename, "status": "uploaded", "session_id": session_id}
|
frontend/src/components/BenchmarkCreateForm.jsx
CHANGED
@@ -143,7 +143,7 @@ function BenchmarkCreateForm({ onStartGeneration }) {
|
|
143 |
if (response.ok) {
|
144 |
setUploadStatus({
|
145 |
success: true,
|
146 |
-
message:
|
147 |
});
|
148 |
setOpenSnackbar(true);
|
149 |
setSessionId(result.session_id);
|
@@ -151,7 +151,7 @@ function BenchmarkCreateForm({ onStartGeneration }) {
|
|
151 |
} else {
|
152 |
setUploadStatus({
|
153 |
success: false,
|
154 |
-
message: result.
|
155 |
});
|
156 |
setOpenSnackbar(true);
|
157 |
}
|
|
|
143 |
if (response.ok) {
|
144 |
setUploadStatus({
|
145 |
success: true,
|
146 |
+
message: "File uploaded successfully",
|
147 |
});
|
148 |
setOpenSnackbar(true);
|
149 |
setSessionId(result.session_id);
|
|
|
151 |
} else {
|
152 |
setUploadStatus({
|
153 |
success: false,
|
154 |
+
message: result.detail || "Upload failed",
|
155 |
});
|
156 |
setOpenSnackbar(true);
|
157 |
}
|