JoaquinVanschoren committed on
Commit
07c18c7
·
1 Parent(s): 1b48720

Updates including full validation report

Browse files
Files changed (3) hide show
  1. app.py +301 -44
  2. requirements.txt +10 -2
  3. validation.py +36 -12
app.py CHANGED
@@ -2,36 +2,50 @@ import gradio as gr
2
  import json
3
  import time
4
  import traceback
5
- from validation import validate_json, validate_croissant, validate_records
6
  import requests
7
 
8
  def process_file(file):
9
  results = []
 
 
 
 
10
 
11
  # Check 1: JSON validation
12
  json_valid, json_message, json_data = validate_json(file.name)
 
 
13
  results.append(("JSON Format Validation", json_valid, json_message))
14
 
15
  if not json_valid:
16
- return results
17
 
18
  # Check 2: Croissant validation
19
- croissant_valid, croissant_message = validate_croissant(json_data)
 
 
20
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
21
 
22
  if not croissant_valid:
23
- return results
24
 
25
  # Check 3: Records validation
26
- records_valid, records_message = validate_records(json_data)
 
 
27
  results.append(("Records Generation Test", records_valid, records_message))
28
 
29
- return results
 
 
 
 
30
 
31
  def create_ui():
32
  with gr.Blocks(theme=gr.themes.Soft()) as app:
33
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
34
- gr.Markdown("# πŸ₯ Croissant JSON-LD Validator for NeurIPS")
35
  gr.Markdown("""
36
  Upload your Croissant JSON-LD file or enter a URL to validate if it meets the requirements for NeurIPS submission. <a href="https://blog.neurips.cc/2025/03/10/neurips-datasets-benchmarks-raising-the-bar-for-dataset-submissions/">Read more about why this is required.</a>.
37
 
@@ -70,7 +84,23 @@ def create_ui():
70
  # Now create the validation results section in a separate group
71
  with gr.Group():
72
  # Validation results
73
- validation_results = gr.HTML(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # Define CSS for the validation UI
76
  gr.HTML("""
@@ -165,8 +195,84 @@ def create_ui():
165
  .arrow-indicator {
166
  font-size: 14px;
167
  transition: transform 0.3s ease;
 
168
  }
169
  .arrow-down {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  transform: rotate(90deg);
171
  }
172
  </style>
@@ -176,61 +282,145 @@ def create_ui():
176
  def on_tab_change(evt: gr.SelectData):
177
  tab_id = evt.value
178
  if tab_id == "Upload File":
179
- return "upload", """<div class="progress-status">Ready for upload</div>""", gr.update(visible=False)
 
 
 
 
 
 
 
 
 
180
  else:
181
- return "url", """<div class="progress-status">Enter a URL to fetch</div>""", gr.update(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  def on_file_upload(file):
184
  if file is None:
185
- return """<div class="progress-status">Ready for upload</div>""", gr.update(visible=False)
186
-
187
- return """<div class="progress-status">βœ… File uploaded successfully</div>""", gr.update(visible=False)
188
-
 
 
 
 
 
 
 
 
 
 
 
 
189
  def fetch_from_url(url):
190
  if not url:
191
- return """<div class="progress-status">Please enter a URL</div>""", gr.update(visible=False)
192
-
 
 
 
 
 
193
  try:
194
  # Fetch JSON from URL
195
  response = requests.get(url, timeout=10)
196
  response.raise_for_status()
197
  json_data = response.json()
198
 
199
- # Show success message
200
  progress_html = """<div class="progress-status">βœ… JSON fetched successfully from URL</div>"""
201
 
202
  # Validate the fetched JSON
203
  results = []
204
- results.append(("JSON Format Validation", True, "βœ… The URL returned valid JSON."))
205
 
206
  croissant_valid, croissant_message = validate_croissant(json_data)
207
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
208
 
209
  if not croissant_valid:
210
- return progress_html, build_results_html(results)
 
 
 
 
 
 
211
 
212
  records_valid, records_message = validate_records(json_data)
213
  results.append(("Records Generation Test", records_valid, records_message))
214
 
215
- return progress_html, build_results_html(results)
216
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  except requests.exceptions.RequestException as e:
218
- error_message = f"❌ Error fetching URL: {str(e)}"
219
- return f"""<div class="progress-status">{error_message}</div>""", gr.update(visible=False)
 
 
 
 
 
 
220
  except json.JSONDecodeError as e:
221
- error_message = f"❌ URL did not return valid JSON: {str(e)}"
222
- return f"""<div class="progress-status">{error_message}</div>""", gr.update(visible=False)
 
 
 
 
 
 
223
  except Exception as e:
224
- error_message = f"❌ Unexpected error: {str(e)}"
225
- return f"""<div class="progress-status">{error_message}</div>""", gr.update(visible=False)
226
-
 
 
 
 
 
 
227
  def build_results_html(results):
228
  # Build validation results HTML
229
  html = '<div class="validation-results">'
230
 
231
  for i, (test_name, passed, message) in enumerate(results):
232
  status_class = "status-success" if passed else "status-error"
233
- status_icon = "βœ“" if passed else "βœ—"
 
 
234
 
235
  html += f'''
236
  <div class="validation-step" id="step-{i}">
@@ -246,12 +436,12 @@ def create_ui():
246
  }}">
247
  <div class="step-left">
248
  <div class="step-status {status_class}">{status_icon}</div>
249
- <div class="step-title">{test_name}</div>
250
- <div class="arrow-indicator" id="arrow-{i}">β–Ά</div>
251
  </div>
252
  </div>
253
  <div class="step-details" id="details-{i}" style="display: none;">
254
- {message}
255
  </div>
256
  </div>
257
  '''
@@ -261,18 +451,85 @@ def create_ui():
261
 
262
  def on_validate(file):
263
  if file is None:
264
- return gr.update(visible=False)
265
-
 
 
 
 
 
 
266
  # Process the file and get results
267
- results = process_file(file)
268
- return build_results_html(results)
269
-
270
- # Connect UI events to functions
271
- tabs.select(on_tab_change, None, [active_tab, upload_progress, validation_results])
272
- file_input.change(on_file_upload, inputs=file_input, outputs=[upload_progress, validation_results])
273
- validate_btn.click(on_validate, inputs=file_input, outputs=validation_results)
274
- fetch_btn.click(fetch_from_url, inputs=url_input, outputs=[upload_progress, validation_results])
275
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  # Footer
277
  gr.HTML("""
278
  <div style="text-align: center; margin-top: 20px;">
@@ -284,4 +541,4 @@ def create_ui():
284
 
285
  if __name__ == "__main__":
286
  app = create_ui()
287
- app.launch()
 
2
  import json
3
  import time
4
  import traceback
5
+ from validation import validate_json, validate_croissant, validate_records, generate_validation_report
6
  import requests
7
 
8
  def process_file(file):
9
  results = []
10
+ json_data = None
11
+
12
+ # Use just the filename instead of full path
13
+ filename = file.name.split("/")[-1]
14
 
15
  # Check 1: JSON validation
16
  json_valid, json_message, json_data = validate_json(file.name)
17
+ # Remove empty checkmarks from messages
18
+ json_message = json_message.replace("\nβœ“\n", "\n")
19
  results.append(("JSON Format Validation", json_valid, json_message))
20
 
21
  if not json_valid:
22
+ return results, None
23
 
24
  # Check 2: Croissant validation
25
+ croissant_valid, croissant_message = validate_croissant(json_data)
26
+ # Remove empty checkmarks from messages
27
+ croissant_message = croissant_message.replace("\nβœ“\n", "\n")
28
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
29
 
30
  if not croissant_valid:
31
+ return results, None
32
 
33
  # Check 3: Records validation
34
+ records_valid, records_message = validate_records(json_data)
35
+ # Remove empty checkmarks from messages
36
+ records_message = records_message.replace("\nβœ“\n", "\n")
37
  results.append(("Records Generation Test", records_valid, records_message))
38
 
39
+
40
+ # Generate detailed report with just filename
41
+ report = generate_validation_report(filename, json_data, results)
42
+
43
+ return results, report
44
 
45
  def create_ui():
46
  with gr.Blocks(theme=gr.themes.Soft()) as app:
47
  gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
48
+ gr.Markdown("# πŸ₯ Croissant Validator for NeurIPS")
49
  gr.Markdown("""
50
  Upload your Croissant JSON-LD file or enter a URL to validate if it meets the requirements for NeurIPS submission. <a href="https://blog.neurips.cc/2025/03/10/neurips-datasets-benchmarks-raising-the-bar-for-dataset-submissions/">Read more about why this is required.</a>.
51
 
 
84
  # Now create the validation results section in a separate group
85
  with gr.Group():
86
  # Validation results
87
+ validation_results = gr.HTML(visible=False)
88
+ validation_progress = gr.HTML(visible=False)
89
+
90
+ # Collapsible report section
91
+ with gr.Accordion("Download full validation report", visible=False, open=False) as report_group:
92
+ with gr.Column():
93
+ report_md = gr.File(
94
+ label="Download Report",
95
+ visible=True,
96
+ file_types=[".md"]
97
+ )
98
+ report_text = gr.Textbox(
99
+ label="Report Content",
100
+ visible=True,
101
+ show_copy_button=True,
102
+ lines=10
103
+ )
104
 
105
  # Define CSS for the validation UI
106
  gr.HTML("""
 
195
  .arrow-indicator {
196
  font-size: 14px;
197
  transition: transform 0.3s ease;
198
+ transform: rotate(0deg); /* Point right by default */
199
  }
200
  .arrow-down {
201
+ transform: rotate(90deg); /* Point down when expanded */
202
+ }
203
+
204
+ /* Loading animation */
205
+ .loading-spinner {
206
+ display: inline-block;
207
+ width: 20px;
208
+ height: 20px;
209
+ border: 3px solid rgba(0, 0, 0, 0.1);
210
+ border-radius: 50%;
211
+ border-top-color: var(--primary-500);
212
+ animation: spin 1s ease-in-out infinite;
213
+ margin-right: 10px;
214
+ }
215
+ @keyframes spin {
216
+ to { transform: rotate(360deg); }
217
+ }
218
+ .validation-progress {
219
+ display: flex;
220
+ align-items: center;
221
+ justify-content: center;
222
+ padding: 10px;
223
+ margin: 10px 0;
224
+ background-color: var(--background-fill-secondary);
225
+ border-radius: 8px;
226
+ }
227
+ /* Override Gradio's default accordion arrow */
228
+ .gr-accordion {
229
+ position: relative;
230
+ }
231
+ .gr-accordion > .label-wrap {
232
+ display: flex;
233
+ align-items: center;
234
+ gap: 8px;
235
+ padding-right: 32px; /* Make room for the arrow */
236
+ }
237
+ .gr-accordion > .label-wrap::after {
238
+ content: "β–Ά";
239
+ position: absolute;
240
+ right: 16px;
241
+ top: 50%;
242
+ transform: translateY(-50%);
243
+ transition: transform 0.3s ease;
244
+ font-size: 0.8em;
245
+ }
246
+ .gr-accordion[data-open=true] > .label-wrap::after {
247
+ transform: translateY(-50%) rotate(90deg);
248
+ }
249
+ /* Consistent arrow styling for both validation steps and accordion */
250
+ .validation-step .step-header,
251
+ .gr-accordion > .label-wrap {
252
+ position: relative;
253
+ display: flex;
254
+ align-items: center;
255
+ gap: 8px;
256
+ }
257
+ .validation-step .arrow-indicator,
258
+ .gr-accordion > .label-wrap::after {
259
+ content: "β–Ά";
260
+ font-size: 0.8em;
261
+ margin-left: 8px;
262
+ transition: transform 0.3s ease;
263
+ }
264
+ /* Remove absolute positioning and right alignment for accordion arrow */
265
+ .gr-accordion > .label-wrap {
266
+ padding-right: 0; /* Remove extra padding */
267
+ }
268
+ .gr-accordion > .label-wrap::after {
269
+ position: static; /* Remove absolute positioning */
270
+ right: auto;
271
+ transform: none;
272
+ }
273
+ /* Consistent rotation for expanded state */
274
+ .validation-step .arrow-down,
275
+ .gr-accordion[data-open=true] > .label-wrap::after {
276
  transform: rotate(90deg);
277
  }
278
  </style>
 
282
  def on_tab_change(evt: gr.SelectData):
283
  tab_id = evt.value
284
  if tab_id == "Upload File":
285
+ return [
286
+ "upload",
287
+ """<div class="progress-status">Ready for upload</div>""",
288
+ gr.update(visible=False),
289
+ gr.update(visible=False), # Hide report group
290
+ None, # Clear report text
291
+ None, # Clear report file
292
+ None, # Clear file input
293
+ gr.update(value="") # Clear URL input
294
+ ]
295
  else:
296
+ return [
297
+ "url",
298
+ """<div class="progress-status">Enter a URL to fetch</div>""",
299
+ gr.update(visible=False),
300
+ gr.update(visible=False), # Hide report group
301
+ None, # Clear report text
302
+ None, # Clear report file
303
+ None, # Clear file input
304
+ gr.update(value="") # Clear URL input
305
+ ]
306
+
307
def on_copy_click(report):
    """Return the report text unchanged."""
    return report
309
+
310
def on_download_click(report, file_name):
    """Write the report to ``report_<file_name>.md`` and return that path.

    Args:
        report: Report text to write.
        file_name: Name used to build the output file name. Only its last
            path component is used, so a value containing directory parts
            cannot place the file outside the working directory.

    Returns:
        The relative path of the file that was written.
    """
    import os

    # Treat both separator styles as path separators before taking the base name.
    safe_name = os.path.basename(str(file_name).replace("\\", "/")) or "unnamed"
    report_file = f"report_{safe_name}.md"
    # Explicit UTF-8: the platform default encoding may not be able to
    # represent every character in the report.
    with open(report_file, "w", encoding="utf-8") as f:
        f.write(report)
    return report_file
315
 
316
  def on_file_upload(file):
317
  if file is None:
318
+ return [
319
+ """<div class="progress-status">Ready for upload</div>""",
320
+ gr.update(visible=False),
321
+ gr.update(visible=False), # Hide report group
322
+ None, # Clear report text
323
+ None # Clear report file
324
+ ]
325
+
326
+ return [
327
+ """<div class="progress-status">βœ… File uploaded successfully</div>""",
328
+ gr.update(visible=False),
329
+ gr.update(visible=False), # Hide report group
330
+ None, # Clear report text
331
+ None # Clear report file
332
+ ]
333
+
334
  def fetch_from_url(url):
335
  if not url:
336
+ return [
337
+ """<div class="progress-status">Please enter a URL</div>""",
338
+ gr.update(visible=False),
339
+ gr.update(visible=False),
340
+ None,
341
+ None
342
+ ]
343
  try:
344
  # Fetch JSON from URL
345
  response = requests.get(url, timeout=10)
346
  response.raise_for_status()
347
  json_data = response.json()
348
 
349
+ # Process validation
350
  progress_html = """<div class="progress-status">βœ… JSON fetched successfully from URL</div>"""
351
 
352
  # Validate the fetched JSON
353
  results = []
354
+ results.append(("JSON Format Validation", True, "The URL returned valid JSON."))
355
 
356
  croissant_valid, croissant_message = validate_croissant(json_data)
357
  results.append(("Croissant Schema Validation", croissant_valid, croissant_message))
358
 
359
  if not croissant_valid:
360
+ return [
361
+ """<div class="progress-status">βœ… JSON fetched successfully from URL</div>""",
362
+ build_results_html(results),
363
+ gr.update(visible=False),
364
+ None,
365
+ None
366
+ ]
367
 
368
  records_valid, records_message = validate_records(json_data)
369
  results.append(("Records Generation Test", records_valid, records_message))
370
 
371
+ # Generate report
372
+ report = generate_validation_report(url.split("/")[-1], json_data, results)
373
+ report_filename = f"report_croissant-validation_{json_data.get('name', 'unnamed')}.md"
374
+
375
+ if report:
376
+ with open(report_filename, "w") as f:
377
+ f.write(report)
378
+
379
+ return [
380
+ """<div class="progress-status">βœ… JSON fetched successfully from URL</div>""",
381
+ build_results_html(results),
382
+ gr.update(visible=True),
383
+ report,
384
+ report_filename
385
+ ]
386
+
387
  except requests.exceptions.RequestException as e:
388
+ error_message = f"Error fetching URL: {str(e)}"
389
+ return [
390
+ f"""<div class="progress-status">{error_message}</div>""",
391
+ gr.update(visible=False),
392
+ gr.update(visible=False),
393
+ None,
394
+ None
395
+ ]
396
  except json.JSONDecodeError as e:
397
+ error_message = f"URL did not return valid JSON: {str(e)}"
398
+ return [
399
+ f"""<div class="progress-status">{error_message}</div>""",
400
+ gr.update(visible=False),
401
+ gr.update(visible=False),
402
+ None,
403
+ None
404
+ ]
405
  except Exception as e:
406
+ error_message = f"Unexpected error: {str(e)}"
407
+ return [
408
+ f"""<div class="progress-status">{error_message}</div>""",
409
+ gr.update(visible=False),
410
+ gr.update(visible=False),
411
+ None,
412
+ None
413
+ ]
414
+
415
  def build_results_html(results):
416
  # Build validation results HTML
417
  html = '<div class="validation-results">'
418
 
419
  for i, (test_name, passed, message) in enumerate(results):
420
  status_class = "status-success" if passed else "status-error"
421
+ status_icon = "βœ“" if passed else "βœ—"
422
+ # Add emoji to message
423
+ message_with_emoji = ("βœ… " if passed else "❌ ") + message
424
 
425
  html += f'''
426
  <div class="validation-step" id="step-{i}">
 
436
  }}">
437
  <div class="step-left">
438
  <div class="step-status {status_class}">{status_icon}</div>
439
+ <span class="step-title">{test_name}</span>
440
+ <span class="arrow-indicator" id="arrow-{i}">β–Ά</span>
441
  </div>
442
  </div>
443
  <div class="step-details" id="details-{i}" style="display: none;">
444
+ {message_with_emoji}
445
  </div>
446
  </div>
447
  '''
 
451
 
452
  def on_validate(file):
453
  if file is None:
454
+ return [
455
+ gr.update(visible=False), # validation_results
456
+ gr.update(visible=False), # validation_progress
457
+ gr.update(visible=False), # report_group
458
+ None, # report_text
459
+ None # report_md
460
+ ]
461
+
462
  # Process the file and get results
463
+ results, report = process_file(file)
464
+
465
+ # Extract dataset name from the JSON for the report filename
466
+ try:
467
+ with open(file.name, 'r') as f:
468
+ json_data = json.load(f)
469
+ dataset_name = json_data.get('name', 'unnamed')
470
+ except:
471
+ dataset_name = 'unnamed'
472
+
473
+ # Save report to file with new naming convention
474
+ report_filename = f"report_croissant-validation_{dataset_name}.md"
475
+ if report:
476
+ with open(report_filename, "w") as f:
477
+ f.write(report)
478
+
479
+ # Return final state
480
+ return [
481
+ build_results_html(results), # validation_results
482
+ gr.update(visible=False), # validation_progress
483
+ gr.update(visible=True) if report else gr.update(visible=False), # report_group
484
+ report if report else None, # report_text
485
+ report_filename if report else None # report_md
486
+ ]
487
+
488
+ # Connect UI events to functions with updated outputs
489
+ tabs.select(
490
+ on_tab_change,
491
+ None,
492
+ [active_tab, upload_progress, validation_results, report_group, report_text, report_md, file_input, url_input]
493
+ )
494
+ file_input.change(
495
+ on_file_upload,
496
+ inputs=file_input,
497
+ outputs=[upload_progress, validation_results, report_group, report_text, report_md]
498
+ )
499
+
500
+ # Add progress state handling
501
def show_progress():
    """Show the spinner banner and hide any previous results and report.

    Returns a list of five values, in this order: validation results,
    validation progress, report group, report text and report file.
    """
    spinner = """
    <div class="validation-progress">
    <div class="loading-spinner"></div>
    <span>Validating file...</span>
    </div>
    """
    hide_results = gr.update(visible=False)
    show_spinner = gr.update(visible=True, value=spinner)
    hide_report = gr.update(visible=False)
    # The report text and file are cleared so a stale report is never shown.
    return [hide_results, show_spinner, hide_report, None, None]
515
+
516
+ validate_btn.click(
517
+ fn=show_progress,
518
+ inputs=None,
519
+ outputs=[validation_results, validation_progress, report_group, report_text, report_md],
520
+ queue=False
521
+ ).then(
522
+ fn=on_validate,
523
+ inputs=file_input,
524
+ outputs=[validation_results, validation_progress, report_group, report_text, report_md]
525
+ )
526
+
527
+ fetch_btn.click(
528
+ fetch_from_url,
529
+ inputs=url_input,
530
+ outputs=[upload_progress, validation_results, report_group, report_text, report_md]
531
+ )
532
+
533
  # Footer
534
  gr.HTML("""
535
  <div style="text-align: center; margin-top: 20px;">
 
541
 
542
  if __name__ == "__main__":
543
  app = create_ui()
544
+ app.launch()
requirements.txt CHANGED
@@ -1,4 +1,12 @@
 
 
1
  gradio>=3.50.2
2
- mlcroissant
3
  func_timeout
4
- requests
 
 
 
 
 
 
 
 
1
+ mlcroissant>=1.0.16
2
+ pydantic==2.10.6
3
  gradio>=3.50.2
 
4
  func_timeout
5
+ requests
6
+ huggingface-hub==0.30.1
7
+ fsspec==2023.10.0
8
+ aiohttp==3.11.15
9
+ aiohappyeyeballs==2.6.1
10
+ pandas==2.2.2
11
+ pyarrow==18.1.0
12
+ GitPython>=3.1.0
validation.py CHANGED
@@ -10,26 +10,26 @@ def validate_json(file_path):
10
  try:
11
  with open(file_path, 'r') as f:
12
  json_data = json.load(f)
13
- return True, "βœ… The file is valid JSON.", json_data
14
  except json.JSONDecodeError as e:
15
- error_message = f"❌ Invalid JSON format: {str(e)}"
16
  return False, error_message, None
17
  except Exception as e:
18
- error_message = f"❌ Error reading file: {str(e)}"
19
  return False, error_message, None
20
 
21
  def validate_croissant(json_data):
22
  """Validate that the JSON follows Croissant schema."""
23
  try:
24
  dataset = mlc.Dataset(jsonld=json_data)
25
- return True, "βœ… The dataset passes Croissant validation."
26
  except mlc.ValidationError as e:
27
  error_details = traceback.format_exc()
28
- error_message = f"❌ Validation failed: {str(e)}\n\n{error_details}"
29
  return False, error_message
30
  except Exception as e:
31
  error_details = traceback.format_exc()
32
- error_message = f"❌ Unexpected error during validation: {str(e)}\n\n{error_details}"
33
  return False, error_message
34
 
35
  def validate_records(json_data):
@@ -39,7 +39,7 @@ def validate_records(json_data):
39
  record_sets = dataset.metadata.record_sets
40
 
41
  if not record_sets:
42
- return True, "βœ… No record sets found to validate."
43
 
44
  results = []
45
 
@@ -47,17 +47,41 @@ def validate_records(json_data):
47
  try:
48
  records = dataset.records(record_set=record_set.uuid)
49
  _ = func_timeout.func_timeout(WAIT_TIME, lambda: next(iter(records)))
50
- results.append(f"βœ… Record set '{record_set.uuid}' passed validation.")
51
  except func_timeout.exceptions.FunctionTimedOut:
52
- error_message = f"❌ Record set '{record_set.uuid}' generation took too long (>60s)"
53
  return False, error_message
54
  except Exception as e:
55
  error_details = traceback.format_exc()
56
- error_message = f"❌ Record set '{record_set.uuid}' failed: {str(e)}\n\n{error_details}"
57
  return False, error_message
58
 
59
  return True, "\n".join(results)
60
  except Exception as e:
61
  error_details = traceback.format_exc()
62
- error_message = f"❌ Unexpected error during records validation: {str(e)}\n\n{error_details}"
63
- return False, error_message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  try:
11
  with open(file_path, 'r') as f:
12
  json_data = json.load(f)
13
+ return True, "The file is valid JSON.", json_data
14
  except json.JSONDecodeError as e:
15
+ error_message = f"Invalid JSON format: {str(e)}"
16
  return False, error_message, None
17
  except Exception as e:
18
+ error_message = f"Error reading file: {str(e)}"
19
  return False, error_message, None
20
 
21
  def validate_croissant(json_data):
22
  """Validate that the JSON follows Croissant schema."""
23
  try:
24
  dataset = mlc.Dataset(jsonld=json_data)
25
+ return True, "The dataset passes Croissant validation."
26
  except mlc.ValidationError as e:
27
  error_details = traceback.format_exc()
28
+ error_message = f"Validation failed: {str(e)}\n\n{error_details}"
29
  return False, error_message
30
  except Exception as e:
31
  error_details = traceback.format_exc()
32
+ error_message = f"Unexpected error during validation: {str(e)}\n\n{error_details}"
33
  return False, error_message
34
 
35
  def validate_records(json_data):
 
39
  record_sets = dataset.metadata.record_sets
40
 
41
  if not record_sets:
42
+ return True, "No record sets found to validate."
43
 
44
  results = []
45
 
 
47
  try:
48
  records = dataset.records(record_set=record_set.uuid)
49
  _ = func_timeout.func_timeout(WAIT_TIME, lambda: next(iter(records)))
50
+ results.append(f"Record set '{record_set.uuid}' passed validation.")
51
  except func_timeout.exceptions.FunctionTimedOut:
52
+ error_message = f"Record set '{record_set.uuid}' generation took too long (>60s)"
53
  return False, error_message
54
  except Exception as e:
55
  error_details = traceback.format_exc()
56
+ error_message = f"Record set '{record_set.uuid}' failed: {str(e)}\n\n{error_details}"
57
  return False, error_message
58
 
59
  return True, "\n".join(results)
60
  except Exception as e:
61
  error_details = traceback.format_exc()
62
+ error_message = f"Unexpected error during records validation: {str(e)}\n\n{error_details}"
63
+ return False, error_message
64
+
65
def generate_validation_report(filename, json_data, results):
    """Generate a detailed validation report in markdown format.

    Args:
        filename: Name of the validated file, shown in the report header.
        json_data: The parsed JSON-LD, dumped at the end of the report with
            ``json.dumps(..., indent=2)``.
        results: Iterable of ``(test_name, passed, message)`` tuples.

    Returns:
        The report as a single newline-joined string.
    """
    # Pass/fail marks (U+2713 / U+2717) are written as escapes so the literals
    # cannot be damaged by a re-encoding of this file.
    passed_mark, failed_mark = "\u2713", "\u2717"

    report = [
        "# CROISSANT VALIDATION REPORT",
        "=" * 80,
        "## VALIDATION RESULTS",
        "-" * 80,
        f"Starting validation for file: {filename}",
    ]

    # One heading, one mark and the trimmed message per validation stage.
    for test_name, passed, message in results:
        report.extend((
            f"### {test_name}",
            passed_mark if passed else failed_mark,
            message.strip(),  # remove surrounding whitespace/newlines
        ))

    # Add the JSON-LD reference as a fenced code block.
    report.extend((
        "## JSON-LD REFERENCE",
        "=" * 80,
        "```json",
        json.dumps(json_data, indent=2),
        "```",
    ))

    return "\n".join(report)