Spaces:
Running
Running
Commit
·
6440752
1
Parent(s):
10266ab
Fix encoding issues in paper_data function and add tests for text decoding
Browse files
main.py
CHANGED
@@ -31,14 +31,20 @@ def paper_data(paper_data, wait_time=5):
|
|
31 |
continue
|
32 |
try:
|
33 |
try:
|
34 |
-
|
|
|
|
|
|
|
35 |
except:
|
36 |
escaped_title = repr(title).strip()
|
37 |
title = html.escape(str(escaped_title).strip()[1:-1])
|
38 |
try:
|
39 |
-
|
|
|
|
|
|
|
40 |
except:
|
41 |
-
encoded_bytes = repr(citation)
|
42 |
citation = html.unescape(encoded_bytes)
|
43 |
status = post_blog(doi, title, category, summary, mindmap, citation, access_key, wait_time)
|
44 |
except Exception as e:
|
|
|
31 |
continue
|
32 |
try:
|
33 |
try:
|
34 |
+
try:
|
35 |
+
escaped_title = repr(title.encode('latin1').decode('unicode-escape', errors='replace')).strip()
|
36 |
+
except:
|
37 |
+
escaped_title = repr(title).strip().encode('latin1', errors='replace').decode('utf-8', errors='replace')
|
38 |
except:
|
39 |
escaped_title = repr(title).strip()
|
40 |
title = html.escape(str(escaped_title).strip()[1:-1])
|
41 |
try:
|
42 |
+
try:
|
43 |
+
encoded_bytes = citation.encode('latin1').decode('unicode-escape', errors='replace')
|
44 |
+
except:
|
45 |
+
encoded_bytes = repr(citation).strip().encode('latin1').decode('utf-8', errors='replace')
|
46 |
except:
|
47 |
+
encoded_bytes = repr(citation).strip()
|
48 |
citation = html.unescape(encoded_bytes)
|
49 |
status = post_blog(doi, title, category, summary, mindmap, citation, access_key, wait_time)
|
50 |
except Exception as e:
|
test.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Original text: UTF-8 bytes that were mis-decoded as Latin-1 (mojibake)
|
2 |
+
text = "Itâ\x80\x99s the AIâ\x80\x99s fault, not mine: Mind perception increases blame attribution to AI"
|
3 |
+
|
4 |
+
# Encode as 'latin1' to recover the original bytes, then decode them as 'utf-8'
|
5 |
+
fixed_text = text.encode('latin1').decode('utf-8')
|
6 |
+
|
7 |
+
print(fixed_text)
|