Spaces:
Running
Running
Commit
·
f50b29d
1
Parent(s):
319999c
Refactor summarization and email sending logic; improve error handling and environment variable checks
Browse files
- fetch_data.py +30 -26
- image.py +61 -89
- main.py +77 -82
- post_blog.py +99 -89
- send_mail.py +49 -31
- summarize_paper.py +25 -16
fetch_data.py
CHANGED
@@ -2,23 +2,23 @@ from gradio_client import Client
|
|
2 |
import json
|
3 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
4 |
|
5 |
-
def fetch_category_ids(
|
6 |
-
|
7 |
-
if not cat_ids_api_key:
|
8 |
raise ValueError("API access key not found. Please check your environment variables.")
|
9 |
cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
|
10 |
try:
|
11 |
result = cat_ids_api_client.predict(
|
12 |
-
user_access_key=
|
13 |
api_name="/fetch_paper_ids"
|
14 |
)
|
15 |
cat_ids = json.loads(result)
|
16 |
-
if cat_ids
|
17 |
-
return cat_ids
|
18 |
else:
|
|
|
19 |
return None
|
20 |
except Exception as e:
|
21 |
-
print(f"Exception while fetching category IDs: {
|
22 |
return None
|
23 |
|
24 |
def fetch_single_paper_data(paper_id):
|
@@ -29,44 +29,48 @@ def fetch_single_paper_data(paper_id):
|
|
29 |
api_name="/fetch_paper_data"
|
30 |
)
|
31 |
paper_data = json.loads(result)
|
32 |
-
if paper_data
|
33 |
-
return paper_id, paper_data
|
34 |
else:
|
35 |
print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
|
36 |
return paper_id, None
|
37 |
except Exception as e:
|
38 |
-
print(f"Exception while fetching data for paper ID {paper_id}: {
|
39 |
return paper_id, None
|
40 |
|
41 |
def fetch_paper_data_concurrently(paper_ids, max_threads=12):
|
42 |
paper_id_data = {}
|
43 |
with ThreadPoolExecutor(max_workers=max_threads) as executor:
|
44 |
-
|
45 |
-
for future in as_completed(
|
46 |
-
paper_id =
|
47 |
try:
|
48 |
-
|
49 |
if data:
|
50 |
-
paper_id_data[
|
51 |
except Exception as e:
|
52 |
-
print(f"Error fetching data for paper ID {paper_id}: {
|
53 |
return paper_id_data
|
54 |
|
55 |
-
def fetch_paper_data_with_category(
|
56 |
-
|
57 |
try:
|
58 |
-
cat_ids = fetch_category_ids(
|
59 |
if cat_ids:
|
60 |
-
for category,
|
61 |
print(f"Fetching data for category: {category}")
|
62 |
try:
|
63 |
-
|
64 |
-
if
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
except Exception as e:
|
67 |
-
print(f"Error fetching data for category {category}: {
|
68 |
continue
|
69 |
-
return json.dumps(
|
70 |
except Exception as e:
|
71 |
-
print(f"Exception while fetching paper data by category: {
|
72 |
return None
|
|
|
2 |
import json
|
3 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
4 |
|
5 |
+
def fetch_category_ids(api_key):
    """Fetch the category-to-paper-ID mapping from the ReXplore ID API.

    Args:
        api_key: Access key forwarded to the API as ``user_access_key``.

    Returns:
        The value stored under ``"data"`` in the decoded JSON response when
        its ``"status"`` is ``"success"``. ``None`` when the API reports any
        other status or when an exception occurs while contacting it.

    Raises:
        ValueError: If ``api_key`` is empty or ``None``.
    """
    if not api_key:
        raise ValueError("API access key not found. Please check your environment variables.")
    try:
        # The client is created inside the ``try`` so that a failure while
        # setting it up is reported like any other fetch failure (message +
        # ``None``) instead of escaping to the caller.
        cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
        result = cat_ids_api_client.predict(
            user_access_key=api_key,
            api_name="/fetch_paper_ids",
        )
        cat_ids = json.loads(result)
        if cat_ids.get('status') == 'success':
            return cat_ids.get('data')
        print(f"Failed to fetch category IDs: {cat_ids.get('message', 'No message provided')}")
        return None
    except Exception as e:
        print(f"Exception while fetching category IDs: {e}")
        return None
|
23 |
|
24 |
def fetch_single_paper_data(paper_id):
|
|
|
29 |
api_name="/fetch_paper_data"
|
30 |
)
|
31 |
paper_data = json.loads(result)
|
32 |
+
if paper_data.get('status') == 'success':
|
33 |
+
return paper_id, paper_data.get('data')
|
34 |
else:
|
35 |
print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
|
36 |
return paper_id, None
|
37 |
except Exception as e:
|
38 |
+
print(f"Exception while fetching data for paper ID {paper_id}: {e}")
|
39 |
return paper_id, None
|
40 |
|
41 |
def fetch_paper_data_concurrently(paper_ids, max_threads=12):
    """Fetch data for several papers in parallel using a thread pool.

    One ``fetch_single_paper_data`` call is submitted per entry of
    ``paper_ids``; results are collected as the calls finish.

    Args:
        paper_ids: Iterable of paper identifiers to fetch.
        max_threads: Maximum number of worker threads for the pool.

    Returns:
        A dict mapping the paper ID returned by each worker to its data.
        Papers whose data is falsy, or whose future raised, are left out.
    """
    collected = {}
    with ThreadPoolExecutor(max_workers=max_threads) as pool:
        # Remember which ID each future was submitted for so that a failure
        # can be reported against the right paper.
        pending = {}
        for requested_id in paper_ids:
            pending[pool.submit(fetch_single_paper_data, requested_id)] = requested_id
        for finished in as_completed(pending):
            paper_id = pending[finished]
            try:
                returned_id, payload = finished.result()
                if payload:
                    collected[returned_id] = payload
            except Exception as e:
                print(f"Error fetching data for paper ID {paper_id}: {e}")
    return collected
|
54 |
|
55 |
+
def fetch_paper_data_with_category(api_key):
    """Fetch paper data for every category and return it as a JSON string.

    Category IDs come from ``fetch_category_ids``; for each category the IDs
    listed under its ``'ids'`` key are fetched with
    ``fetch_paper_data_concurrently``.

    Args:
        api_key: Access key passed through to ``fetch_category_ids``.

    Returns:
        A JSON string (4-space indent, non-ASCII kept as-is) mapping category
        name to the fetched paper data. Categories that yield no data are
        omitted. ``None`` if an exception escapes the overall fetch.
    """
    results_by_category = {}
    try:
        # A falsy result (e.g. ``None``) simply means there is nothing to walk.
        category_map = fetch_category_ids(api_key) or {}
        for category, info in category_map.items():
            print(f"Fetching data for category: {category}")
            try:
                ids = info.get('ids', [])
                if not ids:
                    print(f"No paper IDs found for category: {category}")
                    continue
                fetched = fetch_paper_data_concurrently(ids)
                if fetched:
                    results_by_category[category] = fetched
            except Exception as e:
                # One bad category must not abort the remaining ones.
                print(f"Error fetching data for category {category}: {e}")
        return json.dumps(results_by_category, indent=4, ensure_ascii=False)
    except Exception as e:
        print(f"Exception while fetching paper data by category: {e}")
        return None
|
image.py
CHANGED
@@ -2,22 +2,38 @@ import base64
|
|
2 |
import io
|
3 |
import os
|
4 |
import re
|
5 |
-
import
|
6 |
-
from urllib.parse import quote
|
7 |
from PIL import Image
|
8 |
from g4f.client import Client
|
9 |
from g4f.Provider import RetryProvider, PollinationsAI, ImageLabs, Blackbox, HuggingSpace, Airforce
|
10 |
from g4f.Provider.hf_space.BlackForestLabsFlux1Schnell import BlackForestLabsFlux1Schnell
|
11 |
from g4f.Provider.hf_space.VoodoohopFlux1Schnell import VoodoohopFlux1Schnell
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def extract_summary(text):
|
|
|
|
|
|
|
14 |
text = text.replace("#", "").strip().lower()
|
15 |
match = re.search(r"summary(.*?)highlights", text, re.DOTALL)
|
16 |
-
if match
|
17 |
-
return match.group(1).strip()
|
18 |
-
return text
|
19 |
|
20 |
def fix_base64_padding(data):
|
|
|
|
|
|
|
21 |
missing_padding = len(data) % 4
|
22 |
if missing_padding:
|
23 |
data += "=" * (4 - missing_padding)
|
@@ -25,84 +41,41 @@ def fix_base64_padding(data):
|
|
25 |
|
26 |
def generate_image(title, category, summary):
|
27 |
print("Generating image...")
|
28 |
-
import time
|
29 |
start = time.time()
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
try:
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
single_provider_retry=True,
|
42 |
-
max_retries=5,
|
43 |
-
),
|
44 |
-
model="sdxl-turbo",
|
45 |
-
prompt=prompt,
|
46 |
-
negative_prompt=negative,
|
47 |
-
response_format="b64_json",
|
48 |
-
width=1024,
|
49 |
-
height=576,
|
50 |
-
).data[0].b64_json
|
51 |
-
print(f"Image generated in {time.time() - start:.2f} seconds")
|
52 |
-
if img_data:
|
53 |
-
return f"data:image/png;base64,{img_data}"
|
54 |
-
return None
|
55 |
-
except Exception as e:
|
56 |
-
print(f"Error generating image: {e}")
|
57 |
-
negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
|
58 |
-
prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
|
59 |
-
client = Client()
|
60 |
-
time.sleep(20)
|
61 |
-
img_data = client.images.generate(
|
62 |
-
provider=RetryProvider(
|
63 |
-
providers=[Airforce, PollinationsAI, Blackbox],
|
64 |
-
shuffle=True,
|
65 |
-
single_provider_retry=True,
|
66 |
-
max_retries=5,
|
67 |
-
),
|
68 |
-
model="flux",
|
69 |
-
prompt=prompt,
|
70 |
-
negative_prompt=negative,
|
71 |
-
response_format="b64_json",
|
72 |
-
width=1024,
|
73 |
-
height=576,
|
74 |
-
).data[0].b64_json
|
75 |
-
print(f"Image generated in {time.time() - start:.2f} seconds")
|
76 |
-
if img_data:
|
77 |
-
return f"data:image/png;base64,{img_data}"
|
78 |
-
return None
|
79 |
-
except Exception as e:
|
80 |
-
print(f"Error generating image: {e}")
|
81 |
-
negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
|
82 |
-
prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
|
83 |
-
client = Client()
|
84 |
-
time.sleep(20)
|
85 |
-
img_data = client.images.generate(
|
86 |
-
provider=RetryProvider(
|
87 |
-
providers=[BlackForestLabsFlux1Schnell, VoodoohopFlux1Schnell, HuggingSpace],
|
88 |
-
shuffle=True,
|
89 |
-
single_provider_retry=True,
|
90 |
-
max_retries=5,
|
91 |
-
),
|
92 |
-
model="flux-schnell",
|
93 |
prompt=prompt,
|
94 |
-
negative_prompt=
|
95 |
response_format="b64_json",
|
96 |
width=1024,
|
97 |
height=576,
|
98 |
-
)
|
99 |
-
|
|
|
|
|
100 |
if img_data:
|
101 |
return f"data:image/png;base64,{img_data}"
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
|
107 |
def verify_image(image_data):
|
108 |
try:
|
@@ -115,31 +88,30 @@ def verify_image(image_data):
|
|
115 |
return False
|
116 |
|
117 |
def fetch_image(title, category, summary):
|
118 |
-
title = r"{}".format(title)
|
119 |
-
category = r"{}".format(category)
|
120 |
summary = extract_summary(summary)
|
121 |
-
|
122 |
try:
|
123 |
data_uri = generate_image(title, category, summary)
|
124 |
if data_uri:
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
except Exception as e:
|
132 |
print(f"Error fetching image: {e}")
|
133 |
-
|
134 |
finally:
|
135 |
if os.path.exists("image.png"):
|
136 |
os.remove("image.png")
|
137 |
-
return image_url
|
138 |
-
|
139 |
|
140 |
if __name__ == "__main__":
|
141 |
title = "Exposition: Enumerative Geometry and Tree-Level Gromov-Witten Invariants"
|
142 |
category = "Mathematics"
|
143 |
-
summary =
|
|
|
|
|
|
|
144 |
image_url = fetch_image(title, category, summary)
|
145 |
-
print(image_url)
|
|
|
2 |
import io
|
3 |
import os
|
4 |
import re
|
5 |
+
import time
|
|
|
6 |
from PIL import Image
|
7 |
from g4f.client import Client
|
8 |
from g4f.Provider import RetryProvider, PollinationsAI, ImageLabs, Blackbox, HuggingSpace, Airforce
|
9 |
from g4f.Provider.hf_space.BlackForestLabsFlux1Schnell import BlackForestLabsFlux1Schnell
|
10 |
from g4f.Provider.hf_space.VoodoohopFlux1Schnell import VoodoohopFlux1Schnell
|
11 |
|
12 |
+
NEGATIVE_PROMPT = (
|
13 |
+
"low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, "
|
14 |
+
"missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, "
|
15 |
+
"three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, "
|
16 |
+
"worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, "
|
17 |
+
"2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, "
|
18 |
+
"3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, "
|
19 |
+
"bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, "
|
20 |
+
"harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, "
|
21 |
+
"twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, "
|
22 |
+
"unsuitable, inappropriate, offensive, revealing, sexual, explicit"
|
23 |
+
)
|
24 |
+
|
25 |
def extract_summary(text):
    """Clean *text* and extract its summary section.

    Every ``#`` is removed, then the text is stripped and lower-cased. If the
    cleaned text contains ``summary`` followed later by ``highlights``, only
    the (stripped) text between the first such pair is returned; otherwise the
    whole cleaned text is returned.

    Args:
        text: Markdown-like summary text. ``None`` or an empty string is
            treated as "no summary".

    Returns:
        The extracted section, the cleaned text, or ``""`` when there is no
        input.
    """
    if not text:
        # fetch_image calls this before entering its own try/except, so a
        # missing summary must not raise here.
        return ""
    text = text.replace("#", "").strip().lower()
    match = re.search(r"summary(.*?)highlights", text, re.DOTALL)
    return match.group(1).strip() if match else text
|
|
|
|
|
32 |
|
33 |
def fix_base64_padding(data):
|
34 |
+
"""
|
35 |
+
Ensure that the base64 string has the proper padding.
|
36 |
+
"""
|
37 |
missing_padding = len(data) % 4
|
38 |
if missing_padding:
|
39 |
data += "=" * (4 - missing_padding)
|
|
|
41 |
|
42 |
def generate_image(title, category, summary):
    """Generate an illustration for a paper and return it as a data URI.

    Up to three provider/model combinations are tried in a fixed order; the
    first one that yields non-empty base64 data wins. A failing combination is
    logged and the next one is tried.

    Args:
        title: Paper title (stripped before use in the prompt).
        category: Paper category named in the prompt.
        summary: Paper summary (stripped before use in the prompt).

    Returns:
        ``"data:image/png;base64,<payload>"`` on success, or ``None`` when no
        combination produced image data.
    """
    print("Generating image...")
    started_at = time.time()
    prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
    image_client = Client()
    # (model, providers) pairs, tried top to bottom.
    fallback_chain = (
        ("sdxl-turbo", [ImageLabs, PollinationsAI]),
        ("flux", [Airforce, PollinationsAI, Blackbox]),
        ("flux-schnell", [BlackForestLabsFlux1Schnell, VoodoohopFlux1Schnell, HuggingSpace]),
    )

    for model, provider_pool in fallback_chain:
        try:
            retrying_provider = RetryProvider(
                providers=provider_pool,
                shuffle=True,
                single_provider_retry=True,
                max_retries=3,
            )
            result = image_client.images.generate(
                provider=retrying_provider,
                model=model,
                prompt=prompt,
                negative_prompt=NEGATIVE_PROMPT,
                response_format="b64_json",
                width=1024,
                height=576,
            )
            encoded = result.data[0].b64_json
            # Elapsed time is measured from the start of the whole call, so it
            # includes any earlier failed combinations.
            print(f"Image generated in {time.time() - started_at:.2f} seconds using model {model}")
            if encoded:
                return f"data:image/png;base64,{encoded}"
        except Exception as e:
            print(f"Attempt with model {model} failed: {e}")

    return None
|
79 |
|
80 |
def verify_image(image_data):
|
81 |
try:
|
|
|
88 |
return False
|
89 |
|
90 |
def fetch_image(title, category, summary):
    """Return a validated image data URI for a paper, or a fallback URL.

    The summary is reduced with ``extract_summary``, an image is requested
    from ``generate_image``, and the returned base64 payload is padded,
    strictly decoded and passed to ``verify_image``.

    Args:
        title: Paper title.
        category: Paper category.
        summary: Full summary text.

    Returns:
        ``"data:image/png;base64,<payload>"`` when an image was generated and
        verified; otherwise the "Image Not Found" placeholder URL.
    """
    condensed = extract_summary(summary)
    placeholder = "https://i.ibb.co/TBJqggw/Image-Not-Found.jpg"
    try:
        generated = generate_image(title, category, condensed)
        if not generated:
            return placeholder
        # Everything after the first comma of the data URI is the payload.
        payload = fix_base64_padding(generated.split(",")[1])
        raw_bytes = base64.b64decode(payload, validate=True)
        if verify_image(raw_bytes):
            return f"data:image/png;base64,{payload}"
        return placeholder
    except Exception as e:
        print(f"Error fetching image: {e}")
        return placeholder
    finally:
        # NOTE(review): nothing visible in this function writes image.png;
        # presumably a leftover from a helper — confirm it is still needed.
        if os.path.exists("image.png"):
            os.remove("image.png")
|
|
|
|
|
108 |
|
109 |
if __name__ == "__main__":
|
110 |
title = "Exposition: Enumerative Geometry and Tree-Level Gromov-Witten Invariants"
|
111 |
category = "Mathematics"
|
112 |
+
summary = (
|
113 |
+
"The text discusses the Kontsevich-Manin formula for enumerating degree d rational curves via Gromov-Witten invariants. "
|
114 |
+
"It details the calculation of these invariants using moduli spaces of stable maps and explores their implications in enumerative geometry."
|
115 |
+
)
|
116 |
image_url = fetch_image(title, category, summary)
|
117 |
+
print(image_url)
|
main.py
CHANGED
@@ -9,108 +9,103 @@ from post_blog import post_blog
|
|
9 |
from send_mail import send_email
|
10 |
|
11 |
dotenv.load_dotenv()
|
12 |
-
|
13 |
|
14 |
-
def fix_text(text):
|
15 |
text = html.escape(text.encode('utf-8').decode('utf-8').replace("â¦", "..., "))
|
16 |
fixed_text = ""
|
17 |
for word in text.split():
|
18 |
try:
|
19 |
-
fixed_text += word.encode('latin1').decode('utf-8')+" "
|
20 |
-
except:
|
21 |
-
fixed_text += word+" "
|
22 |
return fixed_text.encode('utf-8').decode()
|
23 |
|
24 |
-
def paper_data(
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
retry_count = 0
|
44 |
-
while (not summary or not mindmap) and retry_count < max_retries:
|
45 |
-
try:
|
46 |
-
summary, mindmap = summarize_paper(pdf_url, paper_id, access_key)
|
47 |
-
if summary and mindmap:
|
48 |
-
break
|
49 |
-
except Exception as e:
|
50 |
-
print(f"Error summarizing paper {paper_id}: {e}")
|
51 |
-
retry_count += 1
|
52 |
-
if retry_count < max_retries:
|
53 |
-
print(f"Retrying paper {paper_id} in 3 minutes")
|
54 |
-
time.sleep(3*60)
|
55 |
-
if not summary or not mindmap:
|
56 |
-
print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
|
57 |
-
continue
|
58 |
try:
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
citation = html.escape(str(citation).strip())
|
63 |
-
summary = html.escape(str(summary).strip())
|
64 |
-
mindmap = html.escape(str(mindmap).strip())
|
65 |
-
status = post_blog(doi, title, category, summary, mindmap, citation, access_key, wait_time)
|
66 |
except Exception as e:
|
67 |
-
print(f"Error
|
68 |
-
|
69 |
-
|
70 |
-
"
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
-
def post_blogpost(uaccess_key, wait_time=5):
|
83 |
-
if uaccess_key !=
|
84 |
return False
|
85 |
data = fetch_paper_data_with_category(uaccess_key)
|
86 |
-
|
87 |
try:
|
88 |
-
send_email(
|
89 |
print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
|
90 |
except Exception as e:
|
91 |
print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
|
92 |
finally:
|
93 |
print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")
|
94 |
-
|
|
|
95 |
|
96 |
-
def test(uaccess_key):
|
97 |
-
if uaccess_key !=
|
98 |
return False
|
99 |
-
|
100 |
"Economics": {
|
101 |
-
"2501.00578":{
|
102 |
-
"paper_id":"2501.00578",
|
103 |
-
"doi":"https://doi.org/10.1002/alz.14328",
|
104 |
-
"title":"Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
|
105 |
-
"category":"Economics",
|
106 |
-
"pdf_url":"https://arxiv.org/pdf/2501.00578",
|
107 |
-
"citation":"Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
|
108 |
-
}
|
109 |
-
}
|
110 |
}
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
if __name__ == '__main__':
|
115 |
-
|
116 |
-
print(
|
|
|
9 |
from send_mail import send_email
|
10 |
|
11 |
dotenv.load_dotenv()
|
12 |
+
ACCESS_KEY = os.getenv("ACCESS_KEY")
|
13 |
|
14 |
+
def fix_text(text: str) -> str:
    """HTML-escape *text* and repair words that look like mis-decoded UTF-8.

    Each whitespace-separated word is re-encoded as Latin-1 and decoded as
    UTF-8; when that round trip fails the word is kept unchanged.

    Args:
        text: Raw text (e.g. a title or citation).

    Returns:
        The processed words, each followed by a single space. Runs of
        whitespace therefore collapse to one space, and a non-empty result
        ends with a trailing space.
    """
    # NOTE(review): the literal being replaced looks like a mojibake ellipsis;
    # confirm its exact characters against the original file.
    escaped = html.escape(text.replace("â¦", "..., "))
    repaired = []
    for word in escaped.split():
        try:
            repaired.append(word.encode('latin1').decode('utf-8'))
        except UnicodeError:
            # Not representable in Latin-1, or not valid UTF-8 once
            # re-encoded: the word was not mojibake, keep it as it is.
            repaired.append(word)
    return "".join(f"{word} " for word in repaired)
|
23 |
|
24 |
+
def paper_data(paper_data_json: str, wait_time: int = 5) -> str:
    """Summarize and post every paper in a category -> papers JSON document.

    For each paper with complete details, ``summarize_paper`` is tried up to
    three times (with a three-minute pause between attempts); on success the
    escaped fields are handed to ``post_blog``.

    Args:
        paper_data_json: JSON string mapping category name to a mapping of
            paper ID -> details (``doi``, ``pdf_url``, ``title``,
            ``citation``).
        wait_time: Forwarded unchanged to ``post_blog``.

    Returns:
        A JSON string ``{"status": "success", "data": {category: {paper_id:
        record}}}`` holding one record per paper for which ``post_blog``
        returned without raising. Papers with missing details, failed
        summaries or a raising ``post_blog`` call are skipped.
    """
    result_data = {"status": "success", "data": {}}
    for category, papers in json.loads(paper_data_json).items():
        print(f"Processing category: {category}")
        category_results = result_data["data"][category] = {}
        for paper_id, details in papers.items():
            doi = details.get("doi")
            pdf_url = details.get("pdf_url")
            raw_title = details.get("title")
            title = html.escape(raw_title) if raw_title else ""
            citation = details.get("citation")
            if not all([paper_id, doi, pdf_url, title, citation]):
                print(f"Skipping paper with ID: {paper_id} (missing details)")
                continue

            summary, mindmap = None, None
            max_retries = 3
            # NOTE(review): assumes the attempt counter advances after every
            # unsuccessful attempt (exception or empty result) — confirm
            # against the original indentation.
            for attempt in range(1, max_retries + 1):
                try:
                    summary, mindmap = summarize_paper(pdf_url, paper_id, ACCESS_KEY)
                    if summary and mindmap:
                        break
                except Exception as e:
                    print(f"Error summarizing paper {paper_id}: {e}")
                if attempt < max_retries:
                    print(f"Retrying paper {paper_id} in 3 minutes")
                    time.sleep(3 * 60)
            else:
                # Loop ended without a successful ``break``.
                print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
                continue

            try:
                fixed_title = html.escape(fix_text(title).strip())
                fixed_citation = html.escape(fix_text(citation).strip())
                fixed_summary = html.escape(str(summary).strip())
                fixed_mindmap = html.escape(str(mindmap).strip())
                post_status = post_blog(
                    doi,
                    fixed_title,
                    category,
                    fixed_summary,
                    fixed_mindmap,
                    fixed_citation,
                    ACCESS_KEY,
                    wait_time,
                )
            except Exception as e:
                print(f"Error posting blog '{title}': {e}")
                continue

            category_results[paper_id] = {
                "id": paper_id,
                "doi": doi,
                "title": fixed_title,
                "category": category,
                "posted": post_status,
                "citation": fixed_citation,
                "summary": fixed_summary,
                "mindmap": fixed_mindmap,
            }
    return json.dumps(result_data, indent=4, ensure_ascii=False)
|
76 |
|
77 |
+
def post_blogpost(uaccess_key: str, wait_time: int = 5) -> "str | bool":
    """Fetch, process and post all papers, then e-mail the result.

    Args:
        uaccess_key: Key supplied by the caller; must equal ``ACCESS_KEY``.
        wait_time: Forwarded to ``paper_data``.

    Returns:
        The JSON string produced by ``paper_data`` on success. ``False`` when
        the key does not match or when no paper data could be fetched.
        A failure to send the e-mail is logged but does not change the
        return value.
    """
    if uaccess_key != ACCESS_KEY:
        return False
    data = fetch_paper_data_with_category(uaccess_key)
    if data is None:
        # fetch_paper_data_with_category returns None when the fetch failed;
        # passing that on would make paper_data fail inside json.loads.
        print("\n-------------------------------------------------------\nError fetching paper data; nothing to process\n-------------------------------------------------------\n")
        return False
    processed_data = paper_data(data, wait_time)
    try:
        send_email(processed_data)
        print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
    except Exception as e:
        print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
    finally:
        print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")

    return processed_data
|
91 |
|
92 |
+
def test(uaccess_key: str) -> str:
    """Run the processing pipeline on one hard-coded sample paper.

    Args:
        uaccess_key: Key supplied by the caller; must equal ``ACCESS_KEY``.

    Returns:
        ``False`` when the key does not match; otherwise whatever
        ``paper_data`` returns for the single-paper sample document.
    """
    if uaccess_key != ACCESS_KEY:
        return False
    sample_id = "2501.00578"
    sample_paper = {
        "paper_id": sample_id,
        "doi": "https://doi.org/10.1002/alz.14328",
        "title": "Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
        "category": "Economics",
        "pdf_url": "https://arxiv.org/pdf/2501.00578",
        "citation": "Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
    }
    # Same category -> paper-id -> details shape that paper_data consumes.
    payload = json.dumps({"Economics": {sample_id: sample_paper}}, ensure_ascii=False, indent=4)
    return paper_data(payload)
|
108 |
+
|
|
|
109 |
if __name__ == '__main__':
|
110 |
+
result = test(ACCESS_KEY)
|
111 |
+
print(result)
|
post_blog.py
CHANGED
@@ -6,54 +6,53 @@ import mistune
|
|
6 |
from image import fetch_image
|
7 |
|
8 |
dotenv.load_dotenv()
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
14 |
|
15 |
def generate_post_html(doi, title, category, summary, mindmap, citation):
|
16 |
doi = doi.split("https://")[-1]
|
17 |
mindmap = mindmap.replace("{", r'{').replace("}", r'}')
|
18 |
-
|
|
|
19 |
image = fetch_image(title, category, summary)
|
20 |
-
html_summary = mistune.html(summary)
|
21 |
post = f"""
|
22 |
<div id="paper_post">
|
23 |
<img style="display:block; width:100%; height:100%;" id="paper_image"
|
24 |
-
|
25 |
-
|
26 |
-
<br>
|
27 |
-
<br>
|
28 |
<div id="paper_summary">
|
29 |
-
{html_summary
|
30 |
</div>
|
31 |
<br>
|
32 |
<h2>Mindmap</h2>
|
33 |
<p><small><em>If MindMap doesn't load, please try refreshing the page.</em></small></p>
|
34 |
<div class="markmap" id="paper_mindmap">
|
35 |
<script type="text/template">
|
36 |
-
|
37 |
-
|
38 |
</div>
|
39 |
<br>
|
40 |
<h2>Citation</h2>
|
41 |
<div id="paper_citation">
|
42 |
-
{
|
43 |
</div>
|
44 |
<script>
|
45 |
-
const paperImage = document.querySelector(
|
46 |
-
'img[style="display:block; width:100%; height:100%;"][id="paper_image"]'
|
47 |
-
);
|
48 |
if (paperImage) {{
|
49 |
-
const
|
50 |
-
|
51 |
-
const
|
52 |
-
if (
|
53 |
-
|
54 |
}}
|
55 |
}}
|
56 |
-
|
57 |
<script>
|
58 |
window.markmap = {{
|
59 |
autoLoader: {{
|
@@ -65,10 +64,9 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
|
|
65 |
<script>
|
66 |
window.addEventListener('load', function() {{
|
67 |
setTimeout(function() {{
|
68 |
-
const
|
69 |
-
|
70 |
-
|
71 |
-
element.click();
|
72 |
}} else {{
|
73 |
console.log('Element not found');
|
74 |
}}
|
@@ -79,7 +77,6 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
|
|
79 |
.markmap {{
|
80 |
position: relative;
|
81 |
}}
|
82 |
-
|
83 |
.markmap > svg {{
|
84 |
width: 100%;
|
85 |
border: 2px solid #000;
|
@@ -109,83 +106,96 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
|
|
109 |
"""
|
110 |
return post, image
|
111 |
|
|
|
112 |
def create_post(doi, title, category, summary, mindmap, citation):
|
113 |
-
post_title = title
|
114 |
-
post_category = f"{category}"
|
115 |
try:
|
116 |
post_body, post_image = generate_post_html(doi, title, category, summary, mindmap, citation)
|
117 |
-
# print("_____________________\n\n",title,"\n\n_____________________")
|
118 |
-
# with open('index.html', 'w', encoding='utf-8') as f:
|
119 |
-
# f.write(post_body)
|
120 |
-
# exit()
|
121 |
except Exception as e:
|
122 |
print(f"Error generating post: {e}")
|
123 |
return None, None, None, None
|
|
|
|
|
|
|
|
|
|
|
124 |
return post_title, post_category, post_body, post_image
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
def post_post(title, category, body, image):
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
try:
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
'refresh_token': refresh_token,
|
135 |
-
'client_id': client_id,
|
136 |
-
},
|
137 |
-
).json()
|
138 |
-
url = f"https://blogger.googleapis.com/v3/blogs/{blog_id}/posts"
|
139 |
-
headers = {
|
140 |
-
'Authorization': f"Bearer {data['access_token']}",
|
141 |
-
"content-type": "application/json"
|
142 |
-
}
|
143 |
-
post_data = {
|
144 |
-
"kind": "blogger#post",
|
145 |
-
"blog": {
|
146 |
-
"id": blog_id
|
147 |
-
},
|
148 |
-
"images": [{
|
149 |
-
"url": image
|
150 |
-
}],
|
151 |
-
"title": r"{}".format(title),
|
152 |
-
"content": body,
|
153 |
-
"labels": [category, "ZZZZZZZZZ"]
|
154 |
-
}
|
155 |
-
response = requests.post(url, headers=headers, json=post_data).json()
|
156 |
-
if response['status'] != 'LIVE':
|
157 |
-
print(response)
|
158 |
-
if response['status'] == 'LIVE':
|
159 |
-
print(f"The post '{title}' is {response['status']}")
|
160 |
return True
|
161 |
else:
|
162 |
-
print(
|
163 |
-
print(f"Error posting {title}: {response}")
|
164 |
return False
|
165 |
except Exception as e:
|
166 |
-
print(response)
|
167 |
print(f"Error posting {title}: {e}")
|
168 |
return False
|
169 |
|
|
|
170 |
def post_blog(doi, title, category, summary, mindmap, citation, uaccess_key, wait_time=5):
|
171 |
-
if uaccess_key !=
|
172 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
else:
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
else:
|
189 |
-
print('Failed to create post')
|
190 |
-
return False
|
191 |
-
|
|
|
6 |
from image import fetch_image
|
7 |
|
8 |
dotenv.load_dotenv()
|
9 |
+
ACCESS_KEY = os.getenv('ACCESS_KEY')
|
10 |
+
CLIENT_ID = os.getenv('CLIENT_ID')
|
11 |
+
CLIENT_SECRET = os.getenv('CLIENT_SECRET')
|
12 |
+
REFRESH_TOKEN = os.getenv('REFRESH_TOKEN')
|
13 |
+
BLOG_ID = os.getenv('BLOG_ID')
|
14 |
+
|
15 |
|
16 |
def generate_post_html(doi, title, category, summary, mindmap, citation):
|
17 |
doi = doi.split("https://")[-1]
|
18 |
mindmap = mindmap.replace("{", r'{').replace("}", r'}')
|
19 |
+
citation_clean = citation.replace("&", "&").replace("```plaintext\n", "").replace("\n```", "").strip()
|
20 |
+
citation_html = mistune.html(repr(citation_clean)[1:-1])
|
21 |
image = fetch_image(title, category, summary)
|
22 |
+
html_summary = mistune.html(summary).replace("&", "&").strip()
|
23 |
post = f"""
|
24 |
<div id="paper_post">
|
25 |
<img style="display:block; width:100%; height:100%;" id="paper_image"
|
26 |
+
src="{image.strip()}"
|
27 |
+
alt="{title.strip()}">
|
28 |
+
<br><br>
|
|
|
29 |
<div id="paper_summary">
|
30 |
+
{html_summary}
|
31 |
</div>
|
32 |
<br>
|
33 |
<h2>Mindmap</h2>
|
34 |
<p><small><em>If MindMap doesn't load, please try refreshing the page.</em></small></p>
|
35 |
<div class="markmap" id="paper_mindmap">
|
36 |
<script type="text/template">
|
37 |
+
{mindmap.replace("&", "&").replace(":", "=>").strip()}
|
38 |
+
</script>
|
39 |
</div>
|
40 |
<br>
|
41 |
<h2>Citation</h2>
|
42 |
<div id="paper_citation">
|
43 |
+
{citation_html}
|
44 |
</div>
|
45 |
<script>
|
46 |
+
const paperImage = document.querySelector('img[style="display:block; width:100%; height:100%;"][id="paper_image"]');
|
|
|
|
|
47 |
if (paperImage) {{
|
48 |
+
const toc = document.createElement("div");
|
49 |
+
toc.innerHTML = "<b>{{getToc}} $title={{Table of Contents}}</b>";
|
50 |
+
const brElem = paperImage.nextElementSibling;
|
51 |
+
if (brElem && brElem.tagName === "BR") {{
|
52 |
+
brElem.insertAdjacentElement("afterend", toc);
|
53 |
}}
|
54 |
}}
|
55 |
+
</script>
|
56 |
<script>
|
57 |
window.markmap = {{
|
58 |
autoLoader: {{
|
|
|
64 |
<script>
|
65 |
window.addEventListener('load', function() {{
|
66 |
setTimeout(function() {{
|
67 |
+
const elem = document.querySelector('div.mm-toolbar-item[title="Fit window size"]');
|
68 |
+
if (elem) {{
|
69 |
+
elem.click();
|
|
|
70 |
}} else {{
|
71 |
console.log('Element not found');
|
72 |
}}
|
|
|
77 |
.markmap {{
|
78 |
position: relative;
|
79 |
}}
|
|
|
80 |
.markmap > svg {{
|
81 |
width: 100%;
|
82 |
border: 2px solid #000;
|
|
|
106 |
"""
|
107 |
return post, image
|
108 |
|
109 |
+
|
110 |
def create_post(doi, title, category, summary, mindmap, citation):
    """Assemble the pieces of one blog post.

    Delegates the HTML body and image lookup to ``generate_post_html`` and
    normalises the title and category for publishing.

    Returns:
        A 4-tuple ``(post_title, post_category, post_body, post_image)``.
        Every element is ``None`` when HTML generation raised, or when the
        title still contains an encoded ampersand after one decoding pass.
    """
    try:
        post_body, post_image = generate_post_html(doi, title, category, summary, mindmap, citation)
    except Exception as e:
        print(f"Error generating post: {e}")
        return None, None, None, None
    # Decode HTML-encoded ampersands once. Replacing "&" with "&" was a no-op,
    # and testing for a bare "&" rejected every title that contained one.
    # NOTE(review): the "&amp;" literals are reconstructed - confirm against
    # the upstream file.
    post_title = title.replace("&amp;", "&")
    if "&amp;" in post_title:
        # Still encoded after one pass (e.g. "&amp;amp;"): refuse to publish.
        return None, None, None, None

    post_category = f"{category}"
    return post_title, post_category, post_body, post_image
|
122 |
|
123 |
+
|
124 |
+
def fetch_oauth_token():
    """Exchange the stored refresh token for an OAuth access token.

    Posts the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``REFRESH_TOKEN`` to Google's token endpoint using the
    ``refresh_token`` grant.

    Returns:
        The ``access_token`` value from the JSON response, or ``None`` when
        the request fails, the server answers with an error status, or the
        response carries no ``access_token`` field.
    """
    token_data = {
        'grant_type': 'refresh_token',
        'client_secret': CLIENT_SECRET,
        'refresh_token': REFRESH_TOKEN,
        'client_id': CLIENT_ID,
    }
    try:
        # requests never times out by default; bound the call so a stalled
        # connection cannot hang the whole publishing run.
        response = requests.post(
            'https://oauth2.googleapis.com/token',
            data=token_data,
            timeout=30,
        )
        response.raise_for_status()
        token_info = response.json()
        return token_info.get('access_token')
    except Exception as e:
        # Best-effort: callers treat None as "could not authenticate".
        print(f"Error fetching OAuth token: {e}")
        return None
|
139 |
+
|
140 |
+
|
141 |
def post_post(title, category, body, image):
    """Publish one post to the blog identified by ``BLOG_ID``.

    Args:
        title: Post title.
        category: Label attached to the post.
        body: HTML content of the post.
        image: URL placed in the post's ``images`` list.

    Returns:
        ``True`` when the API response reports ``status == 'LIVE'``.
        ``False`` when no access token could be obtained, the request
        failed, or the response reports any other status.
    """
    access_token = fetch_oauth_token()
    if not access_token:
        return False

    url = f"https://blogger.googleapis.com/v3/blogs/{BLOG_ID}/posts"
    headers = {
        'Authorization': f"Bearer {access_token}",
        "Content-Type": "application/json"
    }
    post_data = {
        "kind": "blogger#post",
        "blog": {"id": BLOG_ID},
        "images": [{"url": image}],
        "title": title,
        "content": body,
        # NOTE(review): the purpose of the fixed "ZZZZZZZZZ" label is not
        # visible in this file - confirm before changing it.
        "labels": [category, "ZZZZZZZZZ"]
    }
    try:
        # requests never times out by default; bound the call so a stalled
        # upload cannot block the caller indefinitely.
        response = requests.post(url, headers=headers, json=post_data, timeout=60)
        response.raise_for_status()
        result = response.json()
        if result.get('status') == 'LIVE':
            print(f"The post '{title}' is LIVE")
            return True
        else:
            print(f"Error posting {title}: {result}")
            return False
    except Exception as e:
        # Best-effort: any transport, HTTP or decoding failure means "not posted".
        print(f"Error posting {title}: {e}")
        return False
|
172 |
|
173 |
+
|
174 |
def post_blog(doi, title, category, summary, mindmap, citation, uaccess_key, wait_time=5):
    """Create and publish one blog post, then pause before returning.

    Args:
        doi, title, category, summary, mindmap, citation: Paper details
            forwarded to ``create_post``.
        uaccess_key: Caller-supplied key; must equal the module-level
            ``ACCESS_KEY`` or nothing is posted.
        wait_time: Pause after the publish attempt, in minutes
            (the code sleeps ``wait_time * 60`` seconds).

    Returns:
        ``True`` if the post was published, ``False`` if the key did not
        match, the post could not be built, or publishing failed.
    """
    if uaccess_key != ACCESS_KEY:
        return False
    post_title, post_category, post_body, post_image = create_post(doi, title, category, summary, mindmap, citation)
    if not all([post_title, post_category, post_body, post_image]):
        # create_post returns None for every field on failure, so report the
        # caller's title instead of the (usually None) post_title.
        print(f"Failed to create post {title}")
        return False
    status = post_post(post_title, post_category, post_body, post_image)
    # The pause runs whether or not publishing succeeded.
    print(f"Waiting for {wait_time * 60} seconds...")
    time.sleep(wait_time * 60)
    if status:
        print("Post created successfully")
        return True
    else:
        print("Failed to create post")
        return False
|
190 |
+
|
191 |
+
if __name__ == "__main__":
    # Manual smoke run with placeholder paper data.
    sample_paper = {
        "doi": "https://doi.org/10.1234/example",
        "title": "Example Title",
        "category": "Science",
        "summary": "This is an example summary in markdown format.",
        "mindmap": "{example: mindmap content}",
        "citation": "Example citation text",
    }

    post_success = post_blog(**sample_paper, uaccess_key=ACCESS_KEY)
    print("Post success:", post_success)
|
|
|
|
|
|
|
|
send_mail.py
CHANGED
@@ -1,39 +1,38 @@
|
|
1 |
-
from email import encoders
|
2 |
-
from email.mime.base import MIMEBase
|
3 |
import os
|
4 |
from datetime import datetime
|
|
|
|
|
|
|
5 |
from pytz import timezone
|
6 |
-
import pytz
|
7 |
import sib_api_v3_sdk
|
8 |
from sib_api_v3_sdk.rest import ApiException
|
9 |
from dotenv import load_dotenv
|
10 |
|
11 |
load_dotenv()
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
def mail_body(generation_details):
|
15 |
body = f"""
|
16 |
Hello,
|
|
|
17 |
These are the details of the Blogs Posted at ReXplore: Science @ Fingertips.
|
18 |
|
19 |
Date & Time: {get_current_time()}
|
20 |
|
21 |
-
|
22 |
{generation_details}
|
23 |
|
24 |
-
|
25 |
Regards,
|
26 |
Nayan Kasturi (Raanna),
|
27 |
Developer & Maintainer,
|
28 |
ReXplore.
|
29 |
"""
|
30 |
-
return body
|
31 |
-
|
32 |
-
def get_current_time():
|
33 |
-
fmt = "%d-%m-%Y %H:%M:%S %Z%z"
|
34 |
-
now_utc = datetime.now(timezone('UTC'))
|
35 |
-
now_asia = now_utc.astimezone(timezone('Asia/Kolkata'))
|
36 |
-
return now_asia.strftime(fmt)
|
37 |
|
38 |
def create_attachment(content, filename):
|
39 |
attachment = MIMEBase('application', 'octet-stream')
|
@@ -44,26 +43,45 @@ def create_attachment(content, filename):
|
|
44 |
|
45 |
def send_email(generation_details):
|
46 |
configuration = sib_api_v3_sdk.Configuration()
|
47 |
-
configuration.api_key['api-key'] =
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
try:
|
63 |
-
|
64 |
print("Email Sent")
|
65 |
return True
|
66 |
except ApiException as e:
|
67 |
-
print("
|
68 |
-
print("Exception when calling SMTPApi->send_transac_email:
|
69 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
from datetime import datetime
|
3 |
+
from email import encoders
|
4 |
+
from email.mime.base import MIMEBase
|
5 |
+
|
6 |
from pytz import timezone
|
|
|
7 |
import sib_api_v3_sdk
|
8 |
from sib_api_v3_sdk.rest import ApiException
|
9 |
from dotenv import load_dotenv
|
10 |
|
11 |
load_dotenv()
|
12 |
+
MAIL_API_KEY = os.getenv("MAIL_API")
|
13 |
+
|
14 |
+
def get_current_time():
    """Return the current time in the Asia/Kolkata zone as a formatted string.

    The format is ``%d-%m-%Y %H:%M:%S %Z%z``.
    """
    kolkata = timezone('Asia/Kolkata')
    # Start from an aware UTC timestamp, then convert to the target zone.
    local_now = datetime.now(timezone('UTC')).astimezone(kolkata)
    return local_now.strftime("%d-%m-%Y %H:%M:%S %Z%z")
|
19 |
|
20 |
def mail_body(generation_details):
    """Compose the plain-text report sent after a batch of posts.

    Args:
        generation_details: Text describing the batch; inserted verbatim
            between the timestamp and the sign-off.

    Returns:
        The message text without leading or trailing whitespace.
    """
    # NOTE(review): assumes the template lines carry no leading indentation,
    # as they appear in the reviewed source - confirm against the real file.
    lines = [
        "Hello,",
        "",
        "These are the details of the Blogs Posted at ReXplore: Science @ Fingertips.",
        "",
        f"Date & Time: {get_current_time()}",
        "",
        f"{generation_details}",
        "",
        "Regards,",
        "Nayan Kasturi (Raanna),",
        "Developer & Maintainer,",
        "ReXplore.",
    ]
    return "\n".join(lines)
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
def create_attachment(content, filename):
|
38 |
attachment = MIMEBase('application', 'octet-stream')
|
|
|
43 |
|
44 |
def send_email(generation_details):
    """Send the batch report by e-mail through the transactional mail API.

    The report text from ``mail_body`` is used as the message body and is
    also attached as ``data.txt``.

    Args:
        generation_details: Text describing the batch that was posted.

    Returns:
        ``True`` if the API call completed, ``False`` if it raised
        ``ApiException``.
    """
    config = sib_api_v3_sdk.Configuration()
    config.api_key['api-key'] = MAIL_API_KEY
    mailer = sib_api_v3_sdk.TransactionalEmailsApi(sib_api_v3_sdk.ApiClient(config))

    report_text = mail_body(generation_details)
    packed = create_attachment(report_text.encode('utf-8'), "data.txt")

    message = sib_api_v3_sdk.SendSmtpEmail(
        to=[{"email": "raannakasturi@proton.me"}],
        reply_to={
            "name": "Project Gatekeeper",
            "email": "gatekeeper@raannakasturi.eu.org",
        },
        attachment=[{
            "content": packed.get_payload(),
            "name": packed.get_filename(),
        }],
        text_content=report_text,
        sender={
            "name": "Project Gatekeeper",
            "email": "projectgatekeeper@silerudaagartha.eu.org",
        },
        subject=f"New Blog Batch Published to ReXplore at {get_current_time()}",
    )

    try:
        mailer.send_transac_email(message)
    except ApiException as e:
        print("Failed to send email:")
        print(f"Exception when calling SMTPApi->send_transac_email: {e}")
        return False
    else:
        print("Email Sent")
        return True
|
81 |
+
|
82 |
+
if __name__ == "__main__":
    # Manual smoke run: send one report with placeholder details.
    delivered = send_email("Example: 5 blogs generated and posted successfully.")
    print("Email sent successfully." if delivered else "Email sending failed.")
|
summarize_paper.py
CHANGED
@@ -1,31 +1,40 @@
|
|
1 |
import os
|
2 |
-
import dotenv
|
3 |
import json
|
4 |
from gradio_client import Client
|
|
|
5 |
|
6 |
dotenv.load_dotenv()
|
7 |
|
8 |
-
def summarize_paper(pdf_url, paper_id, access_key):
|
9 |
-
mindmap = None
|
10 |
summary = None
|
|
|
11 |
try:
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
)
|
16 |
result = summarizer_client.predict(
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
)
|
22 |
if result:
|
23 |
data = json.loads(result[0])
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
summary = data["summary"]
|
29 |
except Exception as e:
|
30 |
print(f"Error summarizing paper: {e}")
|
|
|
31 |
return summary, mindmap
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
|
|
2 |
import json
|
3 |
from gradio_client import Client
|
4 |
+
import dotenv
|
5 |
|
6 |
dotenv.load_dotenv()
|
7 |
|
8 |
+
def summarize_paper(pdf_url: str, paper_id: str, access_key: str):
    """Request a summary and a mindmap for one paper from the summarizer API.

    Args:
        pdf_url: URL of the paper's PDF, sent as ``url``.
        paper_id: Identifier of the paper, sent as ``id``.
        access_key: Key forwarded to the API as ``access_key``.

    Returns:
        ``(summary, mindmap)``. Each element is ``None`` unless the response
        marks the matching ``*_status`` field as ``"success"``. Any error
        (including a missing ``HF_API_TOKEN``) is printed and yields
        ``(None, None)`` or whatever was collected before the failure.
    """
    collected = {"summary": None, "mindmap": None}
    # (status key, value key) pairs, checked in this order.
    wanted = (("mindmap_status", "mindmap"), ("summary_status", "summary"))
    try:
        token = os.getenv("HF_API_TOKEN")
        if not token:
            raise ValueError("HF_API_TOKEN not found in environment variables.")
        client = Client("raannakasturi/ReXploreAPI", hf_token=token)
        response = client.predict(
            url=pdf_url,
            id=paper_id,
            access_key=access_key,
            api_name="/rexplore_summarizer",
        )
        if response:
            # The first element of the response holds the JSON payload.
            payload = json.loads(response[0])
            for status_key, value_key in wanted:
                if payload.get(status_key) == "success":
                    collected[value_key] = payload.get(value_key)
    except Exception as e:
        print(f"Error summarizing paper: {e}")

    return collected["summary"], collected["mindmap"]
|
32 |
+
|
33 |
+
|
34 |
+
if __name__ == "__main__":
    # Manual smoke run with placeholder inputs.
    demo_result = summarize_paper(
        pdf_url="https://example.com/paper.pdf",
        paper_id="12345",
        access_key="your_access_key_here",
    )
    for label, value in zip(("Summary:", "Mindmap:"), demo_result):
        print(label, value)
|