raannakasturi committed on
Commit
f50b29d
·
1 Parent(s): 319999c

Refactor summarization and email sending logic; improve error handling and environment variable checks

Browse files
Files changed (6) hide show
  1. fetch_data.py +30 -26
  2. image.py +61 -89
  3. main.py +77 -82
  4. post_blog.py +99 -89
  5. send_mail.py +49 -31
  6. summarize_paper.py +25 -16
fetch_data.py CHANGED
@@ -2,23 +2,23 @@ from gradio_client import Client
2
  import json
3
  from concurrent.futures import ThreadPoolExecutor, as_completed
4
 
5
- def fetch_category_ids(cat_ids_api_key):
6
- """Fetch category IDs using the category API."""
7
- if not cat_ids_api_key:
8
  raise ValueError("API access key not found. Please check your environment variables.")
9
  cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
10
  try:
11
  result = cat_ids_api_client.predict(
12
- user_access_key=cat_ids_api_key,
13
  api_name="/fetch_paper_ids"
14
  )
15
  cat_ids = json.loads(result)
16
- if cat_ids['status'] == 'success':
17
- return cat_ids['data']
18
  else:
 
19
  return None
20
  except Exception as e:
21
- print(f"Exception while fetching category IDs: {str(e)}")
22
  return None
23
 
24
  def fetch_single_paper_data(paper_id):
@@ -29,44 +29,48 @@ def fetch_single_paper_data(paper_id):
29
  api_name="/fetch_paper_data"
30
  )
31
  paper_data = json.loads(result)
32
- if paper_data['status'] == 'success':
33
- return paper_id, paper_data['data']
34
  else:
35
  print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
36
  return paper_id, None
37
  except Exception as e:
38
- print(f"Exception while fetching data for paper ID {paper_id}: {str(e)}")
39
  return paper_id, None
40
 
41
  def fetch_paper_data_concurrently(paper_ids, max_threads=12):
42
  paper_id_data = {}
43
  with ThreadPoolExecutor(max_workers=max_threads) as executor:
44
- future_to_paper_id = {executor.submit(fetch_single_paper_data, paper_id): paper_id for paper_id in paper_ids}
45
- for future in as_completed(future_to_paper_id):
46
- paper_id = future_to_paper_id[future]
47
  try:
48
- paper_id, data = future.result()
49
  if data:
50
- paper_id_data[paper_id] = data
51
  except Exception as e:
52
- print(f"Error fetching data for paper ID {paper_id}: {str(e)}")
53
  return paper_id_data
54
 
55
- def fetch_paper_data_with_category(cat_ids_api_key):
56
- data = {}
57
  try:
58
- cat_ids = fetch_category_ids(cat_ids_api_key)
59
  if cat_ids:
60
- for category, ids in cat_ids.items():
61
  print(f"Fetching data for category: {category}")
62
  try:
63
- paper_data = fetch_paper_data_concurrently(ids['ids'])
64
- if paper_data:
65
- data[category] = paper_data
 
 
 
 
66
  except Exception as e:
67
- print(f"Error fetching data for category {category}: {str(e)}")
68
  continue
69
- return json.dumps(data, indent=4, ensure_ascii=False)
70
  except Exception as e:
71
- print(f"Exception while fetching paper data by category: {str(e)}")
72
  return None
 
2
  import json
3
  from concurrent.futures import ThreadPoolExecutor, as_completed
4
 
5
+ def fetch_category_ids(api_key):
6
+ if not api_key:
 
7
  raise ValueError("API access key not found. Please check your environment variables.")
8
  cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
9
  try:
10
  result = cat_ids_api_client.predict(
11
+ user_access_key=api_key,
12
  api_name="/fetch_paper_ids"
13
  )
14
  cat_ids = json.loads(result)
15
+ if cat_ids.get('status') == 'success':
16
+ return cat_ids.get('data')
17
  else:
18
+ print(f"Failed to fetch category IDs: {cat_ids.get('message', 'No message provided')}")
19
  return None
20
  except Exception as e:
21
+ print(f"Exception while fetching category IDs: {e}")
22
  return None
23
 
24
  def fetch_single_paper_data(paper_id):
 
29
  api_name="/fetch_paper_data"
30
  )
31
  paper_data = json.loads(result)
32
+ if paper_data.get('status') == 'success':
33
+ return paper_id, paper_data.get('data')
34
  else:
35
  print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
36
  return paper_id, None
37
  except Exception as e:
38
+ print(f"Exception while fetching data for paper ID {paper_id}: {e}")
39
  return paper_id, None
40
 
41
  def fetch_paper_data_concurrently(paper_ids, max_threads=12):
42
  paper_id_data = {}
43
  with ThreadPoolExecutor(max_workers=max_threads) as executor:
44
+ future_to_paper = {executor.submit(fetch_single_paper_data, pid): pid for pid in paper_ids}
45
+ for future in as_completed(future_to_paper):
46
+ paper_id = future_to_paper[future]
47
  try:
48
+ pid, data = future.result()
49
  if data:
50
+ paper_id_data[pid] = data
51
  except Exception as e:
52
+ print(f"Error fetching data for paper ID {paper_id}: {e}")
53
  return paper_id_data
54
 
55
+ def fetch_paper_data_with_category(api_key):
56
+ all_data = {}
57
  try:
58
+ cat_ids = fetch_category_ids(api_key)
59
  if cat_ids:
60
+ for category, info in cat_ids.items():
61
  print(f"Fetching data for category: {category}")
62
  try:
63
+ paper_ids = info.get('ids', [])
64
+ if paper_ids:
65
+ paper_data = fetch_paper_data_concurrently(paper_ids)
66
+ if paper_data:
67
+ all_data[category] = paper_data
68
+ else:
69
+ print(f"No paper IDs found for category: {category}")
70
  except Exception as e:
71
+ print(f"Error fetching data for category {category}: {e}")
72
  continue
73
+ return json.dumps(all_data, indent=4, ensure_ascii=False)
74
  except Exception as e:
75
+ print(f"Exception while fetching paper data by category: {e}")
76
  return None
image.py CHANGED
@@ -2,22 +2,38 @@ import base64
2
  import io
3
  import os
4
  import re
5
- import requests
6
- from urllib.parse import quote
7
  from PIL import Image
8
  from g4f.client import Client
9
  from g4f.Provider import RetryProvider, PollinationsAI, ImageLabs, Blackbox, HuggingSpace, Airforce
10
  from g4f.Provider.hf_space.BlackForestLabsFlux1Schnell import BlackForestLabsFlux1Schnell
11
  from g4f.Provider.hf_space.VoodoohopFlux1Schnell import VoodoohopFlux1Schnell
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def extract_summary(text):
 
 
 
14
  text = text.replace("#", "").strip().lower()
15
  match = re.search(r"summary(.*?)highlights", text, re.DOTALL)
16
- if match:
17
- return match.group(1).strip()
18
- return text
19
 
20
  def fix_base64_padding(data):
 
 
 
21
  missing_padding = len(data) % 4
22
  if missing_padding:
23
  data += "=" * (4 - missing_padding)
@@ -25,84 +41,41 @@ def fix_base64_padding(data):
25
 
26
  def generate_image(title, category, summary):
27
  print("Generating image...")
28
- import time
29
  start = time.time()
30
- try:
 
 
 
 
 
 
 
 
31
  try:
32
- try:
33
- negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
34
- prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
35
- client = Client()
36
- time.sleep(20)
37
- img_data = client.images.generate(
38
- provider=RetryProvider(
39
- providers=[ImageLabs, PollinationsAI],
40
- shuffle=True,
41
- single_provider_retry=True,
42
- max_retries=5,
43
- ),
44
- model="sdxl-turbo",
45
- prompt=prompt,
46
- negative_prompt=negative,
47
- response_format="b64_json",
48
- width=1024,
49
- height=576,
50
- ).data[0].b64_json
51
- print(f"Image generated in {time.time() - start:.2f} seconds")
52
- if img_data:
53
- return f"data:image/png;base64,{img_data}"
54
- return None
55
- except Exception as e:
56
- print(f"Error generating image: {e}")
57
- negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
58
- prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
59
- client = Client()
60
- time.sleep(20)
61
- img_data = client.images.generate(
62
- provider=RetryProvider(
63
- providers=[Airforce, PollinationsAI, Blackbox],
64
- shuffle=True,
65
- single_provider_retry=True,
66
- max_retries=5,
67
- ),
68
- model="flux",
69
- prompt=prompt,
70
- negative_prompt=negative,
71
- response_format="b64_json",
72
- width=1024,
73
- height=576,
74
- ).data[0].b64_json
75
- print(f"Image generated in {time.time() - start:.2f} seconds")
76
- if img_data:
77
- return f"data:image/png;base64,{img_data}"
78
- return None
79
- except Exception as e:
80
- print(f"Error generating image: {e}")
81
- negative="low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, 2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, 3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, unsuitable, inappropriate, offensive, revealing, sexual, explicit",
82
- prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
83
- client = Client()
84
- time.sleep(20)
85
- img_data = client.images.generate(
86
- provider=RetryProvider(
87
- providers=[BlackForestLabsFlux1Schnell, VoodoohopFlux1Schnell, HuggingSpace],
88
- shuffle=True,
89
- single_provider_retry=True,
90
- max_retries=5,
91
- ),
92
- model="flux-schnell",
93
  prompt=prompt,
94
- negative_prompt=negative,
95
  response_format="b64_json",
96
  width=1024,
97
  height=576,
98
- ).data[0].b64_json
99
- print(f"Image generated in {time.time() - start:.2f} seconds")
 
 
100
  if img_data:
101
  return f"data:image/png;base64,{img_data}"
102
- return None
103
- except Exception as e:
104
- print(f"Error generating image: {e}")
105
- return None
106
 
107
  def verify_image(image_data):
108
  try:
@@ -115,31 +88,30 @@ def verify_image(image_data):
115
  return False
116
 
117
  def fetch_image(title, category, summary):
118
- title = r"{}".format(title)
119
- category = r"{}".format(category)
120
  summary = extract_summary(summary)
121
- image_url = "https://i.ibb.co/TBJqggw/Image-Not-Found.jpg"
122
  try:
123
  data_uri = generate_image(title, category, summary)
124
  if data_uri:
125
- base64_image = fix_base64_padding(data_uri.split(",")[1])
126
- if base64_image:
127
- if verify_image(base64.b64decode(base64_image, validate=True)):
128
- image_url = f"data:image/png;base64,{base64_image}"
129
- else:
130
- image_url = "https://i.ibb.co/TBJqggw/Image-Not-Found.jpg"
131
  except Exception as e:
132
  print(f"Error fetching image: {e}")
133
- image_url = "https://i.ibb.co/TBJqggw/Image-Not-Found.jpg"
134
  finally:
135
  if os.path.exists("image.png"):
136
  os.remove("image.png")
137
- return image_url
138
-
139
 
140
  if __name__ == "__main__":
141
  title = "Exposition: Enumerative Geometry and Tree-Level Gromov-Witten Invariants"
142
  category = "Mathematics"
143
- summary = "The text discusses the Kontsevich-Manin formula for enumerating degree d rational curves via Gromov-Witten invariants. It details the calculation of these invariants using moduli spaces of stable maps and explores their implications in enumerative geometry."
 
 
 
144
  image_url = fetch_image(title, category, summary)
145
- print(image_url)
 
2
  import io
3
  import os
4
  import re
5
+ import time
 
6
  from PIL import Image
7
  from g4f.client import Client
8
  from g4f.Provider import RetryProvider, PollinationsAI, ImageLabs, Blackbox, HuggingSpace, Airforce
9
  from g4f.Provider.hf_space.BlackForestLabsFlux1Schnell import BlackForestLabsFlux1Schnell
10
  from g4f.Provider.hf_space.VoodoohopFlux1Schnell import VoodoohopFlux1Schnell
11
 
12
+ NEGATIVE_PROMPT = (
13
+ "low quality, blurry, pixelated, bad anatomy, bad hands, three hands, three legs, bad arms, missing legs, "
14
+ "missing arms, poorly drawn face, poorly rendered hands, bad face, fused face, cloned face, worst face, "
15
+ "three crus, extra crus, fused crus, worst feet, three feet, fused feet, fused thigh, three thigh, extra thigh, "
16
+ "worst thigh, missing fingers, extra fingers, ugly fingers, long fingers, bad composition, horn, extra eyes, huge eyes, "
17
+ "2girl, amputation, disconnected limbs, cartoon, cg, 3d, unreal, animate, cgi, render, artwork, illustration, "
18
+ "3d render, cinema 4d, artstation, octane render, mutated body parts, painting, oil painting, 2d, sketch, bad photography, "
19
+ "bad photo, deviant art, aberrations, abstract, anime, black and white, collapsed, conjoined, creative, drawing, extra windows, "
20
+ "harsh lighting, jpeg artifacts, low saturation, monochrome, multiple levels, overexposed, oversaturated, photoshop, rotten, surreal, "
21
+ "twisted, UI, underexposed, unnatural, unreal engine, unrealistic, video game, deformed body features, NSFW, NUDE, vulgar, negative, "
22
+ "unsuitable, inappropriate, offensive, revealing, sexual, explicit"
23
+ )
24
+
25
  def extract_summary(text):
26
+ """
27
+ Clean and extract the summary portion from the text.
28
+ """
29
  text = text.replace("#", "").strip().lower()
30
  match = re.search(r"summary(.*?)highlights", text, re.DOTALL)
31
+ return match.group(1).strip() if match else text
 
 
32
 
33
  def fix_base64_padding(data):
34
+ """
35
+ Ensure that the base64 string has the proper padding.
36
+ """
37
  missing_padding = len(data) % 4
38
  if missing_padding:
39
  data += "=" * (4 - missing_padding)
 
41
 
42
  def generate_image(title, category, summary):
43
  print("Generating image...")
 
44
  start = time.time()
45
+ prompt = f"Generate accurate image representing the {category} concept: ```{title.strip()}: {summary.strip()}```"
46
+ client = Client()
47
+ attempts = [
48
+ ([ImageLabs, PollinationsAI], "sdxl-turbo"),
49
+ ([Airforce, PollinationsAI, Blackbox], "flux"),
50
+ ([BlackForestLabsFlux1Schnell, VoodoohopFlux1Schnell, HuggingSpace], "flux-schnell")
51
+ ]
52
+
53
+ for providers, model in attempts:
54
  try:
55
+ provider = RetryProvider(
56
+ providers=providers,
57
+ shuffle=True,
58
+ single_provider_retry=True,
59
+ max_retries=3,
60
+ )
61
+ response = client.images.generate(
62
+ provider=provider,
63
+ model=model,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  prompt=prompt,
65
+ negative_prompt=NEGATIVE_PROMPT,
66
  response_format="b64_json",
67
  width=1024,
68
  height=576,
69
+ )
70
+ img_data = response.data[0].b64_json
71
+ elapsed = time.time() - start
72
+ print(f"Image generated in {elapsed:.2f} seconds using model {model}")
73
  if img_data:
74
  return f"data:image/png;base64,{img_data}"
75
+ except Exception as e:
76
+ print(f"Attempt with model {model} failed: {e}")
77
+
78
+ return None
79
 
80
  def verify_image(image_data):
81
  try:
 
88
  return False
89
 
90
  def fetch_image(title, category, summary):
 
 
91
  summary = extract_summary(summary)
92
+ fallback_url = "https://i.ibb.co/TBJqggw/Image-Not-Found.jpg"
93
  try:
94
  data_uri = generate_image(title, category, summary)
95
  if data_uri:
96
+ base64_str = data_uri.split(",")[1]
97
+ base64_str = fix_base64_padding(base64_str)
98
+ decoded = base64.b64decode(base64_str, validate=True)
99
+ if verify_image(decoded):
100
+ return f"data:image/png;base64,{base64_str}"
101
+ return fallback_url
102
  except Exception as e:
103
  print(f"Error fetching image: {e}")
104
+ return fallback_url
105
  finally:
106
  if os.path.exists("image.png"):
107
  os.remove("image.png")
 
 
108
 
109
  if __name__ == "__main__":
110
  title = "Exposition: Enumerative Geometry and Tree-Level Gromov-Witten Invariants"
111
  category = "Mathematics"
112
+ summary = (
113
+ "The text discusses the Kontsevich-Manin formula for enumerating degree d rational curves via Gromov-Witten invariants. "
114
+ "It details the calculation of these invariants using moduli spaces of stable maps and explores their implications in enumerative geometry."
115
+ )
116
  image_url = fetch_image(title, category, summary)
117
+ print(image_url)
main.py CHANGED
@@ -9,108 +9,103 @@ from post_blog import post_blog
9
  from send_mail import send_email
10
 
11
  dotenv.load_dotenv()
12
- access_key = os.getenv("ACCESS_KEY")
13
 
14
- def fix_text(text):
15
  text = html.escape(text.encode('utf-8').decode('utf-8').replace("â¦", "..., "))
16
  fixed_text = ""
17
  for word in text.split():
18
  try:
19
- fixed_text += word.encode('latin1').decode('utf-8')+" "
20
- except:
21
- fixed_text += word+" "
22
  return fixed_text.encode('utf-8').decode()
23
 
24
- def paper_data(paper_data, wait_time=5):
25
- data = {"status": "success"}
26
- data['data'] = {}
27
- paper_data = json.loads(paper_data)
28
- for category, papers in paper_data.items():
29
- print(f"Processing category: {category}")
30
- data['data'][category] = {}
31
- for paper_id, details in papers.items():
32
- doi = details.get("doi")
33
- pdf_url = details.get("pdf_url")
34
- title = details.get("title")
35
- title = html.escape(title)
36
- citation = details.get("citation")
37
- if not all([paper_id, doi, pdf_url, title, citation]):
38
- print(f"Skipping paper with ID: {paper_id} (missing details)")
39
- continue
40
- summary = None
41
- mindmap = None
42
- max_retries = 3
43
- retry_count = 0
44
- while (not summary or not mindmap) and retry_count < max_retries:
45
- try:
46
- summary, mindmap = summarize_paper(pdf_url, paper_id, access_key)
47
- if summary and mindmap:
48
- break
49
- except Exception as e:
50
- print(f"Error summarizing paper {paper_id}: {e}")
51
- retry_count += 1
52
- if retry_count < max_retries:
53
- print(f"Retrying paper {paper_id} in 3 minutes")
54
- time.sleep(3*60)
55
- if not summary or not mindmap:
56
- print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
57
- continue
58
  try:
59
- title = fix_text(title)
60
- citation = fix_text(citation)
61
- title = html.escape(str(title).strip())
62
- citation = html.escape(str(citation).strip())
63
- summary = html.escape(str(summary).strip())
64
- mindmap = html.escape(str(mindmap).strip())
65
- status = post_blog(doi, title, category, summary, mindmap, citation, access_key, wait_time)
66
  except Exception as e:
67
- print(f"Error posting blog '{title}': {e}")
68
- continue
69
- data['data'][category][paper_id] = {
70
- "id": paper_id,
71
- "doi": doi,
72
- "title": title,
73
- "category": category,
74
- "posted": status,
75
- "citation": citation,
76
- "summary": summary,
77
- "mindmap": mindmap,
78
- }
79
- data = json.dumps(data, indent=4, ensure_ascii=False)
80
- return data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- def post_blogpost(uaccess_key, wait_time=5):
83
- if uaccess_key != access_key:
84
  return False
85
  data = fetch_paper_data_with_category(uaccess_key)
86
- pdata = paper_data(data, wait_time)
87
  try:
88
- send_email(pdata)
89
  print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
90
  except Exception as e:
91
  print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
92
  finally:
93
  print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")
94
- return pdata
 
95
 
96
- def test(uaccess_key):
97
- if uaccess_key != access_key:
98
  return False
99
- data = {
100
  "Economics": {
101
- "2501.00578":{
102
- "paper_id":"2501.00578",
103
- "doi":"https://doi.org/10.1002/alz.14328",
104
- "title":"Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
105
- "category":"Economics",
106
- "pdf_url":"https://arxiv.org/pdf/2501.00578",
107
- "citation":"Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
108
- },
109
- },
110
  }
111
- status = paper_data(json.dumps(data, ensure_ascii=False, indent=4))
112
- return status
113
-
114
  if __name__ == '__main__':
115
- data = test(access_key)
116
- print(data)
 
9
  from send_mail import send_email
10
 
11
  dotenv.load_dotenv()
12
+ ACCESS_KEY = os.getenv("ACCESS_KEY")
13
 
14
+ def fix_text(text: str) -> str:
15
  text = html.escape(text.encode('utf-8').decode('utf-8').replace("â¦", "..., "))
16
  fixed_text = ""
17
  for word in text.split():
18
  try:
19
+ fixed_text += word.encode('latin1').decode('utf-8') + " "
20
+ except Exception:
21
+ fixed_text += word + " "
22
  return fixed_text.encode('utf-8').decode()
23
 
24
+ def paper_data(paper_data_json: str, wait_time: int = 5) -> str:
25
+ result_data = {"status": "success", "data": {}}
26
+ papers_by_category = json.loads(paper_data_json)
27
+ for category, papers in papers_by_category.items():
28
+ print(f"Processing category: {category}")
29
+ result_data["data"][category] = {}
30
+ for paper_id, details in papers.items():
31
+ doi = details.get("doi")
32
+ pdf_url = details.get("pdf_url")
33
+ title = details.get("title")
34
+ title = html.escape(title) if title else ""
35
+ citation = details.get("citation")
36
+ if not all([paper_id, doi, pdf_url, title, citation]):
37
+ print(f"Skipping paper with ID: {paper_id} (missing details)")
38
+ continue
39
+ summary, mindmap = None, None
40
+ max_retries = 3
41
+ retry_count = 0
42
+ while (not summary or not mindmap) and retry_count < max_retries:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  try:
44
+ summary, mindmap = summarize_paper(pdf_url, paper_id, ACCESS_KEY)
45
+ if summary and mindmap:
46
+ break
 
 
 
 
47
  except Exception as e:
48
+ print(f"Error summarizing paper {paper_id}: {e}")
49
+ retry_count += 1
50
+ if retry_count < max_retries:
51
+ print(f"Retrying paper {paper_id} in 3 minutes")
52
+ time.sleep(3 * 60)
53
+ if not summary or not mindmap:
54
+ print(f"Failed to summarize paper {paper_id} after {max_retries} attempts")
55
+ continue
56
+ try:
57
+ fixed_title = html.escape(fix_text(title).strip())
58
+ fixed_citation = html.escape(fix_text(citation).strip())
59
+ fixed_summary = html.escape(str(summary).strip())
60
+ fixed_mindmap = html.escape(str(mindmap).strip())
61
+ post_status = post_blog(doi, fixed_title, category, fixed_summary, fixed_mindmap, fixed_citation, ACCESS_KEY, wait_time)
62
+ except Exception as e:
63
+ print(f"Error posting blog '{title}': {e}")
64
+ continue
65
+ result_data["data"][category][paper_id] = {
66
+ "id": paper_id,
67
+ "doi": doi,
68
+ "title": fixed_title,
69
+ "category": category,
70
+ "posted": post_status,
71
+ "citation": fixed_citation,
72
+ "summary": fixed_summary,
73
+ "mindmap": fixed_mindmap,
74
+ }
75
+ return json.dumps(result_data, indent=4, ensure_ascii=False)
76
 
77
+ def post_blogpost(uaccess_key: str, wait_time: int = 5) -> str:
78
+ if uaccess_key != ACCESS_KEY:
79
  return False
80
  data = fetch_paper_data_with_category(uaccess_key)
81
+ processed_data = paper_data(data, wait_time)
82
  try:
83
+ send_email(processed_data)
84
  print("\n-------------------------------------------------------\nMail Sent\n-------------------------------------------------------\n")
85
  except Exception as e:
86
  print(f"\n-------------------------------------------------------\nError sending mail: {e}\n-------------------------------------------------------\n")
87
  finally:
88
  print("\n-------------------------------------------------------\nProcess Completed\n-------------------------------------------------------\n")
89
+
90
+ return processed_data
91
 
92
+ def test(uaccess_key: str) -> str:
93
+ if uaccess_key != ACCESS_KEY:
94
  return False
95
+ test_data = {
96
  "Economics": {
97
+ "2501.00578": {
98
+ "paper_id": "2501.00578",
99
+ "doi": "https://doi.org/10.1002/alz.14328",
100
+ "title": "Bound-State Beta Decay of $\\mathbf{\\mathrm{^{205}{Tl}^{81+}}}$ Ions and the LOREX Project",
101
+ "category": "Economics",
102
+ "pdf_url": "https://arxiv.org/pdf/2501.00578",
103
+ "citation": "Miller, A. D. (2025). The limits of tolerance (Version 1). arXiv. https://doi.org/10.48550/ARXIV.2501.00578",
104
+ }
105
+ }
106
  }
107
+ return paper_data(json.dumps(test_data, ensure_ascii=False, indent=4))
108
+
 
109
  if __name__ == '__main__':
110
+ result = test(ACCESS_KEY)
111
+ print(result)
post_blog.py CHANGED
@@ -6,54 +6,53 @@ import mistune
6
  from image import fetch_image
7
 
8
  dotenv.load_dotenv()
9
- access_key = os.getenv('ACCESS_KEY')
10
- client_id = os.getenv('CLIENT_ID')
11
- client_secret = os.getenv('CLIENT_SECRET')
12
- refresh_token = os.getenv('REFRESH_TOKEN')
13
- blog_id = os.getenv('BLOG_ID')
 
14
 
15
  def generate_post_html(doi, title, category, summary, mindmap, citation):
16
  doi = doi.split("https://")[-1]
17
  mindmap = mindmap.replace("{", r'{').replace("}", r'}')
18
- citation = mistune.html(repr(citation.replace("&amp;", "&").replace("```plaintext\n", "").replace("\n```", "").strip())[1:-1])
 
19
  image = fetch_image(title, category, summary)
20
- html_summary = mistune.html(summary)
21
  post = f"""
22
  <div id="paper_post">
23
  <img style="display:block; width:100%; height:100%;" id="paper_image"
24
- src="{image.strip()}"
25
- alt="{title.strip()}">
26
- <br>
27
- <br>
28
  <div id="paper_summary">
29
- {html_summary.replace("&amp;", "&").strip()}
30
  </div>
31
  <br>
32
  <h2>Mindmap</h2>
33
  <p><small><em>If MindMap doesn't load, please try refreshing the page.</em></small></p>
34
  <div class="markmap" id="paper_mindmap">
35
  <script type="text/template">
36
- {mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
37
- </script>
38
  </div>
39
  <br>
40
  <h2>Citation</h2>
41
  <div id="paper_citation">
42
- {citation}
43
  </div>
44
  <script>
45
- const paperImage = document.querySelector(
46
- 'img[style="display:block; width:100%; height:100%;"][id="paper_image"]'
47
- );
48
  if (paperImage) {{
49
- const tablOfContents = document.createElement("div");
50
- tablOfContents.innerHTML = "<b>{{getToc}} $title={{Table of Contents}}</b>";
51
- const brElement = paperImage.nextElementSibling;
52
- if (brElement && brElement.tagName === "BR") {{
53
- brElement.insertAdjacentElement("afterend", tablOfContents);
54
  }}
55
  }}
56
- </script>
57
  <script>
58
  window.markmap = {{
59
  autoLoader: {{
@@ -65,10 +64,9 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
65
  <script>
66
  window.addEventListener('load', function() {{
67
  setTimeout(function() {{
68
- const element = document.querySelector('div.mm-toolbar-item[title="Fit window size"]');
69
- console.log(element);
70
- if (element) {{
71
- element.click();
72
  }} else {{
73
  console.log('Element not found');
74
  }}
@@ -79,7 +77,6 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
79
  .markmap {{
80
  position: relative;
81
  }}
82
-
83
  .markmap > svg {{
84
  width: 100%;
85
  border: 2px solid #000;
@@ -109,83 +106,96 @@ def generate_post_html(doi, title, category, summary, mindmap, citation):
109
  """
110
  return post, image
111
 
 
112
  def create_post(doi, title, category, summary, mindmap, citation):
113
- post_title = title
114
- post_category = f"{category}"
115
  try:
116
  post_body, post_image = generate_post_html(doi, title, category, summary, mindmap, citation)
117
- # print("_____________________\n\n",title,"\n\n_____________________")
118
- # with open('index.html', 'w', encoding='utf-8') as f:
119
- # f.write(post_body)
120
- # exit()
121
  except Exception as e:
122
  print(f"Error generating post: {e}")
123
  return None, None, None, None
 
 
 
 
 
124
  return post_title, post_category, post_body, post_image
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  def post_post(title, category, body, image):
127
- response = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  try:
129
- data = requests.post(
130
- url='https://oauth2.googleapis.com/token',
131
- data={
132
- 'grant_type': 'refresh_token',
133
- 'client_secret': client_secret,
134
- 'refresh_token': refresh_token,
135
- 'client_id': client_id,
136
- },
137
- ).json()
138
- url = f"https://blogger.googleapis.com/v3/blogs/{blog_id}/posts"
139
- headers = {
140
- 'Authorization': f"Bearer {data['access_token']}",
141
- "content-type": "application/json"
142
- }
143
- post_data = {
144
- "kind": "blogger#post",
145
- "blog": {
146
- "id": blog_id
147
- },
148
- "images": [{
149
- "url": image
150
- }],
151
- "title": r"{}".format(title),
152
- "content": body,
153
- "labels": [category, "ZZZZZZZZZ"]
154
- }
155
- response = requests.post(url, headers=headers, json=post_data).json()
156
- if response['status'] != 'LIVE':
157
- print(response)
158
- if response['status'] == 'LIVE':
159
- print(f"The post '{title}' is {response['status']}")
160
  return True
161
  else:
162
- print(response)
163
- print(f"Error posting {title}: {response}")
164
  return False
165
  except Exception as e:
166
- print(response)
167
  print(f"Error posting {title}: {e}")
168
  return False
169
 
 
170
  def post_blog(doi, title, category, summary, mindmap, citation, uaccess_key, wait_time=5):
171
- if uaccess_key != access_key:
172
  return False
 
 
 
 
 
 
 
 
 
 
173
  else:
174
- status = True
175
- post_title, post_category, post_body, post_image = create_post(doi, title, category, summary, mindmap, citation)
176
- if not all([post_title, post_category, post_body, post_image]):
177
- print(f'Failed to create post {post_title}')
178
- return False
179
- post_title = post_title.replace("&amp;", "&")
180
- if "&amp;" in post_title:
181
- return False
182
- status = post_post(post_title, post_category, post_body, post_image)
183
- print(f"Waiting for {wait_time*60} seconds...")
184
- time.sleep(wait_time*60)
185
- if status:
186
- print('Post created successfully')
187
- return True
188
- else:
189
- print('Failed to create post')
190
- return False
191
-
 
6
  from image import fetch_image
7
 
8
  dotenv.load_dotenv()
9
+ ACCESS_KEY = os.getenv('ACCESS_KEY')
10
+ CLIENT_ID = os.getenv('CLIENT_ID')
11
+ CLIENT_SECRET = os.getenv('CLIENT_SECRET')
12
+ REFRESH_TOKEN = os.getenv('REFRESH_TOKEN')
13
+ BLOG_ID = os.getenv('BLOG_ID')
14
+
15
 
16
  def generate_post_html(doi, title, category, summary, mindmap, citation):
17
  doi = doi.split("https://")[-1]
18
  mindmap = mindmap.replace("{", r'{').replace("}", r'}')
19
+ citation_clean = citation.replace("&amp;", "&").replace("```plaintext\n", "").replace("\n```", "").strip()
20
+ citation_html = mistune.html(repr(citation_clean)[1:-1])
21
  image = fetch_image(title, category, summary)
22
+ html_summary = mistune.html(summary).replace("&amp;", "&").strip()
23
  post = f"""
24
  <div id="paper_post">
25
  <img style="display:block; width:100%; height:100%;" id="paper_image"
26
+ src="{image.strip()}"
27
+ alt="{title.strip()}">
28
+ <br><br>
 
29
  <div id="paper_summary">
30
+ {html_summary}
31
  </div>
32
  <br>
33
  <h2>Mindmap</h2>
34
  <p><small><em>If MindMap doesn't load, please try refreshing the page.</em></small></p>
35
  <div class="markmap" id="paper_mindmap">
36
  <script type="text/template">
37
+ {mindmap.replace("&amp;", "&").replace(":", "=>").strip()}
38
+ </script>
39
  </div>
40
  <br>
41
  <h2>Citation</h2>
42
  <div id="paper_citation">
43
+ {citation_html}
44
  </div>
45
  <script>
46
+ const paperImage = document.querySelector('img[style="display:block; width:100%; height:100%;"][id="paper_image"]');
 
 
47
  if (paperImage) {{
48
+ const toc = document.createElement("div");
49
+ toc.innerHTML = "<b>{{getToc}} $title={{Table of Contents}}</b>";
50
+ const brElem = paperImage.nextElementSibling;
51
+ if (brElem && brElem.tagName === "BR") {{
52
+ brElem.insertAdjacentElement("afterend", toc);
53
  }}
54
  }}
55
+ </script>
56
  <script>
57
  window.markmap = {{
58
  autoLoader: {{
 
64
  <script>
65
  window.addEventListener('load', function() {{
66
  setTimeout(function() {{
67
+ const elem = document.querySelector('div.mm-toolbar-item[title="Fit window size"]');
68
+ if (elem) {{
69
+ elem.click();
 
70
  }} else {{
71
  console.log('Element not found');
72
  }}
 
77
  .markmap {{
78
  position: relative;
79
  }}
 
80
  .markmap > svg {{
81
  width: 100%;
82
  border: 2px solid #000;
 
106
  """
107
  return post, image
108
 
109
+
110
  def create_post(doi, title, category, summary, mindmap, citation):
 
 
111
  try:
112
  post_body, post_image = generate_post_html(doi, title, category, summary, mindmap, citation)
 
 
 
 
113
  except Exception as e:
114
  print(f"Error generating post: {e}")
115
  return None, None, None, None
116
+ post_title = title.replace("&amp;", "&")
117
+ if "&amp;" in post_title:
118
+ return None, None, None, None
119
+
120
+ post_category = f"{category}"
121
  return post_title, post_category, post_body, post_image
122
 
123
+
124
+ def fetch_oauth_token():
125
+ token_data = {
126
+ 'grant_type': 'refresh_token',
127
+ 'client_secret': CLIENT_SECRET,
128
+ 'refresh_token': REFRESH_TOKEN,
129
+ 'client_id': CLIENT_ID,
130
+ }
131
+ try:
132
+ response = requests.post('https://oauth2.googleapis.com/token', data=token_data)
133
+ response.raise_for_status()
134
+ token_info = response.json()
135
+ return token_info.get('access_token')
136
+ except Exception as e:
137
+ print(f"Error fetching OAuth token: {e}")
138
+ return None
139
+
140
+
141
  def post_post(title, category, body, image):
142
+ access_token = fetch_oauth_token()
143
+ if not access_token:
144
+ return False
145
+
146
+ url = f"https://blogger.googleapis.com/v3/blogs/{BLOG_ID}/posts"
147
+ headers = {
148
+ 'Authorization': f"Bearer {access_token}",
149
+ "Content-Type": "application/json"
150
+ }
151
+ post_data = {
152
+ "kind": "blogger#post",
153
+ "blog": {"id": BLOG_ID},
154
+ "images": [{"url": image}],
155
+ "title": title,
156
+ "content": body,
157
+ "labels": [category, "ZZZZZZZZZ"]
158
+ }
159
  try:
160
+ response = requests.post(url, headers=headers, json=post_data)
161
+ response.raise_for_status()
162
+ result = response.json()
163
+ if result.get('status') == 'LIVE':
164
+ print(f"The post '{title}' is LIVE")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  return True
166
  else:
167
+ print(f"Error posting {title}: {result}")
 
168
  return False
169
  except Exception as e:
 
170
  print(f"Error posting {title}: {e}")
171
  return False
172
 
173
+
174
  def post_blog(doi, title, category, summary, mindmap, citation, uaccess_key, wait_time=5):
175
+ if uaccess_key != ACCESS_KEY:
176
  return False
177
+ post_title, post_category, post_body, post_image = create_post(doi, title, category, summary, mindmap, citation)
178
+ if not all([post_title, post_category, post_body, post_image]):
179
+ print(f"Failed to create post {post_title}")
180
+ return False
181
+ status = post_post(post_title, post_category, post_body, post_image)
182
+ print(f"Waiting for {wait_time * 60} seconds...")
183
+ time.sleep(wait_time * 60)
184
+ if status:
185
+ print("Post created successfully")
186
+ return True
187
  else:
188
+ print("Failed to create post")
189
+ return False
190
+
191
+ if __name__ == "__main__":
192
+ doi = "https://doi.org/10.1234/example"
193
+ title = "Example Title"
194
+ category = "Science"
195
+ summary = "This is an example summary in markdown format."
196
+ mindmap = "{example: mindmap content}"
197
+ citation = "Example citation text"
198
+ uaccess_key = ACCESS_KEY
199
+
200
+ post_success = post_blog(doi, title, category, summary, mindmap, citation, uaccess_key)
201
+ print("Post success:", post_success)
 
 
 
 
send_mail.py CHANGED
@@ -1,39 +1,38 @@
1
- from email import encoders
2
- from email.mime.base import MIMEBase
3
  import os
4
  from datetime import datetime
 
 
 
5
  from pytz import timezone
6
- import pytz
7
  import sib_api_v3_sdk
8
  from sib_api_v3_sdk.rest import ApiException
9
  from dotenv import load_dotenv
10
 
11
  load_dotenv()
12
- mail_api = os.getenv("MAIL_API")
 
 
 
 
 
 
13
 
14
  def mail_body(generation_details):
15
  body = f"""
16
  Hello,
 
17
  These are the details of the Blogs Posted at ReXplore: Science @ Fingertips.
18
 
19
  Date & Time: {get_current_time()}
20
 
21
-
22
  {generation_details}
23
 
24
-
25
  Regards,
26
  Nayan Kasturi (Raanna),
27
  Developer & Maintainer,
28
  ReXplore.
29
  """
30
- return body
31
-
32
- def get_current_time():
33
- fmt = "%d-%m-%Y %H:%M:%S %Z%z"
34
- now_utc = datetime.now(timezone('UTC'))
35
- now_asia = now_utc.astimezone(timezone('Asia/Kolkata'))
36
- return now_asia.strftime(fmt)
37
 
38
  def create_attachment(content, filename):
39
  attachment = MIMEBase('application', 'octet-stream')
@@ -44,26 +43,45 @@ def create_attachment(content, filename):
44
 
45
  def send_email(generation_details):
46
  configuration = sib_api_v3_sdk.Configuration()
47
- configuration.api_key['api-key'] = mail_api
48
- api_instance = sib_api_v3_sdk.TransactionalEmailsApi(sib_api_v3_sdk.ApiClient(configuration))
49
-
50
- data = mail_body(generation_details)
51
- data_attchment = create_attachment(data.encode('utf-8'), "data.txt")
52
-
53
- subject = "New Blog Batch Published to ReXplore at " + get_current_time()
54
- sender = {"name": "Project Gatekeeper", "email": "projectgatekeeper@silerudaagartha.eu.org"}
55
- reply_to = {"name": "Project Gatekeeper", "email": "gatekeeper@raannakasturi.eu.org"}
56
- text_content = data
57
- attachments = [
58
- {"content": data_attchment.get_payload(), "name": data_attchment.get_filename()},
59
- ]
60
- to = [{"email": "raannakasturi@proton.me"}]
61
- send_smtp_email = sib_api_v3_sdk.SendSmtpEmail(to=to, reply_to=reply_to, attachment=attachments, text_content=text_content, sender=sender, subject=subject)
 
 
 
 
 
 
 
 
 
 
 
 
62
  try:
63
- api_instance.send_transac_email(send_smtp_email)
64
  print("Email Sent")
65
  return True
66
  except ApiException as e:
67
- print("Can't send email")
68
- print("Exception when calling SMTPApi->send_transac_email: %s\n" % e)
69
  return False
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  from datetime import datetime
3
+ from email import encoders
4
+ from email.mime.base import MIMEBase
5
+
6
  from pytz import timezone
 
7
  import sib_api_v3_sdk
8
  from sib_api_v3_sdk.rest import ApiException
9
  from dotenv import load_dotenv
10
 
11
  load_dotenv()
12
+ MAIL_API_KEY = os.getenv("MAIL_API")
13
+
14
+ def get_current_time():
15
+ fmt = "%d-%m-%Y %H:%M:%S %Z%z"
16
+ now_utc = datetime.now(timezone('UTC'))
17
+ now_kolkata = now_utc.astimezone(timezone('Asia/Kolkata'))
18
+ return now_kolkata.strftime(fmt)
19
 
20
  def mail_body(generation_details):
21
  body = f"""
22
  Hello,
23
+
24
  These are the details of the Blogs Posted at ReXplore: Science @ Fingertips.
25
 
26
  Date & Time: {get_current_time()}
27
 
 
28
  {generation_details}
29
 
 
30
  Regards,
31
  Nayan Kasturi (Raanna),
32
  Developer & Maintainer,
33
  ReXplore.
34
  """
35
+ return body.strip()
 
 
 
 
 
 
36
 
37
  def create_attachment(content, filename):
38
  attachment = MIMEBase('application', 'octet-stream')
 
43
 
44
  def send_email(generation_details):
45
  configuration = sib_api_v3_sdk.Configuration()
46
+ configuration.api_key['api-key'] = MAIL_API_KEY
47
+ api_client = sib_api_v3_sdk.ApiClient(configuration)
48
+ transactional_api = sib_api_v3_sdk.TransactionalEmailsApi(api_client)
49
+ email_content = mail_body(generation_details)
50
+ attachment_obj = create_attachment(email_content.encode('utf-8'), "data.txt")
51
+ subject = f"New Blog Batch Published to ReXplore at {get_current_time()}"
52
+ sender = {
53
+ "name": "Project Gatekeeper",
54
+ "email": "projectgatekeeper@silerudaagartha.eu.org"
55
+ }
56
+ reply_to = {
57
+ "name": "Project Gatekeeper",
58
+ "email": "gatekeeper@raannakasturi.eu.org"
59
+ }
60
+ recipients = [{"email": "raannakasturi@proton.me"}]
61
+ attachments = [{
62
+ "content": attachment_obj.get_payload(),
63
+ "name": attachment_obj.get_filename()
64
+ }]
65
+ email = sib_api_v3_sdk.SendSmtpEmail(
66
+ to=recipients,
67
+ reply_to=reply_to,
68
+ attachment=attachments,
69
+ text_content=email_content,
70
+ sender=sender,
71
+ subject=subject
72
+ )
73
  try:
74
+ transactional_api.send_transac_email(email)
75
  print("Email Sent")
76
  return True
77
  except ApiException as e:
78
+ print("Failed to send email:")
79
+ print(f"Exception when calling SMTPApi->send_transac_email: {e}")
80
  return False
81
+
82
+ if __name__ == "__main__":
83
+ generation_details = "Example: 5 blogs generated and posted successfully."
84
+ if send_email(generation_details):
85
+ print("Email sent successfully.")
86
+ else:
87
+ print("Email sending failed.")
summarize_paper.py CHANGED
@@ -1,31 +1,40 @@
1
  import os
2
- import dotenv
3
  import json
4
  from gradio_client import Client
 
5
 
6
  dotenv.load_dotenv()
7
 
8
- def summarize_paper(pdf_url, paper_id, access_key):
9
- mindmap = None
10
  summary = None
 
11
  try:
12
- summarizer_client = Client(
13
- "raannakasturi/ReXploreAPI",
14
- hf_token=os.getenv("HF_API_TOKEN"),
15
- )
16
  result = summarizer_client.predict(
17
- url=pdf_url,
18
- id=paper_id,
19
- access_key=access_key,
20
- api_name="/rexplore_summarizer"
21
  )
22
  if result:
23
  data = json.loads(result[0])
24
- print
25
- if data["mindmap_status"] == "success":
26
- mindmap = data["mindmap"]
27
- if data["summary_status"] == "success":
28
- summary = data["summary"]
29
  except Exception as e:
30
  print(f"Error summarizing paper: {e}")
 
31
  return summary, mindmap
 
 
 
 
 
 
 
 
 
 
1
  import os
 
2
  import json
3
  from gradio_client import Client
4
+ import dotenv
5
 
6
  dotenv.load_dotenv()
7
 
8
+ def summarize_paper(pdf_url: str, paper_id: str, access_key: str):
 
9
  summary = None
10
+ mindmap = None
11
  try:
12
+ hf_api_token = os.getenv("HF_API_TOKEN")
13
+ if not hf_api_token:
14
+ raise ValueError("HF_API_TOKEN not found in environment variables.")
15
+ summarizer_client = Client("raannakasturi/ReXploreAPI", hf_token=hf_api_token)
16
  result = summarizer_client.predict(
17
+ url=pdf_url,
18
+ id=paper_id,
19
+ access_key=access_key,
20
+ api_name="/rexplore_summarizer"
21
  )
22
  if result:
23
  data = json.loads(result[0])
24
+ if data.get("mindmap_status") == "success":
25
+ mindmap = data.get("mindmap")
26
+ if data.get("summary_status") == "success":
27
+ summary = data.get("summary")
 
28
  except Exception as e:
29
  print(f"Error summarizing paper: {e}")
30
+
31
  return summary, mindmap
32
+
33
+
34
+ if __name__ == "__main__":
35
+ test_pdf_url = "https://example.com/paper.pdf"
36
+ test_paper_id = "12345"
37
+ test_access_key = "your_access_key_here"
38
+ paper_summary, paper_mindmap = summarize_paper(test_pdf_url, test_paper_id, test_access_key)
39
+ print("Summary:", paper_summary)
40
+ print("Mindmap:", paper_mindmap)