Saiteja Solleti commited on
Commit
5fed436
·
1 Parent(s): 411adbd

Add CalculateScores function (context relevance, utilization, completeness, adherence)

Browse files
app.py CHANGED
@@ -8,7 +8,8 @@ from sentence_transformers import SentenceTransformer
8
  from searchmilvushelper import SearchTopKDocuments
9
  from finetuneresults import FineTuneAndRerankSearchResults
10
  from generationhelper import GenerateAnswer
11
- from calculatescorehelper import CalculateScoresBasedOnAnswer
 
12
 
13
  from model import generate_response
14
  from huggingface_hub import login
@@ -54,6 +55,12 @@ completion_result = CalculateScoresBasedOnAnswer(query, reranked_results.head(1)
54
 
55
  print(completion_result)
56
 
 
 
 
 
 
 
57
 
58
  def chatbot(prompt):
59
  return whoami()
 
8
  from searchmilvushelper import SearchTopKDocuments
9
  from finetuneresults import FineTuneAndRerankSearchResults
10
  from generationhelper import GenerateAnswer
11
+ from formatresultshelper import CalculateScoresBasedOnAnswer
12
+ from calculatescores import CalculateScores
13
 
14
  from model import generate_response
15
  from huggingface_hub import login
 
55
 
56
  print(completion_result)
57
 
58
+ score1, score2, score3 = CalculateScores()
59
+
60
+ print(score1)
61
+ print(score2)
62
+ print(score3)
63
+
64
 
65
def chatbot(prompt):
    # Stub chat handler: ignores `prompt` and returns the huggingface_hub
    # whoami() payload (imported at the top of app.py).
    # NOTE(review): presumably a placeholder while the RAG pipeline above is
    # wired into the UI — confirm before shipping.
    return whoami()
calculatescores.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import formatresultshelper
2
+
3
def compute_context_relevance(relevant_sentences, support_keys):
    """Return the fraction of relevant sentences that are supported.

    Context relevance = |relevant sentences that appear in support_keys|
    divided by |relevant sentences|.

    Args:
        relevant_sentences: iterable of sentence keys judged relevant.
        support_keys: iterable of sentence keys cited as support.

    Returns:
        Ratio in [0, 1]; 0 when there are no relevant sentences
        (avoids division by zero).
    """
    # Guard first: empty input short-circuits before any work.
    if not relevant_sentences:
        return 0
    # Build the membership set once — O(1) lookups instead of O(n)
    # list scans per sentence.
    supported = set(support_keys)
    hits = sum(1 for sentence in relevant_sentences if sentence in supported)
    return hits / len(relevant_sentences)
17
+
18
def compute_context_utilization(relevant_sentences, utilization_levels):
    """Return the fraction of relevant sentences that were utilized.

    Context utilization = |relevant sentences that appear in
    utilization_levels| divided by |relevant sentences|.

    Args:
        relevant_sentences: iterable of sentence keys judged relevant.
        utilization_levels: iterable of sentence keys actually used in
            the generated answer.

    Returns:
        Ratio in [0, 1]; 0 when there are no relevant sentences
        (avoids division by zero).
    """
    # Guard first: empty input short-circuits before any work.
    if not relevant_sentences:
        return 0
    # Set membership is O(1) vs an O(n) list scan per sentence.
    utilized = set(utilization_levels)
    hits = sum(1 for sentence in relevant_sentences if sentence in utilized)
    return hits / len(relevant_sentences)
28
+
29
+
30
def CalculateScores(relevant_sentence_keys=None, support_keys=None,
                    all_utilized_sentence_keys=None, support_level=None):
    """Compute, print, and return the RAG evaluation scores.

    Each argument defaults to the matching attribute of
    ``formatresultshelper`` so existing callers (``CalculateScores()`` in
    app.py) keep working, while tests can inject data directly.
    NOTE(review): those names are assigned as *locals* inside
    ``CalculateScoresBasedOnAnswer``, not module globals — confirm they
    actually exist on the module at call time.

    Returns:
        (context_relevance, context_utilization, completeness) — a
        3-tuple matching ``score1, score2, score3 = CalculateScores()``
        in app.py (the original returned None, which made that unpack
        crash). Adherence is printed only, preserving the caller's
        3-value contract.
    """
    if relevant_sentence_keys is None:
        relevant_sentence_keys = formatresultshelper.relevant_sentence_keys
    if support_keys is None:
        support_keys = formatresultshelper.support_keys
    if all_utilized_sentence_keys is None:
        all_utilized_sentence_keys = formatresultshelper.all_utilized_sentence_keys
    if support_level is None:
        support_level = formatresultshelper.support_level

    contextrel = compute_context_relevance(relevant_sentence_keys, support_keys)
    print(f"Context Relevance = {contextrel}")

    contextutil = compute_context_utilization(relevant_sentence_keys,
                                              all_utilized_sentence_keys)
    print(f"Context Utilization = {contextutil}")

    # Completeness: |support ∩ utilized| / |support|. A plain set
    # intersection replaces np.intersect1d — numpy was never imported in
    # this file, so the original raised NameError here. Guard against an
    # empty support list (original divided by zero).
    overlap = set(support_keys) & set(all_utilized_sentence_keys)
    completeness = len(overlap) / len(support_keys) if support_keys else 0
    print(f"Completeness = {completeness}")

    # Adherence: 1 only when every response part is fully supported.
    # The original loop reset its accumulator (`prevval = 1`) on every
    # iteration — making the multiplication dead code — and raised
    # NameError when support_level was empty; all() expresses the
    # intended "no unsupported part" semantics directly.
    adherence = 1 if all(support_level) else 0
    print(f"Adherence = {adherence}")

    return contextrel, contextutil, completeness
53
+
54
+
55
+
56
def mse(actual, predicted):
    """Squared error between a single actual and predicted value."""
    delta = actual - predicted
    return delta ** 2
calculatescorehelper.py → formatresultshelper.py RENAMED
@@ -1,4 +1,5 @@
1
  import generationhelper
 
2
 
3
  def evaluate_response_with_prompt(templete, query, documents, answer, eval_model="llama-3.3-70b-specdec"):
4
 
@@ -33,8 +34,51 @@ def CalculateScoresBasedOnAnswer(query, documents, answer, eval_model):
33
  completion_results = evaluate_response_with_prompt(templete, query,documents, answer, eval_model)
34
 
35
  print(completion_results)
 
 
 
36
 
37
- return completion_results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
 
 
1
  import generationhelper
2
+ import json
3
 
4
  def evaluate_response_with_prompt(templete, query, documents, answer, eval_model="llama-3.3-70b-specdec"):
5
 
 
34
  completion_results = evaluate_response_with_prompt(templete, query,documents, answer, eval_model)
35
 
36
  print(completion_results)
37
+ completion_results_response = completion_results.choices[0].message.content
38
+ completion_results_response = completion_results_response.strip().strip('```')
39
+ print(completion_results_response)
40
 
41
+ # Check if response_content is empty
42
+ if not completion_results_response.strip():
43
+ raise ValueError("Empty response content")
44
+
45
+ # Decode if it's a byte string
46
+ if isinstance(completion_results_response, bytes):
47
+ completion_results_response = completion_results_response.decode('utf-8')
48
+
49
+ # Try to parse JSON
50
+ try:
51
+ data_json = json.loads(completion_results_response)
52
+ print("JSON parsed successfully:")
53
+ print(data_json)
54
+ except json.JSONDecodeError as e:
55
+ print(f"Failed to parse JSON: {e}")
56
+ print(f"Response content: {completion_results_response}")
57
+
58
+ relavance_explanation = data_json['relevance_explanation']
59
+ relevant_sentence_keys = data_json['all_relevant_sentence_keys']
60
+ overall_supported_explanation = data_json['overall_supported_explanation']
61
+ overall_supported = data_json['overall_supported']
62
+ sentence_support_information = data_json['sentence_support_information']
63
+ all_utilized_sentence_keys = data_json['all_utilized_sentence_keys']
64
+
65
+ print(relavance_explanation)
66
+ print(relevant_sentence_keys)
67
+ print(overall_supported_explanation)
68
+ print(overall_supported)
69
+ print(sentence_support_information)
70
+ print(all_utilized_sentence_keys)
71
+
72
+ support_keys = []
73
+ support_level = []
74
+ for sentence_support in sentence_support_information:
75
+ support_keys += sentence_support['supporting_sentence_keys']
76
+ support_level.append(sentence_support['fully_supported'])
77
+
78
+ print(support_keys)
79
+ print(support_level)
80
+
81
+ return completion_results_response
82
 
83
 
84