File size: 2,619 Bytes
77f290b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from .utils import log
import re

# Path endings that mark an archive member as serialized weights/checkpoints.
_WEIGHT_FILE_SUFFIXES = (
    ".h5",
    ".pth",
    ".torch",
    ".pt",
    ".tar.gz",
    "checkpoint.pt",
    "ckpt",
)

# README line rules, tried in this order. Each entry is
# (keywords that must all occur on the line, label used in the log message).
# A line only counts when it also contains "http".
_README_LINK_RULES = (
    (("pretrained",), "pretrained"),
    (("pre-trained",), "pre-trained"),
    (("weight",), "weight"),
    (("download", "model"), "model"),
)

# Matches an http(s) URL up to the next whitespace character.
_URL_PATTERN = r'(https?://[^\s]+)'


def _mentions(keyword, text):
    """Return True if *keyword* (used as a regex) occurs in *text*, ignoring case."""
    return re.search(keyword, text, re.IGNORECASE) is not None


def evaluate(verbose, llm, zip, readme):
    """Report whether the repository ships, or links to, pre-trained weights.

    The checks stop at the first one that succeeds:

    1. An archive member whose path ends with a known weights/checkpoint
       suffix or contains the substring ``weights``.
    2. A URL in the README that contains ``pth``.
    3. A README line that contains ``http`` together with one of the
       keyword groups in ``_README_LINK_RULES``.
    4. If ``llm`` is truthy, a yes/no question about the README answered by
       ``model_predict``.

    Steps 2-4 are skipped when ``readme`` is falsy.

    Args:
        verbose: Passed through unchanged as the first argument of ``log``.
        llm: Truthy to enable the language-model fallback (step 4).
        zip: Archive object; only its ``namelist()`` method is used. The name
            shadows the builtin but is kept so keyword callers keep working.
        readme: README text, or a falsy value when none is available.

    Returns:
        ``"Yes"`` if any check succeeds, otherwise ``"No"``.
    """
    log(verbose, "LOG", "\nLooking for pre-trained model weights...")

    # 1) Weights bundled directly in the archive.
    files = [
        file_path
        for file_path in zip.namelist()
        if file_path.endswith(_WEIGHT_FILE_SUFFIXES) or "weights" in file_path
    ]
    if files:
        log(verbose, "LOG", f"Found model weights: {files}")
        return "Yes"

    if readme:
        # 2) Any URL that mentions "pth" (e.g. a link to a .pth checkpoint).
        urls = re.findall(_URL_PATTERN, readme)
        if any("pth" in url for url in urls):
            log(verbose, "LOG", "Found a link to pre-trained weights in readme")
            return "Yes"

        # 3) Keyword + link on the same line. Rules are the outer loop so the
        #    reported label follows rule priority, not line order.
        readme_lines = readme.split("\n")
        for keywords, label in _README_LINK_RULES:
            required = keywords + ("http",)
            if any(
                all(_mentions(word, row) for word in required)
                for row in readme_lines
            ):
                log(verbose, "LOG", f"Found a link for '{label}' something in readme")
                return "Yes"

        # 4) Last resort: ask the language model.
        if llm:
            # This module never imported ``model_predict``, so this branch used
            # to raise NameError. NOTE(review): assumed to live in ``.utils``
            # next to ``log`` -- confirm. Imported here so that merely loading
            # the module does not depend on it.
            from .utils import model_predict

            prompt = f"{readme}\nQ: Does this text contain a download link for the model pre-trained weights?"
            ans = model_predict(prompt)
            # Accept only an unambiguous answer: "Yes" present, "No" absent.
            if "Yes" in ans and "No" not in ans:
                log(verbose, "LOG", "The LLM found signs for accessing the pre-trained weights from the readme")
                return "Yes"

    log(verbose, "ERROR", "Found no pre-trained model weights.")
    return "No"