arer90
commited on
Commit
·
f2d6c5c
1
Parent(s):
3ff3f4b
code 241111
Browse files- 1_standard_ML.ipynb +0 -0
- 2_solubility_fingerprint_compare.ipynb +0 -0
- 3_solubility_feature_checker.ipynb +0 -0
- 3_solubility_feature_deeplearning.ipynb +0 -0
- 4_ANO_feature.ipynb +0 -0
- 5_ANO_structure.ipynb +1750 -0
- 6_ANO_network_[fea_struc].ipynb +1992 -0
- 7_ANO_network_[struc_fea].ipynb +1913 -0
- 7_solubility_final_HPO_proving.ipynb +0 -0
- 8_solubility_xai.ipynb +0 -0
- data/Lovric2020_logS0.csv +830 -0
- data/delaney-processed.csv +1129 -0
- data/huusk.csv +0 -0
- data/ws496_logS.csv +497 -0
- extra_code/__pycache__/feature_search.cpython-312.pyc +0 -0
- extra_code/__pycache__/feature_selection.cpython-312.pyc +0 -0
- extra_code/ano_model.py +220 -0
- extra_code/basic_model.py +63 -0
- extra_code/feature_search.py +702 -0
- extra_code/feature_selection.py +951 -0
- extra_code/learning_process.py +139 -0
1_standard_ML.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
2_solubility_fingerprint_compare.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
3_solubility_feature_checker.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
3_solubility_feature_deeplearning.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
4_ANO_feature.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
5_ANO_structure.ipynb
ADDED
@@ -0,0 +1,1750 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 13,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import os\n",
|
10 |
+
"import sys\n",
|
11 |
+
"import time\n",
|
12 |
+
"import subprocess\n",
|
13 |
+
"import logging\n",
|
14 |
+
"import warnings\n",
|
15 |
+
"import gc\n",
|
16 |
+
"import numpy as np\n",
|
17 |
+
"import pandas as pd\n",
|
18 |
+
"import seaborn as sns\n",
|
19 |
+
"import matplotlib.pyplot as plt\n",
|
20 |
+
"import matplotlib.patches as mpatches\n",
|
21 |
+
"from concurrent.futures import ProcessPoolExecutor, as_completed"
|
22 |
+
]
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"cell_type": "code",
|
26 |
+
"execution_count": 14,
|
27 |
+
"metadata": {},
|
28 |
+
"outputs": [],
|
29 |
+
"source": [
|
30 |
+
"from rdkit import Chem\n",
|
31 |
+
"from rdkit.Chem import AllChem, DataStructs, Draw\n",
|
32 |
+
"from rdkit import RDConfig\n",
|
33 |
+
"from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges\n",
|
34 |
+
"from rdkit.Chem.AllChem import GetMorganGenerator\n",
|
35 |
+
"from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray\n",
|
36 |
+
"from rdkit.Avalon.pyAvalonTools import GetAvalonFP"
|
37 |
+
]
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"cell_type": "code",
|
41 |
+
"execution_count": 15,
|
42 |
+
"metadata": {},
|
43 |
+
"outputs": [],
|
44 |
+
"source": [
|
45 |
+
"import tensorflow as tf\n",
|
46 |
+
"from tensorflow import keras\n",
|
47 |
+
"from tensorflow.keras import layers\n",
|
48 |
+
"from tensorflow.keras.models import Sequential\n",
|
49 |
+
"from tensorflow.keras.layers import Dense, Dropout, Activation\n",
|
50 |
+
"from tensorflow.keras.regularizers import l2\n",
|
51 |
+
"from tensorflow.keras.optimizers import Adam\n",
|
52 |
+
"from tensorflow.keras import regularizers"
|
53 |
+
]
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"cell_type": "code",
|
57 |
+
"execution_count": 16,
|
58 |
+
"metadata": {},
|
59 |
+
"outputs": [],
|
60 |
+
"source": [
|
61 |
+
"from sklearn.model_selection import train_test_split\n",
|
62 |
+
"from sklearn.linear_model import Ridge\n",
|
63 |
+
"from sklearn.ensemble import RandomForestRegressor\n",
|
64 |
+
"from sklearn.neural_network import MLPRegressor\n",
|
65 |
+
"from sklearn.svm import SVR\n",
|
66 |
+
"from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error"
|
67 |
+
]
|
68 |
+
},
|
69 |
+
{
|
70 |
+
"cell_type": "code",
|
71 |
+
"execution_count": 17,
|
72 |
+
"metadata": {},
|
73 |
+
"outputs": [],
|
74 |
+
"source": [
|
75 |
+
"import optuna\n",
|
76 |
+
"from optuna.trial import TrialState"
|
77 |
+
]
|
78 |
+
},
|
79 |
+
{
|
80 |
+
"cell_type": "code",
|
81 |
+
"execution_count": 18,
|
82 |
+
"metadata": {},
|
83 |
+
"outputs": [],
|
84 |
+
"source": [
|
85 |
+
"tf.keras.backend.clear_session()\n",
|
86 |
+
"gpus = tf.config.experimental.list_physical_devices('GPU')\n",
|
87 |
+
"if gpus:\n",
|
88 |
+
" try:\n",
|
89 |
+
" for gpu in gpus:\n",
|
90 |
+
" tf.config.experimental.set_memory_growth(gpu, True)\n",
|
91 |
+
" except RuntimeError as e:\n",
|
92 |
+
" print(e)"
|
93 |
+
]
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"cell_type": "code",
|
97 |
+
"execution_count": 19,
|
98 |
+
"metadata": {},
|
99 |
+
"outputs": [],
|
100 |
+
"source": [
|
101 |
+
"target_path = \"result/5_ANO_structure\"\n",
|
102 |
+
"os.makedirs(target_path, exist_ok=True)"
|
103 |
+
]
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"cell_type": "code",
|
107 |
+
"execution_count": 20,
|
108 |
+
"metadata": {},
|
109 |
+
"outputs": [],
|
110 |
+
"source": [
|
111 |
+
"data_ws = pd.read_csv('./data/ws496_logS.csv', dtype={'SMILES': 'string'})\n",
|
112 |
+
"smiles_ws = data_ws['SMILES']\n",
|
113 |
+
"y_ws = data_ws.iloc[:, 2]\n",
|
114 |
+
"\n",
|
115 |
+
"data_delaney = pd.read_csv('./data/delaney-processed.csv', dtype={'smiles': 'string'})\n",
|
116 |
+
"smiles_de = data_delaney['smiles']\n",
|
117 |
+
"y_de = data_delaney.iloc[:, 1]\n",
|
118 |
+
"\n",
|
119 |
+
"data_lovric2020 = pd.read_csv('./data/Lovric2020_logS0.csv', dtype={'isomeric_smiles': 'string'})\n",
|
120 |
+
"smiles_lo = data_lovric2020['isomeric_smiles']\n",
|
121 |
+
"y_lo = data_lovric2020.iloc[:, 1]\n",
|
122 |
+
"\n",
|
123 |
+
"data_huuskonen = pd.read_csv('./data/huusk.csv', dtype={'SMILES': 'string'})\n",
|
124 |
+
"smiles_hu = data_huuskonen['SMILES']\n",
|
125 |
+
"y_hu = data_huuskonen.iloc[:, -1].astype('float')"
|
126 |
+
]
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"cell_type": "code",
|
130 |
+
"execution_count": 21,
|
131 |
+
"metadata": {},
|
132 |
+
"outputs": [],
|
133 |
+
"source": [
|
134 |
+
"def mol3d(mol):\n",
|
135 |
+
" mol = Chem.AddHs(mol)\n",
|
136 |
+
" optimization_methods = [\n",
|
137 |
+
" (AllChem.EmbedMolecule, (mol, AllChem.ETKDGv3()), {}),\n",
|
138 |
+
" (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),\n",
|
139 |
+
" (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200})\n",
|
140 |
+
" ]\n",
|
141 |
+
"\n",
|
142 |
+
" for method, args, kwargs in optimization_methods:\n",
|
143 |
+
" try:\n",
|
144 |
+
" method(*args, **kwargs)\n",
|
145 |
+
" if mol.GetNumConformers() > 0:\n",
|
146 |
+
" return mol\n",
|
147 |
+
" except ValueError as e:\n",
|
148 |
+
" print(f\"Error: {e} - Trying next optimization method [{method}]\")\n",
|
149 |
+
"\n",
|
150 |
+
" print(f\"Invalid mol for 3d {'\\033[94m'}{Chem.MolToSmiles(mol)}{'\\033[0m'} - No conformer generated\")\n",
|
151 |
+
" return None"
|
152 |
+
]
|
153 |
+
},
|
154 |
+
{
|
155 |
+
"cell_type": "code",
|
156 |
+
"execution_count": 22,
|
157 |
+
"metadata": {},
|
158 |
+
"outputs": [],
|
159 |
+
"source": [
|
160 |
+
"def convert_smiles_to_mol(smiles, fail_folder=None, index=None, yvalue=None):\n",
|
161 |
+
" mol = Chem.MolFromSmiles(smiles)\n",
|
162 |
+
" if mol is None:\n",
|
163 |
+
" print(f\"[convert_smiles_to_mol] Cannot convert {smiles} to Mols\")\n",
|
164 |
+
" return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": \"Invalid SMILES\"}\n",
|
165 |
+
"\n",
|
166 |
+
" try:\n",
|
167 |
+
" Chem.Kekulize(mol, clearAromaticFlags=True)\n",
|
168 |
+
" isomeric_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)\n",
|
169 |
+
" mol = Chem.MolFromSmiles(isomeric_smiles)\n",
|
170 |
+
" except Exception as e:\n",
|
171 |
+
" print(f\"[convert_smiles_to_mol] failed {smiles} isomeric_smiles by {e}\")\n",
|
172 |
+
" if fail_folder and index is not None:\n",
|
173 |
+
" img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
|
174 |
+
" img = Draw.MolToImage(mol)\n",
|
175 |
+
" img.save(img_path)\n",
|
176 |
+
" return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"Isomeric SMILES error: {e}\"}\n",
|
177 |
+
"\n",
|
178 |
+
" try:\n",
|
179 |
+
" Chem.SanitizeMol(mol)\n",
|
180 |
+
" except Exception as e:\n",
|
181 |
+
" print(f\"[convert_smiles_to_mol] failed {smiles} SanitizeMol by {e}\")\n",
|
182 |
+
" if fail_folder and index is not None:\n",
|
183 |
+
" img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
|
184 |
+
" img = Draw.MolToImage(mol)\n",
|
185 |
+
" img.save(img_path)\n",
|
186 |
+
" return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"SanitizeMol error: {e}\"}\n",
|
187 |
+
"\n",
|
188 |
+
" return mol, None"
|
189 |
+
]
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"cell_type": "code",
|
193 |
+
"execution_count": 23,
|
194 |
+
"metadata": {},
|
195 |
+
"outputs": [],
|
196 |
+
"source": [
|
197 |
+
"def process_smiles(smiles, yvalue, fail_folder, index):\n",
|
198 |
+
" mol, error = convert_smiles_to_mol(smiles, fail_folder, index, yvalue)\n",
|
199 |
+
" if error:\n",
|
200 |
+
" return None, None, error\n",
|
201 |
+
"\n",
|
202 |
+
" mol_3d = mol3d(mol)\n",
|
203 |
+
" if mol_3d:\n",
|
204 |
+
" return smiles, yvalue, None\n",
|
205 |
+
" else:\n",
|
206 |
+
" img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
|
207 |
+
" img = Draw.MolToImage(mol)\n",
|
208 |
+
" img.save(img_path)\n",
|
209 |
+
" return None, None, {\"smiles\": smiles, \"y_value\": yvalue}\n",
|
210 |
+
"\n",
|
211 |
+
"def process_dataset(smiles_list, y_values, dataset_name, target_path=\"result\", max_workers=None):\n",
|
212 |
+
" start = time.time()\n",
|
213 |
+
" valid_smiles, valid_y = [], []\n",
|
214 |
+
" error_smiles_list = []\n",
|
215 |
+
" fail_folder = f\"{target_path}/failed/{dataset_name}\"\n",
|
216 |
+
" os.makedirs(fail_folder, exist_ok=True)\n",
|
217 |
+
"\n",
|
218 |
+
" with ProcessPoolExecutor(max_workers=max_workers) as executor:\n",
|
219 |
+
" futures = [\n",
|
220 |
+
" executor.submit(process_smiles, smiles, yvalue, fail_folder, i)\n",
|
221 |
+
" for i, (smiles, yvalue) in enumerate(zip(smiles_list, y_values))\n",
|
222 |
+
" ]\n",
|
223 |
+
" for future in as_completed(futures):\n",
|
224 |
+
" smiles, yvalue, error = future.result()\n",
|
225 |
+
" if error:\n",
|
226 |
+
" error_smiles_list.append(error)\n",
|
227 |
+
" elif smiles is not None and yvalue is not None:\n",
|
228 |
+
" valid_smiles.append(smiles)\n",
|
229 |
+
" valid_y.append(yvalue)\n",
|
230 |
+
"\n",
|
231 |
+
" if error_smiles_list:\n",
|
232 |
+
" error_df = pd.DataFrame(error_smiles_list)\n",
|
233 |
+
" error_df.to_csv(os.path.join(fail_folder, \"failed_smiles.csv\"), index=False)\n",
|
234 |
+
" print(f\" [{dataset_name:<10}] : {time.time()-start:.4f} sec\")\n",
|
235 |
+
" return valid_smiles, valid_y"
|
236 |
+
]
|
237 |
+
},
|
238 |
+
{
|
239 |
+
"cell_type": "code",
|
240 |
+
"execution_count": 24,
|
241 |
+
"metadata": {},
|
242 |
+
"outputs": [
|
243 |
+
{
|
244 |
+
"name": "stdout",
|
245 |
+
"output_type": "stream",
|
246 |
+
"text": [
|
247 |
+
" [ws496 ] : 0.8649 sec\n",
|
248 |
+
" [delaney ] : 1.3527 sec\n",
|
249 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x560fe30ad760>]\n",
|
250 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x560fe30ae5f0>]\n",
|
251 |
+
"Invalid mol for 3d \u001b[94m[H]O[C@@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
|
252 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x560fe30ad760>]\n",
|
253 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x560fe30ae5f0>]\n",
|
254 |
+
"Invalid mol for 3d \u001b[94m[H]O[C@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
|
255 |
+
" [Lovric2020_logS0] : 8.3057 sec\n",
|
256 |
+
" [huusk ] : 1.5089 sec\n"
|
257 |
+
]
|
258 |
+
}
|
259 |
+
],
|
260 |
+
"source": [
|
261 |
+
"smiles_ws, y_ws = process_dataset(smiles_ws, y_ws, \"ws496\", target_path)\n",
|
262 |
+
"smiles_de, y_de = process_dataset(smiles_de, y_de, \"delaney\", target_path)\n",
|
263 |
+
"smiles_lo, y_lo = process_dataset(smiles_lo, y_lo, \"Lovric2020_logS0\", target_path)\n",
|
264 |
+
"smiles_hu, y_hu = process_dataset(smiles_hu, y_hu, \"huusk\", target_path)"
|
265 |
+
]
|
266 |
+
},
|
267 |
+
{
|
268 |
+
"cell_type": "code",
|
269 |
+
"execution_count": 25,
|
270 |
+
"metadata": {},
|
271 |
+
"outputs": [],
|
272 |
+
"source": [
|
273 |
+
"LEN_OF_FF = 2048\n",
|
274 |
+
"LEN_OF_MA = 167\n",
|
275 |
+
"LEN_OF_AV = 512"
|
276 |
+
]
|
277 |
+
},
|
278 |
+
{
|
279 |
+
"cell_type": "code",
|
280 |
+
"execution_count": 26,
|
281 |
+
"metadata": {},
|
282 |
+
"outputs": [],
|
283 |
+
"source": [
|
284 |
+
"def get_fingerprints(mol):\n",
|
285 |
+
" if mol is None:\n",
|
286 |
+
" return None, None, None\n",
|
287 |
+
" \n",
|
288 |
+
" morgan_generator = GetMorganGenerator(radius=2, fpSize=LEN_OF_FF)\n",
|
289 |
+
" ecfp = morgan_generator.GetFingerprint(mol)\n",
|
290 |
+
" ecfp_array = np.zeros((LEN_OF_FF,),dtype=int)\n",
|
291 |
+
" DataStructs.ConvertToNumpyArray(ecfp, ecfp_array)\n",
|
292 |
+
" \n",
|
293 |
+
" maccs = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)\n",
|
294 |
+
"\n",
|
295 |
+
" avalon_fp = GetAvalonFP(mol)\n",
|
296 |
+
" avalon_array = np.zeros((LEN_OF_AV,),dtype=int)\n",
|
297 |
+
" DataStructs.ConvertToNumpyArray(avalon_fp, avalon_array)\n",
|
298 |
+
" \n",
|
299 |
+
" return ecfp_array, maccs, avalon_array\n",
|
300 |
+
"\n",
|
301 |
+
"def fp_converter(data, use_parallel=True):\n",
|
302 |
+
" mols = [Chem.MolFromSmiles(smi) for smi in data]\n",
|
303 |
+
" \n",
|
304 |
+
" if use_parallel:\n",
|
305 |
+
" try: \n",
|
306 |
+
" with ProcessPoolExecutor() as executor:\n",
|
307 |
+
" results = list(executor.map(get_fingerprints, mols))\n",
|
308 |
+
" except Exception as e:\n",
|
309 |
+
" print(f\"Parallel processing failed due to: {e}. Falling back to sequential processing.\")\n",
|
310 |
+
" use_parallel = False\n",
|
311 |
+
" \n",
|
312 |
+
" if not use_parallel:\n",
|
313 |
+
" results = [get_fingerprints(mol) for mol in mols]\n",
|
314 |
+
" \n",
|
315 |
+
" ECFP, MACCS, AvalonFP = zip(*results)\n",
|
316 |
+
" \n",
|
317 |
+
" ECFP_container = np.vstack([arr for arr in ECFP if arr is not None])\n",
|
318 |
+
" MACCS_container = np.zeros((len(MACCS), LEN_OF_MA), dtype=int)\n",
|
319 |
+
" AvalonFP_container = np.vstack([arr for arr in AvalonFP if arr is not None])\n",
|
320 |
+
"\n",
|
321 |
+
" for i, fp in enumerate(MACCS):\n",
|
322 |
+
" if fp is not None:\n",
|
323 |
+
" DataStructs.ConvertToNumpyArray(fp, MACCS_container[i])\n",
|
324 |
+
" \n",
|
325 |
+
" return mols, ECFP_container, MACCS_container, AvalonFP_container"
|
326 |
+
]
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"cell_type": "code",
|
330 |
+
"execution_count": 27,
|
331 |
+
"metadata": {},
|
332 |
+
"outputs": [],
|
333 |
+
"source": [
|
334 |
+
"mol_ws, x_ws, MACCS_ws, AvalonFP_ws = fp_converter(smiles_ws,target_path)\n",
|
335 |
+
"mol_de, x_de, MACCS_de, AvalonFP_de = fp_converter(smiles_de,target_path)\n",
|
336 |
+
"mol_lo, x_lo, MACCS_lo, AvalonFP_lo = fp_converter(smiles_lo,target_path)\n",
|
337 |
+
"mol_hu, x_hu, MACCS_hu, AvalonFP_hu = fp_converter(smiles_hu,target_path)"
|
338 |
+
]
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"cell_type": "code",
|
342 |
+
"execution_count": 28,
|
343 |
+
"metadata": {},
|
344 |
+
"outputs": [],
|
345 |
+
"source": [
|
346 |
+
"def concatenate_to_numpy(*dataframes):\n",
|
347 |
+
" numpy_arrays = [df.to_numpy() if isinstance(df, pd.DataFrame) else df for df in dataframes]\n",
|
348 |
+
" if not all(isinstance(arr, np.ndarray) for arr in numpy_arrays):\n",
|
349 |
+
" raise ValueError(\"All inputs must be either pandas DataFrame or numpy array\")\n",
|
350 |
+
" return np.concatenate(numpy_arrays, axis=1)"
|
351 |
+
]
|
352 |
+
},
|
353 |
+
{
|
354 |
+
"cell_type": "code",
|
355 |
+
"execution_count": 29,
|
356 |
+
"metadata": {},
|
357 |
+
"outputs": [],
|
358 |
+
"source": [
|
359 |
+
"group_nws = concatenate_to_numpy(x_ws, MACCS_ws, AvalonFP_ws)\n",
|
360 |
+
"group_nde = concatenate_to_numpy(x_de, MACCS_de, AvalonFP_de)\n",
|
361 |
+
"group_nlo = concatenate_to_numpy(x_lo, MACCS_lo, AvalonFP_lo)\n",
|
362 |
+
"group_nhu = concatenate_to_numpy(x_hu, MACCS_hu, AvalonFP_hu)"
|
363 |
+
]
|
364 |
+
},
|
365 |
+
{
|
366 |
+
"cell_type": "code",
|
367 |
+
"execution_count": 30,
|
368 |
+
"metadata": {},
|
369 |
+
"outputs": [],
|
370 |
+
"source": [
|
371 |
+
"BATCHSIZE = 32\n",
|
372 |
+
"EPOCHS = 1000\n",
|
373 |
+
"lr = 0.0001\n",
|
374 |
+
"decay = 1e-4"
|
375 |
+
]
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"cell_type": "code",
|
379 |
+
"execution_count": 41,
|
380 |
+
"metadata": {},
|
381 |
+
"outputs": [],
|
382 |
+
"source": [
|
383 |
+
"def search_model(trial, input_dim):\n",
|
384 |
+
" n_layers = trial.suggest_int(\"n_layers\", 1, 3)\n",
|
385 |
+
" model = tf.keras.Sequential()\n",
|
386 |
+
" model.add(tf.keras.layers.Input(shape=(input_dim,)))\n",
|
387 |
+
" layer_dropout = trial.suggest_int(\"layer_dropout\", 0, 1)\n",
|
388 |
+
" \n",
|
389 |
+
" for i in range(n_layers):\n",
|
390 |
+
" num_hidden = trial.suggest_int(f\"n_units_l_{i}\", 2, 10000)\n",
|
391 |
+
" num_decay = trial.suggest_categorical(f\"n_decay_l_{i}\", [1e-3, 1e-4, 1e-5])\n",
|
392 |
+
" model.add(\n",
|
393 |
+
" tf.keras.layers.Dense(\n",
|
394 |
+
" num_hidden,\n",
|
395 |
+
" activation=\"relu\",\n",
|
396 |
+
" kernel_initializer='glorot_uniform',\n",
|
397 |
+
" kernel_regularizer=tf.keras.regularizers.l2(num_decay),\n",
|
398 |
+
" )\n",
|
399 |
+
" )\n",
|
400 |
+
" if layer_dropout == 1:\n",
|
401 |
+
" fdropout1 = trial.suggest_categorical(f\"F_dropout_{i}\", [0.1, 0.2, 0.3])\n",
|
402 |
+
" model.add(tf.keras.layers.Dropout(rate=fdropout1))\n",
|
403 |
+
" \n",
|
404 |
+
" if layer_dropout == 0:\n",
|
405 |
+
" fdropout2 = trial.suggest_categorical(\"last_dropout\", [0.1, 0.2, 0.3])\n",
|
406 |
+
" model.add(tf.keras.layers.Dropout(rate=fdropout2))\n",
|
407 |
+
" \n",
|
408 |
+
" model.add(tf.keras.layers.Dense(units=1))\n",
|
409 |
+
" # # Colab\n",
|
410 |
+
" # learningr = trial.suggest_categorical(\"Learning_rate\",[0.01,0.001,0.0001])\n",
|
411 |
+
" # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learningr),\n",
|
412 |
+
" # loss=tf.keras.losses.MeanSquaredError(),\n",
|
413 |
+
" # metrics=[tf.keras.losses.MeanSquaredError(),\n",
|
414 |
+
" # tf.keras.losses.MeanAbsoluteError(),\n",
|
415 |
+
" # tf.keras.metrics.RootMeanSquaredError()])\n",
|
416 |
+
" return model\n",
|
417 |
+
"\n",
|
418 |
+
"def save_model(trial, x_data):\n",
|
419 |
+
" model_path = \"save_model/full_model.keras\"\n",
|
420 |
+
" if not os.path.exists(model_path):\n",
|
421 |
+
" try:\n",
|
422 |
+
" model = search_model(trial, x_data.shape[1])\n",
|
423 |
+
" os.makedirs(\"save_model\", exist_ok=True)\n",
|
424 |
+
" model.save(model_path)\n",
|
425 |
+
" print(f\"Model successfully saved to {model_path}\")\n",
|
426 |
+
" except Exception as e:\n",
|
427 |
+
" print(f\"Error saving model: {e}\")\n",
|
428 |
+
" else:\n",
|
429 |
+
" print(f\"Model already exists at {model_path}\")\n",
|
430 |
+
" os.remove(model_path)\n",
|
431 |
+
" save_model(trial, x_data)"
|
432 |
+
]
|
433 |
+
},
|
434 |
+
{
|
435 |
+
"cell_type": "code",
|
436 |
+
"execution_count": 42,
|
437 |
+
"metadata": {},
|
438 |
+
"outputs": [],
|
439 |
+
"source": [
|
440 |
+
"import logging\n",
|
441 |
+
"import warnings\n",
|
442 |
+
"\n",
|
443 |
+
"os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
|
444 |
+
"os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
|
445 |
+
"os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'\n",
|
446 |
+
"os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'\n",
|
447 |
+
"os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices'\n",
|
448 |
+
"os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/local/cuda --xla_gpu_force_compilation_parallelism=1'\n",
|
449 |
+
"os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'\n",
|
450 |
+
"os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'\n",
|
451 |
+
"os.environ['TF_NUMA_NODES'] = '1'\n",
|
452 |
+
"\n",
|
453 |
+
"warnings.filterwarnings('ignore')\n",
|
454 |
+
"\n",
|
455 |
+
"warnings.simplefilter(action='ignore', category=FutureWarning)\n",
|
456 |
+
"\n",
|
457 |
+
"logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
|
458 |
+
"\n",
|
459 |
+
"tf.get_logger().setLevel('ERROR')\n",
|
460 |
+
"tf.autograph.set_verbosity(0)\n",
|
461 |
+
"\n",
|
462 |
+
"def suppress_warnings(condition=True):\n",
|
463 |
+
" if condition:\n",
|
464 |
+
" logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
|
465 |
+
" os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
|
466 |
+
" else:\n",
|
467 |
+
" logging.getLogger('tensorflow').setLevel(logging.WARNING)\n",
|
468 |
+
" os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'\n",
|
469 |
+
"\n",
|
470 |
+
"suppress_warnings(condition=True)"
|
471 |
+
]
|
472 |
+
},
|
473 |
+
{
|
474 |
+
"cell_type": "code",
|
475 |
+
"execution_count": 43,
|
476 |
+
"metadata": {},
|
477 |
+
"outputs": [],
|
478 |
+
"source": [
|
479 |
+
"def objective_ws_struct(trial):\n",
|
480 |
+
" try:\n",
|
481 |
+
" y_true = np.asarray(y_ws).astype('float')\n",
|
482 |
+
" np.save('new_fps.npy', group_nws)\n",
|
483 |
+
" np.save('y_true.npy', y_true)\n",
|
484 |
+
" \n",
|
485 |
+
" save_model(trial, group_nws)\n",
|
486 |
+
"\n",
|
487 |
+
" lr = trial.suggest_categorical(f\"lr\", [1e-3, 1e-4, 1e-5])\n",
|
488 |
+
"\n",
|
489 |
+
" result = subprocess.run(['python3', './extra_code/learning_process.py', \n",
|
490 |
+
" str(BATCHSIZE), str(EPOCHS), \n",
|
491 |
+
" str(lr), \n",
|
492 |
+
" 'new_fps.npy', 'y_true.npy'],\n",
|
493 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
|
494 |
+
"\n",
|
495 |
+
" if result.stderr:\n",
|
496 |
+
" filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') if \"could not open file to read NUMA node\" not in line and \"Your kernel may have been built without NUMA support\" not in line])\n",
|
497 |
+
" if filtered_stderr:\n",
|
498 |
+
" print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
|
499 |
+
"\n",
|
500 |
+
" for line in result.stdout.splitlines():\n",
|
501 |
+
" if \"R2\" in line:\n",
|
502 |
+
" if \"(prune)\" in line:\n",
|
503 |
+
" print(f\"Pruning trial due to poor R2: {line}\")\n",
|
504 |
+
" r2_result = 0.0\n",
|
505 |
+
" trial.report(r2_result, step=0)\n",
|
506 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
507 |
+
" else:\n",
|
508 |
+
" r2_result = float(line.split(\":\")[1].strip())\n",
|
509 |
+
" print(f\"R2 score: {r2_result}\")\n",
|
510 |
+
" trial.report(r2_result, step=0)\n",
|
511 |
+
"\n",
|
512 |
+
" if trial.should_prune():\n",
|
513 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
514 |
+
"\n",
|
515 |
+
" except Exception as e:\n",
|
516 |
+
" print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
|
517 |
+
" r2_result = 0.0\n",
|
518 |
+
"\n",
|
519 |
+
" gc.collect()\n",
|
520 |
+
"\n",
|
521 |
+
" return r2_result"
|
522 |
+
]
|
523 |
+
},
|
524 |
+
{
|
525 |
+
"cell_type": "code",
|
526 |
+
"execution_count": 44,
|
527 |
+
"metadata": {},
|
528 |
+
"outputs": [],
|
529 |
+
"source": [
"def objective_de_struct(trial):\n",
"    \"\"\"Optuna objective that scores one trial on the `de` data.\n",
"\n",
"    Saves `group_nde` and `y_de` (cast to float) as `new_fps.npy` and\n",
"    `y_true.npy`, calls `save_model(trial, group_nde)`, samples a learning\n",
"    rate, runs `./extra_code/learning_process.py` in a subprocess and reads\n",
"    the score from the stdout lines that contain \"R2\".\n",
"\n",
"    Args:\n",
"        trial: Optuna trial used to sample `lr` and to report the score.\n",
"\n",
"    Returns:\n",
"        The last R2 value printed by the subprocess, or 0.0 when no value\n",
"        was printed or an unexpected error occurred.\n",
"\n",
"    Raises:\n",
"        optuna.exceptions.TrialPruned: when the subprocess marks its R2 line\n",
"            with \"(prune)\" or `trial.should_prune()` returns True.\n",
"    \"\"\"\n",
"    # Bound up front so the final return works even if stdout has no R2 line\n",
"    # (for example when the subprocess fails before printing a score).\n",
"    r2_result = 0.0\n",
"    try:\n",
"        y_true = np.asarray(y_de).astype('float')\n",
"        np.save('new_fps.npy', group_nde)\n",
"        np.save('y_true.npy', y_true)\n",
"\n",
"        # NOTE(review): presumably builds and stores the model that the\n",
"        # subprocess loads - confirm against save_model.\n",
"        save_model(trial, group_nde)\n",
"\n",
"        lr = trial.suggest_categorical(\"lr\", [1e-3, 1e-4, 1e-5])\n",
"\n",
"        result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
"                                 str(BATCHSIZE), str(EPOCHS),\n",
"                                 str(lr),\n",
"                                 'new_fps.npy', 'y_true.npy'],\n",
"                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
"\n",
"        if result.stderr:\n",
"            # Drop the NUMA notices before echoing the subprocess stderr.\n",
"            filtered_stderr = '\\n'.join([\n",
"                line for line in result.stderr.split('\\n')\n",
"                if \"could not open file to read NUMA node\" not in line\n",
"                and \"Your kernel may have been built without NUMA support\" not in line\n",
"            ])\n",
"            if filtered_stderr:\n",
"                print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
"\n",
"        for line in result.stdout.splitlines():\n",
"            if \"R2\" not in line:\n",
"                continue\n",
"            if \"(prune)\" in line:\n",
"                print(f\"Pruning trial due to poor R2: {line}\")\n",
"                r2_result = 0.0\n",
"                trial.report(r2_result, step=0)\n",
"                raise optuna.exceptions.TrialPruned()\n",
"\n",
"            r2_result = float(line.split(\":\")[1].strip())\n",
"            print(f\"R2 score: {r2_result}\")\n",
"            trial.report(r2_result, step=0)\n",
"\n",
"            if trial.should_prune():\n",
"                raise optuna.exceptions.TrialPruned()\n",
"\n",
"    except optuna.exceptions.TrialPruned:\n",
"        # TrialPruned is an Exception subclass: re-raise it so Optuna records\n",
"        # the trial as PRUNED instead of a completed trial with value 0.0.\n",
"        raise\n",
"    except Exception as e:\n",
"        print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
"        r2_result = 0.0\n",
"    finally:\n",
"        # Runs on every exit path, including pruning.\n",
"        gc.collect()\n",
"\n",
"    return r2_result"
|
573 |
+
]
|
574 |
+
},
|
575 |
+
{
|
576 |
+
"cell_type": "code",
|
577 |
+
"execution_count": 45,
|
578 |
+
"metadata": {},
|
579 |
+
"outputs": [],
|
580 |
+
"source": [
"def objective_lo_struct(trial):\n",
"    \"\"\"Optuna objective that scores one trial on the `lo` data.\n",
"\n",
"    Saves `group_nlo` and `y_lo` (cast to float) as `new_fps.npy` and\n",
"    `y_true.npy`, calls `save_model(trial, group_nlo)`, samples a learning\n",
"    rate, runs `./extra_code/learning_process.py` in a subprocess and reads\n",
"    the score from the stdout lines that contain \"R2\".\n",
"\n",
"    Args:\n",
"        trial: Optuna trial used to sample `lr` and to report the score.\n",
"\n",
"    Returns:\n",
"        The last R2 value printed by the subprocess, or 0.0 when no value\n",
"        was printed or an unexpected error occurred.\n",
"\n",
"    Raises:\n",
"        optuna.exceptions.TrialPruned: when the subprocess marks its R2 line\n",
"            with \"(prune)\" or `trial.should_prune()` returns True.\n",
"    \"\"\"\n",
"    # Bound up front so the final return works even if stdout has no R2 line\n",
"    # (for example when the subprocess fails before printing a score).\n",
"    r2_result = 0.0\n",
"    try:\n",
"        y_true = np.asarray(y_lo).astype('float')\n",
"        np.save('new_fps.npy', group_nlo)\n",
"        np.save('y_true.npy', y_true)\n",
"\n",
"        # NOTE(review): presumably builds and stores the model that the\n",
"        # subprocess loads - confirm against save_model.\n",
"        save_model(trial, group_nlo)\n",
"\n",
"        lr = trial.suggest_categorical(\"lr\", [1e-3, 1e-4, 1e-5])\n",
"\n",
"        result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
"                                 str(BATCHSIZE), str(EPOCHS),\n",
"                                 str(lr),\n",
"                                 'new_fps.npy', 'y_true.npy'],\n",
"                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
"\n",
"        if result.stderr:\n",
"            # Drop the NUMA notices before echoing the subprocess stderr.\n",
"            filtered_stderr = '\\n'.join([\n",
"                line for line in result.stderr.split('\\n')\n",
"                if \"could not open file to read NUMA node\" not in line\n",
"                and \"Your kernel may have been built without NUMA support\" not in line\n",
"            ])\n",
"            if filtered_stderr:\n",
"                print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
"\n",
"        for line in result.stdout.splitlines():\n",
"            if \"R2\" not in line:\n",
"                continue\n",
"            if \"(prune)\" in line:\n",
"                print(f\"Pruning trial due to poor R2: {line}\")\n",
"                r2_result = 0.0\n",
"                trial.report(r2_result, step=0)\n",
"                raise optuna.exceptions.TrialPruned()\n",
"\n",
"            r2_result = float(line.split(\":\")[1].strip())\n",
"            print(f\"R2 score: {r2_result}\")\n",
"            trial.report(r2_result, step=0)\n",
"\n",
"            if trial.should_prune():\n",
"                raise optuna.exceptions.TrialPruned()\n",
"\n",
"    except optuna.exceptions.TrialPruned:\n",
"        # TrialPruned is an Exception subclass: re-raise it so Optuna records\n",
"        # the trial as PRUNED instead of a completed trial with value 0.0.\n",
"        raise\n",
"    except Exception as e:\n",
"        print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
"        r2_result = 0.0\n",
"    finally:\n",
"        # Runs on every exit path, including pruning.\n",
"        gc.collect()\n",
"\n",
"    return r2_result"
|
624 |
+
]
|
625 |
+
},
|
626 |
+
{
|
627 |
+
"cell_type": "code",
|
628 |
+
"execution_count": 46,
|
629 |
+
"metadata": {},
|
630 |
+
"outputs": [],
|
631 |
+
"source": [
"def objective_hu_struct(trial):\n",
"    \"\"\"Optuna objective that scores one trial on the `hu` data.\n",
"\n",
"    Saves `group_nhu` and `y_hu` (cast to float) as `new_fps.npy` and\n",
"    `y_true.npy`, calls `save_model(trial, group_nhu)`, samples a learning\n",
"    rate, runs `./extra_code/learning_process.py` in a subprocess and reads\n",
"    the score from the stdout lines that contain \"R2\".\n",
"\n",
"    Args:\n",
"        trial: Optuna trial used to sample `lr` and to report the score.\n",
"\n",
"    Returns:\n",
"        The last R2 value printed by the subprocess, or 0.0 when no value\n",
"        was printed or an unexpected error occurred.\n",
"\n",
"    Raises:\n",
"        optuna.exceptions.TrialPruned: when the subprocess marks its R2 line\n",
"            with \"(prune)\" or `trial.should_prune()` returns True.\n",
"    \"\"\"\n",
"    # Bound up front so the final return works even if stdout has no R2 line\n",
"    # (for example when the subprocess fails before printing a score).\n",
"    r2_result = 0.0\n",
"    try:\n",
"        y_true = np.asarray(y_hu).astype('float')\n",
"        np.save('new_fps.npy', group_nhu)\n",
"        np.save('y_true.npy', y_true)\n",
"\n",
"        # NOTE(review): presumably builds and stores the model that the\n",
"        # subprocess loads - confirm against save_model.\n",
"        save_model(trial, group_nhu)\n",
"\n",
"        lr = trial.suggest_categorical(\"lr\", [1e-3, 1e-4, 1e-5])\n",
"\n",
"        result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
"                                 str(BATCHSIZE), str(EPOCHS),\n",
"                                 str(lr),\n",
"                                 'new_fps.npy', 'y_true.npy'],\n",
"                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
"\n",
"        if result.stderr:\n",
"            # Drop the NUMA notices before echoing the subprocess stderr.\n",
"            filtered_stderr = '\\n'.join([\n",
"                line for line in result.stderr.split('\\n')\n",
"                if \"could not open file to read NUMA node\" not in line\n",
"                and \"Your kernel may have been built without NUMA support\" not in line\n",
"            ])\n",
"            if filtered_stderr:\n",
"                print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
"\n",
"        for line in result.stdout.splitlines():\n",
"            if \"R2\" not in line:\n",
"                continue\n",
"            if \"(prune)\" in line:\n",
"                print(f\"Pruning trial due to poor R2: {line}\")\n",
"                r2_result = 0.0\n",
"                trial.report(r2_result, step=0)\n",
"                raise optuna.exceptions.TrialPruned()\n",
"\n",
"            r2_result = float(line.split(\":\")[1].strip())\n",
"            print(f\"R2 score: {r2_result}\")\n",
"            trial.report(r2_result, step=0)\n",
"\n",
"            if trial.should_prune():\n",
"                raise optuna.exceptions.TrialPruned()\n",
"\n",
"    except optuna.exceptions.TrialPruned:\n",
"        # TrialPruned is an Exception subclass: re-raise it so Optuna records\n",
"        # the trial as PRUNED instead of a completed trial with value 0.0.\n",
"        raise\n",
"    except Exception as e:\n",
"        print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
"        r2_result = 0.0\n",
"    finally:\n",
"        # Runs on every exit path, including pruning.\n",
"        gc.collect()\n",
"\n",
"    return r2_result"
|
675 |
+
]
|
676 |
+
},
|
677 |
+
{
|
678 |
+
"cell_type": "code",
|
679 |
+
"execution_count": 47,
|
680 |
+
"metadata": {},
|
681 |
+
"outputs": [],
|
682 |
+
"source": [
"# Studies are stored in the local SQLite database file `ano_analysis.db`.\n",
"# NOTE(review): `timeout` is handed to the database driver through\n",
"# `connect_args`; confirm the unit the driver expects for 10000.\n",
"storage = optuna.storages.RDBStorage(\n",
"    url=\"sqlite:///ano_analysis.db\",\n",
"    engine_kwargs={\"connect_args\": {\"timeout\": 10000}},\n",
")\n",
"# Alternative PostgreSQL backend (fill in the placeholders before use):\n",
"# storage_urls = \"postgresql+psycopg2://postgres:{pwd}@localhost:{num}\"\n",
"# storage = optuna.storages.RDBStorage(url=storage_urls)"
|
686 |
+
]
|
687 |
+
},
|
688 |
+
{
|
689 |
+
"cell_type": "code",
|
690 |
+
"execution_count": 48,
|
691 |
+
"metadata": {},
|
692 |
+
"outputs": [],
|
693 |
+
"source": [
"# Remove studies left over from earlier runs so the cells below start fresh\n",
"# (they create their studies with load_if_exists=True).\n",
"# Each study is deleted in its own try block: one missing study must not\n",
"# prevent the remaining ones from being removed.\n",
"for stale_study_name in (\"ANO_ws_struct\", \"ANO_de_struct\", \"ANO_lo_struct\", \"ANO_hu_struct\"):\n",
"    try:\n",
"        optuna.delete_study(study_name=stale_study_name, storage=storage)\n",
"    except KeyError:\n",
"        # No study with this name exists yet (e.g. first run): nothing to do.\n",
"        pass\n",
"    except Exception as e:\n",
"        # Stay best-effort, but make unexpected failures visible.\n",
"        print(f\"Could not delete study {stale_study_name}: {e}\", file=sys.stderr)"
|
701 |
+
]
|
702 |
+
},
|
703 |
+
{
|
704 |
+
"cell_type": "code",
|
705 |
+
"execution_count": 49,
|
706 |
+
"metadata": {},
|
707 |
+
"outputs": [],
|
708 |
+
"source": [
"# Number of trials each study runs (passed to optimize() as n_trials).\n",
"TRIALS = 5"
|
710 |
+
]
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"cell_type": "code",
|
714 |
+
"execution_count": 50,
|
715 |
+
"metadata": {},
|
716 |
+
"outputs": [
|
717 |
+
{
|
718 |
+
"name": "stderr",
|
719 |
+
"output_type": "stream",
|
720 |
+
"text": [
|
721 |
+
"[I 2024-10-25 11:27:16,193] A new study created in RDB with name: ANO_ws_struct\n",
|
722 |
+
"I0000 00:00:1729823236.262387 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
723 |
+
"Your kernel may have been built without NUMA support.\n",
|
724 |
+
"I0000 00:00:1729823236.262494 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
725 |
+
"Your kernel may have been built without NUMA support.\n",
|
726 |
+
"I0000 00:00:1729823236.262547 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
727 |
+
"Your kernel may have been built without NUMA support.\n",
|
728 |
+
"I0000 00:00:1729823236.414390 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
729 |
+
"Your kernel may have been built without NUMA support.\n",
|
730 |
+
"I0000 00:00:1729823236.414547 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
731 |
+
"Your kernel may have been built without NUMA support.\n",
|
732 |
+
"2024-10-25 11:27:16.414564: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2112] Could not identify NUMA node of platform GPU id 0, defaulting to 0. Your kernel may not have been built with NUMA support.\n",
|
733 |
+
"2024-10-25 11:27:16.414596: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:198] Using CUDA malloc Async allocator for GPU: 0\n",
|
734 |
+
"I0000 00:00:1729823236.414877 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
735 |
+
"Your kernel may have been built without NUMA support.\n",
|
736 |
+
"2024-10-25 11:27:16.414914: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3586 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6\n"
|
737 |
+
]
|
738 |
+
},
|
739 |
+
{
|
740 |
+
"name": "stdout",
|
741 |
+
"output_type": "stream",
|
742 |
+
"text": [
|
743 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
744 |
+
]
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"name": "stderr",
|
748 |
+
"output_type": "stream",
|
749 |
+
"text": [
|
750 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
751 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
752 |
+
"I0000 00:00:1729823240.072256 713235 service.cc:146] XLA service 0x558b4abf8ec0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
753 |
+
"I0000 00:00:1729823240.072318 713235 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
754 |
+
"I0000 00:00:1729823240.199007 713235 service.cc:146] XLA service 0x558b4b330cb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
755 |
+
"I0000 00:00:1729823240.199045 713235 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
756 |
+
"I0000 00:00:1729823242.903035 713344 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
757 |
+
"\n"
|
758 |
+
]
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"name": "stdout",
|
762 |
+
"output_type": "stream",
|
763 |
+
"text": [
|
764 |
+
"R2 score: 0.72685\n"
|
765 |
+
]
|
766 |
+
},
|
767 |
+
{
|
768 |
+
"name": "stderr",
|
769 |
+
"output_type": "stream",
|
770 |
+
"text": [
|
771 |
+
"[I 2024-10-25 11:27:28,379] Trial 0 finished with value: 0.72685 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 9922, 'n_decay_l_0': 1e-05, 'last_dropout': 0.1, 'lr': 0.001}. Best is trial 0 with value: 0.72685.\n"
|
772 |
+
]
|
773 |
+
},
|
774 |
+
{
|
775 |
+
"name": "stdout",
|
776 |
+
"output_type": "stream",
|
777 |
+
"text": [
|
778 |
+
"Model already exists at save_model/full_model.keras\n",
|
779 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
780 |
+
]
|
781 |
+
},
|
782 |
+
{
|
783 |
+
"name": "stderr",
|
784 |
+
"output_type": "stream",
|
785 |
+
"text": [
|
786 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
787 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
788 |
+
"I0000 00:00:1729823251.215260 714082 service.cc:146] XLA service 0x55a8ec6b6500 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
789 |
+
"I0000 00:00:1729823251.215311 714082 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
790 |
+
"I0000 00:00:1729823251.333433 714082 service.cc:146] XLA service 0x55a8ec5cd290 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
791 |
+
"I0000 00:00:1729823251.333463 714082 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
792 |
+
"I0000 00:00:1729823263.351498 714193 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
793 |
+
"\n"
|
794 |
+
]
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"name": "stdout",
|
798 |
+
"output_type": "stream",
|
799 |
+
"text": [
|
800 |
+
"R2 score: 0.707063\n"
|
801 |
+
]
|
802 |
+
},
|
803 |
+
{
|
804 |
+
"name": "stderr",
|
805 |
+
"output_type": "stream",
|
806 |
+
"text": [
|
807 |
+
"[I 2024-10-25 11:28:11,955] Trial 1 finished with value: 0.707063 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 6572, 'n_decay_l_0': 1e-05, 'n_units_l_1': 1332, 'n_decay_l_1': 0.0001, 'last_dropout': 0.3, 'lr': 1e-05}. Best is trial 0 with value: 0.72685.\n"
|
808 |
+
]
|
809 |
+
},
|
810 |
+
{
|
811 |
+
"name": "stdout",
|
812 |
+
"output_type": "stream",
|
813 |
+
"text": [
|
814 |
+
"Model already exists at save_model/full_model.keras\n",
|
815 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
816 |
+
]
|
817 |
+
},
|
818 |
+
{
|
819 |
+
"name": "stderr",
|
820 |
+
"output_type": "stream",
|
821 |
+
"text": [
|
822 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
823 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
824 |
+
"I0000 00:00:1729823294.853158 716419 service.cc:146] XLA service 0x55b5e46a62f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
825 |
+
"I0000 00:00:1729823294.853225 716419 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
826 |
+
"I0000 00:00:1729823295.002577 716419 service.cc:146] XLA service 0x55b5e46e5fb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
827 |
+
"I0000 00:00:1729823295.002610 716419 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
828 |
+
"I0000 00:00:1729823297.511032 716525 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
829 |
+
"\n"
|
830 |
+
]
|
831 |
+
},
|
832 |
+
{
|
833 |
+
"name": "stdout",
|
834 |
+
"output_type": "stream",
|
835 |
+
"text": [
|
836 |
+
"R2 score: 0.705862\n"
|
837 |
+
]
|
838 |
+
},
|
839 |
+
{
|
840 |
+
"name": "stderr",
|
841 |
+
"output_type": "stream",
|
842 |
+
"text": [
|
843 |
+
"[I 2024-10-25 11:28:23,131] Trial 2 finished with value: 0.705862 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 3241, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'lr': 0.0001}. Best is trial 0 with value: 0.72685.\n"
|
844 |
+
]
|
845 |
+
},
|
846 |
+
{
|
847 |
+
"name": "stdout",
|
848 |
+
"output_type": "stream",
|
849 |
+
"text": [
|
850 |
+
"Model already exists at save_model/full_model.keras\n"
|
851 |
+
]
|
852 |
+
},
|
853 |
+
{
|
854 |
+
"name": "stderr",
|
855 |
+
"output_type": "stream",
|
856 |
+
"text": [
|
857 |
+
"2024-10-25 11:28:23.378722: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 286627920 exceeds 10% of free system memory.\n"
|
858 |
+
]
|
859 |
+
},
|
860 |
+
{
|
861 |
+
"name": "stdout",
|
862 |
+
"output_type": "stream",
|
863 |
+
"text": [
|
864 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
865 |
+
]
|
866 |
+
},
|
867 |
+
{
|
868 |
+
"name": "stderr",
|
869 |
+
"output_type": "stream",
|
870 |
+
"text": [
|
871 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
872 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
873 |
+
"I0000 00:00:1729823306.834696 718042 service.cc:146] XLA service 0x55ff6bc60e10 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
874 |
+
"I0000 00:00:1729823306.834744 718042 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
875 |
+
"I0000 00:00:1729823306.978987 718042 service.cc:146] XLA service 0x55ff6bc7c930 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
876 |
+
"I0000 00:00:1729823306.979019 718042 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
877 |
+
"I0000 00:00:1729823333.708484 718154 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
878 |
+
"\n"
|
879 |
+
]
|
880 |
+
},
|
881 |
+
{
|
882 |
+
"name": "stdout",
|
883 |
+
"output_type": "stream",
|
884 |
+
"text": [
|
885 |
+
"R2 score: 0.741337\n"
|
886 |
+
]
|
887 |
+
},
|
888 |
+
{
|
889 |
+
"name": "stderr",
|
890 |
+
"output_type": "stream",
|
891 |
+
"text": [
|
892 |
+
"[I 2024-10-25 11:30:53,672] Trial 3 finished with value: 0.741337 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 787, 'n_decay_l_0': 0.0001, 'n_units_l_1': 9082, 'n_decay_l_1': 0.001, 'n_units_l_2': 7890, 'n_decay_l_2': 0.001, 'last_dropout': 0.1, 'lr': 0.0001}. Best is trial 3 with value: 0.741337.\n"
|
893 |
+
]
|
894 |
+
},
|
895 |
+
{
|
896 |
+
"name": "stdout",
|
897 |
+
"output_type": "stream",
|
898 |
+
"text": [
|
899 |
+
"Model already exists at save_model/full_model.keras\n",
|
900 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
901 |
+
]
|
902 |
+
},
|
903 |
+
{
|
904 |
+
"name": "stderr",
|
905 |
+
"output_type": "stream",
|
906 |
+
"text": [
|
907 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
908 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
909 |
+
"I0000 00:00:1729823456.995677 725244 service.cc:146] XLA service 0x56214db0e060 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
910 |
+
"I0000 00:00:1729823456.995725 725244 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
911 |
+
"I0000 00:00:1729823457.136743 725244 service.cc:146] XLA service 0x56214da498b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
912 |
+
"I0000 00:00:1729823457.136787 725244 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
913 |
+
"I0000 00:00:1729823459.392929 725349 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
914 |
+
"\n"
|
915 |
+
]
|
916 |
+
},
|
917 |
+
{
|
918 |
+
"name": "stdout",
|
919 |
+
"output_type": "stream",
|
920 |
+
"text": [
|
921 |
+
"R2 score: 0.68373\n"
|
922 |
+
]
|
923 |
+
},
|
924 |
+
{
|
925 |
+
"name": "stderr",
|
926 |
+
"output_type": "stream",
|
927 |
+
"text": [
|
928 |
+
"[I 2024-10-25 11:31:13,484] Trial 4 finished with value: 0.68373 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 1253, 'n_decay_l_0': 0.0001, 'last_dropout': 0.2, 'lr': 1e-05}. Best is trial 3 with value: 0.741337.\n"
|
929 |
+
]
|
930 |
+
}
|
931 |
+
],
|
932 |
+
"source": [
"# Create the `ANO_ws_struct` study in `storage`, or load it if it already\n",
"# exists, and maximize the value returned by the objective.\n",
"study_ws_struct = optuna.create_study(\n",
"    study_name='ANO_ws_struct',\n",
"    storage=storage,\n",
"    direction=\"maximize\",\n",
"    pruner=optuna.pruners.SuccessiveHalvingPruner(\n",
"        reduction_factor=64,\n",
"        min_early_stopping_rate=10,\n",
"    ),\n",
"    load_if_exists=True,\n",
")\n",
"# Alternative pruner configuration, kept for reference:\n",
"# study_ws_fea = optuna.create_study(study_name='ANO_ws_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
"study_ws_struct.optimize(objective_ws_struct, n_trials=TRIALS)\n",
"\n",
"# Collect the trials by final state.\n",
"pruned_trials_ws_struct = study_ws_struct.get_trials(\n",
"    deepcopy=False,\n",
"    states=[TrialState.PRUNED],\n",
")\n",
"complete_trials_ws_struct = study_ws_struct.get_trials(\n",
"    deepcopy=False,\n",
"    states=[TrialState.COMPLETE],\n",
")"
|
938 |
+
]
|
939 |
+
},
|
940 |
+
{
|
941 |
+
"cell_type": "code",
|
942 |
+
"execution_count": 51,
|
943 |
+
"metadata": {},
|
944 |
+
"outputs": [
|
945 |
+
{
|
946 |
+
"name": "stderr",
|
947 |
+
"output_type": "stream",
|
948 |
+
"text": [
|
949 |
+
"[I 2024-10-25 11:31:13,504] A new study created in RDB with name: ANO_de_struct\n"
|
950 |
+
]
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"name": "stdout",
|
954 |
+
"output_type": "stream",
|
955 |
+
"text": [
|
956 |
+
"Model already exists at save_model/full_model.keras\n",
|
957 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
958 |
+
]
|
959 |
+
},
|
960 |
+
{
|
961 |
+
"name": "stderr",
|
962 |
+
"output_type": "stream",
|
963 |
+
"text": [
|
964 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
965 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
966 |
+
"I0000 00:00:1729823476.380438 735317 service.cc:146] XLA service 0x564b5beee4b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
967 |
+
"I0000 00:00:1729823476.380497 735317 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
968 |
+
"I0000 00:00:1729823476.549364 735317 service.cc:146] XLA service 0x564b5be2ad00 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
969 |
+
"I0000 00:00:1729823476.549448 735317 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
970 |
+
"I0000 00:00:1729823490.704246 735426 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
971 |
+
"\n"
|
972 |
+
]
|
973 |
+
},
|
974 |
+
{
|
975 |
+
"name": "stdout",
|
976 |
+
"output_type": "stream",
|
977 |
+
"text": [
|
978 |
+
"R2 score: 0.803869\n"
|
979 |
+
]
|
980 |
+
},
|
981 |
+
{
|
982 |
+
"name": "stderr",
|
983 |
+
"output_type": "stream",
|
984 |
+
"text": [
|
985 |
+
"[I 2024-10-25 11:32:02,303] Trial 0 finished with value: 0.803869 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 1737, 'n_decay_l_0': 1e-05, 'n_units_l_1': 6702, 'n_decay_l_1': 1e-05, 'last_dropout': 0.2, 'lr': 0.001}. Best is trial 0 with value: 0.803869.\n"
|
986 |
+
]
|
987 |
+
},
|
988 |
+
{
|
989 |
+
"name": "stdout",
|
990 |
+
"output_type": "stream",
|
991 |
+
"text": [
|
992 |
+
"Model already exists at save_model/full_model.keras\n",
|
993 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
994 |
+
]
|
995 |
+
},
|
996 |
+
{
|
997 |
+
"name": "stderr",
|
998 |
+
"output_type": "stream",
|
999 |
+
"text": [
|
1000 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1001 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1002 |
+
"I0000 00:00:1729823525.878834 736385 service.cc:146] XLA service 0x55bad3cb21e0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1003 |
+
"I0000 00:00:1729823525.878873 736385 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1004 |
+
"I0000 00:00:1729823526.015032 736385 service.cc:146] XLA service 0x55bad3c304f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1005 |
+
"I0000 00:00:1729823526.015066 736385 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1006 |
+
"I0000 00:00:1729823529.879054 736488 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1007 |
+
"\n"
|
1008 |
+
]
|
1009 |
+
},
|
1010 |
+
{
|
1011 |
+
"name": "stdout",
|
1012 |
+
"output_type": "stream",
|
1013 |
+
"text": [
|
1014 |
+
"R2 score: 0.826782\n"
|
1015 |
+
]
|
1016 |
+
},
|
1017 |
+
{
|
1018 |
+
"name": "stderr",
|
1019 |
+
"output_type": "stream",
|
1020 |
+
"text": [
|
1021 |
+
"[I 2024-10-25 11:32:39,308] Trial 1 finished with value: 0.826782 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 9935, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'n_units_l_1': 3544, 'n_decay_l_1': 1e-05, 'F_dropout_1': 0.3, 'lr': 0.0001}. Best is trial 1 with value: 0.826782.\n"
|
1022 |
+
]
|
1023 |
+
},
|
1024 |
+
{
|
1025 |
+
"name": "stdout",
|
1026 |
+
"output_type": "stream",
|
1027 |
+
"text": [
|
1028 |
+
"Model already exists at save_model/full_model.keras\n",
|
1029 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1030 |
+
]
|
1031 |
+
},
|
1032 |
+
{
|
1033 |
+
"name": "stderr",
|
1034 |
+
"output_type": "stream",
|
1035 |
+
"text": [
|
1036 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1037 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1038 |
+
"I0000 00:00:1729823562.604376 737355 service.cc:146] XLA service 0x55eaf2377f20 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1039 |
+
"I0000 00:00:1729823562.604431 737355 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1040 |
+
"I0000 00:00:1729823562.742863 737355 service.cc:146] XLA service 0x55eaf23d2e30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1041 |
+
"I0000 00:00:1729823562.742895 737355 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1042 |
+
"I0000 00:00:1729823566.592650 737461 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1043 |
+
"\n"
|
1044 |
+
]
|
1045 |
+
},
|
1046 |
+
{
|
1047 |
+
"name": "stdout",
|
1048 |
+
"output_type": "stream",
|
1049 |
+
"text": [
|
1050 |
+
"R2 score: 0.823751\n"
|
1051 |
+
]
|
1052 |
+
},
|
1053 |
+
{
|
1054 |
+
"name": "stderr",
|
1055 |
+
"output_type": "stream",
|
1056 |
+
"text": [
|
1057 |
+
"[I 2024-10-25 11:33:33,879] Trial 2 finished with value: 0.823751 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 7233, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'n_units_l_1': 4859, 'n_decay_l_1': 0.0001, 'F_dropout_1': 0.3, 'lr': 0.001}. Best is trial 1 with value: 0.826782.\n"
|
1058 |
+
]
|
1059 |
+
},
|
1060 |
+
{
|
1061 |
+
"name": "stdout",
|
1062 |
+
"output_type": "stream",
|
1063 |
+
"text": [
|
1064 |
+
"Model already exists at save_model/full_model.keras\n",
|
1065 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1066 |
+
]
|
1067 |
+
},
|
1068 |
+
{
|
1069 |
+
"name": "stderr",
|
1070 |
+
"output_type": "stream",
|
1071 |
+
"text": [
|
1072 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1073 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1074 |
+
"I0000 00:00:1729823616.856357 739058 service.cc:146] XLA service 0x55d8efa212a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1075 |
+
"I0000 00:00:1729823616.856406 739058 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1076 |
+
"I0000 00:00:1729823616.997938 739058 service.cc:146] XLA service 0x55d8ef979320 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1077 |
+
"I0000 00:00:1729823616.997975 739058 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1078 |
+
"I0000 00:00:1729823621.412038 739168 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1079 |
+
"\n"
|
1080 |
+
]
|
1081 |
+
},
|
1082 |
+
{
|
1083 |
+
"name": "stdout",
|
1084 |
+
"output_type": "stream",
|
1085 |
+
"text": [
|
1086 |
+
"R2 score: 0.796098\n"
|
1087 |
+
]
|
1088 |
+
},
|
1089 |
+
{
|
1090 |
+
"name": "stderr",
|
1091 |
+
"output_type": "stream",
|
1092 |
+
"text": [
|
1093 |
+
"[I 2024-10-25 11:34:39,639] Trial 3 finished with value: 0.796098 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 809, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.3, 'n_units_l_1': 3939, 'n_decay_l_1': 0.0001, 'F_dropout_1': 0.2, 'n_units_l_2': 6198, 'n_decay_l_2': 0.0001, 'F_dropout_2': 0.1, 'lr': 0.001}. Best is trial 1 with value: 0.826782.\n"
|
1094 |
+
]
|
1095 |
+
},
|
1096 |
+
{
|
1097 |
+
"name": "stdout",
|
1098 |
+
"output_type": "stream",
|
1099 |
+
"text": [
|
1100 |
+
"Model already exists at save_model/full_model.keras\n",
|
1101 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1102 |
+
]
|
1103 |
+
},
|
1104 |
+
{
|
1105 |
+
"name": "stderr",
|
1106 |
+
"output_type": "stream",
|
1107 |
+
"text": [
|
1108 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1109 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1110 |
+
"I0000 00:00:1729823682.926391 740686 service.cc:146] XLA service 0x5579d09bb100 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1111 |
+
"I0000 00:00:1729823682.926465 740686 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1112 |
+
"I0000 00:00:1729823683.068876 740686 service.cc:146] XLA service 0x5579d09fa810 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1113 |
+
"I0000 00:00:1729823683.068912 740686 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1114 |
+
"I0000 00:00:1729823696.228323 740798 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1115 |
+
"\n"
|
1116 |
+
]
|
1117 |
+
},
|
1118 |
+
{
|
1119 |
+
"name": "stdout",
|
1120 |
+
"output_type": "stream",
|
1121 |
+
"text": [
|
1122 |
+
"R2 score: 0.84961\n"
|
1123 |
+
]
|
1124 |
+
},
|
1125 |
+
{
|
1126 |
+
"name": "stderr",
|
1127 |
+
"output_type": "stream",
|
1128 |
+
"text": [
|
1129 |
+
"[I 2024-10-25 11:37:05,301] Trial 4 finished with value: 0.84961 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 7109, 'n_decay_l_0': 0.001, 'n_units_l_1': 3436, 'n_decay_l_1': 1e-05, 'last_dropout': 0.3, 'lr': 1e-05}. Best is trial 4 with value: 0.84961.\n"
|
1130 |
+
]
|
1131 |
+
}
|
1132 |
+
],
|
1133 |
+
"source": [
"# Create the `ANO_de_struct` study in `storage`, or load it if it already\n",
"# exists, and maximize the value returned by the objective.\n",
"study_de_struct = optuna.create_study(\n",
"    study_name='ANO_de_struct',\n",
"    storage=storage,\n",
"    direction=\"maximize\",\n",
"    pruner=optuna.pruners.SuccessiveHalvingPruner(\n",
"        reduction_factor=64,\n",
"        min_early_stopping_rate=10,\n",
"    ),\n",
"    load_if_exists=True,\n",
")\n",
"# Alternative pruner configuration, kept for reference:\n",
"# study_de_fea = optuna.create_study(study_name='ANO_de_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
"study_de_struct.optimize(objective_de_struct, n_trials=TRIALS)\n",
"\n",
"# Collect the trials by final state.\n",
"pruned_trials_de_struct = study_de_struct.get_trials(\n",
"    deepcopy=False,\n",
"    states=[TrialState.PRUNED],\n",
")\n",
"complete_trials_de_struct = study_de_struct.get_trials(\n",
"    deepcopy=False,\n",
"    states=[TrialState.COMPLETE],\n",
")"
|
1139 |
+
]
|
1140 |
+
},
|
1141 |
+
{
|
1142 |
+
"cell_type": "code",
|
1143 |
+
"execution_count": 52,
|
1144 |
+
"metadata": {},
|
1145 |
+
"outputs": [
|
1146 |
+
{
|
1147 |
+
"name": "stderr",
|
1148 |
+
"output_type": "stream",
|
1149 |
+
"text": [
|
1150 |
+
"[I 2024-10-25 11:37:05,323] A new study created in RDB with name: ANO_lo_struct\n"
|
1151 |
+
]
|
1152 |
+
},
|
1153 |
+
{
|
1154 |
+
"name": "stdout",
|
1155 |
+
"output_type": "stream",
|
1156 |
+
"text": [
|
1157 |
+
"Model already exists at save_model/full_model.keras\n",
|
1158 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1159 |
+
]
|
1160 |
+
},
|
1161 |
+
{
|
1162 |
+
"name": "stderr",
|
1163 |
+
"output_type": "stream",
|
1164 |
+
"text": [
|
1165 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1166 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1167 |
+
"I0000 00:00:1729823828.951072 753530 service.cc:146] XLA service 0x560175567120 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1168 |
+
"I0000 00:00:1729823828.951145 753530 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1169 |
+
"I0000 00:00:1729823829.108219 753530 service.cc:146] XLA service 0x56017553de50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1170 |
+
"I0000 00:00:1729823829.108251 753530 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1171 |
+
"I0000 00:00:1729823833.863752 753634 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1172 |
+
"\n"
|
1173 |
+
]
|
1174 |
+
},
|
1175 |
+
{
|
1176 |
+
"name": "stdout",
|
1177 |
+
"output_type": "stream",
|
1178 |
+
"text": [
|
1179 |
+
"R2 score: 0.679332\n"
|
1180 |
+
]
|
1181 |
+
},
|
1182 |
+
{
|
1183 |
+
"name": "stderr",
|
1184 |
+
"output_type": "stream",
|
1185 |
+
"text": [
|
1186 |
+
"[I 2024-10-25 11:39:42,324] Trial 0 finished with value: 0.679332 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 7114, 'n_decay_l_0': 0.001, 'F_dropout_0': 0.1, 'n_units_l_1': 7475, 'n_decay_l_1': 0.0001, 'F_dropout_1': 0.3, 'lr': 1e-05}. Best is trial 0 with value: 0.679332.\n"
|
1187 |
+
]
|
1188 |
+
},
|
1189 |
+
{
|
1190 |
+
"name": "stdout",
|
1191 |
+
"output_type": "stream",
|
1192 |
+
"text": [
|
1193 |
+
"Model already exists at save_model/full_model.keras\n",
|
1194 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1195 |
+
]
|
1196 |
+
},
|
1197 |
+
{
|
1198 |
+
"name": "stderr",
|
1199 |
+
"output_type": "stream",
|
1200 |
+
"text": [
|
1201 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1202 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1203 |
+
"I0000 00:00:1729823985.424040 762359 service.cc:146] XLA service 0x55d75759be40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1204 |
+
"I0000 00:00:1729823985.424082 762359 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1205 |
+
"I0000 00:00:1729823985.555550 762359 service.cc:146] XLA service 0x55d75744c2d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1206 |
+
"I0000 00:00:1729823985.555588 762359 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1207 |
+
"I0000 00:00:1729823989.791130 762463 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1208 |
+
"\n"
|
1209 |
+
]
|
1210 |
+
},
|
1211 |
+
{
|
1212 |
+
"name": "stdout",
|
1213 |
+
"output_type": "stream",
|
1214 |
+
"text": [
|
1215 |
+
"R2 score: 0.668488\n"
|
1216 |
+
]
|
1217 |
+
},
|
1218 |
+
{
|
1219 |
+
"name": "stderr",
|
1220 |
+
"output_type": "stream",
|
1221 |
+
"text": [
|
1222 |
+
"[I 2024-10-25 11:40:50,103] Trial 1 finished with value: 0.668488 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 2152, 'n_decay_l_0': 0.001, 'F_dropout_0': 0.1, 'n_units_l_1': 1830, 'n_decay_l_1': 0.0001, 'F_dropout_1': 0.1, 'n_units_l_2': 4427, 'n_decay_l_2': 0.0001, 'F_dropout_2': 0.3, 'lr': 0.0001}. Best is trial 0 with value: 0.679332.\n"
|
1223 |
+
]
|
1224 |
+
},
|
1225 |
+
{
|
1226 |
+
"name": "stdout",
|
1227 |
+
"output_type": "stream",
|
1228 |
+
"text": [
|
1229 |
+
"Model already exists at save_model/full_model.keras\n",
|
1230 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1231 |
+
]
|
1232 |
+
},
|
1233 |
+
{
|
1234 |
+
"name": "stderr",
|
1235 |
+
"output_type": "stream",
|
1236 |
+
"text": [
|
1237 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1238 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1239 |
+
"I0000 00:00:1729824052.960321 765604 service.cc:146] XLA service 0x55cc5280bdf0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1240 |
+
"I0000 00:00:1729824052.960390 765604 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1241 |
+
"I0000 00:00:1729824053.116021 765604 service.cc:146] XLA service 0x55cc50253a30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1242 |
+
"I0000 00:00:1729824053.116054 765604 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1243 |
+
"I0000 00:00:1729824055.695706 765714 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1244 |
+
"\n"
|
1245 |
+
]
|
1246 |
+
},
|
1247 |
+
{
|
1248 |
+
"name": "stdout",
|
1249 |
+
"output_type": "stream",
|
1250 |
+
"text": [
|
1251 |
+
"R2 score: 0.662751\n"
|
1252 |
+
]
|
1253 |
+
},
|
1254 |
+
{
|
1255 |
+
"name": "stderr",
|
1256 |
+
"output_type": "stream",
|
1257 |
+
"text": [
|
1258 |
+
"[I 2024-10-25 11:41:01,389] Trial 2 finished with value: 0.662751 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 2892, 'n_decay_l_0': 0.001, 'last_dropout': 0.1, 'lr': 0.001}. Best is trial 0 with value: 0.679332.\n"
|
1259 |
+
]
|
1260 |
+
},
|
1261 |
+
{
|
1262 |
+
"name": "stdout",
|
1263 |
+
"output_type": "stream",
|
1264 |
+
"text": [
|
1265 |
+
"Model already exists at save_model/full_model.keras\n",
|
1266 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1267 |
+
]
|
1268 |
+
},
|
1269 |
+
{
|
1270 |
+
"name": "stderr",
|
1271 |
+
"output_type": "stream",
|
1272 |
+
"text": [
|
1273 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1274 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1275 |
+
"I0000 00:00:1729824064.281415 766911 service.cc:146] XLA service 0x55b827b832f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1276 |
+
"I0000 00:00:1729824064.281454 766911 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1277 |
+
"I0000 00:00:1729824064.424930 766911 service.cc:146] XLA service 0x55b827b5a3c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1278 |
+
"I0000 00:00:1729824064.424972 766911 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1279 |
+
"I0000 00:00:1729824067.087899 767014 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1280 |
+
"\n"
|
1281 |
+
]
|
1282 |
+
},
|
1283 |
+
{
|
1284 |
+
"name": "stdout",
|
1285 |
+
"output_type": "stream",
|
1286 |
+
"text": [
|
1287 |
+
"R2 score: 0.644237\n"
|
1288 |
+
]
|
1289 |
+
},
|
1290 |
+
{
|
1291 |
+
"name": "stderr",
|
1292 |
+
"output_type": "stream",
|
1293 |
+
"text": [
|
1294 |
+
"[I 2024-10-25 11:41:12,311] Trial 3 finished with value: 0.644237 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 6028, 'n_decay_l_0': 0.0001, 'last_dropout': 0.2, 'lr': 0.001}. Best is trial 0 with value: 0.679332.\n"
|
1295 |
+
]
|
1296 |
+
},
|
1297 |
+
{
|
1298 |
+
"name": "stdout",
|
1299 |
+
"output_type": "stream",
|
1300 |
+
"text": [
|
1301 |
+
"Model already exists at save_model/full_model.keras\n",
|
1302 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1303 |
+
]
|
1304 |
+
},
|
1305 |
+
{
|
1306 |
+
"name": "stderr",
|
1307 |
+
"output_type": "stream",
|
1308 |
+
"text": [
|
1309 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1310 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1311 |
+
"I0000 00:00:1729824075.529341 767599 service.cc:146] XLA service 0x563c07a27f10 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1312 |
+
"I0000 00:00:1729824075.529392 767599 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1313 |
+
"I0000 00:00:1729824075.650832 767599 service.cc:146] XLA service 0x563c07a82e20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1314 |
+
"I0000 00:00:1729824075.650868 767599 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1315 |
+
"I0000 00:00:1729824078.421404 767708 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1316 |
+
"\n"
|
1317 |
+
]
|
1318 |
+
},
|
1319 |
+
{
|
1320 |
+
"name": "stdout",
|
1321 |
+
"output_type": "stream",
|
1322 |
+
"text": [
|
1323 |
+
"R2 score: 0.619821\n"
|
1324 |
+
]
|
1325 |
+
},
|
1326 |
+
{
|
1327 |
+
"name": "stderr",
|
1328 |
+
"output_type": "stream",
|
1329 |
+
"text": [
|
1330 |
+
"[I 2024-10-25 11:41:23,815] Trial 4 finished with value: 0.619821 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 8402, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'lr': 0.001}. Best is trial 0 with value: 0.679332.\n"
|
1331 |
+
]
|
1332 |
+
}
|
1333 |
+
],
|
1334 |
+
"source": [
|
1335 |
+
"study_lo_struct = optuna.create_study(study_name='ANO_lo_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(reduction_factor=64, min_early_stopping_rate=10),load_if_exists=True) \n",
|
1336 |
+
"# study_lo_fea = optuna.create_study(study_name='ANO_lo_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
|
1337 |
+
"study_lo_struct.optimize(objective_lo_struct, n_trials=TRIALS)\n",
|
1338 |
+
"pruned_trials_lo_struct = study_lo_struct.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1339 |
+
"complete_trials_lo_struct = study_lo_struct.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
|
1340 |
+
]
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"cell_type": "code",
|
1344 |
+
"execution_count": 53,
|
1345 |
+
"metadata": {},
|
1346 |
+
"outputs": [
|
1347 |
+
{
|
1348 |
+
"name": "stderr",
|
1349 |
+
"output_type": "stream",
|
1350 |
+
"text": [
|
1351 |
+
"[I 2024-10-25 11:41:23,834] A new study created in RDB with name: ANO_hu_struct\n"
|
1352 |
+
]
|
1353 |
+
},
|
1354 |
+
{
|
1355 |
+
"name": "stdout",
|
1356 |
+
"output_type": "stream",
|
1357 |
+
"text": [
|
1358 |
+
"Model already exists at save_model/full_model.keras\n",
|
1359 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1360 |
+
]
|
1361 |
+
},
|
1362 |
+
{
|
1363 |
+
"name": "stderr",
|
1364 |
+
"output_type": "stream",
|
1365 |
+
"text": [
|
1366 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1367 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1368 |
+
"I0000 00:00:1729824086.884600 768220 service.cc:146] XLA service 0x5626a2e78390 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1369 |
+
"I0000 00:00:1729824086.884652 768220 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1370 |
+
"I0000 00:00:1729824087.003367 768220 service.cc:146] XLA service 0x5626a2e4fce0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1371 |
+
"I0000 00:00:1729824087.003400 768220 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1372 |
+
"I0000 00:00:1729824105.963199 768323 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1373 |
+
"\n"
|
1374 |
+
]
|
1375 |
+
},
|
1376 |
+
{
|
1377 |
+
"name": "stdout",
|
1378 |
+
"output_type": "stream",
|
1379 |
+
"text": [
|
1380 |
+
"R2 score: 0.856321\n"
|
1381 |
+
]
|
1382 |
+
},
|
1383 |
+
{
|
1384 |
+
"name": "stderr",
|
1385 |
+
"output_type": "stream",
|
1386 |
+
"text": [
|
1387 |
+
"[I 2024-10-25 11:42:47,320] Trial 0 finished with value: 0.856321 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 6594, 'n_decay_l_0': 0.0001, 'n_units_l_1': 301, 'n_decay_l_1': 0.001, 'last_dropout': 0.3, 'lr': 1e-05}. Best is trial 0 with value: 0.856321.\n"
|
1388 |
+
]
|
1389 |
+
},
|
1390 |
+
{
|
1391 |
+
"name": "stdout",
|
1392 |
+
"output_type": "stream",
|
1393 |
+
"text": [
|
1394 |
+
"Model already exists at save_model/full_model.keras\n",
|
1395 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1396 |
+
]
|
1397 |
+
},
|
1398 |
+
{
|
1399 |
+
"name": "stderr",
|
1400 |
+
"output_type": "stream",
|
1401 |
+
"text": [
|
1402 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1403 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1404 |
+
"I0000 00:00:1729824170.843053 772954 service.cc:146] XLA service 0x558ed359fba0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1405 |
+
"I0000 00:00:1729824170.843094 772954 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1406 |
+
"I0000 00:00:1729824170.980423 772954 service.cc:146] XLA service 0x558ed34d34b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1407 |
+
"I0000 00:00:1729824170.980455 772954 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1408 |
+
"I0000 00:00:1729824174.981970 773059 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1409 |
+
"\n"
|
1410 |
+
]
|
1411 |
+
},
|
1412 |
+
{
|
1413 |
+
"name": "stdout",
|
1414 |
+
"output_type": "stream",
|
1415 |
+
"text": [
|
1416 |
+
"R2 score: 0.852474\n"
|
1417 |
+
]
|
1418 |
+
},
|
1419 |
+
{
|
1420 |
+
"name": "stderr",
|
1421 |
+
"output_type": "stream",
|
1422 |
+
"text": [
|
1423 |
+
"[I 2024-10-25 11:46:37,297] Trial 1 finished with value: 0.852474 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 6712, 'n_decay_l_0': 0.0001, 'F_dropout_0': 0.1, 'n_units_l_1': 6556, 'n_decay_l_1': 0.001, 'F_dropout_1': 0.1, 'lr': 1e-05}. Best is trial 0 with value: 0.856321.\n"
|
1424 |
+
]
|
1425 |
+
},
|
1426 |
+
{
|
1427 |
+
"name": "stdout",
|
1428 |
+
"output_type": "stream",
|
1429 |
+
"text": [
|
1430 |
+
"Model already exists at save_model/full_model.keras\n",
|
1431 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1432 |
+
]
|
1433 |
+
},
|
1434 |
+
{
|
1435 |
+
"name": "stderr",
|
1436 |
+
"output_type": "stream",
|
1437 |
+
"text": [
|
1438 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1439 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1440 |
+
"I0000 00:00:1729824400.201392 788855 service.cc:146] XLA service 0x55a8a31a00b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1441 |
+
"I0000 00:00:1729824400.201453 788855 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1442 |
+
"I0000 00:00:1729824400.333349 788855 service.cc:146] XLA service 0x55a8a3176de0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1443 |
+
"I0000 00:00:1729824400.333383 788855 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1444 |
+
"I0000 00:00:1729824402.770049 788964 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1445 |
+
"\n"
|
1446 |
+
]
|
1447 |
+
},
|
1448 |
+
{
|
1449 |
+
"name": "stdout",
|
1450 |
+
"output_type": "stream",
|
1451 |
+
"text": [
|
1452 |
+
"R2 score: 0.839939\n"
|
1453 |
+
]
|
1454 |
+
},
|
1455 |
+
{
|
1456 |
+
"name": "stderr",
|
1457 |
+
"output_type": "stream",
|
1458 |
+
"text": [
|
1459 |
+
"[I 2024-10-25 11:46:47,755] Trial 2 finished with value: 0.839939 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 4994, 'n_decay_l_0': 0.0001, 'last_dropout': 0.1, 'lr': 0.001}. Best is trial 0 with value: 0.856321.\n"
|
1460 |
+
]
|
1461 |
+
},
|
1462 |
+
{
|
1463 |
+
"name": "stdout",
|
1464 |
+
"output_type": "stream",
|
1465 |
+
"text": [
|
1466 |
+
"Model already exists at save_model/full_model.keras\n",
|
1467 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1468 |
+
]
|
1469 |
+
},
|
1470 |
+
{
|
1471 |
+
"name": "stderr",
|
1472 |
+
"output_type": "stream",
|
1473 |
+
"text": [
|
1474 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1475 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1476 |
+
"I0000 00:00:1729824410.699218 789515 service.cc:146] XLA service 0x55ac8eda5d40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1477 |
+
"I0000 00:00:1729824410.699260 789515 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1478 |
+
"I0000 00:00:1729824410.834295 789515 service.cc:146] XLA service 0x55ac8ece7750 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1479 |
+
"I0000 00:00:1729824410.834333 789515 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1480 |
+
"I0000 00:00:1729824435.249758 789626 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1481 |
+
"\n"
|
1482 |
+
]
|
1483 |
+
},
|
1484 |
+
{
|
1485 |
+
"name": "stdout",
|
1486 |
+
"output_type": "stream",
|
1487 |
+
"text": [
|
1488 |
+
"R2 score: 0.845373\n"
|
1489 |
+
]
|
1490 |
+
},
|
1491 |
+
{
|
1492 |
+
"name": "stderr",
|
1493 |
+
"output_type": "stream",
|
1494 |
+
"text": [
|
1495 |
+
"[I 2024-10-25 11:48:32,252] Trial 3 finished with value: 0.845373 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 4463, 'n_decay_l_0': 0.0001, 'n_units_l_1': 1966, 'n_decay_l_1': 0.001, 'n_units_l_2': 924, 'n_decay_l_2': 0.0001, 'last_dropout': 0.1, 'lr': 0.0001}. Best is trial 0 with value: 0.856321.\n"
|
1496 |
+
]
|
1497 |
+
},
|
1498 |
+
{
|
1499 |
+
"name": "stdout",
|
1500 |
+
"output_type": "stream",
|
1501 |
+
"text": [
|
1502 |
+
"Model already exists at save_model/full_model.keras\n",
|
1503 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1504 |
+
]
|
1505 |
+
},
|
1506 |
+
{
|
1507 |
+
"name": "stderr",
|
1508 |
+
"output_type": "stream",
|
1509 |
+
"text": [
|
1510 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1511 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1512 |
+
"I0000 00:00:1729824515.181650 795905 service.cc:146] XLA service 0x55dbccb5d560 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1513 |
+
"I0000 00:00:1729824515.181691 795905 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1514 |
+
"I0000 00:00:1729824515.312754 795905 service.cc:146] XLA service 0x55dbccb71e90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1515 |
+
"I0000 00:00:1729824515.312792 795905 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1516 |
+
"I0000 00:00:1729824518.617949 796015 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1517 |
+
"\n"
|
1518 |
+
]
|
1519 |
+
},
|
1520 |
+
{
|
1521 |
+
"name": "stdout",
|
1522 |
+
"output_type": "stream",
|
1523 |
+
"text": [
|
1524 |
+
"R2 score: 0.812906\n"
|
1525 |
+
]
|
1526 |
+
},
|
1527 |
+
{
|
1528 |
+
"name": "stderr",
|
1529 |
+
"output_type": "stream",
|
1530 |
+
"text": [
|
1531 |
+
"[I 2024-10-25 11:49:05,409] Trial 4 finished with value: 0.812906 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 1477, 'n_decay_l_0': 0.0001, 'F_dropout_0': 0.3, 'n_units_l_1': 5762, 'n_decay_l_1': 1e-05, 'F_dropout_1': 0.2, 'lr': 0.001}. Best is trial 0 with value: 0.856321.\n"
|
1532 |
+
]
|
1533 |
+
}
|
1534 |
+
],
|
1535 |
+
"source": [
|
1536 |
+
"study_hu_struct = optuna.create_study(study_name='ANO_hu_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(reduction_factor=64, min_early_stopping_rate=10),load_if_exists=True) \n",
|
1537 |
+
"# study_hu_fea = optuna.create_study(study_name='ANO_hu_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
|
1538 |
+
"study_hu_struct.optimize(objective_hu_struct, n_trials=TRIALS)\n",
|
1539 |
+
"pruned_trials_hu_struct = study_hu_struct.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1540 |
+
"complete_trials_hu_struct = study_hu_struct.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
|
1541 |
+
]
|
1542 |
+
},
|
1543 |
+
{
|
1544 |
+
"cell_type": "code",
|
1545 |
+
"execution_count": 58,
|
1546 |
+
"metadata": {},
|
1547 |
+
"outputs": [
|
1548 |
+
{
|
1549 |
+
"name": "stdout",
|
1550 |
+
"output_type": "stream",
|
1551 |
+
"text": [
|
1552 |
+
"Study statistics: [ws_structure] \n",
|
1553 |
+
" Number of finished trials: 5\n",
|
1554 |
+
" Number of pruned trials: 0\n",
|
1555 |
+
" Number of complete trials: 5\n",
|
1556 |
+
"Best trial:\n",
|
1557 |
+
" Value: 0.741337\n",
|
1558 |
+
" Params: \n",
|
1559 |
+
" n_layers: 3\n",
|
1560 |
+
" layer_dropout: 0\n",
|
1561 |
+
" n_units_l_0: 787\n",
|
1562 |
+
" n_decay_l_0: 0.0001\n",
|
1563 |
+
" n_units_l_1: 9082\n",
|
1564 |
+
" n_decay_l_1: 0.001\n",
|
1565 |
+
" n_units_l_2: 7890\n",
|
1566 |
+
" n_decay_l_2: 0.001\n",
|
1567 |
+
" last_dropout: 0.1\n",
|
1568 |
+
" lr: 0.0001\n"
|
1569 |
+
]
|
1570 |
+
}
|
1571 |
+
],
|
1572 |
+
"source": [
|
1573 |
+
"print(\"Study statistics: [ws_structure] \")\n",
|
1574 |
+
"print(\" Number of finished trials: \", len(study_ws_struct.trials))\n",
|
1575 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_ws_struct))\n",
|
1576 |
+
"print(\" Number of complete trials: \", len(complete_trials_ws_struct))\n",
|
1577 |
+
"print(\"Best trial:\")\n",
|
1578 |
+
"trials_tmp = study_ws_struct.best_trial\n",
|
1579 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1580 |
+
"print(\" Params: \")\n",
|
1581 |
+
"for key, value in trials_tmp.params.items():\n",
|
1582 |
+
" print(\" {}: {}\".format(key, value))"
|
1583 |
+
]
|
1584 |
+
},
|
1585 |
+
{
|
1586 |
+
"cell_type": "code",
|
1587 |
+
"execution_count": 59,
|
1588 |
+
"metadata": {},
|
1589 |
+
"outputs": [
|
1590 |
+
{
|
1591 |
+
"name": "stdout",
|
1592 |
+
"output_type": "stream",
|
1593 |
+
"text": [
|
1594 |
+
"Study statistics: [de_structure] \n",
|
1595 |
+
" Number of finished trials: 5\n",
|
1596 |
+
" Number of pruned trials: 0\n",
|
1597 |
+
" Number of complete trials: 5\n",
|
1598 |
+
"Best trial:\n",
|
1599 |
+
" Value: 0.84961\n",
|
1600 |
+
" Params: \n",
|
1601 |
+
" n_layers: 2\n",
|
1602 |
+
" layer_dropout: 0\n",
|
1603 |
+
" n_units_l_0: 7109\n",
|
1604 |
+
" n_decay_l_0: 0.001\n",
|
1605 |
+
" n_units_l_1: 3436\n",
|
1606 |
+
" n_decay_l_1: 1e-05\n",
|
1607 |
+
" last_dropout: 0.3\n",
|
1608 |
+
" lr: 1e-05\n"
|
1609 |
+
]
|
1610 |
+
}
|
1611 |
+
],
|
1612 |
+
"source": [
|
1613 |
+
"print(\"Study statistics: [de_structure] \")\n",
|
1614 |
+
"print(\" Number of finished trials: \", len(study_de_struct.trials))\n",
|
1615 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_de_struct))\n",
|
1616 |
+
"print(\" Number of complete trials: \", len(complete_trials_de_struct))\n",
|
1617 |
+
"print(\"Best trial:\")\n",
|
1618 |
+
"trials_tmp = study_de_struct.best_trial\n",
|
1619 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1620 |
+
"print(\" Params: \")\n",
|
1621 |
+
"for key, value in trials_tmp.params.items():\n",
|
1622 |
+
" print(\" {}: {}\".format(key, value))"
|
1623 |
+
]
|
1624 |
+
},
|
1625 |
+
{
|
1626 |
+
"cell_type": "code",
|
1627 |
+
"execution_count": 60,
|
1628 |
+
"metadata": {},
|
1629 |
+
"outputs": [
|
1630 |
+
{
|
1631 |
+
"name": "stdout",
|
1632 |
+
"output_type": "stream",
|
1633 |
+
"text": [
|
1634 |
+
"Study statistics: [lo_structure] \n",
|
1635 |
+
" Number of finished trials: 5\n",
|
1636 |
+
" Number of pruned trials: 0\n",
|
1637 |
+
" Number of complete trials: 5\n",
|
1638 |
+
"Best trial:\n",
|
1639 |
+
" Value: 0.679332\n",
|
1640 |
+
" Params: \n",
|
1641 |
+
" n_layers: 2\n",
|
1642 |
+
" layer_dropout: 1\n",
|
1643 |
+
" n_units_l_0: 7114\n",
|
1644 |
+
" n_decay_l_0: 0.001\n",
|
1645 |
+
" F_dropout_0: 0.1\n",
|
1646 |
+
" n_units_l_1: 7475\n",
|
1647 |
+
" n_decay_l_1: 0.0001\n",
|
1648 |
+
" F_dropout_1: 0.3\n",
|
1649 |
+
" lr: 1e-05\n"
|
1650 |
+
]
|
1651 |
+
}
|
1652 |
+
],
|
1653 |
+
"source": [
|
1654 |
+
"print(\"Study statistics: [lo_structure] \")\n",
|
1655 |
+
"print(\" Number of finished trials: \", len(study_lo_struct.trials))\n",
|
1656 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_lo_struct))\n",
|
1657 |
+
"print(\" Number of complete trials: \", len(complete_trials_lo_struct))\n",
|
1658 |
+
"print(\"Best trial:\")\n",
|
1659 |
+
"trials_tmp = study_lo_struct.best_trial\n",
|
1660 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1661 |
+
"print(\" Params: \")\n",
|
1662 |
+
"for key, value in trials_tmp.params.items():\n",
|
1663 |
+
" print(\" {}: {}\".format(key, value))"
|
1664 |
+
]
|
1665 |
+
},
|
1666 |
+
{
|
1667 |
+
"cell_type": "code",
|
1668 |
+
"execution_count": 61,
|
1669 |
+
"metadata": {},
|
1670 |
+
"outputs": [
|
1671 |
+
{
|
1672 |
+
"name": "stdout",
|
1673 |
+
"output_type": "stream",
|
1674 |
+
"text": [
|
1675 |
+
"Study statistics: [hu_structure] \n",
|
1676 |
+
" Number of finished trials: 5\n",
|
1677 |
+
" Number of pruned trials: 0\n",
|
1678 |
+
" Number of complete trials: 5\n",
|
1679 |
+
"Best trial:\n",
|
1680 |
+
" Value: 0.856321\n",
|
1681 |
+
" Params: \n",
|
1682 |
+
" n_layers: 2\n",
|
1683 |
+
" layer_dropout: 0\n",
|
1684 |
+
" n_units_l_0: 6594\n",
|
1685 |
+
" n_decay_l_0: 0.0001\n",
|
1686 |
+
" n_units_l_1: 301\n",
|
1687 |
+
" n_decay_l_1: 0.001\n",
|
1688 |
+
" last_dropout: 0.3\n",
|
1689 |
+
" lr: 1e-05\n"
|
1690 |
+
]
|
1691 |
+
}
|
1692 |
+
],
|
1693 |
+
"source": [
|
1694 |
+
"print(\"Study statistics: [hu_structure] \")\n",
|
1695 |
+
"print(\" Number of finished trials: \", len(study_hu_struct.trials))\n",
|
1696 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_hu_struct))\n",
|
1697 |
+
"print(\" Number of complete trials: \", len(complete_trials_hu_struct))\n",
|
1698 |
+
"print(\"Best trial:\")\n",
|
1699 |
+
"trials_tmp = study_hu_struct.best_trial\n",
|
1700 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1701 |
+
"print(\" Params: \")\n",
|
1702 |
+
"for key, value in trials_tmp.params.items():\n",
|
1703 |
+
" print(\" {}: {}\".format(key, value))"
|
1704 |
+
]
|
1705 |
+
},
|
1706 |
+
{
|
1707 |
+
"cell_type": "code",
|
1708 |
+
"execution_count": null,
|
1709 |
+
"metadata": {},
|
1710 |
+
"outputs": [],
|
1711 |
+
"source": []
|
1712 |
+
},
|
1713 |
+
{
|
1714 |
+
"cell_type": "code",
|
1715 |
+
"execution_count": null,
|
1716 |
+
"metadata": {},
|
1717 |
+
"outputs": [],
|
1718 |
+
"source": []
|
1719 |
+
},
|
1720 |
+
{
|
1721 |
+
"cell_type": "code",
|
1722 |
+
"execution_count": null,
|
1723 |
+
"metadata": {},
|
1724 |
+
"outputs": [],
|
1725 |
+
"source": []
|
1726 |
+
}
|
1727 |
+
],
|
1728 |
+
"metadata": {
|
1729 |
+
"kernelspec": {
|
1730 |
+
"display_name": "ai",
|
1731 |
+
"language": "python",
|
1732 |
+
"name": "python3"
|
1733 |
+
},
|
1734 |
+
"language_info": {
|
1735 |
+
"codemirror_mode": {
|
1736 |
+
"name": "ipython",
|
1737 |
+
"version": 3
|
1738 |
+
},
|
1739 |
+
"file_extension": ".py",
|
1740 |
+
"mimetype": "text/x-python",
|
1741 |
+
"name": "python",
|
1742 |
+
"nbconvert_exporter": "python",
|
1743 |
+
"pygments_lexer": "ipython3",
|
1744 |
+
"version": "3.12.2"
|
1745 |
+
},
|
1746 |
+
"orig_nbformat": 4
|
1747 |
+
},
|
1748 |
+
"nbformat": 4,
|
1749 |
+
"nbformat_minor": 2
|
1750 |
+
}
|
6_ANO_network_[fea_struc].ipynb
ADDED
@@ -0,0 +1,1992 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import os\n",
|
10 |
+
"import sys\n",
|
11 |
+
"import numpy as np\n",
|
12 |
+
"import pandas as pd\n",
|
13 |
+
"import seaborn as sns\n",
|
14 |
+
"import matplotlib.pyplot as plt\n",
|
15 |
+
"import matplotlib.patches as mpatches\n",
|
16 |
+
"import gc\n",
|
17 |
+
"import time\n",
|
18 |
+
"import subprocess\n",
|
19 |
+
"import logging\n",
|
20 |
+
"from concurrent.futures import ProcessPoolExecutor, as_completed"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "code",
|
25 |
+
"execution_count": 2,
|
26 |
+
"metadata": {},
|
27 |
+
"outputs": [],
|
28 |
+
"source": [
|
29 |
+
"from rdkit import Chem\n",
|
30 |
+
"from rdkit.Chem import AllChem, DataStructs, Draw\n",
|
31 |
+
"from rdkit import RDConfig\n",
|
32 |
+
"from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges\n",
|
33 |
+
"from rdkit.Chem.AllChem import GetMorganGenerator\n",
|
34 |
+
"from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray\n",
|
35 |
+
"from rdkit.Avalon.pyAvalonTools import GetAvalonFP"
|
36 |
+
]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"cell_type": "code",
|
40 |
+
"execution_count": 3,
|
41 |
+
"metadata": {},
|
42 |
+
"outputs": [
|
43 |
+
{
|
44 |
+
"name": "stderr",
|
45 |
+
"output_type": "stream",
|
46 |
+
"text": [
|
47 |
+
"2024-11-04 22:59:19.830835: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
|
48 |
+
"2024-11-04 22:59:19.845573: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
|
49 |
+
"2024-11-04 22:59:19.849643: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
|
50 |
+
"2024-11-04 22:59:19.860597: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
51 |
+
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
52 |
+
"2024-11-04 22:59:21.010480: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
|
53 |
+
]
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"source": [
|
57 |
+
"import tensorflow as tf\n",
|
58 |
+
"from tensorflow import keras\n",
|
59 |
+
"from tensorflow.keras import layers\n",
|
60 |
+
"from tensorflow.keras.models import Sequential\n",
|
61 |
+
"from tensorflow.keras.layers import Dense, Dropout, Activation\n",
|
62 |
+
"from tensorflow.keras.regularizers import l2\n",
|
63 |
+
"from tensorflow.keras.optimizers import Adam\n",
|
64 |
+
"from tensorflow.keras import regularizers"
|
65 |
+
]
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"cell_type": "code",
|
69 |
+
"execution_count": 4,
|
70 |
+
"metadata": {},
|
71 |
+
"outputs": [],
|
72 |
+
"source": [
|
73 |
+
"from sklearn.model_selection import train_test_split\n",
|
74 |
+
"from sklearn.linear_model import Ridge\n",
|
75 |
+
"from sklearn.ensemble import RandomForestRegressor\n",
|
76 |
+
"from sklearn.neural_network import MLPRegressor\n",
|
77 |
+
"from sklearn.svm import SVR\n",
|
78 |
+
"from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error"
|
79 |
+
]
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"cell_type": "code",
|
83 |
+
"execution_count": 5,
|
84 |
+
"metadata": {},
|
85 |
+
"outputs": [],
|
86 |
+
"source": [
|
87 |
+
"import optuna\n",
|
88 |
+
"from optuna.trial import TrialState\n",
|
89 |
+
"from optuna.integration import TFKerasPruningCallback"
|
90 |
+
]
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"cell_type": "code",
|
94 |
+
"execution_count": 6,
|
95 |
+
"metadata": {},
|
96 |
+
"outputs": [],
|
97 |
+
"source": [
|
98 |
+
"from extra_code.feature_selection import selection_data_descriptor_compress, selection_fromStudy_compress"
|
99 |
+
]
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"cell_type": "code",
|
103 |
+
"execution_count": 7,
|
104 |
+
"metadata": {},
|
105 |
+
"outputs": [
|
106 |
+
{
|
107 |
+
"name": "stderr",
|
108 |
+
"output_type": "stream",
|
109 |
+
"text": [
|
110 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
111 |
+
"I0000 00:00:1730728761.540219 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
112 |
+
"Your kernel may have been built without NUMA support.\n",
|
113 |
+
"I0000 00:00:1730728761.611323 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
114 |
+
"Your kernel may have been built without NUMA support.\n",
|
115 |
+
"I0000 00:00:1730728761.611412 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
116 |
+
"Your kernel may have been built without NUMA support.\n"
|
117 |
+
]
|
118 |
+
}
|
119 |
+
],
|
120 |
+
"source": [
|
121 |
+
"tf.keras.backend.clear_session()\n",
|
122 |
+
"gpus = tf.config.experimental.list_physical_devices('GPU')\n",
|
123 |
+
"if gpus:\n",
|
124 |
+
" try:\n",
|
125 |
+
" for gpu in gpus:\n",
|
126 |
+
" tf.config.experimental.set_memory_growth(gpu, True)\n",
|
127 |
+
" except RuntimeError as e:\n",
|
128 |
+
" print(e)"
|
129 |
+
]
|
130 |
+
},
|
131 |
+
{
|
132 |
+
"cell_type": "code",
|
133 |
+
"execution_count": 8,
|
134 |
+
"metadata": {},
|
135 |
+
"outputs": [],
|
136 |
+
"source": [
|
137 |
+
"target_path = \"result/6_ANO_network_[fea_struc]\"\n",
|
138 |
+
"os.makedirs(target_path, exist_ok=True)"
|
139 |
+
]
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"cell_type": "code",
|
143 |
+
"execution_count": 9,
|
144 |
+
"metadata": {},
|
145 |
+
"outputs": [],
|
146 |
+
"source": [
|
147 |
+
"data_ws = pd.read_csv('./data/ws496_logS.csv', dtype={'SMILES': 'string'})\n",
|
148 |
+
"smiles_ws = data_ws['SMILES']\n",
|
149 |
+
"y_ws = data_ws.iloc[:, 2]\n",
|
150 |
+
"\n",
|
151 |
+
"data_delaney = pd.read_csv('./data/delaney-processed.csv', dtype={'smiles': 'string'})\n",
|
152 |
+
"smiles_de = data_delaney['smiles']\n",
|
153 |
+
"y_de = data_delaney.iloc[:, 1]\n",
|
154 |
+
"\n",
|
155 |
+
"data_lovric2020 = pd.read_csv('./data/Lovric2020_logS0.csv', dtype={'isomeric_smiles': 'string'})\n",
|
156 |
+
"smiles_lo = data_lovric2020['isomeric_smiles']\n",
|
157 |
+
"y_lo = data_lovric2020.iloc[:, 1]\n",
|
158 |
+
"\n",
|
159 |
+
"data_huuskonen = pd.read_csv('./data/huusk.csv', dtype={'SMILES': 'string'})\n",
|
160 |
+
"smiles_hu = data_huuskonen['SMILES']\n",
|
161 |
+
"y_hu = data_huuskonen.iloc[:, -1].astype('float')"
|
162 |
+
]
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"cell_type": "code",
|
166 |
+
"execution_count": 10,
|
167 |
+
"metadata": {},
|
168 |
+
"outputs": [],
|
169 |
+
"source": [
|
170 |
+
"def mol3d(mol):\n",
|
171 |
+
" mol = Chem.AddHs(mol)\n",
|
172 |
+
" optimization_methods = [\n",
|
173 |
+
" (AllChem.EmbedMolecule, (mol, AllChem.ETKDGv3()), {}),\n",
|
174 |
+
" (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),\n",
|
175 |
+
" (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200})\n",
|
176 |
+
" ]\n",
|
177 |
+
"\n",
|
178 |
+
" for method, args, kwargs in optimization_methods:\n",
|
179 |
+
" try:\n",
|
180 |
+
" method(*args, **kwargs)\n",
|
181 |
+
" if mol.GetNumConformers() > 0:\n",
|
182 |
+
" return mol\n",
|
183 |
+
" except ValueError as e:\n",
|
184 |
+
" print(f\"Error: {e} - Trying next optimization method [{method}]\")\n",
|
185 |
+
"\n",
|
186 |
+
" print(f\"Invalid mol for 3d {'\\033[94m'}{Chem.MolToSmiles(mol)}{'\\033[0m'} - No conformer generated\")\n",
|
187 |
+
" return None"
|
188 |
+
]
|
189 |
+
},
|
190 |
+
{
|
191 |
+
"cell_type": "code",
|
192 |
+
"execution_count": 11,
|
193 |
+
"metadata": {},
|
194 |
+
"outputs": [],
|
195 |
+
"source": [
|
196 |
+
"def convert_smiles_to_mol(smiles, fail_folder=None, index=None, yvalue=None):\n",
|
197 |
+
" mol = Chem.MolFromSmiles(smiles)\n",
|
198 |
+
" if mol is None:\n",
|
199 |
+
" print(f\"[convert_smiles_to_mol] Cannot convert {smiles} to Mols\")\n",
|
200 |
+
" return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": \"Invalid SMILES\"}\n",
|
201 |
+
"\n",
|
202 |
+
" try:\n",
|
203 |
+
" Chem.Kekulize(mol, clearAromaticFlags=True)\n",
|
204 |
+
" isomeric_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)\n",
|
205 |
+
" mol = Chem.MolFromSmiles(isomeric_smiles)\n",
|
206 |
+
" except Exception as e:\n",
|
207 |
+
" print(f\"[convert_smiles_to_mol] failed {smiles} isomeric_smiles by {e}\")\n",
|
208 |
+
" if fail_folder and index is not None:\n",
|
209 |
+
" img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
|
210 |
+
" img = Draw.MolToImage(mol)\n",
|
211 |
+
" img.save(img_path)\n",
|
212 |
+
" return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"Isomeric SMILES error: {e}\"}\n",
|
213 |
+
"\n",
|
214 |
+
" try:\n",
|
215 |
+
" Chem.SanitizeMol(mol)\n",
|
216 |
+
" except Exception as e:\n",
|
217 |
+
" print(f\"[convert_smiles_to_mol] failed {smiles} SanitizeMol by {e}\")\n",
|
218 |
+
" if fail_folder and index is not None:\n",
|
219 |
+
" img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
|
220 |
+
" img = Draw.MolToImage(mol)\n",
|
221 |
+
" img.save(img_path)\n",
|
222 |
+
" return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"SanitizeMol error: {e}\"}\n",
|
223 |
+
"\n",
|
224 |
+
" return mol, None"
|
225 |
+
]
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"cell_type": "code",
|
229 |
+
"execution_count": 12,
|
230 |
+
"metadata": {},
|
231 |
+
"outputs": [],
|
232 |
+
"source": [
|
233 |
+
"def process_smiles(smiles, yvalue, fail_folder, index):\n",
|
234 |
+
" mol, error = convert_smiles_to_mol(smiles, fail_folder, index, yvalue)\n",
|
235 |
+
" if error:\n",
|
236 |
+
" return None, None, error\n",
|
237 |
+
"\n",
|
238 |
+
" mol_3d = mol3d(mol)\n",
|
239 |
+
" if mol_3d:\n",
|
240 |
+
" return smiles, yvalue, None\n",
|
241 |
+
" else:\n",
|
242 |
+
" img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
|
243 |
+
" img = Draw.MolToImage(mol)\n",
|
244 |
+
" img.save(img_path)\n",
|
245 |
+
" return None, None, {\"smiles\": smiles, \"y_value\": yvalue}\n",
|
246 |
+
"\n",
|
247 |
+
"def process_dataset(smiles_list, y_values, dataset_name, target_path=\"result\", max_workers=None):\n",
|
248 |
+
" start = time.time()\n",
|
249 |
+
" valid_smiles, valid_y = [], []\n",
|
250 |
+
" error_smiles_list = []\n",
|
251 |
+
" fail_folder = f\"{target_path}/failed/{dataset_name}\"\n",
|
252 |
+
" os.makedirs(fail_folder, exist_ok=True)\n",
|
253 |
+
"\n",
|
254 |
+
" with ProcessPoolExecutor(max_workers=max_workers) as executor:\n",
|
255 |
+
" futures = [\n",
|
256 |
+
" executor.submit(process_smiles, smiles, yvalue, fail_folder, i)\n",
|
257 |
+
" for i, (smiles, yvalue) in enumerate(zip(smiles_list, y_values))\n",
|
258 |
+
" ]\n",
|
259 |
+
" for future in as_completed(futures):\n",
|
260 |
+
" smiles, yvalue, error = future.result()\n",
|
261 |
+
" if error:\n",
|
262 |
+
" error_smiles_list.append(error)\n",
|
263 |
+
" elif smiles is not None and yvalue is not None:\n",
|
264 |
+
" valid_smiles.append(smiles)\n",
|
265 |
+
" valid_y.append(yvalue)\n",
|
266 |
+
"\n",
|
267 |
+
" if error_smiles_list:\n",
|
268 |
+
" error_df = pd.DataFrame(error_smiles_list)\n",
|
269 |
+
" error_df.to_csv(os.path.join(fail_folder, \"failed_smiles.csv\"), index=False)\n",
|
270 |
+
" print(f\" [{dataset_name:<10}] : {time.time()-start:.4f} sec\")\n",
|
271 |
+
" return valid_smiles, valid_y"
|
272 |
+
]
|
273 |
+
},
|
274 |
+
{
|
275 |
+
"cell_type": "code",
|
276 |
+
"execution_count": 13,
|
277 |
+
"metadata": {},
|
278 |
+
"outputs": [
|
279 |
+
{
|
280 |
+
"name": "stdout",
|
281 |
+
"output_type": "stream",
|
282 |
+
"text": [
|
283 |
+
" [ws496 ] : 0.8667 sec\n",
|
284 |
+
" [delaney ] : 1.4338 sec\n",
|
285 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x5596b20fdd50>]\n",
|
286 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x5596b20febe0>]\n",
|
287 |
+
"Invalid mol for 3d \u001b[94m[H]O[C@@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
|
288 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x5596b20fdd50>]\n",
|
289 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x5596b20febe0>]\n",
|
290 |
+
"Invalid mol for 3d \u001b[94m[H]O[C@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
|
291 |
+
" [Lovric2020_logS0] : 8.8955 sec\n",
|
292 |
+
" [huusk ] : 1.5899 sec\n"
|
293 |
+
]
|
294 |
+
}
|
295 |
+
],
|
296 |
+
"source": [
|
297 |
+
"smiles_ws, y_ws = process_dataset(smiles_ws, y_ws, \"ws496\", target_path)\n",
|
298 |
+
"smiles_de, y_de = process_dataset(smiles_de, y_de, \"delaney\", target_path)\n",
|
299 |
+
"smiles_lo, y_lo = process_dataset(smiles_lo, y_lo, \"Lovric2020_logS0\", target_path)\n",
|
300 |
+
"smiles_hu, y_hu = process_dataset(smiles_hu, y_hu, \"huusk\", target_path)"
|
301 |
+
]
|
302 |
+
},
|
303 |
+
{
|
304 |
+
"cell_type": "code",
|
305 |
+
"execution_count": 14,
|
306 |
+
"metadata": {},
|
307 |
+
"outputs": [],
|
308 |
+
"source": [
|
309 |
+
"LEN_OF_FF = 2048\n",
|
310 |
+
"LEN_OF_MA = 167\n",
|
311 |
+
"LEN_OF_AV = 512"
|
312 |
+
]
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"cell_type": "code",
|
316 |
+
"execution_count": 15,
|
317 |
+
"metadata": {},
|
318 |
+
"outputs": [],
|
319 |
+
"source": [
|
320 |
+
"def get_fingerprints(mol):\n",
|
321 |
+
" if mol is None:\n",
|
322 |
+
" return None, None, None\n",
|
323 |
+
" \n",
|
324 |
+
" morgan_generator = GetMorganGenerator(radius=2, fpSize=LEN_OF_FF)\n",
|
325 |
+
" ecfp = morgan_generator.GetFingerprint(mol)\n",
|
326 |
+
" ecfp_array = np.zeros((LEN_OF_FF,),dtype=int)\n",
|
327 |
+
" DataStructs.ConvertToNumpyArray(ecfp, ecfp_array)\n",
|
328 |
+
" \n",
|
329 |
+
" maccs = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)\n",
|
330 |
+
"\n",
|
331 |
+
" avalon_fp = GetAvalonFP(mol)\n",
|
332 |
+
" avalon_array = np.zeros((LEN_OF_AV,),dtype=int)\n",
|
333 |
+
" DataStructs.ConvertToNumpyArray(avalon_fp, avalon_array)\n",
|
334 |
+
" \n",
|
335 |
+
" return ecfp_array, maccs, avalon_array\n",
|
336 |
+
"\n",
|
337 |
+
"def fp_converter(data, use_parallel=True):\n",
|
338 |
+
" mols = [Chem.MolFromSmiles(smi) for smi in data]\n",
|
339 |
+
" \n",
|
340 |
+
" if use_parallel:\n",
|
341 |
+
" try: \n",
|
342 |
+
" with ProcessPoolExecutor() as executor:\n",
|
343 |
+
" results = list(executor.map(get_fingerprints, mols))\n",
|
344 |
+
" except Exception as e:\n",
|
345 |
+
" print(f\"Parallel processing failed due to: {e}. Falling back to sequential processing.\")\n",
|
346 |
+
" use_parallel = False\n",
|
347 |
+
" \n",
|
348 |
+
" if not use_parallel:\n",
|
349 |
+
" results = [get_fingerprints(mol) for mol in mols]\n",
|
350 |
+
" \n",
|
351 |
+
" ECFP, MACCS, AvalonFP = zip(*results)\n",
|
352 |
+
" \n",
|
353 |
+
" ECFP_container = np.vstack([arr for arr in ECFP if arr is not None])\n",
|
354 |
+
" MACCS_container = np.zeros((len(MACCS), LEN_OF_MA), dtype=int)\n",
|
355 |
+
" AvalonFP_container = np.vstack([arr for arr in AvalonFP if arr is not None])\n",
|
356 |
+
"\n",
|
357 |
+
" for i, fp in enumerate(MACCS):\n",
|
358 |
+
" if fp is not None:\n",
|
359 |
+
" DataStructs.ConvertToNumpyArray(fp, MACCS_container[i])\n",
|
360 |
+
" \n",
|
361 |
+
" return mols, ECFP_container, MACCS_container, AvalonFP_container"
|
362 |
+
]
|
363 |
+
},
|
364 |
+
{
|
365 |
+
"cell_type": "code",
|
366 |
+
"execution_count": 16,
|
367 |
+
"metadata": {},
|
368 |
+
"outputs": [
|
369 |
+
{
|
370 |
+
"data": {
|
371 |
+
"text/plain": [
|
372 |
+
"0"
|
373 |
+
]
|
374 |
+
},
|
375 |
+
"execution_count": 16,
|
376 |
+
"metadata": {},
|
377 |
+
"output_type": "execute_result"
|
378 |
+
}
|
379 |
+
],
|
380 |
+
"source": [
|
381 |
+
"mol_ws, x_ws, MACCS_ws, AvalonFP_ws = fp_converter(smiles_ws,target_path)\n",
|
382 |
+
"mol_de, x_de, MACCS_de, AvalonFP_de = fp_converter(smiles_de,target_path)\n",
|
383 |
+
"mol_lo, x_lo, MACCS_lo, AvalonFP_lo = fp_converter(smiles_lo,target_path)\n",
|
384 |
+
"mol_hu, x_hu, MACCS_hu, AvalonFP_hu = fp_converter(smiles_hu,target_path)\n",
|
385 |
+
"del smiles_ws\n",
|
386 |
+
"del smiles_de\n",
|
387 |
+
"del smiles_lo\n",
|
388 |
+
"del smiles_hu\n",
|
389 |
+
"gc.collect()"
|
390 |
+
]
|
391 |
+
},
|
392 |
+
{
|
393 |
+
"cell_type": "code",
|
394 |
+
"execution_count": 17,
|
395 |
+
"metadata": {},
|
396 |
+
"outputs": [],
|
397 |
+
"source": [
|
398 |
+
"def concatenate_to_numpy(*dataframes):\n",
|
399 |
+
" numpy_arrays = [df.to_numpy() if isinstance(df, pd.DataFrame) else df for df in dataframes]\n",
|
400 |
+
" if not all(isinstance(arr, np.ndarray) for arr in numpy_arrays):\n",
|
401 |
+
" raise ValueError(\"All inputs must be either pandas DataFrame or numpy array\")\n",
|
402 |
+
" return np.concatenate(numpy_arrays, axis=1)"
|
403 |
+
]
|
404 |
+
},
|
405 |
+
{
|
406 |
+
"cell_type": "code",
|
407 |
+
"execution_count": 18,
|
408 |
+
"metadata": {},
|
409 |
+
"outputs": [
|
410 |
+
{
|
411 |
+
"data": {
|
412 |
+
"text/plain": [
|
413 |
+
"0"
|
414 |
+
]
|
415 |
+
},
|
416 |
+
"execution_count": 18,
|
417 |
+
"metadata": {},
|
418 |
+
"output_type": "execute_result"
|
419 |
+
}
|
420 |
+
],
|
421 |
+
"source": [
|
422 |
+
"group_nws = concatenate_to_numpy(x_ws, MACCS_ws, AvalonFP_ws)\n",
|
423 |
+
"group_nde = concatenate_to_numpy(x_de, MACCS_de, AvalonFP_de)\n",
|
424 |
+
"group_nlo = concatenate_to_numpy(x_lo, MACCS_lo, AvalonFP_lo)\n",
|
425 |
+
"group_nhu = concatenate_to_numpy(x_hu, MACCS_hu, AvalonFP_hu)\n",
|
426 |
+
"del x_ws, MACCS_ws, AvalonFP_ws\n",
|
427 |
+
"del x_de, MACCS_de, AvalonFP_de\n",
|
428 |
+
"del x_lo, MACCS_lo, AvalonFP_lo\n",
|
429 |
+
"del x_hu, MACCS_hu, AvalonFP_hu\n",
|
430 |
+
"gc.collect()"
|
431 |
+
]
|
432 |
+
},
|
433 |
+
{
|
434 |
+
"cell_type": "code",
|
435 |
+
"execution_count": 19,
|
436 |
+
"metadata": {},
|
437 |
+
"outputs": [],
|
438 |
+
"source": [
|
439 |
+
"try:\n",
|
440 |
+
" storage = optuna.storages.RDBStorage(url=\"sqlite:///ano_analysis.db\", engine_kwargs={\"connect_args\": {\"timeout\": 10000}})\n",
|
441 |
+
" # storage_urls = \"postgresql+psycopg2://postgres:{pwd}}@localhost:{num}}\"\n",
|
442 |
+
" # storage = optuna.storages.RDBStorage(url=storage_urls)\n",
|
443 |
+
"except Exception as e:\n",
|
444 |
+
" print(f\"Error occured: {e}\")"
|
445 |
+
]
|
446 |
+
},
|
447 |
+
{
|
448 |
+
"cell_type": "code",
|
449 |
+
"execution_count": 20,
|
450 |
+
"metadata": {},
|
451 |
+
"outputs": [
|
452 |
+
{
|
453 |
+
"name": "stdout",
|
454 |
+
"output_type": "stream",
|
455 |
+
"text": [
|
456 |
+
"Best trial for study 'ANO_ws_feature':\n",
|
457 |
+
"Best trial value: 0.932153\n",
|
458 |
+
"Best trial parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElectrons': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 0, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 1, 'AUTOCORR3D': 1, 'RDF': 0, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 0}\n",
|
459 |
+
"Generated fea: [1 1 1 1 0 0 1 0 1 0 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 1 1 1\n",
|
460 |
+
" 1 0 1 0 1 0 1 1 0 0 0 0]\n",
|
461 |
+
"Fixed features: ['MolWt', 'MolLogP', 'MolMR', 'TPSA']\n"
|
462 |
+
]
|
463 |
+
}
|
464 |
+
],
|
465 |
+
"source": [
|
466 |
+
"try:\n",
|
467 |
+
" ws_fea = selection_fromStudy_compress('ANO_ws_feature', storage)\n",
|
468 |
+
"except Exception as e:\n",
|
469 |
+
" print(f\"Error occured: {e}\") \n",
|
470 |
+
" ws_fea =[\n",
|
471 |
+
" 1, # 1 - \"MolWeight\"\n",
|
472 |
+
" 1, # 2 - \"Mol_logP\"\n",
|
473 |
+
" 1, # 3 - \"Mol_MR\"\n",
|
474 |
+
" 1, # 4 - \"Mol_TPSA\"\n",
|
475 |
+
" 0, # 5 - \"NumRotatableBonds\"\n",
|
476 |
+
" 0, # 6 - \"HeavyAtomCount\"\n",
|
477 |
+
" 0, # 7 - \"NumHAcceptors\"\n",
|
478 |
+
" 0, # 8 - \"NumHDonors\"\n",
|
479 |
+
" 0, # 9 - \"NumHeteroatoms\"\n",
|
480 |
+
" 1, # 10 - \"NumValenceElec\"\n",
|
481 |
+
" 1, # 11 - \"NHOHCount\"\n",
|
482 |
+
" 1, # 12 - \"NOCount\"\n",
|
483 |
+
" 0, # 13 - \"RingCount\"\n",
|
484 |
+
" 1, # 14 - \"NumAromaticRings\"\n",
|
485 |
+
" 0, # 15 - \"NumSaturatedRings\"\n",
|
486 |
+
" 0, # 16 - \"NumAliphaticRings\"\n",
|
487 |
+
" 0, # 17 - \"LabuteASA\"\n",
|
488 |
+
" 0, # 18 - \"NumValenceElectrons\"\n",
|
489 |
+
" 1, # 19 - \"BalabanJ\"\n",
|
490 |
+
" 1, # 20 - \"BertzCT\"\n",
|
491 |
+
" 0, # 21 - \"Ipc\"\n",
|
492 |
+
" 0, # 22 - \"kappa_Series[1-3]_ind\"\n",
|
493 |
+
" 1, # 23 - \"Chi_Series[13]_ind\"\n",
|
494 |
+
" 1, # 24 - \"Phi\"\n",
|
495 |
+
" 0, # 25 - \"HallKierAlpha\"\n",
|
496 |
+
" 0, # 26 - \"NumAmideBonds\"\n",
|
497 |
+
" 1, # 27 - \"FractionCSP3\"\n",
|
498 |
+
" 0, # 28 - \"NumSpiroAtoms\"\n",
|
499 |
+
" 1, # 29 - \"NumBridgeheadAtoms\"\n",
|
500 |
+
" 1, # 30 - \"PEOE_VSA_Series[1-14]_ind\"\n",
|
501 |
+
" 1, # 31 - \"SMR_VSA_Series[1-10]_ind\"\n",
|
502 |
+
" 0, # 32 - \"SlogP_VSA_Series[1-12]_ind\"\n",
|
503 |
+
" 1, # 33 - \"EState_VSA_Series[1-11]_ind\"\n",
|
504 |
+
" 0, # 34 - \"VSA_EState_Series[1-10]_ind\"\n",
|
505 |
+
" 0, # 35 - \"Asphericity\"\n",
|
506 |
+
" 1, # 36 - \"PBF\"\n",
|
507 |
+
" 0, # 37 - \"RadiusOfGyration\"\n",
|
508 |
+
" 0, # 38 - \"InertialShapeFactor\"\n",
|
509 |
+
" 1, # 39 - \"Eccentricity\"\n",
|
510 |
+
" 0, # 40 - \"SpherocityIndex\"\n",
|
511 |
+
" 1, # 41 - \"PMI_series[1-3]_ind\"\n",
|
512 |
+
" 0, # 42 - \"NPR_series[1-2]_ind\"\n",
|
513 |
+
" 0, # 43 - \"MQNs\"\n",
|
514 |
+
" 0, # 44 - \"AUTOCORR2D\"\n",
|
515 |
+
" 1, # 45 - \"BCUT2D\"\n",
|
516 |
+
" 0, # 46 - \"AUTOCORR3D\"\n",
|
517 |
+
" 1, # 47 - \"RDF\"\n",
|
518 |
+
" 0, # 48 - \"MORSE\"\n",
|
519 |
+
" 1, # 49 - \"WHIM\"\n",
|
520 |
+
" 0, # 50 - \"GETAWAY\" \n",
|
521 |
+
" ]"
|
522 |
+
]
|
523 |
+
},
|
524 |
+
{
|
525 |
+
"cell_type": "code",
|
526 |
+
"execution_count": 21,
|
527 |
+
"metadata": {},
|
528 |
+
"outputs": [
|
529 |
+
{
|
530 |
+
"name": "stdout",
|
531 |
+
"output_type": "stream",
|
532 |
+
"text": [
|
533 |
+
"Best trial for study 'ANO_de_feature':\n",
|
534 |
+
"Best trial value: 0.973052\n",
|
535 |
+
"Best trial parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElectrons': 1, 'NHOHCount': 1, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 0, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 1}\n",
|
536 |
+
"Generated fea: [1 1 1 1 1 1 0 0 1 1 1 0 0 1 1 1 1 0 0 0 0 1 1 0 1 1 1 0 1 1 0 0 0 1 1 1 1\n",
|
537 |
+
" 1 1 0 1 1 1 0 0 0 1 0 1]\n",
|
538 |
+
"Fixed features: ['MolWt', 'MolLogP', 'MolMR', 'TPSA']\n"
|
539 |
+
]
|
540 |
+
}
|
541 |
+
],
|
542 |
+
"source": [
|
543 |
+
"try:\n",
|
544 |
+
" de_fea = selection_fromStudy_compress('ANO_de_feature', storage)\n",
|
545 |
+
"except Exception as e:\n",
|
546 |
+
" print(f\"Error occured: {e}\") \n",
|
547 |
+
" de_fea =[\n",
|
548 |
+
" 1, # 1 - \"MolWeight\"\n",
|
549 |
+
" 1, # 2 - \"Mol_logP\"\n",
|
550 |
+
" 1, # 3 - \"Mol_MR\"\n",
|
551 |
+
" 1, # 4 - \"Mol_TPSA\"\n",
|
552 |
+
" 0, # 5 - \"NumRotatableBonds\"\n",
|
553 |
+
" 0, # 6 - \"HeavyAtomCount\"\n",
|
554 |
+
" 1, # 7 - \"NumHAcceptors\"\n",
|
555 |
+
" 1, # 8 - \"NumHDonors\"\n",
|
556 |
+
" 0, # 9 - \"NumHeteroatoms\"\n",
|
557 |
+
" 0, # 10 - \"NumValenceElec\"\n",
|
558 |
+
" 1, # 11 - \"NHOHCount\"\n",
|
559 |
+
" 0, # 12 - \"NOCount\"\n",
|
560 |
+
" 0, # 13 - \"RingCount\"\n",
|
561 |
+
" 0, # 14 - \"NumAromaticRings\"\n",
|
562 |
+
" 0, # 15 - \"NumSaturatedRings\"\n",
|
563 |
+
" 1, # 16 - \"NumAliphaticRings\"\n",
|
564 |
+
" 1, # 17 - \"LabuteASA\"\n",
|
565 |
+
" 0, # 18 - \"NumValenceElectrons\"\n",
|
566 |
+
" 1, # 19 - \"BalabanJ\"\n",
|
567 |
+
" 1, # 20 - \"BertzCT\"\n",
|
568 |
+
" 1, # 21 - \"Ipc\"\n",
|
569 |
+
" 0, # 22 - \"kappa_Series[1-3]_ind\"\n",
|
570 |
+
" 0, # 23 - \"Chi_Series[13]_ind\"\n",
|
571 |
+
" 0, # 24 - \"Phi\"\n",
|
572 |
+
" 1, # 25 - \"HallKierAlpha\"\n",
|
573 |
+
" 1, # 26 - \"NumAmideBonds\"\n",
|
574 |
+
" 1, # 27 - \"FractionCSP3\"\n",
|
575 |
+
" 1, # 28 - \"NumSpiroAtoms\"\n",
|
576 |
+
" 0, # 29 - \"NumBridgeheadAtoms\"\n",
|
577 |
+
" 1, # 30 - \"PEOE_VSA_Series[1-14]_ind\"\n",
|
578 |
+
" 1, # 31 - \"SMR_VSA_Series[1-10]_ind\"\n",
|
579 |
+
" 0, # 32 - \"SlogP_VSA_Series[1-12]_ind\"\n",
|
580 |
+
" 0, # 33 - \"EState_VSA_Series[1-11]_ind\"\n",
|
581 |
+
" 0, # 34 - \"VSA_EState_Series[1-10]_ind\"\n",
|
582 |
+
" 1, # 35 - \"Asphericity\"\n",
|
583 |
+
" 0, # 36 - \"PBF\"\n",
|
584 |
+
" 0, # 37 - \"RadiusOfGyration\"\n",
|
585 |
+
" 0, # 38 - \"InertialShapeFactor\"\n",
|
586 |
+
" 0, # 39 - \"Eccentricity\"\n",
|
587 |
+
" 0, # 40 - \"SpherocityIndex\"\n",
|
588 |
+
" 0, # 41 - \"PMI_series[1-3]_ind\"\n",
|
589 |
+
" 1, # 42 - \"NPR_series[1-2]_ind\"\n",
|
590 |
+
" 0, # 43 - \"MQNs\"\n",
|
591 |
+
" 1, # 44 - \"AUTOCORR2D\"\n",
|
592 |
+
" 1, # 45 - \"BCUT2D\"\n",
|
593 |
+
" 0, # 46 - \"AUTOCORR3D\"\n",
|
594 |
+
" 1, # 47 - \"RDF\"\n",
|
595 |
+
" 0, # 48 - \"MORSE\"\n",
|
596 |
+
" 1, # 49 - \"WHIM\"\n",
|
597 |
+
" 0, # 50 - \"GETAWAY\" \n",
|
598 |
+
" ]"
|
599 |
+
]
|
600 |
+
},
|
601 |
+
{
|
602 |
+
"cell_type": "code",
|
603 |
+
"execution_count": 22,
|
604 |
+
"metadata": {},
|
605 |
+
"outputs": [
|
606 |
+
{
|
607 |
+
"name": "stdout",
|
608 |
+
"output_type": "stream",
|
609 |
+
"text": [
|
610 |
+
"Best trial for study 'ANO_lo_feature':\n",
|
611 |
+
"Best trial value: 0.843203\n",
|
612 |
+
"Best trial parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 0, 'NumHDonors': 1, 'NumHeteroatoms': 0, 'NumValenceElectrons': 0, 'NHOHCount': 0, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]': 1, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 0, 'PBF': 0, 'RadiusOfGyration': 1, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 0}\n",
|
613 |
+
"Generated fea: [1 1 1 1 0 0 0 1 0 0 0 0 0 1 1 1 0 1 0 1 1 1 1 1 0 0 0 1 0 0 1 0 1 1 1 1 0\n",
|
614 |
+
" 0 1 1 1 1 1 0 0 0 1 0 0]\n",
|
615 |
+
"Fixed features: ['MolWt', 'MolLogP', 'MolMR', 'TPSA']\n"
|
616 |
+
]
|
617 |
+
}
|
618 |
+
],
|
619 |
+
"source": [
|
620 |
+
"try:\n",
|
621 |
+
" lo_fea = selection_fromStudy_compress('ANO_lo_feature', storage)\n",
|
622 |
+
"except Exception as e:\n",
|
623 |
+
" print(f\"Error occured: {e}\") \n",
|
624 |
+
" lo_fea =[\n",
|
625 |
+
" 1, # 1 - \"MolWeight\"\n",
|
626 |
+
" 1, # 2 - \"Mol_logP\"\n",
|
627 |
+
" 1, # 3 - \"Mol_MR\"\n",
|
628 |
+
" 1, # 4 - \"Mol_TPSA\"\n",
|
629 |
+
" 1, # 5 - \"NumRotatableBonds\"\n",
|
630 |
+
" 0, # 6 - \"HeavyAtomCount\"\n",
|
631 |
+
" 0, # 7 - \"NumHAcceptors\"\n",
|
632 |
+
" 0, # 8 - \"NumHDonors\"\n",
|
633 |
+
" 1, # 9 - \"NumHeteroatoms\"\n",
|
634 |
+
" 1, # 10 - \"NumValenceElec\"\n",
|
635 |
+
" 1, # 11 - \"NHOHCount\"\n",
|
636 |
+
" 1, # 12 - \"NOCount\"\n",
|
637 |
+
" 0, # 13 - \"RingCount\"\n",
|
638 |
+
" 1, # 14 - \"NumAromaticRings\"\n",
|
639 |
+
" 0, # 15 - \"NumSaturatedRings\"\n",
|
640 |
+
" 0, # 16 - \"NumAliphaticRings\"\n",
|
641 |
+
" 0, # 17 - \"LabuteASA\"\n",
|
642 |
+
" 1, # 18 - \"NumValenceElectrons\"\n",
|
643 |
+
" 0, # 19 - \"BalabanJ\"\n",
|
644 |
+
" 0, # 20 - \"BertzCT\"\n",
|
645 |
+
" 0, # 21 - \"Ipc\"\n",
|
646 |
+
" 1, # 22 - \"kappa_Series[1-3]_ind\"\n",
|
647 |
+
" 0, # 23 - \"Chi_Series[13]_ind\"\n",
|
648 |
+
" 1, # 24 - \"Phi\"\n",
|
649 |
+
" 1, # 25 - \"HallKierAlpha\"\n",
|
650 |
+
" 0, # 26 - \"NumAmideBonds\"\n",
|
651 |
+
" 1, # 27 - \"FractionCSP3\"\n",
|
652 |
+
" 1, # 28 - \"NumSpiroAtoms\"\n",
|
653 |
+
" 0, # 29 - \"NumBridgeheadAtoms\"\n",
|
654 |
+
" 1, # 30 - \"PEOE_VSA_Series[1-14]_ind\"\n",
|
655 |
+
" 1, # 31 - \"SMR_VSA_Series[1-10]_ind\"\n",
|
656 |
+
" 1, # 32 - \"SlogP_VSA_Series[1-12]_ind\"\n",
|
657 |
+
" 0, # 33 - \"EState_VSA_Series[1-11]_ind\"\n",
|
658 |
+
" 1, # 34 - \"VSA_EState_Series[1-10]_ind\"\n",
|
659 |
+
" 1, # 35 - \"Asphericity\"\n",
|
660 |
+
" 0, # 36 - \"PBF\"\n",
|
661 |
+
" 1, # 37 - \"RadiusOfGyration\"\n",
|
662 |
+
" 0, # 38 - \"InertialShapeFactor\"\n",
|
663 |
+
" 0, # 39 - \"Eccentricity\"\n",
|
664 |
+
" 1, # 40 - \"SpherocityIndex\"\n",
|
665 |
+
" 0, # 41 - \"PMI_series[1-3]_ind\"\n",
|
666 |
+
" 1, # 42 - \"NPR_series[1-2]_ind\"\n",
|
667 |
+
" 0, # 43 - \"MQNs\"\n",
|
668 |
+
" 0, # 44 - \"AUTOCORR2D\"\n",
|
669 |
+
" 0, # 45 - \"BCUT2D\"\n",
|
670 |
+
" 0, # 46 - \"AUTOCORR3D\"\n",
|
671 |
+
" 1, # 47 - \"RDF\"\n",
|
672 |
+
" 0, # 48 - \"MORSE\"\n",
|
673 |
+
" 0, # 49 - \"WHIM\"\n",
|
674 |
+
" 0, # 50 - \"GETAWAY\" \n",
|
675 |
+
" ]"
|
676 |
+
]
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"cell_type": "code",
|
680 |
+
"execution_count": 23,
|
681 |
+
"metadata": {},
|
682 |
+
"outputs": [
|
683 |
+
{
|
684 |
+
"name": "stdout",
|
685 |
+
"output_type": "stream",
|
686 |
+
"text": [
|
687 |
+
"Best trial for study 'ANO_hu_feature':\n",
|
688 |
+
"Best trial value: 0.939862\n",
|
689 |
+
"Best trial parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElectrons': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 1, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 1, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 1, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 1}\n",
|
690 |
+
"Generated fea: [1 1 1 1 0 0 1 0 1 0 0 1 1 0 1 1 0 0 1 1 0 1 1 1 0 0 0 0 1 0 1 1 0 1 1 1 1\n",
|
691 |
+
" 0 0 1 1 1 0 1 0 1 0 0 1]\n",
|
692 |
+
"Fixed features: ['MolWt', 'MolLogP', 'MolMR', 'TPSA']\n"
|
693 |
+
]
|
694 |
+
}
|
695 |
+
],
|
696 |
+
"source": [
|
697 |
+
"try:\n",
|
698 |
+
" hu_fea = selection_fromStudy_compress('ANO_hu_feature', storage)\n",
|
699 |
+
"except Exception as e:\n",
|
700 |
+
" print(f\"Error occured: {e}\") \n",
|
701 |
+
" hu_fea =[\n",
|
702 |
+
" 1, # 1 - \"MolWeight\"\n",
|
703 |
+
" 1, # 2 - \"Mol_logP\"\n",
|
704 |
+
" 1, # 3 - \"Mol_MR\"\n",
|
705 |
+
" 1, # 4 - \"Mol_TPSA\"\n",
|
706 |
+
" 0, # 5 - \"NumRotatableBonds\"\n",
|
707 |
+
" 1, # 6 - \"HeavyAtomCount\"\n",
|
708 |
+
" 0, # 7 - \"NumHAcceptors\"\n",
|
709 |
+
" 1, # 8 - \"NumHDonors\"\n",
|
710 |
+
" 1, # 9 - \"NumHeteroatoms\"\n",
|
711 |
+
" 1, # 10 - \"NumValenceElec\"\n",
|
712 |
+
" 0, # 11 - \"NHOHCount\"\n",
|
713 |
+
" 1, # 12 - \"NOCount\"\n",
|
714 |
+
" 1, # 13 - \"RingCount\"\n",
|
715 |
+
" 1, # 14 - \"NumAromaticRings\"\n",
|
716 |
+
" 1, # 15 - \"NumSaturatedRings\"\n",
|
717 |
+
" 0, # 16 - \"NumAliphaticRings\"\n",
|
718 |
+
" 0, # 17 - \"LabuteASA\"\n",
|
719 |
+
" 0, # 18 - \"NumValenceElectrons\"\n",
|
720 |
+
" 1, # 19 - \"BalabanJ\"\n",
|
721 |
+
" 1, # 20 - \"BertzCT\"\n",
|
722 |
+
" 1, # 21 - \"Ipc\"\n",
|
723 |
+
" 0, # 22 - \"kappa_Series[1-3]_ind\"\n",
|
724 |
+
" 1, # 23 - \"Chi_Series[13]_ind\"\n",
|
725 |
+
" 1, # 24 - \"Phi\"\n",
|
726 |
+
" 0, # 25 - \"HallKierAlpha\"\n",
|
727 |
+
" 1, # 26 - \"NumAmideBonds\"\n",
|
728 |
+
" 0, # 27 - \"FractionCSP3\"\n",
|
729 |
+
" 1, # 28 - \"NumSpiroAtoms\"\n",
|
730 |
+
" 0, # 29 - \"NumBridgeheadAtoms\"\n",
|
731 |
+
" 1, # 30 - \"PEOE_VSA_Series[1-14]_ind\"\n",
|
732 |
+
" 1, # 31 - \"SMR_VSA_Series[1-10]_ind\"\n",
|
733 |
+
" 1, # 32 - \"SlogP_VSA_Series[1-12]_ind\"\n",
|
734 |
+
" 1, # 33 - \"EState_VSA_Series[1-11]_ind\"\n",
|
735 |
+
" 1, # 34 - \"VSA_EState_Series[1-10]_ind\"\n",
|
736 |
+
" 1, # 35 - \"Asphericity\"\n",
|
737 |
+
" 1, # 36 - \"PBF\"\n",
|
738 |
+
" 1, # 37 - \"RadiusOfGyration\"\n",
|
739 |
+
" 1, # 38 - \"InertialShapeFactor\"\n",
|
740 |
+
" 0, # 39 - \"Eccentricity\"\n",
|
741 |
+
" 0, # 40 - \"SpherocityIndex\"\n",
|
742 |
+
" 1, # 41 - \"PMI_series[1-3]_ind\"\n",
|
743 |
+
" 1, # 42 - \"NPR_series[1-2]_ind\"\n",
|
744 |
+
" 1, # 43 - \"MQNs\"\n",
|
745 |
+
" 1, # 44 - \"AUTOCORR2D\"\n",
|
746 |
+
" 1, # 45 - \"BCUT2D\"\n",
|
747 |
+
" 1, # 46 - \"AUTOCORR3D\"\n",
|
748 |
+
" 1, # 47 - \"RDF\"\n",
|
749 |
+
" 0, # 48 - \"MORSE\"\n",
|
750 |
+
" 0, # 49 - \"WHIM\"\n",
|
751 |
+
" 0, # 50 - \"GETAWAY\" \n",
|
752 |
+
" ]"
|
753 |
+
]
|
754 |
+
},
|
755 |
+
{
|
756 |
+
"cell_type": "code",
|
757 |
+
"execution_count": 24,
|
758 |
+
"metadata": {},
|
759 |
+
"outputs": [
|
760 |
+
{
|
761 |
+
"name": "stdout",
|
762 |
+
"output_type": "stream",
|
763 |
+
"text": [
|
764 |
+
"BCUT2D calculation failed: ERROR: No Gasteiger Partial Charge parameters for Element: Sn Mode: sp3\n"
|
765 |
+
]
|
766 |
+
},
|
767 |
+
{
|
768 |
+
"data": {
|
769 |
+
"text/plain": [
|
770 |
+
"0"
|
771 |
+
]
|
772 |
+
},
|
773 |
+
"execution_count": 24,
|
774 |
+
"metadata": {},
|
775 |
+
"output_type": "execute_result"
|
776 |
+
}
|
777 |
+
],
|
778 |
+
"source": [
|
779 |
+
"new_ws = selection_data_descriptor_compress(ws_fea, group_nws, mol_ws, 'ws')\n",
|
780 |
+
"new_de = selection_data_descriptor_compress(de_fea, group_nde, mol_de, 'de')\n",
|
781 |
+
"new_lo = selection_data_descriptor_compress(lo_fea, group_nlo, mol_lo, 'lo')\n",
|
782 |
+
"new_hu = selection_data_descriptor_compress(hu_fea, group_nhu, mol_hu, 'hu')\n",
|
783 |
+
"del ws_fea, group_nws, mol_ws\n",
|
784 |
+
"del de_fea, group_nde, mol_de\n",
|
785 |
+
"del lo_fea, group_nlo, mol_lo\n",
|
786 |
+
"del hu_fea, group_nhu, mol_hu\n",
|
787 |
+
"gc.collect()\n",
|
788 |
+
"# 6m 10.3s"
|
789 |
+
]
|
790 |
+
},
|
791 |
+
{
|
792 |
+
"cell_type": "code",
|
793 |
+
"execution_count": 25,
|
794 |
+
"metadata": {},
|
795 |
+
"outputs": [],
|
796 |
+
"source": [
|
797 |
+
"import logging\n",
|
798 |
+
"import warnings\n",
|
799 |
+
"\n",
|
800 |
+
"os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
|
801 |
+
"os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
|
802 |
+
"os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'\n",
|
803 |
+
"os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'\n",
|
804 |
+
"os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices'\n",
|
805 |
+
"os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/local/cuda --xla_gpu_force_compilation_parallelism=1'\n",
|
806 |
+
"os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'\n",
|
807 |
+
"os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'\n",
|
808 |
+
"os.environ['TF_NUMA_NODES'] = '1'\n",
|
809 |
+
"\n",
|
810 |
+
"warnings.filterwarnings('ignore')\n",
|
811 |
+
"\n",
|
812 |
+
"warnings.simplefilter(action='ignore', category=FutureWarning)\n",
|
813 |
+
"\n",
|
814 |
+
"logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
|
815 |
+
"\n",
|
816 |
+
"tf.get_logger().setLevel('ERROR')\n",
|
817 |
+
"tf.autograph.set_verbosity(0)\n",
|
818 |
+
"\n",
|
819 |
+
"def suppress_warnings(condition=True):\n",
|
820 |
+
" if condition:\n",
|
821 |
+
" logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
|
822 |
+
" os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
|
823 |
+
" else:\n",
|
824 |
+
" logging.getLogger('tensorflow').setLevel(logging.WARNING)\n",
|
825 |
+
" os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'\n",
|
826 |
+
"\n",
|
827 |
+
"suppress_warnings(condition=True)"
|
828 |
+
]
|
829 |
+
},
|
830 |
+
{
|
831 |
+
"cell_type": "code",
|
832 |
+
"execution_count": 26,
|
833 |
+
"metadata": {},
|
834 |
+
"outputs": [],
|
835 |
+
"source": [
|
836 |
+
"BATCHSIZE = 16\n",
|
837 |
+
"EPOCHS = 1000\n",
|
838 |
+
"# lr = 0.0001\n",
|
839 |
+
"# decay = 1e-4"
|
840 |
+
]
|
841 |
+
},
|
842 |
+
{
|
843 |
+
"cell_type": "code",
|
844 |
+
"execution_count": 27,
|
845 |
+
"metadata": {},
|
846 |
+
"outputs": [],
|
847 |
+
"source": [
|
848 |
+
"# def new_model(trial):\n",
|
849 |
+
"# n_layers = trial.suggest_int(\"n_layers\", 1, 3)\n",
|
850 |
+
"# model = tf.keras.Sequential()\n",
|
851 |
+
"# layer_dropout = trial.suggest_int(\"layer_dropout\", 0,1)\n",
|
852 |
+
"# for i in range(n_layers):\n",
|
853 |
+
"# num_hidden = trial.suggest_int(\"n_units_l_{}\".format(i), 2, 1e4-1)\n",
|
854 |
+
"# num_decay = trial.suggest_categorical(\"n_decay_l_{}\".format(i), [1e-3,1e-4,1e-5])\n",
|
855 |
+
"# model.add(\n",
|
856 |
+
"# tf.keras.layers.Dense(\n",
|
857 |
+
"# num_hidden,\n",
|
858 |
+
"# activation=\"relu\",\n",
|
859 |
+
"# kernel_initializer='glorot_uniform',\n",
|
860 |
+
"# kernel_regularizer=tf.keras.regularizers.l2(num_decay),\n",
|
861 |
+
"# )\n",
|
862 |
+
"# )\n",
|
863 |
+
"# if layer_dropout==1:\n",
|
864 |
+
"# fdropout1 = trial.suggest_categorical(\"F_dropout_{}\".format(i),[0.1,0.2])\n",
|
865 |
+
"# model.add(Dropout(rate=fdropout1))\n",
|
866 |
+
"# if layer_dropout==0:\n",
|
867 |
+
"# fdropout2 = trial.suggest_categorical(\"Final_dropout\",[0.1,0.2])\n",
|
868 |
+
"# model.add(Dropout(rate=fdropout2))\n",
|
869 |
+
"# model.add(Dense(units=1))\n",
|
870 |
+
"# learningr = trial.suggest_categorical(\"Learning_rate\",[0.01,0.001,0.0001])\n",
|
871 |
+
"# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learningr),\n",
|
872 |
+
"# loss=tf.keras.losses.MeanSquaredError(),\n",
|
873 |
+
"# metrics=[tf.keras.losses.MeanSquaredError(),\n",
|
874 |
+
"# tf.keras.losses.MeanAbsoluteError(),\n",
|
875 |
+
"# tf.keras.metrics.RootMeanSquaredError()])\n",
|
876 |
+
"# return model\n",
|
877 |
+
"\n",
|
878 |
+
"def search_model(trial, input_dim):\n",
|
879 |
+
" n_layers = trial.suggest_int(\"n_layers\", 1, 3)\n",
|
880 |
+
" model = tf.keras.Sequential()\n",
|
881 |
+
" model.add(tf.keras.layers.Input(shape=(input_dim,)))\n",
|
882 |
+
" layer_dropout = trial.suggest_int(\"layer_dropout\", 0, 1)\n",
|
883 |
+
" \n",
|
884 |
+
" for i in range(n_layers):\n",
|
885 |
+
" num_hidden = trial.suggest_int(f\"n_units_l_{i}\", 2, 9999)\n",
|
886 |
+
" num_decay = trial.suggest_categorical(f\"n_decay_l_{i}\", [1e-4,1e-5,1e-6])\n",
|
887 |
+
" model.add(\n",
|
888 |
+
" tf.keras.layers.Dense(\n",
|
889 |
+
" num_hidden,\n",
|
890 |
+
" # activation=\"relu\",\n",
|
891 |
+
" kernel_initializer='glorot_uniform',\n",
|
892 |
+
" kernel_regularizer=tf.keras.regularizers.l2(num_decay),\n",
|
893 |
+
" )\n",
|
894 |
+
" )\n",
|
895 |
+
" model.add(tf.keras.layers.LeakyReLU(alpha=0.01))\n",
|
896 |
+
" if layer_dropout == 1:\n",
|
897 |
+
" fdropout1 = trial.suggest_categorical(f\"F_dropout_{i}\", [0.1, 0.2, 0.3])\n",
|
898 |
+
" model.add(tf.keras.layers.Dropout(rate=fdropout1))\n",
|
899 |
+
" \n",
|
900 |
+
" if layer_dropout == 0:\n",
|
901 |
+
" fdropout2 = trial.suggest_categorical(\"last_dropout\", [0.1, 0.2, 0.3])\n",
|
902 |
+
" model.add(tf.keras.layers.Dropout(rate=fdropout2))\n",
|
903 |
+
" \n",
|
904 |
+
" model.add(tf.keras.layers.Dense(units=1))\n",
|
905 |
+
" # # Colab\n",
|
906 |
+
" # learningr = trial.suggest_categorical(\"Learning_rate\",[0.01,0.001,0.0001])\n",
|
907 |
+
" # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learningr),\n",
|
908 |
+
" # loss=tf.keras.losses.MeanSquaredError(),\n",
|
909 |
+
" # metrics=[tf.keras.losses.MeanSquaredError(),\n",
|
910 |
+
" # tf.keras.losses.MeanAbsoluteError(),\n",
|
911 |
+
" # tf.keras.metrics.RootMeanSquaredError()])\n",
|
912 |
+
" return model\n",
|
913 |
+
"\n",
|
914 |
+
"\n",
|
915 |
+
"def save_model(trial, x_data):\n",
|
916 |
+
" model_path = \"save_model/full_model.keras\"\n",
|
917 |
+
" \n",
|
918 |
+
" if not os.path.exists(model_path):\n",
|
919 |
+
" try:\n",
|
920 |
+
" model = search_model(trial, x_data.shape[1])\n",
|
921 |
+
" os.makedirs(\"save_model\", exist_ok=True)\n",
|
922 |
+
" model.save(model_path)\n",
|
923 |
+
" print(f\"Model successfully saved to {model_path}\")\n",
|
924 |
+
" except Exception as e:\n",
|
925 |
+
" print(f\"Error saving model: {e}\")\n",
|
926 |
+
" else:\n",
|
927 |
+
" print(f\"Model already exists at {model_path}\")\n",
|
928 |
+
" os.remove(model_path)\n",
|
929 |
+
" save_model(trial, x_data)"
|
930 |
+
]
|
931 |
+
},
|
932 |
+
{
|
933 |
+
"cell_type": "code",
|
934 |
+
"execution_count": 28,
|
935 |
+
"metadata": {},
|
936 |
+
"outputs": [],
|
937 |
+
"source": [
|
938 |
+
"from sklearn.model_selection import train_test_split\n",
|
939 |
+
"xtr_fws, xte_fws, ytr_fws, yte_fws = train_test_split(new_ws, y_ws, test_size = 0.1, random_state = 42)\n",
|
940 |
+
"xtr_fde, xte_fde, ytr_fde, yte_fde = train_test_split(new_de, y_de, test_size = 0.1, random_state = 42)\n",
|
941 |
+
"xtr_flo, xte_flo, ytr_flo, yte_flo = train_test_split(new_lo, y_lo, test_size = 0.1, random_state = 42)\n",
|
942 |
+
"xtr_fhu, xte_fhu, ytr_fhu, yte_fhu = train_test_split(new_hu, y_hu, test_size = 0.1, random_state = 42)"
|
943 |
+
]
|
944 |
+
},
|
945 |
+
{
|
946 |
+
"cell_type": "code",
|
947 |
+
"execution_count": 29,
|
948 |
+
"metadata": {},
|
949 |
+
"outputs": [],
|
950 |
+
"source": [
|
951 |
+
"# # Colab\n",
|
952 |
+
"# def preprocess_data(xtr, ytr):\n",
|
953 |
+
"# dataset = tf.data.Dataset.from_tensor_slices((xtr, ytr))\n",
|
954 |
+
"# dataset = dataset.shuffle(buffer_size=len(xtr)).batch(BATCHSIZE).cache().prefetch(tf.data.AUTOTUNE)\n",
|
955 |
+
"# return dataset\n",
|
956 |
+
"\n",
|
957 |
+
"# cb = tf.keras.callbacks.EarlyStopping(\n",
|
958 |
+
"# monitor='loss', \n",
|
959 |
+
"# patience=5,\n",
|
960 |
+
"# restore_best_weights=True,\n",
|
961 |
+
"# # min_delta=0.001,\n",
|
962 |
+
"# mode='min',\n",
|
963 |
+
"# verbose=1\n",
|
964 |
+
"# )"
|
965 |
+
]
|
966 |
+
},
|
967 |
+
{
|
968 |
+
"cell_type": "code",
|
969 |
+
"execution_count": 30,
|
970 |
+
"metadata": {},
|
971 |
+
"outputs": [],
|
972 |
+
"source": [
|
973 |
+
"# # Colab\n",
|
974 |
+
"# def objective_ws_network(trial):\n",
|
975 |
+
"# tf.keras.backend.clear_session()\n",
|
976 |
+
"# model = search_model(trial, xtr_fws.shape[1])\n",
|
977 |
+
"# train_data = preprocess_data(xtr_fws, ytr_fws)\n",
|
978 |
+
"# model.fit(\n",
|
979 |
+
"# train_data,\n",
|
980 |
+
"# batch_size=BATCHSIZE,\n",
|
981 |
+
"# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n",
|
982 |
+
"# epochs=EPOCHS,\n",
|
983 |
+
"# verbose=0,\n",
|
984 |
+
"# )\n",
|
985 |
+
"# y_pred_search = model.predict(xte_fws, verbose=0)\n",
|
986 |
+
"# score = r2_score(yte_fws, y_pred_search)\n",
|
987 |
+
"# del model\n",
|
988 |
+
"# tf.keras.backend.clear_session()\n",
|
989 |
+
"# gc.collect()\n",
|
990 |
+
"# return score"
|
991 |
+
]
|
992 |
+
},
|
993 |
+
{
|
994 |
+
"cell_type": "code",
|
995 |
+
"execution_count": 31,
|
996 |
+
"metadata": {},
|
997 |
+
"outputs": [],
|
998 |
+
"source": [
|
999 |
+
"# # Colab\n",
|
1000 |
+
"# def objective_de_network(trial):\n",
|
1001 |
+
"# tf.keras.backend.clear_session()\n",
|
1002 |
+
"# model = search_model(trial, xtr_fde.shape[1])\n",
|
1003 |
+
"# train_data = preprocess_data(xtr_fde, ytr_fde)\n",
|
1004 |
+
"# model.fit(\n",
|
1005 |
+
"# train_data,\n",
|
1006 |
+
"# batch_size=BATCHSIZE,\n",
|
1007 |
+
"# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n",
|
1008 |
+
"# epochs=EPOCHS,\n",
|
1009 |
+
"# verbose=0,\n",
|
1010 |
+
"# )\n",
|
1011 |
+
"# y_pred_search = model.predict(xte_fde, verbose=0)\n",
|
1012 |
+
"# score = r2_score(yte_fde, y_pred_search)\n",
|
1013 |
+
"# del model\n",
|
1014 |
+
"# tf.keras.backend.clear_session()\n",
|
1015 |
+
"# gc.collect()\n",
|
1016 |
+
"# return score"
|
1017 |
+
]
|
1018 |
+
},
|
1019 |
+
{
|
1020 |
+
"cell_type": "code",
|
1021 |
+
"execution_count": 32,
|
1022 |
+
"metadata": {},
|
1023 |
+
"outputs": [],
|
1024 |
+
"source": [
|
1025 |
+
"# # Colab\n",
|
1026 |
+
"# def objective_lo_network(trial):\n",
|
1027 |
+
"# tf.keras.backend.clear_session()\n",
|
1028 |
+
"# model = search_model(trial, xtr_flo.shape[1])\n",
|
1029 |
+
"# train_data = preprocess_data(xtr_flo, ytr_flo)\n",
|
1030 |
+
"# model.fit(\n",
|
1031 |
+
"# train_data,\n",
|
1032 |
+
"# batch_size=BATCHSIZE,\n",
|
1033 |
+
"# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n",
|
1034 |
+
"# epochs=EPOCHS,\n",
|
1035 |
+
"# verbose=0,\n",
|
1036 |
+
"# )\n",
|
1037 |
+
"# y_pred_search = model.predict(xte_flo, verbose=0)\n",
|
1038 |
+
"# score = r2_score(yte_flo, y_pred_search)\n",
|
1039 |
+
"# del model\n",
|
1040 |
+
"# tf.keras.backend.clear_session()\n",
|
1041 |
+
"# gc.collect()\n",
|
1042 |
+
"# return score"
|
1043 |
+
]
|
1044 |
+
},
|
1045 |
+
{
|
1046 |
+
"cell_type": "code",
|
1047 |
+
"execution_count": 33,
|
1048 |
+
"metadata": {},
|
1049 |
+
"outputs": [],
|
1050 |
+
"source": [
|
1051 |
+
"# # Colab\n",
|
1052 |
+
"# def objective_hu_network(trial):\n",
|
1053 |
+
"# tf.keras.backend.clear_session()\n",
|
1054 |
+
"# model = search_model(trial, xtr_fhu.shape[1])\n",
|
1055 |
+
"# train_data = preprocess_data(xtr_fhu, ytr_fhu)\n",
|
1056 |
+
"# model.fit(\n",
|
1057 |
+
"# train_data,\n",
|
1058 |
+
"# batch_size=BATCHSIZE,\n",
|
1059 |
+
"# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n",
|
1060 |
+
"# epochs=EPOCHS,\n",
|
1061 |
+
"# verbose=0,\n",
|
1062 |
+
"# )\n",
|
1063 |
+
"# y_pred_search = model.predict(xte_fhu, verbose=0)\n",
|
1064 |
+
"# score = r2_score(yte_fhu, y_pred_search)\n",
|
1065 |
+
"# del model\n",
|
1066 |
+
"# tf.keras.backend.clear_session()\n",
|
1067 |
+
"# gc.collect()\n",
|
1068 |
+
"# return score"
|
1069 |
+
]
|
1070 |
+
},
|
1071 |
+
{
|
1072 |
+
"cell_type": "code",
|
1073 |
+
"execution_count": 34,
|
1074 |
+
"metadata": {},
|
1075 |
+
"outputs": [],
|
1076 |
+
"source": [
|
1077 |
+
"def objective_ws_network(trial):\n",
|
1078 |
+
" r2_result = None\n",
|
1079 |
+
" current_step = 0 \n",
|
1080 |
+
" try:\n",
|
1081 |
+
" y_true = np.asarray(y_ws).astype('float')\n",
|
1082 |
+
" np.save('new_fps.npy', new_ws)\n",
|
1083 |
+
" np.save('y_true.npy', y_true)\n",
|
1084 |
+
" \n",
|
1085 |
+
" save_model(trial, new_ws)\n",
|
1086 |
+
"\n",
|
1087 |
+
" lr = trial.suggest_categorical(f\"lr\", [0.001,0.0001,0.00001])\n",
|
1088 |
+
"\n",
|
1089 |
+
" result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
|
1090 |
+
" str(BATCHSIZE), str(EPOCHS), \n",
|
1091 |
+
" str(lr), \n",
|
1092 |
+
" 'new_fps.npy', 'y_true.npy', str(trial.number)],\n",
|
1093 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
|
1094 |
+
" \n",
|
1095 |
+
" if result.stderr:\n",
|
1096 |
+
" filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') \n",
|
1097 |
+
" if \"could not open file to read NUMA node\" not in line \n",
|
1098 |
+
" and \"Your kernel may have been built without NUMA support\" not in line])\n",
|
1099 |
+
" if filtered_stderr:\n",
|
1100 |
+
" print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
|
1101 |
+
"\n",
|
1102 |
+
" lines = result.stdout.splitlines()\n",
|
1103 |
+
" for line in lines:\n",
|
1104 |
+
" if line.startswith(\"intermediate_value:\"):\n",
|
1105 |
+
" _, step, value = line.split(\":\")\n",
|
1106 |
+
" step = int(step)\n",
|
1107 |
+
" value = float(value)\n",
|
1108 |
+
" current_step = step\n",
|
1109 |
+
" \n",
|
1110 |
+
" trial.report(value, step)\n",
|
1111 |
+
" \n",
|
1112 |
+
" if trial.should_prune():\n",
|
1113 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
1114 |
+
"\n",
|
1115 |
+
" for line in reversed(lines):\n",
|
1116 |
+
" if \"R2:\" in line:\n",
|
1117 |
+
" if \"(prune)\" in line:\n",
|
1118 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
1119 |
+
" else:\n",
|
1120 |
+
" r2_result = float(line.split(\":\")[1].strip())\n",
|
1121 |
+
" break\n",
|
1122 |
+
"\n",
|
1123 |
+
" except optuna.exceptions.TrialPruned:\n",
|
1124 |
+
" print(f\"Trial pruned at step {current_step}\")\n",
|
1125 |
+
" raise\n",
|
1126 |
+
" except Exception as e:\n",
|
1127 |
+
" print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
|
1128 |
+
" r2_result = 0.0\n",
|
1129 |
+
"\n",
|
1130 |
+
" gc.collect()\n",
|
1131 |
+
" return r2_result if r2_result is not None else 0.0"
|
1132 |
+
]
|
1133 |
+
},
|
1134 |
+
{
|
1135 |
+
"cell_type": "code",
|
1136 |
+
"execution_count": 35,
|
1137 |
+
"metadata": {},
|
1138 |
+
"outputs": [],
|
1139 |
+
"source": [
|
1140 |
+
"def objective_de_network(trial):\n",
|
1141 |
+
" r2_result = None\n",
|
1142 |
+
" current_step = 0 \n",
|
1143 |
+
" try:\n",
|
1144 |
+
" y_true = np.asarray(y_de).astype('float')\n",
|
1145 |
+
" np.save('new_fps.npy', new_de)\n",
|
1146 |
+
" np.save('y_true.npy', y_true)\n",
|
1147 |
+
" \n",
|
1148 |
+
" save_model(trial, new_de)\n",
|
1149 |
+
"\n",
|
1150 |
+
" lr = trial.suggest_categorical(f\"lr\", [0.001,0.0001,0.00001])\n",
|
1151 |
+
"\n",
|
1152 |
+
" result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
|
1153 |
+
" str(BATCHSIZE), str(EPOCHS), \n",
|
1154 |
+
" str(lr), \n",
|
1155 |
+
" 'new_fps.npy', 'y_true.npy', str(trial.number)],\n",
|
1156 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
|
1157 |
+
" \n",
|
1158 |
+
" if result.stderr:\n",
|
1159 |
+
" filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') \n",
|
1160 |
+
" if \"could not open file to read NUMA node\" not in line \n",
|
1161 |
+
" and \"Your kernel may have been built without NUMA support\" not in line])\n",
|
1162 |
+
" if filtered_stderr:\n",
|
1163 |
+
" print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
|
1164 |
+
"\n",
|
1165 |
+
" lines = result.stdout.splitlines()\n",
|
1166 |
+
" for line in lines:\n",
|
1167 |
+
" if line.startswith(\"intermediate_value:\"):\n",
|
1168 |
+
" _, step, value = line.split(\":\")\n",
|
1169 |
+
" step = int(step)\n",
|
1170 |
+
" value = float(value)\n",
|
1171 |
+
" current_step = step\n",
|
1172 |
+
" \n",
|
1173 |
+
" trial.report(value, step)\n",
|
1174 |
+
" \n",
|
1175 |
+
" if trial.should_prune():\n",
|
1176 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
1177 |
+
"\n",
|
1178 |
+
" for line in reversed(lines):\n",
|
1179 |
+
" if \"R2:\" in line:\n",
|
1180 |
+
" if \"(prune)\" in line:\n",
|
1181 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
1182 |
+
" else:\n",
|
1183 |
+
" r2_result = float(line.split(\":\")[1].strip())\n",
|
1184 |
+
" break\n",
|
1185 |
+
"\n",
|
1186 |
+
" except optuna.exceptions.TrialPruned:\n",
|
1187 |
+
" print(f\"Trial pruned at step {current_step}\")\n",
|
1188 |
+
" raise\n",
|
1189 |
+
" except Exception as e:\n",
|
1190 |
+
" print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
|
1191 |
+
" r2_result = 0.0\n",
|
1192 |
+
"\n",
|
1193 |
+
" gc.collect()\n",
|
1194 |
+
" return r2_result if r2_result is not None else 0.0"
|
1195 |
+
]
|
1196 |
+
},
|
1197 |
+
{
|
1198 |
+
"cell_type": "code",
|
1199 |
+
"execution_count": 36,
|
1200 |
+
"metadata": {},
|
1201 |
+
"outputs": [],
|
1202 |
+
"source": [
|
1203 |
+
"def objective_lo_network(trial):\n",
|
1204 |
+
" r2_result = None\n",
|
1205 |
+
" current_step = 0 \n",
|
1206 |
+
" try:\n",
|
1207 |
+
" y_true = np.asarray(y_lo).astype('float')\n",
|
1208 |
+
" np.save('new_fps.npy', new_lo)\n",
|
1209 |
+
" np.save('y_true.npy', y_true)\n",
|
1210 |
+
" \n",
|
1211 |
+
" save_model(trial, new_lo)\n",
|
1212 |
+
"\n",
|
1213 |
+
" lr = trial.suggest_categorical(f\"lr\", [0.001,0.0001,0.00001])\n",
|
1214 |
+
"\n",
|
1215 |
+
" result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
|
1216 |
+
" str(BATCHSIZE), str(EPOCHS), \n",
|
1217 |
+
" str(lr), \n",
|
1218 |
+
" 'new_fps.npy', 'y_true.npy', str(trial.number)],\n",
|
1219 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
|
1220 |
+
" \n",
|
1221 |
+
" if result.stderr:\n",
|
1222 |
+
" filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') \n",
|
1223 |
+
" if \"could not open file to read NUMA node\" not in line \n",
|
1224 |
+
" and \"Your kernel may have been built without NUMA support\" not in line])\n",
|
1225 |
+
" if filtered_stderr:\n",
|
1226 |
+
" print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
|
1227 |
+
"\n",
|
1228 |
+
" lines = result.stdout.splitlines()\n",
|
1229 |
+
" for line in lines:\n",
|
1230 |
+
" if line.startswith(\"intermediate_value:\"):\n",
|
1231 |
+
" _, step, value = line.split(\":\")\n",
|
1232 |
+
" step = int(step)\n",
|
1233 |
+
" value = float(value)\n",
|
1234 |
+
" current_step = step\n",
|
1235 |
+
" \n",
|
1236 |
+
" trial.report(value, step)\n",
|
1237 |
+
" \n",
|
1238 |
+
" if trial.should_prune():\n",
|
1239 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
1240 |
+
"\n",
|
1241 |
+
" for line in reversed(lines):\n",
|
1242 |
+
" if \"R2:\" in line:\n",
|
1243 |
+
" if \"(prune)\" in line:\n",
|
1244 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
1245 |
+
" else:\n",
|
1246 |
+
" r2_result = float(line.split(\":\")[1].strip())\n",
|
1247 |
+
" break\n",
|
1248 |
+
"\n",
|
1249 |
+
" except optuna.exceptions.TrialPruned:\n",
|
1250 |
+
" print(f\"Trial pruned at step {current_step}\")\n",
|
1251 |
+
" raise\n",
|
1252 |
+
" except Exception as e:\n",
|
1253 |
+
" print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
|
1254 |
+
" r2_result = 0.0\n",
|
1255 |
+
"\n",
|
1256 |
+
" gc.collect()\n",
|
1257 |
+
" return r2_result if r2_result is not None else 0.0"
|
1258 |
+
]
|
1259 |
+
},
|
1260 |
+
{
|
1261 |
+
"cell_type": "code",
|
1262 |
+
"execution_count": 37,
|
1263 |
+
"metadata": {},
|
1264 |
+
"outputs": [],
|
1265 |
+
"source": [
|
1266 |
+
"def objective_hu_network(trial):\n",
|
1267 |
+
" r2_result = None\n",
|
1268 |
+
" current_step = 0 \n",
|
1269 |
+
" try:\n",
|
1270 |
+
" y_true = np.asarray(y_hu).astype('float')\n",
|
1271 |
+
" np.save('new_fps.npy', new_hu)\n",
|
1272 |
+
" np.save('y_true.npy', y_true)\n",
|
1273 |
+
" \n",
|
1274 |
+
" save_model(trial, new_hu)\n",
|
1275 |
+
"\n",
|
1276 |
+
" lr = trial.suggest_categorical(f\"lr\", [0.001,0.0001,0.00001])\n",
|
1277 |
+
"\n",
|
1278 |
+
" result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
|
1279 |
+
" str(BATCHSIZE), str(EPOCHS), \n",
|
1280 |
+
" str(lr), \n",
|
1281 |
+
" 'new_fps.npy', 'y_true.npy', str(trial.number)],\n",
|
1282 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
|
1283 |
+
" \n",
|
1284 |
+
" if result.stderr:\n",
|
1285 |
+
" filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') \n",
|
1286 |
+
" if \"could not open file to read NUMA node\" not in line \n",
|
1287 |
+
" and \"Your kernel may have been built without NUMA support\" not in line])\n",
|
1288 |
+
" if filtered_stderr:\n",
|
1289 |
+
" print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
|
1290 |
+
"\n",
|
1291 |
+
" lines = result.stdout.splitlines()\n",
|
1292 |
+
" for line in lines:\n",
|
1293 |
+
" if line.startswith(\"intermediate_value:\"):\n",
|
1294 |
+
" _, step, value = line.split(\":\")\n",
|
1295 |
+
" step = int(step)\n",
|
1296 |
+
" value = float(value)\n",
|
1297 |
+
" current_step = step\n",
|
1298 |
+
" \n",
|
1299 |
+
" trial.report(value, step)\n",
|
1300 |
+
" \n",
|
1301 |
+
" if trial.should_prune():\n",
|
1302 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
1303 |
+
"\n",
|
1304 |
+
" for line in reversed(lines):\n",
|
1305 |
+
" if \"R2:\" in line:\n",
|
1306 |
+
" if \"(prune)\" in line:\n",
|
1307 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
1308 |
+
" else:\n",
|
1309 |
+
" r2_result = float(line.split(\":\")[1].strip())\n",
|
1310 |
+
" break\n",
|
1311 |
+
"\n",
|
1312 |
+
" except optuna.exceptions.TrialPruned:\n",
|
1313 |
+
" print(f\"Trial pruned at step {current_step}\")\n",
|
1314 |
+
" raise\n",
|
1315 |
+
" except Exception as e:\n",
|
1316 |
+
" print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
|
1317 |
+
" r2_result = 0.0\n",
|
1318 |
+
"\n",
|
1319 |
+
" gc.collect()\n",
|
1320 |
+
" return r2_result if r2_result is not None else 0.0"
|
1321 |
+
]
|
1322 |
+
},
|
1323 |
+
{
|
1324 |
+
"cell_type": "code",
|
1325 |
+
"execution_count": 38,
|
1326 |
+
"metadata": {},
|
1327 |
+
"outputs": [],
|
1328 |
+
"source": [
|
1329 |
+
"storage = optuna.storages.RDBStorage(url=\"sqlite:///ano_analysis.db\", engine_kwargs={\"connect_args\": {\"timeout\": 10000}})\n",
|
1330 |
+
"# storage_urls = \"postgresql+psycopg2://postgres:{pwd}}@localhost:{num}}\"\n",
|
1331 |
+
"# storage = optuna.storages.RDBStorage(url=storage_urls)"
|
1332 |
+
]
|
1333 |
+
},
|
1334 |
+
{
|
1335 |
+
"cell_type": "code",
|
1336 |
+
"execution_count": 42,
|
1337 |
+
"metadata": {},
|
1338 |
+
"outputs": [],
|
1339 |
+
"source": [
|
1340 |
+
"try:\n",
|
1341 |
+
" # optuna.delete_study(study_name=\"ANO_ws_network\", storage=storage)\n",
|
1342 |
+
" # optuna.delete_study(study_name=\"ANO_de_network\", storage=storage)\n",
|
1343 |
+
" optuna.delete_study(study_name=\"ANO_lo_network\", storage=storage)\n",
|
1344 |
+
" # optuna.delete_study(study_name=\"ANO_hu_network\", storage=storage)\n",
|
1345 |
+
" pass\n",
|
1346 |
+
"except:\n",
|
1347 |
+
" pass "
|
1348 |
+
]
|
1349 |
+
},
|
1350 |
+
{
|
1351 |
+
"cell_type": "code",
|
1352 |
+
"execution_count": 43,
|
1353 |
+
"metadata": {},
|
1354 |
+
"outputs": [],
|
1355 |
+
"source": [
|
1356 |
+
"TRIALS=1"
|
1357 |
+
]
|
1358 |
+
},
|
1359 |
+
{
|
1360 |
+
"cell_type": "code",
|
1361 |
+
"execution_count": 44,
|
1362 |
+
"metadata": {},
|
1363 |
+
"outputs": [
|
1364 |
+
{
|
1365 |
+
"name": "stderr",
|
1366 |
+
"output_type": "stream",
|
1367 |
+
"text": [
|
1368 |
+
"[I 2024-11-04 23:06:11,544] Using an existing study with name 'ANO_de_network' instead of creating a new one.\n"
|
1369 |
+
]
|
1370 |
+
},
|
1371 |
+
{
|
1372 |
+
"name": "stdout",
|
1373 |
+
"output_type": "stream",
|
1374 |
+
"text": [
|
1375 |
+
"Model already exists at save_model/full_model.keras\n",
|
1376 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1377 |
+
]
|
1378 |
+
},
|
1379 |
+
{
|
1380 |
+
"name": "stderr",
|
1381 |
+
"output_type": "stream",
|
1382 |
+
"text": [
|
1383 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1384 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1385 |
+
"I0000 00:00:1730729176.331625 2499072 service.cc:146] XLA service 0x55701a22e460 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1386 |
+
"I0000 00:00:1730729176.331663 2499072 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1387 |
+
"I0000 00:00:1730729176.465814 2499072 service.cc:146] XLA service 0x55701a205900 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1388 |
+
"I0000 00:00:1730729176.465843 2499072 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1389 |
+
"I0000 00:00:1730729180.613395 2499185 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1390 |
+
"\n",
|
1391 |
+
"[I 2024-11-04 23:09:13,063] Trial 1115 finished with value: 0.965164 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 7490, 'n_decay_l_0': 1e-05, 'n_units_l_1': 2373, 'n_decay_l_1': 1e-06, 'n_units_l_2': 6613, 'n_decay_l_2': 1e-05, 'last_dropout': 0.3, 'lr': 0.001}. Best is trial 1097 with value: 0.983023.\n"
|
1392 |
+
]
|
1393 |
+
}
|
1394 |
+
],
|
1395 |
+
"source": [
|
1396 |
+
"# study_de_network = optuna.create_study(study_name='ANO_de_network_fixed', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
|
1397 |
+
"study_de_network = optuna.create_study(study_name='ANO_de_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)\n",
|
1398 |
+
"# study_de_network = optuna.create_study(study_name='ANO_de_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
|
1399 |
+
"# study_de_network = optuna.create_study(study_name='ANO_de_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)\n",
|
1400 |
+
"study_de_network.optimize(objective_de_network, n_trials=TRIALS)\n",
|
1401 |
+
"pruned_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1402 |
+
"complete_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n",
|
1403 |
+
"#74m 22.0s\n",
|
1404 |
+
"# 386m 42.2s - 100 trials, 1000 epochs\n",
|
1405 |
+
"#278m 23.3s"
|
1406 |
+
]
|
1407 |
+
},
|
1408 |
+
{
|
1409 |
+
"cell_type": "code",
|
1410 |
+
"execution_count": 45,
|
1411 |
+
"metadata": {},
|
1412 |
+
"outputs": [
|
1413 |
+
{
|
1414 |
+
"name": "stderr",
|
1415 |
+
"output_type": "stream",
|
1416 |
+
"text": [
|
1417 |
+
"[I 2024-11-04 23:09:13,086] Using an existing study with name 'ANO_ws_network' instead of creating a new one.\n"
|
1418 |
+
]
|
1419 |
+
},
|
1420 |
+
{
|
1421 |
+
"name": "stdout",
|
1422 |
+
"output_type": "stream",
|
1423 |
+
"text": [
|
1424 |
+
"Model already exists at save_model/full_model.keras\n",
|
1425 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1426 |
+
]
|
1427 |
+
},
|
1428 |
+
{
|
1429 |
+
"name": "stderr",
|
1430 |
+
"output_type": "stream",
|
1431 |
+
"text": [
|
1432 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1433 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1434 |
+
"I0000 00:00:1730729356.277557 2507565 service.cc:146] XLA service 0x55c7cad07060 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1435 |
+
"I0000 00:00:1730729356.277598 2507565 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1436 |
+
"I0000 00:00:1730729356.416113 2507565 service.cc:146] XLA service 0x55c7cac0bd20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1437 |
+
"I0000 00:00:1730729356.416147 2507565 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1438 |
+
"I0000 00:00:1730729359.300797 2507682 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1439 |
+
"\n",
|
1440 |
+
"[I 2024-11-04 23:09:27,954] Trial 193 finished with value: 0.939087 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 800, 'n_decay_l_0': 1e-06, 'n_units_l_1': 530, 'n_decay_l_1': 1e-05, 'last_dropout': 0.1, 'lr': 0.001}. Best is trial 104 with value: 0.970129.\n"
|
1441 |
+
]
|
1442 |
+
}
|
1443 |
+
],
|
1444 |
+
"source": [
|
1445 |
+
"# study_ws_network = optuna.create_study(study_name='ANO_ws_network_fixed', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
|
1446 |
+
"study_ws_network = optuna.create_study(study_name='ANO_ws_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)\n",
|
1447 |
+
"# study_ws_network = optuna.create_study(study_name='ANO_ws_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)\n",
|
1448 |
+
"# study_ws_network = optuna.create_study(study_name='ANO_ws_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)\n",
|
1449 |
+
"study_ws_network.optimize(objective_ws_network, n_trials=TRIALS)\n",
|
1450 |
+
"pruned_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1451 |
+
"complete_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n",
|
1452 |
+
"# 108m 38.1s\n",
|
1453 |
+
"# 160m 18.2s - 100 trials, 1000 epochs"
|
1454 |
+
]
|
1455 |
+
},
|
1456 |
+
{
|
1457 |
+
"cell_type": "code",
|
1458 |
+
"execution_count": 46,
|
1459 |
+
"metadata": {},
|
1460 |
+
"outputs": [],
|
1461 |
+
"source": [
|
1462 |
+
"TRIALS=10"
|
1463 |
+
]
|
1464 |
+
},
|
1465 |
+
{
|
1466 |
+
"cell_type": "code",
|
1467 |
+
"execution_count": 47,
|
1468 |
+
"metadata": {},
|
1469 |
+
"outputs": [
|
1470 |
+
{
|
1471 |
+
"name": "stderr",
|
1472 |
+
"output_type": "stream",
|
1473 |
+
"text": [
|
1474 |
+
"[I 2024-11-04 23:09:27,984] A new study created in RDB with name: ANO_lo_network\n"
|
1475 |
+
]
|
1476 |
+
},
|
1477 |
+
{
|
1478 |
+
"name": "stdout",
|
1479 |
+
"output_type": "stream",
|
1480 |
+
"text": [
|
1481 |
+
"Model already exists at save_model/full_model.keras\n",
|
1482 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1483 |
+
]
|
1484 |
+
},
|
1485 |
+
{
|
1486 |
+
"name": "stderr",
|
1487 |
+
"output_type": "stream",
|
1488 |
+
"text": [
|
1489 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1490 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1491 |
+
"I0000 00:00:1730729368.680796 2510630 service.cc:146] XLA service 0x56035729eda0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1492 |
+
"I0000 00:00:1730729368.680848 2510630 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1493 |
+
"I0000 00:00:1730729368.837668 2510630 service.cc:146] XLA service 0x5603572f9c70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1494 |
+
"I0000 00:00:1730729368.837708 2510630 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1495 |
+
"I0000 00:00:1730729371.354407 2510735 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1496 |
+
"\n",
|
1497 |
+
"[I 2024-11-04 23:09:45,883] Trial 0 finished with value: 0.723669 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 2941, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.1, 'lr': 0.0001}. Best is trial 0 with value: 0.723669.\n"
|
1498 |
+
]
|
1499 |
+
},
|
1500 |
+
{
|
1501 |
+
"name": "stdout",
|
1502 |
+
"output_type": "stream",
|
1503 |
+
"text": [
|
1504 |
+
"Model already exists at save_model/full_model.keras\n",
|
1505 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1506 |
+
]
|
1507 |
+
},
|
1508 |
+
{
|
1509 |
+
"name": "stderr",
|
1510 |
+
"output_type": "stream",
|
1511 |
+
"text": [
|
1512 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1513 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1514 |
+
"I0000 00:00:1730729388.955076 2513776 service.cc:146] XLA service 0x5615ce07e010 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1515 |
+
"I0000 00:00:1730729388.955114 2513776 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1516 |
+
"I0000 00:00:1730729389.098245 2513776 service.cc:146] XLA service 0x5615ce09f3b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1517 |
+
"I0000 00:00:1730729389.098279 2513776 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1518 |
+
"I0000 00:00:1730729391.952147 2513881 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1519 |
+
"\n",
|
1520 |
+
"[I 2024-11-04 23:10:24,537] Trial 1 finished with value: 0.780745 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 7693, 'n_decay_l_0': 0.0001, 'last_dropout': 0.1, 'lr': 0.0001}. Best is trial 1 with value: 0.780745.\n"
|
1521 |
+
]
|
1522 |
+
},
|
1523 |
+
{
|
1524 |
+
"name": "stdout",
|
1525 |
+
"output_type": "stream",
|
1526 |
+
"text": [
|
1527 |
+
"Model already exists at save_model/full_model.keras\n",
|
1528 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1529 |
+
]
|
1530 |
+
},
|
1531 |
+
{
|
1532 |
+
"name": "stderr",
|
1533 |
+
"output_type": "stream",
|
1534 |
+
"text": [
|
1535 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1536 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1537 |
+
"I0000 00:00:1730729427.740828 2518105 service.cc:146] XLA service 0x563fb10b11b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1538 |
+
"I0000 00:00:1730729427.740872 2518105 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1539 |
+
"I0000 00:00:1730729427.891154 2518105 service.cc:146] XLA service 0x563fb0f93760 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1540 |
+
"I0000 00:00:1730729427.891196 2518105 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1541 |
+
"I0000 00:00:1730729428.275905 2518208 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1542 |
+
"\n",
|
1543 |
+
"[I 2024-11-04 23:10:56,189] Trial 2 finished with value: 0.861173 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 3091, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'lr': 0.001}. Best is trial 2 with value: 0.861173.\n"
|
1544 |
+
]
|
1545 |
+
},
|
1546 |
+
{
|
1547 |
+
"name": "stdout",
|
1548 |
+
"output_type": "stream",
|
1549 |
+
"text": [
|
1550 |
+
"Model already exists at save_model/full_model.keras\n",
|
1551 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1552 |
+
]
|
1553 |
+
},
|
1554 |
+
{
|
1555 |
+
"name": "stderr",
|
1556 |
+
"output_type": "stream",
|
1557 |
+
"text": [
|
1558 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1559 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1560 |
+
"I0000 00:00:1730729457.830007 2524433 service.cc:146] XLA service 0x56115c187470 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1561 |
+
"I0000 00:00:1730729457.830062 2524433 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1562 |
+
"I0000 00:00:1730729457.978097 2524433 service.cc:146] XLA service 0x56115c0a57a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1563 |
+
"I0000 00:00:1730729457.978151 2524433 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1564 |
+
"I0000 00:00:1730729463.165301 2524546 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1565 |
+
"\n",
|
1566 |
+
"[I 2024-11-04 23:12:29,474] Trial 3 pruned. \n"
|
1567 |
+
]
|
1568 |
+
},
|
1569 |
+
{
|
1570 |
+
"name": "stdout",
|
1571 |
+
"output_type": "stream",
|
1572 |
+
"text": [
|
1573 |
+
"Trial pruned at step 50\n",
|
1574 |
+
"Model already exists at save_model/full_model.keras\n",
|
1575 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1576 |
+
]
|
1577 |
+
},
|
1578 |
+
{
|
1579 |
+
"name": "stderr",
|
1580 |
+
"output_type": "stream",
|
1581 |
+
"text": [
|
1582 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1583 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1584 |
+
"I0000 00:00:1730729550.554875 2527992 service.cc:146] XLA service 0x559ab4767160 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1585 |
+
"I0000 00:00:1730729550.554932 2527992 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1586 |
+
"I0000 00:00:1730729550.699674 2527992 service.cc:146] XLA service 0x559ab47a71a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1587 |
+
"I0000 00:00:1730729550.699710 2527992 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1588 |
+
"I0000 00:00:1730729554.340454 2528101 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1589 |
+
"\n",
|
1590 |
+
"[I 2024-11-04 23:13:00,476] Trial 4 pruned. \n"
|
1591 |
+
]
|
1592 |
+
},
|
1593 |
+
{
|
1594 |
+
"name": "stdout",
|
1595 |
+
"output_type": "stream",
|
1596 |
+
"text": [
|
1597 |
+
"Trial pruned at step 50\n",
|
1598 |
+
"Model already exists at save_model/full_model.keras\n",
|
1599 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1600 |
+
]
|
1601 |
+
},
|
1602 |
+
{
|
1603 |
+
"name": "stderr",
|
1604 |
+
"output_type": "stream",
|
1605 |
+
"text": [
|
1606 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1607 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1608 |
+
"I0000 00:00:1730729581.148610 2530567 service.cc:146] XLA service 0x5603394ef990 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1609 |
+
"I0000 00:00:1730729581.148655 2530567 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1610 |
+
"I0000 00:00:1730729581.280871 2530567 service.cc:146] XLA service 0x5603394f7710 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1611 |
+
"I0000 00:00:1730729581.280904 2530567 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1612 |
+
"I0000 00:00:1730729584.114676 2530679 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1613 |
+
"\n",
|
1614 |
+
"[I 2024-11-04 23:13:19,868] Trial 5 pruned. \n"
|
1615 |
+
]
|
1616 |
+
},
|
1617 |
+
{
|
1618 |
+
"name": "stdout",
|
1619 |
+
"output_type": "stream",
|
1620 |
+
"text": [
|
1621 |
+
"Trial pruned at step 50\n",
|
1622 |
+
"Model already exists at save_model/full_model.keras\n",
|
1623 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1624 |
+
]
|
1625 |
+
},
|
1626 |
+
{
|
1627 |
+
"name": "stderr",
|
1628 |
+
"output_type": "stream",
|
1629 |
+
"text": [
|
1630 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1631 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1632 |
+
"I0000 00:00:1730729603.760434 2536173 service.cc:146] XLA service 0x55b2351fb2f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1633 |
+
"I0000 00:00:1730729603.760485 2536173 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1634 |
+
"I0000 00:00:1730729603.892722 2536173 service.cc:146] XLA service 0x55b232ce1e60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1635 |
+
"I0000 00:00:1730729603.892765 2536173 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1636 |
+
"I0000 00:00:1730729609.099171 2536280 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1637 |
+
"\n",
|
1638 |
+
"[I 2024-11-04 23:14:49,961] Trial 6 finished with value: 0.761589 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 8234, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.3, 'n_units_l_1': 5907, 'n_decay_l_1': 1e-05, 'F_dropout_1': 0.1, 'n_units_l_2': 5363, 'n_decay_l_2': 1e-05, 'F_dropout_2': 0.2, 'lr': 0.0001}. Best is trial 2 with value: 0.861173.\n"
|
1639 |
+
]
|
1640 |
+
},
|
1641 |
+
{
|
1642 |
+
"name": "stdout",
|
1643 |
+
"output_type": "stream",
|
1644 |
+
"text": [
|
1645 |
+
"Model already exists at save_model/full_model.keras\n",
|
1646 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1647 |
+
]
|
1648 |
+
},
|
1649 |
+
{
|
1650 |
+
"name": "stderr",
|
1651 |
+
"output_type": "stream",
|
1652 |
+
"text": [
|
1653 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1654 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1655 |
+
"I0000 00:00:1730729693.449528 2539291 service.cc:146] XLA service 0x562dce6885b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1656 |
+
"I0000 00:00:1730729693.449582 2539291 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1657 |
+
"I0000 00:00:1730729693.592325 2539291 service.cc:146] XLA service 0x562dce5c4c30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1658 |
+
"I0000 00:00:1730729693.592355 2539291 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1659 |
+
"I0000 00:00:1730729697.337810 2539396 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1660 |
+
"\n",
|
1661 |
+
"[I 2024-11-04 23:15:31,025] Trial 7 pruned. \n"
|
1662 |
+
]
|
1663 |
+
},
|
1664 |
+
{
|
1665 |
+
"name": "stdout",
|
1666 |
+
"output_type": "stream",
|
1667 |
+
"text": [
|
1668 |
+
"Trial pruned at step 50\n",
|
1669 |
+
"Model already exists at save_model/full_model.keras\n",
|
1670 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1671 |
+
]
|
1672 |
+
},
|
1673 |
+
{
|
1674 |
+
"name": "stderr",
|
1675 |
+
"output_type": "stream",
|
1676 |
+
"text": [
|
1677 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1678 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1679 |
+
"I0000 00:00:1730729732.655927 2542190 service.cc:146] XLA service 0x557b8aaafa90 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1680 |
+
"I0000 00:00:1730729732.655980 2542190 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1681 |
+
"I0000 00:00:1730729732.804313 2542190 service.cc:146] XLA service 0x557b8aa19180 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1682 |
+
"I0000 00:00:1730729732.804347 2542190 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1683 |
+
"I0000 00:00:1730729737.528836 2542300 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1684 |
+
"\n",
|
1685 |
+
"[I 2024-11-04 23:17:30,392] Trial 8 finished with value: 0.849003 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 9926, 'n_decay_l_0': 1e-05, 'n_units_l_1': 6304, 'n_decay_l_1': 1e-06, 'n_units_l_2': 1149, 'n_decay_l_2': 0.0001, 'last_dropout': 0.3, 'lr': 0.0001}. Best is trial 2 with value: 0.861173.\n"
|
1686 |
+
]
|
1687 |
+
},
|
1688 |
+
{
|
1689 |
+
"name": "stdout",
|
1690 |
+
"output_type": "stream",
|
1691 |
+
"text": [
|
1692 |
+
"Model already exists at save_model/full_model.keras\n",
|
1693 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1694 |
+
]
|
1695 |
+
},
|
1696 |
+
{
|
1697 |
+
"name": "stderr",
|
1698 |
+
"output_type": "stream",
|
1699 |
+
"text": [
|
1700 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1701 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1702 |
+
"I0000 00:00:1730729854.300336 2546634 service.cc:146] XLA service 0x5635047e3fd0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1703 |
+
"I0000 00:00:1730729854.300383 2546634 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1704 |
+
"I0000 00:00:1730729854.440994 2546634 service.cc:146] XLA service 0x5635046b3d50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1705 |
+
"I0000 00:00:1730729854.441033 2546634 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1706 |
+
"I0000 00:00:1730729857.327780 2546746 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1707 |
+
"\n",
|
1708 |
+
"[I 2024-11-04 23:18:18,438] Trial 9 finished with value: 0.859502 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 6912, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.3, 'lr': 0.0001}. Best is trial 2 with value: 0.861173.\n"
|
1709 |
+
]
|
1710 |
+
}
|
1711 |
+
],
|
1712 |
+
"source": [
|
1713 |
+
"# study_lo_network = optuna.create_study(study_name='ANO_lo_network_fixed', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
|
1714 |
+
"study_lo_network = optuna.create_study(study_name='ANO_lo_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)\n",
|
1715 |
+
"# study_lo_network = optuna.create_study(study_name='ANO_lo_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)\n",
|
1716 |
+
"# study_lo_network = optuna.create_study(study_name='ANO_lo_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)\n",
|
1717 |
+
"study_lo_network.optimize(objective_lo_network, n_trials=TRIALS)\n",
|
1718 |
+
"pruned_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1719 |
+
"complete_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
|
1720 |
+
]
|
1721 |
+
},
|
1722 |
+
{
|
1723 |
+
"cell_type": "code",
|
1724 |
+
"execution_count": 48,
|
1725 |
+
"metadata": {},
|
1726 |
+
"outputs": [],
|
1727 |
+
"source": [
|
1728 |
+
"TRIALS=1"
|
1729 |
+
]
|
1730 |
+
},
|
1731 |
+
{
|
1732 |
+
"cell_type": "code",
|
1733 |
+
"execution_count": 49,
|
1734 |
+
"metadata": {},
|
1735 |
+
"outputs": [
|
1736 |
+
{
|
1737 |
+
"name": "stderr",
|
1738 |
+
"output_type": "stream",
|
1739 |
+
"text": [
|
1740 |
+
"[I 2024-11-04 23:18:18,463] Using an existing study with name 'ANO_hu_network' instead of creating a new one.\n"
|
1741 |
+
]
|
1742 |
+
},
|
1743 |
+
{
|
1744 |
+
"name": "stdout",
|
1745 |
+
"output_type": "stream",
|
1746 |
+
"text": [
|
1747 |
+
"Model already exists at save_model/full_model.keras\n",
|
1748 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1749 |
+
]
|
1750 |
+
},
|
1751 |
+
{
|
1752 |
+
"name": "stderr",
|
1753 |
+
"output_type": "stream",
|
1754 |
+
"text": [
|
1755 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1756 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1757 |
+
"I0000 00:00:1730729902.168016 2552533 service.cc:146] XLA service 0x55fd29098ab0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1758 |
+
"I0000 00:00:1730729902.168077 2552533 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1759 |
+
"I0000 00:00:1730729902.305499 2552533 service.cc:146] XLA service 0x55fd28631810 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1760 |
+
"I0000 00:00:1730729902.305538 2552533 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1761 |
+
"I0000 00:00:1730729907.273542 2552637 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1762 |
+
"\n",
|
1763 |
+
"[I 2024-11-04 23:19:12,205] Trial 144 finished with value: 0.936649 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 1510, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.1, 'n_units_l_1': 2489, 'n_decay_l_1': 1e-06, 'F_dropout_1': 0.1, 'n_units_l_2': 1567, 'n_decay_l_2': 1e-05, 'F_dropout_2': 0.1, 'lr': 0.0001}. Best is trial 130 with value: 0.943809.\n"
|
1764 |
+
]
|
1765 |
+
}
|
1766 |
+
],
|
1767 |
+
"source": [
|
1768 |
+
"# study_hu_network = optuna.create_study(study_name='ANO_hu_network_fixed', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
|
1769 |
+
"study_hu_network = optuna.create_study(study_name='ANO_hu_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)\n",
|
1770 |
+
"# study_hu_network = optuna.create_study(study_name='ANO_hu_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)\n",
|
1771 |
+
"# study_hu_network = optuna.create_study(study_name='ANO_hu_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)\n",
|
1772 |
+
"study_hu_network.optimize(objective_hu_network, n_trials=TRIALS)\n",
|
1773 |
+
"pruned_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1774 |
+
"complete_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
|
1775 |
+
]
|
1776 |
+
},
|
1777 |
+
{
|
1778 |
+
"cell_type": "code",
|
1779 |
+
"execution_count": 50,
|
1780 |
+
"metadata": {},
|
1781 |
+
"outputs": [
|
1782 |
+
{
|
1783 |
+
"name": "stdout",
|
1784 |
+
"output_type": "stream",
|
1785 |
+
"text": [
|
1786 |
+
"Study statistics: [ws_structure] \n",
|
1787 |
+
" Number of finished trials: 194\n",
|
1788 |
+
" Number of pruned trials: 3\n",
|
1789 |
+
" Number of complete trials: 168\n",
|
1790 |
+
"Best trial:\n",
|
1791 |
+
" Value: 0.970129\n",
|
1792 |
+
" Params: \n",
|
1793 |
+
" n_layers: 2\n",
|
1794 |
+
" layer_dropout: 0\n",
|
1795 |
+
" n_units_l_0: 205\n",
|
1796 |
+
" n_decay_l_0: 1e-06\n",
|
1797 |
+
" n_units_l_1: 742\n",
|
1798 |
+
" n_decay_l_1: 0.0001\n",
|
1799 |
+
" last_dropout: 0.1\n",
|
1800 |
+
" lr: 0.0001\n"
|
1801 |
+
]
|
1802 |
+
}
|
1803 |
+
],
|
1804 |
+
"source": [
|
1805 |
+
"print(\"Study statistics: [ws_structure] \")\n",
|
1806 |
+
"print(\" Number of finished trials: \", len(study_ws_network.trials))\n",
|
1807 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_ws_newtork))\n",
|
1808 |
+
"print(\" Number of complete trials: \", len(complete_trials_ws_newtork))\n",
|
1809 |
+
"print(\"Best trial:\")\n",
|
1810 |
+
"trials_tmp = study_ws_network.best_trial\n",
|
1811 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1812 |
+
"print(\" Params: \")\n",
|
1813 |
+
"for key, value in trials_tmp.params.items():\n",
|
1814 |
+
" print(\" {}: {}\".format(key, value))"
|
1815 |
+
]
|
1816 |
+
},
|
1817 |
+
{
|
1818 |
+
"cell_type": "code",
|
1819 |
+
"execution_count": 51,
|
1820 |
+
"metadata": {},
|
1821 |
+
"outputs": [
|
1822 |
+
{
|
1823 |
+
"name": "stdout",
|
1824 |
+
"output_type": "stream",
|
1825 |
+
"text": [
|
1826 |
+
"Study statistics: [de_structure] \n",
|
1827 |
+
" Number of finished trials: 1116\n",
|
1828 |
+
" Number of pruned trials: 59\n",
|
1829 |
+
" Number of complete trials: 1032\n",
|
1830 |
+
"Best trial:\n",
|
1831 |
+
" Value: 0.983023\n",
|
1832 |
+
" Params: \n",
|
1833 |
+
" n_layers: 3\n",
|
1834 |
+
" layer_dropout: 0\n",
|
1835 |
+
" n_units_l_0: 7946\n",
|
1836 |
+
" n_decay_l_0: 1e-05\n",
|
1837 |
+
" n_units_l_1: 2662\n",
|
1838 |
+
" n_decay_l_1: 1e-06\n",
|
1839 |
+
" n_units_l_2: 6499\n",
|
1840 |
+
" n_decay_l_2: 1e-05\n",
|
1841 |
+
" last_dropout: 0.3\n",
|
1842 |
+
" lr: 0.001\n"
|
1843 |
+
]
|
1844 |
+
}
|
1845 |
+
],
|
1846 |
+
"source": [
|
1847 |
+
"print(\"Study statistics: [de_structure] \")\n",
|
1848 |
+
"print(\" Number of finished trials: \", len(study_de_network.trials))\n",
|
1849 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_de_newtork))\n",
|
1850 |
+
"print(\" Number of complete trials: \", len(complete_trials_de_newtork))\n",
|
1851 |
+
"print(\"Best trial:\")\n",
|
1852 |
+
"trials_tmp = study_de_network.best_trial\n",
|
1853 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1854 |
+
"print(\" Params: \")\n",
|
1855 |
+
"for key, value in trials_tmp.params.items():\n",
|
1856 |
+
" print(\" {}: {}\".format(key, value))"
|
1857 |
+
]
|
1858 |
+
},
|
1859 |
+
{
|
1860 |
+
"cell_type": "code",
|
1861 |
+
"execution_count": 52,
|
1862 |
+
"metadata": {},
|
1863 |
+
"outputs": [
|
1864 |
+
{
|
1865 |
+
"name": "stdout",
|
1866 |
+
"output_type": "stream",
|
1867 |
+
"text": [
|
1868 |
+
"Study statistics: [lo_structure] \n",
|
1869 |
+
" Number of finished trials: 10\n",
|
1870 |
+
" Number of pruned trials: 4\n",
|
1871 |
+
" Number of complete trials: 6\n",
|
1872 |
+
"Best trial:\n",
|
1873 |
+
" Value: 0.861173\n",
|
1874 |
+
" Params: \n",
|
1875 |
+
" n_layers: 1\n",
|
1876 |
+
" layer_dropout: 1\n",
|
1877 |
+
" n_units_l_0: 3091\n",
|
1878 |
+
" n_decay_l_0: 1e-05\n",
|
1879 |
+
" F_dropout_0: 0.2\n",
|
1880 |
+
" lr: 0.001\n"
|
1881 |
+
]
|
1882 |
+
}
|
1883 |
+
],
|
1884 |
+
"source": [
|
1885 |
+
"print(\"Study statistics: [lo_structure] \")\n",
|
1886 |
+
"print(\" Number of finished trials: \", len(study_lo_network.trials))\n",
|
1887 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_lo_newtork))\n",
|
1888 |
+
"print(\" Number of complete trials: \", len(complete_trials_lo_newtork))\n",
|
1889 |
+
"print(\"Best trial:\")\n",
|
1890 |
+
"trials_tmp = study_lo_network.best_trial\n",
|
1891 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1892 |
+
"print(\" Params: \")\n",
|
1893 |
+
"for key, value in trials_tmp.params.items():\n",
|
1894 |
+
" print(\" {}: {}\".format(key, value))"
|
1895 |
+
]
|
1896 |
+
},
|
1897 |
+
{
|
1898 |
+
"cell_type": "code",
|
1899 |
+
"execution_count": 53,
|
1900 |
+
"metadata": {},
|
1901 |
+
"outputs": [
|
1902 |
+
{
|
1903 |
+
"name": "stdout",
|
1904 |
+
"output_type": "stream",
|
1905 |
+
"text": [
|
1906 |
+
"Study statistics: [hu_structure] \n",
|
1907 |
+
" Number of finished trials: 145\n",
|
1908 |
+
" Number of pruned trials: 55\n",
|
1909 |
+
" Number of complete trials: 78\n",
|
1910 |
+
"Best trial:\n",
|
1911 |
+
" Value: 0.943809\n",
|
1912 |
+
" Params: \n",
|
1913 |
+
" n_layers: 3\n",
|
1914 |
+
" layer_dropout: 1\n",
|
1915 |
+
" n_units_l_0: 3891\n",
|
1916 |
+
" n_decay_l_0: 0.0001\n",
|
1917 |
+
" F_dropout_0: 0.1\n",
|
1918 |
+
" n_units_l_1: 7719\n",
|
1919 |
+
" n_decay_l_1: 1e-05\n",
|
1920 |
+
" F_dropout_1: 0.3\n",
|
1921 |
+
" n_units_l_2: 342\n",
|
1922 |
+
" n_decay_l_2: 1e-05\n",
|
1923 |
+
" F_dropout_2: 0.1\n",
|
1924 |
+
" lr: 0.0001\n"
|
1925 |
+
]
|
1926 |
+
}
|
1927 |
+
],
|
1928 |
+
"source": [
|
1929 |
+
"print(\"Study statistics: [hu_structure] \")\n",
|
1930 |
+
"print(\" Number of finished trials: \", len(study_hu_network.trials))\n",
|
1931 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_hu_newtork))\n",
|
1932 |
+
"print(\" Number of complete trials: \", len(complete_trials_hu_newtork))\n",
|
1933 |
+
"print(\"Best trial:\")\n",
|
1934 |
+
"trials_tmp = study_hu_network.best_trial\n",
|
1935 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1936 |
+
"print(\" Params: \")\n",
|
1937 |
+
"for key, value in trials_tmp.params.items():\n",
|
1938 |
+
" print(\" {}: {}\".format(key, value))"
|
1939 |
+
]
|
1940 |
+
},
|
1941 |
+
{
|
1942 |
+
"cell_type": "code",
|
1943 |
+
"execution_count": null,
|
1944 |
+
"metadata": {},
|
1945 |
+
"outputs": [],
|
1946 |
+
"source": []
|
1947 |
+
},
|
1948 |
+
{
|
1949 |
+
"cell_type": "code",
|
1950 |
+
"execution_count": null,
|
1951 |
+
"metadata": {},
|
1952 |
+
"outputs": [],
|
1953 |
+
"source": []
|
1954 |
+
},
|
1955 |
+
{
|
1956 |
+
"cell_type": "code",
|
1957 |
+
"execution_count": null,
|
1958 |
+
"metadata": {},
|
1959 |
+
"outputs": [],
|
1960 |
+
"source": []
|
1961 |
+
},
|
1962 |
+
{
|
1963 |
+
"cell_type": "code",
|
1964 |
+
"execution_count": null,
|
1965 |
+
"metadata": {},
|
1966 |
+
"outputs": [],
|
1967 |
+
"source": []
|
1968 |
+
}
|
1969 |
+
],
|
1970 |
+
"metadata": {
|
1971 |
+
"kernelspec": {
|
1972 |
+
"display_name": "ai",
|
1973 |
+
"language": "python",
|
1974 |
+
"name": "python3"
|
1975 |
+
},
|
1976 |
+
"language_info": {
|
1977 |
+
"codemirror_mode": {
|
1978 |
+
"name": "ipython",
|
1979 |
+
"version": 3
|
1980 |
+
},
|
1981 |
+
"file_extension": ".py",
|
1982 |
+
"mimetype": "text/x-python",
|
1983 |
+
"name": "python",
|
1984 |
+
"nbconvert_exporter": "python",
|
1985 |
+
"pygments_lexer": "ipython3",
|
1986 |
+
"version": "3.12.2"
|
1987 |
+
},
|
1988 |
+
"orig_nbformat": 4
|
1989 |
+
},
|
1990 |
+
"nbformat": 4,
|
1991 |
+
"nbformat_minor": 2
|
1992 |
+
}
|
7_ANO_network_[struc_fea].ipynb
ADDED
@@ -0,0 +1,1913 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import os\n",
|
10 |
+
"import sys\n",
|
11 |
+
"import numpy as np\n",
|
12 |
+
"import pandas as pd\n",
|
13 |
+
"import seaborn as sns\n",
|
14 |
+
"import matplotlib.pyplot as plt\n",
|
15 |
+
"import matplotlib.patches as mpatches\n",
|
16 |
+
"import gc\n",
|
17 |
+
"import time\n",
|
18 |
+
"import subprocess\n",
|
19 |
+
"from concurrent.futures import ProcessPoolExecutor, as_completed"
|
20 |
+
]
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"cell_type": "code",
|
24 |
+
"execution_count": 2,
|
25 |
+
"metadata": {},
|
26 |
+
"outputs": [],
|
27 |
+
"source": [
|
28 |
+
"from rdkit import Chem\n",
|
29 |
+
"from rdkit.Chem import AllChem, DataStructs, Draw\n",
|
30 |
+
"from rdkit import RDConfig\n",
|
31 |
+
"from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges\n",
|
32 |
+
"from rdkit.Chem.AllChem import GetMorganGenerator\n",
|
33 |
+
"from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray\n",
|
34 |
+
"from rdkit.Avalon.pyAvalonTools import GetAvalonFP"
|
35 |
+
]
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"cell_type": "code",
|
39 |
+
"execution_count": 3,
|
40 |
+
"metadata": {},
|
41 |
+
"outputs": [
|
42 |
+
{
|
43 |
+
"name": "stderr",
|
44 |
+
"output_type": "stream",
|
45 |
+
"text": [
|
46 |
+
"2024-10-20 11:19:00.304949: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
|
47 |
+
"2024-10-20 11:19:00.318297: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
|
48 |
+
"2024-10-20 11:19:00.322661: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
|
49 |
+
"2024-10-20 11:19:00.333360: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
50 |
+
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
|
51 |
+
"2024-10-20 11:19:01.123896: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
|
52 |
+
]
|
53 |
+
}
|
54 |
+
],
|
55 |
+
"source": [
|
56 |
+
"import tensorflow as tf\n",
|
57 |
+
"from tensorflow import keras\n",
|
58 |
+
"from tensorflow.keras import layers\n",
|
59 |
+
"from tensorflow.keras.models import Sequential\n",
|
60 |
+
"from tensorflow.keras.layers import Dense, Dropout, Activation\n",
|
61 |
+
"from tensorflow.keras.regularizers import l2\n",
|
62 |
+
"from tensorflow.keras.optimizers import Adam\n",
|
63 |
+
"from tensorflow.keras import regularizers"
|
64 |
+
]
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"cell_type": "code",
|
68 |
+
"execution_count": 4,
|
69 |
+
"metadata": {},
|
70 |
+
"outputs": [],
|
71 |
+
"source": [
|
72 |
+
"from sklearn.model_selection import train_test_split\n",
|
73 |
+
"from sklearn.linear_model import Ridge\n",
|
74 |
+
"from sklearn.ensemble import RandomForestRegressor\n",
|
75 |
+
"from sklearn.neural_network import MLPRegressor\n",
|
76 |
+
"from sklearn.svm import SVR\n",
|
77 |
+
"from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error"
|
78 |
+
]
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"cell_type": "code",
|
82 |
+
"execution_count": 5,
|
83 |
+
"metadata": {},
|
84 |
+
"outputs": [],
|
85 |
+
"source": [
|
86 |
+
"import optuna\n",
|
87 |
+
"from optuna.trial import TrialState"
|
88 |
+
]
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"cell_type": "code",
|
92 |
+
"execution_count": 6,
|
93 |
+
"metadata": {},
|
94 |
+
"outputs": [],
|
95 |
+
"source": [
|
96 |
+
"from extra_code.feature_selection import selection_structure_compress\n",
|
97 |
+
"from extra_code.feature_search import search_data_descriptor_compress"
|
98 |
+
]
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"cell_type": "code",
|
102 |
+
"execution_count": 7,
|
103 |
+
"metadata": {},
|
104 |
+
"outputs": [
|
105 |
+
{
|
106 |
+
"name": "stderr",
|
107 |
+
"output_type": "stream",
|
108 |
+
"text": [
|
109 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
110 |
+
"I0000 00:00:1729390742.084862 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
111 |
+
"Your kernel may have been built without NUMA support.\n",
|
112 |
+
"I0000 00:00:1729390742.133906 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
113 |
+
"Your kernel may have been built without NUMA support.\n",
|
114 |
+
"I0000 00:00:1729390742.134144 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
115 |
+
"Your kernel may have been built without NUMA support.\n"
|
116 |
+
]
|
117 |
+
}
|
118 |
+
],
|
119 |
+
"source": [
|
120 |
+
"tf.keras.backend.clear_session()\n",
|
121 |
+
"gpus = tf.config.experimental.list_physical_devices('GPU')\n",
|
122 |
+
"if gpus:\n",
|
123 |
+
" try:\n",
|
124 |
+
" for gpu in gpus:\n",
|
125 |
+
" tf.config.experimental.set_memory_growth(gpu, True)\n",
|
126 |
+
" except RuntimeError as e:\n",
|
127 |
+
" print(e)"
|
128 |
+
]
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"cell_type": "code",
|
132 |
+
"execution_count": 8,
|
133 |
+
"metadata": {},
|
134 |
+
"outputs": [],
|
135 |
+
"source": [
|
136 |
+
"target_path = \"result/7_ANO_network_[struc_fea]\"\n",
|
137 |
+
"os.makedirs(target_path, exist_ok=True)"
|
138 |
+
]
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"cell_type": "code",
|
142 |
+
"execution_count": 9,
|
143 |
+
"metadata": {},
|
144 |
+
"outputs": [],
|
145 |
+
"source": [
|
146 |
+
"data_ws = pd.read_csv('./data/ws496_logS.csv', dtype={'SMILES': 'string'})\n",
|
147 |
+
"smiles_ws = data_ws['SMILES']\n",
|
148 |
+
"y_ws = data_ws.iloc[:, 2]\n",
|
149 |
+
"\n",
|
150 |
+
"data_delaney = pd.read_csv('./data/delaney-processed.csv', dtype={'smiles': 'string'})\n",
|
151 |
+
"smiles_de = data_delaney['smiles']\n",
|
152 |
+
"y_de = data_delaney.iloc[:, 1]\n",
|
153 |
+
"\n",
|
154 |
+
"data_lovric2020 = pd.read_csv('./data/Lovric2020_logS0.csv', dtype={'isomeric_smiles': 'string'})\n",
|
155 |
+
"smiles_lo = data_lovric2020['isomeric_smiles']\n",
|
156 |
+
"y_lo = data_lovric2020.iloc[:, 1]\n",
|
157 |
+
"\n",
|
158 |
+
"data_huuskonen = pd.read_csv('./data/huusk.csv', dtype={'SMILES': 'string'})\n",
|
159 |
+
"smiles_hu = data_huuskonen['SMILES']\n",
|
160 |
+
"y_hu = data_huuskonen.iloc[:, -1].astype('float')"
|
161 |
+
]
|
162 |
+
},
|
163 |
+
{
|
164 |
+
"cell_type": "code",
|
165 |
+
"execution_count": 10,
|
166 |
+
"metadata": {},
|
167 |
+
"outputs": [],
|
168 |
+
"source": [
|
169 |
+
"def mol3d(mol):\n",
|
170 |
+
" mol = Chem.AddHs(mol)\n",
|
171 |
+
" optimization_methods = [\n",
|
172 |
+
" (AllChem.EmbedMolecule, (mol, AllChem.ETKDGv3()), {}),\n",
|
173 |
+
" (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),\n",
|
174 |
+
" (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200})\n",
|
175 |
+
" ]\n",
|
176 |
+
"\n",
|
177 |
+
" for method, args, kwargs in optimization_methods:\n",
|
178 |
+
" try:\n",
|
179 |
+
" method(*args, **kwargs)\n",
|
180 |
+
" if mol.GetNumConformers() > 0:\n",
|
181 |
+
" return mol\n",
|
182 |
+
" except ValueError as e:\n",
|
183 |
+
" print(f\"Error: {e} - Trying next optimization method [{method}]\")\n",
|
184 |
+
"\n",
|
185 |
+
" print(f\"Invalid mol for 3d {'\\033[94m'}{Chem.MolToSmiles(mol)}{'\\033[0m'} - No conformer generated\")\n",
|
186 |
+
" return None"
|
187 |
+
]
|
188 |
+
},
|
189 |
+
{
|
190 |
+
"cell_type": "code",
|
191 |
+
"execution_count": 11,
|
192 |
+
"metadata": {},
|
193 |
+
"outputs": [],
|
194 |
+
"source": [
|
195 |
+
"def convert_smiles_to_mol(smiles, fail_folder=None, index=None, yvalue=None):\n",
|
196 |
+
" mol = Chem.MolFromSmiles(smiles)\n",
|
197 |
+
" if mol is None:\n",
|
198 |
+
" print(f\"[convert_smiles_to_mol] Cannot convert {smiles} to Mols\")\n",
|
199 |
+
" return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": \"Invalid SMILES\"}\n",
|
200 |
+
"\n",
|
201 |
+
" try:\n",
|
202 |
+
" Chem.Kekulize(mol, clearAromaticFlags=True)\n",
|
203 |
+
" isomeric_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)\n",
|
204 |
+
" mol = Chem.MolFromSmiles(isomeric_smiles)\n",
|
205 |
+
" except Exception as e:\n",
|
206 |
+
" print(f\"[convert_smiles_to_mol] failed {smiles} isomeric_smiles by {e}\")\n",
|
207 |
+
" if fail_folder and index is not None:\n",
|
208 |
+
" img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
|
209 |
+
" img = Draw.MolToImage(mol)\n",
|
210 |
+
" img.save(img_path)\n",
|
211 |
+
" return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"Isomeric SMILES error: {e}\"}\n",
|
212 |
+
"\n",
|
213 |
+
" try:\n",
|
214 |
+
" Chem.SanitizeMol(mol)\n",
|
215 |
+
" except Exception as e:\n",
|
216 |
+
" print(f\"[convert_smiles_to_mol] failed {smiles} SanitizeMol by {e}\")\n",
|
217 |
+
" if fail_folder and index is not None:\n",
|
218 |
+
" img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
|
219 |
+
" img = Draw.MolToImage(mol)\n",
|
220 |
+
" img.save(img_path)\n",
|
221 |
+
" return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"SanitizeMol error: {e}\"}\n",
|
222 |
+
"\n",
|
223 |
+
" return mol, None"
|
224 |
+
]
|
225 |
+
},
|
226 |
+
{
|
227 |
+
"cell_type": "code",
|
228 |
+
"execution_count": 12,
|
229 |
+
"metadata": {},
|
230 |
+
"outputs": [],
|
231 |
+
"source": [
|
232 |
+
"def process_smiles(smiles, yvalue, fail_folder, index):\n",
|
233 |
+
" mol, error = convert_smiles_to_mol(smiles, fail_folder, index, yvalue)\n",
|
234 |
+
" if error:\n",
|
235 |
+
" return None, None, error\n",
|
236 |
+
"\n",
|
237 |
+
" mol_3d = mol3d(mol)\n",
|
238 |
+
" if mol_3d:\n",
|
239 |
+
" return smiles, yvalue, None\n",
|
240 |
+
" else:\n",
|
241 |
+
" img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
|
242 |
+
" img = Draw.MolToImage(mol)\n",
|
243 |
+
" img.save(img_path)\n",
|
244 |
+
" return None, None, {\"smiles\": smiles, \"y_value\": yvalue}\n",
|
245 |
+
"\n",
|
246 |
+
"def process_dataset(smiles_list, y_values, dataset_name, target_path=\"result\", max_workers=None):\n",
|
247 |
+
" start = time.time()\n",
|
248 |
+
" valid_smiles, valid_y = [], []\n",
|
249 |
+
" error_smiles_list = []\n",
|
250 |
+
" fail_folder = f\"{target_path}/failed/{dataset_name}\"\n",
|
251 |
+
" os.makedirs(fail_folder, exist_ok=True)\n",
|
252 |
+
"\n",
|
253 |
+
" with ProcessPoolExecutor(max_workers=max_workers) as executor:\n",
|
254 |
+
" futures = [\n",
|
255 |
+
" executor.submit(process_smiles, smiles, yvalue, fail_folder, i)\n",
|
256 |
+
" for i, (smiles, yvalue) in enumerate(zip(smiles_list, y_values))\n",
|
257 |
+
" ]\n",
|
258 |
+
" for future in as_completed(futures):\n",
|
259 |
+
" smiles, yvalue, error = future.result()\n",
|
260 |
+
" if error:\n",
|
261 |
+
" error_smiles_list.append(error)\n",
|
262 |
+
" elif smiles is not None and yvalue is not None:\n",
|
263 |
+
" valid_smiles.append(smiles)\n",
|
264 |
+
" valid_y.append(yvalue)\n",
|
265 |
+
"\n",
|
266 |
+
" if error_smiles_list:\n",
|
267 |
+
" error_df = pd.DataFrame(error_smiles_list)\n",
|
268 |
+
" error_df.to_csv(os.path.join(fail_folder, \"failed_smiles.csv\"), index=False)\n",
|
269 |
+
" print(f\" [{dataset_name:<10}] : {time.time()-start:.4f} sec\")\n",
|
270 |
+
" return valid_smiles, valid_y"
|
271 |
+
]
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"cell_type": "code",
|
275 |
+
"execution_count": 13,
|
276 |
+
"metadata": {},
|
277 |
+
"outputs": [
|
278 |
+
{
|
279 |
+
"name": "stdout",
|
280 |
+
"output_type": "stream",
|
281 |
+
"text": [
|
282 |
+
" [ws496 ] : 1.0593 sec\n",
|
283 |
+
" [delaney ] : 1.7029 sec\n",
|
284 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x56473ec3be90>]\n",
|
285 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x56473ec3cd20>]\n",
|
286 |
+
"Invalid mol for 3d \u001b[94m[H]O[C@@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
|
287 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x56473ec3be90>]\n",
|
288 |
+
"Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x56473ec3cd20>]\n",
|
289 |
+
"Invalid mol for 3d \u001b[94m[H]O[C@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
|
290 |
+
" [Lovric2020_logS0] : 9.2073 sec\n",
|
291 |
+
" [huusk ] : 3.8625 sec\n"
|
292 |
+
]
|
293 |
+
}
|
294 |
+
],
|
295 |
+
"source": [
|
296 |
+
"smiles_ws, y_ws = process_dataset(smiles_ws, y_ws, \"ws496\", target_path)\n",
|
297 |
+
"smiles_de, y_de = process_dataset(smiles_de, y_de, \"delaney\", target_path)\n",
|
298 |
+
"smiles_lo, y_lo = process_dataset(smiles_lo, y_lo, \"Lovric2020_logS0\", target_path)\n",
|
299 |
+
"smiles_hu, y_hu = process_dataset(smiles_hu, y_hu, \"huusk\", target_path)"
|
300 |
+
]
|
301 |
+
},
|
302 |
+
{
|
303 |
+
"cell_type": "code",
|
304 |
+
"execution_count": 14,
|
305 |
+
"metadata": {},
|
306 |
+
"outputs": [],
|
307 |
+
"source": [
|
308 |
+
"LEN_OF_FF = 2048\n",
|
309 |
+
"LEN_OF_MA = 167\n",
|
310 |
+
"LEN_OF_AV = 512"
|
311 |
+
]
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"cell_type": "code",
|
315 |
+
"execution_count": 15,
|
316 |
+
"metadata": {},
|
317 |
+
"outputs": [],
|
318 |
+
"source": [
|
319 |
+
"def get_fingerprints(mol):\n",
|
320 |
+
" if mol is None:\n",
|
321 |
+
" return None, None, None\n",
|
322 |
+
" \n",
|
323 |
+
" morgan_generator = GetMorganGenerator(radius=2, fpSize=LEN_OF_FF)\n",
|
324 |
+
" ecfp = morgan_generator.GetFingerprint(mol)\n",
|
325 |
+
" ecfp_array = np.zeros((LEN_OF_FF,),dtype=int)\n",
|
326 |
+
" DataStructs.ConvertToNumpyArray(ecfp, ecfp_array)\n",
|
327 |
+
" \n",
|
328 |
+
" maccs = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)\n",
|
329 |
+
"\n",
|
330 |
+
" avalon_fp = GetAvalonFP(mol)\n",
|
331 |
+
" avalon_array = np.zeros((LEN_OF_AV,),dtype=int)\n",
|
332 |
+
" DataStructs.ConvertToNumpyArray(avalon_fp, avalon_array)\n",
|
333 |
+
" \n",
|
334 |
+
" return ecfp_array, maccs, avalon_array\n",
|
335 |
+
"\n",
|
336 |
+
"def fp_converter(data, use_parallel=True):\n",
|
337 |
+
" mols = [Chem.MolFromSmiles(smi) for smi in data]\n",
|
338 |
+
" \n",
|
339 |
+
" if use_parallel:\n",
|
340 |
+
" try: \n",
|
341 |
+
" with ProcessPoolExecutor() as executor:\n",
|
342 |
+
" results = list(executor.map(get_fingerprints, mols))\n",
|
343 |
+
" except Exception as e:\n",
|
344 |
+
" print(f\"Parallel processing failed due to: {e}. Falling back to sequential processing.\")\n",
|
345 |
+
" use_parallel = False\n",
|
346 |
+
" \n",
|
347 |
+
" if not use_parallel:\n",
|
348 |
+
" results = [get_fingerprints(mol) for mol in mols]\n",
|
349 |
+
" \n",
|
350 |
+
" ECFP, MACCS, AvalonFP = zip(*results)\n",
|
351 |
+
" \n",
|
352 |
+
" ECFP_container = np.vstack([arr for arr in ECFP if arr is not None])\n",
|
353 |
+
" MACCS_container = np.zeros((len(MACCS), LEN_OF_MA), dtype=int)\n",
|
354 |
+
" AvalonFP_container = np.vstack([arr for arr in AvalonFP if arr is not None])\n",
|
355 |
+
"\n",
|
356 |
+
" for i, fp in enumerate(MACCS):\n",
|
357 |
+
" if fp is not None:\n",
|
358 |
+
" DataStructs.ConvertToNumpyArray(fp, MACCS_container[i])\n",
|
359 |
+
" \n",
|
360 |
+
" return mols, ECFP_container, MACCS_container, AvalonFP_container"
|
361 |
+
]
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"cell_type": "code",
|
365 |
+
"execution_count": 16,
|
366 |
+
"metadata": {},
|
367 |
+
"outputs": [
|
368 |
+
{
|
369 |
+
"data": {
|
370 |
+
"text/plain": [
|
371 |
+
"0"
|
372 |
+
]
|
373 |
+
},
|
374 |
+
"execution_count": 16,
|
375 |
+
"metadata": {},
|
376 |
+
"output_type": "execute_result"
|
377 |
+
}
|
378 |
+
],
|
379 |
+
"source": [
|
380 |
+
"mol_ws, x_ws, MACCS_ws, AvalonFP_ws = fp_converter(smiles_ws,target_path)\n",
|
381 |
+
"mol_de, x_de, MACCS_de, AvalonFP_de = fp_converter(smiles_de,target_path)\n",
|
382 |
+
"mol_lo, x_lo, MACCS_lo, AvalonFP_lo = fp_converter(smiles_lo,target_path)\n",
|
383 |
+
"mol_hu, x_hu, MACCS_hu, AvalonFP_hu = fp_converter(smiles_hu,target_path)\n",
|
384 |
+
"del smiles_ws\n",
|
385 |
+
"del smiles_de\n",
|
386 |
+
"del smiles_lo\n",
|
387 |
+
"del smiles_hu\n",
|
388 |
+
"gc.collect()"
|
389 |
+
]
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"cell_type": "code",
|
393 |
+
"execution_count": 17,
|
394 |
+
"metadata": {},
|
395 |
+
"outputs": [],
|
396 |
+
"source": [
|
397 |
+
"def concatenate_to_numpy(*dataframes):\n",
|
398 |
+
" numpy_arrays = [df.to_numpy() if isinstance(df, pd.DataFrame) else df for df in dataframes]\n",
|
399 |
+
" if not all(isinstance(arr, np.ndarray) for arr in numpy_arrays):\n",
|
400 |
+
" raise ValueError(\"All inputs must be either pandas DataFrame or numpy array\")\n",
|
401 |
+
" return np.concatenate(numpy_arrays, axis=1)"
|
402 |
+
]
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"cell_type": "code",
|
406 |
+
"execution_count": 18,
|
407 |
+
"metadata": {},
|
408 |
+
"outputs": [
|
409 |
+
{
|
410 |
+
"data": {
|
411 |
+
"text/plain": [
|
412 |
+
"0"
|
413 |
+
]
|
414 |
+
},
|
415 |
+
"execution_count": 18,
|
416 |
+
"metadata": {},
|
417 |
+
"output_type": "execute_result"
|
418 |
+
}
|
419 |
+
],
|
420 |
+
"source": [
|
421 |
+
"group_nws = concatenate_to_numpy(x_ws, MACCS_ws, AvalonFP_ws)\n",
|
422 |
+
"group_nde = concatenate_to_numpy(x_de, MACCS_de, AvalonFP_de)\n",
|
423 |
+
"group_nlo = concatenate_to_numpy(x_lo, MACCS_lo, AvalonFP_lo)\n",
|
424 |
+
"group_nhu = concatenate_to_numpy(x_hu, MACCS_hu, AvalonFP_hu)\n",
|
425 |
+
"del x_ws, MACCS_ws, AvalonFP_ws\n",
|
426 |
+
"del x_de, MACCS_de, AvalonFP_de\n",
|
427 |
+
"del x_lo, MACCS_lo, AvalonFP_lo\n",
|
428 |
+
"del x_hu, MACCS_hu, AvalonFP_hu\n",
|
429 |
+
"gc.collect()"
|
430 |
+
]
|
431 |
+
},
|
432 |
+
{
|
433 |
+
"cell_type": "code",
|
434 |
+
"execution_count": 19,
|
435 |
+
"metadata": {},
|
436 |
+
"outputs": [],
|
437 |
+
"source": [
|
438 |
+
"try:\n",
|
439 |
+
" storage = optuna.storages.RDBStorage(url=\"sqlite:///ano_analysis.db\", engine_kwargs={\"connect_args\": {\"timeout\": 10000}})\n",
|
440 |
+
" # storage_urls = \"postgresql+psycopg2://postgres:{pwd}}@localhost:{num}}\"\n",
|
441 |
+
" # storage = optuna.storages.RDBStorage(url=storage_urls)\n",
|
442 |
+
"except Exception as e:\n",
|
443 |
+
" print(f\"Error occured: {e}\")"
|
444 |
+
]
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"cell_type": "code",
|
448 |
+
"execution_count": 20,
|
449 |
+
"metadata": {},
|
450 |
+
"outputs": [],
|
451 |
+
"source": [
|
452 |
+
"def save_model(x_data, model_name):\n",
|
453 |
+
" model_path = \"save_model/full_model.keras\"\n",
|
454 |
+
" input_shape = x_data.shape[1]\n",
|
455 |
+
" lr = None\n",
|
456 |
+
" if not os.path.exists(model_path):\n",
|
457 |
+
" try:\n",
|
458 |
+
" if model_name in [\"ws\", \"de\", \"lo\", \"hu\"]:\n",
|
459 |
+
" model, lr = selection_structure_compress(f'ANO_{model_name}_struct', storage, input_shape) \n",
|
460 |
+
" os.makedirs(\"save_model\", exist_ok=True)\n",
|
461 |
+
" model.save(model_path)\n",
|
462 |
+
" # print(model.summary())\n",
|
463 |
+
" print(f\"Model successfully saved to {model_path}\")\n",
|
464 |
+
" return lr\n",
|
465 |
+
" except Exception as e:\n",
|
466 |
+
" print(f\"Error saving model: {e}\")\n",
|
467 |
+
" return lr\n",
|
468 |
+
" else:\n",
|
469 |
+
" print(f\"Model already exists at {model_path}\")\n",
|
470 |
+
" os.remove(model_path)\n",
|
471 |
+
" save_model(x_data, model_name)"
|
472 |
+
]
|
473 |
+
},
|
474 |
+
{
|
475 |
+
"cell_type": "code",
|
476 |
+
"execution_count": 21,
|
477 |
+
"metadata": {},
|
478 |
+
"outputs": [],
|
479 |
+
"source": [
|
480 |
+
"BATCHSIZE = 32\n",
|
481 |
+
"EPOCHS = 1000\n",
|
482 |
+
"# lr = 0.0001\n",
|
483 |
+
"# decay = 1e-4"
|
484 |
+
]
|
485 |
+
},
|
486 |
+
{
|
487 |
+
"cell_type": "code",
|
488 |
+
"execution_count": 22,
|
489 |
+
"metadata": {},
|
490 |
+
"outputs": [],
|
491 |
+
"source": [
|
492 |
+
"import logging\n",
|
493 |
+
"import warnings\n",
|
494 |
+
"\n",
|
495 |
+
"os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
|
496 |
+
"os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
|
497 |
+
"os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'\n",
|
498 |
+
"os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'\n",
|
499 |
+
"os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices'\n",
|
500 |
+
"os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/local/cuda --xla_gpu_force_compilation_parallelism=1'\n",
|
501 |
+
"os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'\n",
|
502 |
+
"os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'\n",
|
503 |
+
"os.environ['TF_NUMA_NODES'] = '1'\n",
|
504 |
+
"\n",
|
505 |
+
"warnings.filterwarnings('ignore')\n",
|
506 |
+
"\n",
|
507 |
+
"warnings.simplefilter(action='ignore', category=FutureWarning)\n",
|
508 |
+
"\n",
|
509 |
+
"logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
|
510 |
+
"\n",
|
511 |
+
"tf.get_logger().setLevel('ERROR')\n",
|
512 |
+
"tf.autograph.set_verbosity(0)\n",
|
513 |
+
"\n",
|
514 |
+
"def suppress_warnings(condition=True):\n",
|
515 |
+
" if condition:\n",
|
516 |
+
" logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
|
517 |
+
" os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
|
518 |
+
" else:\n",
|
519 |
+
" logging.getLogger('tensorflow').setLevel(logging.WARNING)\n",
|
520 |
+
" os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'\n",
|
521 |
+
"\n",
|
522 |
+
"suppress_warnings(condition=True)"
|
523 |
+
]
|
524 |
+
},
|
525 |
+
{
|
526 |
+
"cell_type": "code",
|
527 |
+
"execution_count": 23,
|
528 |
+
"metadata": {},
|
529 |
+
"outputs": [],
|
530 |
+
"source": [
|
531 |
+
"def objective_ws_network(trial):\n",
|
532 |
+
" try:\n",
|
533 |
+
" new_x = search_data_descriptor_compress(trial, group_nws, mol_ws, 'ws496')\n",
|
534 |
+
" new_x = np.nan_to_num(new_x, nan=0.0, posinf=0.0, neginf=0.0).astype('float')\n",
|
535 |
+
" y_true = np.asarray(y_ws).astype('float')\n",
|
536 |
+
" np.save('new_fps.npy', new_x)\n",
|
537 |
+
" np.save('y_true.npy', y_true)\n",
|
538 |
+
" \n",
|
539 |
+
" lr = 0.0001\n",
|
540 |
+
" tmp_lr = save_model(new_x, 'ws')\n",
|
541 |
+
" if tmp_lr != None:\n",
|
542 |
+
" lr = tmp_lr\n",
|
543 |
+
"\n",
|
544 |
+
" result = subprocess.run(['python3', './extra_code/learning_process.py', \n",
|
545 |
+
" str(BATCHSIZE), str(EPOCHS), \n",
|
546 |
+
" str(lr),\n",
|
547 |
+
" 'new_fps.npy', 'y_true.npy'],\n",
|
548 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
|
549 |
+
"\n",
|
550 |
+
" if result.stderr:\n",
|
551 |
+
" filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') if \"could not open file to read NUMA node\" not in line and \"Your kernel may have been built without NUMA support\" not in line])\n",
|
552 |
+
" if filtered_stderr:\n",
|
553 |
+
" print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
|
554 |
+
"\n",
|
555 |
+
" for line in result.stdout.splitlines():\n",
|
556 |
+
" if \"R2\" in line:\n",
|
557 |
+
" if \"(prune)\" in line:\n",
|
558 |
+
" print(f\"Pruning trial due to poor R2: {line}\")\n",
|
559 |
+
" r2_result = 0.0\n",
|
560 |
+
" trial.report(r2_result, step=0)\n",
|
561 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
562 |
+
" else:\n",
|
563 |
+
" r2_result = float(line.split(\":\")[1].strip())\n",
|
564 |
+
" print(f\"R2 score: {r2_result}\")\n",
|
565 |
+
" trial.report(r2_result, step=0)\n",
|
566 |
+
"\n",
|
567 |
+
" if trial.should_prune():\n",
|
568 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
569 |
+
"\n",
|
570 |
+
" except Exception as e:\n",
|
571 |
+
" print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
|
572 |
+
" r2_result = 0.0\n",
|
573 |
+
"\n",
|
574 |
+
" gc.collect()\n",
|
575 |
+
"\n",
|
576 |
+
" return r2_result"
|
577 |
+
]
|
578 |
+
},
|
579 |
+
{
|
580 |
+
"cell_type": "code",
|
581 |
+
"execution_count": 24,
|
582 |
+
"metadata": {},
|
583 |
+
"outputs": [],
|
584 |
+
"source": [
|
585 |
+
"def objective_de_network(trial):\n",
|
586 |
+
" try:\n",
|
587 |
+
" new_x = search_data_descriptor_compress(trial, group_nde, mol_de, 'delaney')\n",
|
588 |
+
" new_x = np.nan_to_num(new_x, nan=0.0, posinf=0.0, neginf=0.0).astype('float')\n",
|
589 |
+
" y_true = np.asarray(y_de).astype('float')\n",
|
590 |
+
" np.save('new_fps.npy', new_x)\n",
|
591 |
+
" np.save('y_true.npy', y_true)\n",
|
592 |
+
" \n",
|
593 |
+
" lr = 0.0001\n",
|
594 |
+
" tmp_lr = save_model(new_x, 'de')\n",
|
595 |
+
" if tmp_lr != None:\n",
|
596 |
+
" lr = tmp_lr\n",
|
597 |
+
"\n",
|
598 |
+
" result = subprocess.run(['python3', './extra_code/learning_process.py', \n",
|
599 |
+
" str(BATCHSIZE), str(EPOCHS), \n",
|
600 |
+
" str(lr),\n",
|
601 |
+
" 'new_fps.npy', 'y_true.npy'],\n",
|
602 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
|
603 |
+
"\n",
|
604 |
+
" if result.stderr:\n",
|
605 |
+
" filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') if \"could not open file to read NUMA node\" not in line and \"Your kernel may have been built without NUMA support\" not in line])\n",
|
606 |
+
" if filtered_stderr:\n",
|
607 |
+
" print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
|
608 |
+
"\n",
|
609 |
+
" for line in result.stdout.splitlines():\n",
|
610 |
+
" if \"R2\" in line:\n",
|
611 |
+
" if \"(prune)\" in line:\n",
|
612 |
+
" print(f\"Pruning trial due to poor R2: {line}\")\n",
|
613 |
+
" r2_result = 0.0\n",
|
614 |
+
" trial.report(r2_result, step=0)\n",
|
615 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
616 |
+
" else:\n",
|
617 |
+
" r2_result = float(line.split(\":\")[1].strip())\n",
|
618 |
+
" print(f\"R2 score: {r2_result}\")\n",
|
619 |
+
" trial.report(r2_result, step=0)\n",
|
620 |
+
"\n",
|
621 |
+
" if trial.should_prune():\n",
|
622 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
623 |
+
"\n",
|
624 |
+
" except Exception as e:\n",
|
625 |
+
" print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
|
626 |
+
" r2_result = 0.0\n",
|
627 |
+
"\n",
|
628 |
+
" gc.collect()\n",
|
629 |
+
"\n",
|
630 |
+
" return r2_result"
|
631 |
+
]
|
632 |
+
},
|
633 |
+
{
|
634 |
+
"cell_type": "code",
|
635 |
+
"execution_count": 25,
|
636 |
+
"metadata": {},
|
637 |
+
"outputs": [],
|
638 |
+
"source": [
|
639 |
+
"def objective_lo_network(trial):\n",
|
640 |
+
" try:\n",
|
641 |
+
" new_x = search_data_descriptor_compress(trial, group_nlo, mol_lo, 'lovrics')\n",
|
642 |
+
" new_x = np.nan_to_num(new_x, nan=0.0, posinf=0.0, neginf=0.0).astype('float')\n",
|
643 |
+
" y_true = np.asarray(y_lo).astype('float')\n",
|
644 |
+
" np.save('new_fps.npy', new_x)\n",
|
645 |
+
" np.save('y_true.npy', y_true)\n",
|
646 |
+
" \n",
|
647 |
+
" lr = 0.0001\n",
|
648 |
+
" tmp_lr = save_model(new_x, 'lo')\n",
|
649 |
+
" if tmp_lr != None:\n",
|
650 |
+
" lr = tmp_lr\n",
|
651 |
+
"\n",
|
652 |
+
" result = subprocess.run(['python3', './extra_code/learning_process.py', \n",
|
653 |
+
" str(BATCHSIZE), str(EPOCHS), \n",
|
654 |
+
" str(lr),\n",
|
655 |
+
" 'new_fps.npy', 'y_true.npy'],\n",
|
656 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
|
657 |
+
"\n",
|
658 |
+
" if result.stderr:\n",
|
659 |
+
" filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') if \"could not open file to read NUMA node\" not in line and \"Your kernel may have been built without NUMA support\" not in line])\n",
|
660 |
+
" if filtered_stderr:\n",
|
661 |
+
" print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
|
662 |
+
"\n",
|
663 |
+
" for line in result.stdout.splitlines():\n",
|
664 |
+
" if \"R2\" in line:\n",
|
665 |
+
" if \"(prune)\" in line:\n",
|
666 |
+
" print(f\"Pruning trial due to poor R2: {line}\")\n",
|
667 |
+
" r2_result = 0.0\n",
|
668 |
+
" trial.report(r2_result, step=0)\n",
|
669 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
670 |
+
" else:\n",
|
671 |
+
" r2_result = float(line.split(\":\")[1].strip())\n",
|
672 |
+
" print(f\"R2 score: {r2_result}\")\n",
|
673 |
+
" trial.report(r2_result, step=0)\n",
|
674 |
+
"\n",
|
675 |
+
" if trial.should_prune():\n",
|
676 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
677 |
+
"\n",
|
678 |
+
" except Exception as e:\n",
|
679 |
+
" print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
|
680 |
+
" r2_result = 0.0\n",
|
681 |
+
"\n",
|
682 |
+
" gc.collect()\n",
|
683 |
+
"\n",
|
684 |
+
" return r2_result"
|
685 |
+
]
|
686 |
+
},
|
687 |
+
{
|
688 |
+
"cell_type": "code",
|
689 |
+
"execution_count": 26,
|
690 |
+
"metadata": {},
|
691 |
+
"outputs": [],
|
692 |
+
"source": [
|
693 |
+
"def objective_hu_network(trial):\n",
|
694 |
+
" try:\n",
|
695 |
+
" new_x = search_data_descriptor_compress(trial, group_nhu, mol_hu, 'hussk')\n",
|
696 |
+
" new_x = np.nan_to_num(new_x, nan=0.0, posinf=0.0, neginf=0.0).astype('float')\n",
|
697 |
+
" y_true = np.asarray(y_hu).astype('float')\n",
|
698 |
+
" np.save('new_fps.npy', new_x)\n",
|
699 |
+
" np.save('y_true.npy', y_true)\n",
|
700 |
+
" \n",
|
701 |
+
" lr = 0.0001\n",
|
702 |
+
" tmp_lr = save_model(new_x, 'hu')\n",
|
703 |
+
" if tmp_lr != None:\n",
|
704 |
+
" lr = tmp_lr\n",
|
705 |
+
"\n",
|
706 |
+
" result = subprocess.run(['python3', './extra_code/learning_process.py', \n",
|
707 |
+
" str(BATCHSIZE), str(EPOCHS), \n",
|
708 |
+
" str(lr),\n",
|
709 |
+
" 'new_fps.npy', 'y_true.npy'],\n",
|
710 |
+
" stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
|
711 |
+
"\n",
|
712 |
+
" if result.stderr:\n",
|
713 |
+
" filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') if \"could not open file to read NUMA node\" not in line and \"Your kernel may have been built without NUMA support\" not in line])\n",
|
714 |
+
" if filtered_stderr:\n",
|
715 |
+
" print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
|
716 |
+
"\n",
|
717 |
+
" for line in result.stdout.splitlines():\n",
|
718 |
+
" if \"R2\" in line:\n",
|
719 |
+
" if \"(prune)\" in line:\n",
|
720 |
+
" print(f\"Pruning trial due to poor R2: {line}\")\n",
|
721 |
+
" r2_result = 0.0\n",
|
722 |
+
" trial.report(r2_result, step=0)\n",
|
723 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
724 |
+
" else:\n",
|
725 |
+
" r2_result = float(line.split(\":\")[1].strip())\n",
|
726 |
+
" print(f\"R2 score: {r2_result}\")\n",
|
727 |
+
" trial.report(r2_result, step=0)\n",
|
728 |
+
"\n",
|
729 |
+
" if trial.should_prune():\n",
|
730 |
+
" raise optuna.exceptions.TrialPruned()\n",
|
731 |
+
"\n",
|
732 |
+
" except Exception as e:\n",
|
733 |
+
" print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
|
734 |
+
" r2_result = 0.0\n",
|
735 |
+
"\n",
|
736 |
+
" gc.collect()\n",
|
737 |
+
"\n",
|
738 |
+
" return r2_result"
|
739 |
+
]
|
740 |
+
},
|
741 |
+
{
|
742 |
+
"cell_type": "code",
|
743 |
+
"execution_count": 27,
|
744 |
+
"metadata": {},
|
745 |
+
"outputs": [],
|
746 |
+
"source": [
|
747 |
+
"storage = optuna.storages.RDBStorage(url=\"sqlite:///example_ano.db\", engine_kwargs={\"connect_args\": {\"timeout\": 10000}})\n",
|
748 |
+
"# storage_urls = \"postgresql+psycopg2://postgres:{pwd}@localhost:{num}\"\n",
|
749 |
+
"# storage = optuna.storages.RDBStorage(url=storage_urls)"
|
750 |
+
]
|
751 |
+
},
|
752 |
+
{
|
753 |
+
"cell_type": "code",
|
754 |
+
"execution_count": 28,
|
755 |
+
"metadata": {},
|
756 |
+
"outputs": [],
|
757 |
+
"source": [
|
758 |
+
"try:\n",
|
759 |
+
" optuna.delete_study(study_name=\"ANO_ws_network_s2f\", storage=storage)\n",
|
760 |
+
" optuna.delete_study(study_name=\"ANO_de_network_s2f\", storage=storage)\n",
|
761 |
+
" optuna.delete_study(study_name=\"ANO_lo_network_s2f\", storage=storage)\n",
|
762 |
+
" optuna.delete_study(study_name=\"ANO_hu_network_s2f\", storage=storage)\n",
|
763 |
+
"except:\n",
|
764 |
+
" pass "
|
765 |
+
]
|
766 |
+
},
|
767 |
+
{
|
768 |
+
"cell_type": "code",
|
769 |
+
"execution_count": 29,
|
770 |
+
"metadata": {},
|
771 |
+
"outputs": [],
|
772 |
+
"source": [
|
773 |
+
"TRIALS=3"
|
774 |
+
]
|
775 |
+
},
|
776 |
+
{
|
777 |
+
"cell_type": "code",
|
778 |
+
"execution_count": 30,
|
779 |
+
"metadata": {},
|
780 |
+
"outputs": [
|
781 |
+
{
|
782 |
+
"name": "stderr",
|
783 |
+
"output_type": "stream",
|
784 |
+
"text": [
|
785 |
+
"[I 2024-10-20 11:19:21,464] A new study created in RDB with name: ANO_ws_network_s2f\n"
|
786 |
+
]
|
787 |
+
},
|
788 |
+
{
|
789 |
+
"name": "stdout",
|
790 |
+
"output_type": "stream",
|
791 |
+
"text": [
|
792 |
+
"Model already exists at save_model/full_model.keras\n",
|
793 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n"
|
794 |
+
]
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"name": "stderr",
|
798 |
+
"output_type": "stream",
|
799 |
+
"text": [
|
800 |
+
"I0000 00:00:1729390847.014614 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
801 |
+
"Your kernel may have been built without NUMA support.\n",
|
802 |
+
"I0000 00:00:1729390847.014721 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
803 |
+
"Your kernel may have been built without NUMA support.\n",
|
804 |
+
"I0000 00:00:1729390847.014780 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
805 |
+
"Your kernel may have been built without NUMA support.\n",
|
806 |
+
"I0000 00:00:1729390847.186332 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
807 |
+
"Your kernel may have been built without NUMA support.\n",
|
808 |
+
"I0000 00:00:1729390847.186551 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
809 |
+
"Your kernel may have been built without NUMA support.\n",
|
810 |
+
"2024-10-20 11:20:47.186582: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2112] Could not identify NUMA node of platform GPU id 0, defaulting to 0. Your kernel may not have been built with NUMA support.\n",
|
811 |
+
"2024-10-20 11:20:47.186639: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:198] Using CUDA malloc Async allocator for GPU: 0\n",
|
812 |
+
"I0000 00:00:1729390847.187010 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
|
813 |
+
"Your kernel may have been built without NUMA support.\n",
|
814 |
+
"2024-10-20 11:20:47.187059: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3586 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6\n"
|
815 |
+
]
|
816 |
+
},
|
817 |
+
{
|
818 |
+
"name": "stdout",
|
819 |
+
"output_type": "stream",
|
820 |
+
"text": [
|
821 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
822 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
823 |
+
" Best trial value: 0.77755\n",
|
824 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
825 |
+
]
|
826 |
+
},
|
827 |
+
{
|
828 |
+
"name": "stderr",
|
829 |
+
"output_type": "stream",
|
830 |
+
"text": [
|
831 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
832 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
833 |
+
"I0000 00:00:1729390851.965239 1600607 service.cc:146] XLA service 0x556b2b6d7c90 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
834 |
+
"I0000 00:00:1729390851.965303 1600607 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
835 |
+
"I0000 00:00:1729390852.219993 1600607 service.cc:146] XLA service 0x556b2b69f410 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
836 |
+
"I0000 00:00:1729390852.220034 1600607 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
837 |
+
"I0000 00:00:1729390860.162273 1600721 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
838 |
+
"\n"
|
839 |
+
]
|
840 |
+
},
|
841 |
+
{
|
842 |
+
"name": "stdout",
|
843 |
+
"output_type": "stream",
|
844 |
+
"text": [
|
845 |
+
"R2 score: 0.858591\n"
|
846 |
+
]
|
847 |
+
},
|
848 |
+
{
|
849 |
+
"name": "stderr",
|
850 |
+
"output_type": "stream",
|
851 |
+
"text": [
|
852 |
+
"[I 2024-10-20 11:24:15,303] Trial 0 finished with value: 0.858591 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 1, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 1, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 0, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 0 with value: 0.858591.\n"
|
853 |
+
]
|
854 |
+
},
|
855 |
+
{
|
856 |
+
"name": "stdout",
|
857 |
+
"output_type": "stream",
|
858 |
+
"text": [
|
859 |
+
"Model already exists at save_model/full_model.keras\n",
|
860 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
861 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
862 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
863 |
+
" Best trial value: 0.77755\n",
|
864 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
865 |
+
]
|
866 |
+
},
|
867 |
+
{
|
868 |
+
"name": "stderr",
|
869 |
+
"output_type": "stream",
|
870 |
+
"text": [
|
871 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
872 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
873 |
+
"I0000 00:00:1729391077.508810 1619585 service.cc:146] XLA service 0x564642a96600 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
874 |
+
"I0000 00:00:1729391077.508907 1619585 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
875 |
+
"I0000 00:00:1729391077.722268 1619585 service.cc:146] XLA service 0x5646429f2270 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
876 |
+
"I0000 00:00:1729391077.722308 1619585 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
877 |
+
"I0000 00:00:1729391082.624481 1619699 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
878 |
+
"\n"
|
879 |
+
]
|
880 |
+
},
|
881 |
+
{
|
882 |
+
"name": "stdout",
|
883 |
+
"output_type": "stream",
|
884 |
+
"text": [
|
885 |
+
"R2 score: 0.869889\n"
|
886 |
+
]
|
887 |
+
},
|
888 |
+
{
|
889 |
+
"name": "stderr",
|
890 |
+
"output_type": "stream",
|
891 |
+
"text": [
|
892 |
+
"[I 2024-10-20 11:27:55,750] Trial 1 finished with value: 0.869889 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElec': 0, 'NHOHCount': 1, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 0}. Best is trial 1 with value: 0.869889.\n"
|
893 |
+
]
|
894 |
+
},
|
895 |
+
{
|
896 |
+
"name": "stdout",
|
897 |
+
"output_type": "stream",
|
898 |
+
"text": [
|
899 |
+
"Model already exists at save_model/full_model.keras\n",
|
900 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
901 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
902 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
903 |
+
" Best trial value: 0.77755\n",
|
904 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
905 |
+
]
|
906 |
+
},
|
907 |
+
{
|
908 |
+
"name": "stderr",
|
909 |
+
"output_type": "stream",
|
910 |
+
"text": [
|
911 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
912 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
913 |
+
"I0000 00:00:1729391299.807522 1638541 service.cc:146] XLA service 0x564a6f2f36e0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
914 |
+
"I0000 00:00:1729391299.807567 1638541 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
915 |
+
"I0000 00:00:1729391300.031993 1638541 service.cc:146] XLA service 0x564a6f24eed0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
916 |
+
"I0000 00:00:1729391300.032062 1638541 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
917 |
+
"I0000 00:00:1729391305.114762 1638647 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
918 |
+
"\n"
|
919 |
+
]
|
920 |
+
},
|
921 |
+
{
|
922 |
+
"name": "stdout",
|
923 |
+
"output_type": "stream",
|
924 |
+
"text": [
|
925 |
+
"R2 score: 0.878699\n"
|
926 |
+
]
|
927 |
+
},
|
928 |
+
{
|
929 |
+
"name": "stderr",
|
930 |
+
"output_type": "stream",
|
931 |
+
"text": [
|
932 |
+
"[I 2024-10-20 11:31:39,117] Trial 2 finished with value: 0.878699 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 0, 'NumHAcceptors': 0, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElec': 1, 'NHOHCount': 1, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'NumValenceElectrons': 1, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 0, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 0}. Best is trial 2 with value: 0.878699.\n"
|
933 |
+
]
|
934 |
+
}
|
935 |
+
],
|
936 |
+
"source": [
|
937 |
+
"study_ws_network = optuna.create_study(study_name='ANO_ws_network_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(reduction_factor=64, min_early_stopping_rate=10),load_if_exists=True)\n",
|
938 |
+
"study_ws_network.optimize(objective_ws_network, n_trials=TRIALS)\n",
|
939 |
+
"pruned_trials_ws_fea = study_ws_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
940 |
+
"complete_trials_ws_fea = study_ws_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n"
|
941 |
+
]
|
942 |
+
},
|
943 |
+
{
|
944 |
+
"cell_type": "code",
|
945 |
+
"execution_count": 31,
|
946 |
+
"metadata": {},
|
947 |
+
"outputs": [
|
948 |
+
{
|
949 |
+
"name": "stderr",
|
950 |
+
"output_type": "stream",
|
951 |
+
"text": [
|
952 |
+
"[I 2024-10-20 11:31:39,146] Using an existing study with name 'ANO_ws_network_s2f' instead of creating a new one.\n"
|
953 |
+
]
|
954 |
+
},
|
955 |
+
{
|
956 |
+
"name": "stdout",
|
957 |
+
"output_type": "stream",
|
958 |
+
"text": [
|
959 |
+
"Model already exists at save_model/full_model.keras\n",
|
960 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
961 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
962 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
963 |
+
" Best trial value: 0.77755\n",
|
964 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
965 |
+
]
|
966 |
+
},
|
967 |
+
{
|
968 |
+
"name": "stderr",
|
969 |
+
"output_type": "stream",
|
970 |
+
"text": [
|
971 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
972 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
973 |
+
"I0000 00:00:1729391518.947845 1657545 service.cc:146] XLA service 0x55d650748ae0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
974 |
+
"I0000 00:00:1729391518.947890 1657545 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
975 |
+
"I0000 00:00:1729391519.134710 1657545 service.cc:146] XLA service 0x55d6506a36b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
976 |
+
"I0000 00:00:1729391519.134754 1657545 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
977 |
+
"I0000 00:00:1729391524.385701 1657654 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
978 |
+
"\n"
|
979 |
+
]
|
980 |
+
},
|
981 |
+
{
|
982 |
+
"name": "stdout",
|
983 |
+
"output_type": "stream",
|
984 |
+
"text": [
|
985 |
+
"R2 score: 0.865804\n"
|
986 |
+
]
|
987 |
+
},
|
988 |
+
{
|
989 |
+
"name": "stderr",
|
990 |
+
"output_type": "stream",
|
991 |
+
"text": [
|
992 |
+
"[I 2024-10-20 11:36:15,828] Trial 3 finished with value: 0.865804 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 0, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 1, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 0, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 0, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 0, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 0}. Best is trial 2 with value: 0.878699.\n"
|
993 |
+
]
|
994 |
+
},
|
995 |
+
{
|
996 |
+
"name": "stdout",
|
997 |
+
"output_type": "stream",
|
998 |
+
"text": [
|
999 |
+
"Model already exists at save_model/full_model.keras\n",
|
1000 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1001 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
1002 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1003 |
+
" Best trial value: 0.77755\n",
|
1004 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1005 |
+
]
|
1006 |
+
},
|
1007 |
+
{
|
1008 |
+
"name": "stderr",
|
1009 |
+
"output_type": "stream",
|
1010 |
+
"text": [
|
1011 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1012 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1013 |
+
"I0000 00:00:1729391870.164562 1676975 service.cc:146] XLA service 0x561fb8ffcbd0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1014 |
+
"I0000 00:00:1729391870.164627 1676975 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1015 |
+
"I0000 00:00:1729391870.350359 1676975 service.cc:146] XLA service 0x561fb8f16400 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1016 |
+
"I0000 00:00:1729391870.350392 1676975 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1017 |
+
"I0000 00:00:1729391875.581017 1677087 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1018 |
+
"\n"
|
1019 |
+
]
|
1020 |
+
},
|
1021 |
+
{
|
1022 |
+
"name": "stdout",
|
1023 |
+
"output_type": "stream",
|
1024 |
+
"text": [
|
1025 |
+
"R2 score: 0.887433\n"
|
1026 |
+
]
|
1027 |
+
},
|
1028 |
+
{
|
1029 |
+
"name": "stderr",
|
1030 |
+
"output_type": "stream",
|
1031 |
+
"text": [
|
1032 |
+
"[I 2024-10-20 11:41:05,066] Trial 4 finished with value: 0.887433 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 1, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 0, 'BCUT2D': 0, 'AUTOCORR3D': 1, 'RDF': 1, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 4 with value: 0.887433.\n"
|
1033 |
+
]
|
1034 |
+
},
|
1035 |
+
{
|
1036 |
+
"name": "stdout",
|
1037 |
+
"output_type": "stream",
|
1038 |
+
"text": [
|
1039 |
+
"Model already exists at save_model/full_model.keras\n",
|
1040 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1041 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
1042 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1043 |
+
" Best trial value: 0.77755\n",
|
1044 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1045 |
+
]
|
1046 |
+
},
|
1047 |
+
{
|
1048 |
+
"name": "stderr",
|
1049 |
+
"output_type": "stream",
|
1050 |
+
"text": [
|
1051 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1052 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1053 |
+
"I0000 00:00:1729392158.794709 1696169 service.cc:146] XLA service 0x55d9be410480 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1054 |
+
"I0000 00:00:1729392158.794767 1696169 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1055 |
+
"I0000 00:00:1729392159.010612 1696169 service.cc:146] XLA service 0x55d9be36b340 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1056 |
+
"I0000 00:00:1729392159.010676 1696169 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1057 |
+
"I0000 00:00:1729392164.300024 1696277 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1058 |
+
"\n"
|
1059 |
+
]
|
1060 |
+
},
|
1061 |
+
{
|
1062 |
+
"name": "stdout",
|
1063 |
+
"output_type": "stream",
|
1064 |
+
"text": [
|
1065 |
+
"R2 score: 0.891404\n"
|
1066 |
+
]
|
1067 |
+
},
|
1068 |
+
{
|
1069 |
+
"name": "stderr",
|
1070 |
+
"output_type": "stream",
|
1071 |
+
"text": [
|
1072 |
+
"[I 2024-10-20 11:45:51,346] Trial 5 finished with value: 0.891404 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 0, 'NHOHCount': 1, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 1, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 0, 'PBF': 0, 'RadiusOfGyration': 1, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 1, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 0, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 5 with value: 0.891404.\n"
|
1073 |
+
]
|
1074 |
+
}
|
1075 |
+
],
|
1076 |
+
"source": [
|
1077 |
+
"# study_ws_network = optuna.create_study(study_name='ANO_ws_network_fixed_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
|
1078 |
+
"study_ws_network = optuna.create_study(study_name='ANO_ws_network_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
|
1079 |
+
"study_ws_network.optimize(objective_ws_network, n_trials=TRIALS)\n",
|
1080 |
+
"pruned_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1081 |
+
"complete_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n",
|
1082 |
+
"# 108m 38.1s\n",
|
1083 |
+
"#160m 18.2 - 100 trial 1000 epochs"
|
1084 |
+
]
|
1085 |
+
},
|
1086 |
+
{
|
1087 |
+
"cell_type": "code",
|
1088 |
+
"execution_count": 32,
|
1089 |
+
"metadata": {},
|
1090 |
+
"outputs": [
|
1091 |
+
{
|
1092 |
+
"name": "stderr",
|
1093 |
+
"output_type": "stream",
|
1094 |
+
"text": [
|
1095 |
+
"[I 2024-10-20 11:45:51,374] A new study created in RDB with name: ANO_de_network_s2f\n"
|
1096 |
+
]
|
1097 |
+
},
|
1098 |
+
{
|
1099 |
+
"name": "stdout",
|
1100 |
+
"output_type": "stream",
|
1101 |
+
"text": [
|
1102 |
+
"Model already exists at save_model/full_model.keras\n",
|
1103 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1104 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
1105 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1106 |
+
" Best trial value: 0.77755\n",
|
1107 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1108 |
+
]
|
1109 |
+
},
|
1110 |
+
{
|
1111 |
+
"name": "stderr",
|
1112 |
+
"output_type": "stream",
|
1113 |
+
"text": [
|
1114 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1115 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1116 |
+
"I0000 00:00:1729392374.693258 1715146 service.cc:146] XLA service 0x56316822dc00 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1117 |
+
"I0000 00:00:1729392374.693318 1715146 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1118 |
+
"I0000 00:00:1729392374.893630 1715146 service.cc:146] XLA service 0x5631680ee0e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1119 |
+
"I0000 00:00:1729392374.893670 1715146 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1120 |
+
"I0000 00:00:1729392380.028886 1715262 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1121 |
+
"\n"
|
1122 |
+
]
|
1123 |
+
},
|
1124 |
+
{
|
1125 |
+
"name": "stdout",
|
1126 |
+
"output_type": "stream",
|
1127 |
+
"text": [
|
1128 |
+
"R2 score: 0.897157\n"
|
1129 |
+
]
|
1130 |
+
},
|
1131 |
+
{
|
1132 |
+
"name": "stderr",
|
1133 |
+
"output_type": "stream",
|
1134 |
+
"text": [
|
1135 |
+
"[I 2024-10-20 11:49:33,553] Trial 0 finished with value: 0.897157 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 0, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 0, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 1, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 0}. Best is trial 0 with value: 0.897157.\n"
|
1136 |
+
]
|
1137 |
+
},
|
1138 |
+
{
|
1139 |
+
"name": "stdout",
|
1140 |
+
"output_type": "stream",
|
1141 |
+
"text": [
|
1142 |
+
"Model already exists at save_model/full_model.keras\n",
|
1143 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1144 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
1145 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1146 |
+
" Best trial value: 0.77755\n",
|
1147 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1148 |
+
]
|
1149 |
+
},
|
1150 |
+
{
|
1151 |
+
"name": "stderr",
|
1152 |
+
"output_type": "stream",
|
1153 |
+
"text": [
|
1154 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1155 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1156 |
+
"I0000 00:00:1729392596.112086 1734128 service.cc:146] XLA service 0x56143e783d80 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1157 |
+
"I0000 00:00:1729392596.112159 1734128 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1158 |
+
"I0000 00:00:1729392596.338586 1734128 service.cc:146] XLA service 0x56143e6df470 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1159 |
+
"I0000 00:00:1729392596.338628 1734128 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1160 |
+
"I0000 00:00:1729392601.646269 1734238 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1161 |
+
"\n"
|
1162 |
+
]
|
1163 |
+
},
|
1164 |
+
{
|
1165 |
+
"name": "stdout",
|
1166 |
+
"output_type": "stream",
|
1167 |
+
"text": [
|
1168 |
+
"R2 score: 0.903416\n"
|
1169 |
+
]
|
1170 |
+
},
|
1171 |
+
{
|
1172 |
+
"name": "stderr",
|
1173 |
+
"output_type": "stream",
|
1174 |
+
"text": [
|
1175 |
+
"[I 2024-10-20 11:53:09,937] Trial 1 finished with value: 0.903416 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 0, 'NumValenceElec': 1, 'NHOHCount': 1, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 1, 'NumValenceElectrons': 0, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 0}. Best is trial 1 with value: 0.903416.\n"
|
1176 |
+
]
|
1177 |
+
},
|
1178 |
+
{
|
1179 |
+
"name": "stdout",
|
1180 |
+
"output_type": "stream",
|
1181 |
+
"text": [
|
1182 |
+
"Model already exists at save_model/full_model.keras\n",
|
1183 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1184 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
1185 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1186 |
+
" Best trial value: 0.77755\n",
|
1187 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1188 |
+
]
|
1189 |
+
},
|
1190 |
+
{
|
1191 |
+
"name": "stderr",
|
1192 |
+
"output_type": "stream",
|
1193 |
+
"text": [
|
1194 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1195 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1196 |
+
"I0000 00:00:1729392888.161687 1753348 service.cc:146] XLA service 0x564c24c171f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1197 |
+
"I0000 00:00:1729392888.161742 1753348 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1198 |
+
"I0000 00:00:1729392888.371050 1753348 service.cc:146] XLA service 0x564c24b71870 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1199 |
+
"I0000 00:00:1729392888.371090 1753348 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1200 |
+
"I0000 00:00:1729392893.641735 1753460 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1201 |
+
"\n"
|
1202 |
+
]
|
1203 |
+
},
|
1204 |
+
{
|
1205 |
+
"name": "stdout",
|
1206 |
+
"output_type": "stream",
|
1207 |
+
"text": [
|
1208 |
+
"R2 score: 0.880276\n"
|
1209 |
+
]
|
1210 |
+
},
|
1211 |
+
{
|
1212 |
+
"name": "stderr",
|
1213 |
+
"output_type": "stream",
|
1214 |
+
"text": [
|
1215 |
+
"[I 2024-10-20 11:58:03,586] Trial 2 finished with value: 0.880276 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 1, 'NHOHCount': 1, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 0, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 1, 'BCUT2D': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 1}. Best is trial 1 with value: 0.903416.\n"
|
1216 |
+
]
|
1217 |
+
}
|
1218 |
+
],
|
1219 |
+
"source": [
|
1220 |
+
"# study_de_network = optuna.create_study(study_name='ANO_de_network_fixed_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
|
1221 |
+
"study_de_network = optuna.create_study(study_name='ANO_de_network_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
|
1222 |
+
"study_de_network.optimize(objective_de_network, n_trials=TRIALS)\n",
|
1223 |
+
"pruned_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1224 |
+
"complete_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n",
|
1225 |
+
"#74m 22.0s\n",
|
1226 |
+
"#386m 42.2 - 100 trial 1000 epochs"
|
1227 |
+
]
|
1228 |
+
},
|
1229 |
+
{
|
1230 |
+
"cell_type": "code",
|
1231 |
+
"execution_count": 33,
|
1232 |
+
"metadata": {},
|
1233 |
+
"outputs": [
|
1234 |
+
{
|
1235 |
+
"name": "stderr",
|
1236 |
+
"output_type": "stream",
|
1237 |
+
"text": [
|
1238 |
+
"[I 2024-10-20 11:58:03,612] A new study created in RDB with name: ANO_lo_network_s2f\n"
|
1239 |
+
]
|
1240 |
+
},
|
1241 |
+
{
|
1242 |
+
"name": "stdout",
|
1243 |
+
"output_type": "stream",
|
1244 |
+
"text": [
|
1245 |
+
"Model already exists at save_model/full_model.keras\n",
|
1246 |
+
"Best trial params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
|
1247 |
+
"Model created from best trial of 'ANO_lo_struct':\n",
|
1248 |
+
" Params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
|
1249 |
+
" Best trial value: 0.683309\n"
|
1250 |
+
]
|
1251 |
+
},
|
1252 |
+
{
|
1253 |
+
"name": "stderr",
|
1254 |
+
"output_type": "stream",
|
1255 |
+
"text": [
|
1256 |
+
"2024-10-20 11:58:27.205874: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 240931272 exceeds 10% of free system memory.\n"
|
1257 |
+
]
|
1258 |
+
},
|
1259 |
+
{
|
1260 |
+
"name": "stdout",
|
1261 |
+
"output_type": "stream",
|
1262 |
+
"text": [
|
1263 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1264 |
+
]
|
1265 |
+
},
|
1266 |
+
{
|
1267 |
+
"name": "stderr",
|
1268 |
+
"output_type": "stream",
|
1269 |
+
"text": [
|
1270 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1271 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1272 |
+
"I0000 00:00:1729393110.988890 1772389 service.cc:146] XLA service 0x55b5a753f1d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1273 |
+
"I0000 00:00:1729393110.988948 1772389 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1274 |
+
"I0000 00:00:1729393111.194962 1772389 service.cc:146] XLA service 0x55b5a74d89e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1275 |
+
"I0000 00:00:1729393111.195011 1772389 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1276 |
+
"I0000 00:00:1729393116.790223 1772497 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1277 |
+
"\n"
|
1278 |
+
]
|
1279 |
+
},
|
1280 |
+
{
|
1281 |
+
"name": "stdout",
|
1282 |
+
"output_type": "stream",
|
1283 |
+
"text": [
|
1284 |
+
"R2 score: 0.713994\n"
|
1285 |
+
]
|
1286 |
+
},
|
1287 |
+
{
|
1288 |
+
"name": "stderr",
|
1289 |
+
"output_type": "stream",
|
1290 |
+
"text": [
|
1291 |
+
"[I 2024-10-20 12:12:13,855] Trial 0 finished with value: 0.713994 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 0, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 0, 'Eccentricity': 0, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 1, 'MORSE': 0, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 0 with value: 0.713994.\n"
|
1292 |
+
]
|
1293 |
+
},
|
1294 |
+
{
|
1295 |
+
"name": "stdout",
|
1296 |
+
"output_type": "stream",
|
1297 |
+
"text": [
|
1298 |
+
"Model already exists at save_model/full_model.keras\n",
|
1299 |
+
"Best trial params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
|
1300 |
+
"Model created from best trial of 'ANO_lo_struct':\n",
|
1301 |
+
" Params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
|
1302 |
+
" Best trial value: 0.683309\n"
|
1303 |
+
]
|
1304 |
+
},
|
1305 |
+
{
|
1306 |
+
"name": "stderr",
|
1307 |
+
"output_type": "stream",
|
1308 |
+
"text": [
|
1309 |
+
"2024-10-20 12:12:36.493107: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 240931272 exceeds 10% of free system memory.\n"
|
1310 |
+
]
|
1311 |
+
},
|
1312 |
+
{
|
1313 |
+
"name": "stdout",
|
1314 |
+
"output_type": "stream",
|
1315 |
+
"text": [
|
1316 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1317 |
+
]
|
1318 |
+
},
|
1319 |
+
{
|
1320 |
+
"name": "stderr",
|
1321 |
+
"output_type": "stream",
|
1322 |
+
"text": [
|
1323 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1324 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1325 |
+
"I0000 00:00:1729393960.991997 1793223 service.cc:146] XLA service 0x561d6d841040 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1326 |
+
"I0000 00:00:1729393960.992049 1793223 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1327 |
+
"I0000 00:00:1729393961.191204 1793223 service.cc:146] XLA service 0x561d6cdf02a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1328 |
+
"I0000 00:00:1729393961.191247 1793223 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1329 |
+
"I0000 00:00:1729393967.194640 1793326 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1330 |
+
"\n"
|
1331 |
+
]
|
1332 |
+
},
|
1333 |
+
{
|
1334 |
+
"name": "stdout",
|
1335 |
+
"output_type": "stream",
|
1336 |
+
"text": [
|
1337 |
+
"R2 score: 0.685843\n"
|
1338 |
+
]
|
1339 |
+
},
|
1340 |
+
{
|
1341 |
+
"name": "stderr",
|
1342 |
+
"output_type": "stream",
|
1343 |
+
"text": [
|
1344 |
+
"[I 2024-10-20 12:34:26,856] Trial 1 finished with value: 0.685843 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 1, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 1, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 1, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 0}. Best is trial 0 with value: 0.713994.\n"
|
1345 |
+
]
|
1346 |
+
},
|
1347 |
+
{
|
1348 |
+
"name": "stdout",
|
1349 |
+
"output_type": "stream",
|
1350 |
+
"text": [
|
1351 |
+
"Model already exists at save_model/full_model.keras\n",
|
1352 |
+
"Best trial params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
|
1353 |
+
"Model created from best trial of 'ANO_lo_struct':\n",
|
1354 |
+
" Params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
|
1355 |
+
" Best trial value: 0.683309\n"
|
1356 |
+
]
|
1357 |
+
},
|
1358 |
+
{
|
1359 |
+
"name": "stderr",
|
1360 |
+
"output_type": "stream",
|
1361 |
+
"text": [
|
1362 |
+
"2024-10-20 12:34:56.213221: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 240931272 exceeds 10% of free system memory.\n"
|
1363 |
+
]
|
1364 |
+
},
|
1365 |
+
{
|
1366 |
+
"name": "stdout",
|
1367 |
+
"output_type": "stream",
|
1368 |
+
"text": [
|
1369 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1370 |
+
]
|
1371 |
+
},
|
1372 |
+
{
|
1373 |
+
"name": "stderr",
|
1374 |
+
"output_type": "stream",
|
1375 |
+
"text": [
|
1376 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1377 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1378 |
+
"I0000 00:00:1729395300.642231 1815505 service.cc:146] XLA service 0x55e01cc870a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1379 |
+
"I0000 00:00:1729395300.642304 1815505 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1380 |
+
"I0000 00:00:1729395300.846508 1815505 service.cc:146] XLA service 0x55e01cbe0700 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1381 |
+
"I0000 00:00:1729395300.846550 1815505 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1382 |
+
"I0000 00:00:1729395306.797856 1815618 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1383 |
+
"\n"
|
1384 |
+
]
|
1385 |
+
},
|
1386 |
+
{
|
1387 |
+
"name": "stdout",
|
1388 |
+
"output_type": "stream",
|
1389 |
+
"text": [
|
1390 |
+
"R2 score: 0.661041\n"
|
1391 |
+
]
|
1392 |
+
},
|
1393 |
+
{
|
1394 |
+
"name": "stderr",
|
1395 |
+
"output_type": "stream",
|
1396 |
+
"text": [
|
1397 |
+
"[I 2024-10-20 12:51:24,525] Trial 2 finished with value: 0.661041 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 1, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 1, 'NumValenceElectrons': 1, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 0, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 1, 'BCUT2D': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 1}. Best is trial 0 with value: 0.713994.\n"
|
1398 |
+
]
|
1399 |
+
}
|
1400 |
+
],
|
1401 |
+
"source": [
|
1402 |
+
"# study_lo_network = optuna.create_study(study_name='ANO_lo_network_fixed_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
|
1403 |
+
"study_lo_network = optuna.create_study(study_name='ANO_lo_network_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
|
1404 |
+
"study_lo_network.optimize(objective_lo_network, n_trials=TRIALS)\n",
|
1405 |
+
"pruned_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1406 |
+
"complete_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
|
1407 |
+
]
|
1408 |
+
},
|
1409 |
+
{
|
1410 |
+
"cell_type": "code",
|
1411 |
+
"execution_count": 34,
|
1412 |
+
"metadata": {},
|
1413 |
+
"outputs": [
|
1414 |
+
{
|
1415 |
+
"name": "stderr",
|
1416 |
+
"output_type": "stream",
|
1417 |
+
"text": [
|
1418 |
+
"[I 2024-10-20 12:51:24,574] A new study created in RDB with name: ANO_hu_network_s2f\n"
|
1419 |
+
]
|
1420 |
+
},
|
1421 |
+
{
|
1422 |
+
"name": "stdout",
|
1423 |
+
"output_type": "stream",
|
1424 |
+
"text": [
|
1425 |
+
"Model already exists at save_model/full_model.keras\n",
|
1426 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1427 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
1428 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1429 |
+
" Best trial value: 0.77755\n",
|
1430 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1431 |
+
]
|
1432 |
+
},
|
1433 |
+
{
|
1434 |
+
"name": "stderr",
|
1435 |
+
"output_type": "stream",
|
1436 |
+
"text": [
|
1437 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1438 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1439 |
+
"I0000 00:00:1729396386.767441 1837018 service.cc:146] XLA service 0x561ad9acf200 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1440 |
+
"I0000 00:00:1729396386.767513 1837018 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1441 |
+
"I0000 00:00:1729396386.929264 1837018 service.cc:146] XLA service 0x561ad99bd7b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1442 |
+
"I0000 00:00:1729396386.929306 1837018 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1443 |
+
"I0000 00:00:1729396392.843622 1837130 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1444 |
+
"\n"
|
1445 |
+
]
|
1446 |
+
},
|
1447 |
+
{
|
1448 |
+
"name": "stdout",
|
1449 |
+
"output_type": "stream",
|
1450 |
+
"text": [
|
1451 |
+
"R2 score: 0.906639\n"
|
1452 |
+
]
|
1453 |
+
},
|
1454 |
+
{
|
1455 |
+
"name": "stderr",
|
1456 |
+
"output_type": "stream",
|
1457 |
+
"text": [
|
1458 |
+
"[I 2024-10-20 13:02:51,820] Trial 0 finished with value: 0.906639 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 1, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 0, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 1}. Best is trial 0 with value: 0.906639.\n"
|
1459 |
+
]
|
1460 |
+
},
|
1461 |
+
{
|
1462 |
+
"name": "stdout",
|
1463 |
+
"output_type": "stream",
|
1464 |
+
"text": [
|
1465 |
+
"Model already exists at save_model/full_model.keras\n",
|
1466 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1467 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
1468 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1469 |
+
" Best trial value: 0.77755\n",
|
1470 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1471 |
+
]
|
1472 |
+
},
|
1473 |
+
{
|
1474 |
+
"name": "stderr",
|
1475 |
+
"output_type": "stream",
|
1476 |
+
"text": [
|
1477 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1478 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1479 |
+
"I0000 00:00:1729397073.720871 1857420 service.cc:146] XLA service 0x558f5d8e5b70 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1480 |
+
"I0000 00:00:1729397073.720952 1857420 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1481 |
+
"I0000 00:00:1729397073.896023 1857420 service.cc:146] XLA service 0x558f5d7ff3a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1482 |
+
"I0000 00:00:1729397073.896075 1857420 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1483 |
+
"I0000 00:00:1729397079.724248 1857532 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1484 |
+
"\n"
|
1485 |
+
]
|
1486 |
+
},
|
1487 |
+
{
|
1488 |
+
"name": "stdout",
|
1489 |
+
"output_type": "stream",
|
1490 |
+
"text": [
|
1491 |
+
"R2 score: 0.912375\n"
|
1492 |
+
]
|
1493 |
+
},
|
1494 |
+
{
|
1495 |
+
"name": "stderr",
|
1496 |
+
"output_type": "stream",
|
1497 |
+
"text": [
|
1498 |
+
"[I 2024-10-20 13:14:49,340] Trial 1 finished with value: 0.912375 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 0, 'NumValenceElec': 1, 'NHOHCount': 0, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 0, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 0, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 1 with value: 0.912375.\n"
|
1499 |
+
]
|
1500 |
+
},
|
1501 |
+
{
|
1502 |
+
"name": "stdout",
|
1503 |
+
"output_type": "stream",
|
1504 |
+
"text": [
|
1505 |
+
"Model already exists at save_model/full_model.keras\n",
|
1506 |
+
"Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1507 |
+
"Model created from best trial of 'ANO_ws_struct':\n",
|
1508 |
+
" Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
|
1509 |
+
" Best trial value: 0.77755\n",
|
1510 |
+
"Model successfully saved to save_model/full_model.keras\n"
|
1511 |
+
]
|
1512 |
+
},
|
1513 |
+
{
|
1514 |
+
"name": "stderr",
|
1515 |
+
"output_type": "stream",
|
1516 |
+
"text": [
|
1517 |
+
"Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1518 |
+
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
1519 |
+
"I0000 00:00:1729397712.104750 1877650 service.cc:146] XLA service 0x55f4c53b7d30 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
|
1520 |
+
"I0000 00:00:1729397712.104817 1877650 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
|
1521 |
+
"I0000 00:00:1729397712.270438 1877650 service.cc:146] XLA service 0x55f4c5313420 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
1522 |
+
"I0000 00:00:1729397712.270487 1877650 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
|
1523 |
+
"I0000 00:00:1729397717.919845 1877761 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
|
1524 |
+
"\n"
|
1525 |
+
]
|
1526 |
+
},
|
1527 |
+
{
|
1528 |
+
"name": "stdout",
|
1529 |
+
"output_type": "stream",
|
1530 |
+
"text": [
|
1531 |
+
"R2 score: 0.891494\n"
|
1532 |
+
]
|
1533 |
+
},
|
1534 |
+
{
|
1535 |
+
"name": "stderr",
|
1536 |
+
"output_type": "stream",
|
1537 |
+
"text": [
|
1538 |
+
"[I 2024-10-20 13:25:39,445] Trial 2 finished with value: 0.891494 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElec': 1, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 0, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 0, 'AUTOCORR3D': 1, 'RDF': 1, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 0}. Best is trial 1 with value: 0.912375.\n"
|
1539 |
+
]
|
1540 |
+
}
|
1541 |
+
],
|
1542 |
+
"source": [
|
1543 |
+
"# study_hu_network = optuna.create_study(study_name='ANO_hu_network_fixed_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
|
1544 |
+
"study_hu_network = optuna.create_study(study_name='ANO_hu_network_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
|
1545 |
+
"study_hu_network.optimize(objective_hu_network, n_trials=TRIALS)\n",
|
1546 |
+
"pruned_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
|
1547 |
+
"complete_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
|
1548 |
+
]
|
1549 |
+
},
|
1550 |
+
{
|
1551 |
+
"cell_type": "code",
|
1552 |
+
"execution_count": 35,
|
1553 |
+
"metadata": {},
|
1554 |
+
"outputs": [
|
1555 |
+
{
|
1556 |
+
"name": "stdout",
|
1557 |
+
"output_type": "stream",
|
1558 |
+
"text": [
|
1559 |
+
"Study statistics: [ws_structure] \n",
|
1560 |
+
" Number of finished trials: 6\n",
|
1561 |
+
" Number of pruned trials: 0\n",
|
1562 |
+
" Number of complete trials: 6\n",
|
1563 |
+
"Best trial:\n",
|
1564 |
+
" Value: 0.891404\n",
|
1565 |
+
" Params: \n",
|
1566 |
+
" NumRotatableBonds: 1\n",
|
1567 |
+
" HeavyAtomCount: 1\n",
|
1568 |
+
" NumHAcceptors: 1\n",
|
1569 |
+
" NumHDonors: 1\n",
|
1570 |
+
" NumHeteroatoms: 1\n",
|
1571 |
+
" NumValenceElec: 0\n",
|
1572 |
+
" NHOHCount: 1\n",
|
1573 |
+
" NOCount: 1\n",
|
1574 |
+
" RingCount: 0\n",
|
1575 |
+
" NumAromaticRings: 0\n",
|
1576 |
+
" NumSaturatedRings: 1\n",
|
1577 |
+
" NumAliphaticRings: 1\n",
|
1578 |
+
" LabuteASA: 0\n",
|
1579 |
+
" NumValenceElectrons: 1\n",
|
1580 |
+
" BalabanJ: 1\n",
|
1581 |
+
" BertzCT: 1\n",
|
1582 |
+
" Ipc: 1\n",
|
1583 |
+
" kappa_Series[1-3]_ind: 0\n",
|
1584 |
+
" Chi_Series[13]_ind: 0\n",
|
1585 |
+
" Phi: 0\n",
|
1586 |
+
" HallKierAlpha: 1\n",
|
1587 |
+
" NumAmideBonds: 0\n",
|
1588 |
+
" FractionCSP3: 0\n",
|
1589 |
+
" NumSpiroAtoms: 1\n",
|
1590 |
+
" NumBridgeheadAtoms: 1\n",
|
1591 |
+
" PEOE_VSA_Series[1-14]_ind: 1\n",
|
1592 |
+
" SMR_VSA_Series[1-10]_ind: 1\n",
|
1593 |
+
" SlogP_VSA_Series[1-12]_ind: 1\n",
|
1594 |
+
" EState_VSA_Series[1-11]_ind: 0\n",
|
1595 |
+
" VSA_EState_Series[1-10]_ind: 0\n",
|
1596 |
+
" Asphericity: 0\n",
|
1597 |
+
" PBF: 0\n",
|
1598 |
+
" RadiusOfGyration: 1\n",
|
1599 |
+
" InertialShapeFactor: 1\n",
|
1600 |
+
" Eccentricity: 0\n",
|
1601 |
+
" SpherocityIndex: 1\n",
|
1602 |
+
" PMI_series[1-3]_ind: 1\n",
|
1603 |
+
" NPR_series[1-2]_ind: 1\n",
|
1604 |
+
" MQNs: 1\n",
|
1605 |
+
" AUTOCORR2D: 1\n",
|
1606 |
+
" BCUT2D: 1\n",
|
1607 |
+
" AUTOCORR3D: 0\n",
|
1608 |
+
" RDF: 1\n",
|
1609 |
+
" MORSE: 0\n",
|
1610 |
+
" WHIM: 1\n",
|
1611 |
+
" GETAWAY: 1\n"
|
1612 |
+
]
|
1613 |
+
}
|
1614 |
+
],
|
1615 |
+
"source": [
|
1616 |
+
"print(\"Study statistics: [ws_structure] \")\n",
|
1617 |
+
"print(\" Number of finished trials: \", len(study_ws_network.trials))\n",
|
1618 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_ws_newtork))\n",
|
1619 |
+
"print(\" Number of complete trials: \", len(complete_trials_ws_newtork))\n",
|
1620 |
+
"print(\"Best trial:\")\n",
|
1621 |
+
"trials_tmp = study_ws_network.best_trial\n",
|
1622 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1623 |
+
"print(\" Params: \")\n",
|
1624 |
+
"for key, value in trials_tmp.params.items():\n",
|
1625 |
+
" print(\" {}: {}\".format(key, value))"
|
1626 |
+
]
|
1627 |
+
},
|
1628 |
+
{
|
1629 |
+
"cell_type": "code",
|
1630 |
+
"execution_count": 36,
|
1631 |
+
"metadata": {},
|
1632 |
+
"outputs": [
|
1633 |
+
{
|
1634 |
+
"name": "stdout",
|
1635 |
+
"output_type": "stream",
|
1636 |
+
"text": [
|
1637 |
+
"Study statistics: [de_structure] \n",
|
1638 |
+
" Number of finished trials: 3\n",
|
1639 |
+
" Number of pruned trials: 0\n",
|
1640 |
+
" Number of complete trials: 3\n",
|
1641 |
+
"Best trial:\n",
|
1642 |
+
" Value: 0.903416\n",
|
1643 |
+
" Params: \n",
|
1644 |
+
" NumRotatableBonds: 0\n",
|
1645 |
+
" HeavyAtomCount: 1\n",
|
1646 |
+
" NumHAcceptors: 1\n",
|
1647 |
+
" NumHDonors: 0\n",
|
1648 |
+
" NumHeteroatoms: 0\n",
|
1649 |
+
" NumValenceElec: 1\n",
|
1650 |
+
" NHOHCount: 1\n",
|
1651 |
+
" NOCount: 1\n",
|
1652 |
+
" RingCount: 0\n",
|
1653 |
+
" NumAromaticRings: 1\n",
|
1654 |
+
" NumSaturatedRings: 1\n",
|
1655 |
+
" NumAliphaticRings: 0\n",
|
1656 |
+
" LabuteASA: 1\n",
|
1657 |
+
" NumValenceElectrons: 0\n",
|
1658 |
+
" BalabanJ: 1\n",
|
1659 |
+
" BertzCT: 0\n",
|
1660 |
+
" Ipc: 0\n",
|
1661 |
+
" kappa_Series[1-3]_ind: 1\n",
|
1662 |
+
" Chi_Series[13]_ind: 0\n",
|
1663 |
+
" Phi: 0\n",
|
1664 |
+
" HallKierAlpha: 1\n",
|
1665 |
+
" NumAmideBonds: 0\n",
|
1666 |
+
" FractionCSP3: 0\n",
|
1667 |
+
" NumSpiroAtoms: 0\n",
|
1668 |
+
" NumBridgeheadAtoms: 1\n",
|
1669 |
+
" PEOE_VSA_Series[1-14]_ind: 1\n",
|
1670 |
+
" SMR_VSA_Series[1-10]_ind: 0\n",
|
1671 |
+
" SlogP_VSA_Series[1-12]_ind: 1\n",
|
1672 |
+
" EState_VSA_Series[1-11]_ind: 1\n",
|
1673 |
+
" VSA_EState_Series[1-10]_ind: 0\n",
|
1674 |
+
" Asphericity: 1\n",
|
1675 |
+
" PBF: 1\n",
|
1676 |
+
" RadiusOfGyration: 0\n",
|
1677 |
+
" InertialShapeFactor: 1\n",
|
1678 |
+
" Eccentricity: 0\n",
|
1679 |
+
" SpherocityIndex: 0\n",
|
1680 |
+
" PMI_series[1-3]_ind: 1\n",
|
1681 |
+
" NPR_series[1-2]_ind: 0\n",
|
1682 |
+
" MQNs: 1\n",
|
1683 |
+
" AUTOCORR2D: 0\n",
|
1684 |
+
" BCUT2D: 1\n",
|
1685 |
+
" AUTOCORR3D: 0\n",
|
1686 |
+
" RDF: 0\n",
|
1687 |
+
" MORSE: 1\n",
|
1688 |
+
" WHIM: 0\n",
|
1689 |
+
" GETAWAY: 0\n"
|
1690 |
+
]
|
1691 |
+
}
|
1692 |
+
],
|
1693 |
+
"source": [
|
1694 |
+
"print(\"Study statistics: [de_structure] \")\n",
|
1695 |
+
"print(\" Number of finished trials: \", len(study_de_network.trials))\n",
|
1696 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_de_newtork))\n",
|
1697 |
+
"print(\" Number of complete trials: \", len(complete_trials_de_newtork))\n",
|
1698 |
+
"print(\"Best trial:\")\n",
|
1699 |
+
"trials_tmp = study_de_network.best_trial\n",
|
1700 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1701 |
+
"print(\" Params: \")\n",
|
1702 |
+
"for key, value in trials_tmp.params.items():\n",
|
1703 |
+
" print(\" {}: {}\".format(key, value))"
|
1704 |
+
]
|
1705 |
+
},
|
1706 |
+
{
|
1707 |
+
"cell_type": "code",
|
1708 |
+
"execution_count": 37,
|
1709 |
+
"metadata": {},
|
1710 |
+
"outputs": [
|
1711 |
+
{
|
1712 |
+
"name": "stdout",
|
1713 |
+
"output_type": "stream",
|
1714 |
+
"text": [
|
1715 |
+
"Study statistics: [lo_structure] \n",
|
1716 |
+
" Number of finished trials: 3\n",
|
1717 |
+
" Number of pruned trials: 0\n",
|
1718 |
+
" Number of complete trials: 3\n",
|
1719 |
+
"Best trial:\n",
|
1720 |
+
" Value: 0.713994\n",
|
1721 |
+
" Params: \n",
|
1722 |
+
" NumRotatableBonds: 1\n",
|
1723 |
+
" HeavyAtomCount: 1\n",
|
1724 |
+
" NumHAcceptors: 0\n",
|
1725 |
+
" NumHDonors: 1\n",
|
1726 |
+
" NumHeteroatoms: 1\n",
|
1727 |
+
" NumValenceElec: 0\n",
|
1728 |
+
" NHOHCount: 0\n",
|
1729 |
+
" NOCount: 1\n",
|
1730 |
+
" RingCount: 0\n",
|
1731 |
+
" NumAromaticRings: 1\n",
|
1732 |
+
" NumSaturatedRings: 0\n",
|
1733 |
+
" NumAliphaticRings: 1\n",
|
1734 |
+
" LabuteASA: 0\n",
|
1735 |
+
" NumValenceElectrons: 0\n",
|
1736 |
+
" BalabanJ: 0\n",
|
1737 |
+
" BertzCT: 0\n",
|
1738 |
+
" Ipc: 1\n",
|
1739 |
+
" kappa_Series[1-3]_ind: 1\n",
|
1740 |
+
" Chi_Series[13]_ind: 0\n",
|
1741 |
+
" Phi: 1\n",
|
1742 |
+
" HallKierAlpha: 1\n",
|
1743 |
+
" NumAmideBonds: 0\n",
|
1744 |
+
" FractionCSP3: 1\n",
|
1745 |
+
" NumSpiroAtoms: 0\n",
|
1746 |
+
" NumBridgeheadAtoms: 1\n",
|
1747 |
+
" PEOE_VSA_Series[1-14]_ind: 0\n",
|
1748 |
+
" SMR_VSA_Series[1-10]_ind: 0\n",
|
1749 |
+
" SlogP_VSA_Series[1-12]_ind: 1\n",
|
1750 |
+
" EState_VSA_Series[1-11]_ind: 1\n",
|
1751 |
+
" VSA_EState_Series[1-10]_ind: 1\n",
|
1752 |
+
" Asphericity: 0\n",
|
1753 |
+
" PBF: 0\n",
|
1754 |
+
" RadiusOfGyration: 0\n",
|
1755 |
+
" InertialShapeFactor: 0\n",
|
1756 |
+
" Eccentricity: 0\n",
|
1757 |
+
" SpherocityIndex: 0\n",
|
1758 |
+
" PMI_series[1-3]_ind: 0\n",
|
1759 |
+
" NPR_series[1-2]_ind: 0\n",
|
1760 |
+
" MQNs: 0\n",
|
1761 |
+
" AUTOCORR2D: 1\n",
|
1762 |
+
" BCUT2D: 1\n",
|
1763 |
+
" AUTOCORR3D: 1\n",
|
1764 |
+
" RDF: 1\n",
|
1765 |
+
" MORSE: 0\n",
|
1766 |
+
" WHIM: 1\n",
|
1767 |
+
" GETAWAY: 1\n"
|
1768 |
+
]
|
1769 |
+
}
|
1770 |
+
],
|
1771 |
+
"source": [
|
1772 |
+
"print(\"Study statistics: [lo_structure] \")\n",
|
1773 |
+
"print(\" Number of finished trials: \", len(study_lo_network.trials))\n",
|
1774 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_lo_newtork))\n",
|
1775 |
+
"print(\" Number of complete trials: \", len(complete_trials_lo_newtork))\n",
|
1776 |
+
"print(\"Best trial:\")\n",
|
1777 |
+
"trials_tmp = study_lo_network.best_trial\n",
|
1778 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1779 |
+
"print(\" Params: \")\n",
|
1780 |
+
"for key, value in trials_tmp.params.items():\n",
|
1781 |
+
" print(\" {}: {}\".format(key, value))"
|
1782 |
+
]
|
1783 |
+
},
|
1784 |
+
{
|
1785 |
+
"cell_type": "code",
|
1786 |
+
"execution_count": 38,
|
1787 |
+
"metadata": {},
|
1788 |
+
"outputs": [
|
1789 |
+
{
|
1790 |
+
"name": "stdout",
|
1791 |
+
"output_type": "stream",
|
1792 |
+
"text": [
|
1793 |
+
"Study statistics: [hu_structure] \n",
|
1794 |
+
" Number of finished trials: 3\n",
|
1795 |
+
" Number of pruned trials: 0\n",
|
1796 |
+
" Number of complete trials: 3\n",
|
1797 |
+
"Best trial:\n",
|
1798 |
+
" Value: 0.912375\n",
|
1799 |
+
" Params: \n",
|
1800 |
+
" NumRotatableBonds: 1\n",
|
1801 |
+
" HeavyAtomCount: 0\n",
|
1802 |
+
" NumHAcceptors: 1\n",
|
1803 |
+
" NumHDonors: 0\n",
|
1804 |
+
" NumHeteroatoms: 0\n",
|
1805 |
+
" NumValenceElec: 1\n",
|
1806 |
+
" NHOHCount: 0\n",
|
1807 |
+
" NOCount: 0\n",
|
1808 |
+
" RingCount: 0\n",
|
1809 |
+
" NumAromaticRings: 1\n",
|
1810 |
+
" NumSaturatedRings: 1\n",
|
1811 |
+
" NumAliphaticRings: 1\n",
|
1812 |
+
" LabuteASA: 0\n",
|
1813 |
+
" NumValenceElectrons: 1\n",
|
1814 |
+
" BalabanJ: 0\n",
|
1815 |
+
" BertzCT: 1\n",
|
1816 |
+
" Ipc: 1\n",
|
1817 |
+
" kappa_Series[1-3]_ind: 0\n",
|
1818 |
+
" Chi_Series[13]_ind: 1\n",
|
1819 |
+
" Phi: 1\n",
|
1820 |
+
" HallKierAlpha: 1\n",
|
1821 |
+
" NumAmideBonds: 1\n",
|
1822 |
+
" FractionCSP3: 0\n",
|
1823 |
+
" NumSpiroAtoms: 1\n",
|
1824 |
+
" NumBridgeheadAtoms: 1\n",
|
1825 |
+
" PEOE_VSA_Series[1-14]_ind: 0\n",
|
1826 |
+
" SMR_VSA_Series[1-10]_ind: 0\n",
|
1827 |
+
" SlogP_VSA_Series[1-12]_ind: 1\n",
|
1828 |
+
" EState_VSA_Series[1-11]_ind: 0\n",
|
1829 |
+
" VSA_EState_Series[1-10]_ind: 1\n",
|
1830 |
+
" Asphericity: 1\n",
|
1831 |
+
" PBF: 1\n",
|
1832 |
+
" RadiusOfGyration: 0\n",
|
1833 |
+
" InertialShapeFactor: 0\n",
|
1834 |
+
" Eccentricity: 1\n",
|
1835 |
+
" SpherocityIndex: 1\n",
|
1836 |
+
" PMI_series[1-3]_ind: 1\n",
|
1837 |
+
" NPR_series[1-2]_ind: 0\n",
|
1838 |
+
" MQNs: 1\n",
|
1839 |
+
" AUTOCORR2D: 0\n",
|
1840 |
+
" BCUT2D: 1\n",
|
1841 |
+
" AUTOCORR3D: 0\n",
|
1842 |
+
" RDF: 1\n",
|
1843 |
+
" MORSE: 0\n",
|
1844 |
+
" WHIM: 1\n",
|
1845 |
+
" GETAWAY: 1\n"
|
1846 |
+
]
|
1847 |
+
}
|
1848 |
+
],
|
1849 |
+
"source": [
|
1850 |
+
"print(\"Study statistics: [hu_structure] \")\n",
|
1851 |
+
"print(\" Number of finished trials: \", len(study_hu_network.trials))\n",
|
1852 |
+
"print(\" Number of pruned trials: \", len(pruned_trials_hu_newtork))\n",
|
1853 |
+
"print(\" Number of complete trials: \", len(complete_trials_hu_newtork))\n",
|
1854 |
+
"print(\"Best trial:\")\n",
|
1855 |
+
"trials_tmp = study_hu_network.best_trial\n",
|
1856 |
+
"print(\" Value: \", trials_tmp.value)\n",
|
1857 |
+
"print(\" Params: \")\n",
|
1858 |
+
"for key, value in trials_tmp.params.items():\n",
|
1859 |
+
" print(\" {}: {}\".format(key, value))"
|
1860 |
+
]
|
1861 |
+
},
|
1862 |
+
{
|
1863 |
+
"cell_type": "code",
|
1864 |
+
"execution_count": null,
|
1865 |
+
"metadata": {},
|
1866 |
+
"outputs": [],
|
1867 |
+
"source": []
|
1868 |
+
},
|
1869 |
+
{
|
1870 |
+
"cell_type": "code",
|
1871 |
+
"execution_count": null,
|
1872 |
+
"metadata": {},
|
1873 |
+
"outputs": [],
|
1874 |
+
"source": []
|
1875 |
+
},
|
1876 |
+
{
|
1877 |
+
"cell_type": "code",
|
1878 |
+
"execution_count": null,
|
1879 |
+
"metadata": {},
|
1880 |
+
"outputs": [],
|
1881 |
+
"source": []
|
1882 |
+
},
|
1883 |
+
{
|
1884 |
+
"cell_type": "code",
|
1885 |
+
"execution_count": null,
|
1886 |
+
"metadata": {},
|
1887 |
+
"outputs": [],
|
1888 |
+
"source": []
|
1889 |
+
}
|
1890 |
+
],
|
1891 |
+
"metadata": {
|
1892 |
+
"kernelspec": {
|
1893 |
+
"display_name": "ai",
|
1894 |
+
"language": "python",
|
1895 |
+
"name": "python3"
|
1896 |
+
},
|
1897 |
+
"language_info": {
|
1898 |
+
"codemirror_mode": {
|
1899 |
+
"name": "ipython",
|
1900 |
+
"version": 3
|
1901 |
+
},
|
1902 |
+
"file_extension": ".py",
|
1903 |
+
"mimetype": "text/x-python",
|
1904 |
+
"name": "python",
|
1905 |
+
"nbconvert_exporter": "python",
|
1906 |
+
"pygments_lexer": "ipython3",
|
1907 |
+
"version": "3.12.2"
|
1908 |
+
},
|
1909 |
+
"orig_nbformat": 4
|
1910 |
+
},
|
1911 |
+
"nbformat": 4,
|
1912 |
+
"nbformat_minor": 2
|
1913 |
+
}
|
7_solubility_final_HPO_proving.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
8_solubility_xai.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/Lovric2020_logS0.csv
ADDED
@@ -0,0 +1,830 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
isomeric_smiles,logS0
|
2 |
+
C(\C=C\c1ccccc1)N1CCN(CC1)C(c1ccccc1)c1ccccc1,-5.34
|
3 |
+
C(c1ccccc1)n1ccnc1,-2.26
|
4 |
+
C1Cc2ccccc2N1,-1.04
|
5 |
+
C1O[C@H]1c1ccccc1,-1.6
|
6 |
+
C=CCC1(C(=O)NC(=O)NC1=O)c1ccccc1,-2.346
|
7 |
+
C=CCC1(CC=C)C(=O)NC(=O)NC1=O,-1.796
|
8 |
+
C=CCS[S@@](=O)CC=C,-0.83
|
9 |
+
CC#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CCC4=C3[C@H](C[C@]12C)c1ccc(cc1)N(C)C,-5.825
|
10 |
+
CC(=C)[C@@H]1CC=C(C)C(=O)C1,-2.06
|
11 |
+
CC(=C)[C@H]1CC=C(C)C(=O)C1,-2.06
|
12 |
+
CC(=O)C(C)(C)C,-0.723666667
|
13 |
+
CC(=O)CCCCn1c(=O)n(C)c2ncn(C)c2c1=O,-0.558
|
14 |
+
CC(=O)C[C@H](c1ccccc1)c1c(O)c2ccccc2oc1=O,-4.761
|
15 |
+
CC(=O)N1CCN(CC1)c1ccc(OC[C@H]2CO[C@@](Cn3ccnc3)(O2)c2ccc(Cl)cc2Cl)cc1,-3.8
|
16 |
+
CC(=O)NC(N)=O,-0.9
|
17 |
+
CC(=O)NC[C@H]1CN(C(=O)O1)c1ccc(N2CCN(CC2)C(=O)CO)c(F)c1,-1.97
|
18 |
+
CC(=O)NC[C@H]1CN(C(=O)O1)c1ccc(N2CCOCC2)c(F)c1,-2.07
|
19 |
+
CC(=O)NCc1c(I)c(NC(C)=O)c(I)c(C(O)=O)c1I,-2.321
|
20 |
+
CC(=O)NS(=O)(=O)c1ccc(N)cc1,-1.5135
|
21 |
+
CC(=O)Nc1c(I)c(NC(C)=O)c(I)c(C(O)=O)c1I,-2.788
|
22 |
+
CC(=O)Nc1ccc(C=O)cc1,-1.58
|
23 |
+
CC(=O)Nc1ccc(Cl)cc1,-2.842
|
24 |
+
CC(=O)Nc1ccc(F)cc1,-1.78
|
25 |
+
CC(=O)Nc1ccc(N)cc1,-0.98
|
26 |
+
CC(=O)Nc1ccc(O)cc1,-1.064
|
27 |
+
CC(=O)Nc1ccc(OC(C)=O)cc1,-1.91
|
28 |
+
CC(=O)Nc1ccc(cc1)S(N)(=O)=O,-1.61
|
29 |
+
CC(=O)Nc1ccc(cc1)[N+]([O-])=O,-2.691333333
|
30 |
+
CC(=O)Nc1cccc(C)c1,-2.09
|
31 |
+
CC(=O)Nc1ccccc1,-1.398
|
32 |
+
CC(=O)Nc1ccccc1Cl,-1.4
|
33 |
+
CC(=O)Nc1ccccc1[N+]([O-])=O,-1.91
|
34 |
+
CC(=O)Nc1nnc(s1)S(N)(=O)=O,-2.462
|
35 |
+
CC(=O)OC1CCCCC1,-1.67
|
36 |
+
CC(=O)OCC(COC(C)=O)OC(C)=O,-0.6
|
37 |
+
CC(=O)OCC1=C(N2[C@H](SC1)[C@H](NC(=O)Cc1cccs1)C2=O)C(O)=O,-2.938
|
38 |
+
CC(=O)O[C@@H]1C2=C(C)[C@H](C[C@@](O)([C@@H](OC(=O)c3ccccc3)[C@@H]3[C@@]4(CO[C@@H]4C[C@H](O)[C@@]3(C)C1=O)OC(C)=O)C2(C)C)OC(=O)[C@H](O)[C@@H](NC(=O)c1ccccc1)c1ccccc1,-6.63
|
39 |
+
CC(=O)O[C@]1(C(C)=O)C(=C)C[C@H]2[C@@H]3C=C(C)C4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,-5.57
|
40 |
+
CC(=O)Oc1ccccc1C(O)=O,-1.75
|
41 |
+
CC(=O)S[C@@H]1CC2=CC(=O)CC[C@]2(C)[C@H]2CC[C@@]3(C)[C@@H](CC[C@@]33CCC(=O)O3)[C@H]12,-4.173
|
42 |
+
CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,-4.42
|
43 |
+
CC(=O)c1ccc(N)cc1,-1.61
|
44 |
+
CC(=O)c1cccc(N)c1,-1.28
|
45 |
+
CC(=O)c1ccccc1,-1.29
|
46 |
+
CC(C)(C)C(C)(C)O,-0.72
|
47 |
+
CC(C)(C)CCCO,-1.55
|
48 |
+
CC(C)(C)CCO,-0.5
|
49 |
+
CC(C)(C)CO,-0.4
|
50 |
+
CC(C)(C)COC(N)=O,-0.8
|
51 |
+
CC(C)(C)NC(=O)[C@H]1CC[C@H]2[C@@H]3CC=C4C=C(CC[C@]4(C)[C@H]3CC[C@]12C)C(O)=O,-8.7585
|
52 |
+
CC(C)(C)NC[C@H](O)COc1ccc(NC(=O)NC2CCCCC2)cc1,-3.62
|
53 |
+
CC(C)(C)NC[C@H](O)COc1cccc2C[C@@H](O)[C@@H](O)Cc12,-1.57
|
54 |
+
CC(C)(C)NC[C@H](O)c1ccc(O)c(CO)c1,-1.224
|
55 |
+
CC(C)(C)OC(N)=O,0.1
|
56 |
+
CC(C)(C)c1ccc(O)cc1,-2.41
|
57 |
+
CC(C)(C)c1ccc(cc1)[C@@H](O)CCCN1CCC(CC1)C(O)(c1ccccc1)c1ccccc1,-6.69
|
58 |
+
CC(C)(N)C(O)=O,0.21
|
59 |
+
CC(C)(O\N=C(\C(=O)N[C@H]1[C@H]2SCC(C[n+]3ccccc3)=C(N2C1=O)C(O)=O)c1csc(N)n1)C(O)=O,-2.038
|
60 |
+
CC(C)(Oc1ccc(CCNC(=O)c2ccc(Cl)cc2)cc1)C(O)=O,-4.8
|
61 |
+
CC(C)(S)[C@@H](N)C(O)=O,-0.13
|
62 |
+
CC(C)(S)[C@H](N)C(O)=O,-0.128
|
63 |
+
CC(C)=CCC1(C)C(=O)NC(=O)NC1=O,-2.602
|
64 |
+
CC(C)=CCC[C@](C)(O)C=C,-1.99
|
65 |
+
CC(C)=CCC\C(C)=C/CO,-2.46
|
66 |
+
CC(C)=CCC\C(C)=C\C=O,-2.06
|
67 |
+
CC(C)C(=O)C(C)C,-1.3
|
68 |
+
CC(C)C(C)(C)O,-0.41
|
69 |
+
CC(C)C(O)C(C)C,-1.13725
|
70 |
+
CC(C)C1(C(C)C)C(=O)NC(=O)NC1=O,-2.766
|
71 |
+
CC(C)C1(CC=C(C)C)C(=O)NC(=O)NC1=O,-2.593
|
72 |
+
CC(C)C1(CC=C)C(=O)NC(=O)NC1=O,-1.71
|
73 |
+
CC(C)C1C(=O)NC(=O)NC1=O,-1.456
|
74 |
+
CC(C)CC(C)(C)O,-0.92
|
75 |
+
CC(C)CC(C)=O,-0.966666667
|
76 |
+
CC(C)CC1(CC=C)C(=O)NC(=O)NC1=O,-2.119
|
77 |
+
CC(C)CCC(C)=O,-1.33
|
78 |
+
CC(C)CCCO,-1.14
|
79 |
+
CC(C)CCO,-0.513333333
|
80 |
+
CC(C)CCOC(C)=O,-1.92
|
81 |
+
CC(C)CCOC=O,-1.52
|
82 |
+
CC(C)CC[C@@H](C)O,-1.38
|
83 |
+
CC(C)CNC(=O)N1CCNC1=O,-2.15
|
84 |
+
CC(C)COC(=O)C=C,-1.21
|
85 |
+
CC(C)COC(C)=O,-1.22
|
86 |
+
CC(C)COC=O,-1.01
|
87 |
+
CC(C)C[C@@H](C)O,-0.79625
|
88 |
+
CC(C)C[C@H](C)CO,-1.6
|
89 |
+
CC(C)C[C@H](N)C(O)=O,-0.75
|
90 |
+
CC(C)Cc1ccc(cc1)[C@@H](C)C(O)=O,-3.595
|
91 |
+
CC(C)Cn1c(C)nc2n(C)c(=O)n(C)c(=O)c12,-1.599
|
92 |
+
CC(C)Cn1cnc2n(C)c(=O)n(C)c(=O)c12,-0.942
|
93 |
+
CC(C)N(C(=O)CCl)c1ccccc1,-2.48
|
94 |
+
CC(C)N(CC[C@@](C(N)=O)(c1ccccc1)c1ccccn1)C(C)C,-3.1
|
95 |
+
CC(C)N(CC[C@H](c1ccccc1)c1cc(C)ccc1O)C(C)C,-2.58
|
96 |
+
CC(C)NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(Cl)c1,-4.405
|
97 |
+
CC(C)NC[C@H](O)COc1ccc(CC(N)=O)cc1,-1.3
|
98 |
+
CC(C)NC[C@H](O)COc1ccc(COCCOC(C)C)cc1,-1.93
|
99 |
+
CC(C)NC[C@H](O)COc1cccc2[nH]ccc12,-3.88
|
100 |
+
CC(C)NC[C@H](O)COc1cccc2ccccc12,-3.7075
|
101 |
+
CC(C)NC[C@H](O)COc1ccccc1CC=C,-2.82
|
102 |
+
CC(C)NC[C@H]1CCc2cc(CO)c(cc2N1)[N+]([O-])=O,-2.965
|
103 |
+
CC(C)Nc1cccnc1N1CCN(CC1)C(=O)c1cc2cc(NS(C)(=O)=O)ccc2[nH]1,-5.74
|
104 |
+
CC(C)OC(=O)C(C)(C)Oc1ccc(cc1)C(=O)c1ccc(Cl)cc1,-5.712
|
105 |
+
CC(C)OC(C)=O,-0.563
|
106 |
+
CC(C)OC(C)C,-1.1
|
107 |
+
CC(C)OC=O,-0.63
|
108 |
+
CC(C)SC(C)C,-2.24
|
109 |
+
CC(C)[C@@H](C)O,-0.186666667
|
110 |
+
CC(C)[C@@H]1CC[C@@H](C)CC1=O,-2.396666667
|
111 |
+
CC(C)[C@H](C)CO,-0.39
|
112 |
+
CC(C)[C@H]1CC[C@H](C)C[C@@H]1O,-2.53
|
113 |
+
CC(C)[N+]([O-])=O,-0.62
|
114 |
+
CC(C)\N=c1/cc2n(-c3ccc(Cl)cc3)c3ccccc3nc2cc1Nc1ccc(Cl)cc1,-5.8
|
115 |
+
CC(C)c1ccc(C)c(O)c1,-2.08
|
116 |
+
CC(C)c1ccc(C)cc1O,-2.186
|
117 |
+
CC(C)c1ccc(NC(=O)N(C)C)cc1,-3.469
|
118 |
+
CC(C)c1nc(nc(-c2ccc(F)cc2)c1\C=C\[C@@H](O)C[C@@H](O)CC(O)=O)N(C)S(C)(=O)=O,-2.48
|
119 |
+
CC(C)n1c(\C=C\[C@H](O)C[C@H](O)CC(O)=O)c(-c2ccc(F)cc2)c2ccccc12,-3.83
|
120 |
+
CC1(C(=O)NC(=O)NC1=O)c1ccccc1,-2.38
|
121 |
+
CC1(C)C(=O)NC(=O)NC1=O,-1.742
|
122 |
+
CC1(C)CON(Cc2ccccc2Cl)C1=O,-2.338
|
123 |
+
CC1(C)N(Cl)C(=O)N(Cl)C1=O,-2.6
|
124 |
+
CC1(C)S[C@@H]2[C@H](NC(=O)[C@H](N)c3ccc(O)cc3)C(=O)N2[C@H]1C(O)=O,-2.031
|
125 |
+
CC1(C)S[C@@H]2[C@H](NC(=O)[C@H](N)c3ccccc3)C(=O)N2[C@H]1C(O)=O,-1.539
|
126 |
+
CC1(C)[C@@H]2CC[C@@](C)(C2)C1=O,-1.85
|
127 |
+
CC1(C)[C@H]2CC[C@]1(C)C(=O)C2,-2.086
|
128 |
+
CC1(C)[C@H]2CC[C@]1(C)[C@H](O)C2,-2.32
|
129 |
+
CC1(CC=C)C(=O)NC(=O)NC1=O,-1.16
|
130 |
+
CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1,-2.3555
|
131 |
+
CC1=C(CC(O)=O)c2cc(F)ccc2\C1=C/c1ccc(cc1)[S@](C)=O,-4.78
|
132 |
+
CC1=C(CCCO1)C(=O)Nc1ccccc1,-2.56
|
133 |
+
CC1=C(N2[C@H](SC1)[C@H](NC(=O)[C@H](N)C1=CCC=CC1)C2=O)C(O)=O,-1.215
|
134 |
+
CC1=CC(=O)CC(C)(C)C1,-1.06
|
135 |
+
CC1=NS(=O)(=O)c2cc(Cl)ccc2N1,-3.481
|
136 |
+
CC1C(=O)NC(=O)NC1=O,-1.126
|
137 |
+
CCC(=O)Nc1ccccc1,-1.92
|
138 |
+
CCC(=O)OC,-0.14
|
139 |
+
CCC(=O)O[C@@](Cc1ccccc1)([C@H](C)CN(C)C)c1ccccc1,-4.985
|
140 |
+
CCC(=O)c1ccccc1,-1.83
|
141 |
+
CCC(Br)(CC)C(=O)NC(N)=O,-2.68
|
142 |
+
CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@@H]12,-4.145
|
143 |
+
CCC(C)(C)CO,-1.04
|
144 |
+
CCC(C)(C)O,0.088175
|
145 |
+
CCC(C)(O)CC,-0.366666667
|
146 |
+
CCC(CC)C=O,-1.52
|
147 |
+
CCC(CC)CO,-1.17
|
148 |
+
CCC(O)(CC)CC,-0.85
|
149 |
+
CCC(O)CC,-0.239
|
150 |
+
CCC1(C(=O)NC(=O)NC1=O)C1=CCCCC1,-2.273
|
151 |
+
CCC1(C(=O)NC(=O)NC1=O)C1=CCCCCC1,-3
|
152 |
+
CCC1(C(=O)NC(=O)NC1=O)C1=C[C@@H]2CC[C@@H](C2)C1,-2.773
|
153 |
+
CCC1(C(=O)NC(=O)NC1=O)c1ccccc1,-2.293
|
154 |
+
CCC1(C(=O)NCNC1=O)c1ccccc1,-2.64
|
155 |
+
CCC1(C(C)C)C(=O)NC(=O)NC1=O,-2.153
|
156 |
+
CCC1(C)C(=O)NC(=O)NC1=O,-1.162
|
157 |
+
CCC1(CC)C(=O)NC(=O)NC1=O,-1.41
|
158 |
+
CCC1(CC)C(=O)NC[C@H](C)C1=O,-0.382
|
159 |
+
CCC1(CC=C(C)C)C(=O)NC(=O)NC1=O,-2.253
|
160 |
+
CCC1(CC=C)C(=O)NC(=O)NC1=O,-1.614
|
161 |
+
CCC1(CCC(C)C)C(=O)NC(=O)NC1=O,-2.47
|
162 |
+
CCC1=C(C)CN(C(=O)NCCc2ccc(cc2)S(=O)(=O)NC(=O)N[C@H]2CC[C@H](C)CC2)C1=O,-6.44
|
163 |
+
CCC1C(=O)NC(=O)NC1=O,-1.427
|
164 |
+
CCCC(=O)C=C,-0.83
|
165 |
+
CCCC(=O)CC,-0.83
|
166 |
+
CCCC(=O)CCC,-1.3
|
167 |
+
CCCC(=O)Nc1ccc(OC[C@@H](O)CNC(C)C)c(c1)C(C)=O,-2.4375
|
168 |
+
CCCC(=O)OC,-0.82
|
169 |
+
CCCC(=O)OCC,-1.28
|
170 |
+
CCCC(C)(C)CO,-1.52
|
171 |
+
CCCC(C)(C)O,-0.49
|
172 |
+
CCCC(C)(COC(N)=O)COC(N)=O,-1.807
|
173 |
+
CCCC(O)CCC,-1.4
|
174 |
+
CCCC1(CC)C(=O)NC(=O)NC1=O,-1.491
|
175 |
+
CCCC1(CCC)C(=O)NC(=O)NC1=O,-2.527
|
176 |
+
CCCCC(=O)CCCC,-2.583333333
|
177 |
+
CCCCC(=O)OC,-1.36
|
178 |
+
CCCCC(=O)OCC,-1.75
|
179 |
+
CCCCC(C)(C)O,-1.08
|
180 |
+
CCCCC(C)=O,-0.8
|
181 |
+
CCCCC1(CC)C(=O)NC(=O)NC1=O,-1.686
|
182 |
+
CCCCC1(CC=C)C(=O)NC(=O)NC1=O,-2.172
|
183 |
+
CCCCC1C(=O)N(N(C1=O)c1ccccc1)c1ccccc1,-4.391
|
184 |
+
CCCCCC(=O)OC,-1.913333333
|
185 |
+
CCCCCC(=O)OCC,-2.336666667
|
186 |
+
CCCCCC(C)(C)O,-1.72
|
187 |
+
CCCCCC(C)=O,-1.44
|
188 |
+
CCCCCC1(CC)C(=O)NC(=O)NC1=O,-2.34
|
189 |
+
CCCCCC=O,-1.3
|
190 |
+
CCCCCCC(=O)OCC,-2.73
|
191 |
+
CCCCCCC(C)=O,-2.05
|
192 |
+
CCCCCCC1(CC)C(=O)NC(=O)NC1=O,-3.049
|
193 |
+
CCCCCCC=O,-1.7
|
194 |
+
CCCCCCCC(C)=O,-2.58
|
195 |
+
CCCCCCCC1(CC)C(=O)NC(=O)NC1=O,-3.218
|
196 |
+
CCCCCCCC=O,-2.36
|
197 |
+
CCCCCCCCC1(CC)C(=O)NC(=O)NC1=O,-3.943
|
198 |
+
CCCCCCCCCC1(CC)C(=O)NC(=O)NC1=O,-4.462
|
199 |
+
CCCCCCCCC[C@@H](C)O,-2.94
|
200 |
+
CCCCCCCCO,-2.385
|
201 |
+
CCCCCCCN(CC)CCC[C@@H](O)c1ccc(NS(C)(=O)=O)cc1,-1.81
|
202 |
+
CCCCCCCO,-1.8175
|
203 |
+
CCCCCCCOC(N)=O,-2.62
|
204 |
+
CCCCCCC[C@@H](C)O,-2.74
|
205 |
+
CCCCCCO,-1.2375
|
206 |
+
CCCCCCOC(C)=O,-2.46
|
207 |
+
CCCCCCOC(N)=O,-1.92
|
208 |
+
CCCCCC[C@@H](C)O,-1.99
|
209 |
+
CCCCCO,-0.60225
|
210 |
+
CCCCCOC(=O)CC,-2.25
|
211 |
+
CCCCCOC(C)=O,-1.8875
|
212 |
+
CCCCCOC(N)=O,-1.47
|
213 |
+
CCCCC[C@@H](C)O,-1.55
|
214 |
+
CCCCC[C@@H](O)CC,-1.98
|
215 |
+
CCCCC[C@H](O)\C=C\[C@H]1[C@H](O)CC(=O)[C@@H]1CCCCCCC(O)=O,-3.67
|
216 |
+
CCCCC[C@H](O)\C=C\[C@H]1[C@H](O)CC(=O)[C@@H]1C\C=C/CCCC(O)=O,-2.47
|
217 |
+
CCCCN(CCN(CCCC)C(=O)N1CCOCC1)C(=O)N1CCOCC1,0.098
|
218 |
+
CCCCN1CCCC[C@@H]1C(=O)Nc1c(C)cccc1C,-3.511
|
219 |
+
CCCCNC(=O)NS(=O)(=O)c1ccc(C)cc1,-3.4665
|
220 |
+
CCCCNC(=O)OC[C@@](C)(CCC)COC(N)=O,-2.739
|
221 |
+
CCCCNc1cc(cc(c1Oc1ccccc1)S(N)(=O)=O)C(O)=O,-3.562
|
222 |
+
CCCCNc1ccc(cc1)C(=O)OCCN(C)C,-3.011
|
223 |
+
CCCCOC,-0.99
|
224 |
+
CCCCOC(=O)c1ccc(N)cc1,-3.131
|
225 |
+
CCCCOC(=O)c1ccc(O)cc1,-3.101
|
226 |
+
CCCCOC(C)=O,-1.29
|
227 |
+
CCCCOC(N)=O,-0.66
|
228 |
+
CCCCOCCCC,-1.885
|
229 |
+
CCCCOCCO,-0.42
|
230 |
+
CCCCOc1cc(C(=O)NCCN(CC)CC)c2ccccc2n1,-4.39
|
231 |
+
CCCCOc1ccc(OCCCN2CCOCC2)cc1,-3.5
|
232 |
+
CCCC[C@@H](C)O,-0.89
|
233 |
+
CCCC[C@@H](CC)C=O,-2.13
|
234 |
+
CCCC[C@@H](CC)CO,-2.11
|
235 |
+
CCCC[C@@H](O)CC,-1.47
|
236 |
+
CCCC[C@H](C)[C@H](C)O,-1.72
|
237 |
+
CCCC[C@](C)(O)CC,-1.6
|
238 |
+
CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1,-8.174
|
239 |
+
CCCCn1c(=O)n(C)c2ncn(C)c2c1=O,-1.625
|
240 |
+
CCCCn1c(C)nc2n(C)c(=O)n(C)c(=O)c12,-1.745
|
241 |
+
CCCCn1cnc2n(C)c(=O)n(C)c(=O)c12,-1.805
|
242 |
+
CCCN(CCC)S(=O)(=O)c1ccc(cc1)C(O)=O,-4.888
|
243 |
+
CCCNC(=O)NS(=O)(=O)c1ccc(Cl)cc1,-3.2745
|
244 |
+
CCCNC[C@@H](O)COc1ccccc1C(=O)CCc1ccccc1,-5.2
|
245 |
+
CCCOC(=O)CC,-1.34
|
246 |
+
CCCOC(=O)CCC,-1.92
|
247 |
+
CCCOC(=O)c1ccc(O)cc1,-2.557
|
248 |
+
CCCOC(=O)c1ccccc1,-2.67
|
249 |
+
CCCOC(C)=O,-0.72
|
250 |
+
CCCOC(C)C,-1.34
|
251 |
+
CCCOC=O,-0.49
|
252 |
+
CCCOCC,-0.66
|
253 |
+
CCCOCCC,-1.446666667
|
254 |
+
CCCOc1ccc2[C@@H]([C@H]([C@@H](c2c1)c1ccc(OC)cc1OCC(O)=O)C(O)=O)c1ccc2OCOc2c1,-6.771
|
255 |
+
CCCSCCC,-2.58
|
256 |
+
CCC[C@@H](C)O,-0.29
|
257 |
+
CCC[C@@H](O)CC,-0.8
|
258 |
+
CCC[C@@H](O)[C@H](CC)CO,-0.54
|
259 |
+
CCC[C@@]1(CCc2ccccc2)CC(=O)[C@@H]([C@H](CC)c2cccc(NS(=O)(=O)c3ccc(cn3)C(F)(F)F)c2)C(=O)O1,-6.3
|
260 |
+
CCC[C@H](C)C1(CC)C(=O)NC(=O)NC1=O,-2.41
|
261 |
+
CCC[C@H](C)C1(CC=C)C(=O)NC(=O)NC1=O,-2.333
|
262 |
+
CCC[C@H](C)CO,-1.11
|
263 |
+
CCC[C@H](O)C=C,-0.59
|
264 |
+
CCC[C@](C)(O)CC,-0.986666667
|
265 |
+
CCC[N+]([O-])=O,-0.8
|
266 |
+
CCC[S@](=O)CCCN(CC)C[C@@H](O)COc1ccc(cc1)C#N,-1.17
|
267 |
+
CCC\C(=C(/C)O)C(C)=O,-0.88
|
268 |
+
CCC\C=C(\CC)C=O,-2.46
|
269 |
+
CCCc1cc(=O)[nH]c(=S)[nH]1,-2.185
|
270 |
+
CCCn1c(=O)n(C)c2ncn(C)c2c1=O,-1.207
|
271 |
+
CCN(CC)C(=O)C(\Cl)=C(/C)OP(=O)(OC)OC,0.523
|
272 |
+
CCN(CC)C(=O)CSc1ccc(Cl)nn1,-1.716
|
273 |
+
CCN(CC)C(=O)Nc1ccc(OC[C@@H](O)CNC(C)(C)C)c(c1)C(C)=O,-1.9
|
274 |
+
CCN(CC)C(=S)SSC(=S)N(CC)CC,-2.995
|
275 |
+
CCN(CC)CC(=O)Nc1c(C)cccc1C,-1.874
|
276 |
+
CCN(CC)CCC[C@@H](C)Nc1c2ccc(Cl)cc2nc2ccc(OC)cc12,-4.35
|
277 |
+
CCN(CC)CCC[C@@H](C)Nc1ccnc2cc(Cl)ccc12,-3.89
|
278 |
+
CCN(CC)CCNC(=O)c1cc(Cl)c(N)cc1OC,-3.565
|
279 |
+
CCN(CC)CCOC(=O)c1ccc(N)cc1,-1.719
|
280 |
+
CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O,-5.79
|
281 |
+
CCN(Cc1ccncc1)C(=O)[C@@H](CO)c1ccccc1,-1.698
|
282 |
+
CCN(N=O)C(N)=O,-0.96
|
283 |
+
CCN1CCC[C@@H]1CNC(=O)c1cc(ccc1OC)S(N)(=O)=O,-2.876
|
284 |
+
CCN1CCN(CC1)c1cc2n(cc(C(O)=O)c(=O)c2cc1F)C1CC1,-3.18
|
285 |
+
CCN1c2ncccc2N(C)C(=O)c2cccnc12,-2.62
|
286 |
+
CCN1c2ncccc2N(CC)C(=O)c2cccnc12,-2.86
|
287 |
+
CCNC(=O)[C@H](C)OC(=O)Nc1ccccc1,-1.83
|
288 |
+
CCNC(=S)NCC,-1.46
|
289 |
+
CCN[C@H]1CN(CCCOC)S(=O)(=O)c2sc(cc12)S(N)(=O)=O,-2.95
|
290 |
+
CCNc1nc(Cl)nc(NC(C)C)n1,-3.489
|
291 |
+
CCOC(=O)C1=C(C)NC(C)=C([C@H]1c1cccc(Cl)c1Cl)C(=O)OC,-5.89
|
292 |
+
CCOC(=O)C=C,-0.74
|
293 |
+
CCOC(=O)CC,-0.66
|
294 |
+
CCOC(=O)CC(=O)OCC,-0.82
|
295 |
+
CCOC(=O)CCC(=O)OCC,-0.96
|
296 |
+
CCOC(=O)N(C)C(=O)CSP(=S)(OCC)OCC,-2.518
|
297 |
+
CCOC(=O)NCCOc1ccc(Oc2ccccc2)cc1,-4.719
|
298 |
+
CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CCC[C@H]1C(O)=O,-1.305
|
299 |
+
CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1Cc2ccccc2C[C@H]1C(O)=O,-1.9
|
300 |
+
CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1[C@H]2CCCC[C@@H]2C[C@H]1C(O)=O,-2.84
|
301 |
+
CCOC(=O)c1ccc(N)cc1,-2.41
|
302 |
+
CCOC(=O)c1ccc(O)cc1,-2.346
|
303 |
+
CCOC(=O)c1ccccc1,-2.32
|
304 |
+
CCOC(=O)c1ccccc1C(=O)OCC,-2.35
|
305 |
+
CCOC(=O)c1cncn1[C@H](C)c1ccccc1,-6.735
|
306 |
+
CCOC(C)=O,-0.035675
|
307 |
+
CCOC(C)C,-0.55
|
308 |
+
CCOC(C)OCC,-0.43
|
309 |
+
CCOC(N)=O,0.85
|
310 |
+
CCOCCOCC,-0.77
|
311 |
+
CCOP(=O)(OCC)OCC,0.43
|
312 |
+
CCOP(=S)(OCC)Oc1ccc(cc1)[S@](C)=O,-2.3
|
313 |
+
CCOP(=S)(OCC)Oc1nc(Cl)c(Cl)cc1Cl,-5.244
|
314 |
+
CCOc1ccc(NC(C)=O)cc1,-2.4255
|
315 |
+
CCOc1ccc(NC(N)=O)cc1,-2.17
|
316 |
+
CCOc1ccccc1,-2.33
|
317 |
+
CCS(=O)(=O)CC,0.04
|
318 |
+
CCSCc1ccccc1OC(=O)NC,-2.09
|
319 |
+
CCSSCC,-2.42
|
320 |
+
CC[C@@H](C)C(C)(C)O,-0.89
|
321 |
+
CC[C@@H](C)C(C)=O,-0.67
|
322 |
+
CC[C@@H](C)OC(N)=O,-0.3
|
323 |
+
CC[C@@H](CO)NCCN[C@@H](CC)CO,-0.565
|
324 |
+
CC[C@@H](Cc1c(I)cc(I)c(N)c1I)C(O)=O,-4.58
|
325 |
+
CC[C@@H](O)C(C)(C)C,-1.15
|
326 |
+
CC[C@@H](O)C(C)C,-0.7
|
327 |
+
CC[C@@](C)(O)C(C)(C)C,-1.27
|
328 |
+
CC[C@@](C)(O)C(C)C,-0.85
|
329 |
+
CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@@H]12,-6
|
330 |
+
CC[C@H](C)C1(CC)C(=O)NC(=O)NC1=O,-2.333
|
331 |
+
CC[C@H](C)C1(CC=C)C(=O)NC(=O)NC1=O,-2.016
|
332 |
+
CC[C@H](C)CO,-0.47
|
333 |
+
CC[C@H](C)[C@H](C)O,-0.716666667
|
334 |
+
CC[C@H](N)C(O)=O,0.3075
|
335 |
+
CC[C@H]1NC(=O)NC1=O,-0.06
|
336 |
+
CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@@H]([C@H]2O)N(C)C)[C@](C)(O)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,-3.15
|
337 |
+
CC[C@]1(CCC(=O)NC1=O)c1ccccc1,-2.337
|
338 |
+
CC\C(=C(/c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,-8.02
|
339 |
+
CC\C(=C(\CC)c1ccc(O)cc1)c1ccc(O)cc1,-4.7145
|
340 |
+
CC\C=C(/C)C1(CC)C(=O)NC(=O)NC1=O,-2.458
|
341 |
+
CCc1ccc(CCOc2ccc(C[C@H]3SC(=O)NC3=O)cc2)nc1,-6.185
|
342 |
+
CCc1cccc(C)c1N([C@@H](C)COC)C(=O)CCl,-2.73
|
343 |
+
CCc1ccccc1O,-1.36
|
344 |
+
CCc1cccs1,-2.59
|
345 |
+
CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,-4.109
|
346 |
+
CCn1c(=O)n(C)c2ncn(C)c2c1=O,-0.719
|
347 |
+
CCn1cc(C(O)=O)c(=O)c2cc(F)c(N3CCN[C@@H](C)C3)c(F)c12,-2.43291365
|
348 |
+
CCn1cc(C(O)=O)c(=O)c2cc(F)c(cc12)N1CCNCC1,-2.9065
|
349 |
+
CCn1cc(C(O)=O)c(=O)c2ccc(C)nc12,-3.4885
|
350 |
+
CCn1cnc2n(C)c(=O)n(C)c(=O)c12,-0.757
|
351 |
+
CN(C(C)=O)c1ccccc1,-0.95
|
352 |
+
CN(C)C(=O)C(CCN1CCC(O)(CC1)c1ccc(Cl)cc1)(c1ccccc1)c1ccccc1,-7.074
|
353 |
+
CN(C)C(=O)C(c1ccccc1)c1ccccc1,-2.98
|
354 |
+
CN(C)C(=O)N(C)C,0.94
|
355 |
+
CN(C)C(=O)NC1CCCCCCC1,-2.289
|
356 |
+
CN(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-3.76
|
357 |
+
CN(C)C(=O)Nc1ccc(Cl)cc1,-2.9
|
358 |
+
CN(C)C(=O)Nc1cccc(OC(=O)NC(C)(C)C)c1,-2.93
|
359 |
+
CN(C)C(=O)Nc1cccc(c1)C(F)(F)F,-3.463
|
360 |
+
CN(C)C(=O)Nc1ccccc1,-1.6175
|
361 |
+
CN(C)C(=O)OC1=CC(=O)CC(C)(C)C1,-0.85
|
362 |
+
CN(C)C(=O)Oc1cc(C)nn1-c1ccccc1,-2.09
|
363 |
+
CN(C)CCC=C1c2ccccc2CCc2ccccc12,-4.55
|
364 |
+
CN(C)CCCN1c2ccccc2CCc2ccccc12,-4.3125
|
365 |
+
CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc12,-5.1455
|
366 |
+
CN(C)CCCOc1nn(Cc2ccccc2)c2ccccc12,-3.78
|
367 |
+
CN(C)CCOC(c1ccccc1)c1ccccc1,-2.947
|
368 |
+
CN(C)CCO[C@@H](c1ccccc1)c1ccccc1C,-4.1
|
369 |
+
CN(C)CC[C@@H](c1ccc(Cl)cc1)c1ccccn1,-2.659
|
370 |
+
CN(C)CC\C=C1\c2ccccc2Sc2ccc(Cl)cc12,-6.308
|
371 |
+
CN(C)[C@H]1[C@@H]2C[C@@H]3Cc4c(ccc(O)c4C(=O)C3=C(O)[C@]2(O)C(=O)[C@H](C(N)=O)C1=O)N(C)C,-0.944
|
372 |
+
CN(C)[C@H]1[C@@H]2C[C@H]3C(=C(O)[C@]2(O)C(=O)[C@H](C(N)=O)C1=O)C(=O)c1c(O)ccc(Cl)c1[C@@]3(C)O,-2.94
|
373 |
+
CN(C)[C@H]1[C@@H]2C[C@H]3C(=C(O)[C@]2(O)C(=O)[C@H](C(N)=O)C1=O)C(=O)c1c(O)cccc1[C@@]3(C)O,-2.924
|
374 |
+
CN(C)[C@H]1[C@@H]2[C@@H](O)[C@H]3C(=C(O)[C@]2(O)C(=O)[C@H](C(N)=O)C1=O)C(=O)c1c(O)cccc1[C@@]3(C)O,-3.093
|
375 |
+
CN(C)c1c(C)n(C)n(-c2ccccc2)c1=O,-0.619
|
376 |
+
CN(C)c1cnc2cncnc2n1,-0.021
|
377 |
+
CN(C)c1ncc2nccnc2n1,0.36
|
378 |
+
CN(C)c1ncnc2nccnc12,-1.021
|
379 |
+
CN(CCOc1ccc(C[C@H]2SC(=O)NC2=O)cc1)c1ccccn1,-5.25
|
380 |
+
CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(cc1)C(=O)N[C@@H](CCC(O)=O)C(O)=O,-4.1
|
381 |
+
CN(N=O)C(N)=O,-0.85
|
382 |
+
CN1C(=O)NC(=O)[C@@](C)(C1=O)C1=CCCCC1,-2.674
|
383 |
+
CN1C(C(=O)Nc2cc(C)on2)=C(O)c2ccccc2S1(=O)=O,-5.61
|
384 |
+
CN1C(C(=O)Nc2ccccn2)=C(O)c2ccccc2S1(=O)=O,-4.8
|
385 |
+
CN1C(C(=O)Nc2ccccn2)=C(O)c2sccc2S1(=O)=O,-3.875
|
386 |
+
CN1CCC(CC1)=C1c2ccccc2C=Cc2ccccc12,-5.9
|
387 |
+
CN1CCCC1=O,1
|
388 |
+
CN1CCN(CC1)C1=Nc2ccccc2Nc2sc(C)cc12,-4.35
|
389 |
+
CN1CCN(CC1)c1cc2n(cc(C(O)=O)c(=O)c2cc1F)-c1ccc(F)cc1,-3.6
|
390 |
+
CN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc23)CC1,-4.398
|
391 |
+
CN1CC[C@@]23[C@H]4Oc5c2c(C[C@@H]1[C@@H]3C=C[C@@H]4O)ccc5O,-3.154
|
392 |
+
CN1CC[C@@]23[C@H]4Oc5c2c(C[C@@H]1[C@@H]3C=C[C@@H]4OC(C)=O)ccc5OC(C)=O,-2.798
|
393 |
+
CN1C[C@@H](O)N(C1=O)c1nnc(s1)C(C)(C)C,-1.877
|
394 |
+
CN1C[C@@H]2C[C@H]1CN2c1cc2n(cc(C(O)=O)c(=O)c2cc1F)C1CC1,-2.9
|
395 |
+
CN1[C@@H](CCl)Nc2cc(Cl)c(cc2S1(=O)=O)S(N)(=O)=O,-3.778
|
396 |
+
CN1[C@H]2CC[C@@H]1C[C@H](C2)OC(=O)[C@@H](CO)c1ccccc1,-2.004
|
397 |
+
CN1[C@H]2CC[C@@H]1C[C@H](C2)OC(=O)[C@H](CO)c1ccccc1,-1.91
|
398 |
+
CN1c2ccc(Cl)cc2C(=NCC1=O)c1ccccc1,-3.802
|
399 |
+
CNC(=O)O\N=C(/SC)C(=O)N(C)C,0.106
|
400 |
+
CNC(=O)O\N=C(\CSC)C(C)(C)C,-1.62
|
401 |
+
CNC(=O)O\N=C1\[C@H](Cl)[C@H]2C[C@@H](C#N)[C@@H]1C2,-2.08
|
402 |
+
CNC(=O)Oc1cc(C)cc(C)c1,-2.581
|
403 |
+
CNC(=O)Oc1cccc(C)c1,-1.802
|
404 |
+
CNC(=O)Oc1cccc2CC(C)(C)Oc12,-2.5
|
405 |
+
CNC(=O)Oc1ccccc1C(C)C,-2.863
|
406 |
+
CNC(=O)Oc1ccccc1C1OCCO1,-1.57
|
407 |
+
CNC(=O)Oc1ccccc1OC(C)C,-2.02
|
408 |
+
CNC(=O)[C@@H](C)SCCSP(=O)(OC)OC,1.144
|
409 |
+
CNC(=O)\C=C(/C)OP(=O)(OC)OC,0.651
|
410 |
+
CNCCC=C1c2ccccc2CCc2ccccc12,-4.018
|
411 |
+
CNCCCN1c2ccccc2CCc2ccccc12,-3.76
|
412 |
+
CNCCC[C@@]12CC[C@@H](c3ccccc13)c1ccccc21,-4.796
|
413 |
+
CNCC[C@H](Oc1ccc(cc1)C(F)(F)F)c1ccccc1,-3.92
|
414 |
+
CNC[C@H](O)c1ccc(O)c(O)c1,-2.74
|
415 |
+
CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc12,-4.83
|
416 |
+
CN\C(NCCSCc1ccc(CN(C)C)o1)=C/[N+]([O-])=O,-2.5
|
417 |
+
CNc1ccccc1,-1.28
|
418 |
+
COC(=O)C(C)=C,-0.8
|
419 |
+
COC(=O)[C@@H]1[C@H]2CC[C@@H](C[C@@H]1OC(=O)c1ccccc1)N2C,-2.26
|
420 |
+
COC(=O)[C@@H]1[C@H]2C[C@H](C=C2)[C@H]1C(=O)OC,-1.2
|
421 |
+
COC(=O)c1ccc(N)cc1,-1.59
|
422 |
+
COC(=O)c1ccc(O)cc1,-1.705
|
423 |
+
COC(=O)c1ccc(OC)cc1,-2.41
|
424 |
+
COC(=O)c1ccccc1,-1.85
|
425 |
+
COC(=O)c1ccccc1C(=O)OC,-1.66
|
426 |
+
COC(=O)c1ccccc1O[C@@H]1O[C@H](CO[C@@H]2OC[C@@H](O)[C@H](O)[C@H]2O)[C@@H](O)[C@H](O)[C@H]1O,-0.742
|
427 |
+
COC(=O)c1cccnc1,-0.46
|
428 |
+
COC(C)(C)C,-0.23875
|
429 |
+
COC1=CC(=O)C[C@@H](C)[C@]11Oc2c(C1=O)c(OC)cc(OC)c2Cl,-4.83
|
430 |
+
COC1=CC=C2[C@H]3Cc4ccc(OC)c5O[C@@H]1[C@]2(CCN3C)c45,-2.658
|
431 |
+
COCC(=O)N([C@H](C)C(=O)OC)c1c(C)cccc1C,-1.6005
|
432 |
+
COCCOc1c(OC)cc(Cc2cnc(N)nc2N)cc1OC,-2.101
|
433 |
+
COCCc1ccc(OC[C@@H](O)CNC(C)C)cc1,-1.315
|
434 |
+
CON(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-3.521
|
435 |
+
CON(C)C(=O)Nc1ccc(Cl)cc1,-2.57
|
436 |
+
COP(=O)(OC)O[C@@H](Br)C(Cl)(Cl)Br,-2.28
|
437 |
+
COP(=O)(OC)[C@H](O)C(Cl)(Cl)Cl,-0.22
|
438 |
+
COP(=S)(OC)Oc1cc(Cl)c(Cl)cc1Cl,-3.905
|
439 |
+
COP(=S)(OC)SCC(=O)N(C)C=O,-1.995
|
440 |
+
CO[C@]12CC[C@@]3(C[C@@H]1[C@](C)(O)C(C)(C)C)[C@H]1Cc4ccc(O)c5O[C@@H]2[C@]3(CCN1CC1CC1)c45,-4.37
|
441 |
+
CO[C@]12[C@H]3N[C@H]3CN1C1=C([C@H]2COC(N)=O)C(=O)C(N)=C(C)C1=O,-2.564
|
442 |
+
CO[P@@](=O)(NC(C)=O)SC,0.54
|
443 |
+
COc1c2ccoc2c(OC)c2oc(C)cc(=O)c12,-3.017
|
444 |
+
COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,-2.87
|
445 |
+
COc1cc(N[C@@H](C)CCCN)c2ncccc2c1,-2.52
|
446 |
+
COc1cc2CC[C@H](NC(C)=O)c3cc(=O)c(OC)ccc3-c2c(OC)c1OC,-0.944
|
447 |
+
COc1cc2nc(nc(N)c2cc1OC)N1CCN(CC1)C(=O)[C@H]1CCCO1,-1.8
|
448 |
+
COc1cc2nc(nc(N)c2cc1OC)N1CCN(CC1)C(=O)c1ccco1,-5.086
|
449 |
+
COc1ccc(C=O)cc1,-1.49
|
450 |
+
COc1ccc(CC=C)cc1,-2.92
|
451 |
+
COc1ccc(CCN(C)CCC[C@@](C#N)(C(C)C)c2ccc(OC)c(OC)c2)cc1OC,-4.6
|
452 |
+
COc1ccc(Cc2nccc3cc(OC)c(OC)cc23)cc1OC,-4.103
|
453 |
+
COc1ccc(Cl)cc1,-2.78
|
454 |
+
COc1ccc(Cl)cc1C(=O)NCCc1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1,-6.755
|
455 |
+
COc1ccc(NC(=O)N(C)C)cc1Cl,-2.564
|
456 |
+
COc1ccc(NC(C)=O)cc1,-1.3
|
457 |
+
COc1ccc(cc1)[C@@H]1Sc2ccccc2N(CCN(C)C)C(=O)[C@@H]1OC(C)=O,-3.0545
|
458 |
+
COc1ccc(cc1)[C@@](O)(C1CC1)c1cncnc1,-2.598
|
459 |
+
COc1ccc(cc1)[N+]([O-])=O,-2.41
|
460 |
+
COc1ccc2COC(=O)c2c1OC,-1.8945
|
461 |
+
COc1ccc2cc(ccc2c1)[C@H](C)C(O)=O,-4.215
|
462 |
+
COc1ccc2nc([nH]c2c1)[S@](=O)Cc1ncc(C)c(OC)c1C,-3.42
|
463 |
+
COc1ccc2nccc([C@@H](O)[C@@H]3C[C@@H]4CC[N@]3C[C@@H]4C=C)c2c1,-2.788
|
464 |
+
COc1ccc2nccc([C@H](O)[C@H]3C[C@@H]4CC[N@]3C[C@@H]4C=C)c2c1,-2.812
|
465 |
+
COc1cccc(Cl)c1,-2.78
|
466 |
+
COc1cccc(c1)[C@@]1(O)CCCC[C@@H]1CN(C)C,-2.24
|
467 |
+
COc1ccccc1,-1.85
|
468 |
+
COc1ccccc1Cl,-2.46
|
469 |
+
COc1ccccc1OC[C@@H](O)CO,-0.598
|
470 |
+
COc1ccccc1OC[C@@H](O)COC(N)=O,-0.985
|
471 |
+
COc1ccccc1[N+]([O-])=O,-1.96
|
472 |
+
COc1cnc2cncnc2n1,-0.91
|
473 |
+
COc1cnc2ncncc2n1,-1.139
|
474 |
+
COc1ncc2nccnc2n1,-1.112
|
475 |
+
COc1ncnc2nccnc12,-1.112
|
476 |
+
CS(=O)(=O)OCCCCOS(C)(=O)=O,-2.267
|
477 |
+
CS(=O)(=O)c1ccc(cc1)[C@@H](O)[C@@H](CO)NC(=O)C(Cl)Cl,-2.154
|
478 |
+
CSCC[C@H](N)C(O)=O,-0.42
|
479 |
+
CSc1ccc2Sc3ccccc3N(CC[C@H]3CCCCN3C)c2c1,-5.362
|
480 |
+
CSc1ccccc1,-2.39
|
481 |
+
CSc1cnc2cncnc2n1,-1.551
|
482 |
+
CSc1ncc2nccnc2n1,-1.754
|
483 |
+
CSc1ncnc2nccnc12,-2.365
|
484 |
+
CSc1nnc(c(=O)n1N)C(C)(C)C,-2.253
|
485 |
+
C[C@@H](C(O)=O)c1ccc(c(F)c1)-c1ccccc1,-4.256
|
486 |
+
C[C@@H](C(O)=O)c1ccc(cc1)N1Cc2ccccc2C1=O,-4.772
|
487 |
+
C[C@@H](C(O)=O)c1ccc2c(c1)[nH]c1ccc(Cl)cc21,-4.699
|
488 |
+
C[C@@H](C(O)=O)c1cccc(Oc2ccccc2)c1,-3.699
|
489 |
+
C[C@@H](C(O)=O)c1cccc(c1)C(=O)c1ccccc1,-3.38
|
490 |
+
C[C@@H](CCc1ccccc1)NC[C@H](O)c1ccc(O)c(c1)C(N)=O,-3.41
|
491 |
+
C[C@@H](CN1c2ccccc2Sc2ccccc12)N(C)C,-4.34
|
492 |
+
C[C@@H](Cc1ccccc1)N(C)CC#C,-2.513
|
493 |
+
C[C@@H](N(O)C(N)=O)c1cc2ccccc2s1,-3.373
|
494 |
+
C[C@@H](N)C(O)=O,0.243
|
495 |
+
C[C@@H](N)CC(O)=O,1.08
|
496 |
+
C[C@@H](O)C(C)(C)C,-0.62
|
497 |
+
C[C@@H](O)C(F)(F)F,0.3
|
498 |
+
C[C@@H](O)[C@@H](N)C(O)=O,-0.09
|
499 |
+
C[C@@H](O)c1ccccc1,-0.92
|
500 |
+
C[C@@H](Oc1ccc(Cl)cc1Cl)C(O)=O,-2.827
|
501 |
+
C[C@@H]1CCCC(=O)C1,-1.87
|
502 |
+
C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)C3=CC[C@]2(C)[C@H]1C(=O)CN1CCN(CC1)c1cc(nc(n1)N1CCCC1)N1CCCC1,-7.59
|
503 |
+
C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,-3.59
|
504 |
+
C[C@@H]1Nc2cc(Cl)c(cc2C(=O)N1c1ccccc1C)S(N)(=O)=O,-4.33
|
505 |
+
C[C@@]12CC[C@@H](CC1)C(C)(C)O2,-1.69
|
506 |
+
C[C@@]12CC[C@@H]3C(=CCc4cc(O)ccc34)[C@H]1CC[C@@H]2O,-4.402
|
507 |
+
C[C@H](CN(C)C)CN1c2ccccc2CCc2ccccc12,-4.796
|
508 |
+
C[C@H](N)C(O)=O,0.25
|
509 |
+
C[C@H](NCCC(c1ccccc1)c1ccccc1)c1ccccc1,-4
|
510 |
+
C[C@H](O)Cn1cnc2n(C)c(=O)n(C)c(=O)c12,0.623
|
511 |
+
C[C@H](OC(=O)Nc1cccc(Cl)c1)C#C,-2.617
|
512 |
+
C[C@H]1CCCCC1=O,-0.94
|
513 |
+
C[C@H]1CC[C@@H](CC1=O)C(C)=C,-2.18
|
514 |
+
C[C@H]1CC[C@H](O)CC1,-0.88
|
515 |
+
C[C@H]1CCc2cc(F)cc3c2n1cc(C(O)=O)c3=O,-3.733
|
516 |
+
C[C@H]1CN(C[C@@H](C)N1)c1c(F)c(N)c2c(c1F)n(cc(C(O)=O)c2=O)C1CC1,-3.371
|
517 |
+
C[C@H]1COc2c(N3CCN(C)CC3)c(F)cc3c2n1cc(C(O)=O)c3=O,-1.266
|
518 |
+
C[C@H]1C[C@H](C)C(=O)[C@@H](C1)[C@H](O)CC1CC(=O)NC(=O)C1,-1.13
|
519 |
+
C[C@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,-3.77
|
520 |
+
C[C@H]1C[C@H]2[C@@H]3CC[C@](O)(C(=O)CO)[C@@]3(C)C[C@H](O)[C@@H]2[C@@]2(C)C=CC(=O)C=C12,-2.99
|
521 |
+
C[C@H]1Cc2ccccc2N1NC(=O)c1ccc(Cl)c(c1)S(N)(=O)=O,-3.792
|
522 |
+
C[C@H]1[C@H](NC(=O)C(=N\OC(C)(C)C(O)=O)\c2csc(N)n2)C(=O)N1S(O)(=O)=O,-1.639
|
523 |
+
C[C@H]1[C@H]2Cc3ccc(O)cc3[C@]1(C)CCN2CC=C(C)C,-3.803
|
524 |
+
C[C@H]1[C@H]2[C@H](C[C@H]3[C@@H]4CC=C5C[C@@H](O)CC[C@]5(C)[C@H]4CC[C@]23C)O[C@]11CC[C@@H](C)CO1,-2.618
|
525 |
+
C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,-3.99
|
526 |
+
C[C@]12CC(=O)[C@H]3[C@@H](CCC4=CC(=O)C=C[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.239
|
527 |
+
C[C@]12CC(=O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.27
|
528 |
+
C[C@]12CC[C@H]3C(=CCc4cc(O)ccc34)[C@@H]1CCC2=O,-5.282
|
529 |
+
C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@]34C)[C@@H]1CCC2=O,-4.064
|
530 |
+
C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@H]34)[C@@H]1CC[C@@]2(O)C#C,-4.63
|
531 |
+
C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@@H]2C(=O)CO,-3.45
|
532 |
+
C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@@H]2O,-4.159
|
533 |
+
C[C@]12CC[C@H]3[C@@H](CCC4=Cc5oncc5C[C@]34C)[C@@H]1CC[C@@]2(O)C#C,-5.507
|
534 |
+
C[C@]12CC[C@H]3[C@@H](CC[C@H]4CC(=O)CC[C@]34C)[C@@H]1CC[C@@H]2O,-4.743
|
535 |
+
C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1CCC2=O,-3.955
|
536 |
+
C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1CC[C@@H]2O,-4.845
|
537 |
+
C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1CC[C@@]2(O)C#C,-4.217
|
538 |
+
C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1C[C@@H](O)[C@@H]2O,-4.955
|
539 |
+
C[C@]12CCc3c(ccc4cc(O)ccc34)[C@@H]1CCC2=O,-5.249
|
540 |
+
C[C@]12C[C@H](O)[C@@]3(F)[C@@H](CCC4=CC(=O)C=C[C@]34C)[C@@H]1C[C@@H](O)[C@]2(O)C(=O)CO,-3.693
|
541 |
+
C[C@]12C[C@H](O)[C@@]3(F)[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.434
|
542 |
+
C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)C=C[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.18
|
543 |
+
C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@@H]2C(=O)CO,-3.24
|
544 |
+
C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.1
|
545 |
+
C\C(O)=C\C(C)=O,0.22
|
546 |
+
C\N=C(/NCCSCc1[nH]cnc1C)NC#N,-1.655
|
547 |
+
C\N=C1\CN(O)C(c2ccccc2)=c2cc(Cl)ccc2=N1,-2.176
|
548 |
+
Cc1c(N)c(=O)n(-c2ccccc2)n1C,-0.622
|
549 |
+
Cc1c[nH]c(=O)[nH]c1=O,-1.499
|
550 |
+
Cc1c[nH]c(=O)nc1N,-0.56
|
551 |
+
Cc1c[nH]c2ccccc12,-2.42
|
552 |
+
Cc1cc(=O)c2ccccc2[nH]1,-1.2
|
553 |
+
Cc1cc(=O)n(-c2ccccc2)n1C,0.48
|
554 |
+
Cc1cc(C)c(O)c(C)c1,-2.05
|
555 |
+
Cc1cc(C)cc(O)c1,-1.4
|
556 |
+
Cc1cc(C)nc(NS(=O)(=O)c2ccc(N)cc2)n1,-2.73
|
557 |
+
Cc1cc(NS(=O)(=O)c2ccc(N)cc2)no1,-2.705
|
558 |
+
Cc1cc(no1)C(=O)NNCc1ccccc1,-2.461
|
559 |
+
Cc1ccc(C)c(O)c1,-1.54
|
560 |
+
Cc1ccc(CO)cc1,-1.2
|
561 |
+
Cc1ccc(Cl)c(Nc2ccccc2C(O)=O)c1Cl,-6.267
|
562 |
+
Cc1ccc(O)c(C)c1,-1.1975
|
563 |
+
Cc1ccc(O)cc1,-0.72
|
564 |
+
Cc1ccc(O)cc1C,-1.39
|
565 |
+
Cc1ccc(cc1)C(=O)c1ccc(CC(O)=O)n1C,-4.092
|
566 |
+
Cc1ccc(cc1)S(=O)(=O)NC(=O)NN1C[C@@H]2CCC[C@@H]2C1,-4.175
|
567 |
+
Cc1ccc(cc1)S(N)(=O)=O,-1.74
|
568 |
+
Cc1ccc(cc1)[N+]([O-])=O,-2.49
|
569 |
+
Cc1ccc(cc1[N+]([O-])=O)[N+]([O-])=O,-2.82
|
570 |
+
Cc1cccc(C)c1O,-1.296666667
|
571 |
+
Cc1cccc(CN2CCN(CC2)[C@H](c2ccccc2)c2ccc(Cl)cc2)c1,-6.481
|
572 |
+
Cc1cccc(Nc2ccccc2C(O)=O)c1C,-6.544
|
573 |
+
Cc1cccc(O)c1,-0.69025
|
574 |
+
Cc1cccc(O)c1C,-1.43
|
575 |
+
Cc1cccc(c1)[N+]([O-])=O,-2.44
|
576 |
+
Cc1cccc2sc3nncn3c12,-2.07
|
577 |
+
Cc1ccccc1-n1c(C)nc2ccccc2c1=O,-2.921
|
578 |
+
Cc1ccccc1N,-1.756666667
|
579 |
+
Cc1ccccc1O,-0.62375
|
580 |
+
Cc1ccccc1S(N)(=O)=O,-2.02
|
581 |
+
Cc1ccccc1[N+]([O-])=O,-2.33
|
582 |
+
Cc1ccnc(NS(=O)(=O)c2ccc(N)cc2)n1,-3.1205
|
583 |
+
Cc1ccsc1,-2.39
|
584 |
+
Cc1cn([C@H]2C[C@H]([N-][N+]#N)[C@@H](CO)O2)c(=O)[nH]c1=O,-1.0645
|
585 |
+
Cc1cnc(cn1)C(=O)NCCc1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1,-5.454
|
586 |
+
Cc1cnc2cncnc2n1,-0.854
|
587 |
+
Cc1nc2[C@@H](O)CCCn2c(=O)c1CCN1CCC(CC1)c1noc2cc(F)ccc12,-4.435
|
588 |
+
Cc1ncc(n1CCO)[N+]([O-])=O,-1.2165
|
589 |
+
Cc1ncc2nccnc2n1,-0.094
|
590 |
+
Cc1ncnc2nccnc12,-0.466
|
591 |
+
Cc1nnc(NS(=O)(=O)c2ccc(N)cc2)s1,-2.779
|
592 |
+
Cc1nnc(SCC2=C(N3[C@H](SC2)[C@H](NC(=O)Cn2cnnn2)C3=O)C(O)=O)s1,-2.616
|
593 |
+
Cc1nnc2CN=C(c3ccccc3)c3cc(Cl)ccc3-n12,-3.6
|
594 |
+
Cc1nnc2CN=C(c3ccccc3Cl)c3cc(Cl)ccc3-n12,-4.095
|
595 |
+
ClC(Cl)(Cl)C#N,-2.168
|
596 |
+
ClC(Cl)C(=O)N(CC=C)CC=C,-1.62
|
597 |
+
ClCCOCCCl,-1.12
|
598 |
+
ClCCS(=O)(=O)CCCl,-1.5
|
599 |
+
ClCCS(=O)CCCl,-1.16
|
600 |
+
Clc1ccc(CC[C@@](Cn2cncn2)(C#N)c2ccccc2)cc1,-6.226
|
601 |
+
Clc1ccc(CO[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)c(Cl)c1,-5.4305
|
602 |
+
Clc1ccc2oc(=O)[nH]c2c1,-2.7105
|
603 |
+
Clc1cccc(-c2c[nH]cc2C#N)c1Cl,-5.074
|
604 |
+
Clc1cccc(Cl)c1NC1=NCCN1,-0.1
|
605 |
+
Clc1cccc(N2CCN(CCCCOc3ccc4CCC(=O)Nc4c3)CC2)c1Cl,-6.585
|
606 |
+
Clc1cccc(c1)N1CCN(CCCn2nc3ccccn3c2=O)CC1,-3.1875
|
607 |
+
Clc1cnc2cncnc2n1,-0.876
|
608 |
+
Clc1cnc2ncncc2n1,-1.124
|
609 |
+
Clc1ncc2nccnc2n1,-0.699
|
610 |
+
Cn1c(=O)on(-c2ccc(Cl)c(Cl)c2)c1=O,-2.82
|
611 |
+
Cn1c2nc[nH]c2c(=O)n(C)c1=O,-1.3625
|
612 |
+
Cn1c2ncn(C[C@@H](O)CO)c2c(=O)n(C)c1=O,0.118
|
613 |
+
Cn1ccc(=O)[nH]c1=O,-0.8035
|
614 |
+
Cn1ccccc1=O,0.96
|
615 |
+
Cn1cnc(c1Sc1ncnc2nc[nH]c12)[N+]([O-])=O,-3.3255
|
616 |
+
Cn1cnc2n(C)c(=O)[nH]c(=O)c12,-2.557
|
617 |
+
Cn1cnc2n(C)c(=O)n(C)c(=O)c12,-0.951
|
618 |
+
Cn1nnnc1SCC1=C(N2[C@H](SC1)[C@H](NC(=O)[C@H](O)c1ccccc1)C2=O)C(O)=O,-0.143
|
619 |
+
FC1(F)Oc2cccc(c2O1)-c1c[nH]cc1C#N,-5.21
|
620 |
+
Fc1c[nH]c(=O)[nH]c1=O,-1.028
|
621 |
+
Fc1ccc(cc1)C(=O)CCCN1CCC(CC1)n1c2ccccc2[nH]c1=O,-4.28
|
622 |
+
N#Cc1ccccc1,-1
|
623 |
+
N#Cc1ccccc1C#N,-2.38
|
624 |
+
N(Nc1ccccc1)c1ccccc1,-2.92
|
625 |
+
NC(=N)NCC(O)=O,-1.51
|
626 |
+
NC(=N)Nc1nc(CSCC\C(N)=N\S(N)(=O)=O)cs1,-2.49
|
627 |
+
NC(=O)N1c2ccccc2C=Cc2ccccc12,-3.294
|
628 |
+
NC(=O)NCc1ccccc1,-0.95
|
629 |
+
NC(=O)OCc1ccccc1,-0.35
|
630 |
+
NC(=O)c1ccccc1,-0.953
|
631 |
+
NC(=O)c1ccccc1C(N)=O,-2.771
|
632 |
+
NC(=O)c1ccccc1O,-1.836
|
633 |
+
NC(=O)c1cccnc1,0.913
|
634 |
+
NC(=O)c1cnccn1,-0.914
|
635 |
+
NC(=S)Nc1ccccc1,-1.77
|
636 |
+
NC(N)=NC(=O)c1nc(Cl)c(N)nc1N,-3.13
|
637 |
+
NC(N)=N[N+]([O-])=O,-1.37
|
638 |
+
NCCc1c[nH]c2ccccc12,-3.301029996
|
639 |
+
NC[C@@H](CC(O)=O)c1ccc(Cl)cc1,-1.696
|
640 |
+
NNC(=O)c1ccncc1,0.009
|
641 |
+
NNc1nncc2ccccc12,-2.6
|
642 |
+
NS(=O)(=O)c1cc(C(O)=O)c(NCc2ccco2)cc1Cl,-4.75
|
643 |
+
NS(=O)(=O)c1cc(ccc1Cl)[C@]1(O)NC(=O)c2ccccc12,-3.451
|
644 |
+
NS(=O)(=O)c1cc2c(NCNS2(=O)=O)cc1C(F)(F)F,-2.98
|
645 |
+
NS(=O)(=O)c1cc2c(NCNS2(=O)=O)cc1Cl,-2.689
|
646 |
+
NS(=O)(=O)c1cc2c(N[C@@H](Cc3ccccc3)NS2(=O)=O)cc1C(F)(F)F,-4.298
|
647 |
+
NS(=O)(=O)c1cc2c(N[C@@H](NS2(=O)=O)C(Cl)Cl)cc1Cl,-3.354253415
|
648 |
+
NS(=O)(=O)c1cc2c(N\C(NS2(=O)=O)=C/SCc2ccccc2)cc1Cl,-4.829
|
649 |
+
NS(=O)(=O)c1ccccc1,-1.56
|
650 |
+
N[C@@H](CC(N)=O)C(O)=O,-0.74
|
651 |
+
N[C@@H](CCC(N)=O)C(O)=O,-0.55
|
652 |
+
N[C@@H](Cc1cc(I)c(Oc2cc(I)c(O)c(I)c2)c(I)c1)C(O)=O,-4.259
|
653 |
+
N[C@@H](Cc1ccc(O)c(O)c1)C(O)=O,-1.818
|
654 |
+
N[C@@H](Cc1ccc(cc1)N(CCCl)CCCl)C(O)=O,-3.485
|
655 |
+
N[C@@H](Cc1ccccc1)C(O)=O,-0.804
|
656 |
+
N[C@@H]1CONC1=O,-0.009
|
657 |
+
N[C@H]1[C@@H]2CN(C[C@H]12)c1nc2n(cc(C(O)=O)c(=O)c2cc1F)-c1ccc(F)cc1F,-4.48
|
658 |
+
N[C@]12C[C@H]3C[C@H](C[C@H](C3)C1)C2,-1.854
|
659 |
+
Nc1[nH]c(=O)nc2nc[nH]c12,-3.401
|
660 |
+
Nc1cc(nc(=N)n1O)N1CCCCC1,-2.009
|
661 |
+
Nc1cc[nH]c(=O)n1,-1.159
|
662 |
+
Nc1ccc(Br)cc1C(O)=O,-3.074
|
663 |
+
Nc1ccc(C(O)=O)c(O)c1,-1.963
|
664 |
+
Nc1ccc(Cl)cc1,-1.66
|
665 |
+
Nc1ccc(O)c(c1)C(O)=O,-2.259
|
666 |
+
Nc1ccc(\N=N\c2ccccc2)c(N)n1,-4.362
|
667 |
+
Nc1ccc(cc1)C(O)=O,-1.368
|
668 |
+
Nc1ccc(cc1)S(=O)(=O)Nc1ncccn1,-3.529
|
669 |
+
Nc1ccc(cc1)S(=O)(=O)Nc1nccs1,-2.7465
|
670 |
+
Nc1ccc(cc1)S(=O)(=O)c1ccc(N)cc1,-3.094
|
671 |
+
Nc1ccc(cc1)S(N)(=O)=O,-1.361
|
672 |
+
Nc1ccc(cc1)[N+]([O-])=O,-2.37
|
673 |
+
Nc1cccc(Cl)c1,-1.37
|
674 |
+
Nc1cccc(c1)C(F)(F)F,-1.47
|
675 |
+
Nc1cccc(c1)[N+]([O-])=O,-2.19
|
676 |
+
Nc1cccc2ccccc12,-1.92
|
677 |
+
Nc1ccccc1,-0.4115
|
678 |
+
Nc1ccccc1Cl,-1.52
|
679 |
+
Nc1ccccc1O,-0.72
|
680 |
+
Nc1ccccc1[N+]([O-])=O,-1.96
|
681 |
+
Nc1cnc2cncnc2n1,-2.313
|
682 |
+
Nc1cnc2ncncc2n1,-2.343
|
683 |
+
Nc1cnn(-c2ccccc2)c(=O)c1Cl,-2.874
|
684 |
+
Nc1nc(=O)[nH]cc1F,-0.959
|
685 |
+
Nc1nc(=O)c2nc(CNc3ccc(cc3)C(=O)N[C@@H](CCC(O)=O)C(O)=O)cnc2[nH]1,-5.344
|
686 |
+
Nc1nc(=O)c2ncn(COCCO)c2[nH]1,-2.2244
|
687 |
+
Nc1nc2nc[nH]c2c(=O)[nH]1,-4.0045
|
688 |
+
Nc1ncc2nccnc2n1,-2.298
|
689 |
+
Nc1ncnc2n(cnc12)[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,-1.728
|
690 |
+
Nc1ncnc2nc[nH]c12,-2.432
|
691 |
+
Nc1ncnc2nccnc12,-2.313
|
692 |
+
O=C(C1CCCCC1)N1C[C@H]2N(CCc3ccccc23)C(=O)C1,-2.893
|
693 |
+
O=C1C=CC(=O)C=C1,-0.9345
|
694 |
+
O=C1CCC(=O)N1,0.3
|
695 |
+
O=C1CCCCC1,-0.6
|
696 |
+
O=C1CNC(=O)CN1,-0.83
|
697 |
+
O=C1CNC(=O)N1,-0.4
|
698 |
+
O=C1C[C@@H]2OCC=C3CN4CC[C@@]56[C@@H]4C[C@@H]3[C@@H]2[C@@H]5N1c1ccccc61,-3.33
|
699 |
+
O=C1N([C@@H]2CCC(=O)NC2=O)C(=O)c2ccccc12,-3.699
|
700 |
+
O=C1NC(=O)C(C(=O)N1)(c1ccccc1)c1ccccc1,-4.196
|
701 |
+
O=C1NC(=O)C(C2CCC2)C(=O)N1,-2.349
|
702 |
+
O=C1NC(=O)C(C2CCCC2)C(=O)N1,-3.06
|
703 |
+
O=C1NC(=O)C(C2CCCCC2)C(=O)N1,-3.168
|
704 |
+
O=C1NC(=O)C(C2CCCCCC2)C(=O)N1,-2.982
|
705 |
+
O=C1NC(=O)C(N1)(c1ccccc1)c1ccccc1,-4.125
|
706 |
+
O=C1NC(=O)C2(CC2)C(=O)N1,-1.886
|
707 |
+
O=C1NS(=O)(=O)c2ccccc12,-1.725
|
708 |
+
O=C1OC(=O)c2ccccc12,-1.39
|
709 |
+
O=CNc1ccccc1,-0.68
|
710 |
+
O=Cc1ccc2OCOc2c1,-1.63
|
711 |
+
O=Cc1ccccc1,-1.2075
|
712 |
+
O=Cc1ccco1,-0.093925
|
713 |
+
O=NN1CCCCC1,-0.17
|
714 |
+
O=c1[nH]c2[nH]c(=O)[nH]c(=O)c2[nH]1,-3.402
|
715 |
+
O=c1[nH]c2nc[nH]c2c(=O)[nH]1,-2.483
|
716 |
+
O=c1cc[nH]c(=O)[nH]1,-1.493
|
717 |
+
O=c1cc[nH]cc1,1.02
|
718 |
+
O=c1cc[nH]cn1,0.59
|
719 |
+
O=c1ccc2ccccc2[nH]1,-2.14
|
720 |
+
O=c1ccc2ccccc2o1,-1.89
|
721 |
+
O=c1cccc[nH]1,1.02
|
722 |
+
O=c1cnc2cncnc2[nH]1,-2.124
|
723 |
+
O=c1cnc2ncncc2[nH]1,-2.714
|
724 |
+
O=c1nc2nccnc2c[nH]1,-1.947
|
725 |
+
O=c1nc[nH]c2n[nH]cc12,-2.453
|
726 |
+
O=c1nc[nH]c2nc[nH]c12,-2.28
|
727 |
+
O=c1nc[nH]c2nccnc12,-1.471
|
728 |
+
OC(=O)CCC(=O)c1ccc(cc1)-c1ccccc1,-5.2455
|
729 |
+
OC(=O)Cc1ccc(OCC=C)c(Cl)c1,-3.125
|
730 |
+
OC(=O)Cc1ccccc1Nc1c(Cl)cccc1Cl,-5.398
|
731 |
+
OC(=O)Cc1ccccc1Oc1ccc(Cl)cc1Cl,-3.854
|
732 |
+
OC(=O)[C@@H]1CCCN1,1.149
|
733 |
+
OC(=O)c1cc(Br)c(O)cc1O,-2.62
|
734 |
+
OC(=O)c1cc(ccc1O)-c1ccc(F)cc1F,-4.72275
|
735 |
+
OC(=O)c1cc(ccc1O)\N=N\c1ccc(cc1)S(=O)(=O)Nc1ccccn1,-6.137
|
736 |
+
OC(=O)c1ccc(O)cc1,-1.464
|
737 |
+
OC(=O)c1ccc2ccccc2c1,-3.8
|
738 |
+
OC(=O)c1cccc2ccccc12,-3.774
|
739 |
+
OC(=O)c1ccccc1,-1.58
|
740 |
+
OC(=O)c1ccccc1C(O)=O,-1.50975
|
741 |
+
OC(=O)c1ccccc1Nc1cccc(c1)C(F)(F)F,-5.33
|
742 |
+
OC(=O)c1ccccc1O,-1.931
|
743 |
+
OC(=O)c1cccnc1,-0.85
|
744 |
+
OC(=O)c1cccnc1Nc1cccc(c1)C(F)(F)F,-4.585
|
745 |
+
OC(=O)c1cn(-c2ccc(F)cc2)c2cc(N3CCNCC3)c(F)cc2c1=O,-3.131
|
746 |
+
OC(=O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc2c1=O,-3.742
|
747 |
+
OC(CCl)CCl,-0.11
|
748 |
+
OC(Cn1cncn1)(Cn1cncn1)c1ccc(F)cc1F,-1.8
|
749 |
+
OC(c1ccccc1)c1ccccc1,-2.55
|
750 |
+
OC1(CCN(CCCC(=O)c2ccc(F)cc2)CC1)c1ccc(Cl)cc1,-4.3645
|
751 |
+
OC1CCCCC1,-0.44
|
752 |
+
OC1CCCCCC1,-0.88
|
753 |
+
OC1CCCCCCC1,-1.29
|
754 |
+
OCC(O)CO,1.12
|
755 |
+
OCCCc1ccccc1,-1.38
|
756 |
+
OCCN(CCO)c1ccccc1,-0.73
|
757 |
+
OCCN(CCO)c1nc(N2CCCCC2)c2nc(nc(N3CCCCC3)c2n1)N(CCO)CCO,-5.332
|
758 |
+
OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc23)CC1,-4.3875
|
759 |
+
OCCN1CCN(CC\C=C2/c3ccccc3Sc3ccc(cc23)C(F)(F)F)CC1,-4.0185
|
760 |
+
OCCOc1ccccc1,-0.703333333
|
761 |
+
OCCc1ccccc1,-0.74
|
762 |
+
OC[C@@H](NC(=O)C(Cl)Cl)[C@H](O)c1ccc(cc1)[N+]([O-])=O,-2.111
|
763 |
+
OC[C@@H](O)[C@@H](O)[C@H](O)C(=O)CO,0.64
|
764 |
+
OC[C@@H](O)[C@@H](O)[C@H](O)[C@@H](O)C=O,0.74
|
765 |
+
OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,0.05845
|
766 |
+
OC[C@@H](O)[C@H]1O[C@@H]2O[C@@H](O[C@@H]2[C@H]1O)C(Cl)(Cl)Cl,-1.84
|
767 |
+
OC[C@@H]1CC[C@@H](O1)n1cnc2c1[nH]cnc2=O,-0.937
|
768 |
+
OC[C@H](O)COC(=O)c1ccccc1Nc1ccnc2cc(Cl)ccc12,-4.571
|
769 |
+
OC[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,1.148
|
770 |
+
OC[C@H](O)[C@H](O)CO,0.7
|
771 |
+
OC[C@H](O)[C@H]1OC(=O)C(O)=C1O,0.277
|
772 |
+
OC[C@H]1O[C@@H](OC[C@H]2O[C@@H](O[C@H](C#N)c3ccccc3)[C@H](O)[C@@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@@H]1O,-0.77
|
773 |
+
OC[C@H]1O[C@@H](Oc2ccccc2CO)[C@H](O)[C@@H](O)[C@@H]1O,-0.85
|
774 |
+
OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O,0.74
|
775 |
+
OC[C@H]1O[C@H]([C@H](O)[C@@H]1O)n1cnc2c1[nH]cnc2=O,-1.23
|
776 |
+
OC[C@]1(O)OC[C@@H](O)[C@@H](O)[C@@H]1O,0.64
|
777 |
+
OCc1c(Cl)cccc1Cl,-2.1
|
778 |
+
OCc1ccccc1,-0.4
|
779 |
+
OCc1ccccc1O,-0.29
|
780 |
+
O[C@@H](C(=O)c1ccccc1)c1ccccc1,-2.85
|
781 |
+
O[C@@H]([C@@H](O)c1ccccc1)c1ccccc1,-1.93
|
782 |
+
O[C@@H]1CCOC1,1.05
|
783 |
+
O[C@@H]1CO[C@H](O)[C@@H](O)[C@@H]1O,0.39
|
784 |
+
O[C@@H]1C[C@@H](O)[C@H](O)C(O)[C@H]1O,-0.17
|
785 |
+
O[C@@H]1N=C(c2ccccc2)c2cc(Cl)ccc2NC1=O,-3.952
|
786 |
+
O[C@@H]1N=C(c2ccccc2Cl)c2cc(Cl)ccc2NC1=O,-3.604
|
787 |
+
O[C@@](CCN1CCCC1)(C1CCCCC1)c1ccccc1,-4.7
|
788 |
+
O[C@@](CCN1CCCCC1)(C1CCCCC1)c1ccccc1,-5.2
|
789 |
+
O[C@H](Cc1ccccc1)c1ccccc1,-2.52
|
790 |
+
O[C@H]1CO[C@H](O)[C@H](O)[C@H]1O,0.39
|
791 |
+
Oc1ccc(Cl)cc1,-0.7
|
792 |
+
Oc1ccc(I)cc1,-1.714
|
793 |
+
Oc1ccc(O)cc1,-0.1695
|
794 |
+
Oc1ccc(cc1)C1(OC(=O)c2ccccc12)c1ccc(O)cc1,-2.9
|
795 |
+
Oc1ccc2CCCCc2c1,-1.99
|
796 |
+
Oc1ccc2C[C@H]3N(CC=C)CC[C@@]45[C@@H](Oc1c24)C(=O)CC[C@@]35O,-2.898
|
797 |
+
Oc1ccc2ccccc2c1,-2.159
|
798 |
+
Oc1cccc(Cl)c1,-0.7
|
799 |
+
Oc1cccc(O)c1,0.81
|
800 |
+
Oc1cccc2C(=O)c3cccc(O)c3C(=O)c12,-5.187
|
801 |
+
Oc1cccc2ccccc12,-1.9715
|
802 |
+
Oc1ccccc1,-0.00835
|
803 |
+
Oc1ccccc1C=O,-0.86
|
804 |
+
Oc1ccccc1O,0.62
|
805 |
+
S=C1NCCN1,-0.71
|
806 |
+
S=c1cnc2cncnc2[nH]1,-2.706
|
807 |
+
S=c1nc2nccnc2c[nH]1,-2.629
|
808 |
+
S=c1nc[nH]c2nccnc12,-2.646
|
809 |
+
[O-][N+](=O)C(Cl)(Cl)Cl,-2
|
810 |
+
[O-][N+](=O)OCC(CO[N+]([O-])=O)O[N+]([O-])=O,-2.22
|
811 |
+
[O-][N+](=O)O[C@@H]1CO[C@@H]2[C@H](CO[C@H]12)O[N+]([O-])=O,-2.63
|
812 |
+
[O-][N+](=O)c1cc(cc(c1)[N+]([O-])=O)[N+]([O-])=O,-2.89
|
813 |
+
[O-][N+](=O)c1ccc(Cl)cc1,-2.92
|
814 |
+
[O-][N+](=O)c1ccc(\C=N\N2CC(=O)NC2=O)o1,-3.26
|
815 |
+
[O-][N+](=O)c1cccc(Cl)c1,-2.77
|
816 |
+
[O-][N+](=O)c1cccc(c1)[N+]([O-])=O,-2.316666667
|
817 |
+
[O-][N+](=O)c1ccccc1,-1.8
|
818 |
+
[O-][N+](=O)c1ccccc1Cl,-2.55
|
819 |
+
[O-][N+](=O)c1nccn1CC(=O)NCc1ccccc1,-2.81
|
820 |
+
c1[nH]nc2ccccc12,-2.16
|
821 |
+
c1cc2ccccc2[nH]1,-1.416666667
|
822 |
+
c1ccc(nc1)-c1ccccn1,-1.42
|
823 |
+
c1ccc2n[nH]nc2c1,-0.78
|
824 |
+
c1ccc2ncccc2c1,-1.3
|
825 |
+
c1cn(cn1)[C@H](c1ccccc1)c1ccc(cc1)-c1ccccc1,-6.27
|
826 |
+
c1cnc2c(c1)ccc1cccnc21,-1.618
|
827 |
+
c1cnc2ncncc2n1,0.0205
|
828 |
+
c1nc(cs1)-c1nc2ccccc2[nH]1,-3.484
|
829 |
+
c1nc2ccccc2o1,-1.16
|
830 |
+
c1nc2ccccc2s1,-1.5
|
data/delaney-processed.csv
ADDED
@@ -0,0 +1,1129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Compound ID,ESOL predicted log solubility in mols per litre,Minimum Degree,Molecular Weight,Number of H-Bond Donors,Number of Rings,Number of Rotatable Bonds,Polar Surface Area,measured log solubility in mols per litre,smiles
|
2 |
+
Amigdalin,-0.9740000000000001,1,457.4320000000001,7,3,7,202.31999999999996,-0.77,OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O
|
3 |
+
Fenfuram,-2.885,1,201.22500000000002,1,2,2,42.24,-3.3,Cc1occc1C(=O)Nc2ccccc2
|
4 |
+
citral,-2.5789999999999997,1,152.237,0,0,4,17.07,-2.06,CC(C)=CCCC(C)=CC(=O)
|
5 |
+
Picene,-6.617999999999999,2,278.354,0,5,0,0.0,-7.87,c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43
|
6 |
+
Thiophene,-2.2319999999999998,2,84.14299999999999,0,1,0,0.0,-1.33,c1ccsc1
|
7 |
+
benzothiazole,-2.733,2,135.191,0,2,0,12.89,-1.5,c2ccc1scnc1c2
|
8 |
+
"2,2,4,6,6'-PCB",-6.545,1,326.437,0,2,1,0.0,-7.32,Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cccc2Cl
|
9 |
+
Estradiol,-4.138,1,272.388,2,4,0,40.46,-5.03,CC12CCC3C(CCc4cc(O)ccc34)C2CCC1O
|
10 |
+
Dieldrin,-4.533,1,380.913,0,5,0,12.53,-6.29,ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl
|
11 |
+
Rotenone,-5.246,1,394.42300000000023,0,5,3,63.22,-4.42,COc5cc4OCC3Oc2c1CC(Oc1ccc2C(=O)C3c4cc5OC)C(C)=C
|
12 |
+
2-pyrrolidone,0.243,1,85.10600000000001,1,1,0,29.1,1.07,O=C1CCCN1
|
13 |
+
2-Chloronapthalene,-4.063,1,162.61899999999997,0,2,0,0.0,-4.14,Clc1ccc2ccccc2c1
|
14 |
+
1-Pentene ,-2.01,1,70.135,0,0,2,0.0,-2.68,CCCC=C
|
15 |
+
Primidone,-1.8969999999999998,1,218.256,2,2,2,58.2,-2.64,CCC1(C(=O)NCNC1=O)c2ccccc2
|
16 |
+
Tetradecane,-5.45,1,198.39399999999995,0,0,11,0.0,-7.96,CCCCCCCCCCCCCC
|
17 |
+
2-Chloropropane,-1.585,1,78.542,0,0,0,0.0,-1.41,CC(C)Cl
|
18 |
+
2-Methylbutanol,-1.0270000000000001,1,88.14999999999999,1,0,2,20.23,-0.47,CCC(C)CO
|
19 |
+
Benzonitrile,-2.03,1,103.12399999999997,0,1,0,23.79,-1.0,N#Cc1ccccc1
|
20 |
+
Diazinon,-3.989,1,304.35200000000003,0,1,7,53.47,-3.64,CCOP(=S)(OCC)Oc1cc(C)nc(n1)C(C)C
|
21 |
+
2-Undecanol,-3.096,1,172.312,1,0,8,20.23,-2.94,CCCCCCCCCC(C)O
|
22 |
+
"2,2',3,4,6-PCB",-6.627000000000001,1,326.437,0,2,1,0.0,-7.43,Clc1ccc(c(Cl)c1)c2c(Cl)ccc(Cl)c2Cl
|
23 |
+
Lenacil,-3.355,1,234.29899999999995,1,3,1,54.86,-4.593999999999999,O=c2[nH]c1CCCc1c(=O)n2C3CCCCC3
|
24 |
+
Phorate,-3.747,1,260.38599999999997,0,0,8,18.46,-4.11,CCOP(=S)(OCC)SCSCC
|
25 |
+
Phenacetin,-2.342,1,179.219,1,1,3,38.33,-2.35,CCOc1ccc(NC(=O)C)cc1
|
26 |
+
Dinitramine,-4.479,1,322.243,1,1,5,115.54000000000002,-5.47,CCN(CC)c1c(cc(c(N)c1N(=O)=O)C(F)(F)F)N(=O)=O
|
27 |
+
1-Heptanol,-1.751,1,116.204,1,0,5,20.23,-1.81,CCCCCCCO
|
28 |
+
Theophylline,-1.452,1,180.16699999999997,1,2,0,72.68,-1.39,Cn1c(=O)n(C)c2nc[nH]c2c1=O
|
29 |
+
Butethal,-1.974,1,212.249,2,1,4,75.27000000000001,-1.661,CCCCC1(CC)C(=O)NC(=O)NC1=O
|
30 |
+
"P,P'-DDE",-6.553,1,318.0300000000001,0,2,2,0.0,-6.9,ClC(Cl)=C(c1ccc(Cl)cc1)c2ccc(Cl)cc2
|
31 |
+
Methyl octanoate,-2.608,1,158.24099999999999,0,0,6,26.3,-3.17,CCCCCCCC(=O)OC
|
32 |
+
"1,4-Diethylbenzene ",-3.633,1,134.22199999999998,0,1,2,0.0,-3.75,CCc1ccc(CC)cc1
|
33 |
+
Terbufos,-4.367,1,288.44,0,0,7,18.46,-4.755,CCOP(=S)(OCC)SCSC(C)(C)C
|
34 |
+
Phenmedipham,-4.229,1,300.314,2,2,3,76.66,-4.805,COC(=O)Nc1cccc(OC(=O)Nc2cccc(C)c2)c1
|
35 |
+
"1,1-Dichloroethylene",-1.939,1,96.94399999999999,0,0,0,0.0,-1.64,ClC(=C)Cl
|
36 |
+
1-Methylfluorene,-4.478,1,180.25000000000003,0,3,0,0.0,-5.22,Cc1cccc2c1Cc3ccccc32
|
37 |
+
Valeraldehyde,-1.103,1,86.13399999999999,0,0,3,17.07,-0.85,CCCCC=O
|
38 |
+
Diphenylamine,-3.8569999999999998,2,169.227,1,2,2,12.03,-3.5039999999999996,N(c1ccccc1)c2ccccc2
|
39 |
+
Fenothiocarb,-3.2969999999999997,1,253.367,0,1,6,29.540000000000003,-3.927,CN(C)C(=O)SCCCCOc1ccccc1
|
40 |
+
Piperophos,-4.637,1,353.4900000000001,0,1,9,38.77,-4.15,CCCOP(=S)(OCCC)SCC(=O)N1CCCCC1C
|
41 |
+
1-Iodoheptane,-3.904,1,226.101,0,0,5,0.0,-4.81,CCCCCCCI
|
42 |
+
3-Chlorobiphenyl,-4.685,1,188.657,0,2,1,0.0,-4.88,c1c(Cl)cccc1c2ccccc2
|
43 |
+
4-Pentene-1-ol,-0.7909999999999999,1,86.134,1,0,3,20.23,-0.15,OCCCC=C
|
44 |
+
Cyclobutyl-5-spirobarbituric acid,-0.527,1,168.15200000000002,2,2,0,75.27,-1.655,O=C2NC(=O)C1(CCC1)C(=O)N2
|
45 |
+
menthol,-2.782,1,156.269,1,1,1,20.23,-2.53,CC(C)C1CCC(C)CC1O
|
46 |
+
Isopropyl formate,-0.684,1,88.106,0,0,2,26.3,-0.63,CC(C)OC=O
|
47 |
+
2-Heptanol ,-1.6780000000000002,1,116.20399999999998,1,0,4,20.23,-1.55,CCCCCC(C)O
|
48 |
+
p-Bromoacetanilide,-3.012,1,214.06199999999998,1,1,1,29.1,-3.083,CC(=O)Nc1ccc(Br)cc1
|
49 |
+
brompyrazone,-3.005,1,266.098,1,2,1,60.910000000000004,-3.127,c1ccccc1n2ncc(N)c(Br)c2(=O)
|
50 |
+
nifedipine,-4.248,1,346.33900000000017,1,2,4,107.77,-4.76,COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C
|
51 |
+
"2,7-dimethylquinoline",-3.342,1,157.216,0,2,0,12.89,-1.94,c2c(C)cc1nc(C)ccc1c2
|
52 |
+
1-Octyne ,-2.509,1,110.19999999999999,0,0,4,0.0,-3.66,CCCCCCC#C
|
53 |
+
cyclobarbital,-2.421,1,236.27099999999993,2,2,2,75.27000000000001,-2.17,CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2
|
54 |
+
Chrysene,-5.568,2,228.29399999999998,0,4,0,0.0,-8.057,c1ccc2c(c1)ccc3c4ccccc4ccc23
|
55 |
+
Bromacil,-3.4189999999999996,1,261.11899999999997,1,1,2,54.86,-2.523,CCC(C)n1c(=O)[nH]c(C)c(Br)c1=O
|
56 |
+
"2,2',3,3',5,6-PCB",-7.185,1,360.88200000000006,0,2,1,0.0,-8.6,Clc1cccc(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl
|
57 |
+
2-Methylphenol,-2.281,1,108.13999999999999,1,1,0,20.23,-0.62,Cc1ccccc1O
|
58 |
+
"2,2,5-Trimethylhexane",-3.6310000000000002,1,128.259,0,0,2,0.0,-5.05,CC(C)CCC(C)(C)C
|
59 |
+
"1,4-Dimethylnaphthalene ",-4.147,1,156.228,0,2,0,0.0,-4.14,Cc1ccc(C)c2ccccc12
|
60 |
+
6-Methylchrysene,-5.931,1,242.321,0,4,0,0.0,-6.57,Cc1cc2c3ccccc3ccc2c4ccccc14
|
61 |
+
2-Pentanone,-0.846,1,86.13399999999999,0,0,2,17.07,-0.19,CCCC(=O)C
|
62 |
+
"2,2',3,3',5,5',6,6'-PCB",-8.304,1,429.77200000000016,0,2,1,0.0,-9.15,Clc1cc(Cl)c(Cl)c(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl
|
63 |
+
Methyl butyrate,-1.545,1,116.15999999999998,0,0,3,26.3,-0.82,CCCOC(=O)CC
|
64 |
+
Triamcinolone,-2.734,1,394.43900000000014,4,4,2,115.06000000000002,-3.68,CC34CC(O)C1(F)C(CCC2=CC(=O)C=CC12C)C3CC(O)C4(O)C(=O)CO
|
65 |
+
p-Aminophenol,-1.2309999999999999,1,109.12799999999999,2,1,0,46.25,-0.8,Nc1ccc(O)cc1
|
66 |
+
Benznidazole,-2.3209999999999997,1,260.253,1,2,5,90.06,-2.81,O=C(Cn1ccnc1N(=O)=O)NCc2ccccc2
|
67 |
+
"Atovaquone(0,430mg/ml) - neutral",-6.269,1,366.84400000000016,1,4,2,54.37,-5.931,OC4=C(C1CCC(CC1)c2ccc(Cl)cc2)C(=O)c3ccccc3C4=O
|
68 |
+
Trietazine,-3.233,1,229.71499999999997,1,1,5,53.940000000000005,-4.06,CCNc1nc(Cl)nc(n1)N(CC)CC
|
69 |
+
Pyrazinamide,-0.674,1,123.11499999999998,1,1,1,68.87,-0.667,NC(=O)c1cnccn1
|
70 |
+
Carbromal,-2.198,1,237.09699999999998,2,0,3,72.19,-2.68,CCC(Br)(CC)C(=O)NC(N)=O
|
71 |
+
"2,2'-PCB",-4.984,1,223.102,0,2,1,0.0,-5.27,Clc1ccccc1c2ccccc2Cl
|
72 |
+
nitrofurantoin,-1.2429999999999999,1,238.15899999999996,1,2,3,118.04999999999998,-3.38,O=C2CN(N=Cc1ccc(o1)N(=O)=O)C(=O)N2
|
73 |
+
Nitrofen,-5.361000000000001,1,284.09799999999996,0,2,3,52.37,-5.46,Clc2ccc(Oc1ccc(cc1)N(=O)=O)c(Cl)c2
|
74 |
+
Camphor,-2.158,1,152.237,0,2,0,17.07,-1.96,CC1(C)C2CCC1(C)C(=O)C2
|
75 |
+
5-Allyl-5-phenylbarbital,-2.36,1,244.25,2,2,3,75.27000000000001,-2.369,O=C1NC(=O)NC(=O)C1(CC=C)c1ccccc1
|
76 |
+
Pentyl propanoate,-1.899,1,130.18699999999998,0,0,4,26.3,-2.25,CCCCC(=O)OCC
|
77 |
+
Isopentyl acetate,-1.817,1,130.18699999999998,0,0,3,26.3,-1.92,CC(C)CCOC(=O)C
|
78 |
+
3-Hexanoyloxymethylphenyltoin,-4.1530000000000005,1,380.444,1,3,8,75.71,-5.886,O=C1N(COC(=O)CCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
|
79 |
+
"2,3',5-PCB",-5.7620000000000005,1,257.547,0,2,1,0.0,-6.01,Clc1cccc(c1)c2cc(Cl)ccc2Cl
|
80 |
+
1-Bromopropane,-1.949,1,122.993,0,0,1,0.0,-1.73,CCCBr
|
81 |
+
Propiconazole,-4.603,1,342.2260000000001,0,3,5,49.17,-3.4930000000000003,CCCC1COC(Cn2cncn2)(O1)c3ccc(Cl)cc3Cl
|
82 |
+
Formothion,-2.0869999999999997,1,257.27299999999997,0,0,6,55.84,-1.995,COP(=S)(OC)SCC(=O)N(C)C=O
|
83 |
+
4-methylpteridine,-1.24,1,146.15299999999996,0,2,0,51.56,-0.466,Cc1ncnc2nccnc12
|
84 |
+
Thiourea,0.32899999999999996,1,76.12400000000001,2,0,0,52.04,0.32,NC(=S)N
|
85 |
+
p-Xylene ,-3.035,1,106.16799999999999,0,1,0,0.0,-2.77,Cc1ccc(C)cc1
|
86 |
+
"1,2-Diethylbenzene",-3.6010000000000004,1,134.22199999999998,0,1,2,0.0,-3.28,CCc1ccccc1CC
|
87 |
+
Hexachloroethane,-4.215,1,236.74,0,0,0,0.0,-3.67,ClC(Cl)(Cl)C(Cl)(Cl)Cl
|
88 |
+
Flucythrinate,-6.877999999999999,1,451.46900000000005,0,3,9,68.55000000000001,-6.876,CC(C)C(C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1)c3ccc(OC(F)F)cc3
|
89 |
+
1-Nitropropane,-0.816,1,89.09399999999998,0,0,2,43.14,-0.8,CCCN(=O)=O
|
90 |
+
Menthone,-2.516,1,154.253,0,1,1,17.07,-2.35,CC(C)C1CCC(C)CC1=O
|
91 |
+
RTI 24,-4.423,1,273.723,1,3,1,45.230000000000004,-5.36,CCN2c1cc(Cl)ccc1NC(=O)c3cccnc23
|
92 |
+
"2,3-Dichloronitrobenzene",-3.322,1,192.00100000000003,0,1,1,43.14,-3.48,O=N(=O)c1c(Cl)c(Cl)ccc1
|
93 |
+
thiamylal,-3.063,1,254.35500000000002,2,1,5,58.2,-3.46,CCCC(C)C1(CC=C)C(=O)NC(=S)NC1=O
|
94 |
+
Fluoranthene,-4.957,2,202.25599999999997,0,4,0,0.0,-6.0,c1ccc2c(c1)c3cccc4cccc2c34
|
95 |
+
Propylisopropylether,-1.354,1,102.17699999999998,0,0,3,9.23,-1.34,CCCOC(C)C
|
96 |
+
"1,3-Dimethylnaphthalene",-4.147,1,156.22799999999998,0,2,0,0.0,-4.29,Cc1cc(C)c2ccccc2c1
|
97 |
+
diethylstilbestrol,-5.074,1,268.356,2,2,4,40.46,-4.07,CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2
|
98 |
+
Chlorothalonil,-3.995,1,265.914,0,1,0,47.58,-5.64,c1(C#N)c(Cl)c(C#N)c(Cl)c(Cl)c(Cl)1
|
99 |
+
"2,3',4',5-PCB",-6.312,1,291.992,0,2,1,0.0,-7.25,Clc1ccc(Cl)c(c1)c2ccc(Cl)c(Cl)c2
|
100 |
+
styrene oxide,-1.8259999999999998,2,120.15099999999995,0,2,1,12.53,-1.6,C1OC1c2ccccc2
|
101 |
+
Isopropylbenzene ,-3.265,1,120.19499999999995,0,1,1,0.0,-3.27,CC(C)c1ccccc1
|
102 |
+
Deoxycorticosterone,-3.9389999999999996,1,330.4680000000001,1,4,2,54.370000000000005,-3.45,CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO
|
103 |
+
chlorquinox,-4.438,1,267.93,0,2,0,25.78,-5.43,c2(Cl)c(Cl)c(Cl)c1nccnc1c2(Cl)
|
104 |
+
L-arabinose,0.601,1,150.13,4,1,0,90.15,0.39,C1OC(O)C(O)C(O)C1O
|
105 |
+
Dichloromethane,-1.156,1,84.93299999999999,0,0,0,0.0,-0.63,ClCCl
|
106 |
+
1-Ethylnaphthalene ,-4.1,1,156.22799999999998,0,2,1,0.0,-4.17,CCc1cccc2ccccc12
|
107 |
+
Methyl formate,-0.048,1,60.05200000000001,0,0,1,26.3,0.58,COC=O
|
108 |
+
o-Nitrophenol,-2.318,1,139.10999999999999,1,1,1,63.37,-1.74,Oc1ccccc1N(=O)=O
|
109 |
+
thymine,-0.78,1,126.115,2,1,0,65.72,-1.506,Cc1c[nH]c(=O)[nH]c1=O
|
110 |
+
2-Methylpropane,-1.891,1,58.123999999999995,0,0,0,0.0,-2.55,CC(C)C
|
111 |
+
Inosine,-0.8340000000000001,1,268.22900000000004,4,3,2,133.75,-1.23,OCC1OC(C(O)C1O)n2cnc3c(O)ncnc23
|
112 |
+
Ioxynil,-4.615,1,370.91499999999996,1,1,0,44.019999999999996,-3.61,Oc1c(I)cc(C#N)cc1I
|
113 |
+
Niclosamide,-5.032,1,327.1230000000001,2,2,3,92.47,-4.7,Oc1ccc(Cl)cc1C(=O)Nc2ccc(cc2Cl)N(=O)=O
|
114 |
+
Pentane,-2.261,1,72.151,0,0,2,0.0,-3.18,CCCCC
|
115 |
+
Phenol,-1.9909999999999999,1,94.11299999999999,1,1,0,20.23,0.0,c1ccccc1O
|
116 |
+
2-aminoanthracene,-3.7889999999999997,1,193.249,1,3,0,26.02,-5.17,Nc3ccc2cc1ccccc1cc2c3
|
117 |
+
theobromine,-1.05,1,180.16699999999997,1,2,0,72.68,-2.523,Cn1cnc2n(C)c(=O)[nH]c(=O)c12
|
118 |
+
Isoquinoline,-2.531,2,129.16199999999998,0,2,0,12.89,-1.45,c1ccc2cnccc2c1
|
119 |
+
Anilofos,-5.106,1,367.86,0,1,7,38.77,-4.4319999999999995,COP(=S)(OC)SCC(=O)N(C(C)C)c1ccc(Cl)cc1
|
120 |
+
Hexylbenzene ,-4.22,1,162.276,0,1,5,0.0,-5.21,CCCCCCc1ccccc1
|
121 |
+
2-Chlorobiphenyl,-4.5280000000000005,1,188.657,0,2,1,0.0,-4.54,Clc1ccccc1c2ccccc2
|
122 |
+
2-Methyl-1-Pentene,-2.3480000000000003,1,84.16199999999999,0,0,2,0.0,-3.03,CCCC(=C)C
|
123 |
+
"2,3,4-Trimethylpentane",-3.2760000000000002,1,114.23199999999999,0,0,2,0.0,-4.8,CC(C)C(C)C(C)C
|
124 |
+
Pentachlorobenzene,-5.167999999999999,1,250.339,0,1,0,0.0,-5.65,Clc1cc(Cl)c(Cl)c(Cl)c1Cl
|
125 |
+
m-Nitrophenol,-2.318,1,139.10999999999999,1,1,1,63.37,-1.01,Oc1cccc(c1)N(=O)=O
|
126 |
+
1-Decene,-3.781,1,140.26999999999998,0,0,7,0.0,-5.51,CCCCCCCCC=C
|
127 |
+
Glyceryl triacetate,-1.285,1,218.20499999999998,0,0,5,78.9,-0.6,CC(=O)OCC(COC(=O)C)OC(=O)C
|
128 |
+
dimethirimol,-3.57,1,209.29299999999998,1,1,4,49.25000000000001,-2.24,CCCCc1c(C)nc(nc1O)N(C)C
|
129 |
+
Cyfluthrin,-6.84,1,434.29400000000015,0,3,6,59.32000000000001,-7.337000000000001,CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2ccc(F)c(Oc3ccccc3)c2
|
130 |
+
Pyridine,-1.4809999999999999,2,79.10199999999998,0,1,0,12.89,0.76,c1ccncc1
|
131 |
+
1-Bromoheptane,-3.366,1,179.101,0,0,5,0.0,-4.43,CCCCCCCBr
|
132 |
+
"3,4-Dimethylpyridine",-2.0669999999999997,1,107.15599999999999,0,1,0,12.89,0.36,Cc1ccncc1C
|
133 |
+
Fludrocortisone,-3.1719999999999997,1,380.45600000000013,3,4,2,94.83,-3.43,CC34CC(O)C1(F)C(CCC2=CC(=O)CCC12C)C3CCC4(O)C(=O)CO
|
134 |
+
ethiofencarb,-2.855,1,225.313,1,1,4,38.33,-2.09,CCSCc1ccccc1OC(=O)NC
|
135 |
+
Malonic acid diethylester,-1.413,1,160.16899999999998,0,0,4,52.60000000000001,-0.82,CCOC(=O)CC(=O)OCC
|
136 |
+
d-Limonene,-3.429,1,136.238,0,1,1,0.0,-4.26,CC1=CCC(CC1)C(C)=C
|
137 |
+
Indan,-3.057,2,118.17899999999997,0,2,0,0.0,-3.04,C1Cc2ccccc2C1
|
138 |
+
p-t-Butylphenol,-3.1919999999999997,1,150.22099999999998,1,1,0,20.23,-2.41,CC(C)(C)c1ccc(O)cc1
|
139 |
+
Cyclopropyl-5-spirobarbituric acid,-0.08800000000000001,1,154.125,2,2,0,75.27,-1.886,O=C2NC(=O)C1(CC1)C(=O)N2
|
140 |
+
m-Chloroiodobenzene,-4.3839999999999995,1,238.45499999999998,0,1,0,0.0,-3.55,Clc1cccc(I)c1
|
141 |
+
1-Bromonapthalene,-4.434,1,207.07,0,2,0,0.0,-4.35,Brc1cccc2ccccc12
|
142 |
+
trans-2-Pentene ,-2.076,1,70.135,0,0,1,0.0,-2.54,CC/C=C/C
|
143 |
+
"2,6-Dimethylpyridine",-2.0980000000000003,1,107.156,0,1,0,12.89,0.45,Cc1cccc(C)n1
|
144 |
+
Trichloroethylene,-2.312,1,131.389,0,0,0,0.0,-1.96,ClC=C(Cl)Cl
|
145 |
+
1-Napthylamine,-2.721,1,143.189,1,2,0,26.02,-1.92,Nc1cccc2ccccc12
|
146 |
+
m-Xylene ,-3.035,1,106.16799999999999,0,1,0,0.0,-2.82,Cc1cccc(C)c1
|
147 |
+
2-hydroxypteridine,-1.4040000000000001,1,148.125,1,2,0,71.79,-1.9469999999999998,Oc2ncc1nccnc1n2
|
148 |
+
Methanol,0.441,1,32.042,1,0,0,20.23,1.57,CO
|
149 |
+
Amobarbital,-2.312,1,226.27599999999998,2,1,4,75.27000000000001,-2.468,CCC1(CCC(C)C)C(=O)NC(=O)NC1=O
|
150 |
+
2-Butanone,-0.491,1,72.107,0,0,1,17.07,0.52,CCC(=O)C
|
151 |
+
5-fluorouracil,-0.792,1,130.078,2,1,0,65.72,-1.077,Fc1c[nH]c(=O)[nH]c1=O
|
152 |
+
tubercidin,-0.892,1,266.257,4,3,2,126.65,-1.95,Nc1ncnc2n(ccc12)C3OC(CO)C(O)C3O
|
153 |
+
"1,3-Benzenediol",-1.59,1,110.11199999999998,2,1,0,40.46,0.81,Oc1cccc(O)c1
|
154 |
+
1-Hexanol,-1.3969999999999998,1,102.17699999999999,1,0,4,20.23,-1.24,CCCCCCO
|
155 |
+
1-Chloropentane,-2.294,1,106.596,0,0,3,0.0,-2.73,CCCCCCl
|
156 |
+
"1,3-Butadiene",-1.376,1,54.09199999999999,0,0,1,0.0,-1.87,C=CC=C
|
157 |
+
Propyl acetate,-1.125,1,102.13299999999998,0,0,2,26.3,-0.72,CCCOC(=O)C
|
158 |
+
"5,6,7,8-tetrahydro-2-naphthol",-3.0860000000000003,1,148.205,1,2,0,20.23,-1.99,Oc2ccc1CCCCc1c2
|
159 |
+
chloroacetamide,-0.106,1,93.513,1,0,1,43.09,-0.02,NC(=O)CCl
|
160 |
+
Iodofenphos,-6.148,1,413.0,0,1,4,27.69,-6.62,COP(=S)(OC)Oc1cc(Cl)c(I)cc1Cl
|
161 |
+
4-Chlorotoluene,-3.2969999999999997,1,126.586,0,1,0,0.0,-3.08,Cc1ccc(Cl)cc1
|
162 |
+
Metribuzin,-2.324,1,214.29399999999998,1,1,1,73.8,-2.253,CSc1nnc(c(=O)n1N)C(C)(C)C
|
163 |
+
Tricresyl phosphate,-6.39,1,368.3690000000001,0,3,6,44.760000000000005,-6.01,Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1
|
164 |
+
Caproaldehyde,-1.4569999999999999,1,100.16099999999999,0,0,4,17.07,-1.3,CCCCCC=O
|
165 |
+
Butamben,-3.0389999999999997,1,193.24599999999998,1,1,4,52.32,-3.082,CCCCOC(=O)c1ccc(N)cc1
|
166 |
+
RTI 3,-3.049,1,255.277,1,3,0,68.45,-3.043,O2c1cc(C)ccc1N(C)C(=O)c3cc(N)cnc23
|
167 |
+
Nerol,-2.603,1,154.253,1,0,4,20.23,-2.46,CC(C)=CCC/C(C)=C\CO
|
168 |
+
"2,4'-PCB",-5.142,1,223.102,0,2,1,0.0,-5.28,Clc1ccc(cc1)c2ccccc2Cl
|
169 |
+
3-Octanoyloxymethylphenytoin,-4.84,1,408.498,1,3,10,75.71,-6.523,O=C1N(COC(=O)CCCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
|
170 |
+
Nitroethane,-0.462,1,75.067,0,0,1,43.14,-0.22,CCN(=O)=O
|
171 |
+
Ethalfluralin,-5.063,1,333.266,0,1,6,89.51999999999998,-6.124,CCN(CC(C)=C)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O
|
172 |
+
"1,2,3,4-Tetrachlorobenzene",-4.546,1,215.894,0,1,0,0.0,-4.57,Clc1ccc(Cl)c(Cl)c1Cl
|
173 |
+
Meprobamate,-1.376,1,218.25299999999996,2,0,6,104.63999999999999,-1.807,CCCC(C)(COC(N)=O)COC(N)=O
|
174 |
+
pregnenolone,-4.342,1,316.48500000000007,1,4,1,37.3,-4.65,CC(=O)C3CCC4C2CC=C1CC(O)CCC1(C)C2CCC34C
|
175 |
+
Iodomethane,-1.646,1,141.939,0,0,0,0.0,-1.0,CI
|
176 |
+
cycloheximide,-1.5319999999999998,1,281.35200000000003,2,2,3,83.47,-1.13,CC1CC(C)C(=O)C(C1)C(O)CC2CC(=O)NC(=O)C2
|
177 |
+
3-Heptanoyloxymethylphenytoin,-4.496,1,394.471,1,3,9,75.71,-6.301,O=C1N(COC(=O)CCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
|
178 |
+
isophorone,-2.015,1,138.20999999999998,0,1,0,17.07,-1.06,CC1=CC(=O)CC(C)(C)C1
|
179 |
+
Butabarbital,-1.9580000000000002,1,212.24899999999997,2,1,3,75.27000000000001,-2.39,O=C1NC(=O)NC(=O)C1(CC)C(C)CC
|
180 |
+
5-Nonanone,-2.329,1,142.242,0,0,6,17.07,-2.58,CCCCC(=O)CCCC
|
181 |
+
Glutethimide,-2.591,1,217.268,1,2,2,46.17,-2.3369999999999997,CCC1(CCC(=O)NC1=O)c2ccccc2
|
182 |
+
3-Methylpentane,-2.6,1,86.178,0,0,2,0.0,-3.68,CCC(C)CC
|
183 |
+
Etofenprox,-6.896,1,376.49600000000004,0,3,9,27.69,-8.6,CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3
|
184 |
+
Methaqualone,-3.8810000000000002,1,250.30100000000002,0,3,1,34.89,-2.925,Cc1ccccc1n3c(C)nc2ccccc2c3=O
|
185 |
+
Chloroacetonitrile,-0.44799999999999995,1,75.498,0,0,0,23.79,-0.092,ClCC#N
|
186 |
+
Trichloronate,-5.225,1,333.60400000000004,0,1,5,18.46,-5.752000000000001,CCOP(=S)(CC)Oc1cc(Cl)c(Cl)cc1Cl
|
187 |
+
Ethisterone,-3.858,1,312.45300000000003,1,4,0,37.3,-5.66,CC12CCC(=O)C=C1CCC3C2CCC4(C)C3CCC4(O)C#C
|
188 |
+
Pyridazine,-0.619,2,80.08999999999999,0,1,0,25.78,1.1,c1ccnnc1
|
189 |
+
"1,2,3,5-Tetrachlorobenzene",-4.621,1,215.894,0,1,0,0.0,-4.63,Clc1cc(Cl)c(Cl)c(Cl)c1
|
190 |
+
Diosgenin,-5.681,1,414.63000000000017,1,6,0,38.69,-7.32,C1C(O)CCC2(C)CC3CCC4(C)C5(C)CC6OCC(C)CC6OC5CC4C3C=C21
|
191 |
+
o-Aminophenol,-1.465,1,109.12799999999999,2,1,0,46.25,-0.72,Nc1ccccc1O
|
192 |
+
Ethyl nonanoate,-3.3160000000000003,1,186.295,0,0,8,26.3,-3.8,CCCCCCCCC(=O)OCC
|
193 |
+
metalaxyl,-2.87,1,279.336,0,1,5,55.84,-1.601,COCC(=O)N(C(C)C(=O)OC)c1c(C)cccc1C
|
194 |
+
Propoxur,-2.4090000000000003,1,209.24499999999998,1,1,3,47.56,-2.05,CNC(=O)Oc1ccccc1OC(C)C
|
195 |
+
2-Chlorobutane,-1.94,1,92.569,0,0,1,0.0,-1.96,CCC(C)Cl
|
196 |
+
2-Napthol,-3.08,1,144.17299999999997,1,2,0,20.23,-2.28,Oc1ccc2ccccc2c1
|
197 |
+
Oxadiazon,-5.265,1,345.22600000000017,0,2,3,57.26,-5.696000000000001,CC(C)Oc1cc(c(Cl)cc1Cl)n2nc(oc2=O)C(C)(C)C
|
198 |
+
1-Hexyne ,-1.801,1,82.14599999999999,0,0,2,0.0,-2.36,CCCCC#C
|
199 |
+
1-Nonyne ,-2.864,1,124.22699999999999,0,0,5,0.0,-4.24,CCCCCCCC#C
|
200 |
+
2-Chlorotoluene,-3.2969999999999997,1,126.586,0,1,0,0.0,-3.52,Cc1ccccc1Cl
|
201 |
+
Diisopropyl ether ,-1.281,1,102.17699999999999,0,0,2,9.23,-1.1,CC(C)OC(C)C
|
202 |
+
Dapsone,-2.464,1,248.307,2,2,2,86.18,-3.094,Nc1ccc(cc1)S(=O)(=O)c2ccc(N)cc2
|
203 |
+
Methyl hydrazine,0.5429999999999999,1,46.073,2,0,0,38.05,1.34,CNN
|
204 |
+
Propyne,-0.672,1,40.065000000000005,0,0,0,0.0,-0.41,CC#C
|
205 |
+
Phoxim,-4.5569999999999995,1,298.304,0,1,7,63.839999999999996,-4.862,CCOP(=S)(OCC)ON=C(C#N)c1ccccc1
|
206 |
+
Propetamphos,-2.826,1,281.314,1,0,7,56.790000000000006,-3.408,CCNP(=S)(OC)OC(=CC(=O)OC(C)C)C
|
207 |
+
Acrolein,-0.184,1,56.064,0,0,1,17.07,0.57,C=CC=O
|
208 |
+
Hypoxanthine,-0.6559999999999999,1,136.114,2,2,0,74.43,-2.296,O=c1[nH]cnc2nc[nH]c12
|
209 |
+
6-hydroxyquinoline,-2.725,1,145.161,1,2,0,33.120000000000005,-2.16,Oc2ccc1ncccc1c2
|
210 |
+
Fluorobenzene,-2.5140000000000002,1,96.10399999999998,0,1,0,0.0,-1.8,Fc1ccccc1
|
211 |
+
1-Chloropropane,-1.585,1,78.542,0,0,1,0.0,-1.47,CCCCl
|
212 |
+
Ethyl acetate,-0.77,1,88.106,0,0,1,26.3,-0.04,CCOC(=O)C
|
213 |
+
"2,2-Dimethylpentane",-2.938,1,100.20499999999998,0,0,1,0.0,-4.36,CCCC(C)(C)C
|
214 |
+
Pentamethylbenzene,-3.9930000000000003,1,148.249,0,1,0,0.0,-4.0,Cc1cc(C)c(C)c(C)c1C
|
215 |
+
eucalyptol,-2.5789999999999997,1,154.253,0,3,0,9.23,-1.64,CC12CCC(CC1)C(C)(C)O2
|
216 |
+
dibutyl sebacate,-4.726,1,314.46600000000007,0,0,15,52.60000000000001,-3.8960000000000004,CCCCOC(=O)CCCCCCCCC(=O)OCCCC
|
217 |
+
"4,4'-PCB",-5.2989999999999995,1,223.102,0,2,1,0.0,-6.56,Clc1ccc(cc1)c2ccc(Cl)cc2
|
218 |
+
"2,3-Dimethylpyridine",-2.0669999999999997,1,107.156,0,1,0,12.89,0.38,Cc1cccnc1C
|
219 |
+
Carvone,-2.042,1,150.22099999999998,0,1,1,17.07,-2.06,CC(=C)C1CC=C(C)C(=O)C1
|
220 |
+
Carbophenthion,-5.827999999999999,1,342.875,0,1,8,18.46,-5.736000000000001,CCOP(=S)(OCC)SCSc1ccc(Cl)cc1
|
221 |
+
"Etoposide (148-167,25mg/ml)",-3.292,1,588.5620000000001,3,7,5,160.83,-3.571,COc1cc(cc(OC)c1O)C6C2C(COC2=O)C(OC4OC3COC(C)OC3C(O)C4O)c7cc5OCOc5cc67
|
222 |
+
Perylene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.804,c1cc2cccc3c4cccc5cccc(c(c1)c23)c54
|
223 |
+
"2,4-Dinitrotoluene",-2.6039999999999996,1,182.135,0,1,2,86.28,-2.82,Cc1ccc(cc1N(=O)=O)N(=O)=O
|
224 |
+
2-bromonaphthalene,-4.434,1,207.07000000000002,0,2,0,0.0,-4.4,c1c(Br)ccc2ccccc12
|
225 |
+
Formetanate,-1.8459999999999999,1,221.26,1,1,3,53.93,-2.34,CNC(=O)Oc1cccc(N=CN(C)C)c1
|
226 |
+
6-methoxypteridine,-1.589,1,162.15200000000002,0,2,1,60.790000000000006,-1.139,COc2cnc1ncncc1n2
|
227 |
+
nevirapine,-3.397,1,266.30400000000003,1,4,1,58.120000000000005,-3.19,Cc3ccnc4N(C1CC1)c2ncccc2C(=O)Nc34
|
228 |
+
Isazofos,-3.76,1,313.747,0,1,7,58.4,-3.658,CCOP(=S)(OCC)Oc1nc(Cl)n(n1)C(C)C
|
229 |
+
"2-Methyl-1,3-Butadiene ",-1.714,1,68.11900000000001,0,0,1,0.0,-2.03,CC(=C)C=C
|
230 |
+
linalool,-2.399,1,154.253,1,0,4,20.23,-1.99,CC(C)=CCCC(O)(C)C=C
|
231 |
+
Fenthion,-4.265,1,278.335,0,1,5,27.69,-4.57,COP(=S)(OC)Oc1ccc(SC)c(C)c1
|
232 |
+
Cyclohexanol ,-1.261,1,100.161,1,1,0,20.23,-0.44,OC1CCCCC1
|
233 |
+
5-Allyl-5-methylbarbital,-1.013,1,182.17899999999997,2,1,2,75.27000000000001,-1.16,O=C1NC(=O)NC(=O)C1(C)CC=C
|
234 |
+
Epiandrosterone,-3.8819999999999997,1,290.447,1,4,0,37.3,-4.16,CC34CCC1C(CCC2CC(O)CCC12C)C3CCC4=O
|
235 |
+
mannitol,0.647,1,182.172,6,0,5,121.38000000000001,0.06,OCC(O)C(O)C(O)C(O)CO
|
236 |
+
4-Methylbiphenyl,-4.4239999999999995,1,168.239,0,2,1,0.0,-4.62,Cc1ccc(cc1)c2ccccc2
|
237 |
+
Atrazine,-3.069,1,215.68800000000002,2,1,4,62.730000000000004,-3.85,CCNc1nc(Cl)nc(NC(C)C)n1
|
238 |
+
Phenylthiourea,-1.7009999999999998,1,152.22199999999998,2,1,1,38.05,-1.77,NC(=S)Nc1ccccc1
|
239 |
+
4-Heptanone,-1.62,1,114.18799999999999,0,0,4,17.07,-1.3,CCCC(=O)CCC
|
240 |
+
"3,3-Dimethyl-2-butanone",-1.25,1,100.16099999999999,0,0,0,17.07,-0.72,CC(=O)C(C)(C)C
|
241 |
+
4-Chlorophenol ,-2.761,1,128.558,1,1,0,20.23,-0.7,Oc1ccc(Cl)cc1
|
242 |
+
Cyclohexanone,-0.996,1,98.14500000000001,0,1,0,17.07,-0.6,O=C1CCCCC1
|
243 |
+
m-Methylaniline,-1.954,1,107.156,1,1,0,26.02,-0.85,Cc1cccc(N)c1
|
244 |
+
Trichloroacetonitrile,-2.019,1,144.388,0,0,0,23.79,-2.168,ClC(Cl)(Cl)C#N
|
245 |
+
norflurazon,-4.029,1,303.67100000000005,1,2,2,46.92,-4.046,CNc2cnn(c1cccc(c1)C(F)(F)F)c(=O)c2Cl
|
246 |
+
2-Decanone,-2.617,1,156.269,0,0,7,17.07,-3.3,CCCCCCCCC(=O)C
|
247 |
+
Ipazine,-3.497,1,243.74200000000002,1,1,5,53.940000000000005,-3.785,CCN(CC)c1nc(Cl)nc(NC(C)C)n1
|
248 |
+
Benzocaine,-2.383,1,165.19199999999998,1,1,2,52.32,-2.616,CCOC(=O)c1ccc(N)cc1
|
249 |
+
"1,2,4-Trichlorobenzene",-4.083,1,181.449,0,1,0,0.0,-3.59,Clc1ccc(Cl)c(Cl)c1
|
250 |
+
Triazolam,-3.948,1,343.2170000000001,0,4,1,43.07,-4.09,Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34
|
251 |
+
"1,2-Benzenediol",-1.635,1,110.11199999999998,2,1,0,40.46,0.62,Oc1ccccc1O
|
252 |
+
Reverse Transcriptase inhibitor 1,-2.7939999999999996,1,254.29299999999998,0,3,1,49.330000000000005,-2.62,CCN2c1ncccc1N(C)C(=O)c3cccnc23
|
253 |
+
Dimethyl sulfide,-0.758,1,62.137,0,0,0,0.0,-0.45,CSC
|
254 |
+
2-Bromotoluene,-3.667,1,171.03699999999998,0,1,0,0.0,-2.23,Cc1ccccc1Br
|
255 |
+
O-Ethyl carbamate,-0.218,1,89.09400000000001,1,0,1,52.32,0.85,CCOC(=O)N
|
256 |
+
megestrol acetate,-4.417,1,384.5160000000002,0,4,2,60.440000000000005,-5.35,CC(=O)OC3(CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C)C(C)=O
|
257 |
+
"2,4-Dimethyl-3-pentanol",-1.6469999999999998,1,116.20399999999998,1,0,2,20.23,-1.22,CC(C)C(O)C(C)C
|
258 |
+
Napthalene,-3.468,2,128.17399999999995,0,2,0,0.0,-3.6,c1ccc2ccccc2c1
|
259 |
+
N-Ethylaniline,-2.3890000000000002,1,121.18299999999995,1,1,2,12.03,-1.7,CCNc1ccccc1
|
260 |
+
Phenytoin,-3.057,1,252.27300000000002,2,3,2,58.2,-4.0969999999999995,O=C1NC(=O)C(N1)(c2ccccc2)c3ccccc3
|
261 |
+
"7,12-Dimethylbenz(a)anthracene",-6.297000000000001,1,256.348,0,4,0,0.0,-7.02,Cc1c2ccccc2c(C)c3ccc4ccccc4c13
|
262 |
+
Dialifor,-5.026,1,393.85400000000016,0,2,8,55.84,-6.34,CCOP(=S)(OCC)SC(CCl)N1C(=O)c2ccccc2C1=O
|
263 |
+
Methoxychlor,-5.537999999999999,1,345.6529999999999,0,2,4,18.46,-6.89,COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl
|
264 |
+
TEFLUBENZURON,-5.462000000000001,1,381.1120000000001,2,2,2,58.2,-7.28,Fc1cccc(F)c1C(=O)NC(=O)Nc2cc(Cl)c(F)c(Cl)c2F
|
265 |
+
3-Pentanoyloxymethylphenytoin,-3.81,1,366.41700000000003,1,3,7,75.71,-4.678,O=C1N(COC(=O)CCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
|
266 |
+
Monuron,-2.6710000000000003,1,198.653,1,1,1,32.34,-2.89,CN(C)C(=O)Nc1ccc(Cl)cc1
|
267 |
+
Flutriafol,-3.569,1,301.296,1,3,4,50.94,-3.37,OC(Cn1cncn1)(c2ccc(F)cc2)c3ccccc3F
|
268 |
+
triamcinolone diacetate,-3.8760000000000003,1,478.51300000000026,2,4,4,127.20000000000002,-4.13,CC(=O)OCC(=O)C3(O)C(CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)OC(C)=O
|
269 |
+
1-Bromobutane,-2.303,1,137.01999999999998,0,0,2,0.0,-2.37,CCCCBr
|
270 |
+
"1,2,4,5-Tetrabromobenzene",-6.001,1,393.69800000000004,0,1,0,0.0,-6.98,Brc1cc(Br)c(Br)cc1Br
|
271 |
+
4-Methyl-2-pentanone,-1.1840000000000002,1,100.16099999999999,0,0,2,17.07,-0.74,CC(C)CC(=O)C
|
272 |
+
cycloate,-3.35,1,215.36199999999994,0,1,3,20.310000000000002,-3.4,CCSC(=O)N(CC)C1CCCCC1
|
273 |
+
4-Chloroanisole,-3.057,1,142.585,0,1,1,9.23,-2.78,COc1ccc(Cl)cc1
|
274 |
+
Deltamethrin,-7.44,1,505.20600000000024,0,3,6,59.32000000000001,-8.402000000000001,CC1(C)C(C=C(Br)Br)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2
|
275 |
+
Talbutal,-2.06,1,224.26,2,1,4,75.27000000000001,-2.016,CCC(C)C1(CC=C)C(=O)NC(=O)NC1=O
|
276 |
+
Fenitrothion,-3.845,1,277.238,0,1,5,70.83000000000001,-4.04,COP(=S)(OC)Oc1ccc(N(=O)=O)c(C)c1
|
277 |
+
1-Iodonapthalene,-4.888999999999999,1,254.07000000000002,0,2,0,0.0,-4.55,Ic1cccc2ccccc12
|
278 |
+
Sorbitol,0.647,1,182.172,6,0,5,121.38000000000001,1.09,OCC(O)C(O)C(O)C(O)CO
|
279 |
+
Ethanethiol,-0.968,1,62.137,1,0,0,0.0,-0.6,CCS
|
280 |
+
"1,1,2-Trichloroethane",-1.9609999999999999,1,133.405,0,0,1,0.0,-1.48,ClCC(Cl)Cl
|
281 |
+
Pyrolan,-3.141,1,245.282,0,2,2,47.36000000000001,-2.09,CN(C)C(=O)Oc1cc(C)nn1c2ccccc2
|
282 |
+
o-Hydroxybenzamide,-1.942,1,137.13799999999998,2,1,1,63.32000000000001,-1.82,NC(=O)c1ccccc1O
|
283 |
+
o-Nitrotoluene,-2.589,1,137.138,0,1,1,43.14,-2.33,Cc1ccccc1N(=O)=O
|
284 |
+
"5,5-Diisopropylbarbital",-1.942,1,212.249,2,1,2,75.27000000000001,-2.766,O=C1NC(=O)NC(=O)C1(C(C)C)C(C)C
|
285 |
+
2-Ethyltoluene,-3.2960000000000003,1,120.19499999999996,0,1,1,0.0,-3.21,CCc1ccccc1C
|
286 |
+
1-Chloroheptane,-3.003,1,134.65,0,0,5,0.0,-4.0,CCCCCCCCl
|
287 |
+
Barbital,-1.265,1,184.19499999999996,2,1,2,75.27000000000001,-2.4,O=C1NC(=O)NC(=O)C1(CC)CC
|
288 |
+
Bibenzyl ,-4.301,2,182.266,0,2,3,0.0,-4.62,C(Cc1ccccc1)c2ccccc2
|
289 |
+
"1,1,2,2-Tetrachloroethane",-2.549,1,167.85,0,0,1,0.0,-1.74,ClC(Cl)C(Cl)Cl
|
290 |
+
RTI 23,-4.228,1,283.331,1,3,2,54.46,-5.153,CCN2c1cc(OC)cc(C)c1NC(=O)c3cccnc23
|
291 |
+
2-Methylphenanthrene,-4.87,1,192.261,0,3,0,0.0,-5.84,Cc1ccc2c(ccc3ccccc32)c1
|
292 |
+
dibutylphthalate,-4.378,1,278.348,0,1,8,52.60000000000001,-4.4,CCCCOC(=O)c1ccccc1C(=O)OCCCC
|
293 |
+
tetrachloroguaiacol,-4.2989999999999995,1,261.919,1,1,1,29.46,-4.02,COc1c(O)c(Cl)c(Cl)c(Cl)c1Cl
|
294 |
+
Dimecron,-2.426,1,299.6909999999999,0,0,8,65.07000000000001,0.523,CCN(CC)C(=O)C(=CCOP(=O)(OC)OC)Cl
|
295 |
+
Equilin,-3.555,1,268.356,1,4,0,37.3,-5.282,CC34CCC1C(=CCc2cc(O)ccc12)C3CCC4=O
|
296 |
+
Chlorimuron-ethyl (ph 7),-3.719,1,414.82700000000017,1,2,8,127.79,-4.5760000000000005,CCOC(=O)c1ccccc1S(=O)(=O)NN(C=O)c2nc(Cl)cc(OC)n2
|
297 |
+
p-Nitroanisole,-2.522,1,153.13699999999997,0,1,2,52.37,-2.41,COc1ccc(cc1)N(=O)=O
|
298 |
+
1-Chlorohexane,-2.648,1,120.623,0,0,4,0.0,-3.12,CCCCCCCl
|
299 |
+
"2,2',3,3',4,4',5,5'-PCB",-8.468,1,429.77200000000016,0,2,1,0.0,-9.16,Clc1cc(c(Cl)c(Cl)c1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl
|
300 |
+
Raffinose,0.496,1,504.43800000000005,11,3,8,268.67999999999995,-0.41,OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O
|
301 |
+
hexacosane,-9.702,1,366.7180000000002,0,0,23,0.0,-8.334,CCCCCCCCCCCCCCCCCCCCCCCCCC
|
302 |
+
RTI 5,-3.471,1,253.30499999999995,0,3,1,36.44,-3.324,CCN2c1ccccc1N(C)C(=O)c3cccnc23
|
303 |
+
"1,1-Dichloroethane",-1.5759999999999998,1,98.96000000000001,0,0,0,0.0,-1.29,CC(Cl)Cl
|
304 |
+
Sulfanilamide,-0.9540000000000001,1,172.20899999999997,2,1,1,86.18,-1.34,Nc1ccc(cc1)S(N)(=O)=O
|
305 |
+
Isopropalin,-5.306,1,309.36600000000004,0,1,8,89.51999999999998,-6.49,CCCN(CCC)c1c(cc(cc1N(=O)=O)C(C)C)N(=O)=O
|
306 |
+
Lindane,-4.0089999999999995,1,290.832,0,1,0,0.0,-4.64,ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl
|
307 |
+
Isofenphos,-4.538,1,345.4010000000002,1,1,8,56.790000000000006,-4.194,CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C
|
308 |
+
"1,2,3-Trichlorobenzene",-4.008,1,181.44899999999998,0,1,0,0.0,-4.0,Clc1cccc(Cl)c1Cl
|
309 |
+
Tetrachloromethane,-2.6069999999999998,1,153.823,0,0,0,0.0,-2.31,ClC(Cl)(Cl)Cl
|
310 |
+
"3,4-Dichloronitrobenzene",-3.448,1,192.001,0,1,1,43.14,-3.2,O=N(=O)c1cc(Cl)c(Cl)cc1
|
311 |
+
Cyclooctanol,-2.14,1,128.215,1,1,0,20.23,-1.29,OC1CCCCCCC1
|
312 |
+
17a-Methyltestosterone,-4.073,1,302.4580000000001,1,4,0,37.3,-3.9989999999999997,CC1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C
|
313 |
+
Dulcin,-2.167,1,180.20699999999997,2,1,3,64.35,-2.17,CCOc1ccc(NC(N)=O)cc1
|
314 |
+
"trans-1,4-Dimethylcyclohexane",-3.305,1,112.216,0,1,0,0.0,-4.47,C/C1CCC(\C)CC1
|
315 |
+
"1,7-phenantroline",-2.9939999999999998,2,180.20999999999998,0,3,0,25.78,-2.68,c1cnc2c(c1)ccc3ncccc23
|
316 |
+
Methyl t-butyl ether ,-0.9840000000000001,1,88.14999999999999,0,0,0,9.23,-0.24,COC(C)(C)C
|
317 |
+
Anethole,-3.2539999999999996,1,148.20499999999998,0,1,2,9.23,-3.13,COc1ccc(C=CC)cc1
|
318 |
+
1-Hexadecanol,-4.94,1,242.44699999999992,1,0,14,20.23,-7.0,CCCCCCCCCCCCCCCCO
|
319 |
+
uracil,-0.441,1,112.088,2,1,0,65.72,-1.4880000000000002,O=c1cc[nH]c(=O)[nH]1
|
320 |
+
adenine,-1.255,1,135.13,2,2,0,80.47999999999999,-2.12,Nc1ncnc2nc[nH]c12
|
321 |
+
"2,2',3,4,5-PCB",-6.709,1,326.437,0,2,1,0.0,-7.21,Clc1cc(Cl)c(cc1Cl)c2cccc(Cl)c2Cl
|
322 |
+
Ancymidol,-2.181,1,256.30499999999995,1,3,4,55.24,-2.596,COc1ccc(cc1)C(O)(C2CC2)c3cncnc3
|
323 |
+
Benzo(b)fluoranthene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.23,c1ccc2c(c1)c3cccc4c3c2cc5ccccc54
|
324 |
+
Carbanilide,-3.611,1,212.25199999999998,2,2,2,41.13,-3.15,O=C(Nc1ccccc1)Nc2ccccc2
|
325 |
+
phenobarbital,-2.272,1,232.239,2,2,2,75.27000000000001,-2.322,CCC1(C(=O)NC(=O)NC1=O)c2ccccc2
|
326 |
+
"2',3,4-PCB",-5.686,1,257.547,0,2,1,0.0,-6.29,Clc1ccc(cc1)c2cccc(Cl)c2Cl
|
327 |
+
Isoproturon,-2.867,1,206.289,1,1,2,32.34,-3.536,CC(C)c1ccc(NC(=O)N(C)C)cc1
|
328 |
+
Azintamide,-2.231,1,259.762,0,1,5,46.09,-1.716,CCN(CC)C(=O)CSc1ccc(Cl)nn1
|
329 |
+
"2,2-Dimethyl-1-butanol",-1.365,1,102.17699999999998,1,0,2,20.23,-1.04,CCC(C)(C)CO
|
330 |
+
Ethyl pentanoate,-1.899,1,130.18699999999998,0,0,4,26.3,-1.75,CCCOC(=O)CCC
|
331 |
+
"2,4,6-Trinitrotoluene",-2.6060000000000003,1,227.13199999999998,0,1,3,129.42000000000002,-3.22,Cc1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O
|
332 |
+
Bensulide,-4.99,1,397.52400000000006,1,1,10,64.63,-4.2,CC(C)OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1
|
333 |
+
Cycloheptane,-2.9160000000000004,2,98.18900000000001,0,1,0,0.0,-3.51,C1CCCCCC1
|
334 |
+
Propyl formate,-0.757,1,88.10599999999998,0,0,3,26.3,-0.49,CCCOC=O
|
335 |
+
2-Isopropyltoluene,-3.585,1,134.22199999999995,0,1,1,0.0,-3.76,CC(C)c1ccccc1C
|
336 |
+
m-Chloroaniline,-2.392,1,127.574,1,1,0,26.02,-1.37,Nc1cccc(Cl)c1
|
337 |
+
"2,4-Dimethylpentane",-2.938,1,100.20499999999998,0,0,2,0.0,-4.26,CC(C)CC(C)C
|
338 |
+
Dibenzofurane,-4.2010000000000005,2,168.195,0,3,0,13.14,-4.6,o1c2ccccc2c3ccccc13
|
339 |
+
ethofumesate,-3.1839999999999997,1,286.34900000000005,0,2,4,61.830000000000005,-3.42,CCOC2Oc1ccc(OS(C)(=O)=O)cc1C2(C)C
|
340 |
+
Fluometuron,-3.065,1,232.20499999999996,1,1,1,32.34,-3.43,CN(C)C(=O)Nc1cccc(c1)C(F)(F)F
|
341 |
+
Acridine,-3.846,2,179.22199999999998,0,3,0,12.89,-3.67,c3ccc2nc1ccccc1cc2c3
|
342 |
+
Cortisone,-2.8930000000000002,1,360.45000000000016,2,4,2,91.67,-3.11,CC12CC(=O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO
|
343 |
+
glucose,0.501,1,180.156,5,1,1,110.38000000000001,0.74,OCC1OC(O)C(O)C(O)C1O
|
344 |
+
3-Methylphenol,-2.313,1,108.13999999999999,1,1,0,20.23,-0.68,Cc1cccc(O)c1
|
345 |
+
Indapamide,-4.345,1,365.84200000000004,2,3,3,92.5,-3.5860000000000003,CC2Cc1ccccc1N2NC(=O)c3ccc(Cl)c(c3)S(N)(=O)=O
|
346 |
+
Lovastatin,-4.731,1,404.54700000000025,1,3,6,72.83,-6.005,CCC(C)C(=O)OC2CC(C)C=C3C=CC(C)C(CCC1CC(O)CC(=O)O1)C23
|
347 |
+
"1,4-Dinitrobenzene",-2.281,1,168.10799999999995,0,1,2,86.28,-3.39,O=N(=O)c1ccc(cc1)N(=O)=O
|
348 |
+
Reposal,-2.781,1,262.30899999999997,2,3,2,75.27000000000001,-2.696,CCC1(C(=O)NC(=O)NC1=O)C2=CCC3CCC2C3
|
349 |
+
Ethyl decanoate,-3.6710000000000003,1,200.32199999999997,0,0,9,26.3,-4.1,CCCCCCCCCC(=O)OCC
|
350 |
+
Fenuron,-1.847,1,164.208,1,1,1,32.34,-1.6,CN(C)C(=O)Nc1ccccc1
|
351 |
+
Ethyl propyl ether,-1.072,1,88.14999999999999,0,0,3,9.23,-0.66,CCCOCC
|
352 |
+
2-Propanol,-0.261,1,60.096000000000004,1,0,0,20.23,0.43,CC(C)O
|
353 |
+
2-Methylnapthalene,-3.802,1,142.201,0,2,0,0.0,-3.77,Cc1ccc2ccccc2c1
|
354 |
+
Chlorodibromethane,-2.54,1,208.28,0,0,0,0.0,-1.9,ClC(Br)Br
|
355 |
+
Hexestrol,-4.854,1,270.372,2,2,5,40.46,-4.43,CCC(C(CC)c1ccc(O)cc1)c2ccc(O)cc2
|
356 |
+
Malathion,-3.391,1,330.3640000000001,0,0,9,71.06,-3.37,CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC
|
357 |
+
Benzylchloride,-2.887,1,126.58599999999996,0,1,1,0.0,-2.39,ClCc1ccccc1
|
358 |
+
t-Crotonaldehyde,-0.604,1,70.09100000000001,0,0,1,17.07,0.32,C/C=C/C=O
|
359 |
+
Chlorbromuron,-3.938,1,293.548,1,1,2,41.57,-3.924,CON(C)C(=O)Nc1ccc(Br)c(Cl)c1
|
360 |
+
"9,10-Dimethylanthracene",-5.228,1,206.28799999999998,0,3,0,0.0,-6.57,Cc1c2ccccc2c(C)c3ccccc13
|
361 |
+
Methyl hexanoate,-1.899,1,130.18699999999998,0,0,4,26.3,-1.87,CCCCCC(=O)OC
|
362 |
+
Dimefuron,-3.8310000000000004,1,338.79500000000013,1,2,2,80.37,-4.328,CN(C)C(=O)Nc1ccc(c(Cl)c1)n2nc(oc2=O)C(C)(C)C
|
363 |
+
p-Fluoroacetanilide,-2.181,1,153.156,1,1,1,29.1,-1.78,CC(=O)Nc1ccc(F)cc1
|
364 |
+
alachlor,-3.319,1,269.77199999999993,0,1,6,29.54,-3.26,CCc1cccc(CC)c1N(COC)C(=O)CCl
|
365 |
+
Cyclohexene,-2.16,2,82.146,0,1,0,0.0,-2.59,C1CCC=CC1
|
366 |
+
Hydrocortisone ,-3.159,1,362.4660000000002,3,4,2,94.83,-3.09,CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO
|
367 |
+
Pyrimidine,-0.884,2,80.08999999999999,0,1,0,25.78,1.1,c1cncnc1
|
368 |
+
p-Chloronitrobenzene,-2.9010000000000002,1,157.55599999999998,0,1,1,43.14,-2.92,Clc1ccc(cc1)N(=O)=O
|
369 |
+
Methyl propionate,-0.836,1,88.106,0,0,1,26.3,-0.14,CCC(=O)OC
|
370 |
+
o-Chloronitrobenzene,-2.775,1,157.55599999999998,0,1,1,43.14,-2.55,Clc1ccccc1N(=O)=O
|
371 |
+
Neburon,-4.157,1,275.179,1,1,4,32.34,-4.77,CCCCN(C)C(=O)Nc1ccc(Cl)c(Cl)c1
|
372 |
+
Buthidazole,-2.398,1,256.33099999999996,1,2,1,69.56,-1.8769999999999998,CN1CC(O)N(C1=O)c2nnc(s2)C(C)(C)C
|
373 |
+
Nitrobenzene,-2.2880000000000003,1,123.11099999999996,0,1,1,43.14,-1.8,O=N(=O)c1ccccc1
|
374 |
+
Iodobenzene,-3.8,1,204.01000000000002,0,1,0,0.0,-3.01,Ic1ccccc1
|
375 |
+
Metolazone,-3.7769999999999997,1,365.8420000000001,2,3,2,92.5,-3.78,CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O
|
376 |
+
Methocarbamol,-1.4280000000000002,1,241.24299999999994,2,1,6,91.00999999999999,-0.985,COc1ccccc1OCC(O)COC(N)=O
|
377 |
+
butachlor,-4.3469999999999995,1,311.85300000000007,0,1,9,29.54,-4.19,CCCCOCN(C(=O)CCl)c1c(CC)cccc1CC
|
378 |
+
"2,3-Dichlorophenol",-3.1439999999999997,1,163.003,1,1,0,20.23,-1.3,Oc1cccc(Cl)c1Cl
|
379 |
+
Propyl butyrate,-1.1909999999999998,1,102.13299999999998,0,0,2,26.3,-1.92,CCCC(=O)OC
|
380 |
+
Propanil,-3.6439999999999997,1,218.08299999999997,1,1,2,29.1,-3.0,CCC(=O)Nc1ccc(Cl)c(Cl)c1
|
381 |
+
Triamterene,-3.051,1,253.26900000000003,3,3,1,129.62,-2.404,Nc3nc(N)c2nc(c1ccccc1)c(N)nc2n3
|
382 |
+
Ethyl hexanoate,-2.254,1,144.21399999999997,0,0,5,26.3,-2.35,CCCCCC(=O)OCC
|
383 |
+
chloralose,-1.8869999999999998,1,309.529,3,2,2,88.38000000000001,-1.84,OCC(O)C2OC1OC(OC1C2O)C(Cl)(Cl)Cl
|
384 |
+
Amitraz,-5.5329999999999995,1,293.41400000000004,0,2,4,27.96,-5.47,CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C
|
385 |
+
Prometon,-3.448,1,225.296,2,1,5,71.96000000000001,-2.478,COc1nc(NC(C)C)nc(NC(C)C)n1
|
386 |
+
1-Octene ,-3.073,1,112.216,0,0,5,0.0,-4.44,CCCCCCC=C
|
387 |
+
p-Methylaniline ,-1.954,1,107.156,1,1,0,26.02,-1.21,Cc1ccc(N)cc1
|
388 |
+
aminothiazole,-1.226,1,100.14599999999999,1,1,0,38.91,-0.36,Nc1nccs1
|
389 |
+
Metolcarb,-1.9469999999999998,1,151.165,1,1,1,38.33,-1.8030000000000002,c1ccccc1(OC(=O)NC)
|
390 |
+
3-Hexanol,-1.324,1,102.17699999999999,1,0,3,20.23,-0.8,CCCC(O)CC
|
391 |
+
9-anthrol,-4.148,1,194.23299999999998,1,3,0,20.23,-4.73,c3ccc2c(O)c1ccccc1cc2c3
|
392 |
+
2-Methylanthracene,-4.87,1,192.261,0,3,0,0.0,-6.96,Cc1ccc2cc3ccccc3cc2c1
|
393 |
+
"1,2,3-Trimethylbenzene ",-3.312,1,120.195,0,1,0,0.0,-3.2,Cc1cccc(C)c1C
|
394 |
+
Aminocarb,-2.677,1,208.26099999999997,1,1,2,41.57,-2.36,CNC(=O)Oc1ccc(N(C)C)c(C)c1
|
395 |
+
2-Nonanol,-2.387,1,144.258,1,0,6,20.23,-2.74,CCCCCCCC(C)O
|
396 |
+
Methyldymron,-3.863,1,268.36,1,2,3,32.34,-3.35,CN(C(=O)NC(C)(C)c1ccccc1)c2ccccc2
|
397 |
+
3-Hexanone,-1.266,1,100.16099999999999,0,0,3,17.07,-0.83,CCCC(=O)CC
|
398 |
+
bromoxynil,-3.793,1,276.91499999999996,1,1,0,44.019999999999996,-3.33,Oc1c(Br)cc(C#N)cc1Br
|
399 |
+
"3,4-PCB",-5.223,1,223.102,0,2,1,0.0,-6.39,Clc1ccc(cc1Cl)c2ccccc2
|
400 |
+
Mefenacet,-4.504,1,298.367,0,3,4,42.43000000000001,-4.873,CN(C(=O)COc1nc2ccccc2s1)c3ccccc3
|
401 |
+
5-hydroxyquinoline,-2.725,1,145.161,1,2,0,33.120000000000005,-2.54,Oc1cccc2ncccc12
|
402 |
+
Carboxin,-2.927,1,235.30800000000002,1,2,2,38.33,-3.14,CC1=C(SCCO1)C(=O)Nc2ccccc2
|
403 |
+
Ethoxyzolamide,-3.085,1,258.324,1,2,3,82.28,-3.81,CCOc2ccc1nc(sc1c2)S(N)(=O)=O
|
404 |
+
Pentachlorophenol,-4.835,1,266.33799999999997,1,1,0,20.23,-4.28,Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl
|
405 |
+
Bromochloromethane,-1.5190000000000001,1,129.384,0,0,0,0.0,-0.89,ClCBr
|
406 |
+
metharbital,-1.6580000000000001,1,198.22199999999998,1,1,2,66.48,-2.23,CCC1(CC)C(=O)NC(=O)N(C)C1=O
|
407 |
+
deoxycorticosterone acetate,-4.4719999999999995,1,372.5050000000002,0,4,3,60.440000000000005,-4.63,CC(=O)OCC(=O)C3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C
|
408 |
+
benzylurea,-1.5090000000000001,1,150.18099999999998,2,1,2,55.120000000000005,-0.95,NC(=O)NCc1ccccc1
|
409 |
+
Chlortoluron,-3.048,1,212.67999999999998,1,1,1,32.34,-3.483,CN(C)C(=O)Nc1ccc(C)c(Cl)c1
|
410 |
+
Linuron,-3.5810000000000004,1,249.09699999999998,1,1,2,41.57,-3.592,CON(C)C(=O)Nc1ccc(Cl)c(Cl)c1
|
411 |
+
Cycloheptanol,-1.7,1,114.188,1,1,0,20.23,-0.88,OC1CCCCCC1
|
412 |
+
Thiamphenicol,-1.936,1,356.2270000000001,3,1,6,103.70000000000002,-2.154,CS(=O)(=O)c1ccc(cc1)C(O)C(CO)NC(=O)C(Cl)Cl
|
413 |
+
thiopental,-2.96,1,242.34400000000002,2,1,4,58.2,-3.36,CCCC(C)C1(CC)C(=O)NC(=S)NC1=O
|
414 |
+
acetazolamide,-0.7929999999999999,1,222.251,2,1,2,115.03999999999999,-2.36,CC(=O)Nc1nnc(s1)S(N)(=O)=O
|
415 |
+
p-Nitrophenol,-2.318,1,139.10999999999999,1,1,1,63.37,-0.74,Oc1ccc(cc1)N(=O)=O
|
416 |
+
Aldrin,-5.511,1,364.914,0,4,0,0.0,-6.307,ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl
|
417 |
+
Tetrahydrofurane ,-0.62,2,72.107,0,1,0,9.23,0.49,C1CCOC1
|
418 |
+
o-Nitroaniline,-2.2769999999999997,1,138.126,1,1,1,69.16,-1.96,Nc1ccccc1N(=O)=O
|
419 |
+
"2,2',3,3'-PCB",-6.079,1,291.99199999999996,0,2,1,0.0,-7.28,Clc1cccc(c1Cl)c2cccc(Cl)c2Cl
|
420 |
+
phenylbutazone,-4.0760000000000005,1,308.38100000000003,0,3,5,40.620000000000005,-3.81,CCCCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3
|
421 |
+
"2,6-Dinitrotoluene",-2.553,1,182.135,0,1,2,86.28,-3.0,Cc1c(cccc1N(=O)=O)N(=O)=O
|
422 |
+
Progesterone,-4.17,1,314.46900000000005,0,4,1,34.14,-4.42,CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C
|
423 |
+
Chlorazine,-3.6630000000000003,1,257.76899999999995,0,1,6,45.150000000000006,-4.4110000000000005,CCN(CC)c1nc(Cl)nc(n1)N(CC)CC
|
424 |
+
captafol,-4.365,1,349.06600000000014,0,2,3,37.38,-5.4,ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2=O
|
425 |
+
"1,2,4-tribromobenzene",-5.144,1,314.802,0,1,0,0.0,-4.5,c1(Br)c(Br)cc(Br)cc1
|
426 |
+
Oxazepam,-3.517,1,286.718,2,3,1,61.690000000000005,-3.952,OC3N=C(c1ccccc1)c2cc(Cl)ccc2NC3=O
|
427 |
+
Secobarbital,-2.415,1,238.28699999999995,2,1,5,75.27000000000001,-2.356,O=C1NC(=O)NC(=O)C1(C(C)CCC)CC=C
|
428 |
+
Carvacrol,-3.2239999999999998,1,150.22099999999998,1,1,1,20.23,-2.08,c1(O)c(C)ccc(C(C)C)c1
|
429 |
+
rhodanine,-0.396,1,133.197,1,1,0,29.1,-1.77,C1SC(=S)NC1(=O)
|
430 |
+
Morin,-2.7310000000000003,1,302.23800000000006,5,3,1,131.35999999999999,-3.083,Oc1ccc(c(O)c1)c3oc2cc(O)cc(O)c2c(=O)c3O
|
431 |
+
Kepone,-5.112,1,490.6390000000001,0,6,0,17.07,-5.2589999999999995,ClC1(C(=O)C2(Cl)C3(Cl)C14Cl)C5(Cl)C2(Cl)C3(Cl)C(Cl)(Cl)C45Cl
|
432 |
+
Disulfiram,-3.862,1,296.5520000000001,0,0,4,6.48,-4.86,CCN(CC)C(=S)SSC(=S)N(CC)CC
|
433 |
+
Cyclohexane,-2.477,2,84.162,0,1,0,0.0,-3.1,C1CCCCC1
|
434 |
+
Dienochlor,-7.848,1,474.64,0,2,1,0.0,-7.278,ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl
|
435 |
+
chlordimeform,-3.1639999999999997,1,196.68099999999998,0,1,2,15.6,-2.86,CN(C)C=Nc1ccc(Cl)cc1C
|
436 |
+
Equilenin,-3.927,1,266.34,1,4,0,37.3,-5.24,CC34CCc1c(ccc2cc(O)ccc12)C3CCC4=O
|
437 |
+
1-Octanol,-2.105,1,130.23100000000002,1,0,6,20.23,-2.39,CCCCCCCCO
|
438 |
+
Diethyl sulfide,-1.598,1,90.191,0,0,2,0.0,-1.34,CCSCC
|
439 |
+
"1,2-Dichloroethane",-1.374,1,98.96000000000001,0,0,1,0.0,-1.06,ClCCCl
|
440 |
+
2-Chloro-2-methylbutane,-2.278,1,106.59599999999999,0,0,1,0.0,-2.51,CCC(C)(C)Cl
|
441 |
+
1-Chloro-2-bromoethane,-1.7380000000000002,1,143.411,0,0,1,0.0,-1.32,ClCCBr
|
442 |
+
p-Nitroaniline,-1.936,1,138.126,1,1,1,69.16,-2.37,Nc1ccc(cc1)N(=O)=O
|
443 |
+
Lactose,1.071,1,342.297,8,2,4,189.52999999999997,-0.244,OCC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1O
|
444 |
+
RTI 2,-3.125,1,268.32,0,3,2,49.330000000000005,-2.86,CCN2c1ncccc1N(CC)C(=O)c3cccnc23
|
445 |
+
Chlorobenzene,-2.975,1,112.55899999999997,0,1,0,0.0,-2.38,Clc1ccccc1
|
446 |
+
1-Nonene ,-3.427,1,126.243,0,0,6,0.0,-5.05,CCCCCCCC=C
|
447 |
+
p-Bromoiodobenzene,-4.754,1,282.90599999999995,0,1,0,0.0,-4.56,Brc1ccc(I)cc1
|
448 |
+
3-Methyl-3-pentanol,-1.308,1,102.17699999999998,1,0,2,20.23,-0.36,CCC(C)(O)CC
|
449 |
+
Pentylbenzene,-3.8989999999999996,1,148.249,0,1,4,0.0,-4.64,CCCCCc1ccccc1
|
450 |
+
allantoin,0.652,1,158.117,4,1,1,113.32,-1.6,NC(=O)NC1NC(=O)NC1=O
|
451 |
+
Glafenine,-5.052,1,372.80800000000016,3,3,6,91.67999999999999,-4.571000000000001,OCC(O)COC(=O)c1ccccc1Nc2ccnc3cc(Cl)ccc23
|
452 |
+
DDD,-6.007999999999999,1,320.04600000000005,0,2,3,0.0,-7.2,ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2
|
453 |
+
testosterone acetate,-4.449,1,330.4680000000001,0,4,1,43.370000000000005,-5.184,CC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C
|
454 |
+
1-Chloronapthalene,-4.063,1,162.61899999999997,0,2,0,0.0,-3.93,Clc1cccc2ccccc12
|
455 |
+
RTI 19,-4.007,1,252.31699999999995,0,3,1,23.55,-4.749,CCN2c1ccccc1N(C)C(=O)c3ccccc23
|
456 |
+
2-Hexanol,-1.324,1,102.17699999999998,1,0,3,20.23,-0.89,CCCCC(C)O
|
457 |
+
Propylcyclopentane,-3.16,1,112.21600000000001,0,1,2,0.0,-4.74,CCCC1CCCC1
|
458 |
+
Etomidate,-3.359,1,244.294,0,2,4,44.12,-4.735,CCOC(=O)c1cncn1C(C)c2ccccc2
|
459 |
+
"3,4-Dichlorophenol",-3.352,1,163.00300000000001,1,1,0,20.23,-1.25,Oc1ccc(Cl)c(Cl)c1
|
460 |
+
Cypermethrin,-6.775,1,416.30400000000014,0,3,6,59.32000000000001,-8.017000000000001,CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2
|
461 |
+
Benzoxazole,-2.214,2,119.12299999999998,0,2,0,26.03,-1.16,c2ccc1ocnc1c2
|
462 |
+
1-Pentanol,-1.042,1,88.14999999999999,1,0,3,20.23,-0.6,CCCCCO
|
463 |
+
"N,N-Diethylaniline",-3.16,1,149.237,0,1,3,3.24,-3.03,CCN(CC)c1ccccc1
|
464 |
+
"1,3-Difluorobenzene",-2.636,1,114.094,0,1,0,0.0,-2.0,Fc1cccc(F)c1
|
465 |
+
3-chloropropionitrile,-0.522,1,89.525,0,0,1,23.79,-0.29,ClCCC#N
|
466 |
+
t-Pentylbenzene,-3.867,1,148.249,0,1,1,0.0,-4.15,CC(C)(C)Cc1ccccc1
|
467 |
+
5-Ethyl-5-phenylbarbital,-2.272,1,232.239,2,2,2,75.27000000000001,-2.322,O=C1NC(=O)NC(=O)C1(CC)c1ccccc1
|
468 |
+
o-Chloroiodobenzene,-4.3839999999999995,1,238.45499999999998,0,1,0,0.0,-3.54,Clc1ccccc1I
|
469 |
+
Benzotriazole,-2.21,2,119.127,1,2,0,41.57,-0.78,c2ccc1[nH]nnc1c2
|
470 |
+
Carbofuran,-3.05,1,221.25599999999994,1,2,1,47.56,-2.8,CNC(=O)Oc1cccc2CC(C)(C)Oc12
|
471 |
+
"2,6-Dimethylphenol",-2.589,1,122.16699999999999,1,1,0,20.23,-1.29,Cc1cccc(C)c1O
|
472 |
+
3-Methyl-2-butanol,-0.9540000000000001,1,88.14999999999999,1,0,1,20.23,-0.18,CC(C)C(C)O
|
473 |
+
benzhydrol,-3.033,1,184.238,1,2,2,20.23,-2.55,c1ccccc1C(O)c2ccccc2
|
474 |
+
Methyl decanoate,-3.3160000000000003,1,186.295,0,0,8,26.3,-4.69,CCCCCCCCCC(=O)OC
|
475 |
+
Dicapthon,-4.188,1,297.656,0,1,5,70.83000000000001,-4.31,COP(=S)(OC)Oc1ccc(cc1Cl)N(=O)=O
|
476 |
+
1-Bromo-2-methylpropane,-2.2880000000000003,1,137.01999999999998,0,0,1,0.0,-2.43,CC(C)CBr
|
477 |
+
Iodoethane,-2.066,1,155.966,0,0,0,0.0,-1.6,CCI
|
478 |
+
Pirimicarb,-2.34,1,238.29099999999997,0,1,2,58.56000000000001,-1.95,CN(C)C(=O)Oc1nc(nc(C)c1C)N(C)C
|
479 |
+
1-Bromohexane,-3.012,1,165.074,0,0,4,0.0,-3.81,CCCCCCBr
|
480 |
+
2-Methylpentane,-2.6,1,86.178,0,0,2,0.0,-3.74,CCCC(C)C
|
481 |
+
Tetrafluthrin,-6.3389999999999995,1,418.7360000000001,0,2,4,26.3,-7.321000000000001,Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F
|
482 |
+
Metolachlor,-3.431,1,283.7989999999999,0,1,6,29.54,-2.73,CCc1cccc(C)c1N(C(C)COC)C(=O)CCl
|
483 |
+
nifuroxime,-1.8430000000000002,1,156.09699999999998,1,1,2,88.87,-2.19,ON=Cc1ccc(o1)N(=O)=O
|
484 |
+
Fluvalinate,-8.057,1,502.9200000000002,1,3,8,71.35,-8.003,CC(C)C(Nc1ccc(cc1Cl)C(F)(F)F)C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2
|
485 |
+
Amitrole,-0.674,1,84.082,2,1,0,67.59,0.522,Nc1nc[nH]n1
|
486 |
+
Tribromomethane,-2.904,1,252.731,0,0,0,0.0,-1.91,BrC(Br)Br
|
487 |
+
Trichlorfon,-1.8659999999999999,1,257.437,1,0,3,55.760000000000005,-0.22,COP(=O)(OC)C(O)C(Cl)(Cl)Cl
|
488 |
+
Phosalone,-5.024,1,367.8160000000001,0,2,7,53.6,-5.233,CCOP(=S)(OCC)SCn1c(=O)oc2cc(Cl)ccc12
|
489 |
+
Phenylmethanol,-1.699,1,108.13999999999997,1,1,1,20.23,-0.4,OCc1ccccc1
|
490 |
+
Coumatetralyl,-5.194,1,292.33400000000006,1,4,1,50.44,-2.84,O=c2c(C3CCCc4ccccc43)c(O)c1ccccc1o2
|
491 |
+
4-Bromophenol,-3.1319999999999997,1,173.00900000000001,1,1,0,20.23,-1.09,Oc1ccc(Br)cc1
|
492 |
+
2-Bromopropane,-1.949,1,122.993,0,0,0,0.0,-1.59,CC(C)Br
|
493 |
+
"2,2,4-Trimethylpentane",-3.2760000000000002,1,114.23199999999999,0,0,1,0.0,-4.74,CC(C)CC(C)(C)C
|
494 |
+
"1,3,5-Trinitrobenzene",-2.324,1,213.10499999999996,0,1,3,129.42000000000002,-2.89,O=N(=O)c1cc(cc(c1)N(=O)=O)N(=O)=O
|
495 |
+
Nimetazepam,-3.557,1,295.29800000000006,0,3,2,75.81,-3.7960000000000003,CN2C(=O)CN=C(c1ccccc1)c3cc(ccc23)N(=O)=O
|
496 |
+
Propane,-1.5530000000000002,1,44.096999999999994,0,0,0,0.0,-1.94,CCC
|
497 |
+
Minoxidil,-1.8090000000000002,1,209.25299999999996,2,2,1,95.10999999999999,-1.989,Nc1cc(nc(N)n1=O)N2CCCCC2
|
498 |
+
1-aminoacridine,-3.542,1,194.23700000000002,1,3,0,38.91,-4.22,Nc2cccc3nc1ccccc1cc23
|
499 |
+
Benzo(k)fluoranthene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.49,c1ccc2cc3c4cccc5cccc(c3cc2c1)c45
|
500 |
+
Dicofol,-6.268,1,370.49,1,2,2,20.23,-5.666,OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl
|
501 |
+
Acenapthene,-3.792,2,154.21199999999996,0,3,0,0.0,-4.63,C1Cc2cccc3cccc1c23
|
502 |
+
Dialifos,-5.026,1,393.85400000000016,0,2,8,55.84,-6.34,CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O
|
503 |
+
"1,4-Dibromobenzene",-4.298,1,235.90599999999998,0,1,0,0.0,-4.07,Brc1ccc(Br)cc1
|
504 |
+
Methazole,-3.6010000000000004,1,261.064,0,2,1,57.14,-2.82,Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O
|
505 |
+
p-Phenylphenol,-3.701,1,170.211,1,2,1,20.23,-3.48,Oc1ccc(cc1)c2ccccc2
|
506 |
+
pyracarbolid,-2.83,1,217.26800000000003,1,2,2,38.33,-2.56,CC1=C(CCCO1)C(=O)Nc2ccccc2
|
507 |
+
Ethyl vinyl ether,-0.857,1,72.10700000000001,0,0,2,9.23,-0.85,CCOC=C
|
508 |
+
1-Butyne,-1.092,1,54.09199999999999,0,0,0,0.0,-1.24,CCC#C
|
509 |
+
4-methoxypteridine,-1.589,1,162.15200000000002,0,2,1,60.790000000000006,-1.11,COc1ncnc2nccnc12
|
510 |
+
3-Methyl-3-heptanol,-2.017,1,130.23099999999997,1,0,4,20.23,-1.6,CCCCC(C)(O)CC
|
511 |
+
"1,4-Dichlorobenzene",-3.5580000000000003,1,147.00400000000002,0,1,0,0.0,-3.27,Clc1ccc(Cl)cc1
|
512 |
+
3-Ethanoyloxymethylphenytoin,-2.7230000000000003,1,324.33600000000007,1,3,4,75.71,-4.47,O=C1N(COC(=O)C)C(=O)C(N1)(c2ccccc2)c3ccccc3
|
513 |
+
"Sparsomycin (3,8mg/ml)",-1.57,1,361.4450000000001,4,1,8,132.11999999999998,-1.9809999999999999,CSCS(=O)CC(CO)NC(=O)C=Cc1c(C)[nH]c(=O)[nH]c1=O
|
514 |
+
3-methylindole,-2.9810000000000003,1,131.17799999999997,1,2,0,15.79,-2.42,Cc1c[nH]c2ccccc12
|
515 |
+
2-methoxypteridine,-1.589,1,162.152,0,2,1,60.790000000000006,-1.11,COc2ncc1nccnc1n2
|
516 |
+
Dioxacarb,-1.614,1,223.22799999999995,1,2,2,56.790000000000006,-1.57,CNC(=O)Oc1ccccc1C2OCCO2
|
517 |
+
isocarbamid,-1.508,1,185.22699999999998,2,1,2,61.440000000000005,-2.15,C1N(C(=O)NCC(C)C)C(=O)NC1
|
518 |
+
Acetonitrile,0.152,1,41.053,0,0,0,23.79,0.26,CC#N
|
519 |
+
Fenoxycarb,-4.662,1,301.34200000000004,1,2,7,56.790000000000006,-4.7,CCOC(=O)NCCOc2ccc(Oc1ccccc1)cc2
|
520 |
+
acetyl sulfisoxazole,-2.024,1,293.34800000000007,1,2,3,89.43,-3.59,CC(=O)N(S(=O)c1ccc(N)cc1)c2onc(C)c2C
|
521 |
+
"1,1,1,2-Tetrachloroethane",-2.7939999999999996,1,167.85,0,0,0,0.0,-2.18,ClCC(Cl)(Cl)Cl
|
522 |
+
1-Butanol,-0.688,1,74.12299999999999,1,0,2,20.23,0.0,CCCCO
|
523 |
+
Siduron,-3.779,1,232.32700000000003,2,2,2,41.13,-4.11,CC1CCCCC1NC(=O)Nc2ccccc2
|
524 |
+
"1,3,5-Trichlorobenzene",-4.159,1,181.449,0,1,0,0.0,-4.48,Clc1cc(Cl)cc(Cl)c1
|
525 |
+
Furfural,-1.391,1,96.08499999999998,0,1,1,30.21,-0.1,O=Cc1ccco1
|
526 |
+
3-Methylbutan-1-ol,-1.0270000000000001,1,88.14999999999999,1,0,2,20.23,-0.51,CC(C)CCO
|
527 |
+
piperonal,-2.033,1,150.13299999999998,0,2,1,35.53,-1.63,O=Cc2ccc1OCOc1c2
|
528 |
+
2-Methylpropene,-1.5730000000000002,1,56.108000000000004,0,0,0,0.0,-2.33,CC(=C)C
|
529 |
+
Benzaldehyde,-1.999,1,106.12399999999997,0,1,1,17.07,-1.19,O=Cc1ccccc1
|
530 |
+
"2,3-Dimethyl-1,3-Butadiene",-2.052,1,82.146,0,0,1,0.0,-2.4,CC(=C)C(=C)C
|
531 |
+
Benfuracarb,-5.132999999999999,1,410.53600000000023,0,2,8,68.31,-4.71,CCOC(=O)CCN(SN(C)C(=O)Oc1cccc2CC(C)(C)Oc21)C(C)C
|
532 |
+
RTI 10,-2.7710000000000004,1,226.235,0,3,0,42.43,-3.6719999999999997,O2c1ccccc1N(C)C(=O)c3cccnc23
|
533 |
+
Fluorene ,-4.125,2,166.22299999999998,0,3,0,0.0,-5.0,C1c2ccccc2c3ccccc13
|
534 |
+
Methylcyclohexane ,-2.891,1,98.18900000000001,0,1,0,0.0,-3.85,CC1CCCCC1
|
535 |
+
sulfaguanidine,-0.706,1,214.25,4,1,2,122.05999999999999,-1.99,NC(=N)NS(=O)(=O)c1ccc(N)cc1
|
536 |
+
Methylparaben,-2.441,1,152.149,1,1,1,46.53,-1.827,COC(=O)c1ccc(O)cc1
|
537 |
+
2-Methyltetrahydrofurane,-1.034,1,86.134,0,1,0,9.23,0.11,CC1CCCO1
|
538 |
+
Santonin,-2.43,1,246.30599999999995,0,3,0,43.370000000000005,-3.09,CC3C2CCC1(C)C=CC(=O)C(=C1C2OC3=O)C
|
539 |
+
Salicin,-0.975,1,286.28,5,2,4,119.61000000000001,-0.85,OCC2OC(Oc1ccccc1CO)C(O)C(O)C2O
|
540 |
+
1-Iodopropane,-2.4859999999999998,1,169.993,0,0,1,0.0,-2.29,CCCI
|
541 |
+
Ametryn,-3.43,1,227.337,2,1,5,62.730000000000004,-3.04,CCNc1nc(NC(C)C)nc(SC)n1
|
542 |
+
1-Propanol,-0.33399999999999996,1,60.096,1,0,1,20.23,0.62,CCCO
|
543 |
+
Hydroxyprogesterone-17a,-3.8760000000000003,1,330.4680000000001,1,4,1,54.37,-3.8169999999999997,CC(=O)C1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C
|
544 |
+
2-Pentanol,-0.97,1,88.14999999999999,1,0,2,20.23,-0.29,CCCC(C)O
|
545 |
+
benzoin,-3.148,1,212.248,1,2,3,37.3,-2.85,OC(C(=O)c1ccccc1)c2ccccc2
|
546 |
+
"2,4-Dimethylphenol",-2.6210000000000004,1,122.16699999999999,1,1,0,20.23,-1.19,Cc1ccc(O)c(C)c1
|
547 |
+
m-Chloronitrobenzene ,-2.9010000000000002,1,157.55599999999998,0,1,1,43.14,-2.77,Clc1cccc(c1)N(=O)=O
|
548 |
+
ampyrone,-1.192,1,203.245,1,2,1,52.95,-0.624,Cc2c(N)c(=O)n(c1ccccc1)n2C
|
549 |
+
"2,2',4,5'-PCB",-6.23,1,291.99199999999996,0,2,1,0.0,-6.57,Clc1ccc(c(Cl)c1)c2cc(Cl)ccc2Cl
|
550 |
+
"Hexachloro-1,3-butadiene",-4.546,1,260.762,0,0,1,0.0,-4.92,ClC(=C(Cl)C(=C(Cl)Cl)Cl)Cl
|
551 |
+
Terbutryn,-3.75,1,241.364,2,1,4,62.730000000000004,-4.0,CCNc1nc(NC(C)(C)C)nc(SC)n1
|
552 |
+
3-Methyl-2-pentanol,-1.308,1,102.17699999999999,1,0,3,20.23,-0.71,CCC(C)CCO
|
553 |
+
2-methylpteridine,-1.24,1,146.153,0,2,0,51.56,-0.12,Cc2ncc1nccnc1n2
|
554 |
+
Danazol,-4.5569999999999995,1,337.4630000000001,1,5,0,46.260000000000005,-5.507000000000001,CC23Cc1cnoc1C=C2CCC4C3CCC5(C)C4CCC5(O)C#C
|
555 |
+
1-Iodobutane,-2.841,1,184.01999999999998,0,0,2,0.0,-2.96,CCCCI
|
556 |
+
2-Bromonapthalene,-4.434,1,207.07,0,2,0,0.0,-4.4,Brc1ccc2ccccc2c1
|
557 |
+
"Digoxin (L1=41,8mg/mL, L2=68,2mg/mL, Z=40,1mg/mL)",-5.312,1,780.9490000000001,6,8,7,203.05999999999997,-4.081,CC1OC(CC(O)C1O)OC2C(O)CC(OC2C)OC8C(O)CC(OC7CCC3(C)C(CCC4C3CC(O)C5(C)C(CCC45O)C6=CC(=O)OC6)C7)OC8C
|
558 |
+
Benzyltrifluoride,-3.0989999999999998,1,146.111,0,1,0,0.0,-2.51,FC(F)(F)c1ccccc1
|
559 |
+
Dihexyl phthalate,-5.757999999999999,1,334.45600000000024,0,1,12,52.60000000000001,-6.144,CCCCCCOC(=O)c1ccccc1C(=O)OCCCCCC
|
560 |
+
Dibenzothiophene,-4.5969999999999995,2,184.263,0,3,0,0.0,-4.38,c1ccc2c(c1)sc3ccccc23
|
561 |
+
"2,3',4,4'-PCB",-6.709,1,326.437,0,2,1,0.0,-7.8,Clc1ccc(c(Cl)c1)c2ccc(Cl)c(Cl)c2Cl
|
562 |
+
"2,2',3,3',4,4'-PCB",-7.192,1,360.88200000000006,0,2,1,0.0,-8.01,Clc1ccc(c(Cl)c1Cl)c2ccc(Cl)c(Cl)c2Cl
|
563 |
+
Warfarin,-3.9130000000000003,1,308.3330000000001,1,3,4,67.50999999999999,-3.8930000000000002,CC(=O)CC(c1ccccc1)c3c(O)c2ccccc2oc3=O
|
564 |
+
hydrobenzoin,-2.645,1,214.264,2,2,3,40.46,-1.93,c1ccccc1C(O)C(O)c2ccccc2
|
565 |
+
Dimethyl phthalate,-2.347,1,194.18599999999995,0,1,2,52.60000000000001,-1.66,COC(=O)c1ccccc1C(=O)OC
|
566 |
+
Ethyl octanoate,-2.9619999999999997,1,172.26799999999997,0,0,7,26.3,-3.39,CCCCCCCC(=O)OCC
|
567 |
+
Diethyldisulfide,-2.364,1,122.258,0,0,3,0.0,-2.42,CCSSCC
|
568 |
+
"1,2-Diethoxyethane ",-0.833,1,118.176,0,0,5,18.46,-0.77,CCOCCOCC
|
569 |
+
"1,2,4,5-Tetrachlorobenzene",-4.621,1,215.894,0,1,0,0.0,-5.56,Clc1cc(Cl)c(Cl)cc1Cl
|
570 |
+
p-benzidine,-2.613,1,184.242,2,2,1,52.04,-2.7,Nc1ccc(cc1)c2ccc(N)cc2
|
571 |
+
1-Heptene,-2.718,1,98.189,0,0,4,0.0,-3.73,CCCCCC=C
|
572 |
+
Ethirimol,-2.7319999999999998,1,209.29299999999998,2,1,5,57.78,-3.028,CCCCc1c(C)nc(NCC)[nH]c1=O
|
573 |
+
Pentobarbital,-2.312,1,226.27599999999995,2,1,4,75.27000000000001,-2.39,O=C1NC(=O)NC(=O)C1(CC)C(C)CCC
|
574 |
+
o-Chloroaniline,-2.392,1,127.574,1,1,0,26.02,-1.52,Nc1ccccc1Cl
|
575 |
+
3-Chloroanisole,-3.057,1,142.58499999999998,0,1,1,9.23,-2.78,COc1cccc(Cl)c1
|
576 |
+
Pebulate,-3.1310000000000002,1,203.35099999999997,0,0,6,20.310000000000002,-3.53,CCCCN(CC)C(=O)SCCC
|
577 |
+
Butyl acetate,-1.111,1,102.13299999999998,0,0,4,26.3,-1.37,CCCCOC=O
|
578 |
+
Prednisolone,-2.9739999999999998,1,360.4500000000002,3,4,2,94.83,-3.18,CC12CC(O)C3C(CCC4=CC(=O)C=CC34C)C2CCC1(O)C(=O)CO
|
579 |
+
Bromodichloromethane,-2.176,1,163.82899999999998,0,0,0,0.0,-1.54,BrC(Cl)Cl
|
580 |
+
adrenosterone,-2.99,1,300.3980000000001,0,4,0,51.21,-3.48,CC34CC(=O)C1C(CCC2=CC(=O)CCC12C)C3CCC4(=O)
|
581 |
+
p-terphenyl,-5.7410000000000005,2,230.31,0,3,2,0.0,-7.11,c1ccc(cc1)c2ccc(cc2)c3ccccc3
|
582 |
+
p-Hydroxybenzaldehyde ,-2.003,1,122.12299999999998,1,1,1,37.3,-0.96,Oc1ccc(C=O)cc1
|
583 |
+
Bromomethane,-1.109,1,94.939,0,0,0,0.0,-0.79,CBr
|
584 |
+
Perfluidone,-4.945,1,379.38100000000003,1,2,4,80.31,-3.8,Cc1cc(ccc1NS(=O)(=O)C(F)(F)F)S(=O)(=O)c2ccccc2
|
585 |
+
Coumachlor,-4.553999999999999,1,342.7780000000001,1,3,4,67.50999999999999,-5.8389999999999995,CC(=O)CC(c1ccc(Cl)cc1)c2c(O)c3ccccc3oc2=O
|
586 |
+
2-Ethylnaphthalene,-4.1,1,156.22799999999998,0,2,1,0.0,-4.29,CCc1ccc2ccccc2c1
|
587 |
+
5-methylcytosine,-0.257,1,125.13099999999999,2,1,0,71.77000000000001,-1.4580000000000002,Nc1c(C)c[nH]c(=O)n1
|
588 |
+
"2,3,4,5,6-PCB",-6.785,1,326.437,0,2,1,0.0,-7.92,Clc2c(Cl)c(Cl)c(c1ccccc1)c(Cl)c2Cl
|
589 |
+
benodanil,-4.245,1,323.133,1,2,2,29.1,-4.21,c1c(NC(=O)c2ccccc2(I))cccc1
|
590 |
+
Riboflavin,-1.865,1,376.36900000000014,5,3,5,161.56,-3.685,Cc3cc2nc1c(=O)[nH]c(=O)nc1n(CC(O)C(O)C(O)CO)c2cc3C
|
591 |
+
o-Fluorobromobenzene,-3.467,1,175.0,0,1,0,0.0,-2.7,Fc1ccccc1Br
|
592 |
+
"2,4-Dichlorophenol ",-3.22,1,163.003,1,1,0,20.23,-1.55,Oc1ccc(Cl)cc1Cl
|
593 |
+
Permethrin,-7.129,1,391.2940000000001,0,3,6,35.53,-6.291,CC1(C)C(C=C(Cl)Cl)C1C(=O)OCc2cccc(Oc3ccccc3)c2
|
594 |
+
piroxicam,-3.4730000000000003,1,331.353,2,3,2,99.60000000000001,-4.16,CN2C(=C(O)c1ccccc1S2(=O)=O)C(=O)Nc3ccccn3
|
595 |
+
3-Propanoyloxymethylphenytoin,-3.128,1,338.36300000000006,1,3,5,75.71,-4.907,O=C1N(COC(=O)CC)C(=O)C(N1)(c2ccccc2)c3ccccc3
|
596 |
+
Cyclopentane ,-2.0380000000000003,2,70.135,0,1,0,0.0,-2.64,C1CCCC1
|
597 |
+
o-Toluidine,-1.922,1,107.156,1,1,0,26.02,-2.21,Cc1ccccc1N
|
598 |
+
Estragole,-3.074,1,148.205,0,1,3,9.23,-2.92,c1(OC)ccc(CC=C)cc1
|
599 |
+
karbutilate,-2.655,1,279.34,2,1,2,70.67,-2.93,CN(C)C(=O)Nc1cccc(OC(=O)NC(C)(C)C)c1
|
600 |
+
3-Methyl-1-Butene,-1.994,1,70.135,0,0,1,0.0,-2.73,CC(C)C=C
|
601 |
+
2-Hydroxypyridine,-1.655,1,95.101,1,1,0,33.120000000000005,1.02,Oc1ccccn1
|
602 |
+
Ethane,-1.1320000000000001,1,30.07,0,0,0,0.0,-1.36,CC
|
603 |
+
"1,2-Dichlorobenzene",-3.4819999999999998,1,147.00399999999996,0,1,0,0.0,-3.05,Clc1ccccc1Cl
|
604 |
+
mercaptobenzothiazole,-3.411,1,167.25799999999998,1,2,0,12.89,-3.18,Sc2nc1ccccc1s2
|
605 |
+
"2,2',3,3',4,4',5,5',6,6'-PCB",-9.589,1,498.66200000000026,0,2,1,0.0,-11.6,Clc1c(Cl)c(Cl)c(c(Cl)c1Cl)c2c(Cl)c(Cl)c(Cl)c(Cl)c2Cl
|
606 |
+
Methoxsalen,-3.25,1,216.19199999999995,0,3,1,52.58,-3.6639999999999997,COc2c1occc1cc3ccc(=O)oc23
|
607 |
+
Acetamide,0.494,1,59.068,1,0,0,43.09,1.58,CC(=O)N
|
608 |
+
1-Methylnaphthalene,-3.802,1,142.201,0,2,0,0.0,-3.7,Cc1cccc2ccccc12
|
609 |
+
Napropamide,-4.088,1,271.36,0,2,5,29.540000000000003,-3.57,CCN(CC)C(=O)C(C)Oc1cccc2ccccc12
|
610 |
+
"3,3-Dimethyl-2-butanol",-1.2919999999999998,1,102.17699999999999,1,0,0,20.23,-0.62,CC(O)C(C)(C)C
|
611 |
+
Methyl pentanoate,-1.545,1,116.15999999999998,0,0,3,26.3,-1.36,CCCC(=O)OCC
|
612 |
+
Menadione,-2.667,1,172.18299999999996,0,2,0,34.14,-3.03,CC2=CC(=O)c1ccccc1C2=O
|
613 |
+
Phenanthrene,-4.518,2,178.23399999999998,0,3,0,0.0,-5.26,c1ccc2c(c1)ccc3ccccc32
|
614 |
+
"2,4-Dimethylpyridine",-2.0980000000000003,1,107.15599999999999,0,1,0,12.89,0.38,Cc1ccnc(C)c1
|
615 |
+
1-Nonanol,-2.46,1,144.258,1,0,7,20.23,-3.01,CCCCCCCCCO
|
616 |
+
Dibromomethane,-1.883,1,173.83499999999998,0,0,0,0.0,-1.17,BrCBr
|
617 |
+
Dexamethasone,-3.4,1,392.4670000000002,3,4,2,94.83,-3.59,CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO
|
618 |
+
"2,6-Dimethylnaphthalene ",-4.147,1,156.228,0,2,0,0.0,-4.89,Cc1ccc2cc(C)ccc2c1
|
619 |
+
Butylate,-3.4530000000000003,1,217.378,0,0,5,20.310000000000002,-3.68,CCSC(=O)N(CC(C)C)CC(C)C
|
620 |
+
nitroglycerin,-2.029,1,227.08499999999998,0,0,8,157.11,-2.22,O=N(=O)OCC(CON(=O)=O)ON(=O)=O
|
621 |
+
m-Nitroaniline,-1.936,1,138.126,1,1,1,69.16,-2.19,Nc1cccc(c1)N(=O)=O
|
622 |
+
1-Chlorobutane,-1.94,1,92.56899999999999,0,0,2,0.0,-2.03,CCCCCl
|
623 |
+
triforine,-3.715,1,430.9340000000001,2,1,6,64.68,-4.19,ClC(Cl)(Cl)C(NC=O)N1C=CN(C=C1)C(NC=O)C(Cl)(Cl)Cl
|
624 |
+
Fluridone,-4.249,1,329.32099999999997,0,3,2,22.0,-4.445,Cn2cc(c1ccccc1)c(=O)c(c2)c3cccc(c3)C(F)(F)F
|
625 |
+
6-aminochrysene,-4.849,1,243.309,1,4,0,26.02,-6.2,Nc3cc2c1ccccc1ccc2c4ccccc34
|
626 |
+
Estrone,-3.872,1,270.372,1,4,0,37.3,-3.955,CC12CCC3C(CCc4cc(O)ccc34)C2CCC1=O
|
627 |
+
RTI 17,-4.227,1,269.373,0,3,1,19.37,-4.706,CCN2c1ccccc1N(C)C(=S)c3cccnc23
|
628 |
+
"1,2-Propylene oxide",-0.358,1,58.08,0,1,0,12.53,-0.59,CC1CO1
|
629 |
+
Nitrazepam,-3.4730000000000003,1,281.271,1,3,2,84.6,-3.7960000000000003,O=C3CN=C(c1ccccc1)c2cc(ccc2N3)N(=O)=O
|
630 |
+
"1,3-diethylthiourea",-1.028,1,132.232,2,0,2,24.06,-1.46,CCNC(=S)NCC
|
631 |
+
"2,3,5-Trichlorophenol",-3.78,1,197.44799999999998,1,1,0,20.23,-2.67,Oc1cc(Cl)cc(Cl)c1Cl
|
632 |
+
Propyl propanoate,-1.545,1,116.15999999999998,0,0,3,26.3,-1.34,CCCCC(=O)OC
|
633 |
+
Aniline ,-1.632,1,93.12899999999999,1,1,0,26.02,-0.41,Nc1ccccc1
|
634 |
+
"1,5-Dimethlnapthalene",-4.147,1,156.228,0,2,0,0.0,-4.678999999999999,Cc1cccc2c(C)cccc12
|
635 |
+
hydrochlorothiazide,-1.72,1,297.745,3,2,1,118.35999999999999,-2.63,NS(=O)(=O)c2cc1c(NCNS1(=O)=O)cc2Cl
|
636 |
+
Acenapthylene,-3.682,2,152.19599999999994,0,3,0,0.0,-3.96,C1=Cc2cccc3cccc1c23
|
637 |
+
Ethyl butyrate,-2.254,1,144.21399999999997,0,0,5,26.3,-1.28,CCCCCOC(=O)CC
|
638 |
+
Atratone,-3.185,1,211.26899999999998,2,1,5,71.96000000000001,-2.084,CCNc1nc(NC(C)C)nc(OC)n1
|
639 |
+
Benzo(a)pyrene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.699,c1ccc2c(c1)cc3ccc4cccc5ccc2c3c45
|
640 |
+
Bromoethane,-1.5290000000000001,1,108.966,0,0,0,0.0,-1.09,CCBr
|
641 |
+
3-Hexyne,-1.933,1,82.14599999999999,0,0,0,0.0,-1.99,CCC#CCC
|
642 |
+
Digitoxin,-6.114,1,764.9499999999999,5,8,7,182.82999999999998,-5.292999999999999,CC1OC(CC(O)C1O)OC2C(O)CC(OC2C)OC8C(O)CC(OC7CCC3(C)C(CCC4C3CCC5(C)C(CCC45O)C6=CC(=O)OC6)C7)OC8C
|
643 |
+
2-Methyl-1-Butene,-1.994,1,70.13499999999999,0,0,1,0.0,-2.73,CCC(=C)C
|
644 |
+
8-quinolinol,-2.725,1,145.16099999999997,1,2,0,33.120000000000005,-2.42,Oc1cccc2cccnc12
|
645 |
+
"1,2,3,4-Tetrahydronapthalene",-3.447,2,132.20599999999996,0,2,0,0.0,-4.37,C1CCc2ccccc2C1
|
646 |
+
phenolphthalein,-4.59,1,318.32800000000003,2,4,2,66.76,-2.9,Oc1ccc(cc1)C2(OC(=O)c3ccccc23)c4ccc(O)cc4
|
647 |
+
"1,3,5-Tribromobenzene",-5.27,1,314.802,0,1,0,0.0,-5.6,Brc1cc(Br)cc(Br)c1
|
648 |
+
Ronnel,-5.247000000000001,1,321.549,0,1,4,27.69,-5.72,COP(=S)(OC)Oc1cc(Cl)c(Cl)cc1Cl
|
649 |
+
methylthiouracil,-0.547,1,142.18300000000002,2,1,0,48.65,-2.436,Cc1cc(=O)[nH]c(=S)[nH]1
|
650 |
+
Eugenol,-2.675,1,164.204,1,1,3,29.46,-1.56,COc1cc(CC=C)ccc1O
|
651 |
+
5-Allyl-5-isopropylbarbital,-1.706,1,210.23299999999998,2,1,3,75.27000000000001,-1.7080000000000002,O=C1NC(=O)NC(=O)C1(C(C)C)CC=C
|
652 |
+
Pyrene,-4.957,2,202.25599999999997,0,4,0,0.0,-6.176,c1cc2ccc3cccc4ccc(c1)c2c34
|
653 |
+
"1,1-Diethoxyethane ",-0.899,1,118.176,0,0,4,18.46,-0.43,CCOC(C)OCC
|
654 |
+
Clomazone,-3.077,1,239.702,0,2,2,29.54,-2.338,CC1(C)CON(Cc2ccccc2Cl)C1=O
|
655 |
+
2-Butoxyethanol,-0.775,1,118.17599999999999,1,0,5,29.46,-0.42,CCCCOCCO
|
656 |
+
Quintozene,-5.098,1,295.336,0,1,1,43.14,-5.82,Clc1c(Cl)c(Cl)c(N(=O)=O)c(Cl)c1Cl
|
657 |
+
Androsterone,-3.8819999999999997,1,290.447,1,4,0,37.3,-4.402,CC12CCC(O)CC1CCC3C2CCC4(C)C3CCC4=O
|
658 |
+
Flurochloridone,-4.749,1,312.118,0,2,2,20.310000000000002,-4.047,FC(F)(F)c1cccc(c1)N2CC(CCl)C(Cl)C2=O
|
659 |
+
Quinoline,-2.6630000000000003,2,129.16199999999998,0,2,0,12.89,-1.3,c1ccc2ncccc2c1
|
660 |
+
methyl gallate,-1.913,1,184.147,3,1,1,86.99000000000001,-1.24,COC(=O)c1cc(O)c(O)c(O)c1
|
661 |
+
fluconazole,-2.418,1,306.276,1,3,5,81.64999999999999,-1.8,OC(Cn1cncn1)(Cn2cncn2)c3ccc(F)cc3F
|
662 |
+
Chlorzoxazone,-2.679,1,169.567,1,2,0,46.0,-2.8310000000000004,Clc2ccc1oc(=O)[nH]c1c2
|
663 |
+
"2,2',3,4,4',5',6-PCB",-7.898,1,395.3270000000001,0,2,1,0.0,-7.92,Clc1ccc(c(Cl)c1)c2c(Cl)c(Cl)c(Cl)c(Cl)c2Cl
|
664 |
+
alloxan,0.436,1,142.07,2,1,0,92.34,-1.25,O=C1NC(=O)C(=O)C(=O)N1
|
665 |
+
"1,3-Dichloropropane",-1.618,1,112.98700000000001,0,0,2,0.0,-1.62,ClCCCCl
|
666 |
+
m-Fluorobromobenzene,-3.467,1,175.0,0,1,0,0.0,-2.67,Fc1cccc(Br)c1
|
667 |
+
p-Chlorobromobenzene,-3.928,1,191.45499999999998,0,1,0,0.0,-3.63,Clc1ccc(Br)cc1
|
668 |
+
"2,3-Dimethylbutane",-2.5839999999999996,1,86.178,0,0,1,0.0,-3.65,CC(C)C(C)C
|
669 |
+
1-Butene,-1.655,1,56.108,0,0,1,0.0,-1.94,CCC=C
|
670 |
+
"2,2',3,4,5,5'-PCB",-7.343,1,360.88200000000006,0,2,1,0.0,-7.68,Clc1ccc(Cl)c(c1)c2cc(Cl)c(Cl)c(Cl)c2Cl
|
671 |
+
cytosine,0.051,1,111.104,2,1,0,71.77000000000001,-1.155,Nc1cc[nH]c(=O)n1
|
672 |
+
"1,1,2-Trichlorotrifluoroethane",-3.077,1,187.37500000000003,0,0,1,0.0,-3.04,FC(F)(Cl)C(F)(Cl)Cl
|
673 |
+
Propionitrile,-0.26899999999999996,1,55.07999999999999,0,0,0,23.79,0.28,CCC#N
|
674 |
+
"O,P'-DDD",-6.007999999999999,1,320.04600000000005,0,2,3,0.0,-6.51,ClC(Cl)C(c1ccc(Cl)cc1)c2ccccc2Cl
|
675 |
+
o-Nitroanisole,-2.346,1,153.13699999999997,0,1,2,52.37,-1.96,COc1ccccc1N(=O)=O
|
676 |
+
Prasterone,-3.5639999999999996,1,288.43100000000004,1,4,0,37.3,-4.12,CC34CCC1C(CC=C2CC(O)CCC12C)C3CCC4=O
|
677 |
+
Procymidone,-3.464,1,284.142,0,3,1,37.38,-4.8,CC12CC2(C)C(=O)N(C1=O)c3cc(Cl)cc(Cl)c3
|
678 |
+
Benzo[ghi]perylene,-6.446000000000001,2,276.338,0,6,0,0.0,-9.017999999999999,c1cc2ccc3ccc4ccc5cccc6c(c1)c2c3c4c56
|
679 |
+
Dinoseb,-3.715,1,240.21499999999995,1,1,4,106.51000000000002,-3.38,CCC(C)c1cc(cc(N(=O)=O)c1O)N(=O)=O
|
680 |
+
meconin,-0.825,1,196.20199999999997,0,2,2,44.760000000000005,-1.899,c1c(OC)c(OC)C2C(=O)OCC2c1
|
681 |
+
Glycerol,0.688,1,92.09400000000001,3,0,2,60.69,1.12,OCC(O)CO
|
682 |
+
Guaiacol,-1.9409999999999998,1,124.13899999999997,1,1,1,29.46,-1.96,COc1ccccc1O
|
683 |
+
chlorpyrifos,-4.9719999999999995,1,350.591,0,1,6,40.58,-5.67,CCOP(=S)(OCC)Oc1nc(Cl)c(Cl)cc1Cl
|
684 |
+
9-Methylanthracene,-4.87,1,192.261,0,3,0,0.0,-5.89,Cc1c2ccccc2cc3ccccc13
|
685 |
+
Antipyrene,-1.733,1,188.23000000000002,0,2,1,26.93,0.715,Cc1cc(=O)n(c2ccccc2)n1C
|
686 |
+
Methyl butyl ether ,-1.072,1,88.14999999999999,0,0,3,9.23,-0.99,CCCCOC
|
687 |
+
7-methylpteridine,-1.24,1,146.153,0,2,0,51.56,-0.8540000000000001,Cc2cnc1cncnc1n2
|
688 |
+
simazine,-2.8110000000000004,1,201.661,2,1,4,62.730000000000004,-4.55,CCNc1nc(Cl)nc(NCC)n1
|
689 |
+
"N,N-Dimethylacetamide",0.12300000000000001,1,87.12199999999999,0,0,0,20.310000000000002,1.11,CN(C)C(=O)C
|
690 |
+
Simetryn,-2.6889999999999996,1,213.31,0,1,3,45.150000000000006,-2.676,CSc1nc(nc(n1)N(C)C)N(C)C
|
691 |
+
Ethylene,-0.815,1,28.053999999999995,0,0,0,0.0,-0.4,C=C
|
692 |
+
"3,3-Dimethyl-1-butanol",-1.365,1,102.17699999999999,1,0,1,20.23,-0.5,CC(C)(C)CCO
|
693 |
+
5-Allyl-5-ethylbarbital,-1.368,1,196.20599999999996,2,1,3,75.27000000000001,-1.614,O=C1NC(=O)NC(=O)C1(CC)CC=C
|
694 |
+
"2,3,4-Trichlorophenol",-3.705,1,197.448,1,1,0,20.23,-2.67,Oc1ccc(Cl)c(Cl)c1Cl
|
695 |
+
Anisole,-2.3680000000000003,1,108.13999999999997,0,1,1,9.23,-1.85,COc1ccccc1
|
696 |
+
chloropropylate,-5.093,1,339.21800000000013,1,2,4,46.53,-4.53,c1ccc(Cl)cc1C(c2ccc(Cl)cc2)(O)C(=O)OC(C)C
|
697 |
+
aldosterone,-3.0660000000000003,1,360.45000000000005,2,4,3,91.67000000000002,-3.85,CC13CCC(=O)C=C1CCC4C2CCC(C(=O)CO)C2(CC(O)C34)C=O
|
698 |
+
Difenoxuron,-3.928,1,286.331,1,2,4,50.800000000000004,-4.16,COc2ccc(Oc1ccc(NC(=O)N(C)C)cc1)cc2
|
699 |
+
4-Ethyltoluene,-3.3280000000000003,1,120.19499999999996,0,1,1,0.0,-3.11,CCc1ccc(C)cc1
|
700 |
+
Diisopropylsulfide,-2.162,1,118.24499999999999,0,0,2,0.0,-2.24,CC(C)SC(C)C
|
701 |
+
"1,3-Dinitrobenzene",-2.281,1,168.10799999999995,0,1,2,86.28,-2.29,O=N(=O)c1cccc(c1)N(=O)=O
|
702 |
+
Ethion,-5.471,1,384.4870000000002,0,0,12,36.92,-5.54,CCOP(=S)(OCC)SCSP(=S)(OCC)OCC
|
703 |
+
probarbital,-1.6030000000000002,1,198.22199999999998,2,1,2,75.27000000000001,-2.21,CCC1(C(C)C)C(=O)NC(=O)NC1=O
|
704 |
+
cortisone acetate,-3.426,1,402.48700000000025,1,4,3,97.74000000000001,-4.21,CC(=O)OCC(=O)C3(O)CCC4C2CCC1=CC(=O)CCC1(C)C2C(=O)CC34C
|
705 |
+
Metronidazole,-0.8590000000000001,1,171.15599999999998,1,1,3,81.19,-1.22,Cc1ncc(N(=O)=O)n1CCO
|
706 |
+
p-Chloroaniline,-2.392,1,127.574,1,1,0,26.02,-1.66,Nc1ccc(Cl)cc1
|
707 |
+
"2,2-Dimethylpentanol",-1.719,1,116.20399999999998,1,0,3,20.23,-1.52,CCCC(C)(C)CO
|
708 |
+
Furane,-1.837,2,68.07499999999999,0,1,0,13.14,-0.82,c1ccoc1
|
709 |
+
Methoproptryne,-3.259,1,271.39,2,1,8,71.96000000000001,-2.928,COCCCNc1nc(NC(C)C)nc(SC)n1
|
710 |
+
Norea,-2.47,1,222.33199999999994,1,3,1,32.34,-3.1710000000000003,CN(C)C(=O)NC1CC2CC1C3CCCC23
|
711 |
+
t-Butylbenzene ,-3.554,1,134.22199999999998,0,1,0,0.0,-3.66,CC(C)(C)c1ccccc1
|
712 |
+
kebuzone,-2.645,1,322.36400000000003,0,3,5,57.690000000000005,-3.27,CC(=O)CCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3
|
713 |
+
prednisolone acetate,-3.5069999999999997,1,402.48700000000014,2,4,3,100.90000000000002,-4.37,CC(=O)OCC(=O)C3(O)CCC4C2CCC1=CC(=O)C=CC1(C)C2C(O)CC34C
|
714 |
+
Methyl propyl ether ,-0.718,1,74.12299999999999,0,0,2,9.23,-0.39,CCCOC
|
715 |
+
Isopropyl acetate,-1.1909999999999998,1,102.133,0,0,1,26.3,-0.55,CC(C)OC(=O)C
|
716 |
+
Bromobenzene,-3.345,1,157.01,0,1,0,0.0,-2.55,Brc1ccccc1
|
717 |
+
Ethyl-p-hydroxybenzoate ,-2.761,1,166.176,1,1,2,46.53,-2.35,CCOC(=O)c1ccc(O)cc1
|
718 |
+
3-Butanoyloxymethylphenytoin,-3.469,1,352.39000000000004,1,3,6,75.71,-5.071000000000001,O=C1N(COC(=O)CCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
|
719 |
+
testosterone propionate,-4.87,1,344.4950000000001,0,4,2,43.370000000000005,-5.37,CCC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C
|
720 |
+
Coronene,-6.885,2,300.36000000000007,0,7,0,0.0,-9.332,c1cc2ccc3ccc4ccc5ccc6ccc1c7c2c3c4c5c67
|
721 |
+
allopurinol,-0.84,1,136.114,2,2,0,74.43,-2.266,O=c1[nH]cnc2[nH]ncc12
|
722 |
+
Chloroethylene,-1.188,1,62.499,0,0,0,0.0,-1.75,ClC=C
|
723 |
+
diphenamid,-3.147,1,239.318,0,2,3,20.310000000000002,-2.98,CN(C)C(=O)C(c1ccccc1)c2ccccc2
|
724 |
+
Tetrabromomethane,-4.063,1,331.62699999999995,0,0,0,0.0,-3.14,BrC(Br)(Br)Br
|
725 |
+
RTI 22,-4.408,1,296.374,1,3,2,48.47,-4.871,CCN2c1cc(N(C)C)cc(C)c1NC(=O)c3cccnc23
|
726 |
+
phthalimide,-1.882,1,147.13299999999998,1,2,0,46.17,-2.61,O=C1NC(=O)c2ccccc12
|
727 |
+
Fenarimol,-4.1080000000000005,1,331.202,1,3,3,46.010000000000005,-4.38,OC(c1ccc(Cl)cc1)(c2cncnc2)c3ccccc3Cl
|
728 |
+
Methyl benzoate ,-2.4619999999999997,1,136.14999999999998,0,1,1,26.3,-1.85,COC(=O)c1ccccc1
|
729 |
+
1-methyluracil,-0.375,1,126.115,1,1,0,54.86,-0.807,Cn1ccc(=O)[nH]c1=O
|
730 |
+
oxyphenbutazone,-3.739,1,324.38000000000005,1,3,5,60.85000000000001,-3.73,CCCCC1C(=O)N(N(C1=O)c2ccc(O)cc2)c3ccccc3
|
731 |
+
"2,2',3,5'-PCB",-6.155,1,291.9920000000001,0,2,1,0.0,-6.47,Clc1ccc(Cl)c(c1)c2cccc(Cl)c2Cl
|
732 |
+
Quinethazone,-2.184,1,289.7440000000001,3,2,2,101.28999999999999,-3.29,CCC2NC(=O)c1cc(c(Cl)cc1N2)S(N)(=O)=O
|
733 |
+
Diuron,-3.301,1,233.09799999999998,1,1,1,32.34,-3.8,CN(C)C(=O)Nc1ccc(Cl)c(Cl)c1
|
734 |
+
Cyclopentene ,-1.72,2,68.11900000000001,0,1,0,0.0,-2.1,C1CC=CC1
|
735 |
+
alloxantin,0.919,1,286.156,6,2,1,191.0,-1.99,C1(=O)NC(=O)NC(=O)C1(O)C2(O)C(=O)NC(=O)NC2(=O)
|
736 |
+
Nonane,-3.678,1,128.259,0,0,6,0.0,-5.88,CCCCCCCCC
|
737 |
+
2-Chlorophenol,-2.553,1,128.558,1,1,0,20.23,-1.06,Oc1ccccc1Cl
|
738 |
+
5-Methylchrysene,-5.931,1,242.321,0,4,0,0.0,-6.59,c1cccc2c3c(C)cc4ccccc4c3ccc12
|
739 |
+
Phenetole,-2.66,1,122.16699999999996,0,1,2,9.23,-2.33,CCOc1ccccc1
|
740 |
+
ethyl cinnamate,-3.0980000000000003,1,176.215,0,1,3,26.3,-3.0,CCOC(=O)C=Cc1ccccc1
|
741 |
+
Terbacil,-3.033,1,216.66799999999998,1,1,0,54.86,-2.484,Cc1[nH]c(=O)n(c(=O)c1Cl)C(C)(C)C
|
742 |
+
Clonazepam,-3.707,1,315.716,1,3,2,84.6,-3.4989999999999997,Clc1ccccc1C2=NCC(=O)Nc3ccc(cc23)N(=O)=O
|
743 |
+
p-Toluenesulfonamide ,-1.815,1,171.22099999999998,1,1,1,60.16,-1.74,Cc1ccc(cc1)S(=O)(=O)N
|
744 |
+
Chlorbufam,-3.6289999999999996,1,223.659,1,1,2,38.33,-2.617,CC(OC(=O)Nc1cccc(Cl)c1)C#C
|
745 |
+
2-Methylheptane,-3.3080000000000003,1,114.23199999999999,0,0,4,0.0,-5.08,CCCCCC(C)C
|
746 |
+
Cyhalothrin,-6.905,1,449.8560000000001,0,3,6,59.32000000000001,-8.176,CC1(C)C(C=C(Cl)C(F)(F)F)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2
|
747 |
+
Apazone,-2.9,1,300.3620000000001,0,3,2,56.220000000000006,-3.5380000000000003,CCCC1C(=O)N3N(C1=O)c2cc(C)ccc2N=C3N(C)C
|
748 |
+
Diazepam,-4.05,1,284.74600000000004,0,3,1,32.67,-3.7539999999999996,CN2C(=O)CN=C(c1ccccc1)c3cc(Cl)ccc23
|
749 |
+
2-Methyl-3-pentanol,-1.308,1,102.17699999999999,1,0,2,20.23,-0.7,CCC(O)C(C)C
|
750 |
+
fensulfothion,-3.283,1,308.36100000000005,0,1,7,44.760000000000005,-2.3,CCOP(=S)(OCC)Oc1ccc(cc1)S(C)=O
|
751 |
+
borneol,-2.423,1,154.253,1,2,0,20.23,-2.32,CC1(C)C2CCC1(C)C(O)C2
|
752 |
+
Testosterone,-3.659,1,288.431,1,4,0,37.3,-4.02,CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1O
|
753 |
+
Heptane,-2.97,1,100.205,0,0,4,0.0,-4.53,CCCCCCC
|
754 |
+
1-Napthol,-3.08,1,144.17299999999997,1,2,0,20.23,-2.22,Oc1cccc2ccccc12
|
755 |
+
"cis-1,2-Dimethylcyclohexane",-3.305,1,112.216,0,1,0,0.0,-4.3,C/C1CCCCC1\C
|
756 |
+
Trimazosin,-3.958,1,435.48100000000034,2,3,6,132.5,-3.638,COc2cc1c(N)nc(nc1c(OC)c2OC)N3CCN(CC3)C(=O)OCC(C)(C)O
|
757 |
+
Cholanthrene,-5.942,2,254.33199999999997,0,5,0,0.0,-7.85,C1Cc2c3c1cccc3cc4c2ccc5ccccc54
|
758 |
+
Medrogestone,-4.593,1,340.5070000000001,0,4,1,34.14,-5.27,CC(=O)C3(C)CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C
|
759 |
+
2-Heptanone,-1.554,1,114.18799999999999,0,0,4,17.07,-1.45,CCCCCC(=O)C
|
760 |
+
Acephate,-0.41600000000000004,1,183.16899999999998,1,0,3,55.4,0.54,COP(=O)(NC(C)=O)SC
|
761 |
+
DEF,-4.074,1,314.5220000000001,0,0,12,17.07,-5.14,CCCCSP(=O)(SCCCC)SCCCC
|
762 |
+
phthalamide,-0.636,1,149.149,1,2,0,46.17,-2.932,c1cC2C(=O)NC(=O)C2cc1
|
763 |
+
Trichlomethiazide,-2.98,1,380.66200000000003,3,2,2,118.35999999999999,-2.68,NS(=O)(=O)c2cc1c(NC(NS1(=O)=O)C(Cl)Cl)cc2Cl
|
764 |
+
2-Methy-2-Butene,-1.994,1,70.13499999999999,0,0,0,0.0,-2.56,CC=C(C)C
|
765 |
+
"1,2,4-Trimethylbenzene",-3.343,1,120.195,0,1,0,0.0,-3.31,Cc1ccc(C)c(C)c1
|
766 |
+
"2,4,5-Trichlorophenol ",-3.78,1,197.448,1,1,0,20.23,-2.21,Oc1cc(Cl)c(Cl)cc1Cl
|
767 |
+
phenanthridine,-3.713,2,179.22199999999998,0,3,0,12.89,-2.78,c1ccc2c(c1)cnc3ccccc23
|
768 |
+
3-Methyl-3-hexanol,-1.663,1,116.20399999999998,1,0,3,20.23,-0.98,CCCC(C)(O)CC
|
769 |
+
Octane,-3.324,1,114.232,0,0,5,0.0,-5.24,CCCCCCCC
|
770 |
+
Anthracene,-4.518,2,178.23399999999995,0,3,0,0.0,-6.35,c1ccc2cc3ccccc3cc2c1
|
771 |
+
Phenylhydrazine,-1.8659999999999999,1,108.14399999999998,2,1,1,38.05,0.07,NNc1ccccc1
|
772 |
+
Propionaldehyde,-0.39399999999999996,1,58.08,0,0,1,17.07,0.58,CCC=O
|
773 |
+
Cyclooctane,-3.355,2,112.21600000000001,0,1,0,0.0,-4.15,C1CCCCCCC1
|
774 |
+
"5,5-Diallylbarbital",-1.4709999999999999,1,208.21699999999996,2,1,4,75.27000000000001,-2.077,O=C1NC(=O)NC(=O)C1(CC=C)CC=C
|
775 |
+
Trichloromethane,-1.8119999999999998,1,119.37800000000001,0,0,0,0.0,-1.17,ClC(Cl)Cl
|
776 |
+
thiouracil,-0.992,1,128.15599999999998,2,1,0,45.75,-2.273,Sc1nccc(=O)[nH]1
|
777 |
+
Pencycuron,-5.126,1,328.84299999999996,1,3,4,32.34,-5.915,Clc1ccc(CN(C2CCCC2)C(=O)Nc3ccccc3)cc1
|
778 |
+
1-Methylcyclohexene ,-2.574,1,96.17300000000002,0,1,0,0.0,-3.27,CC1=CCCCC1
|
779 |
+
2-Ethylhexanal,-2.2319999999999998,1,128.21499999999997,0,0,5,17.07,-2.13,CCCCC(CC)C=O
|
780 |
+
Khellin,-3.603,1,260.24499999999995,0,3,2,61.81,-3.0210000000000004,COc2c1occc1c(OC)c3c(=O)cc(C)oc23
|
781 |
+
5-Ethyl-5-(3-methylbutyl)barbital,-2.312,1,226.27599999999995,2,1,4,75.27000000000001,-2.658,O=C1NC(=O)NC(=O)C1(CC)CCC(C)C
|
782 |
+
Benzo(j)fluoranthene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.0,c1ccc2c3c(ccc2c1)c4cccc5cccc3c45
|
783 |
+
2-Ethylbutanal,-1.5230000000000001,1,100.16099999999999,0,0,3,17.07,-1.52,CCC(CC)C=O
|
784 |
+
Dipropyl ether,-1.426,1,102.17699999999999,0,0,4,9.23,-1.62,CCCOCCC
|
785 |
+
1-Tetradecanol,-4.231,1,214.39299999999994,1,0,12,20.23,-5.84,CCCCCCCCCCCCCCO
|
786 |
+
"2,3,6-Trichlorophenol",-3.572,1,197.44799999999998,1,1,0,20.23,-2.64,Oc1c(Cl)ccc(Cl)c1Cl
|
787 |
+
Urea,0.8320000000000001,1,60.056,2,0,0,69.11,0.96,NC(=O)N
|
788 |
+
1-Pentyne,-1.446,1,68.11899999999999,0,0,1,0.0,-1.64,CCCC#C
|
789 |
+
"1,3-Dibromobenzene",-4.298,1,235.90599999999998,0,1,0,0.0,-3.54,Brc1cccc(Br)c1
|
790 |
+
1-Octadecanol,-5.649,1,270.50099999999986,1,0,16,20.23,-8.4,CCCCCCCCCCCCCCCCCCO
|
791 |
+
Acetanilide,-1.857,1,135.16599999999997,1,1,1,29.1,-1.33,CC(=O)Nc1ccccc1
|
792 |
+
hematein,-1.795,1,300.266,4,4,0,107.22000000000001,-2.7,c1cc(O)c(O)c2OCC3(O)CC4=CC(=O)C(O)=CC4=C3c21
|
793 |
+
Isonazid,-0.7170000000000001,1,137.14200000000002,2,1,1,68.01,0.009000000000000001,c1nccc(C(=O)NN)c1
|
794 |
+
hydroxychlordene,-4.156000000000001,1,354.8749999999999,1,3,0,20.23,-5.46,OC1C=CC2C1C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl
|
795 |
+
Isopentyl formate,-1.449,1,116.15999999999998,0,0,4,26.3,-1.52,CC(C)CCOC=O
|
796 |
+
Acetophenone,-2.0780000000000003,1,120.15099999999995,0,1,1,17.07,-1.28,CC(=O)c1ccccc1
|
797 |
+
biquinoline,-4.9030000000000005,2,256.308,0,4,1,25.78,-5.4,c2ccc1nc(ccc1c2)c4ccc3ccccc3n4
|
798 |
+
Triethyl phosphate,-0.953,1,182.15599999999998,0,0,6,44.760000000000005,0.43,CCOP(=O)(OCC)OCC
|
799 |
+
D-fenchone,-2.158,1,152.237,0,2,0,17.07,-1.85,CC2(C)C1CCC(C)(C1)C2=O
|
800 |
+
7-methoxypteridine,-1.589,1,162.152,0,2,1,60.790000000000006,-0.91,COc2cnc1cncnc1n2
|
801 |
+
Chlordene,-5.152,1,338.876,0,3,0,0.0,-5.64,ClC2=C(Cl)C3(Cl)C1C=CCC1C2(Cl)C3(Cl)Cl
|
802 |
+
2-Nitropropane,-0.743,1,89.094,0,0,1,43.14,-0.62,CC(C)N(=O)=O
|
803 |
+
Carbazole,-3.8360000000000003,2,167.21099999999998,1,3,0,15.79,-5.27,c1ccc2c(c1)[nH]c3ccccc32
|
804 |
+
Erythritol,0.675,1,122.11999999999999,4,0,3,80.92,0.7,OCC(O)C(O)CO
|
805 |
+
Risocaine,-2.7089999999999996,1,179.21899999999997,1,1,3,52.32,-2.452,CCCOC(=O)c1ccc(N)cc1
|
806 |
+
Azodrin,-0.9490000000000001,1,223.16499999999996,1,0,5,73.86,0.6509999999999999,CNC(=O)C=C(C)OP(=O)(OC)OC
|
807 |
+
Succinimide,0.282,1,99.089,1,1,0,46.17,0.3,O=C1CCC(=O)N1
|
808 |
+
"2,3-Dimethylpentane",-2.938,1,100.20499999999998,0,0,2,0.0,-4.28,CCC(C)C(C)C
|
809 |
+
bupirimate,-3.4930000000000003,1,316.4270000000001,1,1,8,84.42,-4.16,CCCCc1c(C)nc(NCC)nc1OS(=O)(=O)N(C)C
|
810 |
+
RTI 16,-3.411,1,270.361,0,3,1,32.260000000000005,-4.6339999999999995,CCN2c1ncccc1N(C)C(=S)c3cccnc23
|
811 |
+
RTI 9,-3.784,1,239.274,0,3,1,29.54,-3.68,O2c1ccccc1N(CC)C(=O)c3ccccc23
|
812 |
+
Tetrahydropyran ,-0.978,2,86.134,0,1,0,9.23,-0.03,C1CCOCC1
|
813 |
+
1-Heptyne,-2.155,1,96.17299999999999,0,0,3,0.0,-3.01,CCCCCC#C
|
814 |
+
osthole,-4.0760000000000005,1,244.28999999999994,0,2,3,39.44,-4.314,c1cc2ccc(OC)c(CC=C(C)(C))c2oc1=O
|
815 |
+
3-Methylcholanthrene,-6.311,1,268.3589999999999,0,5,0,0.0,-7.92,c1cc(C)cc2c1c3cc4cccc5CCc(c45)c3cc2
|
816 |
+
Ethyl benzoate ,-2.775,1,150.177,0,1,2,26.3,-2.32,CCOC(=O)c1ccccc1
|
817 |
+
1-Chloro-2-methylpropane,-1.9240000000000002,1,92.569,0,0,1,0.0,-2.0,ClCC(C)C
|
818 |
+
Ethinyl estradiol,-4.317,1,296.41,2,4,0,40.46,-4.3,CC34CCC1C(CCc2cc(O)ccc12)C3CCC4(O)C#C
|
819 |
+
methyl laurate,-4.025,1,214.34899999999996,0,0,10,26.3,-4.69,CCCCCCCCCCCC(=O)OC
|
820 |
+
Di-n-propylsulfide,-2.307,1,118.24499999999999,0,0,4,0.0,-2.58,CCCSCCC
|
821 |
+
Napthacene,-5.568,2,228.29399999999998,0,4,0,0.0,-8.6,c1ccc2cc3cc4ccccc4cc3cc2c1
|
822 |
+
1-Bromopentane,-2.658,1,151.047,0,0,3,0.0,-3.08,CCCCCBr
|
823 |
+
trans-2-Heptene ,-2.784,1,98.18899999999998,0,0,3,0.0,-3.82,CCCC/C=C/C
|
824 |
+
Metranidazole,-0.8590000000000001,1,171.15599999999998,1,1,3,81.19,-1.26,Cc1ncc(N(=O)=O)n1CCO
|
825 |
+
Pentylcyclopentane,-3.8689999999999998,1,140.26999999999998,0,1,4,0.0,-6.08,CCCCCC1CCCC1
|
826 |
+
"2,2',3,5,5',6-PCB",-7.261,1,360.88200000000006,0,2,1,0.0,-7.42,Clc1ccc(Cl)c(c1)c2c(Cl)c(Cl)cc(Cl)c2Cl
|
827 |
+
5-Ethyl-5-isopropylbarbituric acid,-1.6030000000000002,1,198.22199999999998,2,1,2,75.27000000000001,-2.148,O=C1NC(=O)NC(=O)C1(CC)C(C)C
|
828 |
+
"1,1,1-Trichloroethane",-2.2319999999999998,1,133.405,0,0,0,0.0,-2.0,CC(Cl)(Cl)Cl
|
829 |
+
Monolinuron,-2.948,1,214.652,1,1,2,41.57,-2.57,CON(C)C(=O)Nc1ccc(Cl)cc1
|
830 |
+
Cyclohexyl-5-spirobarbituric acid,-1.405,1,196.206,2,2,0,75.27,-3.06,O=C2NC(=O)C1(CCCCC1)C(=O)N2
|
831 |
+
dimetan,-2.3040000000000003,1,211.26099999999994,0,1,1,46.61,-0.85,CN(C)C(=O)OC1=CC(=O)CC(C)(C)C1
|
832 |
+
4-Bromotoluene,-3.667,1,171.03700000000003,0,1,0,0.0,-3.19,Cc1ccc(Br)cc1
|
833 |
+
Diethyl ether ,-0.718,1,74.123,0,0,2,9.23,-0.09,CCOCC
|
834 |
+
Rovral,-4.004,1,330.17100000000005,1,2,2,69.72,-4.376,CC(C)NC(=O)N1CC(=O)N(C1=O)c2cc(Cl)cc(Cl)c2
|
835 |
+
Benfluralin,-5.205,1,335.28200000000004,0,1,7,89.51999999999998,-5.53,CCCCN(CC)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O
|
836 |
+
"2,4,6-Trimethylphenol",-2.9410000000000003,1,136.194,1,1,0,20.23,-2.05,Cc1cc(C)c(O)c(C)c1
|
837 |
+
Benzene ,-2.418,2,78.11399999999999,0,1,0,0.0,-1.64,c1ccccc1
|
838 |
+
p-Chloroiodobenzene,-4.3839999999999995,1,238.45499999999998,0,1,0,0.0,-4.03,Clc1ccc(I)cc1
|
839 |
+
Metoxuron,-2.6830000000000003,1,228.67899999999997,1,1,2,41.57,-2.5639999999999996,COc1ccc(NC(=O)N(C)C)cc1Cl
|
840 |
+
propachlor,-3.0180000000000002,1,211.69200000000004,0,1,3,20.310000000000002,-2.48,CC(C)N(C(=O)CCl)c1ccccc1
|
841 |
+
Styrene,-2.85,1,104.15199999999997,0,1,1,0.0,-2.82,C=Cc1ccccc1
|
842 |
+
Dimethoxymethane,0.092,1,76.095,0,0,2,18.46,0.48,COCOC
|
843 |
+
o-Xylene ,-3.0039999999999996,1,106.16799999999999,0,1,0,0.0,-2.8,Cc1ccccc1C
|
844 |
+
Butan-2-ol,-0.616,1,74.12299999999999,1,0,1,20.23,0.47,CCC(C)O
|
845 |
+
"1,4-Benzenediol",-1.59,1,110.11199999999998,2,1,0,40.46,-0.17,Oc1ccc(O)cc1
|
846 |
+
estriol,-3.858,1,288.387,3,4,0,60.69,-4.955,CC34CCC1C(CCc2cc(O)ccc12)C3CC(O)C4O
|
847 |
+
Benzo(b)fluorene,-5.189,2,216.283,0,4,0,0.0,-8.04,C1c2ccccc2c3cc4ccccc4cc13
|
848 |
+
hydantoin,0.603,1,100.077,2,1,0,58.2,-0.4,O=C1CNC(=O)N1
|
849 |
+
4-hexylresorcinol,-3.4930000000000003,1,194.27399999999992,2,1,5,40.46,-2.59,c1(O)cc(O)ccc1CCCCCC
|
850 |
+
allicin,-2.045,1,162.27899999999997,0,0,5,17.07,-0.83,C=CCS(=O)SCC=C
|
851 |
+
Coumaphos,-5.04,1,362.77100000000013,0,2,6,57.9,-5.382000000000001,CCOP(=S)(OCC)Oc2ccc1oc(=O)c(Cl)c(C)c1c2
|
852 |
+
"5,6-Dimethylchrysene",-6.265,1,256.348,0,4,0,0.0,-7.01,Cc1c(C)c2c3ccccc3ccc2c4ccccc14
|
853 |
+
Betamethasone-17-valerate,-5.062,1,476.5850000000002,2,4,6,100.90000000000002,-4.71,CCCCC(=O)OC3(C(C)CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)C(=O)CO
|
854 |
+
uric acid,-0.541,1,168.112,4,2,0,114.36999999999998,-3.93,O=c2[nH]c(=O)c1[nH]c(=O)[nH]c1[nH]2
|
855 |
+
"2,3,4,6-Tetrachlorophenol",-4.203,1,231.89299999999997,1,1,0,20.23,-3.1,Oc1c(Cl)cc(Cl)c(Cl)c1Cl
|
856 |
+
"1,3-Dichlorobenzene",-3.5580000000000003,1,147.004,0,1,0,0.0,-3.04,Clc1cccc(Cl)c1
|
857 |
+
DDT,-6.638,1,354.491,0,2,2,0.0,-7.15,Clc1ccc(cc1)C(c2ccc(Cl)cc2)C(Cl)(Cl)Cl
|
858 |
+
Isobutyl formate,-1.095,1,102.13299999999998,0,0,3,26.3,-1.01,CC(C)COC=O
|
859 |
+
thioanisole,-2.87,1,124.208,0,1,1,0.0,-2.39,c1ccccc1SC
|
860 |
+
RTI 13,-4.45,1,322.29,1,3,1,58.120000000000005,-4.207,CCN2c1nc(C)cc(C(F)(F)F)c1NC(=O)c3cccnc23
|
861 |
+
Hexane ,-2.615,1,86.178,0,0,3,0.0,-3.84,CCCCCC
|
862 |
+
methyl nicotinate,-1.621,1,137.138,0,1,1,39.19,-0.46,COC(=O)c1cccnc1
|
863 |
+
Bendroflumethiazide,-3.741,1,421.4220000000001,3,3,3,118.35999999999999,-3.59,NS(=O)(=O)c3cc2c(NC(Cc1ccccc1)NS2(=O)=O)cc3C(F)(F)F
|
864 |
+
"2,3,3',4,4',5-PCB",-7.425,1,360.88200000000006,0,2,1,0.0,-7.82,Clc1ccc(cc1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl
|
865 |
+
Vinclozolin,-4.377,1,286.11400000000003,0,2,2,46.61,-4.925,CC1(OC(=O)N(C1=O)c2cc(Cl)cc(Cl)c2)C=C
|
866 |
+
Cyanazine,-2.49,1,240.698,2,1,4,86.52,-3.15,CCNc1nc(Cl)nc(NC(C)(C)C#N)n1
|
867 |
+
Triphenylene,-5.568,2,228.29399999999998,0,4,0,0.0,-6.726,c1ccc2c(c1)c3ccccc3c4ccccc24
|
868 |
+
Dienestrol,-4.775,1,266.34,2,2,3,40.46,-4.95,CC=C(C(=CC)c1ccc(O)cc1)c2ccc(O)cc2
|
869 |
+
Di(2-ethylhexyl)-phthalate,-7.117000000000001,1,390.5640000000003,0,1,14,52.60000000000001,-6.96,CCCCC(CC)COC(=O)c1ccccc1C(=O)OCC(CC)CCCC
|
870 |
+
2-Ethyl pyridine,-2.051,1,107.15599999999998,0,1,1,12.89,0.51,CCc1ccccn1
|
871 |
+
Naled,-3.548,1,380.784,0,0,5,44.760000000000005,-2.28,COP(=O)(OC)OC(Br)C(Cl)(Cl)Br
|
872 |
+
Biphenyl,-4.079,2,154.21199999999996,0,2,1,0.0,-4.345,c1ccc(cc1)c2ccccc2
|
873 |
+
"2,2',4,4',6,6'-PCB",-7.178999999999999,1,360.88200000000006,0,2,1,0.0,-8.71,Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cc(Cl)cc2Cl
|
874 |
+
Altretamine,-2.492,1,210.285,0,1,3,48.39000000000001,-3.364,CN(C)c1nc(nc(n1)N(C)C)N(C)C
|
875 |
+
"2,4-Dimethyl-2-pentanol ",-1.6469999999999998,1,116.20399999999998,1,0,2,20.23,-0.92,CC(C)CC(C)(C)O
|
876 |
+
Cycloheptyl-5-spirobarbituric acid,-1.844,1,210.23299999999998,2,2,0,75.27,-3.168,O=C2NC(=O)C1(CCCCCC1)C(=O)N2
|
877 |
+
Fructose,0.47100000000000003,1,180.156,5,1,2,110.38000000000001,0.64,OCC1OC(O)(CO)C(O)C1O
|
878 |
+
"3,5-Dimethylphenol",-2.6519999999999997,1,122.16699999999997,1,1,0,20.23,-1.4,Cc1cc(C)cc(O)c1
|
879 |
+
Barban,-4.16,1,258.104,1,1,2,38.33,-4.37,ClCC#CCOC(=O)Nc1cccc(Cl)c1
|
880 |
+
p-Chloroacetanilide,-2.642,1,169.611,1,1,1,29.1,-2.843,CC(=O)Nc1ccc(Cl)cc1
|
881 |
+
"2,2',3,4,5,5',6-PCB",-7.898,1,395.3270000000001,0,2,1,0.0,-8.94,Clc1ccc(Cl)c(c1)c2c(Cl)c(Cl)c(Cl)c(Cl)c2Cl
|
882 |
+
"2,2-Dimethylbutane",-2.5839999999999996,1,86.17799999999998,0,0,0,0.0,-3.55,CCC(C)(C)C
|
883 |
+
N-Methylaniline ,-2.097,1,107.15599999999998,1,1,1,12.03,-1.28,CNc1ccccc1
|
884 |
+
"1,4-Pentadiene ",-1.758,1,68.119,0,0,2,0.0,-2.09,C=CCC=C
|
885 |
+
Hydrocortisone 21-acetate,-3.6919999999999997,1,404.5030000000002,2,4,3,100.90000000000002,-4.88,CC(=O)OCC(=O)C1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3C(O)CC21C
|
886 |
+
DNOC,-2.818,1,198.134,1,1,2,106.51000000000002,-1.456,Cc1cc(cc(N(=O)=O)c1O)N(=O)=O
|
887 |
+
Lorazepam,-3.75,1,321.163,2,3,1,61.690000000000005,-3.6039999999999996,OC3N=C(c1ccccc1Cl)c2cc(Cl)ccc2NC3=O
|
888 |
+
3-Chlorophenol,-2.761,1,128.558,1,1,0,20.23,-0.7,Oc1cccc(Cl)c1
|
889 |
+
m-Chlorobromobenzene,-3.928,1,191.45499999999998,0,1,0,0.0,-3.21,Clc1cccc(Br)c1
|
890 |
+
chlorothiazide,-1.7519999999999998,1,295.72900000000004,2,2,1,118.69,-3.05,NS(=O)(=O)c2cc1c(N=CNS1(=O)=O)cc2Cl
|
891 |
+
5-Methyl-5-ethylbarbituric acid,-0.9109999999999999,1,170.16799999999998,2,1,1,75.27000000000001,-1.228,O=C1NC(=O)NC(=O)C1(C)CC
|
892 |
+
2-Phenoxyethanol,-1.761,1,138.16599999999997,1,1,3,29.46,-0.7,OCCOc1ccccc1
|
893 |
+
Diphenylmethane,-4.09,2,168.239,0,2,2,0.0,-4.08,C(c1ccccc1)c2ccccc2
|
894 |
+
3-Octanol,-2.033,1,130.23099999999997,1,0,5,20.23,-1.98,CCCCCC(O)CC
|
895 |
+
Flumetralin,-6.584,1,421.7340000000001,0,2,6,89.51999999999998,-6.78,CCN(Cc1c(F)cccc1Cl)c2c(cc(cc2N(=O)=O)C(F)(F)F)N(=O)=O
|
896 |
+
Propazine,-3.3289999999999997,1,229.71500000000003,2,1,4,62.730000000000004,-4.43,CC(C)Nc1nc(Cl)nc(NC(C)C)n1
|
897 |
+
2-Methylpentanol,-1.381,1,102.17699999999999,1,0,3,20.23,-1.11,CCCC(C)CO
|
898 |
+
2-Methyl-2-hexanol,-1.663,1,116.20399999999998,1,0,3,20.23,-1.08,CCCCC(C)(C)O
|
899 |
+
Ethylbenzene,-2.988,1,106.16799999999996,0,1,1,0.0,-2.77,CCc1ccccc1
|
900 |
+
5-(3-Methyl-2-butenyl)-5-ethylbarbital,-2.126,1,224.25999999999996,2,1,3,75.27000000000001,-2.253,O=C1NC(=O)NC(=O)C1(CC)CC=C(C)C
|
901 |
+
Heptachlor,-5.26,1,373.3209999999999,0,3,0,0.0,-6.317,ClC1C=CC2C1C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl
|
902 |
+
butallylonal,-2.766,1,303.156,2,1,4,75.27000000000001,-2.647,CCC(C)C1(CC(Br)=C)C(=O)NC(=O)NC1=O
|
903 |
+
Fenpropathrin,-6.15,1,349.43000000000006,0,3,5,59.32000000000001,-6.025,CC1(C)C(C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2)C1(C)C
|
904 |
+
Methoprene,-4.795,1,310.47800000000007,0,0,10,35.53,-5.19,COC(C)(C)CCCC(C)CC=CC(C)=CC(=O)OC(C)C
|
905 |
+
Ethyl propionate,-1.1909999999999998,1,102.133,0,0,2,26.3,-0.66,CCOC(=O)CC
|
906 |
+
Prometryn,-3.693,1,241.364,2,1,5,62.730000000000004,-4.1,CSc1nc(NC(C)C)nc(NC(C)C)n1
|
907 |
+
Buturon,-3.199,1,236.702,1,1,2,32.34,-3.9,CC(C#C)N(C)C(=O)Nc1ccc(Cl)cc1
|
908 |
+
"2,3-Dimethylnaphthalene",-4.1160000000000005,1,156.22799999999998,0,2,0,0.0,-4.72,Cc1cc2ccccc2cc1C
|
909 |
+
"2,4',5-PCB",-5.7620000000000005,1,257.547,0,2,1,0.0,-6.25,Clc1ccc(cc1)c2cc(Cl)ccc2Cl
|
910 |
+
"2,3',4,4',5-PCB",-7.343,1,360.88200000000006,0,2,1,0.0,-7.39,Clc1ccc(c(Cl)c1)c2cc(Cl)c(Cl)c(Cl)c2Cl
|
911 |
+
2-cyanoguanidine,0.361,1,84.082,2,0,0,88.19,-0.31,NC(N)=NC#N
|
912 |
+
Chloropicrin,-1.8659999999999999,1,164.375,0,0,0,43.14,-2.0,ClC(Cl)(Cl)N(=O)=O
|
913 |
+
"2,6-PCB",-4.984,1,223.102,0,2,1,0.0,-5.21,Clc1cccc(Cl)c1c2ccccc2
|
914 |
+
p-Methoxybenzaldehyde,-2.252,1,136.14999999999998,0,1,2,26.3,-1.49,COc1ccc(C=O)cc1
|
915 |
+
4-Nitroacetanilide,-2.219,1,180.16299999999998,1,1,2,72.24000000000001,-2.6919999999999997,CC(=O)Nc1ccc(cc1)N(=O)=O
|
916 |
+
Ethyl heptanoate,-2.608,1,158.24099999999999,0,0,6,26.3,-2.74,CCCCCCC(=O)OCC
|
917 |
+
p-Hydroxyacetanilide,-1.495,1,151.165,2,1,1,49.33,-1.03,CC(=O)Nc1ccc(O)cc1
|
918 |
+
indazole,-2.34,2,118.13899999999998,1,2,0,28.68,-2.16,c2ccc1[nH]ncc1c2
|
919 |
+
triamcinolone acetonide,-3.928,1,434.50400000000025,2,5,2,93.06000000000002,-4.31,CC5(C)OC4CC3C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC3(C)C4(O5)C(=O)CO
|
920 |
+
guanine,-0.67,1,151.129,3,2,0,100.44999999999999,-3.583,Nc2nc1[nH]cnc1c(=O)[nH]2
|
921 |
+
Methyl acetate,-0.41600000000000004,1,74.07900000000001,0,0,0,26.3,0.46,COC(=O)C
|
922 |
+
Stanolone,-3.8819999999999997,1,290.44699999999995,1,4,0,37.3,-4.743,CC34CCC1C(CCC2CC(=O)CCC12C)C3CCC4O
|
923 |
+
1-Hexene-3-ol,-1.199,1,100.16099999999999,1,0,3,20.23,-0.59,CCCC(O)C=C
|
924 |
+
norbormide,-4.238,1,511.5810000000002,2,7,5,92.18,-3.931,OC(C1=CC2C5C(C1C2=C(c3ccccc3)c4ccccn4)C(=O)NC5=O)(c6ccccc6)c7ccccn7
|
925 |
+
Dibutyl ether ,-2.135,1,130.231,0,0,6,9.23,-1.85,CCCCOCCCC
|
926 |
+
1-Dodecanol,-3.523,1,186.33899999999997,1,0,10,20.23,-4.8,CCCCCCCCCCCCO
|
927 |
+
RTI 6,-3.335,1,313.36100000000005,2,3,4,81.59000000000002,-3.36,CCN2c1nc(N(C)(CCO))ccc1NC(=O)c3cccnc23
|
928 |
+
2-Methyl-2-pentanol,-1.308,1,102.17699999999998,1,0,2,20.23,-0.49,CCCC(C)(C)O
|
929 |
+
Flucytosine,-0.132,1,129.09399999999997,2,1,0,71.77,-0.972,Nc1nc(=O)[nH]cc1F
|
930 |
+
stadacaine,-5.127999999999999,1,293.40700000000004,0,1,9,38.77,-3.84,CCCCOc1ccc(C(=O)OCC)c(c1)N(CC)CC
|
931 |
+
2-Methyl-2-heptanol,-2.017,1,130.231,1,0,4,20.23,-1.72,CCCCCC(C)(C)O
|
932 |
+
Hexamethylbenzene,-4.361000000000001,1,162.27599999999998,0,1,0,0.0,-5.23,Cc1c(C)c(C)c(C)c(C)c1C
|
933 |
+
Thymol,-3.1289999999999996,1,150.22099999999998,1,1,1,20.23,-2.22,CC(C)c1ccc(C)cc1O
|
934 |
+
Pteridine,-0.9059999999999999,2,132.12599999999998,0,2,0,51.56,0.02,c2cnc1ncncc1n2
|
935 |
+
Parathion,-3.949,1,291.26500000000004,0,1,7,70.83000000000001,-4.66,CCOP(=S)(OCC)Oc1ccc(cc1)N(=O)=O
|
936 |
+
Methane,-0.636,0,16.043,0,0,0,0.0,-0.9,C
|
937 |
+
indoline,-2.195,2,119.16699999999999,1,2,0,12.03,-1.04,c2ccc1NCCc1c2
|
938 |
+
1-Nitronapthalene,-3.4139999999999997,1,173.171,0,2,1,43.14,-3.54,O=N(=O)c1cccc2ccccc12
|
939 |
+
3-Methyl-2-pentanone,-1.266,1,100.16099999999999,0,0,2,17.07,-0.67,CCC(C)C(=O)C
|
940 |
+
isoguanine,-1.74,1,151.129,3,2,0,100.71000000000001,-3.4010000000000002,Nc1nc(O)nc2nc[nH]c12
|
941 |
+
bromadiolone,-7.877000000000001,1,527.4140000000002,2,5,6,70.67,-4.445,OC(CC(c1ccccc1)c3c(O)c2ccccc2oc3=O)c4ccc(cc4)c5ccc(Br)cc5
|
942 |
+
Nitromethane,-0.042,1,61.040000000000006,0,0,0,43.14,0.26,CN(=O)=O
|
943 |
+
Triallate,-4.578,1,304.66999999999996,0,0,4,20.310000000000002,-4.88,CC(C)N(C(C)C)C(=O)SCC(Cl)=C(Cl)Cl
|
944 |
+
"1,5-Hexadiene ",-2.112,1,82.14599999999999,0,0,3,0.0,-2.68,C=CCCC=C
|
945 |
+
Indole,-2.654,2,117.15099999999997,1,2,0,15.79,-1.52,c2ccc1[nH]ccc1c2
|
946 |
+
Androstenedione,-3.3930000000000002,1,286.415,0,4,0,34.14,-3.69,CC34CCC1C(CCC2=CC(=O)CCC12C)C3CCC4=O
|
947 |
+
1-Hexene,-2.364,1,84.16199999999999,0,0,3,0.0,-3.23,CCCCC=C
|
948 |
+
Xipamide,-3.642,1,354.8150000000001,3,2,3,109.48999999999998,-3.79,Cc1cccc(C)c1NC(=O)c2cc(c(Cl)cc2O)S(N)(=O)=O
|
949 |
+
Ethylcyclohexane,-3.245,1,112.21600000000001,0,1,1,0.0,-4.25,CCC1CCCCC1
|
950 |
+
2-Nonanone,-2.263,1,142.242,0,0,6,17.07,-2.58,CCCCCCCC(=O)C
|
951 |
+
Mebendazole,-4.118,1,295.298,2,3,3,84.07999999999998,-3.88,COC(=O)Nc2nc1ccc(cc1[nH]2)C(=O)c3ccccc3
|
952 |
+
Chloropham,-3.5439999999999996,1,213.66400000000002,1,1,2,38.33,-3.38,CC(C)OC(=O)Nc1cccc(Cl)c1
|
953 |
+
RTI 12,-3.446,1,288.73800000000006,0,3,1,49.330000000000005,-4.114,CCN2c1nc(Cl)ccc1N(C)C(=O)c3cccnc23
|
954 |
+
Carbaryl,-3.0869999999999997,1,201.225,1,2,1,38.33,-3.2239999999999998,CNC(=O)Oc1cccc2ccccc12
|
955 |
+
Ethyne,-0.252,1,26.037999999999997,0,0,0,0.0,0.29,C#C
|
956 |
+
"3,5-Dimethylpyridine",-2.0980000000000003,1,107.15599999999998,0,1,0,12.89,0.38,Cc1cncc(C)c1
|
957 |
+
"1,4-Cyclohexadiene",-1.8419999999999999,2,80.12999999999998,0,1,0,0.0,-2.06,C1C=CCC=C1
|
958 |
+
Mecarbam,-3.738,1,329.3800000000001,0,0,8,65.07000000000001,-2.5180000000000002,CCOC(=O)N(C)C(=O)CSP(=S)(OCC)OCC
|
959 |
+
1-Phenylethanol,-1.919,1,122.16699999999996,1,1,1,20.23,-0.92,CC(O)c1ccccc1
|
960 |
+
"1,2-Dichloropropane",-1.794,1,112.98700000000001,0,0,1,0.0,-1.6,CC(Cl)CCl
|
961 |
+
2-Ethyl-2-hexanal,-2.081,1,126.19899999999998,0,0,4,17.07,-2.46,CCCC=C(CC)C=O
|
962 |
+
Disulfoton,-3.975,1,274.413,0,0,9,18.46,-4.23,CCOP(=S)(OCC)SCCSCC
|
963 |
+
methyltestosterone acetate,-4.863,1,344.4950000000001,0,4,1,43.370000000000005,-5.284,CC(=O)OC3(C)CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C
|
964 |
+
"2,4,6-PCB",-5.604,1,257.547,0,2,1,0.0,-6.14,Clc1ccc(cc1)c2c(Cl)cccc2Cl
|
965 |
+
difluron,-4.692,1,310.687,2,2,2,58.2,-6.02,Fc1cccc(F)c1C(=O)NC(=O)Nc2ccc(Cl)cc2
|
966 |
+
Triclosan,-5.645,1,289.54499999999996,1,2,2,29.46,-4.46,Oc1cc(Cl)ccc1Oc2ccc(Cl)cc2Cl
|
967 |
+
diisooctyl phthalate,-7.117000000000001,1,390.5640000000002,0,1,14,52.60000000000001,-6.6370000000000005,c1(C(=O)OCCCCCC(C)(C))c(C(=O)OCCCCCC(C)(C))cccc1
|
968 |
+
Corticosterone,-3.4539999999999997,1,346.46700000000016,2,4,2,74.6,-3.24,CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO
|
969 |
+
"1,3,5-Trimethylbenzene ",-3.375,1,120.19499999999998,0,1,0,0.0,-3.4,Cc1cc(C)cc(C)c1
|
970 |
+
dioctyl phthalate,-7.148,1,390.56400000000036,0,1,16,52.60000000000001,-5.115,CCCCCCCCOC(=O)c1ccccc1C(=O)OCCCCCCCC
|
971 |
+
1-Pentadecanol,-4.586,1,228.41999999999993,1,0,13,20.23,-6.35,CCCCCCCCCCCCCCCO
|
972 |
+
"2,2',6,6'-PCB",-5.915,1,291.99199999999996,0,2,1,0.0,-7.39,Clc1cccc(Cl)c1c2c(Cl)cccc2Cl
|
973 |
+
"5,5-Dimethylbarbituric acid",-0.556,1,156.141,2,1,0,75.27000000000001,-1.742,O=C1NC(=O)NC(=O)C1(C)C
|
974 |
+
2-Iodopropane,-2.4859999999999998,1,169.993,0,0,0,0.0,-2.09,CC(C)I
|
975 |
+
"1,2-Dinitrobenzene",-2.281,1,168.10799999999995,0,1,2,86.28,-3.1,O=N(=O)c1ccccc1N(=O)=O
|
976 |
+
3-Methyl-2-butanone,-0.912,1,86.13399999999999,0,0,1,17.07,-0.12,CC(C)C(=O)C
|
977 |
+
Hexadecane,-6.159,1,226.44799999999992,0,0,13,0.0,-8.4,CCCCCCCCCCCCCCCC
|
978 |
+
"1,8-Cineole",-2.5789999999999997,1,154.253,0,3,0,9.23,-1.74,CC12CCC(CC1)C(C)(C)O2
|
979 |
+
Tricyclazole,-2.8680000000000003,1,189.24300000000002,0,3,0,30.19,-2.07,Cc2cccc3sc1nncn1c23
|
980 |
+
2-Octanone,-1.909,1,128.21499999999997,0,0,5,17.07,-2.05,CCCCCCC(=O)C
|
981 |
+
Methyl nonanoate,-2.9619999999999997,1,172.268,0,0,7,26.3,-3.38,CCCCCCCCC(=O)OC
|
982 |
+
"1,4-Difluorobenzene",-2.636,1,114.094,0,1,0,0.0,-1.97,Fc1ccc(F)cc1
|
983 |
+
Thalidomide,-1.944,1,258.233,1,3,1,83.55000000000001,-2.676,O=C1N(C2CCC(=O)NC2=O)C(=O)c3ccccc13
|
984 |
+
Trifluralin,-5.205,1,335.28200000000004,0,1,7,89.51999999999998,-5.68,CCCN(CCC)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O
|
985 |
+
Ethanol,0.02,1,46.069,1,0,0,20.23,1.1,CCO
|
986 |
+
Cyclopentyl-5-spirobarbituric acid,-0.966,1,182.179,2,2,0,75.27,-2.349,O=C2NC(=O)C1(CCCC1)C(=O)N2
|
987 |
+
Carbetamide,-2.29,1,236.271,2,1,4,67.42999999999999,-1.83,c1c(NC(=O)OC(C)C(=O)NCC)cccc1
|
988 |
+
phenothrin,-6.763,1,350.4580000000001,0,3,6,35.53,-5.24,CC(C)=CC3C(C(=O)OCc2cccc(Oc1ccccc1)c2)C3(C)C
|
989 |
+
Cycluron,-2.6289999999999996,1,198.30999999999992,1,1,1,32.34,-2.218,CN(C)C(=O)NC1CCCCCCC1
|
990 |
+
Mirex,-6.155,1,545.5460000000002,0,6,0,0.0,-6.8,ClC1(C2(Cl)C3(Cl)C4(Cl)C5(Cl)C1(Cl)C3(Cl)Cl)C5(Cl)C(Cl)(Cl)C24Cl
|
991 |
+
1-Bromooctane,-3.721,1,193.128,0,0,6,0.0,-5.06,CCCCCCCCBr
|
992 |
+
Benomyl,-2.9019999999999997,1,290.323,2,2,4,85.25,-4.883,CCCCNC(=O)n1c(NC(=O)OC)nc2ccccc12
|
993 |
+
aminopyrine,-2.129,1,231.299,0,2,2,30.17,-0.364,CN(C)c2c(C)n(C)n(c1ccccc1)c2=O
|
994 |
+
3-Pentanol,-0.97,1,88.15,1,0,2,20.23,-0.24,CCC(O)CC
|
995 |
+
p-Nitrotoluene,-2.64,1,137.138,0,1,1,43.14,-2.49,Cc1ccc(cc1)N(=O)=O
|
996 |
+
4-Methylpentanol,-1.381,1,102.17699999999999,1,0,3,20.23,-1.14,CC(C)CCCO
|
997 |
+
Norethisterone,-2.6689999999999996,1,314.42500000000007,2,4,0,57.53,-4.57,CC34CCC1C(CCC2=CC(=O)CCC12O)C3CCC4(O)C#C
|
998 |
+
bromopropylate,-5.832999999999999,1,428.12000000000006,1,2,4,46.53,-4.93,CC(C)OC(=O)C(O)(c1ccc(Br)cc1)c2ccc(Br)cc2
|
999 |
+
Pyrazon,-2.603,1,221.647,1,2,1,60.91,-2.878,Nc2cnn(c1ccccc1)c(=O)c2Cl
|
1000 |
+
2-Methylbutan-2-ol,-0.9540000000000001,1,88.14999999999998,1,0,1,20.23,0.15,CCC(C)(C)O
|
1001 |
+
p-Cresol,-2.313,1,108.13999999999999,1,1,0,20.23,-0.73,Cc1ccc(O)cc1
|
1002 |
+
Ethyl formate,-0.402,1,74.07900000000001,0,0,2,26.3,0.15,CCOC=O
|
1003 |
+
"N,N-Dimethylaniline",-2.542,1,121.18299999999995,0,1,1,3.24,-1.92,CN(C)c1ccccc1
|
1004 |
+
Decalin,-3.715,2,138.254,0,2,0,0.0,-5.19,C1CCC2CCCCC2C1
|
1005 |
+
Butanethiol ,-1.676,1,90.19099999999999,1,0,2,0.0,-2.18,CCCCS
|
1006 |
+
Benzo(e)pyrene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-7.8,c1ccc2c(c1)c3cccc4ccc5cccc2c5c43
|
1007 |
+
Tetrachloroethylene,-3.063,1,165.834,0,0,0,0.0,-2.54,ClC(=C(Cl)Cl)Cl
|
1008 |
+
3-Pentanone,-0.912,1,86.134,0,0,2,17.07,-0.28,CCC(=O)CC
|
1009 |
+
Acrylonitrile,-0.354,1,53.06399999999999,0,0,0,23.79,0.15,C=CC#N
|
1010 |
+
Flumethasone,-3.5389999999999997,1,410.4570000000002,3,4,2,94.83,-5.6129999999999995,CC1CC2C3CC(F)C4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO
|
1011 |
+
2-Hexanone,-1.2,1,100.16099999999999,0,0,3,17.07,-0.8,CCCCC(=O)C
|
1012 |
+
Terbumeton,-3.505,1,225.296,2,1,4,71.96000000000001,-3.239,CCNc1nc(NC(C)(C)C)nc(OC)n1
|
1013 |
+
3-Methylheptane,-3.3080000000000003,1,114.23199999999999,0,0,4,0.0,-5.16,CCCCC(C)CC
|
1014 |
+
"1,2-Dibromoethane",-2.102,1,187.862,0,0,1,0.0,-1.68,BrCCBr
|
1015 |
+
Isoprocarb,-2.734,1,193.24599999999998,1,1,2,38.33,-2.863,CNC(=O)Oc1ccccc1C(C)C
|
1016 |
+
Niridazole,-1.9480000000000002,1,214.20600000000002,1,2,2,88.37,-3.22,O=C1NCCN1c2ncc(s2)N(=O)=O
|
1017 |
+
Benzo(a)fluorene,-5.189,2,216.283,0,4,0,0.0,-6.68,C1c2ccccc2c3ccc4ccccc4c13
|
1018 |
+
2-Chloroanisole,-2.912,1,142.58499999999998,0,1,1,9.23,-2.46,COc1ccccc1Cl
|
1019 |
+
Bromophos,-5.604,1,366.0,0,1,4,27.69,-6.09,COP(=S)(OC)Oc1cc(Cl)c(Br)cc1Cl
|
1020 |
+
Quinonamid,-3.988,1,332.57000000000005,1,2,3,63.24,-5.03,ClC(Cl)CC(=O)NC2=C(Cl)C(=O)c1ccccc1C2=O
|
1021 |
+
"P,P'-DDD",-6.007999999999999,1,320.04600000000005,0,2,3,0.0,-7.2,ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2
|
1022 |
+
Methyl acrylate,-0.878,1,86.09,0,0,1,26.3,-0.22,COC(=O)C=C
|
1023 |
+
Chloroxuron,-4.477,1,290.75,1,2,3,41.57000000000001,-4.89,CN(C)C(=O)Nc2ccc(Oc1ccc(Cl)cc1)cc2
|
1024 |
+
Azobenzene,-4.034,2,182.226,0,2,2,24.72,-4.45,N(=Nc1ccccc1)c2ccccc2
|
1025 |
+
4-Isopropyltoluene,-3.617,1,134.22199999999998,0,1,1,0.0,-3.77,CC(C)c1ccc(C)cc1
|
1026 |
+
"2,6-Dichlorophenol",-3.012,1,163.003,1,1,0,20.23,-1.79,Oc1c(Cl)cccc1Cl
|
1027 |
+
Sucrose,0.31,1,342.297,8,2,5,189.52999999999997,0.79,OCC2OC(OC1(CO)OC(CO)C(O)C1O)C(O)C(O)C2O
|
1028 |
+
d-inositol,-0.887,1,180.156,6,1,0,121.38000000000001,0.35,OC1C(O)C(O)C(O)C(O)C1O
|
1029 |
+
Dyphylline,-0.847,1,254.24599999999995,2,2,3,102.28,-0.17,Cn2c(=O)n(C)c1ncn(CC(O)CO)c1c2=O
|
1030 |
+
Chloramphenicol,-2.613,1,323.13200000000006,3,1,6,112.70000000000002,-2.1109999999999998,OCC(NC(=O)C(Cl)Cl)C(O)c1ccc(cc1)N(=O)=O
|
1031 |
+
3-Ethyl-3-pentanol,-1.663,1,116.204,1,0,3,20.23,-0.85,CCC(O)(CC)CC
|
1032 |
+
Epitostanol,-4.545,1,306.51500000000004,1,5,0,20.23,-5.41,CC45CCC2C(CCC3CC1SC1CC23C)C4CCC5O
|
1033 |
+
"1,2-Dibromobenzene",-4.172,1,235.90599999999998,0,1,0,0.0,-3.5,Brc1ccccc1Br
|
1034 |
+
"2,4,6-Trichlorophenol",-3.648,1,197.44799999999998,1,1,0,20.23,-2.34,Oc1c(Cl)cc(Cl)cc1Cl
|
1035 |
+
oryzalin,-3.784,1,346.3650000000001,1,1,8,149.67999999999998,-5.16,CCCN(CCC)c1c(cc(cc1N(=O)=O)S(N)(=O)=O)N(=O)=O
|
1036 |
+
RTI 20,-3.6630000000000003,1,255.29199999999997,0,3,2,20.310000000000002,-4.7989999999999995,C2c1ccccc1N(CCF)C(=O)c3ccccc23
|
1037 |
+
"2,4-Dimethyl-3-pentanone",-1.7519999999999998,1,114.18799999999997,0,0,2,17.07,-1.3,CC(C)C(=O)C(C)C
|
1038 |
+
5-(3-Methyl-2-butenyl)-5-isoPrbarbital,-2.465,1,238.28699999999998,2,1,3,75.27000000000001,-2.593,O=C1NC(=O)NC(=O)C1(C(C)C)CC=C(C)C
|
1039 |
+
gentisin,-1.2919999999999998,1,262.261,2,3,1,75.99000000000001,-2.943,c1c(O)C2C(=O)C3cc(O)ccC3OC2cc1(OC)
|
1040 |
+
Caffeine,-1.4980000000000002,1,194.19399999999996,0,2,0,61.82,-0.8759999999999999,Cn1cnc2n(C)c(=O)n(C)c(=O)c12
|
1041 |
+
Spironolactone,-3.842,1,416.58300000000025,0,5,1,60.44,-4.173,CC(=O)SC4CC1=CC(=O)CCC1(C)C5CCC2(C)C(CCC23CCC(=O)O3)C45
|
1042 |
+
"3,4-Dimethylphenol",-2.6210000000000004,1,122.16699999999999,1,1,0,20.23,-1.38,Cc1ccc(O)cc1C
|
1043 |
+
Diphenyl ether ,-4.254,2,170.211,0,2,2,9.23,-3.96,O(c1ccccc1)c2ccccc2
|
1044 |
+
"2,2',4,4',5,5'-PCB",-7.343,1,360.88200000000006,0,2,1,0.0,-8.56,Clc1cc(Cl)c(cc1Cl)c2cc(Cl)c(Cl)cc2Cl
|
1045 |
+
nicotinamide,-0.9640000000000001,1,122.12699999999997,1,1,1,55.980000000000004,0.61,NC(=O)c1cccnc1
|
1046 |
+
Thiophenol ,-2.758,1,110.18099999999997,1,1,0,0.0,-2.12,Sc1ccccc1
|
1047 |
+
XMC,-2.688,1,179.219,1,1,1,38.33,-2.5810000000000004,CNC(=O)Oc1cc(C)cc(C)c1
|
1048 |
+
Chlordane,-6.039,1,409.7819999999999,0,3,0,0.0,-6.86,ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl
|
1049 |
+
Dimethyldisulfide,-1.524,1,94.20400000000001,0,0,1,0.0,-1.44,CSSC
|
1050 |
+
Benzamide,-1.501,1,121.13899999999995,1,1,1,43.09,-0.96,NC(=O)c1ccccc1
|
1051 |
+
o-Chlorobromobenzene,-3.84,1,191.45499999999998,0,1,0,0.0,-3.19,Clc1ccccc1Br
|
1052 |
+
Monotropitoside,-1.493,1,446.40500000000003,6,3,6,184.6,-0.742,COC(=O)c1ccccc1OC2OC(COC3OCC(O)C(O)C3O)C(O)C(O)C2O
|
1053 |
+
3-Heptanol ,-1.6780000000000002,1,116.20399999999998,1,0,4,20.23,-1.47,CCCCC(O)CC
|
1054 |
+
RTI 15,-3.891,1,268.32,1,3,1,58.120000000000005,-4.553999999999999,CCN2c1nc(C)cc(C)c1NC(=O)c3cccnc23
|
1055 |
+
"3,5-Dichlorophenol",-3.428,1,163.003,1,1,0,20.23,-1.34,Oc1cc(Cl)cc(Cl)c1
|
1056 |
+
1-Methylphenanthrene,-4.87,1,192.261,0,3,0,0.0,-5.85,Cc1cccc2c1ccc3ccccc32
|
1057 |
+
2-Ethyl-1-hexanol,-2.089,1,130.231,1,0,5,20.23,-2.11,CCCCC(CC)CO
|
1058 |
+
Diallate,-3.827,1,270.225,0,0,4,20.310000000000002,-4.2860000000000005,CC(C)N(C(C)C)C(=O)SCC(=CCl)Cl
|
1059 |
+
Toluene ,-2.713,1,92.14099999999999,0,1,0,0.0,-2.21,Cc1ccccc1
|
1060 |
+
Nitrapyrin,-3.833,1,230.909,0,1,0,12.89,-3.76,Clc1cccc(n1)C(Cl)(Cl)Cl
|
1061 |
+
Cycloheptene,-2.5989999999999998,2,96.173,0,1,0,0.0,-3.18,C1CCC=CCC1
|
1062 |
+
Thiram,-2.444,1,240.44400000000002,0,0,0,6.48,-3.9,CN(C)C(=S)SSC(=S)N(C)C
|
1063 |
+
Griseofulvin,-3.3280000000000003,1,352.7700000000001,0,3,3,71.06,-3.2460000000000004,COC1=CC(=O)CC(C)C13Oc2c(Cl)c(OC)cc(OC)c2C3=O
|
1064 |
+
1-Decanol,-2.8139999999999996,1,158.285,1,0,8,20.23,-3.63,CCCCCCCCCCO
|
1065 |
+
"3,3-Dimethylpentane",-2.938,1,100.20499999999998,0,0,2,0.0,-4.23,CCC(C)(C)CC
|
1066 |
+
vamidothion,-1.446,1,287.34299999999996,1,0,8,64.63000000000001,1.1440000000000001,CNC(=O)C(C)SCCSP(=O)(OC)(OC)
|
1067 |
+
"2,3,4,5-Tetrachlorophenol",-4.335,1,231.893,1,1,0,20.23,-3.15,Oc1cc(Cl)c(Cl)c(Cl)c1Cl
|
1068 |
+
Butyraldehyde,-0.7490000000000001,1,72.107,0,0,2,17.07,-0.01,CCCC=O
|
1069 |
+
dexamethasone acetate,-3.9330000000000003,1,434.5040000000003,2,4,3,100.9,-4.9,CC4CC3C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC3(C)C4(O)C(=O)COC(C)=O
|
1070 |
+
Butane,-1.9069999999999998,1,58.123999999999995,0,0,1,0.0,-2.57,CCCC
|
1071 |
+
o-Methoxyphenol,-1.9409999999999998,1,124.13899999999997,1,1,1,29.46,-1.96,COc1ccccc1O
|
1072 |
+
Fluoromethalone,-3.5069999999999997,1,376.46800000000013,2,4,1,74.6,-4.099,CC1CC2C3CCC(O)(C(=O)C)C3(C)CC(O)C2(F)C4(C)C=CC(=O)C=C14
|
1073 |
+
Pentachloroethane,-3.3819999999999997,1,202.29500000000002,0,0,0,0.0,-2.6,ClC(Cl)C(Cl)(Cl)Cl
|
1074 |
+
Diethyl phthalate ,-3.016,1,222.23999999999995,0,1,4,52.60000000000001,-2.35,CCOC(=O)c1ccccc1C(=O)OCC
|
1075 |
+
2-Methylpropan-1-ol,-0.672,1,74.12299999999999,1,0,1,20.23,0.1,CC(C)CO
|
1076 |
+
Isobutylbenzene,-3.57,1,134.22199999999998,0,1,2,0.0,-4.12,CC(C)Cc1ccccc1
|
1077 |
+
Diiodomethane,-2.958,1,267.835,0,0,0,0.0,-2.34,ICI
|
1078 |
+
4-Heptanol,-1.6780000000000002,1,116.204,1,0,4,20.23,-1.4,CCCC(O)CCC
|
1079 |
+
Pentyl acetate,-1.8330000000000002,1,130.18699999999998,0,0,4,26.3,-1.89,CCCCCOC(=O)C
|
1080 |
+
"2,3,5,6-Tetrachlorophenol",-4.203,1,231.893,1,1,0,20.23,-3.37,Oc1c(Cl)c(Cl)cc(Cl)c1Cl
|
1081 |
+
Propylbenzene ,-3.281,1,120.19499999999995,0,1,2,0.0,-3.37,CCCc1ccccc1
|
1082 |
+
"1,2-Dichlorotetrafluoroethane",-2.697,1,170.92000000000002,0,0,1,0.0,-2.74,FC(F)(Cl)C(F)(F)Cl
|
1083 |
+
2-butenal,-0.604,1,70.09100000000001,0,0,1,17.07,0.32,CC=CC=O
|
1084 |
+
tetramethylurea,-0.495,1,116.16399999999999,0,0,0,23.550000000000004,0.94,CN(C)C(=O)N(C)C
|
1085 |
+
"1,2,4,5-Tetramethylbenzene",-3.6639999999999997,1,134.22199999999998,0,1,0,0.0,-4.59,Cc1cc(C)c(C)cc1C
|
1086 |
+
norethindrone acetate,-4.2410000000000005,1,340.4630000000001,0,4,1,43.370000000000005,-4.8,CC(=O)OC3(CCC4C2CCC1=CC(=O)CCC1C2CCC34C)C#C
|
1087 |
+
Ditalimfos,-3.992,1,299.28800000000007,0,2,5,55.84,-3.35,CCOP(=S)(OCC)N2C(=O)c1ccccc1C2=O
|
1088 |
+
salicylanilide,-3.782,1,213.23600000000002,2,2,2,49.33,-3.59,c1ccccc1NC(=O)c2c(O)cccc2
|
1089 |
+
Sulfallate,-3.2539999999999996,1,223.79399999999998,0,0,4,3.24,-3.39,CCN(CC)C(=S)SCC(Cl)=C
|
1090 |
+
Chloroethane,-1.165,1,64.515,0,0,0,0.0,-1.06,ClCC
|
1091 |
+
Mefluidide,-3.165,1,310.297,2,1,3,75.27000000000001,-3.24,CC(=O)Nc1cc(NS(=O)(=O)C(F)(F)F)c(C)cc1C
|
1092 |
+
Piperine,-3.659,1,285.343,0,3,3,38.77,-3.46,O=C(C=CC=Cc2ccc1OCOc1c2)N3CCCCC3
|
1093 |
+
cis-2-Pentene,-2.076,1,70.135,0,0,1,0.0,-2.54,CC/C=C\C
|
1094 |
+
thiofanox,-2.7,1,218.32199999999997,1,0,3,50.69,-1.62,CNC(=O)ON=C(CSC)C(C)(C)C
|
1095 |
+
Cyclooctyl-5-spirobarbituric acid,-2.2840000000000003,1,224.25999999999996,2,2,0,75.27,-2.9819999999999998,O=C2NC(=O)C1(CCCCCCC1)C(=O)N2
|
1096 |
+
butacarb,-4.6419999999999995,1,263.381,1,1,1,38.33,-4.24,c1(C(C)(C)C)cc(C(C)(C)C)cc(OC(=O)NC)c1
|
1097 |
+
Eriodictyol,-3.1519999999999997,1,288.255,4,3,1,107.22000000000001,-3.62,Oc2cc(O)c1C(=O)CC(Oc1c2)c3ccc(O)c(O)c3
|
1098 |
+
Benzophenone,-3.612,1,182.222,0,2,2,17.07,-3.12,O=C(c1ccccc1)c2ccccc2
|
1099 |
+
Eicosane,-7.5760000000000005,1,282.5559999999999,0,0,17,0.0,-8.172,CCCCCCCCCCCCCCCCCCCC
|
1100 |
+
hydrazobenzene,-3.492,2,184.242,2,2,3,24.06,-2.92,N(Nc1ccccc1)c2ccccc2
|
1101 |
+
2-Ethyl-1-butanol,-1.381,1,102.17699999999999,1,0,3,20.23,-1.17,CCC(CC)CO
|
1102 |
+
4-hydroxypyridine,-1.655,1,95.10099999999998,1,1,0,33.120000000000005,1.02,Oc1ccncc1
|
1103 |
+
"cis 1,2-Dichloroethylene",-1.561,1,96.94400000000002,0,0,0,0.0,-1.3,Cl\C=C/Cl
|
1104 |
+
Methylcyclopentane,-2.452,1,84.162,0,1,0,0.0,-3.3,CC1CCCC1
|
1105 |
+
4-Methyl-2-pentanol,-1.308,1,102.17699999999998,1,0,2,20.23,-0.8,CC(C)CC(C)O
|
1106 |
+
RTI 11,-3.125,1,254.28900000000002,1,3,0,55.56,-3.928,O2c1ccc(N)cc1N(C)C(=O)c3cc(C)ccc23
|
1107 |
+
"2,2-Dimethylpropanol",-1.011,1,88.14999999999999,1,0,0,20.23,-0.4,CC(C)(C)CO
|
1108 |
+
Triadimefon,-4.132,1,293.754,0,2,4,57.010000000000005,-3.61,CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n2cncn2
|
1109 |
+
Isocarboxazid,-2.251,1,231.25500000000002,2,2,4,67.16,-2.461,Cc1cc(no1)C(=O)NNCc2ccccc2
|
1110 |
+
Propylene,-1.235,1,42.080999999999996,0,0,0,0.0,-1.08,CC=C
|
1111 |
+
Dichlorophen,-4.9239999999999995,1,269.127,2,2,2,40.46,-3.9530000000000003,Oc1ccc(Cl)cc1Cc2cc(Cl)ccc2O
|
1112 |
+
Desmedipham,-4.1819999999999995,1,300.314,2,2,4,76.66,-4.632,CCOC(=O)Nc2cccc(OC(=O)Nc1ccccc1)c2
|
1113 |
+
Anthraquinone,-3.34,1,208.21599999999998,0,3,0,34.14,-5.19,O=C1c2ccccc2C(=O)c3ccccc13
|
1114 |
+
2-Octanol,-2.033,1,130.231,1,0,5,20.23,-2.09,CCCCCCC(C)O
|
1115 |
+
Oxycarboxin,-2.169,1,267.306,1,2,2,72.47,-2.281,CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1
|
1116 |
+
Butylbenzene,-3.585,1,134.22199999999998,0,1,3,0.0,-4.06,CCCCc1ccccc1
|
1117 |
+
parabanic acid,1.091,1,114.05999999999999,2,1,0,75.27,-0.4,O=C1NC(=O)C(=O)N1
|
1118 |
+
Abate,-6.678,1,466.47900000000016,0,2,10,55.38000000000001,-6.237,COP(=S)(OC)Oc1ccc(Sc2ccc(OP(=S)(OC)OC)cc2)cc1
|
1119 |
+
Chlorthalidone,-2.5639999999999996,1,338.7720000000001,3,3,2,109.49000000000001,-3.451,NS(=O)(=O)c1cc(ccc1Cl)C2(O)NC(=O)c3ccccc23
|
1120 |
+
Isobutyl acetate,-1.463,1,116.15999999999998,0,0,2,26.3,-1.21,CC(C)COC(=O)C
|
1121 |
+
"2,2,3-Trimethylbutane",-2.9219999999999997,1,100.20499999999998,0,0,0,0.0,-4.36,CC(C)C(C)(C)C
|
1122 |
+
"2,3,3',4,4'6-PCB",-7.746,1,395.3270000000001,0,2,1,0.0,-7.66,Clc1ccc(c(Cl)c1Cl)c2c(Cl)cc(Cl)c(Cl)c2Cl
|
1123 |
+
Phthalonitrile,-1.7169999999999999,1,128.13399999999996,0,1,0,47.58,-2.38,N#Cc1ccccc1C#N
|
1124 |
+
m-Nitrotoluene,-2.64,1,137.138,0,1,1,43.14,-2.44,Cc1cccc(c1)N(=O)=O
|
1125 |
+
halothane,-2.608,1,197.381,0,0,0,0.0,-1.71,FC(F)(F)C(Cl)Br
|
1126 |
+
Oxamyl,-0.9079999999999999,1,219.266,1,0,1,70.99999999999999,0.106,CNC(=O)ON=C(SC)C(=O)N(C)C
|
1127 |
+
Thiometon,-3.323,1,246.35899999999998,0,0,7,18.46,-3.091,CCSCCSP(=S)(OC)OC
|
1128 |
+
2-Methylbutane,-2.245,1,72.151,0,0,1,0.0,-3.18,CCC(C)C
|
1129 |
+
Stirofos,-4.32,1,365.96400000000006,0,1,5,44.760000000000005,-4.522,COP(=O)(OC)OC(=CCl)c1cc(Cl)c(Cl)cc1Cl
|
data/huusk.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
data/ws496_logS.csv
ADDED
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ID,SMILES,exp,InChi
|
2 |
+
1,CCCCCC,-3.84,"InChI=1/C6H14/c1-3-5-6-4-2/h3-6H2,1-2H3"
|
3 |
+
2,CCC(C)CC,-3.68,"InChI=1/C6H14/c1-4-6(3)5-2/h6H,4-5H2,1-3H3"
|
4 |
+
3,C1CCCCC1,-3.1,InChI=1/C6H12/c1-2-4-6-5-3-1/h1-6H2
|
5 |
+
4,CC(C)CC(C)C,-4.26,"InChI=1/C7H16/c1-6(2)5-7(3)4/h6-7H,5H2,1-4H3"
|
6 |
+
5,CCCCCCCC,-5.24,"InChI=1/C8H18/c1-3-5-7-8-6-4-2/h3-8H2,1-2H3"
|
7 |
+
6,C1CCCCCCC1,-4.15,InChI=1/C8H16/c1-2-4-6-8-7-5-3-1/h1-8H2
|
8 |
+
7,C/C=C/CC,-2.54,"InChI=1/C5H10/c1-3-5-4-2/h3,5H,4H2,1-2H3/b5-3+"
|
9 |
+
8,C=CCC(C)C,-3.24,"InChI=1/C6H12/c1-4-5-6(2)3/h4,6H,1,5H2,2-3H3"
|
10 |
+
9,C=CCCCCCCC,-5.05,"InChI=1/C9H18/c1-3-5-7-9-8-6-4-2/h3H,1,4-9H2,2H3"
|
11 |
+
10,C=CCC=C,-2.09,"InChI=1/C5H8/c1-3-5-4-2/h3-4H,1-2,5H2"
|
12 |
+
11,C=CCCC=C,-2.68,"InChI=1/C6H10/c1-3-5-6-4-2/h3-4H,1-2,5-6H2"
|
13 |
+
12,C=CC=C,-1.87,"InChI=1/C4H6/c1-3-4-2/h3-4H,1-2H2"
|
14 |
+
13,C1=CCC=CC1,-1.97,"InChI=1/C6H8/c1-2-4-6-5-3-1/h1-2,5-6H,3-4H2"
|
15 |
+
14,C#CCCCCCC,-3.66,"InChI=1/C8H14/c1-3-5-7-8-6-4-2/h1H,4-8H2,2H3"
|
16 |
+
15,C#CCCCCCCC,-4.24,"InChI=1/C9H16/c1-3-5-7-9-8-6-4-2/h1H,4-9H2,2H3"
|
17 |
+
16,Cc1ccc(C)cc1,-2.77,"InChI=1/C8H10/c1-7-3-5-8(2)6-4-7/h3-6H,1-2H3"
|
18 |
+
17,Cc1cccc(C)c1,-2.82,"InChI=1/C8H10/c1-7-4-3-5-8(2)6-7/h3-6H,1-2H3"
|
19 |
+
18,Cc1cc(C)cc(C)c1,-3.4,"InChI=1/C9H12/c1-7-4-8(2)6-9(3)5-7/h4-6H,1-3H3"
|
20 |
+
19,Cc1cccc(C)c1C,-3.2,"InChI=1/C9H12/c1-7-5-4-6-8(2)9(7)3/h4-6H,1-3H3"
|
21 |
+
20,CCc1ccc(C)cc1,-3.11,"InChI=1/C9H12/c1-3-9-6-4-8(2)5-7-9/h4-7H,3H2,1-2H3"
|
22 |
+
21,CC(C)c1ccccc1,-3.27,"InChI=1/C9H12/c1-8(2)9-6-4-3-5-7-9/h3-8H,1-2H3"
|
23 |
+
22,CCCCc1ccccc1,-4.06,"InChI=1/C10H14/c1-2-3-7-10-8-5-4-6-9-10/h4-6,8-9H,2-3,7H2,1H3"
|
24 |
+
23,CCc1ccc(CC)cc1,-3.75,"InChI=1/C10H14/c1-3-9-5-7-10(4-2)8-6-9/h5-8H,3-4H2,1-2H3"
|
25 |
+
24,CC[C@@H](C)c1ccccc1,-3.89,"InChI=1/C10H14/c1-3-9(2)10-7-5-4-6-8-10/h4-9H,3H2,1-2H3/t9-/m1/s1"
|
26 |
+
25,Cc1cc(C)c(C)c(C)c1C,-4,"InChI=1/C11H16/c1-7-6-8(2)10(4)11(5)9(7)3/h6H,1-5H3"
|
27 |
+
26,CCC(C)(C)c1ccccc1,-4.15,"InChI=1/C11H16/c1-4-11(2,3)10-8-6-5-7-9-10/h5-9H,4H2,1-3H3"
|
28 |
+
27,CCCCCCc1ccccc1,-5.21,"InChI=1/C12H18/c1-2-3-4-6-9-12-10-7-5-8-11-12/h5,7-8,10-11H,2-4,6,9H2,1H3"
|
29 |
+
28,C=Cc1ccccc1,-2.82,"InChI=1/C8H8/c1-2-8-6-4-3-5-7-8/h2-7H,1H2"
|
30 |
+
29,c1ccc(cc1)CCc1ccccc1,-4.62,"InChI=1/C14H14/c1-3-7-13(8-4-1)11-12-14-9-5-2-6-10-14/h1-10H,11-12H2"
|
31 |
+
30,Cc1ccc2ccccc2c1,-3.77,"InChI=1/C11H10/c1-9-6-7-10-4-2-3-5-11(10)8-9/h2-8H,1H3"
|
32 |
+
31,CCc1cccc2ccccc12,-4.17,"InChI=1/C12H12/c1-2-10-7-5-8-11-6-3-4-9-12(10)11/h3-9H,2H2,1H3"
|
33 |
+
32,CCc1ccc2ccccc2c1,-4.29,"InChI=1/C12H12/c1-2-10-7-8-11-5-3-4-6-12(11)9-10/h3-9H,2H2,1H3"
|
34 |
+
33,Cc1ccc(C)c2ccccc12,-4.14,"InChI=1/C12H12/c1-9-7-8-10(2)12-6-4-3-5-11(9)12/h3-8H,1-2H3"
|
35 |
+
34,Cc1ccc2cc(C)ccc2c1,-4.89,"InChI=1/C12H12/c1-9-3-5-12-8-10(2)4-6-11(12)7-9/h3-8H,1-2H3"
|
36 |
+
35,Cc1ccc(C)c2c(C)cccc12,-4.92,"InChI=1/C13H14/c1-9-7-8-11(3)13-10(2)5-4-6-12(9)13/h4-8H,1-3H3"
|
37 |
+
36,c1ccc2c(c1)ccc1ccccc21,-5.26,InChI=1/C14H10/c1-3-7-13-11(5-1)9-10-12-6-2-4-8-14(12)13/h1-10H
|
38 |
+
37,Cc1c2ccccc2cc2ccccc12,-5.89,"InChI=1/C15H12/c1-11-14-8-4-2-6-12(14)10-13-7-3-5-9-15(11)13/h2-10H,1H3"
|
39 |
+
38,c1ccc2c(c1)c1cccc3cccc2c13,-6,InChI=1/C16H10/c1-2-8-13-12(7-1)14-9-3-5-11-6-4-10-15(13)16(11)14/h1-10H
|
40 |
+
39,c1ccc2cc3c(cc2c1)Cc1ccccc31,-8.04,"InChI=1/C17H12/c1-2-6-13-11-17-15(9-12(13)5-1)10-14-7-3-4-8-16(14)17/h1-9,11H,10H2"
|
41 |
+
40,c1ccc2c(c1)c1ccccc1c1ccccc21,-6.74,InChI=1/C18H12/c1-2-8-14-13(7-1)15-9-3-4-11-17(15)18-12-6-5-10-16(14)18/h1-12H
|
42 |
+
41,c1ccc2c(c1)cc1ccc3cccc4ccc2c1c34,-8.19,InChI=1/C20H12/c1-2-7-17-15(4-1)12-16-9-8-13-5-3-6-14-10-11-18(17)20(16)19(13)14/h1-12H
|
43 |
+
42,Cc1c2ccccc2c(C)c2c1ccc1ccccc21,-7.02,"InChI=1/C20H16/c1-13-16-8-5-6-9-17(16)14(2)20-18(13)12-11-15-7-3-4-10-19(15)20/h3-12H,1-2H3"
|
44 |
+
43,c1ccc2c(c1)c1cccc3ccc4cccc2c4c13,-7.8,InChI=1/C20H12/c1-2-8-16-15(7-1)17-9-3-5-13-11-12-14-6-4-10-18(16)20(14)19(13)17/h1-12H
|
45 |
+
44,c1ccc2c(c1)ccc1c3ccccc3ccc21,-8.06,InChI=1/C18H12/c1-3-7-15-13(5-1)9-11-18-16-8-4-2-6-14(16)10-12-17(15)18/h1-12H
|
46 |
+
45,c1cc2cccc3c4cccc5cccc(c(c1)c23)c45,-8.8,InChI=1/C20H12/c1-5-13-6-2-11-17-18-12-4-8-14-7-3-10-16(20(14)18)15(9-1)19(13)17/h1-12H
|
47 |
+
46,c1cc2ccc3ccc4ccc5cccc6c(c1)c2c3c4c56,-9.03,InChI=1/C22H12/c1-3-13-7-9-15-11-12-16-10-8-14-4-2-6-18-17(5-1)19(13)21(15)22(16)20(14)18/h1-12H
|
48 |
+
47,c1ccc2cc3cc(ccc3cc2c1)N,-5.17,"InChI=1/C14H11N/c15-14-6-5-12-7-10-3-1-2-4-11(10)8-13(12)9-14/h1-9H,15H2"
|
49 |
+
48,CCc1ccc2cc3ccccc3cc2c1,-6.89,"InChI=1/C16H14/c1-2-12-7-8-15-10-13-5-3-4-6-14(13)11-16(15)9-12/h3-11H,2H2,1H3"
|
50 |
+
49,c1ccc2c(c1)ccc1c3cccc4cccc(c34)c21,-8,InChI=1/C20H12/c1-2-8-15-13(5-1)11-12-17-16-9-3-6-14-7-4-10-18(19(14)16)20(15)17/h1-12H
|
51 |
+
50,c1ccc2cc3c4cccc5cccc(c3cc2c1)c45,-8.49,InChI=1/C20H12/c1-2-6-15-12-19-17-10-4-8-13-7-3-9-16(20(13)17)18(19)11-14(15)5-1/h1-12H
|
52 |
+
51,C(Br)Cl,-0.89,InChI=1/CH2BrCl/c2-1-3/h1H2
|
53 |
+
52,C(Br)Br,-1.17,InChI=1/CH2Br2/c2-1-3/h1H2
|
54 |
+
53,C(Cl)(Cl)Cl,-1.17,InChI=1/CHCl3/c2-1(3)4/h1H
|
55 |
+
54,C(Br)(Br)Cl,-1.9,InChI=1/CHBr2Cl/c2-1(3)4/h1H
|
56 |
+
55,C(Cl)(Cl)(Cl)Cl,-2.31,"InChI=1/CCl4/c2-1(3,4)5"
|
57 |
+
56,C(Br)(Br)(Br)Br,-3.14,"InChI=1/CBr4/c2-1(3,4)5"
|
58 |
+
57,CC(Cl)Cl,-1.29,"InChI=1/C2H4Cl2/c1-2(3)4/h2H,1H3"
|
59 |
+
58,C(CCl)Br,-1.32,InChI=1/C2H4BrCl/c3-1-2-4/h1-2H2
|
60 |
+
59,C(C(Cl)Cl)Cl,-1.48,"InChI=1/C2H3Cl3/c3-1-2(4)5/h2H,1H2"
|
61 |
+
60,C(C(Cl)Cl)(Cl)Cl,-1.74,InChI=1/C2H2Cl4/c3-1(4)2(5)6/h1-2H
|
62 |
+
61,C(C(Cl)(Cl)Cl)(Cl)(Cl)Cl,-3.67,"InChI=1/C2Cl6/c3-1(4,5)2(6,7)8"
|
63 |
+
62,CCCBr,-1.73,"InChI=1/C3H7Br/c1-2-3-4/h2-3H2,1H3"
|
64 |
+
63,C(CBr)CBr,-2.08,InChI=1/C3H6Br2/c4-2-1-3-5/h1-3H2
|
65 |
+
64,C([C@@H](CCl)Br)Br,-2.38,"InChI=1/C3H5Br2Cl/c4-1-3(5)2-6/h3H,1-2H2/t3-/m0/s1"
|
66 |
+
65,CC[C@@H](C)Cl,-1.96,"InChI=1/C4H9Cl/c1-3-4(2)5/h4H,3H2,1-2H3/t4-/m1/s1"
|
67 |
+
66,CCCCBr,-2.37,"InChI=1/C4H9Br/c1-2-3-4-5/h2-4H2,1H3"
|
68 |
+
67,CC(C)CBr,-2.43,"InChI=1/C4H9Br/c1-4(2)3-5/h4H,3H2,1-2H3"
|
69 |
+
68,CCCCCCl,-2.73,"InChI=1/C5H11Cl/c1-2-3-4-5-6/h2-5H2,1H3"
|
70 |
+
69,CCC(CC)Cl,-2.63,"InChI=1/C5H11Cl/c1-3-5(6)4-2/h5H,3-4H2,1-2H3"
|
71 |
+
70,C[C@H](C(C)(C)Cl)Cl,-2.69,"InChI=1/C5H10Cl2/c1-4(6)5(2,3)7/h4H,1-3H3/t4-/m1/s1"
|
72 |
+
71,CCCCCBr,-3.07,"InChI=1/C5H11Br/c1-2-3-4-5-6/h2-5H2,1H3"
|
73 |
+
72,CCCCCCCCBr,-5.06,"InChI=1/C8H17Br/c1-2-3-4-5-6-7-8-9/h2-8H2,1H3"
|
74 |
+
73,C(C(Cl)(F)F)(Cl)(F)F,-3.12,"InChI=1/C2Cl2F4/c3-1(5,6)2(4,7)8"
|
75 |
+
74,[C@@H]1([C@@H]([C@@H]([C@@H]([C@H]([C@H]1Cl)Cl)Cl)Cl)Cl)Cl,-4.51,"InChI=1/C6H6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9/h1-6H/t1-,2-,3-,4-,5-,6-"
|
76 |
+
75,CCCCCCCBr,-4.43,"InChI=1/C7H15Br/c1-2-3-4-5-6-7-8/h2-7H2,1H3"
|
77 |
+
76,CCCCCCCCl,-3.99,"InChI=1/C7H15Cl/c1-2-3-4-5-6-7-8/h2-7H2,1H3"
|
78 |
+
77,C(=C(Cl)Cl)(Cl)Cl,-2.54,InChI=1/C2Cl4/c3-1(4)2(5)6
|
79 |
+
78,C1=C[C@@H]2C[C@H]1[C@@H]1[C@H]2[C@@]2(C(=C([C@]1(C2(Cl)Cl)Cl)Cl)Cl)Cl,-7.33,"InChI=1/C12H8Cl6/c13-8-9(14)11(16)7-5-2-1-4(3-5)6(7)10(8,15)12(11,17)18/h1-2,4-7H,3H2/t4-,5+,6+,7-,10+,11-"
|
80 |
+
79,C1[C@H]2[C@@H]3[C@H]([C@@H]1[C@H]1[C@@H]2O1)[C@@]1(C(=C([C@]3(C1(Cl)Cl)Cl)Cl)Cl)Cl,-6.18,"InChI=1/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2/t2-,3+,4+,5-,6+,7-,10+,11-"
|
81 |
+
80,C=CCBr,-1.5,"InChI=1/C3H5Br/c1-2-3-4/h2H,1,3H2"
|
82 |
+
81,C(=C(\C(=C(Cl)Cl)Cl)/Cl)\Cl,-4.23,InChI=1/C4HCl5/c5-1-2(6)3(7)4(8)9/h1H/b2-1+
|
83 |
+
82,c1ccc(cc1)Cl,-2.38,InChI=1/C6H5Cl/c7-6-4-2-1-3-5-6/h1-5H
|
84 |
+
83,c1cc(ccc1F)I,-3.13,InChI=1/C6H4FI/c7-5-1-3-6(8)4-2-5/h1-4H
|
85 |
+
84,c1cc(cc(c1)Cl)Br,-3.21,InChI=1/C6H4BrCl/c7-5-2-1-3-6(8)4-5/h1-4H
|
86 |
+
85,c1cc(ccc1Br)Cl,-3.63,InChI=1/C6H4BrCl/c7-5-1-3-6(8)4-2-5/h1-4H
|
87 |
+
86,c1ccc(c(c1)Br)Br,-3.5,InChI=1/C6H4Br2/c7-5-3-1-2-4-6(5)8/h1-4H
|
88 |
+
87,c1cc(c(c(c1)Cl)Cl)Cl,-4,InChI=1/C6H3Cl3/c7-4-2-1-3-5(8)6(4)9/h1-3H
|
89 |
+
88,c1c(cc(cc1Cl)Cl)Cl,-4.48,InChI=1/C6H3Cl3/c7-4-1-5(8)3-6(9)2-4/h1-3H
|
90 |
+
89,c1cc(c(c(c1)Br)Br)Br,-5.04,InChI=1/C6H3Br3/c7-4-2-1-3-5(8)6(4)9/h1-3H
|
91 |
+
90,c1c(c(cc(c1F)F)F)F,-2.38,InChI=1/C6H2F4/c7-3-1-4(8)6(10)2-5(3)9/h1-2H
|
92 |
+
91,c1cc(c(c(c1Cl)Cl)Cl)Cl,-4.57,InChI=1/C6H2Cl4/c7-3-1-2-4(8)6(10)5(3)9/h1-2H
|
93 |
+
92,c1c(c(c(c(c1Cl)Cl)Cl)Cl)Cl,-5.65,InChI=1/C6HCl5/c7-2-1-3(8)5(10)6(11)4(2)9/h1H
|
94 |
+
93,Cc1ccc(cc1)Br,-3.19,"InChI=1/C7H7Br/c1-6-2-4-7(8)5-3-6/h2-5H,1H3"
|
95 |
+
94,c1cc(ccc1F)F,-1.97,InChI=1/C6H4F2/c7-5-1-2-6(8)4-3-5/h1-4H
|
96 |
+
95,c1ccc(cc1)CCl,-2.39,"InChI=1/C7H7Cl/c8-6-7-4-2-1-3-5-7/h1-5H,6H2"
|
97 |
+
96,Cc1ccc(cc1)Cl,-3.08,"InChI=1/C7H7Cl/c1-6-2-4-7(8)5-3-6/h2-5H,1H3"
|
98 |
+
97,c1ccc(cc1)c1ccccc1Cl,-4.54,InChI=1/C12H9Cl/c13-12-9-5-4-8-11(12)10-6-2-1-3-7-10/h1-9H
|
99 |
+
98,c1cc(cc(c1)Cl)c1cccc(c1)Cl,-5.8,InChI=1/C12H8Cl2/c13-11-5-1-3-9(7-11)10-4-2-6-12(14)8-10/h1-8H
|
100 |
+
99,c1ccc(cc1)c1ccc(c(c1)Cl)Cl,-6.39,InChI=1/C12H8Cl2/c13-11-7-6-10(8-12(11)14)9-4-2-1-3-5-9/h1-8H
|
101 |
+
100,c1ccc(cc1)c1cc(c(cc1Cl)Cl)Cl,-6.27,InChI=1/C12H7Cl3/c13-10-7-12(15)11(14)6-9(10)8-4-2-1-3-5-8/h1-7H
|
102 |
+
101,c1ccc(cc1)c1c(cc(cc1Cl)Cl)Cl,-6.14,InChI=1/C12H7Cl3/c13-9-6-10(14)12(11(15)7-9)8-4-2-1-3-5-8/h1-7H
|
103 |
+
102,c1cc(cc(c1)Cl)c1cc(ccc1Cl)Cl,-6.01,InChI=1/C12H7Cl3/c13-9-3-1-2-8(6-9)11-7-10(14)4-5-12(11)15/h1-7H
|
104 |
+
103,c1ccc(c(c1)c1ccc(c(c1)Cl)Cl)Cl,-6.29,InChI=1/C12H7Cl3/c13-10-4-2-1-3-9(10)8-5-6-11(14)12(15)7-8/h1-7H
|
105 |
+
104,c1cc(ccc1c1ccc(cc1Cl)Cl)Cl,-6.21,InChI=1/C12H7Cl3/c13-9-3-1-8(2-4-9)11-6-5-10(14)7-12(11)15/h1-7H
|
106 |
+
105,c1cc(c2cc(ccc2Cl)Cl)c(c(c1)Cl)Cl,-6.47,InChI=1/C12H6Cl4/c13-7-4-5-10(14)9(6-7)8-2-1-3-11(15)12(8)16/h1-6H
|
107 |
+
106,c1cc(c(cc1Cl)c1cc(ccc1Cl)Cl)Cl,-7,InChI=1/C12H6Cl4/c13-7-1-3-11(15)9(5-7)10-6-8(14)2-4-12(10)16/h1-6H
|
108 |
+
107,c1cc(c(c2cc(ccc2Cl)Cl)c(c1)Cl)Cl,-6.8,InChI=1/C12H6Cl4/c13-7-4-5-9(14)8(6-7)12-10(15)2-1-3-11(12)16/h1-6H
|
109 |
+
108,c1cc(c(c(c1)Cl)c1c(cccc1Cl)Cl)Cl,-7.39,InChI=1/C12H6Cl4/c13-7-3-1-4-8(14)11(7)12-9(15)5-2-6-10(12)16/h1-6H
|
110 |
+
109,c1cc(c2ccc(c(c2Cl)Cl)Cl)c(c(c1)Cl)Cl,-7.05,InChI=1/C12H5Cl5/c13-8-3-1-2-6(10(8)15)7-4-5-9(14)12(17)11(7)16/h1-5H
|
111 |
+
110,c1cc(c(cc1c1cc(c(cc1Cl)Cl)Cl)Cl)Cl,-7.39,InChI=1/C12H5Cl5/c13-8-2-1-6(3-10(8)15)7-4-11(16)12(17)5-9(7)14/h1-5H
|
112 |
+
111,c1ccc(cc1)c1c(c(c(c(c1Cl)Cl)Cl)Cl)Cl,-7.92,InChI=1/C12H5Cl5/c13-8-7(6-4-2-1-3-5-6)9(14)11(16)12(17)10(8)15/h1-5H
|
113 |
+
112,c1cc(c(c(c1c1cc(c(cc1Cl)Cl)Cl)Cl)Cl)Cl,-8.32,InChI=1/C12H4Cl6/c13-7-2-1-5(11(17)12(7)18)6-3-9(15)10(16)4-8(6)14/h1-4H
|
114 |
+
113,c1cc(c(cc1Cl)c1cc(c(c(c1Cl)Cl)Cl)Cl)Cl,-7.68,InChI=1/C12H4Cl6/c13-5-1-2-8(14)6(3-5)7-4-9(15)11(17)12(18)10(7)16/h1-4H
|
115 |
+
114,c1cc(c(cc1Cl)c1c(c(cc(c1Cl)Cl)Cl)Cl)Cl,-7.42,InChI=1/C12H4Cl6/c13-5-1-2-7(14)6(3-5)10-11(17)8(15)4-9(16)12(10)18/h1-4H
|
116 |
+
115,c1c(cc(c(c1Cl)c1c(cc(cc1Cl)Cl)Cl)Cl)Cl,-8.71,InChI=1/C12H4Cl6/c13-5-1-7(15)11(8(16)2-5)12-9(17)3-6(14)4-10(12)18/h1-4H
|
117 |
+
116,c1cc(c(cc1c1c(cc(c(c1Cl)Cl)Cl)Cl)Cl)Cl,-7.66,InChI=1/C12H4Cl6/c13-6-2-1-5(3-7(6)14)10-8(15)4-9(16)11(17)12(10)18/h1-4H
|
118 |
+
117,c1cc(c(c(c1c1ccc(c(c1Cl)Cl)Cl)Cl)Cl)Cl,-8.01,InChI=1/C12H4Cl6/c13-7-3-1-5(9(15)11(7)17)6-2-4-8(14)12(18)10(6)16/h1-4H
|
119 |
+
118,c1c(c(cc(c1Cl)Cl)Cl)c1c(cc(c(c1Cl)Cl)Cl)Cl,-7.92,InChI=1/C12H3Cl7/c13-5-2-7(15)6(14)1-4(5)10-8(16)3-9(17)11(18)12(10)19/h1-3H
|
120 |
+
119,c1cc(c(cc1Cl)c1c(c(c(c(c1Cl)Cl)Cl)Cl)Cl)Cl,-8.94,InChI=1/C12H3Cl7/c13-4-1-2-6(14)5(3-4)7-8(15)10(17)12(19)11(18)9(7)16/h1-3H
|
121 |
+
120,c1c(c2cc(c(c(c2Cl)Cl)Cl)Cl)c(c(c(c1Cl)Cl)Cl)Cl,-9.16,InChI=1/C12H2Cl8/c13-5-1-3(7(15)11(19)9(5)17)4-2-6(14)10(18)12(20)8(4)16/h1-2H
|
122 |
+
121,c1c(c2c(c(c(c(c2Cl)Cl)Cl)Cl)Cl)c(c(c(c1Cl)Cl)Cl)Cl,-10.26,InChI=1/C12HCl9/c13-3-1-2(5(14)9(18)6(3)15)4-7(16)10(19)12(21)11(20)8(4)17/h1H
|
123 |
+
122,c1c(c(c(c2c(c(c(c(c2Cl)Cl)Cl)Cl)Cl)c(c1Cl)Cl)Cl)Cl,-10.41,InChI=1/C12HCl9/c13-2-1-3(14)7(16)4(6(2)15)5-8(17)10(19)12(21)11(20)9(5)18/h1H
|
124 |
+
123,c1cc(ccc1C(c1ccc(cc1)Cl)C(Cl)Cl)Cl,-7.2,"InChI=1/C14H10Cl4/c15-11-5-1-9(2-6-11)13(14(17)18)10-3-7-12(16)8-4-10/h1-8,13-14H"
|
125 |
+
124,CCC(CC)O,-0.24,"InChI=1/C5H12O/c1-3-5(6)4-2/h5-6H,3-4H2,1-2H3"
|
126 |
+
125,CCC(C)(C)O,0.08,"InChI=1/C5H12O/c1-4-5(2,3)6/h6H,4H2,1-3H3"
|
127 |
+
126,CC(C)[C@@H](C)O,-0.2,"InChI=1/C5H12O/c1-4(2)5(3)6/h4-6H,1-3H3/t5-/m1/s1"
|
128 |
+
127,CCCC[C@@H](C)O,-0.89,"InChI=1/C6H14O/c1-3-4-5-6(2)7/h6-7H,3-5H2,1-2H3/t6-/m1/s1"
|
129 |
+
128,CC(C)[C@@H](C)CO,-0.39,"InChI=1/C6H14O/c1-5(2)6(3)4-7/h5-7H,4H2,1-3H3/t6-/m0/s1"
|
130 |
+
129,CC[C@H](C(C)C)O,-0.7,"InChI=1/C6H14O/c1-4-6(7)5(2)3/h5-7H,4H2,1-3H3/t6-/m1/s1"
|
131 |
+
130,CC(C)(C)CCO,-0.5,"InChI=1/C6H14O/c1-6(2,3)4-5-7/h7H,4-5H2,1-3H3"
|
132 |
+
131,C[C@H](C(C)(C)C)O,-0.62,"InChI=1/C6H14O/c1-5(7)6(2,3)4/h5,7H,1-4H3/t5-/m1/s1"
|
133 |
+
132,CCC(C)(CC)O,-0.38,"InChI=1/C6H14O/c1-4-6(3,7)5-2/h7H,4-5H2,1-3H3"
|
134 |
+
133,CCCCCCCO,-1.81,"InChI=1/C7H16O/c1-2-3-4-5-6-7-8/h8H,2-7H2,1H3"
|
135 |
+
134,CCCC[C@@H](CC)O,-1.47,"InChI=1/C7H16O/c1-3-5-6-7(8)4-2/h7-8H,3-6H2,1-2H3/t7-/m1/s1"
|
136 |
+
135,CC(C)C[C@H](C)CO,-1.6,"InChI=1/C7H16O/c1-6(2)4-7(3)5-8/h6-8H,4-5H2,1-3H3/t7-/m0/s1"
|
137 |
+
136,CCCCC(C)(C)O,-1.08,"InChI=1/C7H16O/c1-4-5-6-7(2,3)8/h8H,4-6H2,1-3H3"
|
138 |
+
137,CC(C)C(C(C)C)O,-1.22,"InChI=1/C7H16O/c1-5(2)7(8)6(3)4/h5-8H,1-4H3"
|
139 |
+
138,CC[C@@H](C)C(C)(C)O,-0.89,"InChI=1/C7H16O/c1-5-6(2)7(3,4)8/h6,8H,5H2,1-4H3/t6-/m1/s1"
|
140 |
+
139,CC(C)(C)C(C)(C)O,-0.72,"InChI=1/C7H16O/c1-6(2,3)7(4,5)8/h8H,1-5H3"
|
141 |
+
140,CCC(C)(CC)O,-1.6,"InChI=1/C6H14O/c1-4-6(3,7)5-2/h7H,4-5H2,1-3H3"
|
142 |
+
141,CC[C@@](C)(C(C)(C)C)O,-1.27,"InChI=1/C8H18O/c1-6-8(5,9)7(2,3)4/h9H,6H2,1-5H3/t8-/m0/s1"
|
143 |
+
142,CCCCCC(C)(C)O,-1.72,"InChI=1/C8H18O/c1-4-5-6-7-8(2,3)9/h9H,4-7H2,1-3H3"
|
144 |
+
143,CCCCCCCCCCCCO,-4.67,"InChI=1/C12H26O/c1-2-3-4-5-6-7-8-9-10-11-12-13/h13H,2-12H2,1H3"
|
145 |
+
144,c1ccc(cc1)CCCO,-1.38,"InChI=1/C9H12O/c10-8-4-7-9-5-2-1-3-6-9/h1-3,5-6,10H,4,7-8H2"
|
146 |
+
145,CCCCCCCCCO,-3.01,"InChI=1/C9H20O/c1-2-3-4-5-6-7-8-9-10/h10H,2-9H2,1H3"
|
147 |
+
146,C[C@H]1CC[C@H](CC1)O,-0.88,"InChI=1/C7H14O/c1-6-2-4-7(8)5-3-6/h6-8H,2-5H2,1H3/t6-,7+"
|
148 |
+
147,c1cc(cc(c1)O)O,0.81,"InChI=1/C6H6O2/c7-5-2-1-3-6(8)4-5/h1-4,7-8H"
|
149 |
+
148,c1cc(ccc1O)O,-0.17,"InChI=1/C6H6O2/c7-5-1-2-6(8)4-3-5/h1-4,7-8H"
|
150 |
+
149,Cc1ccccc1O,-0.62,"InChI=1/C7H8O/c1-6-4-2-3-5-7(6)8/h2-5,8H,1H3"
|
151 |
+
150,c1ccc(cc1)CO,-0.4,"InChI=1/C7H8O/c8-6-7-4-2-1-3-5-7/h1-5,8H,6H2"
|
152 |
+
151,Cc1cccc(c1)O,-0.68,"InChI=1/C7H8O/c1-6-3-2-4-7(8)5-6/h2-5,8H,1H3"
|
153 |
+
152,Cc1ccc(c(C)c1)O,-1.19,"InChI=1/C8H10O/c1-6-3-4-8(9)7(2)5-6/h3-5,9H,1-2H3"
|
154 |
+
153,Cc1cc(C)cc(c1)O,-1.4,"InChI=1/C8H10O/c1-6-3-7(2)5-8(9)4-6/h3-5,9H,1-2H3"
|
155 |
+
154,CC(C)(C)c1ccc(cc1)O,-2.41,"InChI=1/C10H14O/c1-10(2,3)8-4-6-9(11)7-5-8/h4-7,11H,1-3H3"
|
156 |
+
155,c1ccc2c(c1)cccc2O,-2.22,"InChI=1/C10H8O/c11-10-7-3-5-8-4-1-2-6-9(8)10/h1-7,11H"
|
157 |
+
156,c1cc2c(cccc2O)c(c1)O,-2.92,"InChI=1/C10H8O2/c11-9-5-1-3-7-8(9)4-2-6-10(7)12/h1-6,11-12H"
|
158 |
+
157,c1ccc(cc1)c1ccccc1O,-2.39,"InChI=1/C12H10O/c13-12-9-5-4-8-11(12)10-6-2-1-3-7-10/h1-9,13H"
|
159 |
+
158,CCCCCC=O,-1.3,"InChI=1/C6H12O/c1-2-3-4-5-6-7/h6H,2-5H2,1H3"
|
160 |
+
159,CCCCCCCCC=O,-3.17,"InChI=1/C9H18O/c1-2-3-4-5-6-7-8-9-10/h9H,2-8H2,1H3"
|
161 |
+
160,c1ccc(cc1)C=O,-1.19,InChI=1/C7H6O/c8-6-7-4-2-1-3-5-7/h1-6H
|
162 |
+
161,C1CCC(=O)CC1,-0.6,InChI=1/C6H10O/c7-6-4-2-1-3-5-6/h1-5H2
|
163 |
+
162,CCCCCC(=O)C,-1.42,"InChI=1/C7H14O/c1-3-4-5-6-7(2)8/h3-6H2,1-2H3"
|
164 |
+
163,CCCCC(=O)CCCC,-2.59,"InChI=1/C9H18O/c1-3-5-7-9(10)8-6-4-2/h3-8H2,1-2H3"
|
165 |
+
164,CC(C)CCC(=O)C,-1.33,"InChI=1/C7H14O/c1-6(2)4-5-7(3)8/h6H,4-5H2,1-3H3"
|
166 |
+
165,C1=CC(=O)C=CC1=O,-0.99,InChI=1/C6H4O2/c7-5-1-2-6(8)4-3-5/h1-4H
|
167 |
+
166,CC(=O)CC(=O)C,0.22,"InChI=1/C5H8O2/c1-4(6)3-5(2)7/h3H2,1-2H3"
|
168 |
+
167,CCCCCCCCC(=O)C,-3.31,"InChI=1/C10H20O/c1-3-4-5-6-7-8-9-10(2)11/h3-9H2,1-2H3"
|
169 |
+
168,CC(=O)[C@@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,-4.43,"InChI=1/C21H30O2/c1-13(22)17-6-7-18-16-5-4-14-12-15(23)8-10-20(14,2)19(16)9-11-21(17,18)3/h12,16-19H,4-11H2,1-3H3/t16-,17-,18-,19-,20-,21+/m0/s1"
|
170 |
+
169,c1ccc2c(c1)C(=O)c1ccccc1C2=O,-5.19,InChI=1/C14H8O2/c15-13-9-5-1-2-6-10(9)14(16)12-8-4-3-7-11(12)13/h1-8H
|
171 |
+
170,C(CC(=O)O)CC(=O)O,1,"InChI=1/C5H8O4/c6-4(7)2-1-3-5(8)9/h1-3H2,(H,6,7)(H,8,9)/f/h6,8H"
|
172 |
+
171,C(CCC(=O)O)CC(=O)O,-0.82,"InChI=1/C6H10O4/c7-5(8)3-1-2-4-6(9)10/h1-4H2,(H,7,8)(H,9,10)/f/h7,9H"
|
173 |
+
172,CCCCCCCC(=O)O,-2.3,"InChI=1/C8H16O2/c1-2-3-4-5-6-7-8(9)10/h2-7H2,1H3,(H,9,10)/f/h9H"
|
174 |
+
173,CCCCCCCCCC(=O)O,-3.44,"InChI=1/C10H20O2/c1-2-3-4-5-6-7-8-9-10(11)12/h2-9H2,1H3,(H,11,12)/f/h11H"
|
175 |
+
174,c1ccc(cc1)C(=O)O,-1.55,"InChI=1/C7H6O2/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H,8,9)/f/h8H"
|
176 |
+
175,c1ccc(c(c1)C(=O)O)C(=O)O,-2.11,"InChI=1/C8H6O4/c9-7(10)5-3-1-2-4-6(5)8(11)12/h1-4H,(H,9,10)(H,11,12)/f/h9,11H"
|
177 |
+
176,CCCCCCCCCCCCCCCC(=O)O,-6.81,"InChI=1/C16H32O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16(17)18/h2-15H2,1H3,(H,17,18)/f/h17H"
|
178 |
+
177,C=C1C[C@]23C[C@]1(CC[C@H]2[C@]12C=C[C@@H]([C@](C)([C@H]1[C@@H]3C(=O)O)C(=O)O2)O)O,-1.84,"InChI=1/C19H22O6/c1-9-7-17-8-18(9,24)5-3-10(17)19-6-4-11(20)16(2,15(23)25-19)13(19)12(17)14(21)22/h4,6,10-13,20,24H,1,3,5,7-8H2,2H3,(H,21,22)/t10-,11+,12-,13-,16-,17+,18+,19-/m1/s1/f/h21H"
|
179 |
+
178,CCC(CC)C(=O)O,-0.81,"InChI=1/C6H12O2/c1-3-5(4-2)6(7)8/h5H,3-4H2,1-2H3,(H,7,8)/f/h7H"
|
180 |
+
179,CCCC(CCC)C(=O)O,-1.86,"InChI=1/C8H16O2/c1-3-5-7(6-4-2)8(9)10/h7H,3-6H2,1-2H3,(H,9,10)/f/h9H"
|
181 |
+
180,c1ccc(cc1)OCC(=O)O,-1.1,"InChI=1/C8H8O3/c9-8(10)6-11-7-4-2-1-3-5-7/h1-5H,6H2,(H,9,10)/f/h9H"
|
182 |
+
181,CCCCCCCCCCC(=O)O,-3.55,"InChI=1/C11H22O2/c1-2-3-4-5-6-7-8-9-10-11(12)13/h2-10H2,1H3,(H,12,13)/f/h12H"
|
183 |
+
182,CCCCCCCCCCCCCC(=O)O,-5.33,"InChI=1/C14H28O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14(15)16/h2-13H2,1H3,(H,15,16)/f/h15H"
|
184 |
+
183,COC=O,0.58,"InChI=1/C2H4O2/c1-4-2-3/h2H,1H3"
|
185 |
+
184,CCCC(=O)OC,-0.82,"InChI=1/C5H10O2/c1-3-4-5(6)7-2/h3-4H2,1-2H3"
|
186 |
+
185,CC(C)COC=O,-1.01,"InChI=1/C5H10O2/c1-5(2)3-7-4-6/h4-5H,3H2,1-2H3"
|
187 |
+
186,CCCCOC(=O)C,-1.24,"InChI=1/C6H12O2/c1-3-4-5-8-6(2)7/h3-5H2,1-2H3"
|
188 |
+
187,CCCCC(=O)OC,-1.36,"InChI=1/C6H12O2/c1-3-4-5-6(7)8-2/h3-5H2,1-2H3"
|
189 |
+
188,C=CC(=O)OCC(C)C,-1.21,"InChI=1/C7H12O2/c1-4-7(8)9-5-6(2)3/h4,6H,1,5H2,2-3H3"
|
190 |
+
189,CCCC(=O)OCCC,-1.92,"InChI=1/C7H14O2/c1-3-5-7(8)9-6-4-2/h3-6H2,1-2H3"
|
191 |
+
190,CCCCCCOC(=O)C,-2.46,"InChI=1/C8H16O2/c1-3-4-5-6-7-10-8(2)9/h3-7H2,1-2H3"
|
192 |
+
191,CCCCCC(=O)OCC,-2.31,"InChI=1/C8H16O2/c1-3-5-6-7-8(9)10-4-2/h3-7H2,1-2H3"
|
193 |
+
192,CCCCCCC(=O)OCC,-2.71,"InChI=1/C9H18O2/c1-3-5-6-7-8-9(10)11-4-2/h3-8H2,1-2H3"
|
194 |
+
193,CCCCCCCC(=O)OCC,-3.39,"InChI=1/C10H20O2/c1-3-5-6-7-8-9-10(11)12-4-2/h3-9H2,1-2H3"
|
195 |
+
194,CCCCCCCCC(=O)OCC,-3.8,"InChI=1/C11H22O2/c1-3-5-6-7-8-9-10-11(12)13-4-2/h3-10H2,1-2H3"
|
196 |
+
195,CCCCCCCCCC(=O)OCC,-4.1,"InChI=1/C12H24O2/c1-3-5-6-7-8-9-10-11-12(13)14-4-2/h3-11H2,1-2H3"
|
197 |
+
196,COC(=O)c1ccccc1,-1.85,"InChI=1/C8H8O2/c1-10-8(9)7-5-3-2-4-6-7/h2-6H,1H3"
|
198 |
+
197,CC(C)COC(=O)c1ccccc1C(=O)OCC(C)C,-4.66,"InChI=1/C16H22O4/c1-11(2)9-19-15(17)13-7-5-6-8-14(13)16(18)20-10-12(3)4/h5-8,11-12H,9-10H2,1-4H3"
|
199 |
+
198,CC(C)OC,-0.06,"InChI=1/C4H10O/c1-4(2)5-3/h4H,1-3H3"
|
200 |
+
199,CC(C)(C)OC,-0.24,"InChI=1/C5H12O/c1-5(2,3)6-4/h1-4H3"
|
201 |
+
200,CCCOCCC,-1.62,"InChI=1/C6H14O/c1-3-5-7-6-4-2/h3-6H2,1-2H3"
|
202 |
+
201,CC(C)OC(C)C,-1.1,"InChI=1/C6H14O/c1-5(2)7-6(3)4/h5-6H,1-4H3"
|
203 |
+
202,CCOCCOCC,-0.77,"InChI=1/C6H14O2/c1-3-7-5-6-8-4-2/h3-6H2,1-2H3"
|
204 |
+
203,c1ccc(cc1)Oc1ccccc1,-3.96,InChI=1/C12H10O/c1-3-7-11(8-4-1)13-12-9-5-2-6-10-12/h1-10H
|
205 |
+
204,c1ccc2c(c1)Oc1ccccc1O2,-5.31,InChI=1/C12H8O2/c1-2-6-10-9(5-1)13-11-7-3-4-8-12(11)14-10/h1-8H
|
206 |
+
205,C1CCOC1,0.56,InChI=1/C4H8O/c1-2-4-5-3-1/h1-4H2
|
207 |
+
206,C([C@@H]([C@@H]([C@@H](C(=O)CO)O)O)O)O,0.64,"InChI=1/C6H12O6/c7-1-3(9)5(11)6(12)4(10)2-8/h3,5-9,11-12H,1-2H2/t3-,5-,6+/m0/s1"
|
208 |
+
207,C[C@@]12CCC(=O)C=C1CC[C@@H]1[C@H]3CC[C@@](C(=O)CO)([C@]3(C)C[C@@H]([C@@H]21)O)O,-2.97,"InChI=1/C21H30O5/c1-19-7-5-13(23)9-12(19)3-4-14-15-6-8-21(26,17(25)11-22)20(15,2)10-16(24)18(14)19/h9,14-16,18,22,24,26H,3-8,10-11H2,1-2H3/t14-,15-,16+,18+,19-,20-,21-/m1/s1"
|
209 |
+
208,C[C@@H]1C[C@H]2[C@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3([C@H](C[C@]2(C)[C@]1(C(=O)CO)O)O)F,-3.64,"InChI=1/C22H29FO5/c1-12-8-16-15-5-4-13-9-14(25)6-7-19(13,2)21(15,23)17(26)10-20(16,3)22(12,28)18(27)11-24/h6-7,9,12,15-17,24,26,28H,4-5,8,10-11H2,1-3H3/t12-,15-,16+,17+,19+,20+,21-,22+/m1/s1"
|
210 |
+
209,CC(=O)OCC(=O)[C@]1(CC[C@H]2[C@H]3CCC4=CC(=O)CC[C@]4(C)[C@@H]3[C@H](C[C@]12C)O)O,-4.46,"InChI=1/C23H32O6/c1-13(24)29-12-19(27)23(28)9-7-17-16-5-4-14-10-15(25)6-8-21(14,2)20(16)18(26)11-22(17,23)3/h10,16-18,20,26,28H,4-9,11-12H2,1-3H3/t16-,17+,18+,20+,21+,22+,23+/m1/s1"
|
211 |
+
210,C[C@@]12C=CC(=O)C=C1CC[C@@H]1[C@@H]3CC[C@](C(=O)CO)([C@@]3(C)C[C@H]([C@@H]21)O)O,-3.21,"InChI=1/C21H28O5/c1-19-7-5-13(23)9-12(19)3-4-14-15-6-8-21(26,17(25)11-22)20(15,2)10-16(24)18(14)19/h5,7,9,14-16,18,22,24,26H,3-4,6,8,10-11H2,1-2H3/t14-,15+,16-,18+,19-,20+,21+/m1/s1"
|
212 |
+
211,CC(=O)S[C@H]1CC2=CC(=O)CC[C@@]2(C)[C@@H]2CC[C@@]3(C)[C@@H](CC[C@@]43CCC(=O)O4)[C@@H]12,-4.28,"InChI=1/C24H32O4S/c1-14(25)29-19-13-15-12-16(26)4-8-22(15,2)17-5-9-23(3)18(21(17)19)6-10-24(23)11-7-20(27)28-24/h12,17-19,21H,4-11,13H2,1-3H3/t17-,18+,19+,21+,22-,23+,24+/m1/s1"
|
213 |
+
212,C[C@]12CC[C@H]3c4ccc(cc4CC[C@@H]3[C@@H]1CCC2=O)O,-3.96,"InChI=1/C18H22O2/c1-18-9-8-14-13-5-3-12(19)10-11(13)2-4-15(14)16(18)6-7-17(18)20/h3,5,10,14-16,19H,2,4,6-9H2,1H3/t14-,15-,16-,18-/m0/s1"
|
214 |
+
213,C[C@]12CCC(=O)C=C1CC[C@H]1[C@@H]3CCC(=O)[C@@]3(C)CC[C@H]21,-3.69,"InChI=1/C19H26O2/c1-18-9-7-13(20)11-12(18)3-4-14-15-5-6-17(21)19(15,2)10-8-16(14)18/h11,14-16H,3-10H2,1-2H3/t14-,15-,16-,18-,19-/m0/s1"
|
215 |
+
214,C[C@@]12CCC(=O)C=C1CC[C@@H]1[C@H]3CC[C@H](C(=O)CO)[C@]3(C)CC[C@@H]21,-3.75,"InChI=1/C21H30O3/c1-20-9-7-14(23)11-13(20)3-4-15-16-5-6-18(19(24)12-22)21(16,2)10-8-17(15)20/h11,15-18,22H,3-10,12H2,1-2H3/t15-,16-,17-,18-,20-,21-/m1/s1"
|
216 |
+
215,CC1(C)O[C@@H]2C[C@H]3[C@@H]4CCC5=CC(=O)C=C[C@]5(C)[C@]4([C@H](C[C@]3(C)[C@]2(C(=O)CO)O1)O)F,-4.32,"InChI=1/C24H31FO6/c1-20(2)30-19-10-16-15-6-5-13-9-14(27)7-8-21(13,3)23(15,25)17(28)11-22(16,4)24(19,31-20)18(29)12-26/h7-9,15-17,19,26,28H,5-6,10-12H2,1-4H3/t15-,16-,17-,19+,21-,22-,23-,24+/m0/s1"
|
217 |
+
216,C[C@]12CCC(=O)C=C1CC[C@@H]1[C@H]3CC[C@](C(=O)CO)([C@]3(C)C[C@H]([C@@]21F)O)O,-3.43,"InChI=1/C21H29FO5/c1-18-7-5-13(24)9-12(18)3-4-15-14-6-8-20(27,17(26)11-23)19(14,2)10-16(25)21(15,18)22/h9,14-16,23,25,27H,3-8,10-11H2,1-2H3/t14-,15-,16-,18+,19-,20+,21-/m1/s1"
|
218 |
+
217,C[C@H]1C[C@@H]2[C@H]3CC[C@](C(=O)C)([C@]3(C)C[C@H]([C@]2([C@]2(C)C=CC(=O)C=C12)F)O)O,-4.1,"InChI=1/C22H29FO4/c1-12-9-17-15-6-8-21(27,13(2)24)20(15,4)11-18(26)22(17,23)19(3)7-5-14(25)10-16(12)19/h5,7,10,12,15,17-18,26-27H,6,8-9,11H2,1-4H3/t12-,15+,17+,18+,19+,20+,21-,22+/m0/s1"
|
219 |
+
218,CCCCC(=O)O[C@]1([C@@H](C)C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@]3([C@@H](C[C@]12C)O)F)C(=O)CO,-4.71,"InChI=1/C27H37FO6/c1-5-6-7-23(33)34-27(22(32)15-29)16(2)12-20-19-9-8-17-13-18(30)10-11-24(17,3)26(19,28)21(31)14-25(20,27)4/h10-11,13,16,19-21,29,31H,5-9,12,14-15H2,1-4H3/t16-,19-,20-,21+,24-,25-,26-,27+/m0/s1"
|
220 |
+
219,COc1ccccc1O,-1.96,"InChI=1/C7H8O2/c1-9-7-5-3-2-4-6(7)8/h2-5,8H,1H3"
|
221 |
+
220,COc1ccc(cc1)C=O,-1.49,"InChI=1/C8H8O2/c1-10-8-4-2-7(6-9)3-5-8/h2-6H,1H3"
|
222 |
+
221,c1cc(ccc1Cl)O,-0.7,"InChI=1/C6H5ClO/c7-5-1-3-6(8)4-2-5/h1-4,8H"
|
223 |
+
222,c1c(cc(c(c1Cl)O)Cl)Cl,-2.34,"InChI=1/C6H3Cl3O/c7-3-1-4(8)6(10)5(9)2-3/h1-2,10H"
|
224 |
+
223,Cc1cc(ccc1Cl)O,-1.57,"InChI=1/C7H7ClO/c1-5-4-6(9)2-3-7(5)8/h2-4,9H,1H3"
|
225 |
+
224,C(COCCCl)Cl,-1.12,InChI=1/C4H8Cl2O/c5-1-3-7-4-2-6/h1-4H2
|
226 |
+
225,C1[C@H]2[C@H]3[C@@H]([C@@H]1[C@@H]1[C@H]2O1)[C@@]1(C(=C([C@]3(C1(Cl)Cl)Cl)Cl)Cl)Cl,-6.29,"InChI=1/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2/t2-,3+,4-,5+,6-,7+,10+,11-"
|
227 |
+
226,CCCCCCN,-1.1,"InChI=1/C6H15N/c1-2-3-4-5-6-7/h2-7H2,1H3"
|
228 |
+
227,CCCCNCCCC,-1.44,"InChI=1/C8H19N/c1-3-5-7-9-8-6-4-2/h9H,3-8H2,1-2H3"
|
229 |
+
228,CCCCCCCCN,-2.75,"InChI=1/C8H19N/c1-2-3-4-5-6-7-8-9/h2-9H2,1H3"
|
230 |
+
229,c1cc(ccc1N)N,-0.38,"InChI=1/C6H8N2/c7-5-1-2-6(8)4-3-5/h1-4H,7-8H2"
|
231 |
+
230,Cc1ccccc1N,-0.85,"InChI=1/C7H9N/c1-6-4-2-3-5-7(6)8/h2-5H,8H2,1H3"
|
232 |
+
231,CNc1ccccc1,-1.28,"InChI=1/C7H9N/c1-8-7-5-3-2-4-6-7/h2-6,8H,1H3"
|
233 |
+
232,Cc1cccc(c1)N,-0.85,"InChI=1/C7H9N/c1-6-3-2-4-7(8)5-6/h2-5H,8H2,1H3"
|
234 |
+
233,CCN(CC)c1ccccc1,-3.03,"InChI=1/C10H15N/c1-3-11(4-2)10-8-6-5-7-9-10/h5-9H,3-4H2,1-2H3"
|
235 |
+
234,c1cc(ccc1Cc1ccc(cc1)N)N,-2.3,"InChI=1/C13H14N2/c14-12-5-1-10(2-6-12)9-11-3-7-13(15)8-4-11/h1-8H,9,14-15H2"
|
236 |
+
235,c1ccncc1,0.76,InChI=1/C5H5N/c1-2-4-6-5-3-1/h1-5H
|
237 |
+
236,Cc1ccc(C)nc1,0.4,"InChI=1/C7H9N/c1-6-3-4-7(2)8-5-6/h3-5H,1-2H3"
|
238 |
+
237,Cc1cccc(C)n1,0.45,"InChI=1/C7H9N/c1-6-4-3-5-7(2)8-6/h3-5H,1-2H3"
|
239 |
+
238,Cc1ccncc1C,0.36,"InChI=1/C7H9N/c1-6-3-4-8-5-7(6)2/h3-5H,1-2H3"
|
240 |
+
239,Cc1cc(C)cnc1,0.38,"InChI=1/C7H9N/c1-6-3-7(2)5-8-4-6/h3-5H,1-2H3"
|
241 |
+
240,c1cnccc1c1ccncc1,-1.54,InChI=1/C10H8N2/c1-5-11-6-2-9(1)10-3-7-12-8-4-10/h1-8H
|
242 |
+
241,c1cnccc1C(=O)NN,0.01,"InChI=1/C6H7N3O/c7-9-6(10)5-1-3-8-4-2-5/h1-4H,7H2,(H,9,10)"
|
243 |
+
242,c1ccc2c(c1)ccc1c2cccn1,-3.36,InChI=1/C13H9N/c1-2-5-11-10(4-1)7-8-13-12(11)6-3-9-14-13/h1-9H
|
244 |
+
243,c1ccc2c(c1)nn[nH]2,-0.78,"InChI=1/C6H5N3/c1-2-4-6-5(3-1)7-9-8-6/h1-4H,(H,7,8,9)"
|
245 |
+
244,c1cc[nH]c1,-0.17,InChI=1/C4H5N/c1-2-4-5-3-1/h1-5H
|
246 |
+
245,c1ccsc1,-1.45,InChI=1/C4H4S/c1-2-4-5-3-1/h1-4H
|
247 |
+
246,c1ccc2c(c1)cc[nH]2,-1.52,"InChI=1/C8H7N/c1-2-4-8-7(3-1)5-6-9-8/h1-6,9H"
|
248 |
+
247,c1ccnnc1,1.1,InChI=1/C4H4N2/c1-2-4-6-5-3-1/h1-4H
|
249 |
+
248,c1cc2c(cccn2)c(c1)O,-2.54,"InChI=1/C9H7NO/c11-9-5-1-4-8-7(9)3-2-6-10-8/h1-6,11H"
|
250 |
+
249,c1cc2cc(ccc2nc1)O,-2.16,"InChI=1/C9H7NO/c11-8-3-4-9-7(6-8)2-1-5-10-9/h1-6,11H"
|
251 |
+
250,CCC1(CC)C(=NC(=O)N(C)C1=O)O,-2.23,"InChI=1/C9H14N2O3/c1-4-9(5-2)6(12)10-8(14)11(3)7(9)13/h4-5H2,1-3H3,(H,10,12,14)"
|
252 |
+
251,CCCCC1C(=O)N(c2ccccc2)N(c2ccccc2)C1=O,-3.81,"InChI=1/C19H20N2O2/c1-2-3-14-17-18(22)20(15-10-6-4-7-11-15)21(19(17)23)16-12-8-5-9-13-16/h4-13,17H,2-3,14H2,1H3"
|
253 |
+
252,CN(C)CCCN1c2ccccc2Sc2ccc(cc12)Cl,-5.01,"InChI=1/C17H19ClN2S/c1-19(2)10-5-11-20-14-6-3-4-7-16(14)21-17-9-8-13(18)12-15(17)20/h3-4,6-9,12H,5,10-11H2,1-2H3"
|
254 |
+
253,CN1[C@H]2CC[C@@H]1C[C@H](C2)OC(=O)[C@@H](CO)c1ccccc1,-2.12,"InChI=1/C17H23NO3/c1-18-13-7-8-14(18)10-15(9-13)21-17(20)16(11-19)12-5-3-2-4-6-12/h2-6,13-16,19H,7-11H2,1H3/t13-,14+,15-,16-/m0/s1"
|
255 |
+
254,c1cc(CNc2cc(c(cc2C(=O)O)S(=O)(=O)N)Cl)oc1,-3.66,"InChI=1/C12H11ClN2O5S/c13-9-5-10(15-6-7-2-1-3-20-7)8(12(16)17)4-11(9)21(14,18)19/h1-5,15H,6H2,(H,16,17)(H2,14,18,19)/f/h16H,14H2"
|
256 |
+
255,CC/C(=C(/CC)\c1ccc(cc1)O)/c1ccc(cc1)O,-4.35,"InChI=1/C18H20O2/c1-3-17(13-5-9-15(19)10-6-13)18(4-2)14-7-11-16(20)12-8-14/h5-12,19-20H,3-4H2,1-2H3/b18-17+"
|
257 |
+
256,CCC1(CC)C(=NC(=O)N=C1O)O,-1.39,"InChI=1/C8H12N2O3/c1-3-8(4-2)5(11)9-7(13)10-6(8)12/h3-4H2,1-2H3,(H2,9,10,11,12,13)"
|
258 |
+
257,CCCC(C)(COC(=N)O)COC(=N)O,-1.67,"InChI=1/C9H18N2O4/c1-3-4-9(2,5-14-7(10)12)6-15-8(11)13/h3-6H2,1-2H3,(H2,10,12)(H2,11,13)"
|
259 |
+
258,Cc1cc(C)nc(n1)NS(=O)(=O)c1ccc(cc1)N,-2.27,"InChI=1/C12H14N4O2S/c1-8-7-9(2)15-12(14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)"
|
260 |
+
259,c1ccc2c(c1)N(CCCN1CCN(CC1)CCO)c1cc(ccc1S2)Cl,-4.16,"InChI=1/C21H26ClN3OS/c22-17-6-7-21-19(16-17)25(18-4-1-2-5-20(18)27-21)9-3-8-23-10-12-24(13-11-23)14-15-26/h1-2,4-7,16,26H,3,8-15H2"
|
261 |
+
260,CN(C)CCCN1c2ccccc2Sc2ccccc12,-4.3,"InChI=1/C17H20N2S/c1-18(2)12-7-13-19-14-8-3-5-10-16(14)20-17-11-6-4-9-15(17)19/h3-6,8-11H,7,12-13H2,1-2H3"
|
262 |
+
261,c1c(c(cc2c1N=CNS2(=O)=O)S(=O)(=O)N)Cl,-3.05,"InChI=1/C7H6ClN3O4S2/c8-4-1-5-7(2-6(4)16(9,12)13)17(14,15)11-3-10-5/h1-3H,(H,10,11)(H2,9,12,13)"
|
263 |
+
262,CCOc1ccc(cc1)N=C(C)O,-2.37,"InChI=1/C10H13NO2/c1-3-13-10-6-4-9(5-7-10)11-8(2)12/h4-7H,3H2,1-2H3,(H,11,12)"
|
264 |
+
263,c1cc(ccc1N)S(=O)(=O)N,-1.36,"InChI=1/C6H8N2O2S/c7-5-1-3-6(4-2-5)11(8,9)10/h1-4H,7H2,(H2,8,9,10)"
|
265 |
+
264,c1cn(c2c1c(N)ncn2)[C@@H]1[C@@H]([C@H]([C@@H](CO)O1)O)O,-1.95,"InChI=1/C11H14N4O4/c12-9-5-1-2-15(10(5)14-4-13-9)11-8(18)7(17)6(3-16)19-11/h1-2,4,6-8,11,16-18H,3H2,(H2,12,13,14)/t6-,7+,8-,11+/m1/s1"
|
266 |
+
265,c1cc(ccc1N)S(=O)(=O)Nc1nccs1,-2.43,"InChI=1/C9H9N3O2S2/c10-7-1-3-8(4-2-7)16(13,14)12-9-11-5-6-15-9/h1-6H,10H2,(H,11,12)"
|
267 |
+
266,C=CCC1(C(C)C)C(=NC(=O)N=C1O)O,-1.71,"InChI=1/C10H14N2O3/c1-4-5-10(6(2)3)7(13)11-9(15)12-8(10)14/h4,6H,1,5H2,2-3H3,(H2,11,12,13,14,15)"
|
268 |
+
267,CCC[C@@H](C)C1(CC=C)C(=NC(=S)N=C1O)O,-3.46,"InChI=1/C12H18N2O2S/c1-4-6-8(3)12(7-5-2)9(15)13-11(17)14-10(12)16/h5,8H,2,4,6-7H2,1,3H3,(H2,13,14,15,16,17)/t8-/m1/s1"
|
269 |
+
268,CCCCC1(CC)C(=NC(=O)N=C1O)O,-1.64,"InChI=1/C10H16N2O3/c1-3-5-6-10(4-2)7(13)11-9(15)12-8(10)14/h3-6H2,1-2H3,(H2,11,12,13,14,15)"
|
270 |
+
269,COc1ccc(nn1)NS(=O)(=O)c1ccc(cc1)N,-3.28,"InChI=1/C11H12N4O3S/c1-18-11-7-6-10(13-14-11)15-19(16,17)9-4-2-8(12)3-5-9/h2-7H,12H2,1H3,(H,13,15)"
|
271 |
+
270,c1ccc2c(c1)ccc(=O)o2,-1.89,InChI=1/C9H6O2/c10-9-6-5-7-3-1-2-4-8(7)11-9/h1-6H
|
272 |
+
271,CCc1nnc(NS(=O)(=O)c2ccc(cc2)N)s1,-1.94,"InChI=1/C10H12N4O2S2/c1-2-9-12-13-10(17-9)14-18(15,16)8-5-3-7(11)4-6-8/h3-6H,2,11H2,1H3,(H,13,14)"
|
273 |
+
272,C=CCC1(c2ccccc2)C(=NC(=O)N=C1O)O,-2.18,"InChI=1/C13H12N2O3/c1-2-8-13(9-6-4-3-5-7-9)10(16)14-12(18)15-11(13)17/h2-7H,1,8H2,(H2,14,15,16,17,18)"
|
274 |
+
273,CCC1(c2ccccc2)C(=NCN=C1O)O,-2.64,"InChI=1/C12H14N2O2/c1-2-12(9-6-4-3-5-7-9)10(15)13-8-14-11(12)16/h3-7H,2,8H2,1H3,(H,13,15)(H,14,16)"
|
275 |
+
274,c1c(c(cc2c1N[C@H](C(Cl)Cl)NS2(=O)=O)S(=O)(=O)N)Cl,-2.68,"InChI=1/C8H8Cl3N3O4S2/c9-3-1-4-6(2-5(3)19(12,15)16)20(17,18)14-8(13-4)7(10)11/h1-2,7-8,13-14H,(H2,12,15,16)/t8-/m0/s1"
|
276 |
+
275,CC(=NS(=O)(=O)c1ccc(cc1)N)O,-1.23,"InChI=1/C8H10N2O3S/c1-6(11)10-14(12,13)8-4-2-7(9)3-5-8/h2-5H,9H2,1H3,(H,10,11)"
|
277 |
+
276,C([C@@H]([C@@H](CO)O)O)O,0.7,"InChI=1/C4H10O4/c5-1-3(7)4(8)2-6/h3-8H,1-2H2/t3-,4+"
|
278 |
+
277,c1c2c(ncnc2O)n[nH]1,-2.38,"InChI=1/C5H4N4O/c10-5-3-1-8-9-4(3)6-2-7-5/h1-2H,(H2,6,7,8,9,10)"
|
279 |
+
278,COc1cc2c(cc1OC)N1C(=O)C[C@H]3[C@@H]4[C@H]5C[C@H]6[C@]2(CCN6CC5=CCO3)[C@@H]14,-2.09,"InChI=1/C23H26N2O4/c1-27-16-8-14-15(9-17(16)28-2)25-20(26)10-18-21-13-7-19-23(14,22(21)25)4-5-24(19)11-12(13)3-6-29-18/h3,8-9,13,18-19,21-22H,4-7,10-11H2,1-2H3/t13-,18-,19-,21-,22-,23+/m0/s1"
|
280 |
+
279,CCN(CC)CCN=C(c1cc(c(cc1OC)N)Cl)O,-3.18,"InChI=1/C14H22ClN3O2/c1-4-18(5-2)7-6-17-14(19)10-8-11(15)12(16)9-13(10)20-3/h8-9H,4-7,16H2,1-3H3,(H,17,19)"
|
281 |
+
280,CCOc1ccc2c(c1)sc(n2)S(=O)(=O)N,-3.81,"InChI=1/C9H10N2O3S2/c1-2-14-6-3-4-7-8(5-6)15-9(11-7)16(10,12)13/h3-5H,2H2,1H3,(H2,10,12,13)"
|
282 |
+
281,CCC1(C2=CCCCCC2)C(=NC(=O)N=C1O)O,-3,"InChI=1/C13H18N2O3/c1-2-13(9-7-5-3-4-6-8-9)10(16)14-12(18)15-11(13)17/h7H,2-6,8H2,1H3,(H2,14,15,16,17,18)"
|
283 |
+
282,c1cc(c(cc1[C@@H]1CC(=O)c2c(cc(cc2O1)O)O)O)O,-3.62,"InChI=1/C15H12O6/c16-8-4-11(19)15-12(20)6-13(21-14(15)5-8)7-1-2-9(17)10(18)3-7/h1-5,13,16-19H,6H2/t13-/m0/s1"
|
284 |
+
283,COc1cnc(nc1)NS(=O)(=O)c1ccc(cc1)N,-2.58,"InChI=1/C11H12N4O3S/c1-18-9-6-13-11(14-7-9)15-19(16,17)10-4-2-8(12)3-5-10/h2-7H,12H2,1H3,(H,13,14,15)"
|
285 |
+
284,Cc1cc(no1)NS(=O)(=O)c1ccc(cc1)N,-2.62,"InChI=1/C10H11N3O3S/c1-7-6-10(12-16-7)13-17(14,15)9-4-2-8(11)3-5-9/h2-6H,11H2,1H3,(H,12,13)"
|
286 |
+
285,COc1cc(Cc2c[nH]c(=N)[nH]c2=N)cc(c1OC)OC,-2.86,"InChI=1/C14H18N4O3/c1-19-10-5-8(6-11(20-2)12(10)21-3)4-9-7-17-14(16)18-13(9)15/h5-7H,4H2,1-3H3,(H4,15,16,17,18)"
|
287 |
+
286,Cc1nc(cc(n1)OC)NS(=O)(=O)c1ccc(cc1)N,-2.54,"InChI=1/C12H14N4O3S/c1-8-14-11(7-12(15-8)19-2)16-20(17,18)10-5-3-9(13)4-6-10/h3-7H,13H2,1-2H3,(H,14,15,16)"
|
288 |
+
287,CC(C)Cc1ccc(cc1)C(C)C(=O)O,-3.99,"InChI=1/C13H18O2/c1-9(2)8-11-4-6-12(7-5-11)10(3)13(14)15/h4-7,9-10H,8H2,1-3H3,(H,14,15)/f/h14H"
|
289 |
+
288,Cn1cnc2c1c(=O)n(C)c(=O)n2C,-0.97,"InChI=1/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3"
|
290 |
+
289,c1ccc2c(c1)ccc(n2)O,-2.14,"InChI=1/C9H7NO/c11-9-6-5-7-3-1-2-4-8(7)10-9/h1-6H,(H,10,11)"
|
291 |
+
290,Cc1cnc(nc1O)O,-1.52,"InChI=1/C5H6N2O2/c1-3-2-6-5(9)7-4(3)8/h2H,1H3,(H2,6,7,8,9)"
|
292 |
+
291,CN1CCOCC1,1,"InChI=1/C5H11NO/c1-6-2-4-7-5-3-6/h2-5H2,1H3"
|
293 |
+
292,C1CCNCC1,1.07,"InChI=1/C5H11N/c1-2-4-6-5-3-1/h6H,1-5H2"
|
294 |
+
293,Cn1ccc(nc1=O)O,-0.8,"InChI=1/C5H6N2O2/c1-7-3-2-4(8)6-5(7)9/h2-3H,1H3,(H,6,8,9)"
|
295 |
+
294,CN1CCCCC1,0.23,"InChI=1/C6H13N/c1-7-5-3-2-4-6-7/h2-6H2,1H3"
|
296 |
+
295,c1ccc(c(c1)N)O,-0.72,"InChI=1/C6H7NO/c7-5-3-1-2-4-6(5)8/h1-4,8H,7H2"
|
297 |
+
296,c1ccc(c(c1)C(=O)O)N,-1.52,"InChI=1/C7H7NO2/c8-6-4-2-1-3-5(6)7(9)10/h1-4H,8H2,(H,9,10)/f/h9H"
|
298 |
+
297,CCOC(=N)O,0.85,"InChI=1/C3H7NO2/c1-2-6-3(4)5/h2H2,1H3,(H2,4,5)"
|
299 |
+
298,c1ccc(cc1)COC(=N)O,-0.35,"InChI=1/C8H9NO2/c9-8(10)11-6-7-4-2-1-3-5-7/h1-5H,6H2,(H2,9,10)"
|
300 |
+
299,C(=N)(N)O,0.96,"InChI=1/CH4N2O/c2-1(3)4/h(H4,2,3,4)"
|
301 |
+
300,CNC(=N)O,1.13,"InChI=1/C2H6N2O/c1-4-2(3)5/h1H3,(H3,3,4,5)"
|
302 |
+
301,CN(C)C(=O)N(C)C,0.94,InChI=1/C5H12N2O/c1-6(2)5(8)7(3)4/h1-4H3
|
303 |
+
302,c1ccc(cc1)CNC(=N)O,-0.95,"InChI=1/C8H10N2O/c9-8(11)10-6-7-4-2-1-3-5-7/h1-5H,6H2,(H3,9,10,11)"
|
304 |
+
303,c1ccc(cc1)C(=N)O,-0.96,"InChI=1/C7H7NO/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H2,8,9)"
|
305 |
+
304,c1ccc(c(c1)C(=N)O)C(=N)O,-2.92,"InChI=1/C8H8N2O2/c9-7(11)5-3-1-2-4-6(5)8(10)12/h1-4H,(H2,9,11)(H2,10,12)"
|
306 |
+
305,CC(=Nc1ccc(cc1)O)O,-1.03,"InChI=1/C8H9NO2/c1-6(10)9-7-2-4-8(11)5-3-7/h2-5,11H,1H3,(H,9,10)"
|
307 |
+
306,CC(C)Oc1ccccc1OC(=NC)O,-2.05,"InChI=1/C11H15NO3/c1-8(2)14-9-6-4-5-7-10(9)15-11(13)12-3/h4-8H,1-3H3,(H,12,13)"
|
308 |
+
307,CN1CC[C@@]23[C@@H]4C=C[C@H]([C@H]3Oc3c(ccc(C[C@H]14)c23)OC)O,-1.52,"InChI=1/C18H21NO3/c1-19-8-7-18-11-4-5-13(20)17(18)22-16-14(21-2)6-3-10(15(16)18)9-12(11)19/h3-6,11-13,17,20H,7-9H2,1-2H3/t11-,12+,13-,17-,18-/m1/s1"
|
309 |
+
308,CCCN(=O)=O,-0.8,"InChI=1/C3H7NO2/c1-2-3-4(5)6/h2-3H2,1H3"
|
310 |
+
309,c1ccc(cc1)N(=O)=O,-1.8,InChI=1/C6H5NO2/c8-7(9)6-4-2-1-3-5-6/h1-5H
|
311 |
+
310,Cc1cccc(c1)N(=O)=O,-2.44,"InChI=1/C7H7NO2/c1-6-3-2-4-7(5-6)8(9)10/h2-5H,1H3"
|
312 |
+
311,Cc1ccc(cc1)N(=O)=O,-2.49,"InChI=1/C7H7NO2/c1-6-2-4-7(5-3-6)8(9)10/h2-5H,1H3"
|
313 |
+
312,c1cc(ccc1N(=O)=O)O,-0.74,"InChI=1/C6H5NO3/c8-6-3-1-5(2-4-6)7(9)10/h1-4,8H"
|
314 |
+
313,c1ccc(c(c1)N(=O)=O)O,-1.74,"InChI=1/C6H5NO3/c8-6-4-2-1-3-5(6)7(9)10/h1-4,8H"
|
315 |
+
314,c1cc(ccc1C(=O)O)N(=O)=O,-1.68,"InChI=1/C7H5NO4/c9-7(10)5-1-3-6(4-2-5)8(11)12/h1-4H,(H,9,10)/f/h9H"
|
316 |
+
315,COc1ccccc1N(=O)=O,-1.96,"InChI=1/C7H7NO3/c1-11-7-5-3-2-4-6(7)8(9)10/h2-5H,1H3"
|
317 |
+
316,c1ccc(c(c1)N)N(=O)=O,-1.96,"InChI=1/C6H6N2O2/c7-5-3-1-2-4-6(5)8(9)10/h1-4H,7H2"
|
318 |
+
317,c1cc(ccc1N)N(=O)=O,-2.37,"InChI=1/C6H6N2O2/c7-5-1-3-6(4-2-5)8(9)10/h1-4H,7H2"
|
319 |
+
318,c1cc(ccc1Cl)N,-1.66,"InChI=1/C6H6ClN/c7-5-1-3-6(8)4-2-5/h1-4H,8H2"
|
320 |
+
319,c1cc(cc(c1)N)Cl,-1.37,"InChI=1/C6H6ClN/c7-5-2-1-3-6(8)4-5/h1-4H,8H2"
|
321 |
+
320,CCN=c1nc(Cl)[nH]c(=NCC)[nH]1,-4.55,"InChI=1/C7H12ClN5/c1-3-9-6-11-5(8)12-7(13-6)10-4-2/h3-4H2,1-2H3,(H2,9,10,11,12,13)"
|
322 |
+
321,CC(C)OC(=Nc1cccc(c1)Cl)O,-3.38,"InChI=1/C10H12ClNO2/c1-7(2)14-10(13)12-9-5-3-4-8(11)6-9/h3-7H,1-2H3,(H,12,13)"
|
323 |
+
322,CN(C(=O)Nc1ccc(cc1)Cl)OC,-2.57,"InChI=1/C9H11ClN2O2/c1-12(14-2)9(13)11-8-5-3-7(10)4-6-8/h3-6H,1-2H3,(H,11,13)"
|
324 |
+
323,CCC(=Nc1ccc(c(c1)Cl)Cl)O,-3,"InChI=1/C9H9Cl2NO/c1-2-9(13)12-6-3-4-7(10)8(11)5-6/h3-5H,2H2,1H3,(H,12,13)"
|
325 |
+
324,Cc1c(c(=O)n(c(n1)O)C(C)(C)C)Cl,-2.48,"InChI=1/C9H13ClN2O2/c1-5-6(10)7(13)12(8(14)11-5)9(2,3)4/h1-4H3,(H,11,14)"
|
326 |
+
325,CCCN(CCC)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O,-5.68,"InChI=1/C13H16F3N3O4/c1-3-5-17(6-4-2)12-10(18(20)21)7-9(13(14,15)16)8-11(12)19(22)23/h7-8H,3-6H2,1-2H3"
|
327 |
+
326,CC(C)(C)C(=O)[C@@H](n1cncn1)Oc1ccc(cc1)Cl,-3.61,"InChI=1/C14H16ClN3O2/c1-14(2,3)12(19)13(18-9-16-8-17-18)20-11-6-4-10(15)5-7-11/h4-9,13H,1-3H3/t13-/m0/s1"
|
328 |
+
327,CCCCS,-2.18,"InChI=1/C4H10S/c1-2-3-4-5/h5H,2-4H2,1H3"
|
329 |
+
328,c1ccc(cc1)S,-2.12,"InChI=1/C6H6S/c7-6-4-2-1-3-5-6/h1-5,7H"
|
330 |
+
329,CCOc1nc(=NC(C)C)nc([nH]1)SC,-3.04,"InChI=1/C9H16N4OS/c1-5-14-8-11-7(10-6(2)3)12-9(13-8)15-4/h6H,5H2,1-4H3,(H,10,11,12,13)"
|
331 |
+
330,CCN=c1nc([nH]c(n1)SC)NC(C)(C)C,-4,"InChI=1/C10H19N5S/c1-6-11-7-12-8(15-10(2,3)4)14-9(13-7)16-5/h6H2,1-5H3,(H2,11,12,13,14,15)"
|
332 |
+
331,c1ccc2c(c1)C(=NS2(=O)=O)O,-1.64,"InChI=1/C7H5NO3S/c9-7-5-3-1-2-4-6(5)12(10,11)8-7/h1-4H,(H,8,9)"
|
333 |
+
332,Cc1ccccc1S(=O)(=O)N,-2.02,"InChI=1/C7H9NO2S/c1-6-4-2-3-5-7(6)11(8,9)10/h2-5H,1H3,(H2,8,9,10)"
|
334 |
+
333,CCOP(=S)(OCC)SCSCC,-4.11,"InChI=1/C7H17O2PS3/c1-4-8-10(11,9-5-2)13-7-12-6-3/h4-7H2,1-3H3"
|
335 |
+
334,COP(=S)(OC)SCN1C(=O)c2ccccc2C1=O,-4.1,"InChI=1/C11H12NO4PS2/c1-15-17(18,16-2)19-7-12-10(13)8-5-3-4-6-9(8)11(12)14/h3-6H,7H2,1-2H3"
|
336 |
+
335,COP(=S)(OC)Oc1ccc(cc1)N(=O)=O,-3.68,"InChI=1/C8H10NO5PS/c1-12-15(16,13-2)14-8-5-3-7(4-6-8)9(10)11/h3-6H,1-2H3"
|
337 |
+
336,COP(=S)(OC)Oc1ccc(cc1Cl)N(=O)=O,-4.31,"InChI=1/C8H9ClNO5PS/c1-13-16(17,14-2)15-8-4-3-6(10(11)12)5-7(8)9/h3-5H,1-2H3"
|
338 |
+
337,CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,-5.54,"InChI=1/C9H22O4P2S4/c1-5-10-14(16,11-6-2)18-9-19-15(17,12-7-3)13-8-4/h5-9H2,1-4H3"
|
339 |
+
338,COP(=S)(OC)Oc1cc(c(cc1Cl)Cl)Cl,-5.72,"InChI=1/C8H8Cl3O3PS/c1-12-15(16,13-2)14-8-4-6(10)5(9)3-7(8)11/h3-4H,1-2H3"
|
340 |
+
339,CC(CCC(=O)O)C1CCC2C3CC(C4CC(CCC4(C)C3CCC12C)O)O,-3.82,"InChI=1/C24H40O4/c1-14(4-7-22(27)28)17-5-6-18-16-13-21(26)20-12-15(25)8-10-24(20,3)19(16)9-11-23(17,18)2/h14-21,25-26H,4-13H2,1-3H3,(H,27,28)/f/h27H"
|
341 |
+
340,C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@H]3[C@H](CC[C@]12C)[C@@]1(C)CC[C@H](C[C@H]1C[C@@H]3O)O,-4.29,"InChI=1/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14-,15+,16-,17-,18+,19+,20+,22+,23+,24-/m1/s1/f/h27H"
|
342 |
+
341,C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@H]3[C@H](CC[C@]12C)[C@@]1(C)CC[C@H](C[C@H]1C[C@H]3O)O,-3.64,"InChI=1/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14-,15+,16-,17-,18+,19+,20-,22+,23+,24-/m1/s1/f/h27H"
|
343 |
+
342,C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@H]3[C@H](C[C@@H]([C@]12C)O)[C@@]1(C)CC[C@H](C[C@H]1C[C@H]3O)O,-4.35,"InChI=1/C24H40O5/c1-13(4-7-21(28)29)16-5-6-17-22-18(12-20(27)24(16,17)3)23(2)9-8-15(25)10-14(23)11-19(22)26/h13-20,22,25-27H,4-12H2,1-3H3,(H,28,29)/t13-,14+,15-,16-,17+,18+,19-,20+,22+,23+,24-/m1/s1/f/h28H"
|
344 |
+
343,Cc1nnc2CN=C(c3ccccc3Cl)c3cc(ccc3n12)Cl,-4.08,"InChI=1/C17H12Cl2N4/c1-10-21-22-16-9-20-17(12-4-2-3-5-14(12)19)13-8-11(18)6-7-15(13)23(10)16/h2-8H,9H2,1H3"
|
345 |
+
344,C[C@H]1CNCCN1,0.74,"InChI=1/C5H12N2/c1-5-4-6-2-3-7-5/h5-7H,2-4H2,1H3/t5-/m0/s1"
|
346 |
+
345,CC(=Nc1ccc(cc1)Br)O,-3.08,"InChI=1/C8H8BrNO/c1-6(11)10-8-4-2-7(9)3-5-8/h2-5H,1H3,(H,10,11)"
|
347 |
+
346,CC(=Nc1ccc(cc1)N(=O)=O)O,-2.69,"InChI=1/C8H8N2O3/c1-6(11)9-7-2-4-8(5-3-7)10(12)13/h2-5H,1H3,(H,9,11)"
|
348 |
+
347,CN(C)C(=N[C@H]1C[C@H]2[C@H]3CC[C@H](C3)[C@H]2C1)O,-3.01,"InChI=1/C13H22N2O/c1-15(2)13(16)14-10-6-11-8-3-4-9(5-8)12(11)7-10/h8-12H,3-7H2,1-2H3,(H,14,16)/t8-,9+,10-,11-,12+"
|
349 |
+
348,C#C[C@@]1(CC[C@@H]2[C@@H]3CCC4=CC(=O)CC[C@H]4[C@H]3CC[C@@]12C)O,-4.79,"InChI=1/C20H26O2/c1-3-20(22)11-9-18-17-6-4-13-12-14(21)5-7-15(13)16(17)8-10-19(18,20)2/h1,12,15-18,22H,4-11H2,2H3/t15-,16-,17-,18-,19-,20+/m1/s1"
|
350 |
+
349,c1cc(c(Cc2cc(ccc2O)Cl)cc1Cl)O,-3.95,"InChI=1/C13H10Cl2O2/c14-10-1-3-12(16)8(6-10)5-9-7-11(15)2-4-13(9)17/h1-4,6-7,16-17H,5H2"
|
351 |
+
350,c1c(cc(c(c1C(=O)O)Cl)N)Cl,-2.47,"InChI=1/C7H5Cl2NO2/c8-3-1-4(7(11)12)6(9)5(10)2-3/h1-2H,10H2,(H,11,12)/f/h11H"
|
352 |
+
351,C[C@@]1(c2cccc(c2C(=O)C2=C([C@]3([C@H](C[C@@H]12)[C@@H](C(=C(C3=O)C(=N)O)O)N(C)C)O)O)O)O,-3.12,"InChI=1/C22H24N2O8/c1-21(31)8-5-4-6-11(25)12(8)16(26)13-9(21)7-10-15(24(2)3)17(27)14(20(23)30)19(29)22(10,32)18(13)28/h4-6,9-10,15,25,27-28,31-32H,7H2,1-3H3,(H2,23,30)/t9-,10-,15+,21+,22+/m1/s1"
|
353 |
+
352,C[C@@]1(c2cccc(c2C(=O)C2=C([C@]3([C@@H]([C@@H](C(=C(C3=O)C(=N)O)O)N(C)C)[C@@H]([C@H]12)O)O)O)O)O,-3.14,"InChI=1/C22H24N2O9/c1-21(32)7-5-4-6-8(25)9(7)15(26)10-12(21)17(28)13-14(24(2)3)16(27)11(20(23)31)19(30)22(13,33)18(10)29/h4-6,12-14,17,25,27-29,32-33H,1-3H3,(H2,23,31)/t12-,13+,14+,17-,21+,22+/m1/s1"
|
354 |
+
353,CN(C)C(=O)C(c1ccccc1)c1ccccc1,-2.98,"InChI=1/C16H17NO/c1-17(2)16(18)15(13-9-5-3-6-10-13)14-11-7-4-8-12-14/h3-12,15H,1-2H3"
|
355 |
+
354,CC(C)N(C(C)C)C(=O)SCC(=C(Cl)Cl)Cl,-4.88,"InChI=1/C10H16Cl3NOS/c1-6(2)14(7(3)4)10(15)16-5-8(11)9(12)13/h6-7H,5H2,1-4H3"
|
356 |
+
355,CCCCN(CC)C(=O)SCCC,-3.35,"InChI=1/C10H21NOS/c1-4-7-8-11(6-3)10(12)13-9-5-2/h4-9H2,1-3H3"
|
357 |
+
356,CCCCOC(=O)c1ccc(cc1)O,-2.72,"InChI=1/C11H14O3/c1-2-3-8-14-11(13)9-4-6-10(12)7-5-9/h4-7,12H,2-3,8H2,1H3"
|
358 |
+
357,COC(=O)c1ccc(cc1)O,-1.78,"InChI=1/C8H8O3/c1-11-8(10)6-2-4-7(9)5-3-6/h2-5,9H,1H3"
|
359 |
+
358,CCOC(=O)CCC(=O)OCC,-0.96,"InChI=1/C8H14O4/c1-3-11-7(9)5-6-8(10)12-4-2/h3-6H2,1-2H3"
|
360 |
+
359,CCCCOC(=O)c1ccccc1,-3.48,"InChI=1/C11H14O2/c1-2-3-9-13-11(12)10-7-5-4-6-8-10/h4-8H,2-3,9H2,1H3"
|
361 |
+
360,COC(=O)c1ccc(cc1)N,-1.59,"InChI=1/C8H9NO2/c1-11-8(10)6-2-4-7(9)5-3-6/h2-5H,9H2,1H3"
|
362 |
+
361,CCCCCOC(=O)c1ccc(cc1)N,-3.26,"InChI=1/C12H17NO2/c1-2-3-4-9-15-12(14)10-5-7-11(13)8-6-10/h5-8H,2-4,9,13H2,1H3"
|
363 |
+
362,CCCCCCOC(=O)c1ccc(cc1)N,-3.95,"InChI=1/C13H19NO2/c1-2-3-4-5-10-16-13(15)11-6-8-12(14)9-7-11/h6-9H,2-5,10,14H2,1H3"
|
364 |
+
363,CCCCCCCCOC(=O)c1ccc(cc1)N,-5.4,"InChI=1/C15H23NO2/c1-2-3-4-5-6-7-12-18-15(17)13-8-10-14(16)11-9-13/h8-11H,2-7,12,16H2,1H3"
|
365 |
+
364,Cc1cccc(c1)N=C(C)O,-2.09,"InChI=1/C9H11NO/c1-7-4-3-5-9(6-7)10-8(2)11/h3-6H,1-2H3,(H,10,11)"
|
366 |
+
365,CC[C@H]1CCCCN1,-0.35,"InChI=1/C7H15N/c1-2-7-5-3-4-6-8-7/h7-8H,2-6H2,1H3/t7-/m0/s1"
|
367 |
+
366,CCCCOc1ccc(c(c1)N(CC)CC)C(=O)OCC,-3.84,"InChI=1/C17H27NO3/c1-5-9-12-21-14-10-11-15(17(19)20-8-4)16(13-14)18(6-2)7-3/h10-11,13H,5-9,12H2,1-4H3"
|
368 |
+
367,C=CCc1ccc(cc1)OC,-2.92,"InChI=1/C10H12O/c1-3-4-9-5-7-10(11-2)8-6-9/h3,5-8H,1,4H2,2H3"
|
369 |
+
368,COc1c(c(c(c(c1O)Cl)Cl)Cl)Cl,-4.02,"InChI=1/C7H4Cl4O2/c1-13-7-5(11)3(9)2(8)4(10)6(7)12/h12H,1H3"
|
370 |
+
369,C=C(C)[C@@H]1CC=C(C)C(=O)C1,-2.06,"InChI=1/C10H14O/c1-7(2)9-5-4-8(3)10(11)6-9/h4,9H,1,5-6H2,2-3H3/t9-/m1/s1"
|
371 |
+
370,C1(=O)C(=NC(=O)N=C1O)O,-1.25,"InChI=1/C4H2N2O4/c7-1-2(8)5-4(10)6-3(1)9/h(H2,5,6,8,9,10)"
|
372 |
+
371,c1cncnc1O,0.59,"InChI=1/C4H4N2O/c7-4-1-2-5-3-6-4/h1-3H,(H,5,6,7)"
|
373 |
+
372,C1CC(=O)N=C1O,0.3,"InChI=1/C4H5NO2/c6-3-1-2-4(7)5-3/h1-2H2,(H,5,6,7)"
|
374 |
+
373,C(CS(=O)CCCl)Cl,-1.16,InChI=1/C4H8Cl2OS/c5-1-3-8(7)4-2-6/h1-4H2
|
375 |
+
374,C([C@@H](C(=O)O)N)C(=N)O,-0.74,"InChI=1/C4H8N2O3/c5-2(4(8)9)1-3(6)7/h2H,1,5H2,(H2,6,7)(H,8,9)/t2-/m0/s1"
|
376 |
+
375,CC[C@H](C(=O)O)N,0.31,"InChI=1/C4H9NO2/c1-2-3(5)4(6)7/h3H,2,5H2,1H3,(H,6,7)/t3-/m1/s1/f/h6H"
|
377 |
+
376,c1cc(cnc1)O,-0.46,"InChI=1/C5H5NO/c7-5-2-1-3-6-4-5/h1-4,7H"
|
378 |
+
377,c1c[nH]ccc1=O,1.02,"InChI=1/C5H5NO/c7-5-1-3-6-4-2-5/h1-4H,(H,6,7)"
|
379 |
+
378,c1nc2c(N)nc[nH]c2n1,-2.12,"InChI=1/C5H5N5/c6-4-3-5(9-1-7-3)10-2-8-4/h1-2H,(H3,6,7,8,9,10)"
|
380 |
+
379,CC1(C)C(=O)N(C(=O)N1Cl)Cl,-2.6,InChI=1/C5H6Cl2N2O2/c1-5(2)3(10)8(6)4(11)9(5)7/h1-2H3
|
381 |
+
380,Cc1cc(nc(n1)S)O,-2.43,"InChI=1/C5H6N2OS/c1-3-2-4(8)7-5(9)6-3/h2H,1H3,(H2,6,7,8,9)"
|
382 |
+
381,CC[C@@H]1C(=NC(=N1)O)O,-0.06,"InChI=1/C5H8N2O2/c1-2-3-4(8)7-5(9)6-3/h3H,2H2,1H3,(H2,6,7,8,9)/t3-/m1/s1"
|
383 |
+
382,CC(=O)/N=c\1/n(C)nc(s1)S(=O)(=O)N,-1.83,"InChI=1/C5H8N4O3S2/c1-3(10)7-4-9(2)8-5(13-4)14(6,11)12/h1-2H3,(H2,6,11,12)/b7-4-"
|
384 |
+
383,C[C@@H]1CCCC[C@@H]1NC(=Nc1ccccc1)O,-4.98,"InChI=1/C14H20N2O/c1-11-7-5-6-10-13(11)16-14(17)15-12-8-3-2-4-9-12/h2-4,8-9,11,13H,5-7,10H2,1H3,(H2,15,16,17)/t11-,13+/m1/s1"
|
385 |
+
384,c1cnc2c(cncn2)n1,0.02,InChI=1/C6H4N4/c1-2-9-6-5(8-1)3-7-4-10-6/h1-4H
|
386 |
+
385,CSc1ncc2c(nccn2)n1,-2.36,"InChI=1/C7H6N4S/c1-12-7-10-4-5-6(11-7)9-3-2-8-5/h2-4H,1H3"
|
387 |
+
386,c1cnc2c(c(ncn2)S)n1,-2.77,"InChI=1/C6H4N4S/c11-6-4-5(9-3-10-6)8-2-1-7-4/h1-3H,(H,8,9,10,11)"
|
388 |
+
387,Cn1c(=O)c2c([nH]c(n2)O)nc1O,-1.56,"InChI=1/C6H6N4O3/c1-10-4(11)2-3(9-6(10)13)8-5(12)7-2/h1H3,(H,9,13)(H2,7,8,12)"
|
389 |
+
388,c1ccc(cc1)S(=O)(=O)N,-1.56,"InChI=1/C6H7NO2S/c7-10(8,9)6-4-2-1-3-5-6/h1-5H,(H2,7,8,9)"
|
390 |
+
389,C1[C@H]([C@@H]2[C@@H]([C@H](CO2)ON(=O)=O)O1)ON(=O)=O,-2.63,"InChI=1/C6H8N2O8/c9-7(10)15-3-1-13-6-4(16-8(11)12)2-14-5(3)6/h3-6H,1-2H2/t3-,4+,5-,6-/m1/s1"
|
391 |
+
390,CN(C)C(=S)SSC(=S)N(C)C,-3.9,InChI=1/C6H12N2S4/c1-7(2)5(9)11-12-6(10)8(3)4/h1-4H3
|
392 |
+
391,[C@@H]1([C@@H]([C@@H]([C@H]([C@@H]([C@H]1O)O)O)O)O)O,0.35,"InChI=1/C6H12O6/c7-1-2(8)4(10)6(12)5(11)3(1)9/h1-12H/t1-,2-,3-,4-,5+,6+/m0/s1"
|
393 |
+
392,CCCCCOC(=N)O,-1.47,"InChI=1/C6H13NO2/c1-2-3-4-5-9-6(7)8/h2-5H2,1H3,(H2,7,8)"
|
394 |
+
393,CC(C)(C)COC(=N)O,-0.8,"InChI=1/C6H13NO2/c1-6(2,3)4-9-5(7)8/h4H2,1-3H3,(H2,7,8)"
|
395 |
+
394,c1c(cc(c(c1Br)O)Br)C#N,-3.33,"InChI=1/C7H3Br2NO/c8-5-1-4(3-10)2-6(9)7(5)11/h1-2,11H"
|
396 |
+
395,c1c(cc(c(c1C(=O)O)O)I)I,-3.31,"InChI=1/C7H4I2O3/c8-3-1-4(7(11)12)6(10)5(9)2-3/h1-2,10H,(H,11,12)/f/h11H"
|
397 |
+
396,Cc1c2c(nccn2)ncn1,-0.47,"InChI=1/C7H6N4/c1-5-6-7(11-4-10-5)9-3-2-8-6/h2-4H,1H3"
|
398 |
+
397,Cc1cnc2cncnc2n1,0.06,"InChI=1/C7H6N4/c1-5-2-9-6-3-8-4-10-7(6)11-5/h2-4H,1H3"
|
399 |
+
398,COc1ncc2c(nccn2)n1,-1.11,"InChI=1/C7H6N4O/c1-12-7-10-4-5-6(11-7)9-3-2-8-5/h2-4H,1H3"
|
400 |
+
399,COc1cnc2cncnc2n1,-0.91,"InChI=1/C7H6N4O/c1-12-6-3-9-5-2-8-4-10-7(5)11-6/h2-4H,1H3"
|
401 |
+
400,CSc1c2c(nccn2)ncn1,-2.36,"InChI=1/C7H6N4S/c1-12-7-5-6(10-4-11-7)9-3-2-8-5/h2-4H,1H3"
|
402 |
+
401,CSc1cnc2cncnc2n1,-1.55,"InChI=1/C7H6N4S/c1-12-6-3-9-5-2-8-4-10-7(5)11-6/h2-4H,1H3"
|
403 |
+
402,c1ccc(c(c1)C=O)O,-0.86,"InChI=1/C7H6O2/c8-5-6-3-1-2-4-7(6)9/h1-5,9H"
|
404 |
+
403,c1c(cc(c(c1O)O)O)C(=O)O,-1.16,"InChI=1/C7H6O5/c8-4-1-3(7(11)12)2-5(9)6(4)10/h1-2,8-10H,(H,11,12)/f/h11H"
|
405 |
+
404,C[C@@H]1CCCC(=O)C1,-1.87,"InChI=1/C7H12O/c1-6-3-2-4-7(8)5-6/h6H,2-5H2,1H3/t6-/m1/s1"
|
406 |
+
405,CCCCCN=C(O)OC,-1.92,"InChI=1/C7H15NO2/c1-3-4-5-6-8-7(9)10-2/h3-6H2,1-2H3,(H,8,9)"
|
407 |
+
406,c1cc(ccc1C(=O)O)C(F)(F)F,-1.6,"InChI=1/C8H5F3O2/c9-8(10,11)6-3-1-5(2-4-6)7(12)13/h1-4H,(H,12,13)/f/h12H"
|
408 |
+
407,c1ccc(c(c1)Cl)OCC(=O)O,-2.16,"InChI=1/C8H7ClO3/c9-6-3-1-2-4-7(6)12-5-8(10)11/h1-4H,5H2,(H,10,11)/f/h10H"
|
409 |
+
408,CC(=O)OC1CCCCC1,-1.67,"InChI=1/C8H14O2/c1-7(9)10-8-5-3-2-4-6-8/h8H,2-6H2,1H3"
|
410 |
+
409,COc1cc(ccc1O)C(=O)O,-2.05,"InChI=1/C8H8O4/c1-12-7-4-5(8(10)11)2-3-6(7)9/h2-4,9H,1H3,(H,10,11)/f/h10H"
|
411 |
+
410,CC(=O)c1ccc(cc1)N,-1.61,"InChI=1/C8H9NO/c1-6(10)7-2-4-8(9)5-3-7/h2-5H,9H2,1H3"
|
412 |
+
411,CNc1ccccc1C(=O)O,-2.88,"InChI=1/C8H9NO2/c1-9-7-5-3-2-4-6(7)8(10)11/h2-5,9H,1H3,(H,10,11)/f/h10H"
|
413 |
+
412,Cc1ccc(C)c(c1)O,-1.54,"InChI=1/C8H10O/c1-6-3-4-7(2)8(9)5-6/h3-5,9H,1-2H3"
|
414 |
+
413,Cc1cccc(C)c1O,-1.31,"InChI=1/C8H10O/c1-6-4-3-5-7(2)8(6)9/h3-5,9H,1-2H3"
|
415 |
+
414,Cc1ccc(cc1C)O,-1.41,"InChI=1/C8H10O/c1-6-3-4-8(9)5-7(6)2/h3-5,9H,1-2H3"
|
416 |
+
415,C1CC2C(C(C1O2)C(=O)O)C(=O)O,-0.27,"InChI=1/C8H10O5/c9-7(10)5-3-1-2-4(13-3)6(5)8(11)12/h3-6H,1-2H2,(H,9,10)(H,11,12)/f/h9,11H"
|
417 |
+
416,CCN(CC)C(=S)SCC(=C)Cl,-3.39,"InChI=1/C8H14ClNS2/c1-4-10(5-2)8(11)12-6-7(3)9/h3-6H2,1-2H3"
|
418 |
+
417,CC(C)CC(=O)CC(=O)C,-1.6,"InChI=1/C8H14O2/c1-6(2)4-8(10)5-7(3)9/h6H,4-5H2,1-3H3"
|
419 |
+
418,C[C@H]1CC[C@H](C)CC1,-4.47,"InChI=1/C8H16/c1-7-3-5-8(2)6-4-7/h7-8H,3-6H2,1-2H3/t7-,8-"
|
420 |
+
419,C[C@H]1CCCC[C@@H]1C,-4.27,"InChI=1/C8H16/c1-7-5-3-4-6-8(7)2/h7-8H,3-6H2,1-2H3/t7-,8-/m0/s1"
|
421 |
+
420,CCCC1CCCC1,-4.74,"InChI=1/C8H16/c1-2-5-8-6-3-4-7-8/h8H,2-7H2,1H3"
|
422 |
+
421,CC(C(=O)O)Oc1ccc(cc1Cl)Cl,-2.45,"InChI=1/C9H8Cl2O3/c1-5(9(12)13)14-8-3-2-6(10)4-7(8)11/h2-5H,1H3,(H,12,13)/f/h12H"
|
423 |
+
422,CC(C(=O)O)Oc1ccccc1Cl,-2.22,"InChI=1/C9H9ClO3/c1-6(9(11)12)13-8-5-3-2-4-7(8)10/h2-6H,1H3,(H,11,12)/f/h11H"
|
424 |
+
423,Cc1cc(ccc1OCC(=O)O)Cl,-2.23,"InChI=1/C9H9ClO3/c1-6-4-7(10)2-3-8(6)13-5-9(11)12/h2-4H,5H2,1H3,(H,11,12)/f/h11H"
|
425 |
+
424,c1ccc(cc1)CCC(=O)O,-1.41,"InChI=1/C9H10O2/c10-9(11)7-6-8-4-2-1-3-5-8/h1-5H,6-7H2,(H,10,11)/f/h10H"
|
426 |
+
425,COc1ccc(cc1)C(=O)OC,-2.41,"InChI=1/C9H10O3/c1-11-8-5-3-7(4-6-8)9(10)12-2/h3-6H,1-2H3"
|
427 |
+
426,Cc1cccc(c1)OC(=NC)O,-1.8,"InChI=1/C9H11NO2/c1-7-4-3-5-8(6-7)12-9(11)10-2/h3-6H,1-2H3,(H,10,11)"
|
428 |
+
427,c1cc(c(cc1C[C@@H](C(=O)O)N)O)O,-1.6,"InChI=1/C9H11NO4/c10-6(9(13)14)3-5-1-2-7(11)8(12)4-5/h1-2,4,6,11-12H,3,10H2,(H,13,14)/t6-/m0/s1/f/h13H"
|
429 |
+
428,CC(C)(C(=O)O)C(C)(CC(=O)O)C(=O)O,-0.29,"InChI=1/C9H14O6/c1-8(2,6(12)13)9(3,7(14)15)4-5(10)11/h4H2,1-3H3,(H,10,11)(H,12,13)(H,14,15)/f/h10,12,14H"
|
430 |
+
429,C(CCCC(=O)O)CCCC(=O)O,-1.89,"InChI=1/C9H16O4/c10-8(11)6-4-2-1-3-5-7-9(12)13/h1-7H2,(H,10,11)(H,12,13)/f/h10,12H"
|
431 |
+
430,CCCCCCCCOC(=N)O,-3.3,"InChI=1/C9H19NO2/c1-2-3-4-5-6-7-8-12-9(10)11/h2-8H2,1H3,(H2,10,11)"
|
432 |
+
431,C1=C[C@H]2[C@@H](C1)[C@@]1(C(=C([C@]2(C1(Cl)Cl)Cl)Cl)Cl)Cl,-5.64,"InChI=1/C10H6Cl6/c11-6-7(12)9(14)5-3-1-2-4(5)8(6,13)10(9,15)16/h1-2,4-5H,3H2/t4-,5+,8+,9-/m0/s1"
|
433 |
+
432,c1ccc(cc1)n1c(=O)c(c(cn1)N)Br,-3.12,"InChI=1/C10H8BrN3O/c11-9-8(12)6-13-14(10(9)15)7-4-2-1-3-5-7/h1-6H,12H2"
|
434 |
+
433,c1cc(ccc1N)S(=O)(=O)N=c1cncc[nH]1,-3.7,"InChI=1/C10H10N4O2S/c11-8-1-3-9(4-2-8)17(15,16)14-10-7-12-5-6-13-10/h1-7H,11H2,(H,13,14)"
|
435 |
+
434,Cc1cc(ccc1O[C@H](C)C(=O)O)Cl,-2.55,"InChI=1/C10H11ClO3/c1-6-5-8(11)3-4-9(6)14-7(2)10(12)13/h3-5,7H,1-2H3,(H,12,13)/t7-/m1/s1/f/h12H"
|
436 |
+
435,C1CCc2cc(ccc2C1)O,-1.99,"InChI=1/C10H12O/c11-10-6-5-8-3-1-2-4-9(8)7-10/h5-7,11H,1-4H2"
|
437 |
+
436,CCCN=C(NS(=O)(=O)c1ccc(cc1)Cl)O,-3.03,"InChI=1/C10H13ClN2O3S/c1-2-7-12-10(14)13-17(15,16)9-5-3-8(11)4-6-9/h3-6H,2,7H2,1H3,(H2,12,13,14)"
|
438 |
+
437,CC(C)c1ccc(C)c(c1)O,-2.08,"InChI=1/C10H14O/c1-7(2)9-5-4-8(3)10(11)6-9/h4-7,11H,1-3H3"
|
439 |
+
438,c1ccc(cc1)N(CCO)CCO,-0.73,"InChI=1/C10H15NO2/c12-8-6-11(7-9-13)10-4-2-1-3-5-10/h1-5,12-13H,6-9H2"
|
440 |
+
439,CC1(C)[C@@H]2CC[C@@](C)(C2)C1=O,-1.85,"InChI=1/C10H16O/c1-9(2)7-4-5-10(3,6-7)8(9)11/h7H,4-6H2,1-3H3/t7-,10+/m1/s1"
|
441 |
+
440,C=C(C)[C@@H]1CC[C@@H](C)C(=O)C1,-2.18,"InChI=1/C10H16O/c1-7(2)9-5-4-8(3)10(11)6-9/h8-9H,1,4-6H2,2-3H3/t8-,9-/m1/s1"
|
442 |
+
441,Cc1ccc2ccc(C)nc2c1,-1.94,"InChI=1/C11H11N/c1-8-3-5-10-6-4-9(2)12-11(10)7-8/h3-7H,1-2H3"
|
443 |
+
442,Cc1ccnc(n1)NS(=O)(=O)c1ccc(cc1)N,-2.85,"InChI=1/C11H12N4O2S/c1-8-6-7-13-11(14-8)15-18(16,17)10-4-2-9(12)3-5-10/h2-7H,12H2,1H3,(H,13,14,15)"
|
444 |
+
443,Cc1c(C)oc(n1)NS(=O)(=O)c1ccc(cc1)N,-2.44,"InChI=1/C11H13N3O3S/c1-7-8(2)17-11(13-7)14-18(15,16)10-5-3-9(12)4-6-10/h3-6H,12H2,1-2H3,(H,13,14)"
|
445 |
+
444,CCSC(=O)N(CC(C)C)CC(C)C,-3.68,"InChI=1/C11H23NOS/c1-6-14-11(13)12(7-9(2)3)8-10(4)5/h9-10H,6-8H2,1-5H3"
|
446 |
+
445,c1cc(ccc1O)O.C1=CC(=O)C=CC1=O,-1.73,"InChI=1/C6H6O2.C6H4O2/c2*7-5-1-2-6(8)4-3-5/h1-4,7-8H;1-4H"
|
447 |
+
446,C#C[C@@H](C)N(C)C(=O)Nc1ccc(cc1)Cl,-3.9,"InChI=1/C12H13ClN2O/c1-4-9(2)15(3)12(16)14-11-7-5-10(13)6-8-11/h1,5-9H,2-3H3,(H,14,16)/t9-/m1/s1"
|
448 |
+
447,Cc1cc(nc(C)n1)NS(=O)(=O)c1ccc(cc1)N,-2.24,"InChI=1/C12H14N4O2S/c1-8-7-12(15-9(2)14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)"
|
449 |
+
448,CC(C)(C)c1nnc(NS(=O)(=O)C2=CC[C@@H](CC2)N)s1,-3.74,"InChI=1/C12H20N4O2S2/c1-12(2,3)10-14-15-11(19-10)16-20(17,18)9-6-4-8(13)5-7-9/h6,8H,4-5,7,13H2,1-3H3,(H,15,16)/t8-/m0/s1"
|
450 |
+
449,CCCCN=C(NS(=O)(=O)c1ccc(C)cc1)O,-3.39,"InChI=1/C12H18N2O3S/c1-3-4-9-13-12(15)14-18(16,17)11-7-5-10(2)6-8-11/h5-8H,3-4,9H2,1-2H3,(H2,13,14,15)"
|
451 |
+
450,CCCCN(CCCC)CCCC,-3.12,"InChI=1/C12H27N/c1-4-7-10-13(11-8-5-2)12-9-6-3/h4-12H2,1-3H3"
|
452 |
+
451,c1ccc(cc1)N=C(Nc1ccccc1)O,-3.15,"InChI=1/C13H12N2O/c16-13(14-11-7-3-1-4-8-11)15-12-9-5-2-6-10-12/h1-10H,(H2,14,15,16)"
|
453 |
+
452,Cc1c(cc(c(c1N(=O)=O)OC(=O)C)C(C)(C)C)N(=O)=O,-4.47,"InChI=1/C13H16N2O6/c1-7-10(14(17)18)6-9(13(3,4)5)12(21-8(2)16)11(7)15(19)20/h6H,1-5H3"
|
454 |
+
453,CCCCCCCCCCCC(=O)OC,-4.69,"InChI=1/C13H26O2/c1-3-4-5-6-7-8-9-10-11-12-13(14)15-2/h3-12H2,1-2H3"
|
455 |
+
454,c1ccc(c(c1)C(=C(Cl)Cl)c1ccc(cc1)Cl)Cl,-6.36,InChI=1/C14H8Cl4/c15-10-7-5-9(6-8-10)13(14(17)18)11-3-1-2-4-12(11)16/h1-8H
|
456 |
+
455,c1ccc2c(c1)C(=O)c1ccc(c(c1C2=O)O)O,-2.78,"InChI=1/C14H8O4/c15-10-6-5-9-11(14(10)18)13(17)8-4-2-1-3-7(8)12(9)16/h1-6,15,18H"
|
457 |
+
456,c1cc(c(c(c1)F)C(=NC(=O)Nc1ccc(cc1)Cl)O)F,-6.02,"InChI=1/C14H9ClF2N2O2/c15-8-4-6-9(7-5-8)18-14(21)19-13(20)12-10(16)2-1-3-11(12)17/h1-7H,(H2,18,19,20,21)"
|
458 |
+
457,c1ccc(c(c1)[C@@H](c1ccc(cc1)Cl)C(Cl)Cl)Cl,-6.51,"InChI=1/C14H10Cl4/c15-10-7-5-9(6-8-10)13(14(17)18)11-3-1-2-4-12(11)16/h1-8,13-14H/t13-/m1/s1"
|
459 |
+
458,c1ccc(c(c1)C(=O)O)Nc1cccc(c1)C(F)(F)F,-4.36,"InChI=1/C14H10F3NO2/c15-14(16,17)9-4-3-5-10(8-9)18-12-7-2-1-6-11(12)13(19)20/h1-8,18H,(H,19,20)/f/h19H"
|
460 |
+
459,c1ccc2c(c1)cc1ccccc1c2O,-4.73,"InChI=1/C14H10O/c15-14-12-7-3-1-5-10(12)9-11-6-2-4-8-13(11)14/h1-9,15H"
|
461 |
+
460,Cc1cccc(c1C)Nc1ccccc1C(=O)O,-2.28,"InChI=1/C15H15NO2/c1-10-6-5-9-13(11(10)2)16-14-8-4-3-7-12(14)15(17)18/h3-9,16H,1-2H3,(H,17,18)/f/h17H"
|
462 |
+
461,Cc1cc(ccc1NS(=O)(=O)C(F)(F)F)S(=O)(=O)c1ccccc1,-3.8,"InChI=1/C14H12F3NO4S2/c1-10-9-12(23(19,20)11-5-3-2-4-6-11)7-8-13(10)18-24(21,22)14(15,16)17/h2-9,18H,1H3"
|
463 |
+
462,Cc1ccc2c(c1)sc(c1ccc(cc1)N)n2,-3.68,"InChI=1/C14H12N2S/c1-9-2-7-12-13(8-9)17-14(16-12)10-3-5-11(15)6-4-10/h2-8H,15H2,1H3"
|
464 |
+
463,c1ccc(cc1)C[C@H](c1ccccc1)O,-2.52,"InChI=1/C14H14O/c15-14(13-9-5-2-6-10-13)11-12-7-3-1-4-8-12/h1-10,14-15H,11H2/t14-/m1/s1"
|
465 |
+
464,CCOP(=S)(OCC)Oc1ccc2c(C)c(c(=O)oc2c1)Cl,-5.38,"InChI=1/C14H16ClO5PS/c1-4-17-21(22,18-5-2)20-10-6-7-11-9(3)13(15)14(16)19-12(11)8-10/h6-8H,4-5H2,1-3H3"
|
466 |
+
465,CN(C)CCN(Cc1cccs1)c1ccccn1,-2.64,"InChI=1/C14H19N3S/c1-16(2)9-10-17(12-13-6-5-11-18-13)14-7-3-4-8-15-14/h3-8,11H,9-10,12H2,1-2H3"
|
467 |
+
466,C[C@H]1CCCC[C@H]1NC(=Nc1ccccc1)O,-4.11,"InChI=1/C14H20N2O/c1-11-7-5-6-10-13(11)16-14(17)15-12-8-3-2-4-9-12/h2-4,8-9,11,13H,5-7,10H2,1H3,(H2,15,16,17)/t11-,13+/m0/s1"
|
468 |
+
467,Cc1cccc(c1C)Nc1ccccc1C(=O)O,-3.78,"InChI=1/C15H15NO2/c1-10-6-5-9-13(11(10)2)16-14-8-4-3-7-12(14)15(17)18/h3-9,16H,1-2H3,(H,17,18)/f/h17H"
|
469 |
+
468,CC(=CCc1c(ccc2ccc(=O)oc12)OC)C,-4.31,"InChI=1/C15H16O3/c1-10(2)4-7-12-13(17-3)8-5-11-6-9-14(16)18-15(11)12/h4-6,8-9H,7H2,1-3H3"
|
470 |
+
469,CCc1cccc(C)c1N([C@H](C)COC)C(=O)CCl,-2.73,"InChI=1/C15H22ClNO2/c1-5-13-8-6-7-11(2)15(13)17(14(18)9-16)12(3)10-19-4/h6-8,12H,5,9-10H2,1-4H3/t12-/m1/s1"
|
471 |
+
470,C1CCN2C[C@@H]3C[C@@H](CN4CCCC[C@H]34)[C@H]2C1,-1.89,"InChI=1/C15H26N2/c1-3-7-16-11-13-9-12(14(16)5-1)10-17-8-4-2-6-15(13)17/h12-15H,1-11H2/t12-,13-,14+,15+/m0/s1"
|
472 |
+
471,CCCCCCCCCCCCCCC(=O)O,-4.31,"InChI=1/C15H30O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15(16)17/h2-14H2,1H3,(H,16,17)/f/h16H"
|
473 |
+
472,CN(C)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)OC,-4.16,"InChI=1/C16H18N2O3/c1-18(2)16(19)17-12-4-6-14(7-5-12)21-15-10-8-13(20-3)9-11-15/h4-11H,1-3H3,(H,17,19)"
|
474 |
+
473,CCCCCCCCCCCCCCCCO,-7.26,"InChI=1/C16H34O/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17/h17H,2-16H2,1H3"
|
475 |
+
474,c1ccc(c(c1)[C@@](c1ccc(cc1)Cl)(c1cncnc1)O)Cl,-4.38,"InChI=1/C17H12Cl2N2O/c18-14-7-5-12(6-8-14)17(22,13-9-20-11-21-10-13)15-3-1-2-4-16(15)19/h1-11,22H/t17-/m1/s1"
|
476 |
+
475,CCN(CC)C(=O)[C@@H](C)Oc1cccc2ccccc12,-3.57,"InChI=1/C17H21NO2/c1-4-18(5-2)17(19)13(3)20-16-12-8-10-14-9-6-7-11-15(14)16/h6-13H,4-5H2,1-3H3/t13-/m1/s1"
|
477 |
+
476,CNCCCN1c2ccccc2CCc2ccccc12,-3.66,"InChI=1/C18H22N2/c1-19-13-6-14-20-17-9-4-2-7-15(17)11-12-16-8-3-5-10-18(16)20/h2-5,7-10,19H,6,11-14H2,1H3"
|
478 |
+
477,CC[C@@H](c1ccc(cc1)O)[C@@H](CC)c1ccc(cc1)O,-4.43,"InChI=1/C18H22O2/c1-3-17(13-5-9-15(19)10-6-13)18(4-2)14-7-11-16(20)12-8-14/h5-12,17-20H,3-4H2,1-2H3/t17-,18-/m0/s1"
|
479 |
+
478,C[C@@]12CC[C@H]3c4ccc(cc4CC[C@@H]3[C@H]1CC[C@@H]2O)O,-4.84,"InChI=1/C18H24O2/c1-18-9-8-14-13-5-3-12(19)10-11(13)2-4-15(14)16(18)6-7-17(18)20/h3,5,10,14-17,19-20H,2,4,6-9H2,1H3/t14-,15-,16+,17-,18+/m0/s1"
|
480 |
+
479,C=C[C@@H]1CN2CC[C@H]1C[C@@H]2[C@@H](c1ccnc2ccccc12)O,-3.09,"InChI=1/C19H22N2O/c1-2-13-12-21-10-8-14(13)11-18(21)19(22)16-7-9-20-17-6-4-3-5-15(16)17/h2-7,9,13-14,18-19,22H,1,8,10-12H2/t13-,14+,18-,19-/m1/s1"
|
481 |
+
480,C[C@]12CC[C@H](C[C@@H]1CC[C@@H]1[C@@H]3CCC(=O)[C@@]3(C)CC[C@@H]21)O,-4.4,"InChI=1/C19H30O2/c1-18-9-7-13(20)11-12(18)3-4-14-15-5-6-17(21)19(15,2)10-8-16(14)18/h12-16,20H,3-11H2,1-2H3/t12-,13+,14+,15-,16+,18-,19-/m0/s1"
|
482 |
+
481,C[C@]12CC[C@H](C[C@@H]1CC[C@@H]1[C@@H]3CCC(=O)[C@@]3(C)C[C@@H]([C@@H]21)O)O,-3.59,"InChI=1/C19H30O3/c1-18-8-7-12(20)9-11(18)3-4-13-14-5-6-16(22)19(14,2)10-15(21)17(13)18/h11-15,17,20-21H,3-10H2,1-2H3/t11-,12+,13+,14-,15-,17-,18-,19-/m0/s1"
|
483 |
+
482,CN(C)CCC=C1c2ccccc2CCc2ccccc12,-4.46,"InChI=1/C20H23N/c1-21(2)15-7-12-20-18-10-5-3-8-16(18)13-14-17-9-4-6-11-19(17)20/h3-6,8-12H,7,13-15H2,1-2H3"
|
484 |
+
483,CN1CCN(CCCN2c3ccccc3Sc3ccc(cc23)Cl)CC1,-4.4,"InChI=1/C20H24ClN3S/c1-22-11-13-23(14-12-22)9-4-10-24-17-5-2-3-6-19(17)25-20-8-7-16(21)15-18(20)24/h2-3,5-8,15H,4,9-14H2,1H3"
|
485 |
+
484,C=C[C@@H]1CN2CC[C@@H]1C[C@H]2[C@@H](c1ccnc2ccc(cc12)OC)O,-2.76,"InChI=1/C20H24N2O2/c1-3-13-12-22-9-7-14(13)10-19(22)20(23)16-6-8-21-18-5-4-15(24-2)11-17(16)18/h3-6,8,11,13-14,19-20,23H,1,7,9-10,12H2,2H3/t13-,14-,19+,20-/m1/s1"
|
486 |
+
485,Cc1c(CC(=O)O)c2cc(ccc2n1C(=O)C=Cc1ccccc1)OC,-5.54,"InChI=1/C21H19NO4/c1-14-17(13-21(24)25)18-12-16(26-2)9-10-19(18)22(14)20(23)11-8-15-6-4-3-5-7-15/h3-12H,13H2,1-2H3,(H,24,25)/f/h24H"
|
487 |
+
486,CC1(C)[C@@H](C=C(Cl)Cl)[C@@H]1C(=O)OCc1cccc(c1)Oc1ccccc1,-6.29,"InChI=1/C21H20Cl2O3/c1-21(2)17(12-18(22)23)19(21)20(24)25-13-14-7-6-10-16(11-14)26-15-8-4-3-5-9-15/h3-12,17,19H,13H2,1-2H3/t17-,19+/m0/s1"
|
488 |
+
487,CN(C)[C@@H]1[C@H]2C[C@H]3C(=C([C@@]2(C(=O)C(=C1O)C(=N)O)O)O)C(=O)c1c(ccc(c1[C@@H]3O)Cl)O,-2.52,"InChI=1/C21H21ClN2O8/c1-24(2)14-7-5-6-10(16(27)12-9(25)4-3-8(22)11(12)15(6)26)18(29)21(7,32)19(30)13(17(14)28)20(23)31/h3-4,6-7,14-15,25-26,28-29,32H,5H2,1-2H3,(H2,23,31)/t6-,7+,14+,15+,21-/m0/s1"
|
489 |
+
488,c1ccc2c(c1)N(CCCN1CCC(CC1)O)c1cc(ccc1S2)C#N,-3.98,"InChI=1/C21H23N3OS/c22-15-16-6-7-21-19(14-16)24(18-4-1-2-5-20(18)26-21)11-3-10-23-12-8-17(25)9-13-23/h1-2,4-7,14,17,25H,3,8-13H2"
|
490 |
+
489,C#C[C@@]1(CC[C@@H]2[C@H]3CCC4=CC(=O)CC[C@@]4(C)[C@@H]3CC[C@@]12C)O,-5.66,"InChI=1/C21H28O2/c1-4-21(23)12-9-18-16-6-5-14-13-15(22)7-10-19(14,2)17(16)8-11-20(18,21)3/h1,13,16-18,23H,5-12H2,2-3H3/t16-,17+,18+,19+,20+,21-/m0/s1"
|
491 |
+
490,CC(=O)[C@@H]1CC[C@@H]2[C@H]3CC=C4C[C@H](CC[C@]4(C)[C@@H]3CC[C@@]12C)O,-4.65,"InChI=1/C21H32O2/c1-13(22)17-6-7-18-16-5-4-14-12-15(23)8-10-20(14,2)19(16)9-11-21(17,18)3/h4,15-19,23H,5-12H2,1-3H3/t15-,16+,17-,18+,19+,20-,21-/m0/s1"
|
492 |
+
491,CCOC(=O)C(c1c(c2ccccc2oc1=O)O)c1c(c2ccccc2oc1=O)O,-3.66,"InChI=1/C22H16O8/c1-2-28-20(25)15(16-18(23)11-7-3-5-9-13(11)29-21(16)26)17-19(24)12-8-4-6-10-14(12)30-22(17)27/h3-10,15,23-24H,2H2,1H3"
|
493 |
+
492,CN1CCc2cc3c(c(c2[C@@H]1[C@H]1c2ccc(c(c2C(=O)O1)OC)OC)OC)OCO3,-3.14,"InChI=1/C22H23NO7/c1-23-8-7-11-9-14-20(29-10-28-14)21(27-4)15(11)17(23)18-12-5-6-13(25-2)19(26-3)16(12)22(24)30-18/h5-6,9,17-18H,7-8,10H2,1-4H3/t17-,18-/m1/s1"
|
494 |
+
493,C[C@H]1c2cccc(c2C(=O)C2=C([C@]3([C@@H]([C@@H](C(=C(C3=O)C(=N)O)O)N(C)C)[C@H]([C@@H]12)O)O)O)O,-2.87,"InChI=1/C22H24N2O8/c1-7-8-5-4-6-9(25)11(8)16(26)12-10(7)17(27)14-15(24(2)3)18(28)13(21(23)31)20(30)22(14,32)19(12)29/h4-7,10,14-15,17,25,27-29,32H,1-3H3,(H2,23,31)/t7-,10-,14-,15-,17-,22-/m0/s1"
|
495 |
+
494,CC(=C[C@@H]1[C@@H](C(=O)OCc2cccc(c2)Oc2ccccc2)C1(C)C)C,-5.24,"InChI=1/C23H26O3/c1-16(2)13-20-21(23(20,3)4)22(24)25-15-17-9-8-12-19(14-17)26-18-10-6-5-7-11-18/h5-14,20-21H,15H2,1-4H3/t20-,21+/m1/s1"
|
496 |
+
495,CC(=O)OCCN1CCN(CCCN2c3ccccc3Sc3ccc(cc23)Cl)CC1,-4.95,"InChI=1/C23H28ClN3O2S/c1-18(28)29-16-15-26-13-11-25(12-14-26)9-4-10-27-20-5-2-3-6-22(20)30-23-8-7-19(24)17-21(23)27/h2-3,5-8,17H,4,9-16H2,1H3"
|
497 |
+
496,CC(=O)OCCN1CCN(CCCN2c3ccccc3Sc3ccc(cc23)Cl)CC1,-4.7,"InChI=1/C23H28ClN3O2S/c1-18(28)29-16-15-26-13-11-25(12-14-26)9-4-10-27-20-5-2-3-6-22(20)30-23-8-7-19(24)17-21(23)27/h2-3,5-8,17H,4,9-16H2,1H3"
|
extra_code/__pycache__/feature_search.cpython-312.pyc
ADDED
Binary file (47.4 kB). View file
|
|
extra_code/__pycache__/feature_selection.cpython-312.pyc
ADDED
Binary file (53.2 kB). View file
|
|
extra_code/ano_model.py
ADDED
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gc
|
3 |
+
import sys
|
4 |
+
import numpy as np
|
5 |
+
from sklearn.model_selection import train_test_split, KFold
|
6 |
+
from sklearn.metrics import r2_score
|
7 |
+
import tensorflow as tf
|
8 |
+
import logging
|
9 |
+
import psutil
|
10 |
+
import subprocess
|
11 |
+
import matplotlib.pyplot as plt
|
12 |
+
from tensorflow.keras.mixed_precision import set_global_policy
|
13 |
+
|
14 |
+
set_global_policy('mixed_float16')
|
15 |
+
|
16 |
+
BATCHSIZE = int(sys.argv[1])
|
17 |
+
EPOCHS = int(sys.argv[2])
|
18 |
+
lr = float(sys.argv[3])
|
19 |
+
fps_file = sys.argv[4]
|
20 |
+
y_true_file = sys.argv[5]
|
21 |
+
##################################################################
|
22 |
+
model_name = sys.argv[6] if len(sys.argv) > 6 else None
|
23 |
+
target_path = sys.argv[7] if len(sys.argv) > 7 else None
|
24 |
+
cv = int(sys.argv[8]) if len(sys.argv) > 8 and sys.argv[8] != 'None' else None
|
25 |
+
test_size = float(sys.argv[9]) if len(sys.argv) > 9 else 0.1
|
26 |
+
|
27 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
28 |
+
|
29 |
+
def print_cpu_memory():
    """Log total, available and used system memory in GB, plus the usage percentage.

    The figures come from ``psutil.virtual_memory()`` and are written at INFO level.
    """
    bytes_per_gb = 1024 ** 3
    snapshot = psutil.virtual_memory()

    total_gb = snapshot.total / bytes_per_gb
    available_gb = snapshot.available / bytes_per_gb
    used_gb = snapshot.used / bytes_per_gb

    logging.info(f"Total Memory: {total_gb:.2f} GB")
    logging.info(f"Available Memory: {available_gb:.2f} GB")
    logging.info(f"Used Memory: {used_gb:.2f} GB")
    logging.info(f"Memory Usage: {snapshot.percent}%")
|
35 |
+
def print_gpu_memory(status=""):
    """Log per-GPU memory usage as reported by ``nvidia-smi``.

    Args:
        status: Free-form label placed in square brackets at the start of each log line.

    Any exception raised while running or parsing ``nvidia-smi`` is logged as an
    error instead of being propagated. A non-zero exit status produces no log output.
    """
    query = ['nvidia-smi', '--query-gpu=memory.used,memory.total', '--format=csv,nounits,noheader']
    try:
        result = subprocess.run(query, capture_output=True, text=True)
        if result.returncode != 0:
            return
        # One CSV line per GPU: "<used>, <total>".
        for idx, line in enumerate(result.stdout.strip().split('\n')):
            used, total = line.split(', ')
            logging.info(f"[{status}] GPU {idx}: Memory Usage: {used} MB / {total} MB")
    except Exception as e:
        logging.error(f"Error executing nvidia-smi: {e}")
|
46 |
+
def save_history_plot(history, target_path, model_name, test_size, fold=None):
    """Plot the training history and save it as a PNG under ``target_path/model_name``.

    Args:
        history: Object whose ``history`` attribute maps metric names to per-epoch values.
        target_path: Root output directory.
        model_name: Sub-directory name and file-name prefix.
        test_size: Value shown in both plot titles and embedded in the file name.
        fold: Optional fold number; when truthy, ``_fold<N>`` is added to the file name.

    The top panel shows the loss curves; the bottom panel shows every recorded
    series (the loss included) together with its ``val_`` counterpart when present.
    """
    curves = history.history
    plt.figure(figsize=(12, 8))

    # Top panel: training loss, and validation loss if it was recorded.
    plt.subplot(2, 1, 1)
    plt.plot(curves['loss'], label='Training Loss')
    if 'val_loss' in curves:
        plt.plot(curves['val_loss'], label='Validation Loss')
    plt.title(f'Model Loss (test_size={test_size})')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()

    # Bottom panel: each training series followed by its validation twin.
    plt.subplot(2, 1, 2)
    training_names = [name for name in curves if not name.startswith('val_')]
    for metric in training_names:
        plt.plot(curves[metric], label=f'Training {metric}')
        val_metric = f'val_{metric}'
        if val_metric in curves:
            plt.plot(curves[val_metric], label=f'Validation {metric}')
    plt.title(f'Model Metrics (test_size={test_size})')
    plt.ylabel('Value')
    plt.xlabel('Epoch')
    plt.legend()

    plt.tight_layout()

    fold_suffix = '_fold' + str(fold) if fold else ''
    file_name = f"{model_name}_history{fold_suffix}_test_size[{test_size}].png"
    plt.savefig(os.path.join(target_path, model_name, file_name), dpi=300)
    plt.close()
|
76 |
+
def load_model(target_path, model_name, test_size, cv=None):
    """Load an uncompiled Keras model saved under ``target_path/model_name``.

    Args:
        target_path: Root directory that holds the model folder.
        model_name: Folder name and file-name prefix of the model.
        test_size: Value embedded in the expected file name.
        cv: Optional fold count; when truthy, ``_cv<N>`` is part of the file name.

    Returns:
        The loaded model, or ``None`` when the file is missing or loading fails
        (both cases are logged as errors).
    """
    cv_suffix = '_cv' + str(cv) if cv else ''
    model_path = f"{target_path}/{model_name}/{model_name}_full_model{cv_suffix}_test_size[{test_size}].keras"
    try:
        if not os.path.exists(model_path):
            logging.error(f"Model path does not exist: {model_path}")
            return None
        model = tf.keras.models.load_model(model_path, compile=False)
        logging.info(f"Model successfully loaded from {model_path}")
        return model
    except Exception as e:
        logging.error(f"Error loading model: {e}")
        return None
|
89 |
+
def preprocess_data(xtr, ytr, use_parallel=False):
    """Build a shuffled, batched ``tf.data`` pipeline from in-memory features and targets.

    Args:
        xtr: Feature array; its length sets the shuffle buffer size.
        ytr: Target values aligned with ``xtr``.
        use_parallel: When true, an identity ``map`` with
            ``num_parallel_calls=tf.data.AUTOTUNE`` is inserted. It does not
            change the elements.

    Returns:
        A dataset yielding ``(x, y)`` batches of ``BATCHSIZE`` elements.
    """
    dataset = tf.data.Dataset.from_tensor_slices((xtr, ytr))
    if use_parallel:
        dataset = dataset.map(lambda x, y: (x, y), num_parallel_calls=tf.data.AUTOTUNE)
    # Cache the raw elements *before* shuffling. Caching after shuffle/batch
    # replays the first epoch's order and batch composition on every later
    # epoch, so the data would effectively be shuffled only once.
    # max(1, ...) keeps the shuffle buffer size valid for an empty input.
    dataset = (
        dataset.cache()
        .shuffle(buffer_size=max(1, len(xtr)))
        .batch(BATCHSIZE)
        .prefetch(tf.data.AUTOTUNE)
    )
    return dataset
|
95 |
+
|
96 |
+
# def train_model(model, train_dataset, target_path, model_name, fold=None):
|
97 |
+
def train_model(model, train_dataset, valid_dataset, target_path, model_name, fold=None):
    """Fit ``model`` for ``EPOCHS`` epochs, checkpointing the best epoch and plotting the history.

    Args:
        model: Compiled Keras model; it is trained in place.
        train_dataset: Batched training dataset.
        valid_dataset: Batched validation dataset; its loss drives the checkpoint.
        target_path: Root output directory for checkpoints and plots.
        model_name: Sub-directory name used for checkpoints and plots.
        fold: Optional fold number; when truthy it is added to the checkpoint
            and plot file names.

    Returns:
        None. The full model with the lowest ``val_loss`` is written to
        ``<target_path>/checkpoints/<model_name>/``. The checkpoint is not
        loaded back, so ``model`` keeps its last-epoch weights. No early
        stopping is used.
    """
    checkpoint_dir = f"{target_path}/checkpoints/{model_name}"
    os.makedirs(checkpoint_dir, exist_ok=True)
    checkpoint_path = os.path.join(checkpoint_dir, f"model{'_fold'+str(fold) if fold else ''}.keras")
    cp = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_weights_only=False,
        save_best_only=True,
        monitor='val_loss',
        mode='min',
        verbose=1,
    )

    history = model.fit(
        train_dataset,
        epochs=EPOCHS,
        validation_data=valid_dataset,
        callbacks=[cp],
        verbose=0,
    )
    # save_history_plot's fourth positional parameter is test_size. Passing
    # `fold` there labelled the plot with the fold number and dropped the fold
    # suffix from the file name, so pass both values explicitly.
    save_history_plot(history, target_path, model_name, test_size, fold=fold)
    del train_dataset
    gc.collect()
|
128 |
+
def clear_gpu_memory():
    """Reset the Keras backend session, run the garbage collector and log that it was done."""
    tf.keras.backend.clear_session()
    gc.collect()
    logging.info("GPU memory cleared.")
|
132 |
+
def main():
    """Train the saved model on the fingerprint data and print its test-set R2 score.

    The model is loaded via ``load_model``, compiled with Adam/MSE, and trained
    either with K-fold cross-validation (when ``cv`` > 1) or on a single
    train/validation split. The final model is saved under ``save_model/`` and
    the hold-out R2 score is printed to stdout. On any exception the error is
    logged and ``0.000000`` is printed instead. Memory statistics are logged
    on exit in every case.
    """
    try:
        os.makedirs(f"{target_path}/{model_name}", exist_ok=True)
        model = load_model(target_path, model_name, test_size, cv)
        if model is None:
            raise ValueError("Failed to load model")

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[
                tf.keras.metrics.MeanSquaredError(),
                tf.keras.metrics.MeanAbsoluteError(),
                tf.keras.metrics.RootMeanSquaredError()
            ]
        )

        fps = np.load(fps_file)
        y_true = np.load(y_true_file)

        model_input_shape = model.input_shape
        if model_input_shape[1] != fps.shape[1]:
            raise ValueError(f"Model input dimension ({model_input_shape[1]}) does not match data dimension ({fps.shape[1]})")

        if cv is not None and cv > 1:
            xtr, xte, ytr, yte = train_test_split(fps, y_true, test_size=test_size, random_state=42)
            kf = KFold(n_splits=cv, shuffle=True, random_state=42)
            avg_r2_score = []

            # NOTE(review): the same model object is fitted in every fold with no
            # weight reset visible here, so later folds start from weights that
            # have already seen their held-out samples - confirm this is intended.
            for fold, (train_index, test_index) in enumerate(kf.split(xtr), 1):
                xtr_cv, xte_cv = xtr[train_index], xtr[test_index]
                ytr_cv, yte_cv = ytr[train_index], ytr[test_index]

                train_dataset = preprocess_data(xtr_cv, ytr_cv, use_parallel=True)
                # train_model requires a validation dataset as its third
                # argument. Omitting it shifted every following argument by one
                # and made the whole CV path fail, so use the held-out fold.
                valid_dataset = preprocess_data(xte_cv, yte_cv, use_parallel=True)
                train_model(model, train_dataset, valid_dataset, target_path, model_name, fold)

                ypred = model.predict(xte_cv, verbose=0)
                r2_scores = r2_score(yte_cv, ypred)

                if np.isnan(r2_scores) or np.isinf(r2_scores) or r2_scores <= 0:
                    logging.warning(f"[cv][{fold}th] : R2 score : 0.000000 (prune)")
                else:
                    logging.info(f"[cv][{fold}th] : R2 score : {r2_scores:.6f}")

                avg_r2_score.append(r2_scores)
                clear_gpu_memory()
                print_cpu_memory()
                print_gpu_memory(f"Fold {fold}")

            r2_result_res_avg = np.mean(avg_r2_score)
            logging.info(f"[cv][{fold}th][Avg] : R2 score : {r2_result_res_avg:.6f}")

            # Final score on the hold-out split that never entered the folds.
            ypred = model.predict(xte, verbose=0)
            r2_result = r2_score(yte, ypred)
            os.makedirs(f"save_model/{model_name}", exist_ok=True)
            model.save(f"save_model/{model_name}/{model_name}_full_model{'_cv'+str(cv) if cv else ''}_test_size[{test_size}]_r2score[{r2_result:<.4f}].keras")
            del model
            logging.info(f"[cv][{fold}th][Result] : R2 score : {r2_result:.6f}")
            print(f"{r2_result:.6f}")
        else:
            xtr, xte, ytr, yte = train_test_split(fps, y_true, test_size=test_size, random_state=42)
            # Carve 10% of the training portion off as validation data.
            xtr, xtev, ytr, ytev = train_test_split(xtr, ytr, test_size=0.1, random_state=42)
            train_dataset = preprocess_data(xtr, ytr, use_parallel=True)
            valid_dataset = preprocess_data(xtev, ytev, use_parallel=True)
            train_model(model, train_dataset, valid_dataset, target_path, model_name)

            ypred = model.predict(xte, verbose=0)
            r2_result = r2_score(yte, ypred)

            os.makedirs(f"save_model/{model_name}", exist_ok=True)
            model.save(f"save_model/{model_name}/{model_name}_full_model{'_cv'+str(cv) if cv else ''}_test_size[{test_size}]_r2score[{r2_result:<.4f}].keras")
            del model

            if np.isnan(r2_result) or np.isinf(r2_result) or r2_result <= 0:
                logging.warning("R2: 0.000000 (prune)")
            else:
                logging.info(f"R2: {r2_result:.6f}")
            print(f"{r2_result:.6f}")

    except Exception as e:
        logging.error(f"Error in learning process: {e}")
        print("0.000000")

    finally:
        clear_gpu_memory()
        print_cpu_memory()
        print_gpu_memory("Final")
|
218 |
+
|
219 |
+
if __name__ == "__main__":
|
220 |
+
main()
|
extra_code/basic_model.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gc
|
3 |
+
import sys
|
4 |
+
import numpy as np
|
5 |
+
import tensorflow as tf
|
6 |
+
from tensorflow.keras.models import model_from_json
|
7 |
+
from sklearn.model_selection import train_test_split
|
8 |
+
import logging
|
9 |
+
|
10 |
+
# Environment settings
|
11 |
+
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow INFO and WARNING messages
|
12 |
+
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
|
13 |
+
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
|
14 |
+
os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices'
|
15 |
+
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/local/cuda --xla_gpu_force_compilation_parallelism=1'
|
16 |
+
|
17 |
+
# Suppress TensorFlow logging
|
18 |
+
logging.getLogger('tensorflow').setLevel(logging.ERROR)
|
19 |
+
|
20 |
+
BATCHSIZE = int(sys.argv[1])
|
21 |
+
EPOCHS = int(sys.argv[2])
|
22 |
+
fps_file = sys.argv[3]
|
23 |
+
y_true_file = sys.argv[4]
|
24 |
+
|
25 |
+
def load_model():
    """Rebuild the model from its saved JSON config and weights, then compile it.

    Reads ``save_model/model_config.json`` and ``save_model/model_weights.weights.h5``.
    The model is compiled with a default Adam optimizer, MSE loss, and
    MSE / MAE / RMSE metrics.

    Returns:
        The compiled model.
    """
    with open('save_model/model_config.json', 'r') as json_file:
        architecture = json_file.read()

    model = model_from_json(architecture)
    model.load_weights('save_model/model_weights.weights.h5')

    optimizer = tf.keras.optimizers.Adam()
    loss = tf.keras.losses.MeanSquaredError()
    tracked_metrics = [
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.MeanAbsoluteError(),
        tf.keras.metrics.RootMeanSquaredError(),
    ]
    model.compile(optimizer=optimizer, loss=loss, metrics=tracked_metrics)
    return model
|
36 |
+
|
37 |
+
def preprocess_data(xtr, ytr):
    """Turn in-memory features and targets into a shuffled, batched, prefetched dataset.

    The shuffle buffer holds at most 10000 elements, or the whole input when it
    is smaller. Batches contain ``BATCHSIZE`` elements.
    """
    shuffle_window = min(10000, len(xtr))
    pairs = tf.data.Dataset.from_tensor_slices((xtr, ytr))
    shuffled = pairs.shuffle(buffer_size=shuffle_window)
    batched = shuffled.batch(BATCHSIZE)
    return batched.prefetch(tf.data.AUTOTUNE)
|
42 |
+
|
43 |
+
def train_model(model, train_dataset, epochs):
    """Fit ``model`` silently for ``epochs`` epochs and save it to disk.

    The fitted model is written to ``save_model/trained_model.keras``.

    Returns:
        The same ``model`` object that was passed in.
    """
    fit_options = {'epochs': epochs, 'verbose': 0}
    model.fit(train_dataset, **fit_options)
    model.save('save_model/trained_model.keras')
    return model
|
47 |
+
|
48 |
+
def clear_gpu_memory():
    """Reset the Keras backend session, run the garbage collector and report on stderr."""
    tf.keras.backend.clear_session()
    gc.collect()
    # stderr is used for this status message, not stdout.
    print("GPU memory cleared.", file=sys.stderr)
|
52 |
+
|
53 |
+
if __name__ == "__main__":
    # Load the feature matrix and the targets named on the command line.
    fps = np.load(fps_file)
    y_true = np.load(y_true_file)

    # Hold out 20% of the samples; only the training portion is used below.
    split = train_test_split(fps, y_true, test_size=0.2, random_state=42)
    xtr, xte, ytr, yte = split
    train_dataset = preprocess_data(xtr, ytr)

    # Restore the saved model and fit it; train_model also writes it to disk.
    model = load_model()
    trained_model = train_model(model, train_dataset, EPOCHS)

    clear_gpu_memory()
|
extra_code/feature_search.py
ADDED
@@ -0,0 +1,702 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import gc
|
5 |
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
6 |
+
|
7 |
+
from rdkit import Chem
|
8 |
+
from rdkit.Chem import AllChem, DataStructs, Draw
|
9 |
+
from rdkit import RDConfig
|
10 |
+
from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges
|
11 |
+
from rdkit.Chem.AllChem import GetMorganGenerator
|
12 |
+
from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray
|
13 |
+
from rdkit.Avalon.pyAvalonTools import GetAvalonFP
|
14 |
+
from rdkit.Chem.Descriptors import ExactMolWt
|
15 |
+
|
16 |
+
def mol3d(mol):
    """Add explicit hydrogens to ``mol`` and try to give it a 3D conformer.

    Three RDKit calls are attempted in order: ETKDGv3 embedding, UFF
    optimization, and MMFF optimization (the latter two with ``maxIters=200``).
    After each call that does not raise ``ValueError``, the molecule is returned
    as soon as it carries at least one conformer.

    Returns:
        The hydrogen-added molecule with a conformer, or ``None`` when no step
        produced one (a message is printed in that case).
    """
    mol = Chem.AddHs(mol)
    # (callable, extra positional args, keyword args); each callable gets ``mol`` first.
    steps = (
        (AllChem.EmbedMolecule, (AllChem.ETKDGv3(),), {}),
        (AllChem.UFFOptimizeMolecule, (), {'maxIters': 200}),
        (AllChem.MMFFOptimizeMolecule, (), {'maxIters': 200}),
    )

    # NOTE(review): because of the early return below, the UFF/MMFF steps are
    # never reached once embedding has produced a conformer - confirm whether
    # the embedded geometry was meant to be force-field optimized.
    for method, extra_args, options in steps:
        try:
            method(mol, *extra_args, **options)
        except ValueError as e:
            print(f"Error: {e} - Trying next optimization method [{method}]")
            continue
        if mol.GetNumConformers() > 0:
            return mol

    print(f"Invalid mol for 3d {Chem.MolToSmiles(mol)} - No conformer generated")
    return None
|
34 |
+
|
35 |
+
def _descriptor_block(descriptor):
    """Convert one descriptor payload into ``(block, single)``.

    ``block`` is a 2-D array with one row per molecule. ``single`` is True
    only when the payload was a flat sequence of scalars, i.e. it maps to
    exactly one feature column.
    """
    if isinstance(descriptor, np.ndarray) and descriptor.ndim >= 2:
        return descriptor, False

    if isinstance(descriptor, list) and len(descriptor) > 0:
        first = descriptor[0]
        if isinstance(first, np.ndarray):
            # 1-D arrays become single columns and are placed before the
            # 2-D arrays, matching the established column order.
            parts = [arr[:, None] for arr in descriptor if arr.ndim == 1]
            parts += [arr for arr in descriptor if arr.ndim == 2]
            return np.concatenate(parts, axis=1), False
        if isinstance(first, list):
            return np.asarray(descriptor).astype('float'), False

    values = np.asarray(descriptor).astype('float')
    return values.reshape(-1, 1), True


def generating_newfps(fps, descriptor, descriptor_name, save_res="np"):
    """Append the columns of ``descriptor`` to the feature matrix ``fps``.

    Accepted ``descriptor`` payloads:
        * ``None`` - ``fps`` is returned untouched.
        * 2-D ``np.ndarray`` - one feature per column.
        * list of ``np.ndarray`` - 1-D arrays are added as single columns
          first, followed by the columns of the 2-D arrays.
        * list of lists - converted to a 2-D float array.
        * anything else (e.g. a flat list or 1-D array of scalars) - a
          single float column.

    Args:
        fps: Existing features. A 2-D array for ``save_res="np"``; a
            DataFrame or anything ``pd.DataFrame`` accepts for ``"pd"``.
        descriptor: New descriptor values, one row per row of ``fps``.
        descriptor_name: Column label. A single-column descriptor is named
            exactly this; multi-column descriptors are named
            ``{descriptor_name}_1 .. _k`` with one running counter, so a
            payload mixing 1-D and 2-D arrays gets unique names.
        save_res: ``"pd"`` to return a DataFrame, anything else for NumPy.

    Returns:
        The extended features with NaN and +/-inf replaced by 0. If the
        descriptor cannot be attached (bad shape, row-count mismatch, ...)
        a message is printed and the features are returned without the new
        columns. The caller's ``fps`` object is never modified in place.
    """
    try:
        if descriptor is None:
            return fps

        as_frame = save_res == "pd"
        if as_frame:
            features = fps if isinstance(fps, pd.DataFrame) else pd.DataFrame(fps)
        else:
            features = fps

        try:
            block, single = _descriptor_block(descriptor)
            # Refuse mismatched row counts explicitly; pandas would otherwise
            # pad the shorter side with NaN, which the cleanup below turns
            # into zeros.
            if block.shape[0] != len(features):
                raise ValueError(
                    f"expected {len(features)} rows, got {block.shape[0]}"
                )

            if not as_frame:
                features = np.concatenate([features, block], axis=1)
            elif single:
                # assign() returns a new frame, leaving the caller's untouched.
                features = features.assign(**{descriptor_name: block[:, 0]})
            else:
                names = [f"{descriptor_name}_{i+1}" for i in range(block.shape[1])]
                # Reuse the existing index so rows are joined by position
                # rather than by index label.
                block_df = pd.DataFrame(block, columns=names, index=features.index)
                features = pd.concat([features, block_df], axis=1)
        except Exception as e:
            print(f"Error occured while adding {descriptor_name}: {e}")

        if as_frame:
            return features.replace([np.inf, -np.inf], np.nan).fillna(0)
        return np.nan_to_num(features, nan=0.0, posinf=0.0, neginf=0.0).astype('float')

    except Exception as e:
        print(f"Error occurred in {descriptor_name}: {e}")
        return fps
|
141 |
+
|
142 |
+
def Normalization(descriptor, epsilon=1e-10, max_value=1e15):
    """Compress descriptor magnitudes with a signed ``log1p`` transform.

    Steps, in order:
        1. clip values to ``[-max_value, max_value]``;
        2. replace entries whose magnitude is below ``epsilon`` with
           ``+epsilon``;
        3. apply ``sign(x) * log1p(|x|)``;
        4. replace NaN and +/-inf in the result with 0.

    Args:
        descriptor: Array-like of numeric values (any shape NumPy accepts).
        epsilon: Magnitude floor used in step 2.
        max_value: Clipping bound used in step 1.

    Returns:
        ``np.ndarray`` with the same shape as the input.
    """
    values = np.clip(np.asarray(descriptor), -max_value, max_value)
    floored = np.where(np.abs(values) < epsilon, epsilon, values)
    transformed = np.sign(floored) * np.log1p(np.abs(floored))
    return np.nan_to_num(transformed, nan=0.0, posinf=0.0, neginf=0.0)
|
153 |
+
|
154 |
+
def values_chi(mol, chi_type):
    """Collect chi connectivity values of increasing order until one is 0.

    Args:
        mol: RDKit molecule.
        chi_type: ``'n'`` selects ``ChiNn_``; any other value selects
            ``ChiNv_``.

    Returns:
        1-D ``np.ndarray`` holding the values for orders 0, 1, 2, ... up to
        (not including) the first order whose value equals 0.0. Its length
        therefore varies from molecule to molecule.
    """
    chi_func = Chem.GraphDescriptors.ChiNn_ if chi_type == 'n' else Chem.GraphDescriptors.ChiNv_

    values = []
    order = 0
    # Each order is evaluated exactly once and kept, instead of scanning for
    # the cut-off first and recomputing every value afterwards.
    while True:
        value = chi_func(mol, order)
        if value == 0.0:
            break
        values.append(value)
        order += 1
    return np.array(values)
|
160 |
+
|
161 |
+
def generate_chi(mols, chi_type):
    """Compute variable-length chi series in parallel and zero-pad them.

    Args:
        mols: Iterable of RDKit molecules.
        chi_type: Forwarded to ``values_chi`` (``'n'`` or anything else).

    Returns:
        2-D float array with one row per molecule, in input order. The
        width is the longest series found in *this* batch, so it can differ
        between batches. Empty input yields an array of shape ``(0, 0)``.
    """
    mols = list(mols)
    if not mols:
        # max() over an empty batch would raise; there is nothing to compute.
        return np.zeros((0, 0))

    with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
        futures = [executor.submit(values_chi, mol, chi_type) for mol in mols]
        # Results are read in submission order to keep rows aligned with mols.
        rows = [future.result() for future in futures]

    width = max(len(row) for row in rows)
    padded = np.zeros((len(rows), width))
    for i, row in enumerate(rows):
        padded[i, :len(row)] = row
    return padded
|
171 |
+
|
172 |
+
def sanitize_and_compute_descriptor(mol):
    """Compute the 8 BCUT2D values for ``mol`` and never raise.

    The molecule is stripped of explicit hydrogens and sanitised, Gasteiger
    charges are computed, then ``BCUT2D`` is evaluated.

    Args:
        mol: RDKit molecule.

    Returns:
        The ``BCUT2D`` result on success. Fallbacks, each a list of 8 values:
            * eight zeros if hydrogen removal, sanitisation or the Gasteiger
              charge calculation fails;
            * the molecular weight repeated 8 times if only ``BCUT2D`` fails.
        Every failure is reported on stdout.
    """
    try:
        mol = Chem.RemoveHs(mol)
        Chem.SanitizeMol(mol)
        try:
            Chem.rdPartialCharges.ComputeGasteigerCharges(mol)
        except Exception as e:
            print(f"Gasteiger charge calculation failed: {e}")
            return [0.0] * 8

        try:
            return Chem.rdMolDescriptors.BCUT2D(mol)
        except Exception as e:
            print(f"BCUT2D calculation failed: {e}")
            return [Descriptors.MolWt(mol)] * 8
    except Exception as e:
        # Best-effort by design: a broken molecule yields a zero row, but the
        # reason is reported rather than discarded.
        print(f"Molecule preparation for BCUT2D failed: {e}")
        return [0.0] * 8
|
189 |
+
|
190 |
+
def compute_descriptors_parallel(mols, n_jobs=None):
    """Run ``sanitize_and_compute_descriptor`` over ``mols`` in parallel.

    Args:
        mols: Iterable of RDKit molecules; entries may be ``None``.
        n_jobs: Worker count passed to ``ProcessPoolExecutor`` as
            ``max_workers``.

    Returns:
        ``np.ndarray`` with one row of 8 values per input entry, in input
        order. ``None`` entries get a row of zeros, so the result keeps the
        same number of rows as the input. Empty input yields an empty 1-D
        array.
    """
    mols = list(mols)
    # Start every row as the zero fallback; rows of real molecules are
    # overwritten below. This keeps row i tied to mols[i].
    rows = [[0.0] * 8 for _ in mols]
    valid = [i for i, mol in enumerate(mols) if mol is not None]

    if valid:
        with ProcessPoolExecutor(max_workers=n_jobs) as executor:
            futures = [
                executor.submit(sanitize_and_compute_descriptor, mols[i])
                for i in valid
            ]
            for i, future in zip(valid, futures):
                rows[i] = future.result()

    return np.array(rows)
|
195 |
+
|
196 |
+
def process_molecules_parallel(mols, max_workers=4, chunk_size=100):
    """Generate 3-D conformers for ``mols`` chunk by chunk in worker processes.

    Args:
        mols: Sequence of RDKit molecules (must support ``len`` and slicing).
        max_workers: Worker processes per chunk.
        chunk_size: Number of molecules submitted per process pool.

    Returns:
        List of the molecules returned by ``mol3d``, in input order.
        Molecules for which ``mol3d`` returned ``None`` are left out, so the
        list can be shorter than ``mols``; the number dropped is printed.
    """
    results = []
    dropped = 0
    for start in range(0, len(mols), chunk_size):
        chunk = mols[start:start + chunk_size]
        # A fresh pool per chunk is kept on purpose so worker processes are
        # torn down between chunks.
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(mol3d, mol) for mol in chunk]
            # Read in submission order: completion order would scramble the
            # molecules relative to the rows of the feature matrix.
            for future in futures:
                result = future.result()
                if result is not None:
                    results.append(result)
                else:
                    dropped += 1
        gc.collect()

    if dropped:
        print(f"process_molecules_parallel: {dropped} molecule(s) dropped - no 3D conformer")
    return results
|
208 |
+
|
209 |
+
def search_data_descriptor_compress(trial, fps, mols, name, target_path="result", save_res="np"):
    """Append the descriptor groups selected by ``trial`` to ``fps``.

    Each descriptor group is switched on or off by
    ``trial.suggest_int(<group name>, 0, 1)``. ``MolWt``, ``MolLogP``,
    ``MolMR`` and ``TPSA`` are always on and are not suggested. All
    suggestions are requested up front, in table order, before anything is
    computed. Enabled groups are then attached through
    ``generating_newfps`` in that same order: 2-D groups from ``mols``
    first, then 3-D groups from the conformers produced by
    ``process_molecules_parallel``.

    Args:
        trial: Object exposing ``suggest_int(name, low, high)``
            (presumably an Optuna trial - confirm against the caller).
        fps: Starting feature matrix (array, or DataFrame for ``"pd"``).
        mols: Sequence of RDKit molecules, one per row of ``fps``.
        name: Stem of the CSV file written when ``save_res == "pd"``.
        target_path: Directory for that CSV; created if missing.
        save_res: ``"pd"`` for DataFrame output plus CSV dump, otherwise
            NumPy output.

    Returns:
        ``fps`` extended by the enabled descriptors, cast to float.
    """

    def resolve(path):
        # Attribute lookup happens when a descriptor is actually computed,
        # so a function missing from the installed RDKit only matters if
        # its group is enabled.
        target = Chem
        for part in path.split('.'):
            target = getattr(target, part)
        return target

    def per_molecule(path, normalize=False):
        # Builder: one call of `path` per molecule; result is a plain list
        # (or a Normalization()-ed array when requested).
        def build(batch):
            func = resolve(path)
            values = [func(mol) for mol in batch]
            return Normalization(values) if normalize else values
        return build

    def column_series(paths, normalize=False):
        # Builder: a list holding one 1-D array per function in `paths`.
        def build(batch):
            columns = []
            for path in paths:
                func = resolve(path)
                values = [func(mol) for mol in batch]
                if normalize:
                    values = Normalization(values)
                columns.append(np.asarray(values))
            return columns
        return build

    def numbered(template, count):
        # "X{}" with count=3 -> ["X1", "X2", "X3"]
        return [template.format(k) for k in range(1, count + 1)]

    def chi_series(batch):
        # Twelve fixed-order chi indices followed by the two padded
        # variable-length chi matrices.
        fixed = ["Chi0", "Chi0n", "Chi0v", "Chi1", "Chi1n", "Chi1v",
                 "Chi2n", "Chi2v", "Chi3n", "Chi3v", "Chi4n", "Chi4v"]
        columns = column_series([f"GraphDescriptors.{n}" for n in fixed])(batch)
        columns.append(np.asarray(generate_chi(batch, 'n')))
        columns.append(np.asarray(generate_chi(batch, 'v')))
        return columns

    # Groups that are always computed (no trial parameter is requested).
    always_on = ("MolWt", "MolLogP", "MolMR", "TPSA")

    # (group name, builder). The name is both the trial parameter and the
    # column label. Order is significant: it fixes the order of trial
    # suggestions and of the output columns.
    stage_2d = [
        ("MolWt", per_molecule("Descriptors.ExactMolWt")),
        ("MolLogP", per_molecule("Crippen.MolLogP")),
        ("MolMR", per_molecule("Crippen.MolMR")),
        ("TPSA", per_molecule("Descriptors.TPSA")),
        ("NumRotatableBonds", per_molecule("Lipinski.NumRotatableBonds")),
        ("HeavyAtomCount", per_molecule("Lipinski.HeavyAtomCount")),
        ("NumHAcceptors", per_molecule("Lipinski.NumHAcceptors")),
        ("NumHDonors", per_molecule("Lipinski.NumHDonors")),
        ("NumHeteroatoms", per_molecule("Lipinski.NumHeteroatoms")),
        ("NumValenceElectrons", per_molecule("Descriptors.NumValenceElectrons")),
        ("NHOHCount", per_molecule("Lipinski.NHOHCount")),
        ("NOCount", per_molecule("Lipinski.NOCount")),
        ("RingCount", per_molecule("Lipinski.RingCount")),
        ("NumAromaticRings", per_molecule("Lipinski.NumAromaticRings")),
        ("NumSaturatedRings", per_molecule("Lipinski.NumSaturatedRings")),
        ("NumAliphaticRings", per_molecule("Lipinski.NumAliphaticRings")),
        ("LabuteASA", per_molecule("rdMolDescriptors.CalcLabuteASA")),
        ("BalabanJ", per_molecule("GraphDescriptors.BalabanJ")),
        ("BertzCT", per_molecule("GraphDescriptors.BertzCT")),
        ("Ipc", per_molecule("GraphDescriptors.Ipc", normalize=True)),
        ("kappa_Series[1-3]_ind", column_series(numbered("GraphDescriptors.Kappa{}", 3))),
        ("Chi_Series[13]_ind", chi_series),
        ("Phi", per_molecule("rdMolDescriptors.CalcPhi")),
        ("HallKierAlpha", per_molecule("GraphDescriptors.HallKierAlpha")),
        ("NumAmideBonds", per_molecule("rdMolDescriptors.CalcNumAmideBonds")),
        ("FractionCSP3", per_molecule("Lipinski.FractionCSP3")),
        ("NumSpiroAtoms", per_molecule("rdMolDescriptors.CalcNumSpiroAtoms")),
        ("NumBridgeheadAtoms", per_molecule("rdMolDescriptors.CalcNumBridgeheadAtoms")),
        ("PEOE_VSA_Series[1-14]_ind", column_series(numbered("MolSurf.PEOE_VSA{}", 14))),
        ("SMR_VSA_Series[1-10]_ind", column_series(numbered("MolSurf.SMR_VSA{}", 10))),
        ("SlogP_VSA_Series[1-12]_ind", column_series(numbered("MolSurf.SlogP_VSA{}", 12))),
        ("EState_VSA_Series[1-11]_ind",
         column_series(numbered("EState.EState_VSA.EState_VSA{}", 11))),
        ("VSA_EState_Series[1-10]",
         column_series(numbered("EState.EState_VSA.VSA_EState{}", 10))),
        ("MQNs", per_molecule("rdMolDescriptors.MQNs_")),
        ("AUTOCORR2D", per_molecule("rdMolDescriptors.CalcAUTOCORR2D")),
        ("BCUT2D", compute_descriptors_parallel),
    ]

    # Groups computed on the 3-D conformers.
    stage_3d = [
        ("Asphericity", per_molecule("rdMolDescriptors.CalcAsphericity")),
        ("PBF", per_molecule("rdMolDescriptors.CalcPBF")),
        ("RadiusOfGyration", per_molecule("rdMolDescriptors.CalcRadiusOfGyration")),
        ("InertialShapeFactor", per_molecule("rdMolDescriptors.CalcInertialShapeFactor")),
        ("Eccentricity", per_molecule("rdMolDescriptors.CalcEccentricity")),
        ("SpherocityIndex", per_molecule("rdMolDescriptors.CalcSpherocityIndex")),
        ("PMI_series[1-3]_ind",
         column_series(numbered("rdMolDescriptors.CalcPMI{}", 3), normalize=True)),
        ("NPR_series[1-2]_ind", column_series(numbered("rdMolDescriptors.CalcNPR{}", 2))),
        ("AUTOCORR3D", per_molecule("rdMolDescriptors.CalcAUTOCORR3D")),
        ("RDF", per_molecule("rdMolDescriptors.CalcRDF", normalize=True)),
        ("MORSE", per_molecule("rdMolDescriptors.CalcMORSE", normalize=True)),
        ("WHIM", per_molecule("rdMolDescriptors.CalcWHIM", normalize=True)),
        ("GETAWAY", per_molecule("rdMolDescriptors.CalcGETAWAY", normalize=True)),
    ]

    # Ask the trial for every switch before computing anything.
    enabled = {}
    for label, _ in stage_2d + stage_3d:
        enabled[label] = 1 if label in always_on else trial.suggest_int(label, 0, 1)

    for label, build in stage_2d:
        if enabled[label] == 1:
            fps = generating_newfps(fps, build(mols), label, save_res)

    # Conformer generation is the most expensive step; skip it entirely when
    # no 3-D group was selected.
    if any(enabled[label] == 1 for label, _ in stage_3d):
        mols2 = process_molecules_parallel(mols, max_workers=8)
        for label, build in stage_3d:
            if enabled[label] == 1:
                fps = generating_newfps(fps, build(mols2), label, save_res)

    if save_res == "pd":
        if target_path:
            # to_csv does not create missing directories.
            os.makedirs(target_path, exist_ok=True)
        fps.to_csv(f'{target_path}/{name}_feature_selection.csv')

    fps = fps.astype('float')
    return fps
|
extra_code/feature_selection.py
ADDED
@@ -0,0 +1,951 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import gc
|
5 |
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
6 |
+
|
7 |
+
from rdkit import Chem
|
8 |
+
from rdkit.Chem import AllChem, DataStructs, Draw
|
9 |
+
from rdkit import RDConfig
|
10 |
+
from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges
|
11 |
+
from rdkit.Chem.AllChem import GetMorganGenerator
|
12 |
+
from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray
|
13 |
+
from rdkit.Avalon.pyAvalonTools import GetAvalonFP
|
14 |
+
from rdkit.Chem.Descriptors import ExactMolWt
|
15 |
+
|
16 |
+
import tensorflow as tf
|
17 |
+
from tensorflow import keras
|
18 |
+
from tensorflow.keras import layers
|
19 |
+
from tensorflow.keras.models import Sequential
|
20 |
+
from tensorflow.keras.layers import Dense, Dropout, Activation
|
21 |
+
from tensorflow.keras.regularizers import l2
|
22 |
+
from tensorflow.keras.optimizers import Adam
|
23 |
+
from tensorflow.keras import regularizers
|
24 |
+
|
25 |
+
import optuna
|
26 |
+
|
27 |
+
def mol3d(mol):
    """Return ``mol`` with explicit hydrogens and a 3D conformer, or ``None``.

    The molecule returned by ``Chem.AddHs`` is passed through a fixed sequence
    of RDKit calls: ETKDGv3 embedding, UFF optimisation, MMFF optimisation.
    After each call that does not raise ``ValueError`` the molecule is
    returned as soon as it reports at least one conformer. A ``ValueError``
    is printed and the next call is tried. If no call leaves a conformer
    behind, a message with the SMILES is printed and ``None`` is returned.

    NOTE(review): the conformer check directly follows the embedding call, so
    a successfully embedded molecule is returned before either force-field
    step runs -- confirm that un-optimised ETKDG geometries are intended.
    """
    mol = Chem.AddHs(mol)

    # Each entry is (callable, positional arguments, keyword arguments).
    attempts = (
        (AllChem.EmbedMolecule, (mol, AllChem.ETKDGv3()), {}),
        (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),
        (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200}),
    )

    for method, positional, keywords in attempts:
        try:
            method(*positional, **keywords)
            has_conformer = mol.GetNumConformers() > 0
        except ValueError as e:
            print(f"Error: {e} - Trying next optimization method [{method}]")
            continue
        if has_conformer:
            return mol

    print(f"Invalid mol for 3d {Chem.MolToSmiles(mol)} - No conformer generated")
    return None
|
45 |
+
|
46 |
+
import numpy as np
|
47 |
+
import pandas as pd
|
48 |
+
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
|
49 |
+
from typing import Union, List, Optional
|
50 |
+
|
51 |
+
def process_chunk_optimized(chunk_data):
    """Wrap one column slice of a descriptor matrix in a labelled DataFrame.

    ``chunk_data`` is a ``(chunk, name_prefix, start_idx)`` triple. Column
    ``k`` of ``chunk`` is labelled ``f"{name_prefix}_{start_idx + k + 1}"``,
    i.e. labels are 1-based and continue from the slice's starting offset.
    """
    block, prefix, offset = chunk_data
    width = block.shape[1]
    labels = [f"{prefix}_{offset + k + 1}" for k in range(width)]
    return pd.DataFrame(block, columns=labels)
|
57 |
+
|
58 |
+
def generate_df_concurrently(descriptor: np.ndarray, name_prefix: str, chunk_size: int = 1000) -> Optional[pd.DataFrame]:
    """Build a DataFrame with columns ``{name_prefix}_1 .. {name_prefix}_N``.

    The frame is built in-process. Wrapping array slices in DataFrames is
    much cheaper than pickling them to worker processes and back, and a
    process pool can fail to start where the platform spawns workers, so no
    executor is used.

    Args:
        descriptor: 2-D array; one output column per array column.
        name_prefix: Prefix of the generated 1-based column labels.
        chunk_size: Accepted for backward compatibility; it has no effect on
            the result.

    Returns:
        A DataFrame holding a copy of ``descriptor``, or ``None`` when
        ``descriptor`` has zero columns.

    Raises:
        IndexError: If ``descriptor`` has fewer than two dimensions.
        ValueError: If ``descriptor`` has more than two dimensions.
    """
    n_columns = descriptor.shape[1]
    if n_columns == 0:
        return None

    labels = [f"{name_prefix}_{i + 1}" for i in range(n_columns)]
    # copy=True keeps the frame independent of the caller's array, so later
    # in-place clean-up of the frame cannot write through to ``descriptor``.
    return pd.DataFrame(descriptor, columns=labels, copy=True)
|
75 |
+
|
76 |
+
def generating_newfps(
    fps: Union[np.ndarray, pd.DataFrame],
    descriptor: Optional[Union[np.ndarray, List[np.ndarray], List[List]]],
    descriptor_name: str,
    save_res: str = "np",
    chunk_size: int = 1000
) -> Union[np.ndarray, pd.DataFrame]:
    """Append ``descriptor`` to ``fps`` as one or more feature columns.

    ``descriptor`` is classified, in this order, as:
      1. an ndarray with two or more dimensions,
      2. a list whose first element is an ndarray (1-D members become
         single columns),
      3. a list whose first element is a list (cast to float32),
      4. anything else, treated as one value per row (cast to float32).

    With ``save_res == "pd"`` the result is a DataFrame. Cases 1-3 add columns
    named ``{descriptor_name}_1..N`` via ``generate_df_concurrently``, case 4
    adds a single column named ``descriptor_name``. Infinities and NaN are
    then replaced by 0 in place. When ``fps`` is already a DataFrame it is
    used directly, so it may be modified.

    With any other ``save_res`` the result is a float32 ndarray with the
    descriptor columns concatenated on axis 1 and NaN/inf replaced by 0.

    Failures are printed with a numbered tag rather than raised. Depending on
    where the failure occurs, the input ``fps`` or the partially extended
    frame is returned. ``descriptor is None`` returns ``fps`` untouched.
    """
    def _is_matrix():
        return isinstance(descriptor, np.ndarray) and descriptor.ndim >= 2

    def _is_list_of(kind):
        return isinstance(descriptor, list) and isinstance(descriptor[0], kind)

    def _as_columns(arrays):
        # Promote 1-D arrays to single-column matrices so they can be joined.
        return [arr if arr.ndim > 1 else arr.reshape(-1, 1) for arr in arrays]

    def _attach(frame, matrix):
        # Label the matrix columns and glue them to the right of ``frame``.
        extra = generate_df_concurrently(matrix, descriptor_name, chunk_size)
        if extra is None:
            return frame
        return pd.concat([frame, extra], axis=1)

    try:
        if descriptor is None:
            return fps

        if save_res != "pd":  # numpy handling
            try:
                if _is_matrix():
                    pieces = [fps, descriptor]
                elif _is_list_of(np.ndarray):
                    pieces = [fps] + _as_columns(descriptor)
                elif _is_list_of(list):
                    pieces = [fps, np.asarray(descriptor, dtype=np.float32)]
                else:
                    per_row = np.asarray(descriptor, dtype=np.float32)
                    pieces = [fps, per_row[:, None]]

                joined = np.concatenate(pieces, axis=1)
                return np.nan_to_num(joined, nan=0.0, posinf=0.0, neginf=0.0).astype('float32')
            except Exception as e:
                print(f"[-5-] Error in numpy processing: {e}")
                return fps

        # pandas handling
        new_fps = fps if isinstance(fps, pd.DataFrame) else pd.DataFrame(fps)

        if _is_matrix():
            new_fps = _attach(new_fps, descriptor)
        elif _is_list_of(np.ndarray):
            try:
                new_fps = _attach(new_fps, np.hstack(_as_columns(descriptor)))
            except Exception as e:
                print(f"[-2-] Error processing array list: {e}")
        elif _is_list_of(list):
            try:
                new_fps = _attach(new_fps, np.asarray(descriptor, dtype=np.float32))
            except Exception as e:
                print(f"[-3-] Error processing nested list: {e}")
        else:
            try:
                per_row = np.asarray(descriptor, dtype=np.float32)
                new_fps[descriptor_name] = per_row.flatten()
            except Exception as e:
                print(f"[-4-] Error processing single descriptor: {e}")

        new_fps.replace([np.inf, -np.inf], np.nan, inplace=True)
        new_fps.fillna(0, inplace=True)
        return new_fps

    except Exception as e:
        print(f"[-6-] General error in {descriptor_name}: {e}")
        return fps
|
152 |
+
|
153 |
+
# def generating_newfps(fps, descriptor, descriptor_name, save_res="np"):
|
154 |
+
# try:
|
155 |
+
# if descriptor is None:
|
156 |
+
# return fps
|
157 |
+
|
158 |
+
# if save_res == "pd":
|
159 |
+
# new_fps = pd.DataFrame(fps) if not isinstance(fps, pd.DataFrame) else fps
|
160 |
+
|
161 |
+
# if isinstance(descriptor, np.ndarray) and descriptor.ndim >= 2:
|
162 |
+
# try:
|
163 |
+
# descriptors_df = pd.DataFrame(
|
164 |
+
# {f"{descriptor_name}_{i+1}": descriptor[:, i] for i in range(descriptor.shape[1])}
|
165 |
+
# )
|
166 |
+
# new_fps = pd.concat([new_fps, descriptors_df], axis=1)
|
167 |
+
# del descriptor
|
168 |
+
# except Exception as e:
|
169 |
+
# print(f"[-1-] Error occured: {e}")
|
170 |
+
|
171 |
+
# elif isinstance(descriptor, list) and isinstance(descriptor[0], np.ndarray):
|
172 |
+
# try:
|
173 |
+
# arrays_1d = [arr[:, None] for arr in descriptor if arr.ndim == 1]
|
174 |
+
# arrays_2d = [arr for arr in descriptor if arr.ndim == 2]
|
175 |
+
# combined_1d = np.concatenate(arrays_1d, axis=1) if arrays_1d else None
|
176 |
+
# combined_2d = np.concatenate(arrays_2d, axis=1) if arrays_2d else None
|
177 |
+
|
178 |
+
# if combined_1d is not None:
|
179 |
+
# df_1d = pd.DataFrame(
|
180 |
+
# combined_1d,
|
181 |
+
# columns=[f'{descriptor_name}_{i+1}' for i in range(combined_1d.shape[1])]
|
182 |
+
# )
|
183 |
+
# new_fps = pd.concat([new_fps, df_1d], axis=1)
|
184 |
+
|
185 |
+
# if combined_2d is not None:
|
186 |
+
# df_2d = pd.DataFrame(
|
187 |
+
# combined_2d,
|
188 |
+
# columns=[f'{descriptor_name}_{i+1}' for i in range(combined_2d.shape[1])]
|
189 |
+
# )
|
190 |
+
# new_fps = pd.concat([new_fps, df_2d], axis=1)
|
191 |
+
|
192 |
+
# del descriptor, arrays_1d, arrays_2d
|
193 |
+
# if combined_1d is not None: del combined_1d
|
194 |
+
# if combined_2d is not None: del combined_2d
|
195 |
+
# except Exception as e:
|
196 |
+
# print(f"[-2-] Error occured: {e}")
|
197 |
+
|
198 |
+
# elif isinstance(descriptor, list) and isinstance(descriptor[0], list):
|
199 |
+
# try:
|
200 |
+
# descriptor = np.asarray(descriptor).astype('float')
|
201 |
+
# descriptors_df = pd.DataFrame(
|
202 |
+
# {f"{descriptor_name}_{i+1}": descriptor[:, i] for i in range(descriptor.shape[1])}
|
203 |
+
# )
|
204 |
+
# new_fps = pd.concat([new_fps, descriptors_df], axis=1)
|
205 |
+
# del descriptor
|
206 |
+
# except Exception as e:
|
207 |
+
# print(f"[-3-] Error occured: {e}")
|
208 |
+
|
209 |
+
# else:
|
210 |
+
# descriptor = np.asarray(descriptor).astype('float')
|
211 |
+
# new_fps[descriptor_name] = descriptor.flatten()
|
212 |
+
# del descriptor
|
213 |
+
|
214 |
+
# new_fps = new_fps.replace([np.inf, -np.inf], np.nan).fillna(0)
|
215 |
+
# return new_fps
|
216 |
+
|
217 |
+
# else:
|
218 |
+
# new_fps = fps
|
219 |
+
|
220 |
+
# if descriptor is None:
|
221 |
+
# pass
|
222 |
+
# elif isinstance(descriptor, np.ndarray) and descriptor.ndim >= 2:
|
223 |
+
# try:
|
224 |
+
# new_fps = np.concatenate([new_fps, descriptor], axis=1)
|
225 |
+
# del descriptor
|
226 |
+
# except Exception as e:
|
227 |
+
# print(f"[-1-] Error occured: {e}")
|
228 |
+
# elif isinstance(descriptor, list) and isinstance(descriptor[0], np.ndarray):
|
229 |
+
# try:
|
230 |
+
# arrays_1d = [arr[:, None] for arr in descriptor if arr.ndim == 1]
|
231 |
+
# arrays_2d = [arr for arr in descriptor if arr.ndim == 2]
|
232 |
+
# combined_1d = np.concatenate(arrays_1d, axis=1) if arrays_1d else None
|
233 |
+
# combined_2d = np.concatenate(arrays_2d, axis=1) if arrays_2d else None
|
234 |
+
# to_concat = [new_fps] + [arr for arr in [combined_1d, combined_2d] if arr is not None]
|
235 |
+
# new_fps = np.concatenate(to_concat, axis=1)
|
236 |
+
# del descriptor, arrays_1d, arrays_2d
|
237 |
+
# if combined_1d is not None: del combined_1d
|
238 |
+
# if combined_2d is not None: del combined_2d
|
239 |
+
# except Exception as e:
|
240 |
+
# print(f"[-2-] Error occured: {e}")
|
241 |
+
# elif isinstance(descriptor, list) and isinstance(descriptor[0], list):
|
242 |
+
# try:
|
243 |
+
# descriptor = np.asarray(descriptor).astype('float')
|
244 |
+
# new_fps = np.concatenate([new_fps, descriptor], axis=1)
|
245 |
+
# del descriptor
|
246 |
+
# except Exception as e:
|
247 |
+
# print(f"[-3-] Error occured: {e}")
|
248 |
+
# else:
|
249 |
+
# descriptor = np.asarray(descriptor).astype('float')
|
250 |
+
# new_fps = np.concatenate([new_fps, descriptor[:,None]], axis=1)
|
251 |
+
# del descriptor
|
252 |
+
|
253 |
+
# new_fps = np.nan_to_num(new_fps, nan=0.0, posinf=0.0, neginf=0.0).astype('float')
|
254 |
+
# return new_fps
|
255 |
+
|
256 |
+
# except Exception as e:
|
257 |
+
# print(f"Error occurred in {descriptor_name}: {e}")
|
258 |
+
# return fps
|
259 |
+
|
260 |
+
def Normalization(descriptor, *, epsilon=1e-10, max_value=1e15):
    """Compress descriptor values with a sign-preserving ``log1p`` transform.

    Steps, applied element-wise:
      1. clip to ``[-max_value, max_value]``;
      2. replace every value whose magnitude is below ``epsilon`` (including
         exact zero and tiny negatives) by ``+epsilon``;
      3. compute ``sign(x) * log1p(|x|)``;
      4. replace any remaining NaN or infinity by 0.

    Args:
        descriptor: Array-like of numbers, any shape.
        epsilon: Magnitude floor used in step 2.
        max_value: Clipping bound used in step 1.

    Returns:
        ndarray with the same shape as ``np.asarray(descriptor)``.
    """
    values = np.clip(np.asarray(descriptor), -max_value, max_value)
    values = np.where(np.abs(values) < epsilon, epsilon, values)
    compressed = np.sign(values) * np.log1p(np.abs(values))
    # No explicit ``del``/``gc.collect()`` here: the temporaries are freed by
    # reference counting and a forced full collection on every call only
    # costs time.
    return np.nan_to_num(compressed, nan=0.0, posinf=0.0, neginf=0.0)
|
271 |
+
|
272 |
+
def values_chi(mol, chi_type):
    """Collect chi indices of increasing order for ``mol`` up to the first zero.

    Orders 0, 1, 2, ... are evaluated with ``Chem.GraphDescriptors.ChiNn_``
    when ``chi_type == 'n'`` and with ``Chem.GraphDescriptors.ChiNv_`` for any
    other ``chi_type``. Evaluation stops at the first order whose value
    equals 0.0; that zero is not included in the result.

    Each order is evaluated exactly once. Values are stored as they are
    produced, rather than being recomputed after the stop index is found.

    Returns:
        1-D ndarray of the non-zero leading values (empty if order 0 is 0.0).
    """
    graph = Chem.GraphDescriptors
    chi_func = graph.ChiNn_ if chi_type == 'n' else graph.ChiNv_

    values = []
    order = 0
    while True:
        value = chi_func(mol, order)
        if value == 0.0:
            break
        values.append(value)
        order += 1
    return np.array(values)
|
278 |
+
|
279 |
+
def generate_chi(mols, chi_type):
    """Compute ``values_chi`` for every molecule in parallel and zero-pad.

    Args:
        mols: Iterable of molecules passed one by one to ``values_chi``.
        chi_type: Forwarded unchanged to ``values_chi``.

    Returns:
        2-D ndarray with one row per molecule, in input order. Rows shorter
        than the longest one are padded on the right with zeros. An empty
        input yields an array of shape ``(0, 0)``.
    """
    mols = list(mols)
    if not mols:
        # Nothing to compute; avoids max() on an empty sequence below.
        return np.empty((0, 0))

    # The executor's default worker count is used. An explicit
    # os.cpu_count() can exceed the Windows limit of 61 workers and raise
    # ValueError there.
    with ProcessPoolExecutor() as executor:
        futures = [executor.submit(values_chi, mol, chi_type) for mol in mols]
        # Futures are read in submission order, so rows stay aligned with mols.
        descriptor = [future.result() for future in futures]

    max_length = max(len(row) for row in descriptor)
    return np.array(
        [np.pad(row, (0, max_length - len(row)), 'constant') for row in descriptor]
    )
|
289 |
+
|
290 |
+
def sanitize_and_compute_descriptor(mol):
    """Return the BCUT2D descriptor of ``mol`` with layered fallbacks.

    Order of operations and fallbacks:
      * remove hydrogens and sanitize; on any exception return eight zeros;
      * compute Gasteiger charges; on failure print a message and return
        eight zeros;
      * compute ``BCUT2D``; on failure print a message and return the
        molecular weight repeated eight times (eight zeros if that fails
        as well).
    """
    try:
        mol = Chem.RemoveHs(mol)
        Chem.SanitizeMol(mol)
    except Exception:
        return [0] * 8

    try:
        Chem.rdPartialCharges.ComputeGasteigerCharges(mol)
    except Exception as e:
        print(f"Gasteiger charge calculation failed: {e}")
        return [0] * 8

    try:
        return Chem.rdMolDescriptors.BCUT2D(mol)
    except Exception as e:
        print(f"BCUT2D calculation failed: {e}")

    # Last resort: substitute the molecular weight for every component.
    try:
        return [Descriptors.MolWt(mol)] * 8
    except Exception:
        return [0] * 8
|
307 |
+
|
308 |
+
def compute_descriptors_parallel(mols, n_jobs=None):
    """Run ``sanitize_and_compute_descriptor`` on each molecule in a process pool.

    Args:
        mols: Iterable of molecules; ``None`` entries are skipped.
        n_jobs: ``max_workers`` for the ``ProcessPoolExecutor``.

    Returns:
        ndarray built from the per-molecule results, in submission order.

    NOTE(review): skipped ``None`` entries produce no row, so the output can
    have fewer rows than ``mols`` -- verify callers do not rely on alignment.
    """
    with ProcessPoolExecutor(max_workers=n_jobs) as executor:
        pending = []
        for mol in mols:
            if mol is None:
                continue
            pending.append(executor.submit(sanitize_and_compute_descriptor, mol))
        rows = [job.result() for job in pending]
    return np.array(rows)
|
313 |
+
|
314 |
+
def process_molecules_parallel(mols, max_workers=4, chunk_size=100):
    """Generate 3D molecules with ``mol3d`` in parallel, preserving input order.

    ``mols`` is processed in consecutive slices of ``chunk_size``; each slice
    gets its own ``ProcessPoolExecutor`` and is followed by ``gc.collect()``.

    Results are gathered in submission order. Iterating with
    ``as_completed`` would yield them in completion order, which varies from
    run to run and would shuffle molecules relative to ``mols``.

    Args:
        mols: Sliceable sequence of molecules.
        max_workers: Worker processes per slice.
        chunk_size: Number of molecules per slice.

    Returns:
        List of the non-``None`` ``mol3d`` results, in the order of ``mols``.
        Molecules for which ``mol3d`` returns ``None`` are omitted, so the
        list can be shorter than ``mols``.
    """
    results = []
    for start in range(0, len(mols), chunk_size):
        chunk = mols[start:start + chunk_size]
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(mol3d, mol) for mol in chunk]
            for future in futures:
                result = future.result()
                if result is not None:
                    results.append(result)
        gc.collect()
    return results
|
326 |
+
|
327 |
+
def selection_data_descriptor_compress(selection, fps, mols, name, target_path="result", save_res="np"):
|
328 |
+
if save_res == "pd":
|
329 |
+
fps = pd.DataFrame({'mols': mols})
|
330 |
+
####################################
|
331 |
+
phase0 = 1 #selection[0] #"MolWeight" #
|
332 |
+
phase1 = 1 #selection[1] #"Mol_logP" #
|
333 |
+
phase2 = 1 #selection[2] #"Mol_MR" #
|
334 |
+
phase3 = 1 #selection[3] #"Mol_TPSA" #
|
335 |
+
phase4 = selection[4] #"NumRotatableBonds" #
|
336 |
+
phase5 = selection[5] #"HeavyAtomCount" #
|
337 |
+
phase6 = selection[6] #"NumHAcceptors" #
|
338 |
+
phase7 = selection[7] #"NumHDonors" #
|
339 |
+
phase8 = selection[8] #"NumHeteroatoms" #
|
340 |
+
phase9 = selection[9] #"NumValenceElec" #
|
341 |
+
phase10 = selection[10] #"NHOHCount" #
|
342 |
+
phase11 = selection[11] #"NOCount" #
|
343 |
+
phase12 = selection[12] #"RingCount" #
|
344 |
+
phase13 = selection[13] #"NumAromaticRings" #
|
345 |
+
phase14 = selection[14] #"NumSaturatedRings" #
|
346 |
+
phase15 = selection[15] #"NumAliphaticRings" #
|
347 |
+
phase16 = selection[16] #"LabuteASA" #
|
348 |
+
phase17 = selection[17] #"BalabanJ" #
|
349 |
+
phase18 = selection[18] #"BertzCT" #
|
350 |
+
phase19 = selection[19] #"Ipc" #
|
351 |
+
phase20 = selection[20] #"kappa_Series[1-3]_ind" #
|
352 |
+
phase21 = selection[21] #"Chi_Series[13]_ind" #
|
353 |
+
phase22 = selection[22] #"Phi" #
|
354 |
+
phase23 = selection[23] #"HallKierAlpha" #
|
355 |
+
phase24 = selection[24] #"NumAmideBonds" #
|
356 |
+
phase25 = selection[25] #"FractionCSP3" #
|
357 |
+
phase26 = selection[26] #"NumSpiroAtoms" #
|
358 |
+
phase27 = selection[27] #"NumBridgeheadAtoms" #
|
359 |
+
phase28 = selection[28] #"PEOE_VSA_Series[1-14]_ind" #
|
360 |
+
phase29 = selection[29] #"SMR_VSA_Series[1-10]_ind" #
|
361 |
+
phase30 = selection[30] #"SlogP_VSA_Series[1-12]_ind"#
|
362 |
+
phase31 = selection[31] #"EState_VSA_Series[1-11]_ind"#
|
363 |
+
phase32 = selection[32] #"VSA_EState_Series[1-10]_ind"#
|
364 |
+
phase33 = selection[33] #"MQNs" #
|
365 |
+
phase34 = selection[34] #"AUTOCORR2D" #
|
366 |
+
phase35 = selection[35] #"BCUT2D" #
|
367 |
+
phase36 = selection[36] #"Asphericity" #
|
368 |
+
phase37 = selection[37] #"PBF" #
|
369 |
+
phase38 = selection[38] #"RadiusOfGyration" #
|
370 |
+
phase39 = selection[39] #"InertialShapeFactor"#
|
371 |
+
phase40 = selection[40] #"Eccentricity"
|
372 |
+
phase41 = selection[41] #"SpherocityIndex"
|
373 |
+
phase42 = selection[42] #"PMI_series[1-3]_ind"
|
374 |
+
phase43 = selection[43] #"NPR_series[1-2]_ind"
|
375 |
+
phase44 = selection[44] #"AUTOCORR3D"
|
376 |
+
phase45 = selection[45] #"RDF"
|
377 |
+
phase46 = selection[46] #"MORSE"
|
378 |
+
phase47 = selection[47] #"WHIM"
|
379 |
+
phase48 = selection[48] #"GETAWAY"
|
380 |
+
####################################
|
381 |
+
def clear_descriptor_memory(descriptor):
|
382 |
+
del descriptor
|
383 |
+
gc.collect()
|
384 |
+
####################################
|
385 |
+
####################################
|
386 |
+
if phase0 == 1:
|
387 |
+
descriptor = [Descriptors.ExactMolWt(alpha) for alpha in mols]
|
388 |
+
fps = generating_newfps(fps, descriptor, 'MolWt', save_res)
|
389 |
+
clear_descriptor_memory(descriptor)
|
390 |
+
if phase1 == 1:
|
391 |
+
descriptor = [Chem.Crippen.MolLogP(alpha) for alpha in mols]
|
392 |
+
fps = generating_newfps(fps, descriptor, 'MolLogP', save_res)
|
393 |
+
clear_descriptor_memory(descriptor)
|
394 |
+
if phase2 == 1:
|
395 |
+
descriptor = [Chem.Crippen.MolMR(alpha) for alpha in mols]
|
396 |
+
fps = generating_newfps(fps, descriptor, 'MolMR', save_res)
|
397 |
+
clear_descriptor_memory(descriptor)
|
398 |
+
if phase3 == 1:
|
399 |
+
descriptor = [Descriptors.TPSA(alpha) for alpha in mols]
|
400 |
+
fps = generating_newfps(fps, descriptor, 'TPSA', save_res)
|
401 |
+
clear_descriptor_memory(descriptor)
|
402 |
+
if phase4 == 1:
|
403 |
+
descriptor = [Chem.Lipinski.NumRotatableBonds(alpha) for alpha in mols]
|
404 |
+
fps = generating_newfps(fps, descriptor, 'NumRotatableBonds', save_res)
|
405 |
+
clear_descriptor_memory(descriptor)
|
406 |
+
if phase5 == 1:
|
407 |
+
descriptor = [Chem.Lipinski.HeavyAtomCount(alpha) for alpha in mols]
|
408 |
+
fps = generating_newfps(fps, descriptor, 'HeavyAtomCount', save_res)
|
409 |
+
clear_descriptor_memory(descriptor)
|
410 |
+
if phase6 == 1:
|
411 |
+
descriptor = [Chem.Lipinski.NumHAcceptors(alpha) for alpha in mols]
|
412 |
+
fps = generating_newfps(fps, descriptor, 'NumHAcceptors', save_res)
|
413 |
+
clear_descriptor_memory(descriptor)
|
414 |
+
if phase7 == 1:
|
415 |
+
descriptor = [Chem.Lipinski.NumHDonors(alpha) for alpha in mols]
|
416 |
+
fps = generating_newfps(fps, descriptor, 'NumHDonors', save_res)
|
417 |
+
clear_descriptor_memory(descriptor)
|
418 |
+
if phase8 == 1:
|
419 |
+
descriptor = [Chem.Lipinski.NumHeteroatoms(alpha) for alpha in mols]
|
420 |
+
fps = generating_newfps(fps, descriptor, 'NumHeteroatoms', save_res)
|
421 |
+
clear_descriptor_memory(descriptor)
|
422 |
+
if phase9 == 1:
|
423 |
+
descriptor = [Chem.Descriptors.NumValenceElectrons(alpha) for alpha in mols]
|
424 |
+
fps = generating_newfps(fps, descriptor, 'NumValenceElectrons', save_res)
|
425 |
+
clear_descriptor_memory(descriptor)
|
426 |
+
if phase10 == 1:
|
427 |
+
descriptor = [Chem.Lipinski.NHOHCount(alpha) for alpha in mols]
|
428 |
+
fps = generating_newfps(fps, descriptor, 'NHOHCount', save_res)
|
429 |
+
clear_descriptor_memory(descriptor)
|
430 |
+
if phase11 == 1:
|
431 |
+
descriptor = [Chem.Lipinski.NOCount(alpha) for alpha in mols]
|
432 |
+
fps = generating_newfps(fps, descriptor, 'NOCount', save_res)
|
433 |
+
clear_descriptor_memory(descriptor)
|
434 |
+
if phase12 == 1:
|
435 |
+
descriptor = [Chem.Lipinski.RingCount(alpha) for alpha in mols]
|
436 |
+
fps = generating_newfps(fps, descriptor, 'RingCount', save_res)
|
437 |
+
clear_descriptor_memory(descriptor)
|
438 |
+
if phase13 == 1:
|
439 |
+
descriptor = [Chem.Lipinski.NumAromaticRings(alpha) for alpha in mols]
|
440 |
+
fps = generating_newfps(fps, descriptor, 'NumAromaticRings', save_res)
|
441 |
+
clear_descriptor_memory(descriptor)
|
442 |
+
if phase14 == 1:
|
443 |
+
descriptor = [Chem.Lipinski.NumSaturatedRings(alpha) for alpha in mols]
|
444 |
+
fps = generating_newfps(fps, descriptor, 'NumSaturatedRings', save_res)
|
445 |
+
clear_descriptor_memory(descriptor)
|
446 |
+
if phase15 == 1:
|
447 |
+
descriptor = [Chem.Lipinski.NumAliphaticRings(alpha) for alpha in mols]
|
448 |
+
fps = generating_newfps(fps, descriptor, 'NumAliphaticRings', save_res)
|
449 |
+
clear_descriptor_memory(descriptor)
|
450 |
+
if phase16 == 1:
|
451 |
+
descriptor = [Chem.rdMolDescriptors.CalcLabuteASA(alpha) for alpha in mols]
|
452 |
+
fps = generating_newfps(fps, descriptor, 'LabuteASA', save_res)
|
453 |
+
clear_descriptor_memory(descriptor)
|
454 |
+
if phase17 == 1:
|
455 |
+
descriptor = [Chem.GraphDescriptors.BalabanJ(alpha) for alpha in mols]
|
456 |
+
# descriptor = Normalization(descriptor)
|
457 |
+
fps = generating_newfps(fps, descriptor, 'BalabanJ', save_res)
|
458 |
+
clear_descriptor_memory(descriptor)
|
459 |
+
if phase18 == 1:
|
460 |
+
descriptor = [Chem.GraphDescriptors.BertzCT(alpha) for alpha in mols]
|
461 |
+
# descriptor = Normalization(descriptor)
|
462 |
+
fps = generating_newfps(fps, descriptor, 'BertzCT', save_res)
|
463 |
+
clear_descriptor_memory(descriptor)
|
464 |
+
if phase19 == 1:
|
465 |
+
descriptor = [Chem.GraphDescriptors.Ipc(alpha) for alpha in mols]
|
466 |
+
descriptor = Normalization(descriptor)
|
467 |
+
fps = generating_newfps(fps, descriptor, 'Ipc', save_res)
|
468 |
+
clear_descriptor_memory(descriptor)
|
469 |
+
if phase20 == 1:
|
470 |
+
d1 = [Chem.GraphDescriptors.Kappa1(alpha) for alpha in mols]
|
471 |
+
d2 = [Chem.GraphDescriptors.Kappa2(alpha) for alpha in mols]
|
472 |
+
d3 = [Chem.GraphDescriptors.Kappa3(alpha) for alpha in mols]
|
473 |
+
d1 = np.asarray(d1)
|
474 |
+
d2 = np.asarray(d2)
|
475 |
+
d3 = np.asarray(d3)
|
476 |
+
fps = generating_newfps(fps, [d1,d2,d3], 'kappa_Series[1-3]_ind', save_res)
|
477 |
+
clear_descriptor_memory(d1)
|
478 |
+
clear_descriptor_memory(d2)
|
479 |
+
clear_descriptor_memory(d3)
|
480 |
+
if phase21 == 1:
|
481 |
+
d1 = [Chem.GraphDescriptors.Chi0(alpha) for alpha in mols]
|
482 |
+
d2 = [Chem.GraphDescriptors.Chi0n(alpha) for alpha in mols]
|
483 |
+
d3 = [Chem.GraphDescriptors.Chi0v(alpha) for alpha in mols]
|
484 |
+
d4 = [Chem.GraphDescriptors.Chi1(alpha) for alpha in mols]
|
485 |
+
d5 = [Chem.GraphDescriptors.Chi1n(alpha) for alpha in mols]
|
486 |
+
d6 = [Chem.GraphDescriptors.Chi1v(alpha) for alpha in mols]
|
487 |
+
d7 = [Chem.GraphDescriptors.Chi2n(alpha) for alpha in mols]
|
488 |
+
d8 = [Chem.GraphDescriptors.Chi2v(alpha) for alpha in mols]
|
489 |
+
d9 = [Chem.GraphDescriptors.Chi3n(alpha) for alpha in mols]
|
490 |
+
d10 = [Chem.GraphDescriptors.Chi3v(alpha) for alpha in mols]
|
491 |
+
d11 = [Chem.GraphDescriptors.Chi4n(alpha) for alpha in mols]
|
492 |
+
d12 = [Chem.GraphDescriptors.Chi4v(alpha) for alpha in mols]
|
493 |
+
d13 = generate_chi(mols, 'n')
|
494 |
+
d14 = generate_chi(mols, 'v')
|
495 |
+
d1 = np.asarray(d1)
|
496 |
+
d2 = np.asarray(d2)
|
497 |
+
d3 = np.asarray(d3)
|
498 |
+
d4 = np.asarray(d4)
|
499 |
+
d5 = np.asarray(d5)
|
500 |
+
d6 = np.asarray(d6)
|
501 |
+
d7 = np.asarray(d7)
|
502 |
+
d8 = np.asarray(d8)
|
503 |
+
d9 = np.asarray(d9)
|
504 |
+
d10 = np.asarray(d10)
|
505 |
+
d11 = np.asarray(d11)
|
506 |
+
d12 = np.asarray(d12)
|
507 |
+
d13 = np.asarray(d13)
|
508 |
+
d14 = np.asarray(d14)
|
509 |
+
fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14], 'Chi_Series[13]_ind', save_res)
|
510 |
+
clear_descriptor_memory(d1)
|
511 |
+
clear_descriptor_memory(d2)
|
512 |
+
clear_descriptor_memory(d3)
|
513 |
+
clear_descriptor_memory(d4)
|
514 |
+
clear_descriptor_memory(d5)
|
515 |
+
clear_descriptor_memory(d6)
|
516 |
+
clear_descriptor_memory(d7)
|
517 |
+
clear_descriptor_memory(d8)
|
518 |
+
clear_descriptor_memory(d9)
|
519 |
+
clear_descriptor_memory(d10)
|
520 |
+
clear_descriptor_memory(d11)
|
521 |
+
clear_descriptor_memory(d12)
|
522 |
+
clear_descriptor_memory(d13)
|
523 |
+
clear_descriptor_memory(d14)
|
524 |
+
if phase22 == 1:
|
525 |
+
descriptor = [Chem.rdMolDescriptors.CalcPhi(alpha) for alpha in mols]
|
526 |
+
fps = generating_newfps(fps, descriptor, 'Phi', save_res)
|
527 |
+
clear_descriptor_memory(descriptor)
|
528 |
+
if phase23 == 1:
|
529 |
+
descriptor = [Chem.GraphDescriptors.HallKierAlpha(alpha) for alpha in mols]
|
530 |
+
fps = generating_newfps(fps, descriptor, 'HallKierAlpha', save_res)
|
531 |
+
clear_descriptor_memory(descriptor)
|
532 |
+
if phase24 == 1:
|
533 |
+
descriptor = [Chem.rdMolDescriptors.CalcNumAmideBonds(alpha) for alpha in mols]
|
534 |
+
fps = generating_newfps(fps, descriptor, 'NumAmideBonds', save_res)
|
535 |
+
clear_descriptor_memory(descriptor)
|
536 |
+
if phase25 == 1:
|
537 |
+
descriptor = [Chem.Lipinski.FractionCSP3(alpha) for alpha in mols]
|
538 |
+
fps = generating_newfps(fps, descriptor, 'FractionCSP3', save_res)
|
539 |
+
clear_descriptor_memory(descriptor)
|
540 |
+
if phase26 == 1:
|
541 |
+
descriptor = [Chem.rdMolDescriptors.CalcNumSpiroAtoms(alpha) for alpha in mols]
|
542 |
+
fps = generating_newfps(fps, descriptor, 'NumSpiroAtoms', save_res)
|
543 |
+
clear_descriptor_memory(descriptor)
|
544 |
+
if phase27 == 1:
|
545 |
+
descriptor = [Chem.rdMolDescriptors.CalcNumBridgeheadAtoms(alpha) for alpha in mols]
|
546 |
+
fps = generating_newfps(fps, descriptor, 'NumBridgeheadAtoms', save_res)
|
547 |
+
clear_descriptor_memory(descriptor)
|
548 |
+
if phase28 == 1:
|
549 |
+
d1 = [Chem.MolSurf.PEOE_VSA1(alpha) for alpha in mols]
|
550 |
+
d2 = [Chem.MolSurf.PEOE_VSA2(alpha) for alpha in mols]
|
551 |
+
d3 = [Chem.MolSurf.PEOE_VSA3(alpha) for alpha in mols]
|
552 |
+
d4 = [Chem.MolSurf.PEOE_VSA4(alpha) for alpha in mols]
|
553 |
+
d5 = [Chem.MolSurf.PEOE_VSA5(alpha) for alpha in mols]
|
554 |
+
d6 = [Chem.MolSurf.PEOE_VSA6(alpha) for alpha in mols]
|
555 |
+
d7 = [Chem.MolSurf.PEOE_VSA7(alpha) for alpha in mols]
|
556 |
+
d8 = [Chem.MolSurf.PEOE_VSA8(alpha) for alpha in mols]
|
557 |
+
d9 = [Chem.MolSurf.PEOE_VSA9(alpha) for alpha in mols]
|
558 |
+
d10 = [Chem.MolSurf.PEOE_VSA10(alpha) for alpha in mols]
|
559 |
+
d11 = [Chem.MolSurf.PEOE_VSA11(alpha) for alpha in mols]
|
560 |
+
d12 = [Chem.MolSurf.PEOE_VSA12(alpha) for alpha in mols]
|
561 |
+
d13 = [Chem.MolSurf.PEOE_VSA13(alpha) for alpha in mols]
|
562 |
+
d14 = [Chem.MolSurf.PEOE_VSA14(alpha) for alpha in mols]
|
563 |
+
d1 = np.asarray(d1)
|
564 |
+
d2 = np.asarray(d2)
|
565 |
+
d3 = np.asarray(d3)
|
566 |
+
d4 = np.asarray(d4)
|
567 |
+
d5 = np.asarray(d5)
|
568 |
+
d6 = np.asarray(d6)
|
569 |
+
d7 = np.asarray(d7)
|
570 |
+
d8 = np.asarray(d8)
|
571 |
+
d9 = np.asarray(d9)
|
572 |
+
d10 = np.asarray(d10)
|
573 |
+
d11 = np.asarray(d11)
|
574 |
+
d12 = np.asarray(d12)
|
575 |
+
d13 = np.asarray(d13)
|
576 |
+
d14 = np.asarray(d14)
|
577 |
+
fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14],'PEOE_VSA_Series[1-14]_ind', save_res)
|
578 |
+
clear_descriptor_memory(d1)
|
579 |
+
clear_descriptor_memory(d2)
|
580 |
+
clear_descriptor_memory(d3)
|
581 |
+
clear_descriptor_memory(d4)
|
582 |
+
clear_descriptor_memory(d5)
|
583 |
+
clear_descriptor_memory(d6)
|
584 |
+
clear_descriptor_memory(d7)
|
585 |
+
clear_descriptor_memory(d8)
|
586 |
+
clear_descriptor_memory(d9)
|
587 |
+
clear_descriptor_memory(d10)
|
588 |
+
clear_descriptor_memory(d11)
|
589 |
+
clear_descriptor_memory(d12)
|
590 |
+
clear_descriptor_memory(d13)
|
591 |
+
clear_descriptor_memory(d14)
|
592 |
+
if phase29 == 1:
|
593 |
+
d1 = [Chem.MolSurf.SMR_VSA1(alpha) for alpha in mols]
|
594 |
+
d2 = [Chem.MolSurf.SMR_VSA2(alpha) for alpha in mols]
|
595 |
+
d3 = [Chem.MolSurf.SMR_VSA3(alpha) for alpha in mols]
|
596 |
+
d4 = [Chem.MolSurf.SMR_VSA4(alpha) for alpha in mols]
|
597 |
+
d5 = [Chem.MolSurf.SMR_VSA5(alpha) for alpha in mols]
|
598 |
+
d6 = [Chem.MolSurf.SMR_VSA6(alpha) for alpha in mols]
|
599 |
+
d7 = [Chem.MolSurf.SMR_VSA7(alpha) for alpha in mols]
|
600 |
+
d8 = [Chem.MolSurf.SMR_VSA8(alpha) for alpha in mols]
|
601 |
+
d9 = [Chem.MolSurf.SMR_VSA9(alpha) for alpha in mols]
|
602 |
+
d10 = [Chem.MolSurf.SMR_VSA10(alpha) for alpha in mols]
|
603 |
+
d1 = np.asarray(d1)
|
604 |
+
d2 = np.asarray(d2)
|
605 |
+
d3 = np.asarray(d3)
|
606 |
+
d4 = np.asarray(d4)
|
607 |
+
d5 = np.asarray(d5)
|
608 |
+
d6 = np.asarray(d6)
|
609 |
+
d7 = np.asarray(d7)
|
610 |
+
d8 = np.asarray(d8)
|
611 |
+
d9 = np.asarray(d9)
|
612 |
+
d10 = np.asarray(d10)
|
613 |
+
fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10],'SMR_VSA_Series[1-10]_ind', save_res)
|
614 |
+
clear_descriptor_memory(d1)
|
615 |
+
clear_descriptor_memory(d2)
|
616 |
+
clear_descriptor_memory(d3)
|
617 |
+
clear_descriptor_memory(d4)
|
618 |
+
clear_descriptor_memory(d5)
|
619 |
+
clear_descriptor_memory(d6)
|
620 |
+
clear_descriptor_memory(d7)
|
621 |
+
clear_descriptor_memory(d8)
|
622 |
+
clear_descriptor_memory(d9)
|
623 |
+
clear_descriptor_memory(d10)
|
624 |
+
if phase30 == 1:
|
625 |
+
d1 = [Chem.MolSurf.SlogP_VSA1(alpha) for alpha in mols]
|
626 |
+
d2 = [Chem.MolSurf.SlogP_VSA2(alpha) for alpha in mols]
|
627 |
+
d3 = [Chem.MolSurf.SlogP_VSA3(alpha) for alpha in mols]
|
628 |
+
d4 = [Chem.MolSurf.SlogP_VSA4(alpha) for alpha in mols]
|
629 |
+
d5 = [Chem.MolSurf.SlogP_VSA5(alpha) for alpha in mols]
|
630 |
+
d6 = [Chem.MolSurf.SlogP_VSA6(alpha) for alpha in mols]
|
631 |
+
d7 = [Chem.MolSurf.SlogP_VSA7(alpha) for alpha in mols]
|
632 |
+
d8 = [Chem.MolSurf.SlogP_VSA8(alpha) for alpha in mols]
|
633 |
+
d9 = [Chem.MolSurf.SlogP_VSA9(alpha) for alpha in mols]
|
634 |
+
d10= [Chem.MolSurf.SlogP_VSA10(alpha) for alpha in mols]
|
635 |
+
d11= [Chem.MolSurf.SlogP_VSA11(alpha) for alpha in mols]
|
636 |
+
d12= [Chem.MolSurf.SlogP_VSA12(alpha) for alpha in mols]
|
637 |
+
d1 = np.asarray(d1)
|
638 |
+
d2 = np.asarray(d2)
|
639 |
+
d3 = np.asarray(d3)
|
640 |
+
d4 = np.asarray(d4)
|
641 |
+
d5 = np.asarray(d5)
|
642 |
+
d6 = np.asarray(d6)
|
643 |
+
d7 = np.asarray(d7)
|
644 |
+
d8 = np.asarray(d8)
|
645 |
+
d9 = np.asarray(d9)
|
646 |
+
d10 = np.asarray(d10)
|
647 |
+
d11 = np.asarray(d11)
|
648 |
+
d12 = np.asarray(d12)
|
649 |
+
fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12],'SlogP_VSA_Series[1-12]_ind', save_res)
|
650 |
+
clear_descriptor_memory(d1)
|
651 |
+
clear_descriptor_memory(d2)
|
652 |
+
clear_descriptor_memory(d3)
|
653 |
+
clear_descriptor_memory(d4)
|
654 |
+
clear_descriptor_memory(d5)
|
655 |
+
clear_descriptor_memory(d6)
|
656 |
+
clear_descriptor_memory(d7)
|
657 |
+
clear_descriptor_memory(d8)
|
658 |
+
clear_descriptor_memory(d9)
|
659 |
+
clear_descriptor_memory(d10)
|
660 |
+
clear_descriptor_memory(d11)
|
661 |
+
clear_descriptor_memory(d12)
|
662 |
+
if phase31 == 1:
|
663 |
+
d1 = [Chem.EState.EState_VSA.EState_VSA1(alpha) for alpha in mols]
|
664 |
+
d2 = [Chem.EState.EState_VSA.EState_VSA2(alpha) for alpha in mols]
|
665 |
+
d3 = [Chem.EState.EState_VSA.EState_VSA3(alpha) for alpha in mols]
|
666 |
+
d4 = [Chem.EState.EState_VSA.EState_VSA4(alpha) for alpha in mols]
|
667 |
+
d5 = [Chem.EState.EState_VSA.EState_VSA5(alpha) for alpha in mols]
|
668 |
+
d6 = [Chem.EState.EState_VSA.EState_VSA6(alpha) for alpha in mols]
|
669 |
+
d7 = [Chem.EState.EState_VSA.EState_VSA7(alpha) for alpha in mols]
|
670 |
+
d8 = [Chem.EState.EState_VSA.EState_VSA8(alpha) for alpha in mols]
|
671 |
+
d9 = [Chem.EState.EState_VSA.EState_VSA9(alpha) for alpha in mols]
|
672 |
+
d10 = [Chem.EState.EState_VSA.EState_VSA10(alpha) for alpha in mols]
|
673 |
+
d11 = [Chem.EState.EState_VSA.EState_VSA11(alpha) for alpha in mols]
|
674 |
+
d1 = np.asarray(d1)
|
675 |
+
d2 = np.asarray(d2)
|
676 |
+
d3 = np.asarray(d3)
|
677 |
+
d4 = np.asarray(d4)
|
678 |
+
d5 = np.asarray(d5)
|
679 |
+
d6 = np.asarray(d6)
|
680 |
+
d7 = np.asarray(d7)
|
681 |
+
d8 = np.asarray(d8)
|
682 |
+
d9 = np.asarray(d9)
|
683 |
+
d10 = np.asarray(d10)
|
684 |
+
d11 = np.asarray(d11)
|
685 |
+
fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11],'EState_VSA_Series[1-11]_ind', save_res)
|
686 |
+
clear_descriptor_memory(d1)
|
687 |
+
clear_descriptor_memory(d2)
|
688 |
+
clear_descriptor_memory(d3)
|
689 |
+
clear_descriptor_memory(d4)
|
690 |
+
clear_descriptor_memory(d5)
|
691 |
+
clear_descriptor_memory(d6)
|
692 |
+
clear_descriptor_memory(d7)
|
693 |
+
clear_descriptor_memory(d8)
|
694 |
+
clear_descriptor_memory(d9)
|
695 |
+
clear_descriptor_memory(d10)
|
696 |
+
clear_descriptor_memory(d11)
|
697 |
+
if phase32 == 1:
|
698 |
+
d1 = [Chem.EState.EState_VSA.VSA_EState1(alpha) for alpha in mols]
|
699 |
+
d2 = [Chem.EState.EState_VSA.VSA_EState2(alpha) for alpha in mols]
|
700 |
+
d3 = [Chem.EState.EState_VSA.VSA_EState3(alpha) for alpha in mols]
|
701 |
+
d4 = [Chem.EState.EState_VSA.VSA_EState4(alpha) for alpha in mols]
|
702 |
+
d5 = [Chem.EState.EState_VSA.VSA_EState5(alpha) for alpha in mols]
|
703 |
+
d6 = [Chem.EState.EState_VSA.VSA_EState6(alpha) for alpha in mols]
|
704 |
+
d7 = [Chem.EState.EState_VSA.VSA_EState7(alpha) for alpha in mols]
|
705 |
+
d8 = [Chem.EState.EState_VSA.VSA_EState8(alpha) for alpha in mols]
|
706 |
+
d9 = [Chem.EState.EState_VSA.VSA_EState9(alpha) for alpha in mols]
|
707 |
+
d10 = [Chem.EState.EState_VSA.VSA_EState10(alpha) for alpha in mols]
|
708 |
+
d1 = np.asarray(d1)
|
709 |
+
d2 = np.asarray(d2)
|
710 |
+
d3 = np.asarray(d3)
|
711 |
+
d4 = np.asarray(d4)
|
712 |
+
d5 = np.asarray(d5)
|
713 |
+
d6 = np.asarray(d6)
|
714 |
+
d7 = np.asarray(d7)
|
715 |
+
d8 = np.asarray(d8)
|
716 |
+
d9 = np.asarray(d9)
|
717 |
+
d10 = np.asarray(d10)
|
718 |
+
fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10],'VSA_EState_Series[1-10]', save_res)
|
719 |
+
clear_descriptor_memory(d1)
|
720 |
+
clear_descriptor_memory(d2)
|
721 |
+
clear_descriptor_memory(d3)
|
722 |
+
clear_descriptor_memory(d4)
|
723 |
+
clear_descriptor_memory(d5)
|
724 |
+
clear_descriptor_memory(d6)
|
725 |
+
clear_descriptor_memory(d7)
|
726 |
+
clear_descriptor_memory(d8)
|
727 |
+
clear_descriptor_memory(d9)
|
728 |
+
clear_descriptor_memory(d10)
|
729 |
+
if phase33 == 1:
|
730 |
+
descriptor = [Chem.rdMolDescriptors.MQNs_(alpha) for alpha in mols]
|
731 |
+
# descriptor = Normalization(descriptor)
|
732 |
+
fps = generating_newfps(fps, descriptor, 'MQNs', save_res)
|
733 |
+
clear_descriptor_memory(descriptor)
|
734 |
+
if phase34 == 1:
|
735 |
+
descriptor = [Chem.rdMolDescriptors.CalcAUTOCORR2D(alpha) for alpha in mols]
|
736 |
+
fps = generating_newfps(fps, descriptor, 'AUTOCORR2D', save_res)
|
737 |
+
clear_descriptor_memory(descriptor)
|
738 |
+
if phase35 == 1:
|
739 |
+
descriptor = compute_descriptors_parallel(mols)
|
740 |
+
fps = generating_newfps(fps, descriptor, 'BCUT2D', save_res)
|
741 |
+
clear_descriptor_memory(descriptor)
|
742 |
+
####################################################
|
743 |
+
mols2 = process_molecules_parallel(mols, max_workers=8)
|
744 |
+
del mols
|
745 |
+
gc.collect()
|
746 |
+
####################################################
|
747 |
+
if phase36 == 1:
|
748 |
+
descriptor = [Chem.rdMolDescriptors.CalcAsphericity(alpha) for alpha in mols2]
|
749 |
+
fps = generating_newfps(fps, descriptor, 'Asphericity', save_res)
|
750 |
+
clear_descriptor_memory(descriptor)
|
751 |
+
if phase37 == 1:
|
752 |
+
descriptor = [Chem.rdMolDescriptors.CalcPBF(alpha) for alpha in mols2]
|
753 |
+
fps = generating_newfps(fps, descriptor, 'PBF', save_res)
|
754 |
+
clear_descriptor_memory(descriptor)
|
755 |
+
if phase38 == 1:
|
756 |
+
descriptor = [Chem.rdMolDescriptors.CalcRadiusOfGyration(alpha) for alpha in mols2]
|
757 |
+
fps = generating_newfps(fps, descriptor, 'RadiusOfGyration', save_res)
|
758 |
+
clear_descriptor_memory(descriptor)
|
759 |
+
if phase39 == 1:
|
760 |
+
descriptor = [Chem.rdMolDescriptors.CalcInertialShapeFactor(alpha) for alpha in mols2]
|
761 |
+
fps = generating_newfps(fps, descriptor, 'InertialShapeFactor', save_res)
|
762 |
+
clear_descriptor_memory(descriptor)
|
763 |
+
if phase40 == 1:
|
764 |
+
descriptor = [Chem.rdMolDescriptors.CalcEccentricity(alpha) for alpha in mols2]
|
765 |
+
fps = generating_newfps(fps, descriptor, 'Eccentricity', save_res)
|
766 |
+
clear_descriptor_memory(descriptor)
|
767 |
+
if phase41 == 1:
|
768 |
+
descriptor = [Chem.rdMolDescriptors.CalcSpherocityIndex(alpha) for alpha in mols2]
|
769 |
+
fps = generating_newfps(fps, descriptor, 'SpherocityIndex', save_res)
|
770 |
+
clear_descriptor_memory(descriptor)
|
771 |
+
if phase42 == 1:
|
772 |
+
d1 = [Chem.rdMolDescriptors.CalcPMI1(alpha) for alpha in mols2]
|
773 |
+
d2 = [Chem.rdMolDescriptors.CalcPMI2(alpha) for alpha in mols2]
|
774 |
+
d3 = [Chem.rdMolDescriptors.CalcPMI3(alpha) for alpha in mols2]
|
775 |
+
d1 = Normalization(d1)
|
776 |
+
d2 = Normalization(d2)
|
777 |
+
d3 = Normalization(d3)
|
778 |
+
d1 = np.asarray(d1)
|
779 |
+
d2 = np.asarray(d2)
|
780 |
+
d3 = np.asarray(d3)
|
781 |
+
fps = generating_newfps(fps, [d1,d2,d3], 'PMI_series[1-3]_ind', save_res)
|
782 |
+
clear_descriptor_memory(d1)
|
783 |
+
clear_descriptor_memory(d2)
|
784 |
+
clear_descriptor_memory(d3)
|
785 |
+
if phase43 == 1:
|
786 |
+
d1 = [Chem.rdMolDescriptors.CalcNPR1(alpha) for alpha in mols2]
|
787 |
+
d2 = [Chem.rdMolDescriptors.CalcNPR2(alpha) for alpha in mols2]
|
788 |
+
d1 = np.asarray(d1)
|
789 |
+
d2 = np.asarray(d2)
|
790 |
+
fps = generating_newfps(fps, [d1,d2], 'NPR_series[1-2]_ind', save_res)
|
791 |
+
clear_descriptor_memory(d1)
|
792 |
+
clear_descriptor_memory(d2)
|
793 |
+
if phase44 == 1:
|
794 |
+
descriptor = [Chem.rdMolDescriptors.CalcAUTOCORR3D(mols) for mols in mols2]
|
795 |
+
fps = generating_newfps(fps, descriptor, 'AUTOCORR3D', save_res)
|
796 |
+
clear_descriptor_memory(descriptor)
|
797 |
+
if phase45 == 1:
|
798 |
+
descriptor = [Chem.rdMolDescriptors.CalcRDF(mols) for mols in mols2]
|
799 |
+
descriptor = Normalization(descriptor)
|
800 |
+
fps = generating_newfps(fps, descriptor, 'RDF', save_res)
|
801 |
+
clear_descriptor_memory(descriptor)
|
802 |
+
if phase46 == 1:
|
803 |
+
descriptor = [Chem.rdMolDescriptors.CalcMORSE(mols) for mols in mols2]
|
804 |
+
descriptor = Normalization(descriptor)
|
805 |
+
fps = generating_newfps(fps, descriptor, 'MORSE', save_res)
|
806 |
+
clear_descriptor_memory(descriptor)
|
807 |
+
if phase47 == 1:
|
808 |
+
descriptor = [Chem.rdMolDescriptors.CalcWHIM(mols) for mols in mols2]
|
809 |
+
descriptor = Normalization(descriptor)
|
810 |
+
fps = generating_newfps(fps, descriptor, 'WHIM', save_res)
|
811 |
+
clear_descriptor_memory(descriptor)
|
812 |
+
if phase48 == 1:
|
813 |
+
descriptor = [Chem.rdMolDescriptors.CalcGETAWAY(mols) for mols in mols2]
|
814 |
+
descriptor = Normalization(descriptor)
|
815 |
+
fps = generating_newfps(fps, descriptor, 'GETAWAY', save_res)
|
816 |
+
clear_descriptor_memory(descriptor)
|
817 |
+
#########################################
|
818 |
+
if save_res == "pd":
|
819 |
+
fps.to_csv(f'{target_path}/{name}_feature_selection.csv')
|
820 |
+
|
821 |
+
fps = fps.astype('float')
|
822 |
+
return fps
|
823 |
+
|
824 |
+
|
825 |
+
def selection_fromStudy_compress(study_name, storage, unfixed=False, showlog=True):
    """Rebuild the 49-slot feature-selection vector from a study's best trial.

    The best trial of the Optuna study ``study_name`` (loaded from
    ``storage``) is read, and every trial parameter whose name matches a
    known descriptor group is written into that group's slot of an integer
    NumPy vector.

    Args:
        study_name: Name of the Optuna study to load.
        storage: Optuna storage handed to ``optuna.load_study``.
        unfixed: When False (default) the four required descriptors
            (MolWt, MolLogP, MolMR, TPSA) are forced to 1 and any trial
            value for them is ignored. When True every slot comes from the
            trial, and slots without a matching parameter stay 0.
        showlog: When True, print the best trial and the resulting vector.

    Returns:
        ``numpy.ndarray`` of length 49 with dtype ``int``.
    """
    # Descriptor-group names in slot order: position in this tuple == slot index.
    feature_order = (
        "MolWt",
        "MolLogP",
        "MolMR",
        "TPSA",
        "NumRotatableBonds",
        "HeavyAtomCount",
        "NumHAcceptors",
        "NumHDonors",
        "NumHeteroatoms",
        "NumValenceElectrons",
        "NHOHCount",
        "NOCount",
        "RingCount",
        "NumAromaticRings",
        "NumSaturatedRings",
        "NumAliphaticRings",
        "LabuteASA",
        "BalabanJ",
        "BertzCT",
        "Ipc",
        "kappa_Series[1-3]_ind",
        "Chi_Series[13]_ind",
        "Phi",
        "HallKierAlpha",
        "NumAmideBonds",
        "FractionCSP3",
        "NumSpiroAtoms",
        "NumBridgeheadAtoms",
        "PEOE_VSA_Series[1-14]_ind",
        "SMR_VSA_Series[1-10]_ind",
        "SlogP_VSA_Series[1-12]_ind",
        "EState_VSA_Series[1-11]_ind",
        "VSA_EState_Series[1-10]",
        "MQNs",
        "AUTOCORR2D",
        "BCUT2D",
        "Asphericity",
        "PBF",
        "RadiusOfGyration",
        "InertialShapeFactor",
        "Eccentricity",
        "SpherocityIndex",
        "PMI_series[1-3]_ind",
        "NPR_series[1-2]_ind",
        "AUTOCORR3D",
        "RDF",
        "MORSE",
        "WHIM",
        "GETAWAY",
    )
    param_to_index = {label: slot for slot, label in enumerate(feature_order)}

    # The required descriptors occupy the first four slots (0..3).
    required_features = ["MolWt", "MolLogP", "MolMR", "TPSA"]

    model_fea = np.zeros(len(feature_order), dtype=int)
    best_trial = optuna.load_study(study_name=study_name, storage=storage).best_trial

    if not unfixed:
        model_fea[:len(required_features)] = 1

    for label, choice in best_trial.params.items():
        if label not in param_to_index:
            # Trial parameter that is not a descriptor switch: ignore it.
            continue
        if not unfixed and label in required_features:
            # Required descriptors stay pinned to 1 in fixed mode.
            continue
        model_fea[param_to_index[label]] = choice

    if showlog:
        print(f"Best trial for study '{study_name}':")
        print("Best trial value:", best_trial.value)
        print("Best trial parameters:", best_trial.params)
        print("Generated fea:", model_fea)
        if not unfixed:
            print("Fixed features:", required_features)

    return model_fea
|
906 |
+
|
907 |
+
def selection_structure_compress(study_name, storage, input_dim, returnOnly=False):
    """Recreate the network described by the best trial of an Optuna study.

    Args:
        study_name: Name of the Optuna study to load.
        storage: Optuna storage handed to ``optuna.load_study``.
        input_dim: Number of input features; used as the ``Input`` shape.
        returnOnly: When True, only the learning rate is returned and no
            model is built.

    Returns:
        The learning rate alone when ``returnOnly`` is True, otherwise the
        tuple ``(model, lr)`` where ``model`` is an uncompiled
        ``tf.keras.Sequential``.

    Raises:
        KeyError: If the best trial lacks one of the hyperparameters read
            here (``n_layers``, ``layer_dropout``, the per-layer keys, ...).
    """
    best_trial = optuna.load_study(study_name=study_name, storage=storage).best_trial
    hp = best_trial.params
    print("Best trial params:", hp)

    # The learning rate is looked up under "lr" first and under
    # "Learning_rate" as a fallback.
    try:
        lr = hp["lr"]
    except Exception as e:
        print(f"Error occurred: {e}")
        print("Error occurred: changing name 'lr' to 'Learning_rate'")
        lr = hp["Learning_rate"]

    if returnOnly:
        return lr

    n_layers = hp["n_layers"]
    layer_dropout = hp["layer_dropout"]

    keras_layers = tf.keras.layers
    model = tf.keras.Sequential()
    model.add(keras_layers.Input(shape=(input_dim,)))

    for depth in range(n_layers):
        width = hp[f"n_units_l_{depth}"]
        l2_strength = hp[f"n_decay_l_{depth}"]

        hidden = keras_layers.Dense(
            width,
            activation="relu",
            kernel_initializer='glorot_uniform',
            kernel_regularizer=tf.keras.regularizers.l2(l2_strength),
        )
        model.add(hidden)
        # NOTE(review): this LeakyReLU follows a ReLU-activated Dense layer,
        # so it looks redundant; kept because it is part of the architecture
        # the study was run with -- confirm before removing.
        model.add(keras_layers.LeakyReLU(alpha=0.01))
        if layer_dropout == 1:
            # Per-layer dropout mode: one rate per hidden layer.
            model.add(keras_layers.Dropout(rate=hp[f"F_dropout_{depth}"]))

    if layer_dropout == 0:
        # Single dropout placed once, just before the output layer.
        model.add(keras_layers.Dropout(rate=hp["last_dropout"]))

    model.add(keras_layers.Dense(units=1))

    print(f"Model created from best trial of '{study_name}':")
    print("  Params:", hp)
    print("  Best trial value:", best_trial.value)
    return model, lr
|
extra_code/learning_process.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import gc
|
3 |
+
import sys
|
4 |
+
import numpy as np
|
5 |
+
from sklearn.model_selection import train_test_split
|
6 |
+
from sklearn.metrics import r2_score
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
import tensorflow as tf
|
9 |
+
import logging
|
10 |
+
|
11 |
+
from tensorflow.keras.mixed_precision import set_global_policy
|
12 |
+
# Set the global Keras dtype policy before any model is loaded or built.
set_global_policy('mixed_float16')

# Positional command-line arguments, read at import time:
#   1: batch size            2: number of epochs       3: learning rate
#   4: path passed to np.load for the feature matrix
#   5: path passed to np.load for the target values
#   6: (optional) trial number; enables per-epoch intermediate reporting
BATCHSIZE = int(sys.argv[1])
EPOCHS = int(sys.argv[2])
lr = float(sys.argv[3])
fps_file = sys.argv[4]
y_true_file = sys.argv[5]
if len(sys.argv) > 6:
    trial_number = int(sys.argv[6])
else:
    trial_number = None
|
20 |
+
|
21 |
+
def save_history_plot(history):
    """Draw the training curves of a fit and save them as a PNG.

    The top panel shows the ``'loss'`` series; the bottom panel shows every
    other series recorded in ``history.history``. The figure is written to
    ``save_model/full_model.png`` at 300 dpi and then closed.

    Args:
        history: Object exposing a ``history`` mapping of series name to
            per-epoch values (as returned by ``model.fit``).
    """
    curves = history.history
    fig, (loss_ax, metric_ax) = plt.subplots(2, 1, figsize=(12, 8))

    # Top panel: training loss only.
    loss_ax.plot(curves['loss'], label='Training Loss')
    loss_ax.set_title('Model Loss')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.legend()

    # Bottom panel: everything that was recorded except the training loss.
    for label, series in curves.items():
        if label == 'loss':
            continue
        metric_ax.plot(series, label=label)
    metric_ax.set_title('Model Metrics')
    metric_ax.set_ylabel('Value')
    metric_ax.set_xlabel('Epoch')
    metric_ax.legend()

    fig.tight_layout()
    fig.savefig("save_model/full_model.png", dpi=300)
    plt.close(fig)
|
42 |
+
|
43 |
+
def load_model():
    """Load the uncompiled Keras model stored at ``save_model/full_model.keras``.

    Returns:
        The loaded model, or ``None`` when loading fails for any reason
        (the error is logged rather than raised).
    """
    model_path = "save_model/full_model.keras"
    try:
        restored = tf.keras.models.load_model(model_path, compile=False)
    except Exception as e:
        logging.error(f"Error loading model: {e}")
        return None

    logging.info(f"Model successfully loaded from {model_path}")
    return restored
|
52 |
+
|
53 |
+
def preprocess_data(xtr, ytr):
    """Wrap feature/target arrays in a shuffled, batched ``tf.data.Dataset``.

    Args:
        xtr: Feature array; its length is used as the shuffle buffer size,
            so the whole set is shuffled.
        ytr: Target values aligned with ``xtr``.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, targets)`` batches of
        ``BATCHSIZE`` elements, with prefetching enabled.
    """
    dataset = tf.data.Dataset.from_tensor_slices((xtr, ytr))
    # cache() must come BEFORE shuffle(): a cache placed after shuffle/batch
    # stores the first epoch's shuffled batches and replays exactly the same
    # batches on every later epoch, which silently disables per-epoch
    # reshuffling. Caching the raw elements keeps the reuse benefit while
    # letting shuffle() draw a new order each epoch.
    return (
        dataset
        .cache()
        .shuffle(buffer_size=len(xtr))
        .batch(BATCHSIZE)
        .prefetch(tf.data.AUTOTUNE)
    )
|
57 |
+
|
58 |
+
def train_model(model, train_dataset, valid_dataset):
    """Fit ``model`` with early stopping and save the training-curve plot.

    When the module-level ``trial_number`` is set, an extra callback prints
    one ``intermediate_value:<epoch>:<negated val_loss>`` line per epoch to
    stdout (presumably consumed by the process that launched this script --
    verify against the caller).

    Args:
        model: Compiled Keras model.
        train_dataset: Dataset used for fitting.
        valid_dataset: Dataset used for validation and for the
            ``val_loss`` that early stopping monitors.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """

    class ReportIntermediateCallback(tf.keras.callbacks.Callback):
        """Emit the negated validation loss after every epoch."""

        def on_epoch_end(self, epoch, logs=None):
            if not logs or 'val_loss' not in logs:
                return
            print(f"intermediate_value:{epoch}:{-logs['val_loss']}")
            sys.stdout.flush()

    # Stop after 50 epochs without val_loss improvement and roll back to the
    # best weights seen.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=50,
        restore_best_weights=True,
        mode='min',
        verbose=1,
    )

    # The reporting callback (if any) is registered ahead of early stopping.
    callbacks = [ReportIntermediateCallback()] if trial_number is not None else []
    callbacks.append(early_stopping)

    history = model.fit(
        train_dataset,
        epochs=EPOCHS,
        validation_data=valid_dataset,
        callbacks=callbacks,
        verbose=0,
    )
    save_history_plot(history)
    return history
|
87 |
+
|
88 |
+
def clear_gpu_memory():
    """Reset the Keras backend session and run a garbage-collection pass."""
    # Clear the Keras session first so the collector can reclaim what it held.
    tf.keras.backend.clear_session()
    gc.collect()
    logging.info("GPU memory cleared.")
|
92 |
+
|
93 |
+
def main():
    """Train the saved model on the given data and print its test-set R2.

    Steps: load and compile the stored model, load the feature and target
    arrays, check that the feature width matches the model input, split the
    data 80/20 into train/test and then 90/10 into train/validation (both
    with ``random_state=42``), train, predict on the test split and print
    the result to stdout.

    Output written to stdout:
        * ``R2: <value>`` with six decimals for a finite, positive score;
        * ``R2: 0.0 (prune)`` for a NaN, infinite or non-positive score;
        * ``0.000000`` when any step raised (the error itself is logged).
          NOTE(review): this line has no ``R2:`` prefix, unlike the other
          two -- confirm the consumer of this output handles both shapes.

    The Keras session is cleared on every exit path.
    """
    try:
        model = load_model()
        if model is None:
            raise ValueError("Failed to load model")

        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[
                tf.keras.losses.MeanSquaredError(),
                tf.keras.losses.MeanAbsoluteError(),
                tf.keras.metrics.RootMeanSquaredError(),
            ],
        )

        fps = np.load(fps_file)
        y_true = np.load(y_true_file)

        # Fail early when the stored model was built for another feature width.
        expected_dim = model.input_shape[1]
        if expected_dim != fps.shape[1]:
            raise ValueError(f"Model input dimension ({expected_dim}) does not match data dimension ({fps.shape[1]})")

        # 80/20 train/test split, then 10% of the training part for validation.
        x_fit, x_test, y_fit, y_test = train_test_split(
            fps, y_true, test_size=0.2, random_state=42
        )
        x_fit, x_val, y_fit, y_val = train_test_split(
            x_fit, y_fit, test_size=0.1, random_state=42
        )

        train_model(model, preprocess_data(x_fit, y_fit), preprocess_data(x_val, y_val))

        predictions = model.predict(x_test, verbose=0)
        if not np.all(np.isfinite(predictions)):
            raise ValueError("Invalid predictions: NaN or inf values encountered.")

        score = r2_score(y_test, predictions)
        if np.isfinite(score) and score > 0:
            print(f"R2: {score:.6f}")
        else:
            print("R2: 0.0 (prune)")

    except Exception as e:
        logging.error(f"Error in learning process: {e}")
        print("0.000000")

    finally:
        clear_gpu_memory()


if __name__ == "__main__":
    main()
|