arer90 commited on
Commit
f2d6c5c
·
1 Parent(s): 3ff3f4b

code 241111

Browse files
1_standard_ML.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
2_solubility_fingerprint_compare.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
3_solubility_feature_checker.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
3_solubility_feature_deeplearning.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
4_ANO_feature.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
5_ANO_structure.ipynb ADDED
@@ -0,0 +1,1750 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 13,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "import sys\n",
11
+ "import time\n",
12
+ "import subprocess\n",
13
+ "import logging\n",
14
+ "import warnings\n",
15
+ "import gc\n",
16
+ "import numpy as np\n",
17
+ "import pandas as pd\n",
18
+ "import seaborn as sns\n",
19
+ "import matplotlib.pyplot as plt\n",
20
+ "import matplotlib.patches as mpatches\n",
21
+ "from concurrent.futures import ProcessPoolExecutor, as_completed"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 14,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "from rdkit import Chem\n",
31
+ "from rdkit.Chem import AllChem, DataStructs, Draw\n",
32
+ "from rdkit import RDConfig\n",
33
+ "from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges\n",
34
+ "from rdkit.Chem.AllChem import GetMorganGenerator\n",
35
+ "from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray\n",
36
+ "from rdkit.Avalon.pyAvalonTools import GetAvalonFP"
37
+ ]
38
+ },
39
+ {
40
+ "cell_type": "code",
41
+ "execution_count": 15,
42
+ "metadata": {},
43
+ "outputs": [],
44
+ "source": [
45
+ "import tensorflow as tf\n",
46
+ "from tensorflow import keras\n",
47
+ "from tensorflow.keras import layers\n",
48
+ "from tensorflow.keras.models import Sequential\n",
49
+ "from tensorflow.keras.layers import Dense, Dropout, Activation\n",
50
+ "from tensorflow.keras.regularizers import l2\n",
51
+ "from tensorflow.keras.optimizers import Adam\n",
52
+ "from tensorflow.keras import regularizers"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 16,
58
+ "metadata": {},
59
+ "outputs": [],
60
+ "source": [
61
+ "from sklearn.model_selection import train_test_split\n",
62
+ "from sklearn.linear_model import Ridge\n",
63
+ "from sklearn.ensemble import RandomForestRegressor\n",
64
+ "from sklearn.neural_network import MLPRegressor\n",
65
+ "from sklearn.svm import SVR\n",
66
+ "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 17,
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "import optuna\n",
76
+ "from optuna.trial import TrialState"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 18,
82
+ "metadata": {},
83
+ "outputs": [],
84
+ "source": [
85
+ "tf.keras.backend.clear_session()\n",
86
+ "gpus = tf.config.experimental.list_physical_devices('GPU')\n",
87
+ "if gpus:\n",
88
+ " try:\n",
89
+ " for gpu in gpus:\n",
90
+ " tf.config.experimental.set_memory_growth(gpu, True)\n",
91
+ " except RuntimeError as e:\n",
92
+ " print(e)"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": 19,
98
+ "metadata": {},
99
+ "outputs": [],
100
+ "source": [
101
+ "target_path = \"result/5_ANO_structure\"\n",
102
+ "os.makedirs(target_path, exist_ok=True)"
103
+ ]
104
+ },
105
+ {
106
+ "cell_type": "code",
107
+ "execution_count": 20,
108
+ "metadata": {},
109
+ "outputs": [],
110
+ "source": [
111
+ "data_ws = pd.read_csv('./data/ws496_logS.csv', dtype={'SMILES': 'string'})\n",
112
+ "smiles_ws = data_ws['SMILES']\n",
113
+ "y_ws = data_ws.iloc[:, 2]\n",
114
+ "\n",
115
+ "data_delaney = pd.read_csv('./data/delaney-processed.csv', dtype={'smiles': 'string'})\n",
116
+ "smiles_de = data_delaney['smiles']\n",
117
+ "y_de = data_delaney.iloc[:, 1]\n",
118
+ "\n",
119
+ "data_lovric2020 = pd.read_csv('./data/Lovric2020_logS0.csv', dtype={'isomeric_smiles': 'string'})\n",
120
+ "smiles_lo = data_lovric2020['isomeric_smiles']\n",
121
+ "y_lo = data_lovric2020.iloc[:, 1]\n",
122
+ "\n",
123
+ "data_huuskonen = pd.read_csv('./data/huusk.csv', dtype={'SMILES': 'string'})\n",
124
+ "smiles_hu = data_huuskonen['SMILES']\n",
125
+ "y_hu = data_huuskonen.iloc[:, -1].astype('float')"
126
+ ]
127
+ },
128
+ {
129
+ "cell_type": "code",
130
+ "execution_count": 21,
131
+ "metadata": {},
132
+ "outputs": [],
133
+ "source": [
134
+ "def mol3d(mol):\n",
135
+ " mol = Chem.AddHs(mol)\n",
136
+ " optimization_methods = [\n",
137
+ " (AllChem.EmbedMolecule, (mol, AllChem.ETKDGv3()), {}),\n",
138
+ " (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),\n",
139
+ " (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200})\n",
140
+ " ]\n",
141
+ "\n",
142
+ " for method, args, kwargs in optimization_methods:\n",
143
+ " try:\n",
144
+ " method(*args, **kwargs)\n",
145
+ " if mol.GetNumConformers() > 0:\n",
146
+ " return mol\n",
147
+ " except ValueError as e:\n",
148
+ " print(f\"Error: {e} - Trying next optimization method [{method}]\")\n",
149
+ "\n",
150
+ " print(f\"Invalid mol for 3d {'\\033[94m'}{Chem.MolToSmiles(mol)}{'\\033[0m'} - No conformer generated\")\n",
151
+ " return None"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "code",
156
+ "execution_count": 22,
157
+ "metadata": {},
158
+ "outputs": [],
159
+ "source": [
160
+ "def convert_smiles_to_mol(smiles, fail_folder=None, index=None, yvalue=None):\n",
161
+ " mol = Chem.MolFromSmiles(smiles)\n",
162
+ " if mol is None:\n",
163
+ " print(f\"[convert_smiles_to_mol] Cannot convert {smiles} to Mols\")\n",
164
+ " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": \"Invalid SMILES\"}\n",
165
+ "\n",
166
+ " try:\n",
167
+ " Chem.Kekulize(mol, clearAromaticFlags=True)\n",
168
+ " isomeric_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)\n",
169
+ " mol = Chem.MolFromSmiles(isomeric_smiles)\n",
170
+ " except Exception as e:\n",
171
+ " print(f\"[convert_smiles_to_mol] failed {smiles} isomeric_smiles by {e}\")\n",
172
+ " if fail_folder and index is not None:\n",
173
+ " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
174
+ " img = Draw.MolToImage(mol)\n",
175
+ " img.save(img_path)\n",
176
+ " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"Isomeric SMILES error: {e}\"}\n",
177
+ "\n",
178
+ " try:\n",
179
+ " Chem.SanitizeMol(mol)\n",
180
+ " except Exception as e:\n",
181
+ " print(f\"[convert_smiles_to_mol] failed {smiles} SanitizeMol by {e}\")\n",
182
+ " if fail_folder and index is not None:\n",
183
+ " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
184
+ " img = Draw.MolToImage(mol)\n",
185
+ " img.save(img_path)\n",
186
+ " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"SanitizeMol error: {e}\"}\n",
187
+ "\n",
188
+ " return mol, None"
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": 23,
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": [
197
+ "def process_smiles(smiles, yvalue, fail_folder, index):\n",
198
+ " mol, error = convert_smiles_to_mol(smiles, fail_folder, index, yvalue)\n",
199
+ " if error:\n",
200
+ " return None, None, error\n",
201
+ "\n",
202
+ " mol_3d = mol3d(mol)\n",
203
+ " if mol_3d:\n",
204
+ " return smiles, yvalue, None\n",
205
+ " else:\n",
206
+ " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
207
+ " img = Draw.MolToImage(mol)\n",
208
+ " img.save(img_path)\n",
209
+ " return None, None, {\"smiles\": smiles, \"y_value\": yvalue}\n",
210
+ "\n",
211
+ "def process_dataset(smiles_list, y_values, dataset_name, target_path=\"result\", max_workers=None):\n",
212
+ " start = time.time()\n",
213
+ " valid_smiles, valid_y = [], []\n",
214
+ " error_smiles_list = []\n",
215
+ " fail_folder = f\"{target_path}/failed/{dataset_name}\"\n",
216
+ " os.makedirs(fail_folder, exist_ok=True)\n",
217
+ "\n",
218
+ " with ProcessPoolExecutor(max_workers=max_workers) as executor:\n",
219
+ " futures = [\n",
220
+ " executor.submit(process_smiles, smiles, yvalue, fail_folder, i)\n",
221
+ " for i, (smiles, yvalue) in enumerate(zip(smiles_list, y_values))\n",
222
+ " ]\n",
223
+ " for future in as_completed(futures):\n",
224
+ " smiles, yvalue, error = future.result()\n",
225
+ " if error:\n",
226
+ " error_smiles_list.append(error)\n",
227
+ " elif smiles is not None and yvalue is not None:\n",
228
+ " valid_smiles.append(smiles)\n",
229
+ " valid_y.append(yvalue)\n",
230
+ "\n",
231
+ " if error_smiles_list:\n",
232
+ " error_df = pd.DataFrame(error_smiles_list)\n",
233
+ " error_df.to_csv(os.path.join(fail_folder, \"failed_smiles.csv\"), index=False)\n",
234
+ " print(f\" [{dataset_name:<10}] : {time.time()-start:.4f} sec\")\n",
235
+ " return valid_smiles, valid_y"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": 24,
241
+ "metadata": {},
242
+ "outputs": [
243
+ {
244
+ "name": "stdout",
245
+ "output_type": "stream",
246
+ "text": [
247
+ " [ws496 ] : 0.8649 sec\n",
248
+ " [delaney ] : 1.3527 sec\n",
249
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x560fe30ad760>]\n",
250
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x560fe30ae5f0>]\n",
251
+ "Invalid mol for 3d \u001b[94m[H]O[C@@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
252
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x560fe30ad760>]\n",
253
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x560fe30ae5f0>]\n",
254
+ "Invalid mol for 3d \u001b[94m[H]O[C@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
255
+ " [Lovric2020_logS0] : 8.3057 sec\n",
256
+ " [huusk ] : 1.5089 sec\n"
257
+ ]
258
+ }
259
+ ],
260
+ "source": [
261
+ "smiles_ws, y_ws = process_dataset(smiles_ws, y_ws, \"ws496\", target_path)\n",
262
+ "smiles_de, y_de = process_dataset(smiles_de, y_de, \"delaney\", target_path)\n",
263
+ "smiles_lo, y_lo = process_dataset(smiles_lo, y_lo, \"Lovric2020_logS0\", target_path)\n",
264
+ "smiles_hu, y_hu = process_dataset(smiles_hu, y_hu, \"huusk\", target_path)"
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": 25,
270
+ "metadata": {},
271
+ "outputs": [],
272
+ "source": [
273
+ "LEN_OF_FF = 2048\n",
274
+ "LEN_OF_MA = 167\n",
275
+ "LEN_OF_AV = 512"
276
+ ]
277
+ },
278
+ {
279
+ "cell_type": "code",
280
+ "execution_count": 26,
281
+ "metadata": {},
282
+ "outputs": [],
283
+ "source": [
284
+ "def get_fingerprints(mol):\n",
285
+ " if mol is None:\n",
286
+ " return None, None, None\n",
287
+ " \n",
288
+ " morgan_generator = GetMorganGenerator(radius=2, fpSize=LEN_OF_FF)\n",
289
+ " ecfp = morgan_generator.GetFingerprint(mol)\n",
290
+ " ecfp_array = np.zeros((LEN_OF_FF,),dtype=int)\n",
291
+ " DataStructs.ConvertToNumpyArray(ecfp, ecfp_array)\n",
292
+ " \n",
293
+ " maccs = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)\n",
294
+ "\n",
295
+ " avalon_fp = GetAvalonFP(mol)\n",
296
+ " avalon_array = np.zeros((LEN_OF_AV,),dtype=int)\n",
297
+ " DataStructs.ConvertToNumpyArray(avalon_fp, avalon_array)\n",
298
+ " \n",
299
+ " return ecfp_array, maccs, avalon_array\n",
300
+ "\n",
301
+ "def fp_converter(data, use_parallel=True):\n",
302
+ " mols = [Chem.MolFromSmiles(smi) for smi in data]\n",
303
+ " \n",
304
+ " if use_parallel:\n",
305
+ " try: \n",
306
+ " with ProcessPoolExecutor() as executor:\n",
307
+ " results = list(executor.map(get_fingerprints, mols))\n",
308
+ " except Exception as e:\n",
309
+ " print(f\"Parallel processing failed due to: {e}. Falling back to sequential processing.\")\n",
310
+ " use_parallel = False\n",
311
+ " \n",
312
+ " if not use_parallel:\n",
313
+ " results = [get_fingerprints(mol) for mol in mols]\n",
314
+ " \n",
315
+ " ECFP, MACCS, AvalonFP = zip(*results)\n",
316
+ " \n",
317
+ " ECFP_container = np.vstack([arr for arr in ECFP if arr is not None])\n",
318
+ " MACCS_container = np.zeros((len(MACCS), LEN_OF_MA), dtype=int)\n",
319
+ " AvalonFP_container = np.vstack([arr for arr in AvalonFP if arr is not None])\n",
320
+ "\n",
321
+ " for i, fp in enumerate(MACCS):\n",
322
+ " if fp is not None:\n",
323
+ " DataStructs.ConvertToNumpyArray(fp, MACCS_container[i])\n",
324
+ " \n",
325
+ " return mols, ECFP_container, MACCS_container, AvalonFP_container"
326
+ ]
327
+ },
328
+ {
329
+ "cell_type": "code",
330
+ "execution_count": 27,
331
+ "metadata": {},
332
+ "outputs": [],
333
+ "source": [
334
+ "mol_ws, x_ws, MACCS_ws, AvalonFP_ws = fp_converter(smiles_ws,target_path)\n",
335
+ "mol_de, x_de, MACCS_de, AvalonFP_de = fp_converter(smiles_de,target_path)\n",
336
+ "mol_lo, x_lo, MACCS_lo, AvalonFP_lo = fp_converter(smiles_lo,target_path)\n",
337
+ "mol_hu, x_hu, MACCS_hu, AvalonFP_hu = fp_converter(smiles_hu,target_path)"
338
+ ]
339
+ },
340
+ {
341
+ "cell_type": "code",
342
+ "execution_count": 28,
343
+ "metadata": {},
344
+ "outputs": [],
345
+ "source": [
346
+ "def concatenate_to_numpy(*dataframes):\n",
347
+ " numpy_arrays = [df.to_numpy() if isinstance(df, pd.DataFrame) else df for df in dataframes]\n",
348
+ " if not all(isinstance(arr, np.ndarray) for arr in numpy_arrays):\n",
349
+ " raise ValueError(\"All inputs must be either pandas DataFrame or numpy array\")\n",
350
+ " return np.concatenate(numpy_arrays, axis=1)"
351
+ ]
352
+ },
353
+ {
354
+ "cell_type": "code",
355
+ "execution_count": 29,
356
+ "metadata": {},
357
+ "outputs": [],
358
+ "source": [
359
+ "group_nws = concatenate_to_numpy(x_ws, MACCS_ws, AvalonFP_ws)\n",
360
+ "group_nde = concatenate_to_numpy(x_de, MACCS_de, AvalonFP_de)\n",
361
+ "group_nlo = concatenate_to_numpy(x_lo, MACCS_lo, AvalonFP_lo)\n",
362
+ "group_nhu = concatenate_to_numpy(x_hu, MACCS_hu, AvalonFP_hu)"
363
+ ]
364
+ },
365
+ {
366
+ "cell_type": "code",
367
+ "execution_count": 30,
368
+ "metadata": {},
369
+ "outputs": [],
370
+ "source": [
371
+ "BATCHSIZE = 32\n",
372
+ "EPOCHS = 1000\n",
373
+ "lr = 0.0001\n",
374
+ "decay = 1e-4"
375
+ ]
376
+ },
377
+ {
378
+ "cell_type": "code",
379
+ "execution_count": 41,
380
+ "metadata": {},
381
+ "outputs": [],
382
+ "source": [
383
+ "def search_model(trial, input_dim):\n",
384
+ " n_layers = trial.suggest_int(\"n_layers\", 1, 3)\n",
385
+ " model = tf.keras.Sequential()\n",
386
+ " model.add(tf.keras.layers.Input(shape=(input_dim,)))\n",
387
+ " layer_dropout = trial.suggest_int(\"layer_dropout\", 0, 1)\n",
388
+ " \n",
389
+ " for i in range(n_layers):\n",
390
+ " num_hidden = trial.suggest_int(f\"n_units_l_{i}\", 2, 10000)\n",
391
+ " num_decay = trial.suggest_categorical(f\"n_decay_l_{i}\", [1e-3, 1e-4, 1e-5])\n",
392
+ " model.add(\n",
393
+ " tf.keras.layers.Dense(\n",
394
+ " num_hidden,\n",
395
+ " activation=\"relu\",\n",
396
+ " kernel_initializer='glorot_uniform',\n",
397
+ " kernel_regularizer=tf.keras.regularizers.l2(num_decay),\n",
398
+ " )\n",
399
+ " )\n",
400
+ " if layer_dropout == 1:\n",
401
+ " fdropout1 = trial.suggest_categorical(f\"F_dropout_{i}\", [0.1, 0.2, 0.3])\n",
402
+ " model.add(tf.keras.layers.Dropout(rate=fdropout1))\n",
403
+ " \n",
404
+ " if layer_dropout == 0:\n",
405
+ " fdropout2 = trial.suggest_categorical(\"last_dropout\", [0.1, 0.2, 0.3])\n",
406
+ " model.add(tf.keras.layers.Dropout(rate=fdropout2))\n",
407
+ " \n",
408
+ " model.add(tf.keras.layers.Dense(units=1))\n",
409
+ " # # Colab\n",
410
+ " # learningr = trial.suggest_categorical(\"Learning_rate\",[0.01,0.001,0.0001])\n",
411
+ " # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learningr),\n",
412
+ " # loss=tf.keras.losses.MeanSquaredError(),\n",
413
+ " # metrics=[tf.keras.losses.MeanSquaredError(),\n",
414
+ " # tf.keras.losses.MeanAbsoluteError(),\n",
415
+ " # tf.keras.metrics.RootMeanSquaredError()])\n",
416
+ " return model\n",
417
+ "\n",
418
+ "def save_model(trial, x_data):\n",
419
+ " model_path = \"save_model/full_model.keras\"\n",
420
+ " if not os.path.exists(model_path):\n",
421
+ " try:\n",
422
+ " model = search_model(trial, x_data.shape[1])\n",
423
+ " os.makedirs(\"save_model\", exist_ok=True)\n",
424
+ " model.save(model_path)\n",
425
+ " print(f\"Model successfully saved to {model_path}\")\n",
426
+ " except Exception as e:\n",
427
+ " print(f\"Error saving model: {e}\")\n",
428
+ " else:\n",
429
+ " print(f\"Model already exists at {model_path}\")\n",
430
+ " os.remove(model_path)\n",
431
+ " save_model(trial, x_data)"
432
+ ]
433
+ },
434
+ {
435
+ "cell_type": "code",
436
+ "execution_count": 42,
437
+ "metadata": {},
438
+ "outputs": [],
439
+ "source": [
440
+ "import logging\n",
441
+ "import warnings\n",
442
+ "\n",
443
+ "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
444
+ "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
445
+ "os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'\n",
446
+ "os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'\n",
447
+ "os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices'\n",
448
+ "os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/local/cuda --xla_gpu_force_compilation_parallelism=1'\n",
449
+ "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'\n",
450
+ "os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'\n",
451
+ "os.environ['TF_NUMA_NODES'] = '1'\n",
452
+ "\n",
453
+ "warnings.filterwarnings('ignore')\n",
454
+ "\n",
455
+ "warnings.simplefilter(action='ignore', category=FutureWarning)\n",
456
+ "\n",
457
+ "logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
458
+ "\n",
459
+ "tf.get_logger().setLevel('ERROR')\n",
460
+ "tf.autograph.set_verbosity(0)\n",
461
+ "\n",
462
+ "def suppress_warnings(condition=True):\n",
463
+ " if condition:\n",
464
+ " logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
465
+ " os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
466
+ " else:\n",
467
+ " logging.getLogger('tensorflow').setLevel(logging.WARNING)\n",
468
+ " os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'\n",
469
+ "\n",
470
+ "suppress_warnings(condition=True)"
471
+ ]
472
+ },
473
+ {
474
+ "cell_type": "code",
475
+ "execution_count": 43,
476
+ "metadata": {},
477
+ "outputs": [],
478
+ "source": [
479
+ "def objective_ws_struct(trial):\n",
480
+ " try:\n",
481
+ " y_true = np.asarray(y_ws).astype('float')\n",
482
+ " np.save('new_fps.npy', group_nws)\n",
483
+ " np.save('y_true.npy', y_true)\n",
484
+ " \n",
485
+ " save_model(trial, group_nws)\n",
486
+ "\n",
487
+ " lr = trial.suggest_categorical(f\"lr\", [1e-3, 1e-4, 1e-5])\n",
488
+ "\n",
489
+ " result = subprocess.run(['python3', './extra_code/learning_process.py', \n",
490
+ " str(BATCHSIZE), str(EPOCHS), \n",
491
+ " str(lr), \n",
492
+ " 'new_fps.npy', 'y_true.npy'],\n",
493
+ " stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
494
+ "\n",
495
+ " if result.stderr:\n",
496
+ " filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') if \"could not open file to read NUMA node\" not in line and \"Your kernel may have been built without NUMA support\" not in line])\n",
497
+ " if filtered_stderr:\n",
498
+ " print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
499
+ "\n",
500
+ " for line in result.stdout.splitlines():\n",
501
+ " if \"R2\" in line:\n",
502
+ " if \"(prune)\" in line:\n",
503
+ " print(f\"Pruning trial due to poor R2: {line}\")\n",
504
+ " r2_result = 0.0\n",
505
+ " trial.report(r2_result, step=0)\n",
506
+ " raise optuna.exceptions.TrialPruned()\n",
507
+ " else:\n",
508
+ " r2_result = float(line.split(\":\")[1].strip())\n",
509
+ " print(f\"R2 score: {r2_result}\")\n",
510
+ " trial.report(r2_result, step=0)\n",
511
+ "\n",
512
+ " if trial.should_prune():\n",
513
+ " raise optuna.exceptions.TrialPruned()\n",
514
+ "\n",
515
+ " except Exception as e:\n",
516
+ " print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
517
+ " r2_result = 0.0\n",
518
+ "\n",
519
+ " gc.collect()\n",
520
+ "\n",
521
+ " return r2_result"
522
+ ]
523
+ },
524
+ {
525
+ "cell_type": "code",
526
+ "execution_count": 44,
527
+ "metadata": {},
528
+ "outputs": [],
529
+ "source": [
530
+ "def objective_de_struct(trial):\n",
531
+ " try:\n",
532
+ " y_true = np.asarray(y_de).astype('float')\n",
533
+ " np.save('new_fps.npy', group_nde)\n",
534
+ " np.save('y_true.npy', y_true)\n",
535
+ " \n",
536
+ " save_model(trial, group_nde)\n",
537
+ "\n",
538
+ " lr = trial.suggest_categorical(f\"lr\", [1e-3, 1e-4, 1e-5])\n",
539
+ "\n",
540
+ " result = subprocess.run(['python3', './extra_code/learning_process.py', \n",
541
+ " str(BATCHSIZE), str(EPOCHS), \n",
542
+ " str(lr), \n",
543
+ " 'new_fps.npy', 'y_true.npy'],\n",
544
+ " stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
545
+ "\n",
546
+ " if result.stderr:\n",
547
+ " filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') if \"could not open file to read NUMA node\" not in line and \"Your kernel may have been built without NUMA support\" not in line])\n",
548
+ " if filtered_stderr:\n",
549
+ " print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
550
+ "\n",
551
+ " for line in result.stdout.splitlines():\n",
552
+ " if \"R2\" in line:\n",
553
+ " if \"(prune)\" in line:\n",
554
+ " print(f\"Pruning trial due to poor R2: {line}\")\n",
555
+ " r2_result = 0.0\n",
556
+ " trial.report(r2_result, step=0)\n",
557
+ " raise optuna.exceptions.TrialPruned()\n",
558
+ " else:\n",
559
+ " r2_result = float(line.split(\":\")[1].strip())\n",
560
+ " print(f\"R2 score: {r2_result}\")\n",
561
+ " trial.report(r2_result, step=0)\n",
562
+ "\n",
563
+ " if trial.should_prune():\n",
564
+ " raise optuna.exceptions.TrialPruned()\n",
565
+ "\n",
566
+ " except Exception as e:\n",
567
+ " print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
568
+ " r2_result = 0.0\n",
569
+ "\n",
570
+ " gc.collect()\n",
571
+ "\n",
572
+ " return r2_result"
573
+ ]
574
+ },
575
+ {
576
+ "cell_type": "code",
577
+ "execution_count": 45,
578
+ "metadata": {},
579
+ "outputs": [],
580
+ "source": [
581
+ "def objective_lo_struct(trial):\n",
582
+ " try:\n",
583
+ " y_true = np.asarray(y_lo).astype('float')\n",
584
+ " np.save('new_fps.npy', group_nlo)\n",
585
+ " np.save('y_true.npy', y_true)\n",
586
+ " \n",
587
+ " save_model(trial, group_nlo)\n",
588
+ "\n",
589
+ " lr = trial.suggest_categorical(f\"lr\", [1e-3, 1e-4, 1e-5])\n",
590
+ "\n",
591
+ " result = subprocess.run(['python3', './extra_code/learning_process.py', \n",
592
+ " str(BATCHSIZE), str(EPOCHS), \n",
593
+ " str(lr), \n",
594
+ " 'new_fps.npy', 'y_true.npy'],\n",
595
+ " stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
596
+ "\n",
597
+ " if result.stderr:\n",
598
+ " filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') if \"could not open file to read NUMA node\" not in line and \"Your kernel may have been built without NUMA support\" not in line])\n",
599
+ " if filtered_stderr:\n",
600
+ " print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
601
+ "\n",
602
+ " for line in result.stdout.splitlines():\n",
603
+ " if \"R2\" in line:\n",
604
+ " if \"(prune)\" in line:\n",
605
+ " print(f\"Pruning trial due to poor R2: {line}\")\n",
606
+ " r2_result = 0.0\n",
607
+ " trial.report(r2_result, step=0)\n",
608
+ " raise optuna.exceptions.TrialPruned()\n",
609
+ " else:\n",
610
+ " r2_result = float(line.split(\":\")[1].strip())\n",
611
+ " print(f\"R2 score: {r2_result}\")\n",
612
+ " trial.report(r2_result, step=0)\n",
613
+ "\n",
614
+ " if trial.should_prune():\n",
615
+ " raise optuna.exceptions.TrialPruned()\n",
616
+ "\n",
617
+ " except Exception as e:\n",
618
+ " print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
619
+ " r2_result = 0.0\n",
620
+ "\n",
621
+ " gc.collect()\n",
622
+ "\n",
623
+ " return r2_result"
624
+ ]
625
+ },
626
+ {
627
+ "cell_type": "code",
628
+ "execution_count": 46,
629
+ "metadata": {},
630
+ "outputs": [],
631
+ "source": [
632
+ "def objective_hu_struct(trial):\n",
633
+ " try:\n",
634
+ " y_true = np.asarray(y_hu).astype('float')\n",
635
+ " np.save('new_fps.npy', group_nhu)\n",
636
+ " np.save('y_true.npy', y_true)\n",
637
+ " \n",
638
+ " save_model(trial, group_nhu)\n",
639
+ "\n",
640
+ " lr = trial.suggest_categorical(f\"lr\", [1e-3, 1e-4, 1e-5])\n",
641
+ "\n",
642
+ " result = subprocess.run(['python3', './extra_code/learning_process.py', \n",
643
+ " str(BATCHSIZE), str(EPOCHS), \n",
644
+ " str(lr), \n",
645
+ " 'new_fps.npy', 'y_true.npy'],\n",
646
+ " stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
647
+ "\n",
648
+ " if result.stderr:\n",
649
+ " filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') if \"could not open file to read NUMA node\" not in line and \"Your kernel may have been built without NUMA support\" not in line])\n",
650
+ " if filtered_stderr:\n",
651
+ " print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
652
+ "\n",
653
+ " for line in result.stdout.splitlines():\n",
654
+ " if \"R2\" in line:\n",
655
+ " if \"(prune)\" in line:\n",
656
+ " print(f\"Pruning trial due to poor R2: {line}\")\n",
657
+ " r2_result = 0.0\n",
658
+ " trial.report(r2_result, step=0)\n",
659
+ " raise optuna.exceptions.TrialPruned()\n",
660
+ " else:\n",
661
+ " r2_result = float(line.split(\":\")[1].strip())\n",
662
+ " print(f\"R2 score: {r2_result}\")\n",
663
+ " trial.report(r2_result, step=0)\n",
664
+ "\n",
665
+ " if trial.should_prune():\n",
666
+ " raise optuna.exceptions.TrialPruned()\n",
667
+ "\n",
668
+ " except Exception as e:\n",
669
+ " print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
670
+ " r2_result = 0.0\n",
671
+ "\n",
672
+ " gc.collect()\n",
673
+ "\n",
674
+ " return r2_result"
675
+ ]
676
+ },
677
+ {
678
+ "cell_type": "code",
679
+ "execution_count": 47,
680
+ "metadata": {},
681
+ "outputs": [],
682
+ "source": [
683
+ "storage = optuna.storages.RDBStorage(url=\"sqlite:///ano_analysis.db\", engine_kwargs={\"connect_args\": {\"timeout\": 10000}})\n",
684
+ "# storage_urls = \"postgresql+psycopg2://postgres:{pwd}}@localhost:{num}}\"\n",
685
+ "# storage = optuna.storages.RDBStorage(url=storage_urls)"
686
+ ]
687
+ },
688
+ {
689
+ "cell_type": "code",
690
+ "execution_count": 48,
691
+ "metadata": {},
692
+ "outputs": [],
693
+ "source": [
694
+ "try:\n",
695
+ " optuna.delete_study(study_name=\"ANO_ws_struct\", storage=storage)\n",
696
+ " optuna.delete_study(study_name=\"ANO_de_struct\", storage=storage)\n",
697
+ " optuna.delete_study(study_name=\"ANO_lo_struct\", storage=storage)\n",
698
+ " optuna.delete_study(study_name=\"ANO_hu_struct\", storage=storage)\n",
699
+ "except:\n",
700
+ " pass"
701
+ ]
702
+ },
703
+ {
704
+ "cell_type": "code",
705
+ "execution_count": 49,
706
+ "metadata": {},
707
+ "outputs": [],
708
+ "source": [
709
+ "TRIALS = 5"
710
+ ]
711
+ },
712
+ {
713
+ "cell_type": "code",
714
+ "execution_count": 50,
715
+ "metadata": {},
716
+ "outputs": [
717
+ {
718
+ "name": "stderr",
719
+ "output_type": "stream",
720
+ "text": [
721
+ "[I 2024-10-25 11:27:16,193] A new study created in RDB with name: ANO_ws_struct\n",
722
+ "I0000 00:00:1729823236.262387 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
723
+ "Your kernel may have been built without NUMA support.\n",
724
+ "I0000 00:00:1729823236.262494 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
725
+ "Your kernel may have been built without NUMA support.\n",
726
+ "I0000 00:00:1729823236.262547 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
727
+ "Your kernel may have been built without NUMA support.\n",
728
+ "I0000 00:00:1729823236.414390 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
729
+ "Your kernel may have been built without NUMA support.\n",
730
+ "I0000 00:00:1729823236.414547 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
731
+ "Your kernel may have been built without NUMA support.\n",
732
+ "2024-10-25 11:27:16.414564: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2112] Could not identify NUMA node of platform GPU id 0, defaulting to 0. Your kernel may not have been built with NUMA support.\n",
733
+ "2024-10-25 11:27:16.414596: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:198] Using CUDA malloc Async allocator for GPU: 0\n",
734
+ "I0000 00:00:1729823236.414877 712386 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
735
+ "Your kernel may have been built without NUMA support.\n",
736
+ "2024-10-25 11:27:16.414914: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3586 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6\n"
737
+ ]
738
+ },
739
+ {
740
+ "name": "stdout",
741
+ "output_type": "stream",
742
+ "text": [
743
+ "Model successfully saved to save_model/full_model.keras\n"
744
+ ]
745
+ },
746
+ {
747
+ "name": "stderr",
748
+ "output_type": "stream",
749
+ "text": [
750
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
751
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
752
+ "I0000 00:00:1729823240.072256 713235 service.cc:146] XLA service 0x558b4abf8ec0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
753
+ "I0000 00:00:1729823240.072318 713235 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
754
+ "I0000 00:00:1729823240.199007 713235 service.cc:146] XLA service 0x558b4b330cb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
755
+ "I0000 00:00:1729823240.199045 713235 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
756
+ "I0000 00:00:1729823242.903035 713344 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
757
+ "\n"
758
+ ]
759
+ },
760
+ {
761
+ "name": "stdout",
762
+ "output_type": "stream",
763
+ "text": [
764
+ "R2 score: 0.72685\n"
765
+ ]
766
+ },
767
+ {
768
+ "name": "stderr",
769
+ "output_type": "stream",
770
+ "text": [
771
+ "[I 2024-10-25 11:27:28,379] Trial 0 finished with value: 0.72685 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 9922, 'n_decay_l_0': 1e-05, 'last_dropout': 0.1, 'lr': 0.001}. Best is trial 0 with value: 0.72685.\n"
772
+ ]
773
+ },
774
+ {
775
+ "name": "stdout",
776
+ "output_type": "stream",
777
+ "text": [
778
+ "Model already exists at save_model/full_model.keras\n",
779
+ "Model successfully saved to save_model/full_model.keras\n"
780
+ ]
781
+ },
782
+ {
783
+ "name": "stderr",
784
+ "output_type": "stream",
785
+ "text": [
786
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
787
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
788
+ "I0000 00:00:1729823251.215260 714082 service.cc:146] XLA service 0x55a8ec6b6500 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
789
+ "I0000 00:00:1729823251.215311 714082 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
790
+ "I0000 00:00:1729823251.333433 714082 service.cc:146] XLA service 0x55a8ec5cd290 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
791
+ "I0000 00:00:1729823251.333463 714082 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
792
+ "I0000 00:00:1729823263.351498 714193 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
793
+ "\n"
794
+ ]
795
+ },
796
+ {
797
+ "name": "stdout",
798
+ "output_type": "stream",
799
+ "text": [
800
+ "R2 score: 0.707063\n"
801
+ ]
802
+ },
803
+ {
804
+ "name": "stderr",
805
+ "output_type": "stream",
806
+ "text": [
807
+ "[I 2024-10-25 11:28:11,955] Trial 1 finished with value: 0.707063 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 6572, 'n_decay_l_0': 1e-05, 'n_units_l_1': 1332, 'n_decay_l_1': 0.0001, 'last_dropout': 0.3, 'lr': 1e-05}. Best is trial 0 with value: 0.72685.\n"
808
+ ]
809
+ },
810
+ {
811
+ "name": "stdout",
812
+ "output_type": "stream",
813
+ "text": [
814
+ "Model already exists at save_model/full_model.keras\n",
815
+ "Model successfully saved to save_model/full_model.keras\n"
816
+ ]
817
+ },
818
+ {
819
+ "name": "stderr",
820
+ "output_type": "stream",
821
+ "text": [
822
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
823
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
824
+ "I0000 00:00:1729823294.853158 716419 service.cc:146] XLA service 0x55b5e46a62f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
825
+ "I0000 00:00:1729823294.853225 716419 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
826
+ "I0000 00:00:1729823295.002577 716419 service.cc:146] XLA service 0x55b5e46e5fb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
827
+ "I0000 00:00:1729823295.002610 716419 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
828
+ "I0000 00:00:1729823297.511032 716525 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
829
+ "\n"
830
+ ]
831
+ },
832
+ {
833
+ "name": "stdout",
834
+ "output_type": "stream",
835
+ "text": [
836
+ "R2 score: 0.705862\n"
837
+ ]
838
+ },
839
+ {
840
+ "name": "stderr",
841
+ "output_type": "stream",
842
+ "text": [
843
+ "[I 2024-10-25 11:28:23,131] Trial 2 finished with value: 0.705862 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 3241, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'lr': 0.0001}. Best is trial 0 with value: 0.72685.\n"
844
+ ]
845
+ },
846
+ {
847
+ "name": "stdout",
848
+ "output_type": "stream",
849
+ "text": [
850
+ "Model already exists at save_model/full_model.keras\n"
851
+ ]
852
+ },
853
+ {
854
+ "name": "stderr",
855
+ "output_type": "stream",
856
+ "text": [
857
+ "2024-10-25 11:28:23.378722: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 286627920 exceeds 10% of free system memory.\n"
858
+ ]
859
+ },
860
+ {
861
+ "name": "stdout",
862
+ "output_type": "stream",
863
+ "text": [
864
+ "Model successfully saved to save_model/full_model.keras\n"
865
+ ]
866
+ },
867
+ {
868
+ "name": "stderr",
869
+ "output_type": "stream",
870
+ "text": [
871
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
872
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
873
+ "I0000 00:00:1729823306.834696 718042 service.cc:146] XLA service 0x55ff6bc60e10 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
874
+ "I0000 00:00:1729823306.834744 718042 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
875
+ "I0000 00:00:1729823306.978987 718042 service.cc:146] XLA service 0x55ff6bc7c930 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
876
+ "I0000 00:00:1729823306.979019 718042 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
877
+ "I0000 00:00:1729823333.708484 718154 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
878
+ "\n"
879
+ ]
880
+ },
881
+ {
882
+ "name": "stdout",
883
+ "output_type": "stream",
884
+ "text": [
885
+ "R2 score: 0.741337\n"
886
+ ]
887
+ },
888
+ {
889
+ "name": "stderr",
890
+ "output_type": "stream",
891
+ "text": [
892
+ "[I 2024-10-25 11:30:53,672] Trial 3 finished with value: 0.741337 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 787, 'n_decay_l_0': 0.0001, 'n_units_l_1': 9082, 'n_decay_l_1': 0.001, 'n_units_l_2': 7890, 'n_decay_l_2': 0.001, 'last_dropout': 0.1, 'lr': 0.0001}. Best is trial 3 with value: 0.741337.\n"
893
+ ]
894
+ },
895
+ {
896
+ "name": "stdout",
897
+ "output_type": "stream",
898
+ "text": [
899
+ "Model already exists at save_model/full_model.keras\n",
900
+ "Model successfully saved to save_model/full_model.keras\n"
901
+ ]
902
+ },
903
+ {
904
+ "name": "stderr",
905
+ "output_type": "stream",
906
+ "text": [
907
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
908
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
909
+ "I0000 00:00:1729823456.995677 725244 service.cc:146] XLA service 0x56214db0e060 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
910
+ "I0000 00:00:1729823456.995725 725244 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
911
+ "I0000 00:00:1729823457.136743 725244 service.cc:146] XLA service 0x56214da498b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
912
+ "I0000 00:00:1729823457.136787 725244 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
913
+ "I0000 00:00:1729823459.392929 725349 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
914
+ "\n"
915
+ ]
916
+ },
917
+ {
918
+ "name": "stdout",
919
+ "output_type": "stream",
920
+ "text": [
921
+ "R2 score: 0.68373\n"
922
+ ]
923
+ },
924
+ {
925
+ "name": "stderr",
926
+ "output_type": "stream",
927
+ "text": [
928
+ "[I 2024-10-25 11:31:13,484] Trial 4 finished with value: 0.68373 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 1253, 'n_decay_l_0': 0.0001, 'last_dropout': 0.2, 'lr': 1e-05}. Best is trial 3 with value: 0.741337.\n"
929
+ ]
930
+ }
931
+ ],
932
+ "source": [
933
+ "study_ws_struct = optuna.create_study(study_name='ANO_ws_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(reduction_factor=64, min_early_stopping_rate=10),load_if_exists=True) \n",
934
+ "# study_ws_fea = optuna.create_study(study_name='ANO_ws_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
935
+ "study_ws_struct.optimize(objective_ws_struct, n_trials=TRIALS)\n",
936
+ "pruned_trials_ws_struct = study_ws_struct.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
937
+ "complete_trials_ws_struct = study_ws_struct.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
938
+ ]
939
+ },
940
+ {
941
+ "cell_type": "code",
942
+ "execution_count": 51,
943
+ "metadata": {},
944
+ "outputs": [
945
+ {
946
+ "name": "stderr",
947
+ "output_type": "stream",
948
+ "text": [
949
+ "[I 2024-10-25 11:31:13,504] A new study created in RDB with name: ANO_de_struct\n"
950
+ ]
951
+ },
952
+ {
953
+ "name": "stdout",
954
+ "output_type": "stream",
955
+ "text": [
956
+ "Model already exists at save_model/full_model.keras\n",
957
+ "Model successfully saved to save_model/full_model.keras\n"
958
+ ]
959
+ },
960
+ {
961
+ "name": "stderr",
962
+ "output_type": "stream",
963
+ "text": [
964
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
965
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
966
+ "I0000 00:00:1729823476.380438 735317 service.cc:146] XLA service 0x564b5beee4b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
967
+ "I0000 00:00:1729823476.380497 735317 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
968
+ "I0000 00:00:1729823476.549364 735317 service.cc:146] XLA service 0x564b5be2ad00 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
969
+ "I0000 00:00:1729823476.549448 735317 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
970
+ "I0000 00:00:1729823490.704246 735426 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
971
+ "\n"
972
+ ]
973
+ },
974
+ {
975
+ "name": "stdout",
976
+ "output_type": "stream",
977
+ "text": [
978
+ "R2 score: 0.803869\n"
979
+ ]
980
+ },
981
+ {
982
+ "name": "stderr",
983
+ "output_type": "stream",
984
+ "text": [
985
+ "[I 2024-10-25 11:32:02,303] Trial 0 finished with value: 0.803869 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 1737, 'n_decay_l_0': 1e-05, 'n_units_l_1': 6702, 'n_decay_l_1': 1e-05, 'last_dropout': 0.2, 'lr': 0.001}. Best is trial 0 with value: 0.803869.\n"
986
+ ]
987
+ },
988
+ {
989
+ "name": "stdout",
990
+ "output_type": "stream",
991
+ "text": [
992
+ "Model already exists at save_model/full_model.keras\n",
993
+ "Model successfully saved to save_model/full_model.keras\n"
994
+ ]
995
+ },
996
+ {
997
+ "name": "stderr",
998
+ "output_type": "stream",
999
+ "text": [
1000
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1001
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1002
+ "I0000 00:00:1729823525.878834 736385 service.cc:146] XLA service 0x55bad3cb21e0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1003
+ "I0000 00:00:1729823525.878873 736385 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1004
+ "I0000 00:00:1729823526.015032 736385 service.cc:146] XLA service 0x55bad3c304f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1005
+ "I0000 00:00:1729823526.015066 736385 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1006
+ "I0000 00:00:1729823529.879054 736488 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1007
+ "\n"
1008
+ ]
1009
+ },
1010
+ {
1011
+ "name": "stdout",
1012
+ "output_type": "stream",
1013
+ "text": [
1014
+ "R2 score: 0.826782\n"
1015
+ ]
1016
+ },
1017
+ {
1018
+ "name": "stderr",
1019
+ "output_type": "stream",
1020
+ "text": [
1021
+ "[I 2024-10-25 11:32:39,308] Trial 1 finished with value: 0.826782 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 9935, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'n_units_l_1': 3544, 'n_decay_l_1': 1e-05, 'F_dropout_1': 0.3, 'lr': 0.0001}. Best is trial 1 with value: 0.826782.\n"
1022
+ ]
1023
+ },
1024
+ {
1025
+ "name": "stdout",
1026
+ "output_type": "stream",
1027
+ "text": [
1028
+ "Model already exists at save_model/full_model.keras\n",
1029
+ "Model successfully saved to save_model/full_model.keras\n"
1030
+ ]
1031
+ },
1032
+ {
1033
+ "name": "stderr",
1034
+ "output_type": "stream",
1035
+ "text": [
1036
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1037
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1038
+ "I0000 00:00:1729823562.604376 737355 service.cc:146] XLA service 0x55eaf2377f20 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1039
+ "I0000 00:00:1729823562.604431 737355 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1040
+ "I0000 00:00:1729823562.742863 737355 service.cc:146] XLA service 0x55eaf23d2e30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1041
+ "I0000 00:00:1729823562.742895 737355 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1042
+ "I0000 00:00:1729823566.592650 737461 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1043
+ "\n"
1044
+ ]
1045
+ },
1046
+ {
1047
+ "name": "stdout",
1048
+ "output_type": "stream",
1049
+ "text": [
1050
+ "R2 score: 0.823751\n"
1051
+ ]
1052
+ },
1053
+ {
1054
+ "name": "stderr",
1055
+ "output_type": "stream",
1056
+ "text": [
1057
+ "[I 2024-10-25 11:33:33,879] Trial 2 finished with value: 0.823751 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 7233, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'n_units_l_1': 4859, 'n_decay_l_1': 0.0001, 'F_dropout_1': 0.3, 'lr': 0.001}. Best is trial 1 with value: 0.826782.\n"
1058
+ ]
1059
+ },
1060
+ {
1061
+ "name": "stdout",
1062
+ "output_type": "stream",
1063
+ "text": [
1064
+ "Model already exists at save_model/full_model.keras\n",
1065
+ "Model successfully saved to save_model/full_model.keras\n"
1066
+ ]
1067
+ },
1068
+ {
1069
+ "name": "stderr",
1070
+ "output_type": "stream",
1071
+ "text": [
1072
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1073
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1074
+ "I0000 00:00:1729823616.856357 739058 service.cc:146] XLA service 0x55d8efa212a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1075
+ "I0000 00:00:1729823616.856406 739058 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1076
+ "I0000 00:00:1729823616.997938 739058 service.cc:146] XLA service 0x55d8ef979320 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1077
+ "I0000 00:00:1729823616.997975 739058 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1078
+ "I0000 00:00:1729823621.412038 739168 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1079
+ "\n"
1080
+ ]
1081
+ },
1082
+ {
1083
+ "name": "stdout",
1084
+ "output_type": "stream",
1085
+ "text": [
1086
+ "R2 score: 0.796098\n"
1087
+ ]
1088
+ },
1089
+ {
1090
+ "name": "stderr",
1091
+ "output_type": "stream",
1092
+ "text": [
1093
+ "[I 2024-10-25 11:34:39,639] Trial 3 finished with value: 0.796098 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 809, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.3, 'n_units_l_1': 3939, 'n_decay_l_1': 0.0001, 'F_dropout_1': 0.2, 'n_units_l_2': 6198, 'n_decay_l_2': 0.0001, 'F_dropout_2': 0.1, 'lr': 0.001}. Best is trial 1 with value: 0.826782.\n"
1094
+ ]
1095
+ },
1096
+ {
1097
+ "name": "stdout",
1098
+ "output_type": "stream",
1099
+ "text": [
1100
+ "Model already exists at save_model/full_model.keras\n",
1101
+ "Model successfully saved to save_model/full_model.keras\n"
1102
+ ]
1103
+ },
1104
+ {
1105
+ "name": "stderr",
1106
+ "output_type": "stream",
1107
+ "text": [
1108
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1109
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1110
+ "I0000 00:00:1729823682.926391 740686 service.cc:146] XLA service 0x5579d09bb100 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1111
+ "I0000 00:00:1729823682.926465 740686 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1112
+ "I0000 00:00:1729823683.068876 740686 service.cc:146] XLA service 0x5579d09fa810 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1113
+ "I0000 00:00:1729823683.068912 740686 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1114
+ "I0000 00:00:1729823696.228323 740798 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1115
+ "\n"
1116
+ ]
1117
+ },
1118
+ {
1119
+ "name": "stdout",
1120
+ "output_type": "stream",
1121
+ "text": [
1122
+ "R2 score: 0.84961\n"
1123
+ ]
1124
+ },
1125
+ {
1126
+ "name": "stderr",
1127
+ "output_type": "stream",
1128
+ "text": [
1129
+ "[I 2024-10-25 11:37:05,301] Trial 4 finished with value: 0.84961 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 7109, 'n_decay_l_0': 0.001, 'n_units_l_1': 3436, 'n_decay_l_1': 1e-05, 'last_dropout': 0.3, 'lr': 1e-05}. Best is trial 4 with value: 0.84961.\n"
1130
+ ]
1131
+ }
1132
+ ],
1133
+ "source": [
1134
+ "study_de_struct = optuna.create_study(study_name='ANO_de_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(reduction_factor=64, min_early_stopping_rate=10),load_if_exists=True) \n",
1135
+ "# study_de_fea = optuna.create_study(study_name='ANO_de_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
1136
+ "study_de_struct.optimize(objective_de_struct, n_trials=TRIALS)\n",
1137
+ "pruned_trials_de_struct = study_de_struct.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1138
+ "complete_trials_de_struct = study_de_struct.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
1139
+ ]
1140
+ },
1141
+ {
1142
+ "cell_type": "code",
1143
+ "execution_count": 52,
1144
+ "metadata": {},
1145
+ "outputs": [
1146
+ {
1147
+ "name": "stderr",
1148
+ "output_type": "stream",
1149
+ "text": [
1150
+ "[I 2024-10-25 11:37:05,323] A new study created in RDB with name: ANO_lo_struct\n"
1151
+ ]
1152
+ },
1153
+ {
1154
+ "name": "stdout",
1155
+ "output_type": "stream",
1156
+ "text": [
1157
+ "Model already exists at save_model/full_model.keras\n",
1158
+ "Model successfully saved to save_model/full_model.keras\n"
1159
+ ]
1160
+ },
1161
+ {
1162
+ "name": "stderr",
1163
+ "output_type": "stream",
1164
+ "text": [
1165
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1166
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1167
+ "I0000 00:00:1729823828.951072 753530 service.cc:146] XLA service 0x560175567120 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1168
+ "I0000 00:00:1729823828.951145 753530 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1169
+ "I0000 00:00:1729823829.108219 753530 service.cc:146] XLA service 0x56017553de50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1170
+ "I0000 00:00:1729823829.108251 753530 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1171
+ "I0000 00:00:1729823833.863752 753634 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1172
+ "\n"
1173
+ ]
1174
+ },
1175
+ {
1176
+ "name": "stdout",
1177
+ "output_type": "stream",
1178
+ "text": [
1179
+ "R2 score: 0.679332\n"
1180
+ ]
1181
+ },
1182
+ {
1183
+ "name": "stderr",
1184
+ "output_type": "stream",
1185
+ "text": [
1186
+ "[I 2024-10-25 11:39:42,324] Trial 0 finished with value: 0.679332 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 7114, 'n_decay_l_0': 0.001, 'F_dropout_0': 0.1, 'n_units_l_1': 7475, 'n_decay_l_1': 0.0001, 'F_dropout_1': 0.3, 'lr': 1e-05}. Best is trial 0 with value: 0.679332.\n"
1187
+ ]
1188
+ },
1189
+ {
1190
+ "name": "stdout",
1191
+ "output_type": "stream",
1192
+ "text": [
1193
+ "Model already exists at save_model/full_model.keras\n",
1194
+ "Model successfully saved to save_model/full_model.keras\n"
1195
+ ]
1196
+ },
1197
+ {
1198
+ "name": "stderr",
1199
+ "output_type": "stream",
1200
+ "text": [
1201
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1202
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1203
+ "I0000 00:00:1729823985.424040 762359 service.cc:146] XLA service 0x55d75759be40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1204
+ "I0000 00:00:1729823985.424082 762359 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1205
+ "I0000 00:00:1729823985.555550 762359 service.cc:146] XLA service 0x55d75744c2d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1206
+ "I0000 00:00:1729823985.555588 762359 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1207
+ "I0000 00:00:1729823989.791130 762463 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1208
+ "\n"
1209
+ ]
1210
+ },
1211
+ {
1212
+ "name": "stdout",
1213
+ "output_type": "stream",
1214
+ "text": [
1215
+ "R2 score: 0.668488\n"
1216
+ ]
1217
+ },
1218
+ {
1219
+ "name": "stderr",
1220
+ "output_type": "stream",
1221
+ "text": [
1222
+ "[I 2024-10-25 11:40:50,103] Trial 1 finished with value: 0.668488 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 2152, 'n_decay_l_0': 0.001, 'F_dropout_0': 0.1, 'n_units_l_1': 1830, 'n_decay_l_1': 0.0001, 'F_dropout_1': 0.1, 'n_units_l_2': 4427, 'n_decay_l_2': 0.0001, 'F_dropout_2': 0.3, 'lr': 0.0001}. Best is trial 0 with value: 0.679332.\n"
1223
+ ]
1224
+ },
1225
+ {
1226
+ "name": "stdout",
1227
+ "output_type": "stream",
1228
+ "text": [
1229
+ "Model already exists at save_model/full_model.keras\n",
1230
+ "Model successfully saved to save_model/full_model.keras\n"
1231
+ ]
1232
+ },
1233
+ {
1234
+ "name": "stderr",
1235
+ "output_type": "stream",
1236
+ "text": [
1237
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1238
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1239
+ "I0000 00:00:1729824052.960321 765604 service.cc:146] XLA service 0x55cc5280bdf0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1240
+ "I0000 00:00:1729824052.960390 765604 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1241
+ "I0000 00:00:1729824053.116021 765604 service.cc:146] XLA service 0x55cc50253a30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1242
+ "I0000 00:00:1729824053.116054 765604 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1243
+ "I0000 00:00:1729824055.695706 765714 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1244
+ "\n"
1245
+ ]
1246
+ },
1247
+ {
1248
+ "name": "stdout",
1249
+ "output_type": "stream",
1250
+ "text": [
1251
+ "R2 score: 0.662751\n"
1252
+ ]
1253
+ },
1254
+ {
1255
+ "name": "stderr",
1256
+ "output_type": "stream",
1257
+ "text": [
1258
+ "[I 2024-10-25 11:41:01,389] Trial 2 finished with value: 0.662751 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 2892, 'n_decay_l_0': 0.001, 'last_dropout': 0.1, 'lr': 0.001}. Best is trial 0 with value: 0.679332.\n"
1259
+ ]
1260
+ },
1261
+ {
1262
+ "name": "stdout",
1263
+ "output_type": "stream",
1264
+ "text": [
1265
+ "Model already exists at save_model/full_model.keras\n",
1266
+ "Model successfully saved to save_model/full_model.keras\n"
1267
+ ]
1268
+ },
1269
+ {
1270
+ "name": "stderr",
1271
+ "output_type": "stream",
1272
+ "text": [
1273
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1274
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1275
+ "I0000 00:00:1729824064.281415 766911 service.cc:146] XLA service 0x55b827b832f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1276
+ "I0000 00:00:1729824064.281454 766911 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1277
+ "I0000 00:00:1729824064.424930 766911 service.cc:146] XLA service 0x55b827b5a3c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1278
+ "I0000 00:00:1729824064.424972 766911 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1279
+ "I0000 00:00:1729824067.087899 767014 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1280
+ "\n"
1281
+ ]
1282
+ },
1283
+ {
1284
+ "name": "stdout",
1285
+ "output_type": "stream",
1286
+ "text": [
1287
+ "R2 score: 0.644237\n"
1288
+ ]
1289
+ },
1290
+ {
1291
+ "name": "stderr",
1292
+ "output_type": "stream",
1293
+ "text": [
1294
+ "[I 2024-10-25 11:41:12,311] Trial 3 finished with value: 0.644237 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 6028, 'n_decay_l_0': 0.0001, 'last_dropout': 0.2, 'lr': 0.001}. Best is trial 0 with value: 0.679332.\n"
1295
+ ]
1296
+ },
1297
+ {
1298
+ "name": "stdout",
1299
+ "output_type": "stream",
1300
+ "text": [
1301
+ "Model already exists at save_model/full_model.keras\n",
1302
+ "Model successfully saved to save_model/full_model.keras\n"
1303
+ ]
1304
+ },
1305
+ {
1306
+ "name": "stderr",
1307
+ "output_type": "stream",
1308
+ "text": [
1309
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1310
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1311
+ "I0000 00:00:1729824075.529341 767599 service.cc:146] XLA service 0x563c07a27f10 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1312
+ "I0000 00:00:1729824075.529392 767599 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1313
+ "I0000 00:00:1729824075.650832 767599 service.cc:146] XLA service 0x563c07a82e20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1314
+ "I0000 00:00:1729824075.650868 767599 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1315
+ "I0000 00:00:1729824078.421404 767708 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1316
+ "\n"
1317
+ ]
1318
+ },
1319
+ {
1320
+ "name": "stdout",
1321
+ "output_type": "stream",
1322
+ "text": [
1323
+ "R2 score: 0.619821\n"
1324
+ ]
1325
+ },
1326
+ {
1327
+ "name": "stderr",
1328
+ "output_type": "stream",
1329
+ "text": [
1330
+ "[I 2024-10-25 11:41:23,815] Trial 4 finished with value: 0.619821 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 8402, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'lr': 0.001}. Best is trial 0 with value: 0.679332.\n"
1331
+ ]
1332
+ }
1333
+ ],
1334
+ "source": [
1335
+ "study_lo_struct = optuna.create_study(study_name='ANO_lo_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(reduction_factor=64, min_early_stopping_rate=10),load_if_exists=True) \n",
1336
+ "# study_lo_fea = optuna.create_study(study_name='ANO_lo_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
1337
+ "study_lo_struct.optimize(objective_lo_struct, n_trials=TRIALS)\n",
1338
+ "pruned_trials_lo_struct = study_lo_struct.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1339
+ "complete_trials_lo_struct = study_lo_struct.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
1340
+ ]
1341
+ },
1342
+ {
1343
+ "cell_type": "code",
1344
+ "execution_count": 53,
1345
+ "metadata": {},
1346
+ "outputs": [
1347
+ {
1348
+ "name": "stderr",
1349
+ "output_type": "stream",
1350
+ "text": [
1351
+ "[I 2024-10-25 11:41:23,834] A new study created in RDB with name: ANO_hu_struct\n"
1352
+ ]
1353
+ },
1354
+ {
1355
+ "name": "stdout",
1356
+ "output_type": "stream",
1357
+ "text": [
1358
+ "Model already exists at save_model/full_model.keras\n",
1359
+ "Model successfully saved to save_model/full_model.keras\n"
1360
+ ]
1361
+ },
1362
+ {
1363
+ "name": "stderr",
1364
+ "output_type": "stream",
1365
+ "text": [
1366
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1367
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1368
+ "I0000 00:00:1729824086.884600 768220 service.cc:146] XLA service 0x5626a2e78390 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1369
+ "I0000 00:00:1729824086.884652 768220 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1370
+ "I0000 00:00:1729824087.003367 768220 service.cc:146] XLA service 0x5626a2e4fce0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1371
+ "I0000 00:00:1729824087.003400 768220 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1372
+ "I0000 00:00:1729824105.963199 768323 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1373
+ "\n"
1374
+ ]
1375
+ },
1376
+ {
1377
+ "name": "stdout",
1378
+ "output_type": "stream",
1379
+ "text": [
1380
+ "R2 score: 0.856321\n"
1381
+ ]
1382
+ },
1383
+ {
1384
+ "name": "stderr",
1385
+ "output_type": "stream",
1386
+ "text": [
1387
+ "[I 2024-10-25 11:42:47,320] Trial 0 finished with value: 0.856321 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 6594, 'n_decay_l_0': 0.0001, 'n_units_l_1': 301, 'n_decay_l_1': 0.001, 'last_dropout': 0.3, 'lr': 1e-05}. Best is trial 0 with value: 0.856321.\n"
1388
+ ]
1389
+ },
1390
+ {
1391
+ "name": "stdout",
1392
+ "output_type": "stream",
1393
+ "text": [
1394
+ "Model already exists at save_model/full_model.keras\n",
1395
+ "Model successfully saved to save_model/full_model.keras\n"
1396
+ ]
1397
+ },
1398
+ {
1399
+ "name": "stderr",
1400
+ "output_type": "stream",
1401
+ "text": [
1402
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1403
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1404
+ "I0000 00:00:1729824170.843053 772954 service.cc:146] XLA service 0x558ed359fba0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1405
+ "I0000 00:00:1729824170.843094 772954 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1406
+ "I0000 00:00:1729824170.980423 772954 service.cc:146] XLA service 0x558ed34d34b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1407
+ "I0000 00:00:1729824170.980455 772954 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1408
+ "I0000 00:00:1729824174.981970 773059 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1409
+ "\n"
1410
+ ]
1411
+ },
1412
+ {
1413
+ "name": "stdout",
1414
+ "output_type": "stream",
1415
+ "text": [
1416
+ "R2 score: 0.852474\n"
1417
+ ]
1418
+ },
1419
+ {
1420
+ "name": "stderr",
1421
+ "output_type": "stream",
1422
+ "text": [
1423
+ "[I 2024-10-25 11:46:37,297] Trial 1 finished with value: 0.852474 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 6712, 'n_decay_l_0': 0.0001, 'F_dropout_0': 0.1, 'n_units_l_1': 6556, 'n_decay_l_1': 0.001, 'F_dropout_1': 0.1, 'lr': 1e-05}. Best is trial 0 with value: 0.856321.\n"
1424
+ ]
1425
+ },
1426
+ {
1427
+ "name": "stdout",
1428
+ "output_type": "stream",
1429
+ "text": [
1430
+ "Model already exists at save_model/full_model.keras\n",
1431
+ "Model successfully saved to save_model/full_model.keras\n"
1432
+ ]
1433
+ },
1434
+ {
1435
+ "name": "stderr",
1436
+ "output_type": "stream",
1437
+ "text": [
1438
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1439
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1440
+ "I0000 00:00:1729824400.201392 788855 service.cc:146] XLA service 0x55a8a31a00b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1441
+ "I0000 00:00:1729824400.201453 788855 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1442
+ "I0000 00:00:1729824400.333349 788855 service.cc:146] XLA service 0x55a8a3176de0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1443
+ "I0000 00:00:1729824400.333383 788855 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1444
+ "I0000 00:00:1729824402.770049 788964 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1445
+ "\n"
1446
+ ]
1447
+ },
1448
+ {
1449
+ "name": "stdout",
1450
+ "output_type": "stream",
1451
+ "text": [
1452
+ "R2 score: 0.839939\n"
1453
+ ]
1454
+ },
1455
+ {
1456
+ "name": "stderr",
1457
+ "output_type": "stream",
1458
+ "text": [
1459
+ "[I 2024-10-25 11:46:47,755] Trial 2 finished with value: 0.839939 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 4994, 'n_decay_l_0': 0.0001, 'last_dropout': 0.1, 'lr': 0.001}. Best is trial 0 with value: 0.856321.\n"
1460
+ ]
1461
+ },
1462
+ {
1463
+ "name": "stdout",
1464
+ "output_type": "stream",
1465
+ "text": [
1466
+ "Model already exists at save_model/full_model.keras\n",
1467
+ "Model successfully saved to save_model/full_model.keras\n"
1468
+ ]
1469
+ },
1470
+ {
1471
+ "name": "stderr",
1472
+ "output_type": "stream",
1473
+ "text": [
1474
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1475
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1476
+ "I0000 00:00:1729824410.699218 789515 service.cc:146] XLA service 0x55ac8eda5d40 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1477
+ "I0000 00:00:1729824410.699260 789515 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1478
+ "I0000 00:00:1729824410.834295 789515 service.cc:146] XLA service 0x55ac8ece7750 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1479
+ "I0000 00:00:1729824410.834333 789515 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1480
+ "I0000 00:00:1729824435.249758 789626 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1481
+ "\n"
1482
+ ]
1483
+ },
1484
+ {
1485
+ "name": "stdout",
1486
+ "output_type": "stream",
1487
+ "text": [
1488
+ "R2 score: 0.845373\n"
1489
+ ]
1490
+ },
1491
+ {
1492
+ "name": "stderr",
1493
+ "output_type": "stream",
1494
+ "text": [
1495
+ "[I 2024-10-25 11:48:32,252] Trial 3 finished with value: 0.845373 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 4463, 'n_decay_l_0': 0.0001, 'n_units_l_1': 1966, 'n_decay_l_1': 0.001, 'n_units_l_2': 924, 'n_decay_l_2': 0.0001, 'last_dropout': 0.1, 'lr': 0.0001}. Best is trial 0 with value: 0.856321.\n"
1496
+ ]
1497
+ },
1498
+ {
1499
+ "name": "stdout",
1500
+ "output_type": "stream",
1501
+ "text": [
1502
+ "Model already exists at save_model/full_model.keras\n",
1503
+ "Model successfully saved to save_model/full_model.keras\n"
1504
+ ]
1505
+ },
1506
+ {
1507
+ "name": "stderr",
1508
+ "output_type": "stream",
1509
+ "text": [
1510
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1511
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1512
+ "I0000 00:00:1729824515.181650 795905 service.cc:146] XLA service 0x55dbccb5d560 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1513
+ "I0000 00:00:1729824515.181691 795905 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1514
+ "I0000 00:00:1729824515.312754 795905 service.cc:146] XLA service 0x55dbccb71e90 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1515
+ "I0000 00:00:1729824515.312792 795905 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1516
+ "I0000 00:00:1729824518.617949 796015 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1517
+ "\n"
1518
+ ]
1519
+ },
1520
+ {
1521
+ "name": "stdout",
1522
+ "output_type": "stream",
1523
+ "text": [
1524
+ "R2 score: 0.812906\n"
1525
+ ]
1526
+ },
1527
+ {
1528
+ "name": "stderr",
1529
+ "output_type": "stream",
1530
+ "text": [
1531
+ "[I 2024-10-25 11:49:05,409] Trial 4 finished with value: 0.812906 and parameters: {'n_layers': 2, 'layer_dropout': 1, 'n_units_l_0': 1477, 'n_decay_l_0': 0.0001, 'F_dropout_0': 0.3, 'n_units_l_1': 5762, 'n_decay_l_1': 1e-05, 'F_dropout_1': 0.2, 'lr': 0.001}. Best is trial 0 with value: 0.856321.\n"
1532
+ ]
1533
+ }
1534
+ ],
1535
+ "source": [
1536
+ "study_hu_struct = optuna.create_study(study_name='ANO_hu_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(reduction_factor=64, min_early_stopping_rate=10),load_if_exists=True) \n",
1537
+ "# study_hu_fea = optuna.create_study(study_name='ANO_hu_struct', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
1538
+ "study_hu_struct.optimize(objective_hu_struct, n_trials=TRIALS)\n",
1539
+ "pruned_trials_hu_struct = study_hu_struct.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1540
+ "complete_trials_hu_struct = study_hu_struct.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
1541
+ ]
1542
+ },
1543
+ {
1544
+ "cell_type": "code",
1545
+ "execution_count": 58,
1546
+ "metadata": {},
1547
+ "outputs": [
1548
+ {
1549
+ "name": "stdout",
1550
+ "output_type": "stream",
1551
+ "text": [
1552
+ "Study statistics: [ws_structure] \n",
1553
+ " Number of finished trials: 5\n",
1554
+ " Number of pruned trials: 0\n",
1555
+ " Number of complete trials: 5\n",
1556
+ "Best trial:\n",
1557
+ " Value: 0.741337\n",
1558
+ " Params: \n",
1559
+ " n_layers: 3\n",
1560
+ " layer_dropout: 0\n",
1561
+ " n_units_l_0: 787\n",
1562
+ " n_decay_l_0: 0.0001\n",
1563
+ " n_units_l_1: 9082\n",
1564
+ " n_decay_l_1: 0.001\n",
1565
+ " n_units_l_2: 7890\n",
1566
+ " n_decay_l_2: 0.001\n",
1567
+ " last_dropout: 0.1\n",
1568
+ " lr: 0.0001\n"
1569
+ ]
1570
+ }
1571
+ ],
1572
+ "source": [
1573
+ "print(\"Study statistics: [ws_structure] \")\n",
1574
+ "print(\" Number of finished trials: \", len(study_ws_struct.trials))\n",
1575
+ "print(\" Number of pruned trials: \", len(pruned_trials_ws_struct))\n",
1576
+ "print(\" Number of complete trials: \", len(complete_trials_ws_struct))\n",
1577
+ "print(\"Best trial:\")\n",
1578
+ "trials_tmp = study_ws_struct.best_trial\n",
1579
+ "print(\" Value: \", trials_tmp.value)\n",
1580
+ "print(\" Params: \")\n",
1581
+ "for key, value in trials_tmp.params.items():\n",
1582
+ " print(\" {}: {}\".format(key, value))"
1583
+ ]
1584
+ },
1585
+ {
1586
+ "cell_type": "code",
1587
+ "execution_count": 59,
1588
+ "metadata": {},
1589
+ "outputs": [
1590
+ {
1591
+ "name": "stdout",
1592
+ "output_type": "stream",
1593
+ "text": [
1594
+ "Study statistics: [de_structure] \n",
1595
+ " Number of finished trials: 5\n",
1596
+ " Number of pruned trials: 0\n",
1597
+ " Number of complete trials: 5\n",
1598
+ "Best trial:\n",
1599
+ " Value: 0.84961\n",
1600
+ " Params: \n",
1601
+ " n_layers: 2\n",
1602
+ " layer_dropout: 0\n",
1603
+ " n_units_l_0: 7109\n",
1604
+ " n_decay_l_0: 0.001\n",
1605
+ " n_units_l_1: 3436\n",
1606
+ " n_decay_l_1: 1e-05\n",
1607
+ " last_dropout: 0.3\n",
1608
+ " lr: 1e-05\n"
1609
+ ]
1610
+ }
1611
+ ],
1612
+ "source": [
1613
+ "print(\"Study statistics: [de_structure] \")\n",
1614
+ "print(\" Number of finished trials: \", len(study_de_struct.trials))\n",
1615
+ "print(\" Number of pruned trials: \", len(pruned_trials_de_struct))\n",
1616
+ "print(\" Number of complete trials: \", len(complete_trials_de_struct))\n",
1617
+ "print(\"Best trial:\")\n",
1618
+ "trials_tmp = study_de_struct.best_trial\n",
1619
+ "print(\" Value: \", trials_tmp.value)\n",
1620
+ "print(\" Params: \")\n",
1621
+ "for key, value in trials_tmp.params.items():\n",
1622
+ " print(\" {}: {}\".format(key, value))"
1623
+ ]
1624
+ },
1625
+ {
1626
+ "cell_type": "code",
1627
+ "execution_count": 60,
1628
+ "metadata": {},
1629
+ "outputs": [
1630
+ {
1631
+ "name": "stdout",
1632
+ "output_type": "stream",
1633
+ "text": [
1634
+ "Study statistics: [lo_structure] \n",
1635
+ " Number of finished trials: 5\n",
1636
+ " Number of pruned trials: 0\n",
1637
+ " Number of complete trials: 5\n",
1638
+ "Best trial:\n",
1639
+ " Value: 0.679332\n",
1640
+ " Params: \n",
1641
+ " n_layers: 2\n",
1642
+ " layer_dropout: 1\n",
1643
+ " n_units_l_0: 7114\n",
1644
+ " n_decay_l_0: 0.001\n",
1645
+ " F_dropout_0: 0.1\n",
1646
+ " n_units_l_1: 7475\n",
1647
+ " n_decay_l_1: 0.0001\n",
1648
+ " F_dropout_1: 0.3\n",
1649
+ " lr: 1e-05\n"
1650
+ ]
1651
+ }
1652
+ ],
1653
+ "source": [
1654
+ "print(\"Study statistics: [lo_structure] \")\n",
1655
+ "print(\" Number of finished trials: \", len(study_lo_struct.trials))\n",
1656
+ "print(\" Number of pruned trials: \", len(pruned_trials_lo_struct))\n",
1657
+ "print(\" Number of complete trials: \", len(complete_trials_lo_struct))\n",
1658
+ "print(\"Best trial:\")\n",
1659
+ "trials_tmp = study_lo_struct.best_trial\n",
1660
+ "print(\" Value: \", trials_tmp.value)\n",
1661
+ "print(\" Params: \")\n",
1662
+ "for key, value in trials_tmp.params.items():\n",
1663
+ " print(\" {}: {}\".format(key, value))"
1664
+ ]
1665
+ },
1666
+ {
1667
+ "cell_type": "code",
1668
+ "execution_count": 61,
1669
+ "metadata": {},
1670
+ "outputs": [
1671
+ {
1672
+ "name": "stdout",
1673
+ "output_type": "stream",
1674
+ "text": [
1675
+ "Study statistics: [hu_structure] \n",
1676
+ " Number of finished trials: 5\n",
1677
+ " Number of pruned trials: 0\n",
1678
+ " Number of complete trials: 5\n",
1679
+ "Best trial:\n",
1680
+ " Value: 0.856321\n",
1681
+ " Params: \n",
1682
+ " n_layers: 2\n",
1683
+ " layer_dropout: 0\n",
1684
+ " n_units_l_0: 6594\n",
1685
+ " n_decay_l_0: 0.0001\n",
1686
+ " n_units_l_1: 301\n",
1687
+ " n_decay_l_1: 0.001\n",
1688
+ " last_dropout: 0.3\n",
1689
+ " lr: 1e-05\n"
1690
+ ]
1691
+ }
1692
+ ],
1693
+ "source": [
1694
+ "print(\"Study statistics: [hu_structure] \")\n",
1695
+ "print(\" Number of finished trials: \", len(study_hu_struct.trials))\n",
1696
+ "print(\" Number of pruned trials: \", len(pruned_trials_hu_struct))\n",
1697
+ "print(\" Number of complete trials: \", len(complete_trials_hu_struct))\n",
1698
+ "print(\"Best trial:\")\n",
1699
+ "trials_tmp = study_hu_struct.best_trial\n",
1700
+ "print(\" Value: \", trials_tmp.value)\n",
1701
+ "print(\" Params: \")\n",
1702
+ "for key, value in trials_tmp.params.items():\n",
1703
+ " print(\" {}: {}\".format(key, value))"
1704
+ ]
1705
+ },
1706
+ {
1707
+ "cell_type": "code",
1708
+ "execution_count": null,
1709
+ "metadata": {},
1710
+ "outputs": [],
1711
+ "source": []
1712
+ },
1713
+ {
1714
+ "cell_type": "code",
1715
+ "execution_count": null,
1716
+ "metadata": {},
1717
+ "outputs": [],
1718
+ "source": []
1719
+ },
1720
+ {
1721
+ "cell_type": "code",
1722
+ "execution_count": null,
1723
+ "metadata": {},
1724
+ "outputs": [],
1725
+ "source": []
1726
+ }
1727
+ ],
1728
+ "metadata": {
1729
+ "kernelspec": {
1730
+ "display_name": "ai",
1731
+ "language": "python",
1732
+ "name": "python3"
1733
+ },
1734
+ "language_info": {
1735
+ "codemirror_mode": {
1736
+ "name": "ipython",
1737
+ "version": 3
1738
+ },
1739
+ "file_extension": ".py",
1740
+ "mimetype": "text/x-python",
1741
+ "name": "python",
1742
+ "nbconvert_exporter": "python",
1743
+ "pygments_lexer": "ipython3",
1744
+ "version": "3.12.2"
1745
+ },
1746
+ "orig_nbformat": 4
1747
+ },
1748
+ "nbformat": 4,
1749
+ "nbformat_minor": 2
1750
+ }
6_ANO_network_[fea_struc].ipynb ADDED
@@ -0,0 +1,1992 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "import sys\n",
11
+ "import numpy as np\n",
12
+ "import pandas as pd\n",
13
+ "import seaborn as sns\n",
14
+ "import matplotlib.pyplot as plt\n",
15
+ "import matplotlib.patches as mpatches\n",
16
+ "import gc\n",
17
+ "import time\n",
18
+ "import subprocess\n",
19
+ "import logging\n",
20
+ "from concurrent.futures import ProcessPoolExecutor, as_completed"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "outputs": [],
28
+ "source": [
29
+ "from rdkit import Chem\n",
30
+ "from rdkit.Chem import AllChem, DataStructs, Draw\n",
31
+ "from rdkit import RDConfig\n",
32
+ "from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges\n",
33
+ "from rdkit.Chem.AllChem import GetMorganGenerator\n",
34
+ "from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray\n",
35
+ "from rdkit.Avalon.pyAvalonTools import GetAvalonFP"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": 3,
41
+ "metadata": {},
42
+ "outputs": [
43
+ {
44
+ "name": "stderr",
45
+ "output_type": "stream",
46
+ "text": [
47
+ "2024-11-04 22:59:19.830835: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
48
+ "2024-11-04 22:59:19.845573: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
49
+ "2024-11-04 22:59:19.849643: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
50
+ "2024-11-04 22:59:19.860597: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
51
+ "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
52
+ "2024-11-04 22:59:21.010480: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
53
+ ]
54
+ }
55
+ ],
56
+ "source": [
57
+ "import tensorflow as tf\n",
58
+ "from tensorflow import keras\n",
59
+ "from tensorflow.keras import layers\n",
60
+ "from tensorflow.keras.models import Sequential\n",
61
+ "from tensorflow.keras.layers import Dense, Dropout, Activation\n",
62
+ "from tensorflow.keras.regularizers import l2\n",
63
+ "from tensorflow.keras.optimizers import Adam\n",
64
+ "from tensorflow.keras import regularizers"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 4,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "from sklearn.model_selection import train_test_split\n",
74
+ "from sklearn.linear_model import Ridge\n",
75
+ "from sklearn.ensemble import RandomForestRegressor\n",
76
+ "from sklearn.neural_network import MLPRegressor\n",
77
+ "from sklearn.svm import SVR\n",
78
+ "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 5,
84
+ "metadata": {},
85
+ "outputs": [],
86
+ "source": [
87
+ "import optuna\n",
88
+ "from optuna.trial import TrialState\n",
89
+ "from optuna.integration import TFKerasPruningCallback"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "code",
94
+ "execution_count": 6,
95
+ "metadata": {},
96
+ "outputs": [],
97
+ "source": [
98
+ "from extra_code.feature_selection import selection_data_descriptor_compress, selection_fromStudy_compress"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": 7,
104
+ "metadata": {},
105
+ "outputs": [
106
+ {
107
+ "name": "stderr",
108
+ "output_type": "stream",
109
+ "text": [
110
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
111
+ "I0000 00:00:1730728761.540219 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
112
+ "Your kernel may have been built without NUMA support.\n",
113
+ "I0000 00:00:1730728761.611323 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
114
+ "Your kernel may have been built without NUMA support.\n",
115
+ "I0000 00:00:1730728761.611412 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
116
+ "Your kernel may have been built without NUMA support.\n"
117
+ ]
118
+ }
119
+ ],
120
+ "source": [
121
+ "tf.keras.backend.clear_session()\n",
122
+ "gpus = tf.config.experimental.list_physical_devices('GPU')\n",
123
+ "if gpus:\n",
124
+ " try:\n",
125
+ " for gpu in gpus:\n",
126
+ " tf.config.experimental.set_memory_growth(gpu, True)\n",
127
+ " except RuntimeError as e:\n",
128
+ " print(e)"
129
+ ]
130
+ },
131
+ {
132
+ "cell_type": "code",
133
+ "execution_count": 8,
134
+ "metadata": {},
135
+ "outputs": [],
136
+ "source": [
137
+ "target_path = \"result/6_ANO_network_[fea_struc]\"\n",
138
+ "os.makedirs(target_path, exist_ok=True)"
139
+ ]
140
+ },
141
+ {
142
+ "cell_type": "code",
143
+ "execution_count": 9,
144
+ "metadata": {},
145
+ "outputs": [],
146
+ "source": [
147
+ "data_ws = pd.read_csv('./data/ws496_logS.csv', dtype={'SMILES': 'string'})\n",
148
+ "smiles_ws = data_ws['SMILES']\n",
149
+ "y_ws = data_ws.iloc[:, 2]\n",
150
+ "\n",
151
+ "data_delaney = pd.read_csv('./data/delaney-processed.csv', dtype={'smiles': 'string'})\n",
152
+ "smiles_de = data_delaney['smiles']\n",
153
+ "y_de = data_delaney.iloc[:, 1]\n",
154
+ "\n",
155
+ "data_lovric2020 = pd.read_csv('./data/Lovric2020_logS0.csv', dtype={'isomeric_smiles': 'string'})\n",
156
+ "smiles_lo = data_lovric2020['isomeric_smiles']\n",
157
+ "y_lo = data_lovric2020.iloc[:, 1]\n",
158
+ "\n",
159
+ "data_huuskonen = pd.read_csv('./data/huusk.csv', dtype={'SMILES': 'string'})\n",
160
+ "smiles_hu = data_huuskonen['SMILES']\n",
161
+ "y_hu = data_huuskonen.iloc[:, -1].astype('float')"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": 10,
167
+ "metadata": {},
168
+ "outputs": [],
169
+ "source": [
170
+ "def mol3d(mol):\n",
171
+ " mol = Chem.AddHs(mol)\n",
172
+ " optimization_methods = [\n",
173
+ " (AllChem.EmbedMolecule, (mol, AllChem.ETKDGv3()), {}),\n",
174
+ " (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),\n",
175
+ " (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200})\n",
176
+ " ]\n",
177
+ "\n",
178
+ " for method, args, kwargs in optimization_methods:\n",
179
+ " try:\n",
180
+ " method(*args, **kwargs)\n",
181
+ " if mol.GetNumConformers() > 0:\n",
182
+ " return mol\n",
183
+ " except ValueError as e:\n",
184
+ " print(f\"Error: {e} - Trying next optimization method [{method}]\")\n",
185
+ "\n",
186
+ " print(f\"Invalid mol for 3d {'\\033[94m'}{Chem.MolToSmiles(mol)}{'\\033[0m'} - No conformer generated\")\n",
187
+ " return None"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": 11,
193
+ "metadata": {},
194
+ "outputs": [],
195
+ "source": [
196
+ "def convert_smiles_to_mol(smiles, fail_folder=None, index=None, yvalue=None):\n",
197
+ " mol = Chem.MolFromSmiles(smiles)\n",
198
+ " if mol is None:\n",
199
+ " print(f\"[convert_smiles_to_mol] Cannot convert {smiles} to Mols\")\n",
200
+ " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": \"Invalid SMILES\"}\n",
201
+ "\n",
202
+ " try:\n",
203
+ " Chem.Kekulize(mol, clearAromaticFlags=True)\n",
204
+ " isomeric_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)\n",
205
+ " mol = Chem.MolFromSmiles(isomeric_smiles)\n",
206
+ " except Exception as e:\n",
207
+ " print(f\"[convert_smiles_to_mol] failed {smiles} isomeric_smiles by {e}\")\n",
208
+ " if fail_folder and index is not None:\n",
209
+ " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
210
+ " img = Draw.MolToImage(mol)\n",
211
+ " img.save(img_path)\n",
212
+ " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"Isomeric SMILES error: {e}\"}\n",
213
+ "\n",
214
+ " try:\n",
215
+ " Chem.SanitizeMol(mol)\n",
216
+ " except Exception as e:\n",
217
+ " print(f\"[convert_smiles_to_mol] failed {smiles} SanitizeMol by {e}\")\n",
218
+ " if fail_folder and index is not None:\n",
219
+ " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
220
+ " img = Draw.MolToImage(mol)\n",
221
+ " img.save(img_path)\n",
222
+ " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"SanitizeMol error: {e}\"}\n",
223
+ "\n",
224
+ " return mol, None"
225
+ ]
226
+ },
227
+ {
228
+ "cell_type": "code",
229
+ "execution_count": 12,
230
+ "metadata": {},
231
+ "outputs": [],
232
+ "source": [
233
+ "def process_smiles(smiles, yvalue, fail_folder, index):\n",
234
+ " mol, error = convert_smiles_to_mol(smiles, fail_folder, index, yvalue)\n",
235
+ " if error:\n",
236
+ " return None, None, error\n",
237
+ "\n",
238
+ " mol_3d = mol3d(mol)\n",
239
+ " if mol_3d:\n",
240
+ " return smiles, yvalue, None\n",
241
+ " else:\n",
242
+ " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
243
+ " img = Draw.MolToImage(mol)\n",
244
+ " img.save(img_path)\n",
245
+ " return None, None, {\"smiles\": smiles, \"y_value\": yvalue}\n",
246
+ "\n",
247
+ "def process_dataset(smiles_list, y_values, dataset_name, target_path=\"result\", max_workers=None):\n",
248
+ " start = time.time()\n",
249
+ " valid_smiles, valid_y = [], []\n",
250
+ " error_smiles_list = []\n",
251
+ " fail_folder = f\"{target_path}/failed/{dataset_name}\"\n",
252
+ " os.makedirs(fail_folder, exist_ok=True)\n",
253
+ "\n",
254
+ " with ProcessPoolExecutor(max_workers=max_workers) as executor:\n",
255
+ " futures = [\n",
256
+ " executor.submit(process_smiles, smiles, yvalue, fail_folder, i)\n",
257
+ " for i, (smiles, yvalue) in enumerate(zip(smiles_list, y_values))\n",
258
+ " ]\n",
259
+ " for future in as_completed(futures):\n",
260
+ " smiles, yvalue, error = future.result()\n",
261
+ " if error:\n",
262
+ " error_smiles_list.append(error)\n",
263
+ " elif smiles is not None and yvalue is not None:\n",
264
+ " valid_smiles.append(smiles)\n",
265
+ " valid_y.append(yvalue)\n",
266
+ "\n",
267
+ " if error_smiles_list:\n",
268
+ " error_df = pd.DataFrame(error_smiles_list)\n",
269
+ " error_df.to_csv(os.path.join(fail_folder, \"failed_smiles.csv\"), index=False)\n",
270
+ " print(f\" [{dataset_name:<10}] : {time.time()-start:.4f} sec\")\n",
271
+ " return valid_smiles, valid_y"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": 13,
277
+ "metadata": {},
278
+ "outputs": [
279
+ {
280
+ "name": "stdout",
281
+ "output_type": "stream",
282
+ "text": [
283
+ " [ws496 ] : 0.8667 sec\n",
284
+ " [delaney ] : 1.4338 sec\n",
285
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x5596b20fdd50>]\n",
286
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x5596b20febe0>]\n",
287
+ "Invalid mol for 3d \u001b[94m[H]O[C@@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
288
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x5596b20fdd50>]\n",
289
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x5596b20febe0>]\n",
290
+ "Invalid mol for 3d \u001b[94m[H]O[C@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
291
+ " [Lovric2020_logS0] : 8.8955 sec\n",
292
+ " [huusk ] : 1.5899 sec\n"
293
+ ]
294
+ }
295
+ ],
296
+ "source": [
297
+ "smiles_ws, y_ws = process_dataset(smiles_ws, y_ws, \"ws496\", target_path)\n",
298
+ "smiles_de, y_de = process_dataset(smiles_de, y_de, \"delaney\", target_path)\n",
299
+ "smiles_lo, y_lo = process_dataset(smiles_lo, y_lo, \"Lovric2020_logS0\", target_path)\n",
300
+ "smiles_hu, y_hu = process_dataset(smiles_hu, y_hu, \"huusk\", target_path)"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "code",
305
+ "execution_count": 14,
306
+ "metadata": {},
307
+ "outputs": [],
308
+ "source": [
309
+ "LEN_OF_FF = 2048\n",
310
+ "LEN_OF_MA = 167\n",
311
+ "LEN_OF_AV = 512"
312
+ ]
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": 15,
317
+ "metadata": {},
318
+ "outputs": [],
319
+ "source": [
320
+ "def get_fingerprints(mol):\n",
321
+ " if mol is None:\n",
322
+ " return None, None, None\n",
323
+ " \n",
324
+ " morgan_generator = GetMorganGenerator(radius=2, fpSize=LEN_OF_FF)\n",
325
+ " ecfp = morgan_generator.GetFingerprint(mol)\n",
326
+ " ecfp_array = np.zeros((LEN_OF_FF,),dtype=int)\n",
327
+ " DataStructs.ConvertToNumpyArray(ecfp, ecfp_array)\n",
328
+ " \n",
329
+ " maccs = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)\n",
330
+ "\n",
331
+ " avalon_fp = GetAvalonFP(mol)\n",
332
+ " avalon_array = np.zeros((LEN_OF_AV,),dtype=int)\n",
333
+ " DataStructs.ConvertToNumpyArray(avalon_fp, avalon_array)\n",
334
+ " \n",
335
+ " return ecfp_array, maccs, avalon_array\n",
336
+ "\n",
337
+ "def fp_converter(data, use_parallel=True):\n",
338
+ " mols = [Chem.MolFromSmiles(smi) for smi in data]\n",
339
+ " \n",
340
+ " if use_parallel:\n",
341
+ " try: \n",
342
+ " with ProcessPoolExecutor() as executor:\n",
343
+ " results = list(executor.map(get_fingerprints, mols))\n",
344
+ " except Exception as e:\n",
345
+ " print(f\"Parallel processing failed due to: {e}. Falling back to sequential processing.\")\n",
346
+ " use_parallel = False\n",
347
+ " \n",
348
+ " if not use_parallel:\n",
349
+ " results = [get_fingerprints(mol) for mol in mols]\n",
350
+ " \n",
351
+ " ECFP, MACCS, AvalonFP = zip(*results)\n",
352
+ " \n",
353
+ " ECFP_container = np.vstack([arr for arr in ECFP if arr is not None])\n",
354
+ " MACCS_container = np.zeros((len(MACCS), LEN_OF_MA), dtype=int)\n",
355
+ " AvalonFP_container = np.vstack([arr for arr in AvalonFP if arr is not None])\n",
356
+ "\n",
357
+ " for i, fp in enumerate(MACCS):\n",
358
+ " if fp is not None:\n",
359
+ " DataStructs.ConvertToNumpyArray(fp, MACCS_container[i])\n",
360
+ " \n",
361
+ " return mols, ECFP_container, MACCS_container, AvalonFP_container"
362
+ ]
363
+ },
364
+ {
365
+ "cell_type": "code",
366
+ "execution_count": 16,
367
+ "metadata": {},
368
+ "outputs": [
369
+ {
370
+ "data": {
371
+ "text/plain": [
372
+ "0"
373
+ ]
374
+ },
375
+ "execution_count": 16,
376
+ "metadata": {},
377
+ "output_type": "execute_result"
378
+ }
379
+ ],
380
+ "source": [
381
+ "mol_ws, x_ws, MACCS_ws, AvalonFP_ws = fp_converter(smiles_ws,target_path)\n",
382
+ "mol_de, x_de, MACCS_de, AvalonFP_de = fp_converter(smiles_de,target_path)\n",
383
+ "mol_lo, x_lo, MACCS_lo, AvalonFP_lo = fp_converter(smiles_lo,target_path)\n",
384
+ "mol_hu, x_hu, MACCS_hu, AvalonFP_hu = fp_converter(smiles_hu,target_path)\n",
385
+ "del smiles_ws\n",
386
+ "del smiles_de\n",
387
+ "del smiles_lo\n",
388
+ "del smiles_hu\n",
389
+ "gc.collect()"
390
+ ]
391
+ },
392
+ {
393
+ "cell_type": "code",
394
+ "execution_count": 17,
395
+ "metadata": {},
396
+ "outputs": [],
397
+ "source": [
398
+ "def concatenate_to_numpy(*dataframes):\n",
399
+ " numpy_arrays = [df.to_numpy() if isinstance(df, pd.DataFrame) else df for df in dataframes]\n",
400
+ " if not all(isinstance(arr, np.ndarray) for arr in numpy_arrays):\n",
401
+ " raise ValueError(\"All inputs must be either pandas DataFrame or numpy array\")\n",
402
+ " return np.concatenate(numpy_arrays, axis=1)"
403
+ ]
404
+ },
405
+ {
406
+ "cell_type": "code",
407
+ "execution_count": 18,
408
+ "metadata": {},
409
+ "outputs": [
410
+ {
411
+ "data": {
412
+ "text/plain": [
413
+ "0"
414
+ ]
415
+ },
416
+ "execution_count": 18,
417
+ "metadata": {},
418
+ "output_type": "execute_result"
419
+ }
420
+ ],
421
+ "source": [
422
+ "group_nws = concatenate_to_numpy(x_ws, MACCS_ws, AvalonFP_ws)\n",
423
+ "group_nde = concatenate_to_numpy(x_de, MACCS_de, AvalonFP_de)\n",
424
+ "group_nlo = concatenate_to_numpy(x_lo, MACCS_lo, AvalonFP_lo)\n",
425
+ "group_nhu = concatenate_to_numpy(x_hu, MACCS_hu, AvalonFP_hu)\n",
426
+ "del x_ws, MACCS_ws, AvalonFP_ws\n",
427
+ "del x_de, MACCS_de, AvalonFP_de\n",
428
+ "del x_lo, MACCS_lo, AvalonFP_lo\n",
429
+ "del x_hu, MACCS_hu, AvalonFP_hu\n",
430
+ "gc.collect()"
431
+ ]
432
+ },
433
+ {
434
+ "cell_type": "code",
435
+ "execution_count": 19,
436
+ "metadata": {},
437
+ "outputs": [],
438
+ "source": [
439
+ "try:\n",
440
+ " storage = optuna.storages.RDBStorage(url=\"sqlite:///ano_analysis.db\", engine_kwargs={\"connect_args\": {\"timeout\": 10000}})\n",
441
+ " # storage_urls = \"postgresql+psycopg2://postgres:{pwd}@localhost:{num}\"\n",
442
+ " # storage = optuna.storages.RDBStorage(url=storage_urls)\n",
443
+ "except Exception as e:\n",
444
+ " print(f\"Error occurred: {e}\")"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "code",
449
+ "execution_count": 20,
450
+ "metadata": {},
451
+ "outputs": [
452
+ {
453
+ "name": "stdout",
454
+ "output_type": "stream",
455
+ "text": [
456
+ "Best trial for study 'ANO_ws_feature':\n",
457
+ "Best trial value: 0.932153\n",
458
+ "Best trial parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElectrons': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 0, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 1, 'AUTOCORR3D': 1, 'RDF': 0, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 0}\n",
459
+ "Generated fea: [1 1 1 1 0 0 1 0 1 0 0 1 0 1 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 1 1 1 1\n",
460
+ " 1 0 1 0 1 0 1 1 0 0 0 0]\n",
461
+ "Fixed features: ['MolWt', 'MolLogP', 'MolMR', 'TPSA']\n"
462
+ ]
463
+ }
464
+ ],
465
+ "source": [
466
+ "try:\n",
467
+ " ws_fea = selection_fromStudy_compress('ANO_ws_feature', storage)\n",
468
+ "except Exception as e:\n",
469
+ " print(f\"Error occurred: {e}\") \n",
470
+ " ws_fea =[\n",
471
+ " 1, # 1 - \"MolWeight\"\n",
472
+ " 1, # 2 - \"Mol_logP\"\n",
473
+ " 1, # 3 - \"Mol_MR\"\n",
474
+ " 1, # 4 - \"Mol_TPSA\"\n",
475
+ " 0, # 5 - \"NumRotatableBonds\"\n",
476
+ " 0, # 6 - \"HeavyAtomCount\"\n",
477
+ " 0, # 7 - \"NumHAcceptors\"\n",
478
+ " 0, # 8 - \"NumHDonors\"\n",
479
+ " 0, # 9 - \"NumHeteroatoms\"\n",
480
+ " 1, # 10 - \"NumValenceElec\"\n",
481
+ " 1, # 11 - \"NHOHCount\"\n",
482
+ " 1, # 12 - \"NOCount\"\n",
483
+ " 0, # 13 - \"RingCount\"\n",
484
+ " 1, # 14 - \"NumAromaticRings\"\n",
485
+ " 0, # 15 - \"NumSaturatedRings\"\n",
486
+ " 0, # 16 - \"NumAliphaticRings\"\n",
487
+ " 0, # 17 - \"LabuteASA\"\n",
488
+ " 0, # 18 - \"NumValenceElectrons\"\n",
489
+ " 1, # 19 - \"BalabanJ\"\n",
490
+ " 1, # 20 - \"BertzCT\"\n",
491
+ " 0, # 21 - \"Ipc\"\n",
492
+ " 0, # 22 - \"kappa_Series[1-3]_ind\"\n",
493
+ " 1, # 23 - \"Chi_Series[13]_ind\"\n",
494
+ " 1, # 24 - \"Phi\"\n",
495
+ " 0, # 25 - \"HallKierAlpha\"\n",
496
+ " 0, # 26 - \"NumAmideBonds\"\n",
497
+ " 1, # 27 - \"FractionCSP3\"\n",
498
+ " 0, # 28 - \"NumSpiroAtoms\"\n",
499
+ " 1, # 29 - \"NumBridgeheadAtoms\"\n",
500
+ " 1, # 30 - \"PEOE_VSA_Series[1-14]_ind\"\n",
501
+ " 1, # 31 - \"SMR_VSA_Series[1-10]_ind\"\n",
502
+ " 0, # 32 - \"SlogP_VSA_Series[1-12]_ind\"\n",
503
+ " 1, # 33 - \"EState_VSA_Series[1-11]_ind\"\n",
504
+ " 0, # 34 - \"VSA_EState_Series[1-10]_ind\"\n",
505
+ " 0, # 35 - \"Asphericity\"\n",
506
+ " 1, # 36 - \"PBF\"\n",
507
+ " 0, # 37 - \"RadiusOfGyration\"\n",
508
+ " 0, # 38 - \"InertialShapeFactor\"\n",
509
+ " 1, # 39 - \"Eccentricity\"\n",
510
+ " 0, # 40 - \"SpherocityIndex\"\n",
511
+ " 1, # 41 - \"PMI_series[1-3]_ind\"\n",
512
+ " 0, # 42 - \"NPR_series[1-2]_ind\"\n",
513
+ " 0, # 43 - \"MQNs\"\n",
514
+ " 0, # 44 - \"AUTOCORR2D\"\n",
515
+ " 1, # 45 - \"BCUT2D\"\n",
516
+ " 0, # 46 - \"AUTOCORR3D\"\n",
517
+ " 1, # 47 - \"RDF\"\n",
518
+ " 0, # 48 - \"MORSE\"\n",
519
+ " 1, # 49 - \"WHIM\"\n",
520
+ " 0, # 50 - \"GETAWAY\" \n",
521
+ " ]"
522
+ ]
523
+ },
524
+ {
525
+ "cell_type": "code",
526
+ "execution_count": 21,
527
+ "metadata": {},
528
+ "outputs": [
529
+ {
530
+ "name": "stdout",
531
+ "output_type": "stream",
532
+ "text": [
533
+ "Best trial for study 'ANO_de_feature':\n",
534
+ "Best trial value: 0.973052\n",
535
+ "Best trial parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElectrons': 1, 'NHOHCount': 1, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 0, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 1}\n",
536
+ "Generated fea: [1 1 1 1 1 1 0 0 1 1 1 0 0 1 1 1 1 0 0 0 0 1 1 0 1 1 1 0 1 1 0 0 0 1 1 1 1\n",
537
+ " 1 1 0 1 1 1 0 0 0 1 0 1]\n",
538
+ "Fixed features: ['MolWt', 'MolLogP', 'MolMR', 'TPSA']\n"
539
+ ]
540
+ }
541
+ ],
542
+ "source": [
543
+ "try:\n",
544
+ " de_fea = selection_fromStudy_compress('ANO_de_feature', storage)\n",
545
+ "except Exception as e:\n",
546
+ " print(f\"Error occurred: {e}\") \n",
547
+ " de_fea =[\n",
548
+ " 1, # 1 - \"MolWeight\"\n",
549
+ " 1, # 2 - \"Mol_logP\"\n",
550
+ " 1, # 3 - \"Mol_MR\"\n",
551
+ " 1, # 4 - \"Mol_TPSA\"\n",
552
+ " 0, # 5 - \"NumRotatableBonds\"\n",
553
+ " 0, # 6 - \"HeavyAtomCount\"\n",
554
+ " 1, # 7 - \"NumHAcceptors\"\n",
555
+ " 1, # 8 - \"NumHDonors\"\n",
556
+ " 0, # 9 - \"NumHeteroatoms\"\n",
557
+ " 0, # 10 - \"NumValenceElec\"\n",
558
+ " 1, # 11 - \"NHOHCount\"\n",
559
+ " 0, # 12 - \"NOCount\"\n",
560
+ " 0, # 13 - \"RingCount\"\n",
561
+ " 0, # 14 - \"NumAromaticRings\"\n",
562
+ " 0, # 15 - \"NumSaturatedRings\"\n",
563
+ " 1, # 16 - \"NumAliphaticRings\"\n",
564
+ " 1, # 17 - \"LabuteASA\"\n",
565
+ " 0, # 18 - \"NumValenceElectrons\"\n",
566
+ " 1, # 19 - \"BalabanJ\"\n",
567
+ " 1, # 20 - \"BertzCT\"\n",
568
+ " 1, # 21 - \"Ipc\"\n",
569
+ " 0, # 22 - \"kappa_Series[1-3]_ind\"\n",
570
+ " 0, # 23 - \"Chi_Series[13]_ind\"\n",
571
+ " 0, # 24 - \"Phi\"\n",
572
+ " 1, # 25 - \"HallKierAlpha\"\n",
573
+ " 1, # 26 - \"NumAmideBonds\"\n",
574
+ " 1, # 27 - \"FractionCSP3\"\n",
575
+ " 1, # 28 - \"NumSpiroAtoms\"\n",
576
+ " 0, # 29 - \"NumBridgeheadAtoms\"\n",
577
+ " 1, # 30 - \"PEOE_VSA_Series[1-14]_ind\"\n",
578
+ " 1, # 31 - \"SMR_VSA_Series[1-10]_ind\"\n",
579
+ " 0, # 32 - \"SlogP_VSA_Series[1-12]_ind\"\n",
580
+ " 0, # 33 - \"EState_VSA_Series[1-11]_ind\"\n",
581
+ " 0, # 34 - \"VSA_EState_Series[1-10]_ind\"\n",
582
+ " 1, # 35 - \"Asphericity\"\n",
583
+ " 0, # 36 - \"PBF\"\n",
584
+ " 0, # 37 - \"RadiusOfGyration\"\n",
585
+ " 0, # 38 - \"InertialShapeFactor\"\n",
586
+ " 0, # 39 - \"Eccentricity\"\n",
587
+ " 0, # 40 - \"SpherocityIndex\"\n",
588
+ " 0, # 41 - \"PMI_series[1-3]_ind\"\n",
589
+ " 1, # 42 - \"NPR_series[1-2]_ind\"\n",
590
+ " 0, # 43 - \"MQNs\"\n",
591
+ " 1, # 44 - \"AUTOCORR2D\"\n",
592
+ " 1, # 45 - \"BCUT2D\"\n",
593
+ " 0, # 46 - \"AUTOCORR3D\"\n",
594
+ " 1, # 47 - \"RDF\"\n",
595
+ " 0, # 48 - \"MORSE\"\n",
596
+ " 1, # 49 - \"WHIM\"\n",
597
+ " 0, # 50 - \"GETAWAY\" \n",
598
+ " ]"
599
+ ]
600
+ },
601
+ {
602
+ "cell_type": "code",
603
+ "execution_count": 22,
604
+ "metadata": {},
605
+ "outputs": [
606
+ {
607
+ "name": "stdout",
608
+ "output_type": "stream",
609
+ "text": [
610
+ "Best trial for study 'ANO_lo_feature':\n",
611
+ "Best trial value: 0.843203\n",
612
+ "Best trial parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 0, 'NumHDonors': 1, 'NumHeteroatoms': 0, 'NumValenceElectrons': 0, 'NHOHCount': 0, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]': 1, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 0, 'PBF': 0, 'RadiusOfGyration': 1, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 0}\n",
613
+ "Generated fea: [1 1 1 1 0 0 0 1 0 0 0 0 0 1 1 1 0 1 0 1 1 1 1 1 0 0 0 1 0 0 1 0 1 1 1 1 0\n",
614
+ " 0 1 1 1 1 1 0 0 0 1 0 0]\n",
615
+ "Fixed features: ['MolWt', 'MolLogP', 'MolMR', 'TPSA']\n"
616
+ ]
617
+ }
618
+ ],
619
+ "source": [
620
+ "try:\n",
621
+ " lo_fea = selection_fromStudy_compress('ANO_lo_feature', storage)\n",
622
+ "except Exception as e:\n",
623
+ " print(f\"Error occurred: {e}\") \n",
624
+ " lo_fea =[\n",
625
+ " 1, # 1 - \"MolWeight\"\n",
626
+ " 1, # 2 - \"Mol_logP\"\n",
627
+ " 1, # 3 - \"Mol_MR\"\n",
628
+ " 1, # 4 - \"Mol_TPSA\"\n",
629
+ " 1, # 5 - \"NumRotatableBonds\"\n",
630
+ " 0, # 6 - \"HeavyAtomCount\"\n",
631
+ " 0, # 7 - \"NumHAcceptors\"\n",
632
+ " 0, # 8 - \"NumHDonors\"\n",
633
+ " 1, # 9 - \"NumHeteroatoms\"\n",
634
+ " 1, # 10 - \"NumValenceElec\"\n",
635
+ " 1, # 11 - \"NHOHCount\"\n",
636
+ " 1, # 12 - \"NOCount\"\n",
637
+ " 0, # 13 - \"RingCount\"\n",
638
+ " 1, # 14 - \"NumAromaticRings\"\n",
639
+ " 0, # 15 - \"NumSaturatedRings\"\n",
640
+ " 0, # 16 - \"NumAliphaticRings\"\n",
641
+ " 0, # 17 - \"LabuteASA\"\n",
642
+ " 1, # 18 - \"NumValenceElectrons\"\n",
643
+ " 0, # 19 - \"BalabanJ\"\n",
644
+ " 0, # 20 - \"BertzCT\"\n",
645
+ " 0, # 21 - \"Ipc\"\n",
646
+ " 1, # 22 - \"kappa_Series[1-3]_ind\"\n",
647
+ " 0, # 23 - \"Chi_Series[13]_ind\"\n",
648
+ " 1, # 24 - \"Phi\"\n",
649
+ " 1, # 25 - \"HallKierAlpha\"\n",
650
+ " 0, # 26 - \"NumAmideBonds\"\n",
651
+ " 1, # 27 - \"FractionCSP3\"\n",
652
+ " 1, # 28 - \"NumSpiroAtoms\"\n",
653
+ " 0, # 29 - \"NumBridgeheadAtoms\"\n",
654
+ " 1, # 30 - \"PEOE_VSA_Series[1-14]_ind\"\n",
655
+ " 1, # 31 - \"SMR_VSA_Series[1-10]_ind\"\n",
656
+ " 1, # 32 - \"SlogP_VSA_Series[1-12]_ind\"\n",
657
+ " 0, # 33 - \"EState_VSA_Series[1-11]_ind\"\n",
658
+ " 1, # 34 - \"VSA_EState_Series[1-10]_ind\"\n",
659
+ " 1, # 35 - \"Asphericity\"\n",
660
+ " 0, # 36 - \"PBF\"\n",
661
+ " 1, # 37 - \"RadiusOfGyration\"\n",
662
+ " 0, # 38 - \"InertialShapeFactor\"\n",
663
+ " 0, # 39 - \"Eccentricity\"\n",
664
+ " 1, # 40 - \"SpherocityIndex\"\n",
665
+ " 0, # 41 - \"PMI_series[1-3]_ind\"\n",
666
+ " 1, # 42 - \"NPR_series[1-2]_ind\"\n",
667
+ " 0, # 43 - \"MQNs\"\n",
668
+ " 0, # 44 - \"AUTOCORR2D\"\n",
669
+ " 0, # 45 - \"BCUT2D\"\n",
670
+ " 0, # 46 - \"AUTOCORR3D\"\n",
671
+ " 1, # 47 - \"RDF\"\n",
672
+ " 0, # 48 - \"MORSE\"\n",
673
+ " 0, # 49 - \"WHIM\"\n",
674
+ " 0, # 50 - \"GETAWAY\" \n",
675
+ " ]"
676
+ ]
677
+ },
678
+ {
679
+ "cell_type": "code",
680
+ "execution_count": 23,
681
+ "metadata": {},
682
+ "outputs": [
683
+ {
684
+ "name": "stdout",
685
+ "output_type": "stream",
686
+ "text": [
687
+ "Best trial for study 'ANO_hu_feature':\n",
688
+ "Best trial value: 0.939862\n",
689
+ "Best trial parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElectrons': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 1, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 1, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 1, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 1}\n",
690
+ "Generated fea: [1 1 1 1 0 0 1 0 1 0 0 1 1 0 1 1 0 0 1 1 0 1 1 1 0 0 0 0 1 0 1 1 0 1 1 1 1\n",
691
+ " 0 0 1 1 1 0 1 0 1 0 0 1]\n",
692
+ "Fixed features: ['MolWt', 'MolLogP', 'MolMR', 'TPSA']\n"
693
+ ]
694
+ }
695
+ ],
696
+ "source": [
697
+ "try:\n",
698
+ " hu_fea = selection_fromStudy_compress('ANO_hu_feature', storage)\n",
699
+ "except Exception as e:\n",
700
+ " print(f\"Error occurred: {e}\") \n",
701
+ " hu_fea =[\n",
702
+ " 1, # 1 - \"MolWeight\"\n",
703
+ " 1, # 2 - \"Mol_logP\"\n",
704
+ " 1, # 3 - \"Mol_MR\"\n",
705
+ " 1, # 4 - \"Mol_TPSA\"\n",
706
+ " 0, # 5 - \"NumRotatableBonds\"\n",
707
+ " 1, # 6 - \"HeavyAtomCount\"\n",
708
+ " 0, # 7 - \"NumHAcceptors\"\n",
709
+ " 1, # 8 - \"NumHDonors\"\n",
710
+ " 1, # 9 - \"NumHeteroatoms\"\n",
711
+ " 1, # 10 - \"NumValenceElec\"\n",
712
+ " 0, # 11 - \"NHOHCount\"\n",
713
+ " 1, # 12 - \"NOCount\"\n",
714
+ " 1, # 13 - \"RingCount\"\n",
715
+ " 1, # 14 - \"NumAromaticRings\"\n",
716
+ " 1, # 15 - \"NumSaturatedRings\"\n",
717
+ " 0, # 16 - \"NumAliphaticRings\"\n",
718
+ " 0, # 17 - \"LabuteASA\"\n",
719
+ " 0, # 18 - \"NumValenceElectrons\"\n",
720
+ " 1, # 19 - \"BalabanJ\"\n",
721
+ " 1, # 20 - \"BertzCT\"\n",
722
+ " 1, # 21 - \"Ipc\"\n",
723
+ " 0, # 22 - \"kappa_Series[1-3]_ind\"\n",
724
+ " 1, # 23 - \"Chi_Series[13]_ind\"\n",
725
+ " 1, # 24 - \"Phi\"\n",
726
+ " 0, # 25 - \"HallKierAlpha\"\n",
727
+ " 1, # 26 - \"NumAmideBonds\"\n",
728
+ " 0, # 27 - \"FractionCSP3\"\n",
729
+ " 1, # 28 - \"NumSpiroAtoms\"\n",
730
+ " 0, # 29 - \"NumBridgeheadAtoms\"\n",
731
+ " 1, # 30 - \"PEOE_VSA_Series[1-14]_ind\"\n",
732
+ " 1, # 31 - \"SMR_VSA_Series[1-10]_ind\"\n",
733
+ " 1, # 32 - \"SlogP_VSA_Series[1-12]_ind\"\n",
734
+ " 1, # 33 - \"EState_VSA_Series[1-11]_ind\"\n",
735
+ " 1, # 34 - \"VSA_EState_Series[1-10]_ind\"\n",
736
+ " 1, # 35 - \"Asphericity\"\n",
737
+ " 1, # 36 - \"PBF\"\n",
738
+ " 1, # 37 - \"RadiusOfGyration\"\n",
739
+ " 1, # 38 - \"InertialShapeFactor\"\n",
740
+ " 0, # 39 - \"Eccentricity\"\n",
741
+ " 0, # 40 - \"SpherocityIndex\"\n",
742
+ " 1, # 41 - \"PMI_series[1-3]_ind\"\n",
743
+ " 1, # 42 - \"NPR_series[1-2]_ind\"\n",
744
+ " 1, # 43 - \"MQNs\"\n",
745
+ " 1, # 44 - \"AUTOCORR2D\"\n",
746
+ " 1, # 45 - \"BCUT2D\"\n",
747
+ " 1, # 46 - \"AUTOCORR3D\"\n",
748
+ " 1, # 47 - \"RDF\"\n",
749
+ " 0, # 48 - \"MORSE\"\n",
750
+ " 0, # 49 - \"WHIM\"\n",
751
+ " 0, # 50 - \"GETAWAY\" \n",
752
+ " ]"
753
+ ]
754
+ },
755
+ {
756
+ "cell_type": "code",
757
+ "execution_count": 24,
758
+ "metadata": {},
759
+ "outputs": [
760
+ {
761
+ "name": "stdout",
762
+ "output_type": "stream",
763
+ "text": [
764
+ "BCUT2D calculation failed: ERROR: No Gasteiger Partial Charge parameters for Element: Sn Mode: sp3\n"
765
+ ]
766
+ },
767
+ {
768
+ "data": {
769
+ "text/plain": [
770
+ "0"
771
+ ]
772
+ },
773
+ "execution_count": 24,
774
+ "metadata": {},
775
+ "output_type": "execute_result"
776
+ }
777
+ ],
778
+ "source": [
779
+ "new_ws = selection_data_descriptor_compress(ws_fea, group_nws, mol_ws, 'ws')\n",
780
+ "new_de = selection_data_descriptor_compress(de_fea, group_nde, mol_de, 'de')\n",
781
+ "new_lo = selection_data_descriptor_compress(lo_fea, group_nlo, mol_lo, 'lo')\n",
782
+ "new_hu = selection_data_descriptor_compress(hu_fea, group_nhu, mol_hu, 'hu')\n",
783
+ "del ws_fea, group_nws, mol_ws\n",
784
+ "del de_fea, group_nde, mol_de\n",
785
+ "del lo_fea, group_nlo, mol_lo\n",
786
+ "del hu_fea, group_nhu, mol_hu\n",
787
+ "gc.collect()\n",
788
+ "# 6m 10.3s"
789
+ ]
790
+ },
791
+ {
792
+ "cell_type": "code",
793
+ "execution_count": 25,
794
+ "metadata": {},
795
+ "outputs": [],
796
+ "source": [
797
+ "import logging\n",
798
+ "import warnings\n",
799
+ "\n",
800
+ "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
801
+ "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
802
+ "os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'\n",
803
+ "os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'\n",
804
+ "os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices'\n",
805
+ "os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/local/cuda --xla_gpu_force_compilation_parallelism=1'\n",
806
+ "os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'\n",
807
+ "os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'\n",
808
+ "os.environ['TF_NUMA_NODES'] = '1'\n",
809
+ "\n",
810
+ "warnings.filterwarnings('ignore')\n",
811
+ "\n",
812
+ "warnings.simplefilter(action='ignore', category=FutureWarning)\n",
813
+ "\n",
814
+ "logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
815
+ "\n",
816
+ "tf.get_logger().setLevel('ERROR')\n",
817
+ "tf.autograph.set_verbosity(0)\n",
818
+ "\n",
819
+ "def suppress_warnings(condition=True):\n",
820
+ " if condition:\n",
821
+ " logging.getLogger('tensorflow').setLevel(logging.ERROR)\n",
822
+ " os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n",
823
+ " else:\n",
824
+ " logging.getLogger('tensorflow').setLevel(logging.WARNING)\n",
825
+ " os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'\n",
826
+ "\n",
827
+ "suppress_warnings(condition=True)"
828
+ ]
829
+ },
830
+ {
831
+ "cell_type": "code",
832
+ "execution_count": 26,
833
+ "metadata": {},
834
+ "outputs": [],
835
+ "source": [
836
+ "BATCHSIZE = 16\n",
837
+ "EPOCHS = 1000\n",
838
+ "# lr = 0.0001\n",
839
+ "# decay = 1e-4"
840
+ ]
841
+ },
842
+ {
843
+ "cell_type": "code",
844
+ "execution_count": 27,
845
+ "metadata": {},
846
+ "outputs": [],
847
+ "source": [
848
+ "# def new_model(trial):\n",
849
+ "# n_layers = trial.suggest_int(\"n_layers\", 1, 3)\n",
850
+ "# model = tf.keras.Sequential()\n",
851
+ "# layer_dropout = trial.suggest_int(\"layer_dropout\", 0,1)\n",
852
+ "# for i in range(n_layers):\n",
853
+ "# num_hidden = trial.suggest_int(\"n_units_l_{}\".format(i), 2, 1e4-1)\n",
854
+ "# num_decay = trial.suggest_categorical(\"n_decay_l_{}\".format(i), [1e-3,1e-4,1e-5])\n",
855
+ "# model.add(\n",
856
+ "# tf.keras.layers.Dense(\n",
857
+ "# num_hidden,\n",
858
+ "# activation=\"relu\",\n",
859
+ "# kernel_initializer='glorot_uniform',\n",
860
+ "# kernel_regularizer=tf.keras.regularizers.l2(num_decay),\n",
861
+ "# )\n",
862
+ "# )\n",
863
+ "# if layer_dropout==1:\n",
864
+ "# fdropout1 = trial.suggest_categorical(\"F_dropout_{}\".format(i),[0.1,0.2])\n",
865
+ "# model.add(Dropout(rate=fdropout1))\n",
866
+ "# if layer_dropout==0:\n",
867
+ "# fdropout2 = trial.suggest_categorical(\"Final_dropout\",[0.1,0.2])\n",
868
+ "# model.add(Dropout(rate=fdropout2))\n",
869
+ "# model.add(Dense(units=1))\n",
870
+ "# learningr = trial.suggest_categorical(\"Learning_rate\",[0.01,0.001,0.0001])\n",
871
+ "# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learningr),\n",
872
+ "# loss=tf.keras.losses.MeanSquaredError(),\n",
873
+ "# metrics=[tf.keras.losses.MeanSquaredError(),\n",
874
+ "# tf.keras.losses.MeanAbsoluteError(),\n",
875
+ "# tf.keras.metrics.RootMeanSquaredError()])\n",
876
+ "# return model\n",
877
+ "\n",
878
+ "def search_model(trial, input_dim):\n",
879
+ " n_layers = trial.suggest_int(\"n_layers\", 1, 3)\n",
880
+ " model = tf.keras.Sequential()\n",
881
+ " model.add(tf.keras.layers.Input(shape=(input_dim,)))\n",
882
+ " layer_dropout = trial.suggest_int(\"layer_dropout\", 0, 1)\n",
883
+ " \n",
884
+ " for i in range(n_layers):\n",
885
+ " num_hidden = trial.suggest_int(f\"n_units_l_{i}\", 2, 9999)\n",
886
+ " num_decay = trial.suggest_categorical(f\"n_decay_l_{i}\", [1e-4,1e-5,1e-6])\n",
887
+ " model.add(\n",
888
+ " tf.keras.layers.Dense(\n",
889
+ " num_hidden,\n",
890
+ " # activation=\"relu\",\n",
891
+ " kernel_initializer='glorot_uniform',\n",
892
+ " kernel_regularizer=tf.keras.regularizers.l2(num_decay),\n",
893
+ " )\n",
894
+ " )\n",
895
+ " model.add(tf.keras.layers.LeakyReLU(alpha=0.01))\n",
896
+ " if layer_dropout == 1:\n",
897
+ " fdropout1 = trial.suggest_categorical(f\"F_dropout_{i}\", [0.1, 0.2, 0.3])\n",
898
+ " model.add(tf.keras.layers.Dropout(rate=fdropout1))\n",
899
+ " \n",
900
+ " if layer_dropout == 0:\n",
901
+ " fdropout2 = trial.suggest_categorical(\"last_dropout\", [0.1, 0.2, 0.3])\n",
902
+ " model.add(tf.keras.layers.Dropout(rate=fdropout2))\n",
903
+ " \n",
904
+ " model.add(tf.keras.layers.Dense(units=1))\n",
905
+ " # # Colab\n",
906
+ " # learningr = trial.suggest_categorical(\"Learning_rate\",[0.01,0.001,0.0001])\n",
907
+ " # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learningr),\n",
908
+ " # loss=tf.keras.losses.MeanSquaredError(),\n",
909
+ " # metrics=[tf.keras.losses.MeanSquaredError(),\n",
910
+ " # tf.keras.losses.MeanAbsoluteError(),\n",
911
+ " # tf.keras.metrics.RootMeanSquaredError()])\n",
912
+ " return model\n",
913
+ "\n",
914
+ "\n",
915
+ "def save_model(trial, x_data):\n",
916
+ " model_path = \"save_model/full_model.keras\"\n",
917
+ " \n",
918
+ " if not os.path.exists(model_path):\n",
919
+ " try:\n",
920
+ " model = search_model(trial, x_data.shape[1])\n",
921
+ " os.makedirs(\"save_model\", exist_ok=True)\n",
922
+ " model.save(model_path)\n",
923
+ " print(f\"Model successfully saved to {model_path}\")\n",
924
+ " except Exception as e:\n",
925
+ " print(f\"Error saving model: {e}\")\n",
926
+ " else:\n",
927
+ " print(f\"Model already exists at {model_path}\")\n",
928
+ " os.remove(model_path)\n",
929
+ " save_model(trial, x_data)"
930
+ ]
931
+ },
932
+ {
933
+ "cell_type": "code",
934
+ "execution_count": 28,
935
+ "metadata": {},
936
+ "outputs": [],
937
+ "source": [
938
+ "from sklearn.model_selection import train_test_split\n",
939
+ "xtr_fws, xte_fws, ytr_fws, yte_fws = train_test_split(new_ws, y_ws, test_size = 0.1, random_state = 42)\n",
940
+ "xtr_fde, xte_fde, ytr_fde, yte_fde = train_test_split(new_de, y_de, test_size = 0.1, random_state = 42)\n",
941
+ "xtr_flo, xte_flo, ytr_flo, yte_flo = train_test_split(new_lo, y_lo, test_size = 0.1, random_state = 42)\n",
942
+ "xtr_fhu, xte_fhu, ytr_fhu, yte_fhu = train_test_split(new_hu, y_hu, test_size = 0.1, random_state = 42)"
943
+ ]
944
+ },
945
+ {
946
+ "cell_type": "code",
947
+ "execution_count": 29,
948
+ "metadata": {},
949
+ "outputs": [],
950
+ "source": [
951
+ "# # Colab\n",
952
+ "# def preprocess_data(xtr, ytr):\n",
953
+ "# dataset = tf.data.Dataset.from_tensor_slices((xtr, ytr))\n",
954
+ "# dataset = dataset.shuffle(buffer_size=len(xtr)).batch(BATCHSIZE).cache().prefetch(tf.data.AUTOTUNE)\n",
955
+ "# return dataset\n",
956
+ "\n",
957
+ "# cb = tf.keras.callbacks.EarlyStopping(\n",
958
+ "# monitor='loss', \n",
959
+ "# patience=5,\n",
960
+ "# restore_best_weights=True,\n",
961
+ "# # min_delta=0.001,\n",
962
+ "# mode='min',\n",
963
+ "# verbose=1\n",
964
+ "# )"
965
+ ]
966
+ },
967
+ {
968
+ "cell_type": "code",
969
+ "execution_count": 30,
970
+ "metadata": {},
971
+ "outputs": [],
972
+ "source": [
973
+ "# # Colab\n",
974
+ "# def objective_ws_network(trial):\n",
975
+ "# tf.keras.backend.clear_session()\n",
976
+ "# model = search_model(trial, xtr_fws.shape[1])\n",
977
+ "# train_data = preprocess_data(xtr_fws, ytr_fws)\n",
978
+ "# model.fit(\n",
979
+ "# train_data,\n",
980
+ "# batch_size=BATCHSIZE,\n",
981
+ "# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n",
982
+ "# epochs=EPOCHS,\n",
983
+ "# verbose=0,\n",
984
+ "# )\n",
985
+ "# y_pred_search = model.predict(xte_fws, verbose=0)\n",
986
+ "# score = r2_score(yte_fws, y_pred_search)\n",
987
+ "# del model\n",
988
+ "# tf.keras.backend.clear_session()\n",
989
+ "# gc.collect()\n",
990
+ "# return score"
991
+ ]
992
+ },
993
+ {
994
+ "cell_type": "code",
995
+ "execution_count": 31,
996
+ "metadata": {},
997
+ "outputs": [],
998
+ "source": [
999
+ "# # Colab\n",
1000
+ "# def objective_de_network(trial):\n",
1001
+ "# tf.keras.backend.clear_session()\n",
1002
+ "# model = search_model(trial, xtr_fde.shape[1])\n",
1003
+ "# train_data = preprocess_data(xtr_fde, ytr_fde)\n",
1004
+ "# model.fit(\n",
1005
+ "# train_data,\n",
1006
+ "# batch_size=BATCHSIZE,\n",
1007
+ "# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n",
1008
+ "# epochs=EPOCHS,\n",
1009
+ "# verbose=0,\n",
1010
+ "# )\n",
1011
+ "# y_pred_search = model.predict(xte_fde, verbose=0)\n",
1012
+ "# score = r2_score(yte_fde, y_pred_search)\n",
1013
+ "# del model\n",
1014
+ "# tf.keras.backend.clear_session()\n",
1015
+ "# gc.collect()\n",
1016
+ "# return score"
1017
+ ]
1018
+ },
1019
+ {
1020
+ "cell_type": "code",
1021
+ "execution_count": 32,
1022
+ "metadata": {},
1023
+ "outputs": [],
1024
+ "source": [
1025
+ "# # Colab\n",
1026
+ "# def objective_lo_network(trial):\n",
1027
+ "# tf.keras.backend.clear_session()\n",
1028
+ "# model = search_model(trial, xtr_flo.shape[1])\n",
1029
+ "# train_data = preprocess_data(xtr_flo, ytr_flo)\n",
1030
+ "# model.fit(\n",
1031
+ "# train_data,\n",
1032
+ "# batch_size=BATCHSIZE,\n",
1033
+ "# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n",
1034
+ "# epochs=EPOCHS,\n",
1035
+ "# verbose=0,\n",
1036
+ "# )\n",
1037
+ "# y_pred_search = model.predict(xte_flo, verbose=0)\n",
1038
+ "# score = r2_score(yte_flo, y_pred_search)\n",
1039
+ "# del model\n",
1040
+ "# tf.keras.backend.clear_session()\n",
1041
+ "# gc.collect()\n",
1042
+ "# return score"
1043
+ ]
1044
+ },
1045
+ {
1046
+ "cell_type": "code",
1047
+ "execution_count": 33,
1048
+ "metadata": {},
1049
+ "outputs": [],
1050
+ "source": [
1051
+ "# # Colab\n",
1052
+ "# def objective_hu_network(trial):\n",
1053
+ "# tf.keras.backend.clear_session()\n",
1054
+ "# model = search_model(trial, xtr_fhu.shape[1])\n",
1055
+ "# train_data = preprocess_data(xtr_fhu, ytr_fhu)\n",
1056
+ "# model.fit(\n",
1057
+ "# train_data,\n",
1058
+ "# batch_size=BATCHSIZE,\n",
1059
+ "# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],\n",
1060
+ "# epochs=EPOCHS,\n",
1061
+ "# verbose=0,\n",
1062
+ "# )\n",
1063
+ "# y_pred_search = model.predict(xte_fhu, verbose=0)\n",
1064
+ "# score = r2_score(yte_fhu, y_pred_search)\n",
1065
+ "# del model\n",
1066
+ "# tf.keras.backend.clear_session()\n",
1067
+ "# gc.collect()\n",
1068
+ "# return score"
1069
+ ]
1070
+ },
1071
+ {
1072
+ "cell_type": "code",
1073
+ "execution_count": 34,
1074
+ "metadata": {},
1075
+ "outputs": [],
1076
+ "source": [
1077
+ "def objective_ws_network(trial):\n",
1078
+ " r2_result = None\n",
1079
+ " current_step = 0 \n",
1080
+ " try:\n",
1081
+ " y_true = np.asarray(y_ws).astype('float')\n",
1082
+ " np.save('new_fps.npy', new_ws)\n",
1083
+ " np.save('y_true.npy', y_true)\n",
1084
+ " \n",
1085
+ " save_model(trial, new_ws)\n",
1086
+ "\n",
1087
+ " lr = trial.suggest_categorical(f\"lr\", [0.001,0.0001,0.00001])\n",
1088
+ "\n",
1089
+ " result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
1090
+ " str(BATCHSIZE), str(EPOCHS), \n",
1091
+ " str(lr), \n",
1092
+ " 'new_fps.npy', 'y_true.npy', str(trial.number)],\n",
1093
+ " stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
1094
+ " \n",
1095
+ " if result.stderr:\n",
1096
+ " filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') \n",
1097
+ " if \"could not open file to read NUMA node\" not in line \n",
1098
+ " and \"Your kernel may have been built without NUMA support\" not in line])\n",
1099
+ " if filtered_stderr:\n",
1100
+ " print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
1101
+ "\n",
1102
+ " lines = result.stdout.splitlines()\n",
1103
+ " for line in lines:\n",
1104
+ " if line.startswith(\"intermediate_value:\"):\n",
1105
+ " _, step, value = line.split(\":\")\n",
1106
+ " step = int(step)\n",
1107
+ " value = float(value)\n",
1108
+ " current_step = step\n",
1109
+ " \n",
1110
+ " trial.report(value, step)\n",
1111
+ " \n",
1112
+ " if trial.should_prune():\n",
1113
+ " raise optuna.exceptions.TrialPruned()\n",
1114
+ "\n",
1115
+ " for line in reversed(lines):\n",
1116
+ " if \"R2:\" in line:\n",
1117
+ " if \"(prune)\" in line:\n",
1118
+ " raise optuna.exceptions.TrialPruned()\n",
1119
+ " else:\n",
1120
+ " r2_result = float(line.split(\":\")[1].strip())\n",
1121
+ " break\n",
1122
+ "\n",
1123
+ " except optuna.exceptions.TrialPruned:\n",
1124
+ " print(f\"Trial pruned at step {current_step}\")\n",
1125
+ " raise\n",
1126
+ " except Exception as e:\n",
1127
+ " print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
1128
+ " r2_result = 0.0\n",
1129
+ "\n",
1130
+ " gc.collect()\n",
1131
+ " return r2_result if r2_result is not None else 0.0"
1132
+ ]
1133
+ },
1134
+ {
1135
+ "cell_type": "code",
1136
+ "execution_count": 35,
1137
+ "metadata": {},
1138
+ "outputs": [],
1139
+ "source": [
1140
+ "def objective_de_network(trial):\n",
1141
+ " r2_result = None\n",
1142
+ " current_step = 0 \n",
1143
+ " try:\n",
1144
+ " y_true = np.asarray(y_de).astype('float')\n",
1145
+ " np.save('new_fps.npy', new_de)\n",
1146
+ " np.save('y_true.npy', y_true)\n",
1147
+ " \n",
1148
+ " save_model(trial, new_de)\n",
1149
+ "\n",
1150
+ " lr = trial.suggest_categorical(f\"lr\", [0.001,0.0001,0.00001])\n",
1151
+ "\n",
1152
+ " result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
1153
+ " str(BATCHSIZE), str(EPOCHS), \n",
1154
+ " str(lr), \n",
1155
+ " 'new_fps.npy', 'y_true.npy', str(trial.number)],\n",
1156
+ " stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
1157
+ " \n",
1158
+ " if result.stderr:\n",
1159
+ " filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') \n",
1160
+ " if \"could not open file to read NUMA node\" not in line \n",
1161
+ " and \"Your kernel may have been built without NUMA support\" not in line])\n",
1162
+ " if filtered_stderr:\n",
1163
+ " print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
1164
+ "\n",
1165
+ " lines = result.stdout.splitlines()\n",
1166
+ " for line in lines:\n",
1167
+ " if line.startswith(\"intermediate_value:\"):\n",
1168
+ " _, step, value = line.split(\":\")\n",
1169
+ " step = int(step)\n",
1170
+ " value = float(value)\n",
1171
+ " current_step = step\n",
1172
+ " \n",
1173
+ " trial.report(value, step)\n",
1174
+ " \n",
1175
+ " if trial.should_prune():\n",
1176
+ " raise optuna.exceptions.TrialPruned()\n",
1177
+ "\n",
1178
+ " for line in reversed(lines):\n",
1179
+ " if \"R2:\" in line:\n",
1180
+ " if \"(prune)\" in line:\n",
1181
+ " raise optuna.exceptions.TrialPruned()\n",
1182
+ " else:\n",
1183
+ " r2_result = float(line.split(\":\")[1].strip())\n",
1184
+ " break\n",
1185
+ "\n",
1186
+ " except optuna.exceptions.TrialPruned:\n",
1187
+ " print(f\"Trial pruned at step {current_step}\")\n",
1188
+ " raise\n",
1189
+ " except Exception as e:\n",
1190
+ " print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
1191
+ " r2_result = 0.0\n",
1192
+ "\n",
1193
+ " gc.collect()\n",
1194
+ " return r2_result if r2_result is not None else 0.0"
1195
+ ]
1196
+ },
1197
+ {
1198
+ "cell_type": "code",
1199
+ "execution_count": 36,
1200
+ "metadata": {},
1201
+ "outputs": [],
1202
+ "source": [
1203
+ "def objective_lo_network(trial):\n",
1204
+ " r2_result = None\n",
1205
+ " current_step = 0 \n",
1206
+ " try:\n",
1207
+ " y_true = np.asarray(y_lo).astype('float')\n",
1208
+ " np.save('new_fps.npy', new_lo)\n",
1209
+ " np.save('y_true.npy', y_true)\n",
1210
+ " \n",
1211
+ " save_model(trial, new_lo)\n",
1212
+ "\n",
1213
+ " lr = trial.suggest_categorical(f\"lr\", [0.001,0.0001,0.00001])\n",
1214
+ "\n",
1215
+ " result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
1216
+ " str(BATCHSIZE), str(EPOCHS), \n",
1217
+ " str(lr), \n",
1218
+ " 'new_fps.npy', 'y_true.npy', str(trial.number)],\n",
1219
+ " stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
1220
+ " \n",
1221
+ " if result.stderr:\n",
1222
+ " filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') \n",
1223
+ " if \"could not open file to read NUMA node\" not in line \n",
1224
+ " and \"Your kernel may have been built without NUMA support\" not in line])\n",
1225
+ " if filtered_stderr:\n",
1226
+ " print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
1227
+ "\n",
1228
+ " lines = result.stdout.splitlines()\n",
1229
+ " for line in lines:\n",
1230
+ " if line.startswith(\"intermediate_value:\"):\n",
1231
+ " _, step, value = line.split(\":\")\n",
1232
+ " step = int(step)\n",
1233
+ " value = float(value)\n",
1234
+ " current_step = step\n",
1235
+ " \n",
1236
+ " trial.report(value, step)\n",
1237
+ " \n",
1238
+ " if trial.should_prune():\n",
1239
+ " raise optuna.exceptions.TrialPruned()\n",
1240
+ "\n",
1241
+ " for line in reversed(lines):\n",
1242
+ " if \"R2:\" in line:\n",
1243
+ " if \"(prune)\" in line:\n",
1244
+ " raise optuna.exceptions.TrialPruned()\n",
1245
+ " else:\n",
1246
+ " r2_result = float(line.split(\":\")[1].strip())\n",
1247
+ " break\n",
1248
+ "\n",
1249
+ " except optuna.exceptions.TrialPruned:\n",
1250
+ " print(f\"Trial pruned at step {current_step}\")\n",
1251
+ " raise\n",
1252
+ " except Exception as e:\n",
1253
+ " print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
1254
+ " r2_result = 0.0\n",
1255
+ "\n",
1256
+ " gc.collect()\n",
1257
+ " return r2_result if r2_result is not None else 0.0"
1258
+ ]
1259
+ },
1260
+ {
1261
+ "cell_type": "code",
1262
+ "execution_count": 37,
1263
+ "metadata": {},
1264
+ "outputs": [],
1265
+ "source": [
1266
+ "def objective_hu_network(trial):\n",
1267
+ " r2_result = None\n",
1268
+ " current_step = 0 \n",
1269
+ " try:\n",
1270
+ " y_true = np.asarray(y_hu).astype('float')\n",
1271
+ " np.save('new_fps.npy', new_hu)\n",
1272
+ " np.save('y_true.npy', y_true)\n",
1273
+ " \n",
1274
+ " save_model(trial, new_hu)\n",
1275
+ "\n",
1276
+ " lr = trial.suggest_categorical(f\"lr\", [0.001,0.0001,0.00001])\n",
1277
+ "\n",
1278
+ " result = subprocess.run(['python3', './extra_code/learning_process.py',\n",
1279
+ " str(BATCHSIZE), str(EPOCHS), \n",
1280
+ " str(lr), \n",
1281
+ " 'new_fps.npy', 'y_true.npy', str(trial.number)],\n",
1282
+ " stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
1283
+ " \n",
1284
+ " if result.stderr:\n",
1285
+ " filtered_stderr = '\\n'.join([line for line in result.stderr.split('\\n') \n",
1286
+ " if \"could not open file to read NUMA node\" not in line \n",
1287
+ " and \"Your kernel may have been built without NUMA support\" not in line])\n",
1288
+ " if filtered_stderr:\n",
1289
+ " print(f\"Error in subprocess: {filtered_stderr}\", file=sys.stderr)\n",
1290
+ "\n",
1291
+ " lines = result.stdout.splitlines()\n",
1292
+ " for line in lines:\n",
1293
+ " if line.startswith(\"intermediate_value:\"):\n",
1294
+ " _, step, value = line.split(\":\")\n",
1295
+ " step = int(step)\n",
1296
+ " value = float(value)\n",
1297
+ " current_step = step\n",
1298
+ " \n",
1299
+ " trial.report(value, step)\n",
1300
+ " \n",
1301
+ " if trial.should_prune():\n",
1302
+ " raise optuna.exceptions.TrialPruned()\n",
1303
+ "\n",
1304
+ " for line in reversed(lines):\n",
1305
+ " if \"R2:\" in line:\n",
1306
+ " if \"(prune)\" in line:\n",
1307
+ " raise optuna.exceptions.TrialPruned()\n",
1308
+ " else:\n",
1309
+ " r2_result = float(line.split(\":\")[1].strip())\n",
1310
+ " break\n",
1311
+ "\n",
1312
+ " except optuna.exceptions.TrialPruned:\n",
1313
+ " print(f\"Trial pruned at step {current_step}\")\n",
1314
+ " raise\n",
1315
+ " except Exception as e:\n",
1316
+ " print(f\"Exception occurred: {e}\", file=sys.stderr)\n",
1317
+ " r2_result = 0.0\n",
1318
+ "\n",
1319
+ " gc.collect()\n",
1320
+ " return r2_result if r2_result is not None else 0.0"
1321
+ ]
1322
+ },
1323
+ {
1324
+ "cell_type": "code",
1325
+ "execution_count": 38,
1326
+ "metadata": {},
1327
+ "outputs": [],
1328
+ "source": [
1329
+ "storage = optuna.storages.RDBStorage(url=\"sqlite:///ano_analysis.db\", engine_kwargs={\"connect_args\": {\"timeout\": 10000}})\n",
1330
+ "# storage_urls = \"postgresql+psycopg2://postgres:{pwd}@localhost:{num}\"\n",
1331
+ "# storage = optuna.storages.RDBStorage(url=storage_urls)"
1332
+ ]
1333
+ },
1334
+ {
1335
+ "cell_type": "code",
1336
+ "execution_count": 42,
1337
+ "metadata": {},
1338
+ "outputs": [],
1339
+ "source": [
1340
+ "try:\n",
1341
+ " # optuna.delete_study(study_name=\"ANO_ws_network\", storage=storage)\n",
1342
+ " # optuna.delete_study(study_name=\"ANO_de_network\", storage=storage)\n",
1343
+ " optuna.delete_study(study_name=\"ANO_lo_network\", storage=storage)\n",
1344
+ " # optuna.delete_study(study_name=\"ANO_hu_network\", storage=storage)\n",
1345
+ " pass\n",
1346
+ "except:\n",
1347
+ " pass "
1348
+ ]
1349
+ },
1350
+ {
1351
+ "cell_type": "code",
1352
+ "execution_count": 43,
1353
+ "metadata": {},
1354
+ "outputs": [],
1355
+ "source": [
1356
+ "TRIALS=1"
1357
+ ]
1358
+ },
1359
+ {
1360
+ "cell_type": "code",
1361
+ "execution_count": 44,
1362
+ "metadata": {},
1363
+ "outputs": [
1364
+ {
1365
+ "name": "stderr",
1366
+ "output_type": "stream",
1367
+ "text": [
1368
+ "[I 2024-11-04 23:06:11,544] Using an existing study with name 'ANO_de_network' instead of creating a new one.\n"
1369
+ ]
1370
+ },
1371
+ {
1372
+ "name": "stdout",
1373
+ "output_type": "stream",
1374
+ "text": [
1375
+ "Model already exists at save_model/full_model.keras\n",
1376
+ "Model successfully saved to save_model/full_model.keras\n"
1377
+ ]
1378
+ },
1379
+ {
1380
+ "name": "stderr",
1381
+ "output_type": "stream",
1382
+ "text": [
1383
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1384
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1385
+ "I0000 00:00:1730729176.331625 2499072 service.cc:146] XLA service 0x55701a22e460 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1386
+ "I0000 00:00:1730729176.331663 2499072 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1387
+ "I0000 00:00:1730729176.465814 2499072 service.cc:146] XLA service 0x55701a205900 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1388
+ "I0000 00:00:1730729176.465843 2499072 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1389
+ "I0000 00:00:1730729180.613395 2499185 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1390
+ "\n",
1391
+ "[I 2024-11-04 23:09:13,063] Trial 1115 finished with value: 0.965164 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 7490, 'n_decay_l_0': 1e-05, 'n_units_l_1': 2373, 'n_decay_l_1': 1e-06, 'n_units_l_2': 6613, 'n_decay_l_2': 1e-05, 'last_dropout': 0.3, 'lr': 0.001}. Best is trial 1097 with value: 0.983023.\n"
1392
+ ]
1393
+ }
1394
+ ],
1395
+ "source": [
1396
+ "# study_de_network = optuna.create_study(study_name='ANO_de_network_fixed', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
1397
+ "study_de_network = optuna.create_study(study_name='ANO_de_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)\n",
1398
+ "# study_de_network = optuna.create_study(study_name='ANO_de_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
1399
+ "# study_de_network = optuna.create_study(study_name='ANO_de_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)\n",
1400
+ "study_de_network.optimize(objective_de_network, n_trials=TRIALS)\n",
1401
+ "pruned_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1402
+ "complete_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n",
1403
+ "#74m 22.0s\n",
1404
+ "#386m 42.2s - 100 trials, 1000 epochs\n",
1405
+ "#278m 23.3s"
1406
+ ]
1407
+ },
1408
+ {
1409
+ "cell_type": "code",
1410
+ "execution_count": 45,
1411
+ "metadata": {},
1412
+ "outputs": [
1413
+ {
1414
+ "name": "stderr",
1415
+ "output_type": "stream",
1416
+ "text": [
1417
+ "[I 2024-11-04 23:09:13,086] Using an existing study with name 'ANO_ws_network' instead of creating a new one.\n"
1418
+ ]
1419
+ },
1420
+ {
1421
+ "name": "stdout",
1422
+ "output_type": "stream",
1423
+ "text": [
1424
+ "Model already exists at save_model/full_model.keras\n",
1425
+ "Model successfully saved to save_model/full_model.keras\n"
1426
+ ]
1427
+ },
1428
+ {
1429
+ "name": "stderr",
1430
+ "output_type": "stream",
1431
+ "text": [
1432
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1433
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1434
+ "I0000 00:00:1730729356.277557 2507565 service.cc:146] XLA service 0x55c7cad07060 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1435
+ "I0000 00:00:1730729356.277598 2507565 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1436
+ "I0000 00:00:1730729356.416113 2507565 service.cc:146] XLA service 0x55c7cac0bd20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1437
+ "I0000 00:00:1730729356.416147 2507565 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1438
+ "I0000 00:00:1730729359.300797 2507682 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1439
+ "\n",
1440
+ "[I 2024-11-04 23:09:27,954] Trial 193 finished with value: 0.939087 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 800, 'n_decay_l_0': 1e-06, 'n_units_l_1': 530, 'n_decay_l_1': 1e-05, 'last_dropout': 0.1, 'lr': 0.001}. Best is trial 104 with value: 0.970129.\n"
1441
+ ]
1442
+ }
1443
+ ],
1444
+ "source": [
1445
+ "# study_ws_network = optuna.create_study(study_name='ANO_ws_network_fixed', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
1446
+ "study_ws_network = optuna.create_study(study_name='ANO_ws_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)\n",
1447
+ "# study_ws_network = optuna.create_study(study_name='ANO_ws_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)\n",
1448
+ "# study_ws_network = optuna.create_study(study_name='ANO_ws_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)\n",
1449
+ "study_ws_network.optimize(objective_ws_network, n_trials=TRIALS)\n",
1450
+ "pruned_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1451
+ "complete_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n",
1452
+ "# 108m 38.1s\n",
1453
+ "#160m 18.2s - 100 trials, 1000 epochs"
1454
+ ]
1455
+ },
1456
+ {
1457
+ "cell_type": "code",
1458
+ "execution_count": 46,
1459
+ "metadata": {},
1460
+ "outputs": [],
1461
+ "source": [
1462
+ "TRIALS=10"
1463
+ ]
1464
+ },
1465
+ {
1466
+ "cell_type": "code",
1467
+ "execution_count": 47,
1468
+ "metadata": {},
1469
+ "outputs": [
1470
+ {
1471
+ "name": "stderr",
1472
+ "output_type": "stream",
1473
+ "text": [
1474
+ "[I 2024-11-04 23:09:27,984] A new study created in RDB with name: ANO_lo_network\n"
1475
+ ]
1476
+ },
1477
+ {
1478
+ "name": "stdout",
1479
+ "output_type": "stream",
1480
+ "text": [
1481
+ "Model already exists at save_model/full_model.keras\n",
1482
+ "Model successfully saved to save_model/full_model.keras\n"
1483
+ ]
1484
+ },
1485
+ {
1486
+ "name": "stderr",
1487
+ "output_type": "stream",
1488
+ "text": [
1489
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1490
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1491
+ "I0000 00:00:1730729368.680796 2510630 service.cc:146] XLA service 0x56035729eda0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1492
+ "I0000 00:00:1730729368.680848 2510630 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1493
+ "I0000 00:00:1730729368.837668 2510630 service.cc:146] XLA service 0x5603572f9c70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1494
+ "I0000 00:00:1730729368.837708 2510630 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1495
+ "I0000 00:00:1730729371.354407 2510735 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1496
+ "\n",
1497
+ "[I 2024-11-04 23:09:45,883] Trial 0 finished with value: 0.723669 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 2941, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.1, 'lr': 0.0001}. Best is trial 0 with value: 0.723669.\n"
1498
+ ]
1499
+ },
1500
+ {
1501
+ "name": "stdout",
1502
+ "output_type": "stream",
1503
+ "text": [
1504
+ "Model already exists at save_model/full_model.keras\n",
1505
+ "Model successfully saved to save_model/full_model.keras\n"
1506
+ ]
1507
+ },
1508
+ {
1509
+ "name": "stderr",
1510
+ "output_type": "stream",
1511
+ "text": [
1512
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1513
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1514
+ "I0000 00:00:1730729388.955076 2513776 service.cc:146] XLA service 0x5615ce07e010 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1515
+ "I0000 00:00:1730729388.955114 2513776 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1516
+ "I0000 00:00:1730729389.098245 2513776 service.cc:146] XLA service 0x5615ce09f3b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1517
+ "I0000 00:00:1730729389.098279 2513776 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1518
+ "I0000 00:00:1730729391.952147 2513881 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1519
+ "\n",
1520
+ "[I 2024-11-04 23:10:24,537] Trial 1 finished with value: 0.780745 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 7693, 'n_decay_l_0': 0.0001, 'last_dropout': 0.1, 'lr': 0.0001}. Best is trial 1 with value: 0.780745.\n"
1521
+ ]
1522
+ },
1523
+ {
1524
+ "name": "stdout",
1525
+ "output_type": "stream",
1526
+ "text": [
1527
+ "Model already exists at save_model/full_model.keras\n",
1528
+ "Model successfully saved to save_model/full_model.keras\n"
1529
+ ]
1530
+ },
1531
+ {
1532
+ "name": "stderr",
1533
+ "output_type": "stream",
1534
+ "text": [
1535
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1536
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1537
+ "I0000 00:00:1730729427.740828 2518105 service.cc:146] XLA service 0x563fb10b11b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1538
+ "I0000 00:00:1730729427.740872 2518105 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1539
+ "I0000 00:00:1730729427.891154 2518105 service.cc:146] XLA service 0x563fb0f93760 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1540
+ "I0000 00:00:1730729427.891196 2518105 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1541
+ "I0000 00:00:1730729428.275905 2518208 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1542
+ "\n",
1543
+ "[I 2024-11-04 23:10:56,189] Trial 2 finished with value: 0.861173 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 3091, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'lr': 0.001}. Best is trial 2 with value: 0.861173.\n"
1544
+ ]
1545
+ },
1546
+ {
1547
+ "name": "stdout",
1548
+ "output_type": "stream",
1549
+ "text": [
1550
+ "Model already exists at save_model/full_model.keras\n",
1551
+ "Model successfully saved to save_model/full_model.keras\n"
1552
+ ]
1553
+ },
1554
+ {
1555
+ "name": "stderr",
1556
+ "output_type": "stream",
1557
+ "text": [
1558
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1559
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1560
+ "I0000 00:00:1730729457.830007 2524433 service.cc:146] XLA service 0x56115c187470 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1561
+ "I0000 00:00:1730729457.830062 2524433 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1562
+ "I0000 00:00:1730729457.978097 2524433 service.cc:146] XLA service 0x56115c0a57a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1563
+ "I0000 00:00:1730729457.978151 2524433 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1564
+ "I0000 00:00:1730729463.165301 2524546 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1565
+ "\n",
1566
+ "[I 2024-11-04 23:12:29,474] Trial 3 pruned. \n"
1567
+ ]
1568
+ },
1569
+ {
1570
+ "name": "stdout",
1571
+ "output_type": "stream",
1572
+ "text": [
1573
+ "Trial pruned at step 50\n",
1574
+ "Model already exists at save_model/full_model.keras\n",
1575
+ "Model successfully saved to save_model/full_model.keras\n"
1576
+ ]
1577
+ },
1578
+ {
1579
+ "name": "stderr",
1580
+ "output_type": "stream",
1581
+ "text": [
1582
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1583
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1584
+ "I0000 00:00:1730729550.554875 2527992 service.cc:146] XLA service 0x559ab4767160 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1585
+ "I0000 00:00:1730729550.554932 2527992 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1586
+ "I0000 00:00:1730729550.699674 2527992 service.cc:146] XLA service 0x559ab47a71a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1587
+ "I0000 00:00:1730729550.699710 2527992 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1588
+ "I0000 00:00:1730729554.340454 2528101 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1589
+ "\n",
1590
+ "[I 2024-11-04 23:13:00,476] Trial 4 pruned. \n"
1591
+ ]
1592
+ },
1593
+ {
1594
+ "name": "stdout",
1595
+ "output_type": "stream",
1596
+ "text": [
1597
+ "Trial pruned at step 50\n",
1598
+ "Model already exists at save_model/full_model.keras\n",
1599
+ "Model successfully saved to save_model/full_model.keras\n"
1600
+ ]
1601
+ },
1602
+ {
1603
+ "name": "stderr",
1604
+ "output_type": "stream",
1605
+ "text": [
1606
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1607
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1608
+ "I0000 00:00:1730729581.148610 2530567 service.cc:146] XLA service 0x5603394ef990 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1609
+ "I0000 00:00:1730729581.148655 2530567 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1610
+ "I0000 00:00:1730729581.280871 2530567 service.cc:146] XLA service 0x5603394f7710 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1611
+ "I0000 00:00:1730729581.280904 2530567 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1612
+ "I0000 00:00:1730729584.114676 2530679 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1613
+ "\n",
1614
+ "[I 2024-11-04 23:13:19,868] Trial 5 pruned. \n"
1615
+ ]
1616
+ },
1617
+ {
1618
+ "name": "stdout",
1619
+ "output_type": "stream",
1620
+ "text": [
1621
+ "Trial pruned at step 50\n",
1622
+ "Model already exists at save_model/full_model.keras\n",
1623
+ "Model successfully saved to save_model/full_model.keras\n"
1624
+ ]
1625
+ },
1626
+ {
1627
+ "name": "stderr",
1628
+ "output_type": "stream",
1629
+ "text": [
1630
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1631
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1632
+ "I0000 00:00:1730729603.760434 2536173 service.cc:146] XLA service 0x55b2351fb2f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1633
+ "I0000 00:00:1730729603.760485 2536173 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1634
+ "I0000 00:00:1730729603.892722 2536173 service.cc:146] XLA service 0x55b232ce1e60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1635
+ "I0000 00:00:1730729603.892765 2536173 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1636
+ "I0000 00:00:1730729609.099171 2536280 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1637
+ "\n",
1638
+ "[I 2024-11-04 23:14:49,961] Trial 6 finished with value: 0.761589 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 8234, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.3, 'n_units_l_1': 5907, 'n_decay_l_1': 1e-05, 'F_dropout_1': 0.1, 'n_units_l_2': 5363, 'n_decay_l_2': 1e-05, 'F_dropout_2': 0.2, 'lr': 0.0001}. Best is trial 2 with value: 0.861173.\n"
1639
+ ]
1640
+ },
1641
+ {
1642
+ "name": "stdout",
1643
+ "output_type": "stream",
1644
+ "text": [
1645
+ "Model already exists at save_model/full_model.keras\n",
1646
+ "Model successfully saved to save_model/full_model.keras\n"
1647
+ ]
1648
+ },
1649
+ {
1650
+ "name": "stderr",
1651
+ "output_type": "stream",
1652
+ "text": [
1653
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1654
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1655
+ "I0000 00:00:1730729693.449528 2539291 service.cc:146] XLA service 0x562dce6885b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1656
+ "I0000 00:00:1730729693.449582 2539291 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1657
+ "I0000 00:00:1730729693.592325 2539291 service.cc:146] XLA service 0x562dce5c4c30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1658
+ "I0000 00:00:1730729693.592355 2539291 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1659
+ "I0000 00:00:1730729697.337810 2539396 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1660
+ "\n",
1661
+ "[I 2024-11-04 23:15:31,025] Trial 7 pruned. \n"
1662
+ ]
1663
+ },
1664
+ {
1665
+ "name": "stdout",
1666
+ "output_type": "stream",
1667
+ "text": [
1668
+ "Trial pruned at step 50\n",
1669
+ "Model already exists at save_model/full_model.keras\n",
1670
+ "Model successfully saved to save_model/full_model.keras\n"
1671
+ ]
1672
+ },
1673
+ {
1674
+ "name": "stderr",
1675
+ "output_type": "stream",
1676
+ "text": [
1677
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1678
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1679
+ "I0000 00:00:1730729732.655927 2542190 service.cc:146] XLA service 0x557b8aaafa90 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1680
+ "I0000 00:00:1730729732.655980 2542190 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1681
+ "I0000 00:00:1730729732.804313 2542190 service.cc:146] XLA service 0x557b8aa19180 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1682
+ "I0000 00:00:1730729732.804347 2542190 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1683
+ "I0000 00:00:1730729737.528836 2542300 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1684
+ "\n",
1685
+ "[I 2024-11-04 23:17:30,392] Trial 8 finished with value: 0.849003 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 9926, 'n_decay_l_0': 1e-05, 'n_units_l_1': 6304, 'n_decay_l_1': 1e-06, 'n_units_l_2': 1149, 'n_decay_l_2': 0.0001, 'last_dropout': 0.3, 'lr': 0.0001}. Best is trial 2 with value: 0.861173.\n"
1686
+ ]
1687
+ },
1688
+ {
1689
+ "name": "stdout",
1690
+ "output_type": "stream",
1691
+ "text": [
1692
+ "Model already exists at save_model/full_model.keras\n",
1693
+ "Model successfully saved to save_model/full_model.keras\n"
1694
+ ]
1695
+ },
1696
+ {
1697
+ "name": "stderr",
1698
+ "output_type": "stream",
1699
+ "text": [
1700
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1701
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1702
+ "I0000 00:00:1730729854.300336 2546634 service.cc:146] XLA service 0x5635047e3fd0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1703
+ "I0000 00:00:1730729854.300383 2546634 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1704
+ "I0000 00:00:1730729854.440994 2546634 service.cc:146] XLA service 0x5635046b3d50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1705
+ "I0000 00:00:1730729854.441033 2546634 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1706
+ "I0000 00:00:1730729857.327780 2546746 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1707
+ "\n",
1708
+ "[I 2024-11-04 23:18:18,438] Trial 9 finished with value: 0.859502 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 6912, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.3, 'lr': 0.0001}. Best is trial 2 with value: 0.861173.\n"
1709
+ ]
1710
+ }
1711
+ ],
1712
+ "source": [
1713
+ "# study_lo_network = optuna.create_study(study_name='ANO_lo_network_fixed', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
1714
+ "study_lo_network = optuna.create_study(study_name='ANO_lo_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)\n",
1715
+ "# study_lo_network = optuna.create_study(study_name='ANO_lo_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)\n",
1716
+ "# study_lo_network = optuna.create_study(study_name='ANO_lo_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)\n",
1717
+ "study_lo_network.optimize(objective_lo_network, n_trials=TRIALS)\n",
1718
+ "pruned_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1719
+ "complete_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
1720
+ ]
1721
+ },
1722
+ {
1723
+ "cell_type": "code",
1724
+ "execution_count": 48,
1725
+ "metadata": {},
1726
+ "outputs": [],
1727
+ "source": [
1728
+ "TRIALS=1"
1729
+ ]
1730
+ },
1731
+ {
1732
+ "cell_type": "code",
1733
+ "execution_count": 49,
1734
+ "metadata": {},
1735
+ "outputs": [
1736
+ {
1737
+ "name": "stderr",
1738
+ "output_type": "stream",
1739
+ "text": [
1740
+ "[I 2024-11-04 23:18:18,463] Using an existing study with name 'ANO_hu_network' instead of creating a new one.\n"
1741
+ ]
1742
+ },
1743
+ {
1744
+ "name": "stdout",
1745
+ "output_type": "stream",
1746
+ "text": [
1747
+ "Model already exists at save_model/full_model.keras\n",
1748
+ "Model successfully saved to save_model/full_model.keras\n"
1749
+ ]
1750
+ },
1751
+ {
1752
+ "name": "stderr",
1753
+ "output_type": "stream",
1754
+ "text": [
1755
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1756
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1757
+ "I0000 00:00:1730729902.168016 2552533 service.cc:146] XLA service 0x55fd29098ab0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1758
+ "I0000 00:00:1730729902.168077 2552533 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1759
+ "I0000 00:00:1730729902.305499 2552533 service.cc:146] XLA service 0x55fd28631810 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1760
+ "I0000 00:00:1730729902.305538 2552533 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1761
+ "I0000 00:00:1730729907.273542 2552637 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1762
+ "\n",
1763
+ "[I 2024-11-04 23:19:12,205] Trial 144 finished with value: 0.936649 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 1510, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.1, 'n_units_l_1': 2489, 'n_decay_l_1': 1e-06, 'F_dropout_1': 0.1, 'n_units_l_2': 1567, 'n_decay_l_2': 1e-05, 'F_dropout_2': 0.1, 'lr': 0.0001}. Best is trial 130 with value: 0.943809.\n"
1764
+ ]
1765
+ }
1766
+ ],
1767
+ "source": [
1768
+ "# study_hu_network = optuna.create_study(study_name='ANO_hu_network_fixed', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
1769
+ "study_hu_network = optuna.create_study(study_name='ANO_hu_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)\n",
1770
+ "# study_hu_network = optuna.create_study(study_name='ANO_hu_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)\n",
1771
+ "# study_hu_network = optuna.create_study(study_name='ANO_hu_network', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)\n",
1772
+ "study_hu_network.optimize(objective_hu_network, n_trials=TRIALS)\n",
1773
+ "pruned_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1774
+ "complete_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
1775
+ ]
1776
+ },
1777
+ {
1778
+ "cell_type": "code",
1779
+ "execution_count": 50,
1780
+ "metadata": {},
1781
+ "outputs": [
1782
+ {
1783
+ "name": "stdout",
1784
+ "output_type": "stream",
1785
+ "text": [
1786
+ "Study statistics: [ws_structure] \n",
1787
+ " Number of finished trials: 194\n",
1788
+ " Number of pruned trials: 3\n",
1789
+ " Number of complete trials: 168\n",
1790
+ "Best trial:\n",
1791
+ " Value: 0.970129\n",
1792
+ " Params: \n",
1793
+ " n_layers: 2\n",
1794
+ " layer_dropout: 0\n",
1795
+ " n_units_l_0: 205\n",
1796
+ " n_decay_l_0: 1e-06\n",
1797
+ " n_units_l_1: 742\n",
1798
+ " n_decay_l_1: 0.0001\n",
1799
+ " last_dropout: 0.1\n",
1800
+ " lr: 0.0001\n"
1801
+ ]
1802
+ }
1803
+ ],
1804
+ "source": [
1805
+ "print(\"Study statistics: [ws_structure] \")\n",
1806
+ "print(\" Number of finished trials: \", len(study_ws_network.trials))\n",
1807
+ "print(\" Number of pruned trials: \", len(pruned_trials_ws_newtork))\n",
1808
+ "print(\" Number of complete trials: \", len(complete_trials_ws_newtork))\n",
1809
+ "print(\"Best trial:\")\n",
1810
+ "trials_tmp = study_ws_network.best_trial\n",
1811
+ "print(\" Value: \", trials_tmp.value)\n",
1812
+ "print(\" Params: \")\n",
1813
+ "for key, value in trials_tmp.params.items():\n",
1814
+ " print(\" {}: {}\".format(key, value))"
1815
+ ]
1816
+ },
1817
+ {
1818
+ "cell_type": "code",
1819
+ "execution_count": 51,
1820
+ "metadata": {},
1821
+ "outputs": [
1822
+ {
1823
+ "name": "stdout",
1824
+ "output_type": "stream",
1825
+ "text": [
1826
+ "Study statistics: [de_structure] \n",
1827
+ " Number of finished trials: 1116\n",
1828
+ " Number of pruned trials: 59\n",
1829
+ " Number of complete trials: 1032\n",
1830
+ "Best trial:\n",
1831
+ " Value: 0.983023\n",
1832
+ " Params: \n",
1833
+ " n_layers: 3\n",
1834
+ " layer_dropout: 0\n",
1835
+ " n_units_l_0: 7946\n",
1836
+ " n_decay_l_0: 1e-05\n",
1837
+ " n_units_l_1: 2662\n",
1838
+ " n_decay_l_1: 1e-06\n",
1839
+ " n_units_l_2: 6499\n",
1840
+ " n_decay_l_2: 1e-05\n",
1841
+ " last_dropout: 0.3\n",
1842
+ " lr: 0.001\n"
1843
+ ]
1844
+ }
1845
+ ],
1846
+ "source": [
1847
+ "print(\"Study statistics: [de_structure] \")\n",
1848
+ "print(\" Number of finished trials: \", len(study_de_network.trials))\n",
1849
+ "print(\" Number of pruned trials: \", len(pruned_trials_de_newtork))\n",
1850
+ "print(\" Number of complete trials: \", len(complete_trials_de_newtork))\n",
1851
+ "print(\"Best trial:\")\n",
1852
+ "trials_tmp = study_de_network.best_trial\n",
1853
+ "print(\" Value: \", trials_tmp.value)\n",
1854
+ "print(\" Params: \")\n",
1855
+ "for key, value in trials_tmp.params.items():\n",
1856
+ " print(\" {}: {}\".format(key, value))"
1857
+ ]
1858
+ },
1859
+ {
1860
+ "cell_type": "code",
1861
+ "execution_count": 52,
1862
+ "metadata": {},
1863
+ "outputs": [
1864
+ {
1865
+ "name": "stdout",
1866
+ "output_type": "stream",
1867
+ "text": [
1868
+ "Study statistics: [lo_structure] \n",
1869
+ " Number of finished trials: 10\n",
1870
+ " Number of pruned trials: 4\n",
1871
+ " Number of complete trials: 6\n",
1872
+ "Best trial:\n",
1873
+ " Value: 0.861173\n",
1874
+ " Params: \n",
1875
+ " n_layers: 1\n",
1876
+ " layer_dropout: 1\n",
1877
+ " n_units_l_0: 3091\n",
1878
+ " n_decay_l_0: 1e-05\n",
1879
+ " F_dropout_0: 0.2\n",
1880
+ " lr: 0.001\n"
1881
+ ]
1882
+ }
1883
+ ],
1884
+ "source": [
1885
+ "print(\"Study statistics: [lo_structure] \")\n",
1886
+ "print(\" Number of finished trials: \", len(study_lo_network.trials))\n",
1887
+ "print(\" Number of pruned trials: \", len(pruned_trials_lo_newtork))\n",
1888
+ "print(\" Number of complete trials: \", len(complete_trials_lo_newtork))\n",
1889
+ "print(\"Best trial:\")\n",
1890
+ "trials_tmp = study_lo_network.best_trial\n",
1891
+ "print(\" Value: \", trials_tmp.value)\n",
1892
+ "print(\" Params: \")\n",
1893
+ "for key, value in trials_tmp.params.items():\n",
1894
+ " print(\" {}: {}\".format(key, value))"
1895
+ ]
1896
+ },
1897
+ {
1898
+ "cell_type": "code",
1899
+ "execution_count": 53,
1900
+ "metadata": {},
1901
+ "outputs": [
1902
+ {
1903
+ "name": "stdout",
1904
+ "output_type": "stream",
1905
+ "text": [
1906
+ "Study statistics: [hu_structure] \n",
1907
+ " Number of finished trials: 145\n",
1908
+ " Number of pruned trials: 55\n",
1909
+ " Number of complete trials: 78\n",
1910
+ "Best trial:\n",
1911
+ " Value: 0.943809\n",
1912
+ " Params: \n",
1913
+ " n_layers: 3\n",
1914
+ " layer_dropout: 1\n",
1915
+ " n_units_l_0: 3891\n",
1916
+ " n_decay_l_0: 0.0001\n",
1917
+ " F_dropout_0: 0.1\n",
1918
+ " n_units_l_1: 7719\n",
1919
+ " n_decay_l_1: 1e-05\n",
1920
+ " F_dropout_1: 0.3\n",
1921
+ " n_units_l_2: 342\n",
1922
+ " n_decay_l_2: 1e-05\n",
1923
+ " F_dropout_2: 0.1\n",
1924
+ " lr: 0.0001\n"
1925
+ ]
1926
+ }
1927
+ ],
1928
+ "source": [
1929
+ "print(\"Study statistics: [hu_structure] \")\n",
1930
+ "print(\" Number of finished trials: \", len(study_hu_network.trials))\n",
1931
+ "print(\" Number of pruned trials: \", len(pruned_trials_hu_newtork))\n",
1932
+ "print(\" Number of complete trials: \", len(complete_trials_hu_newtork))\n",
1933
+ "print(\"Best trial:\")\n",
1934
+ "trials_tmp = study_hu_network.best_trial\n",
1935
+ "print(\" Value: \", trials_tmp.value)\n",
1936
+ "print(\" Params: \")\n",
1937
+ "for key, value in trials_tmp.params.items():\n",
1938
+ " print(\" {}: {}\".format(key, value))"
1939
+ ]
1940
+ },
1941
+ {
1942
+ "cell_type": "code",
1943
+ "execution_count": null,
1944
+ "metadata": {},
1945
+ "outputs": [],
1946
+ "source": []
1947
+ },
1948
+ {
1949
+ "cell_type": "code",
1950
+ "execution_count": null,
1951
+ "metadata": {},
1952
+ "outputs": [],
1953
+ "source": []
1954
+ },
1955
+ {
1956
+ "cell_type": "code",
1957
+ "execution_count": null,
1958
+ "metadata": {},
1959
+ "outputs": [],
1960
+ "source": []
1961
+ },
1962
+ {
1963
+ "cell_type": "code",
1964
+ "execution_count": null,
1965
+ "metadata": {},
1966
+ "outputs": [],
1967
+ "source": []
1968
+ }
1969
+ ],
1970
+ "metadata": {
1971
+ "kernelspec": {
1972
+ "display_name": "ai",
1973
+ "language": "python",
1974
+ "name": "python3"
1975
+ },
1976
+ "language_info": {
1977
+ "codemirror_mode": {
1978
+ "name": "ipython",
1979
+ "version": 3
1980
+ },
1981
+ "file_extension": ".py",
1982
+ "mimetype": "text/x-python",
1983
+ "name": "python",
1984
+ "nbconvert_exporter": "python",
1985
+ "pygments_lexer": "ipython3",
1986
+ "version": "3.12.2"
1987
+ },
1988
+ "orig_nbformat": 4
1989
+ },
1990
+ "nbformat": 4,
1991
+ "nbformat_minor": 2
1992
+ }
7_ANO_network_[struc_fea].ipynb ADDED
@@ -0,0 +1,1913 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "import sys\n",
11
+ "import numpy as np\n",
12
+ "import pandas as pd\n",
13
+ "import seaborn as sns\n",
14
+ "import matplotlib.pyplot as plt\n",
15
+ "import matplotlib.patches as mpatches\n",
16
+ "import gc\n",
17
+ "import time\n",
18
+ "import subprocess\n",
19
+ "from concurrent.futures import ProcessPoolExecutor, as_completed"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 2,
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "from rdkit import Chem\n",
29
+ "from rdkit.Chem import AllChem, DataStructs, Draw\n",
30
+ "from rdkit import RDConfig\n",
31
+ "from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges\n",
32
+ "from rdkit.Chem.AllChem import GetMorganGenerator\n",
33
+ "from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray\n",
34
+ "from rdkit.Avalon.pyAvalonTools import GetAvalonFP"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 3,
40
+ "metadata": {},
41
+ "outputs": [
42
+ {
43
+ "name": "stderr",
44
+ "output_type": "stream",
45
+ "text": [
46
+ "2024-10-20 11:19:00.304949: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
47
+ "2024-10-20 11:19:00.318297: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
48
+ "2024-10-20 11:19:00.322661: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
49
+ "2024-10-20 11:19:00.333360: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
50
+ "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
51
+ "2024-10-20 11:19:01.123896: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
52
+ ]
53
+ }
54
+ ],
55
+ "source": [
56
+ "import tensorflow as tf\n",
57
+ "from tensorflow import keras\n",
58
+ "from tensorflow.keras import layers\n",
59
+ "from tensorflow.keras.models import Sequential\n",
60
+ "from tensorflow.keras.layers import Dense, Dropout, Activation\n",
61
+ "from tensorflow.keras.regularizers import l2\n",
62
+ "from tensorflow.keras.optimizers import Adam\n",
63
+ "from tensorflow.keras import regularizers"
64
+ ]
65
+ },
66
+ {
67
+ "cell_type": "code",
68
+ "execution_count": 4,
69
+ "metadata": {},
70
+ "outputs": [],
71
+ "source": [
72
+ "from sklearn.model_selection import train_test_split\n",
73
+ "from sklearn.linear_model import Ridge\n",
74
+ "from sklearn.ensemble import RandomForestRegressor\n",
75
+ "from sklearn.neural_network import MLPRegressor\n",
76
+ "from sklearn.svm import SVR\n",
77
+ "from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": 5,
83
+ "metadata": {},
84
+ "outputs": [],
85
+ "source": [
86
+ "import optuna\n",
87
+ "from optuna.trial import TrialState"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": 6,
93
+ "metadata": {},
94
+ "outputs": [],
95
+ "source": [
96
+ "from extra_code.feature_selection import selection_structure_compress\n",
97
+ "from extra_code.feature_search import search_data_descriptor_compress"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 7,
103
+ "metadata": {},
104
+ "outputs": [
105
+ {
106
+ "name": "stderr",
107
+ "output_type": "stream",
108
+ "text": [
109
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
110
+ "I0000 00:00:1729390742.084862 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
111
+ "Your kernel may have been built without NUMA support.\n",
112
+ "I0000 00:00:1729390742.133906 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
113
+ "Your kernel may have been built without NUMA support.\n",
114
+ "I0000 00:00:1729390742.134144 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
115
+ "Your kernel may have been built without NUMA support.\n"
116
+ ]
117
+ }
118
+ ],
119
+ "source": [
120
+ "tf.keras.backend.clear_session()\n",
121
+ "gpus = tf.config.experimental.list_physical_devices('GPU')\n",
122
+ "if gpus:\n",
123
+ " try:\n",
124
+ " for gpu in gpus:\n",
125
+ " tf.config.experimental.set_memory_growth(gpu, True)\n",
126
+ " except RuntimeError as e:\n",
127
+ " print(e)"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 8,
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": [
136
+ "target_path = \"result/7_ANO_network_[struc_fea]\"\n",
137
+ "os.makedirs(target_path, exist_ok=True)"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": 9,
143
+ "metadata": {},
144
+ "outputs": [],
145
+ "source": [
146
+ "data_ws = pd.read_csv('./data/ws496_logS.csv', dtype={'SMILES': 'string'})\n",
147
+ "smiles_ws = data_ws['SMILES']\n",
148
+ "y_ws = data_ws.iloc[:, 2]\n",
149
+ "\n",
150
+ "data_delaney = pd.read_csv('./data/delaney-processed.csv', dtype={'smiles': 'string'})\n",
151
+ "smiles_de = data_delaney['smiles']\n",
152
+ "y_de = data_delaney.iloc[:, 1]\n",
153
+ "\n",
154
+ "data_lovric2020 = pd.read_csv('./data/Lovric2020_logS0.csv', dtype={'isomeric_smiles': 'string'})\n",
155
+ "smiles_lo = data_lovric2020['isomeric_smiles']\n",
156
+ "y_lo = data_lovric2020.iloc[:, 1]\n",
157
+ "\n",
158
+ "data_huuskonen = pd.read_csv('./data/huusk.csv', dtype={'SMILES': 'string'})\n",
159
+ "smiles_hu = data_huuskonen['SMILES']\n",
160
+ "y_hu = data_huuskonen.iloc[:, -1].astype('float')"
161
+ ]
162
+ },
163
+ {
164
+ "cell_type": "code",
165
+ "execution_count": 10,
166
+ "metadata": {},
167
+ "outputs": [],
168
+ "source": [
169
+ "def mol3d(mol):\n",
170
+ " mol = Chem.AddHs(mol)\n",
171
+ " optimization_methods = [\n",
172
+ " (AllChem.EmbedMolecule, (mol, AllChem.ETKDGv3()), {}),\n",
173
+ " (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),\n",
174
+ " (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200})\n",
175
+ " ]\n",
176
+ "\n",
177
+ " for method, args, kwargs in optimization_methods:\n",
178
+ " try:\n",
179
+ " method(*args, **kwargs)\n",
180
+ " if mol.GetNumConformers() > 0:\n",
181
+ " return mol\n",
182
+ " except ValueError as e:\n",
183
+ " print(f\"Error: {e} - Trying next optimization method [{method}]\")\n",
184
+ "\n",
185
+ " print(f\"Invalid mol for 3d {'\\033[94m'}{Chem.MolToSmiles(mol)}{'\\033[0m'} - No conformer generated\")\n",
186
+ " return None"
187
+ ]
188
+ },
189
+ {
190
+ "cell_type": "code",
191
+ "execution_count": 11,
192
+ "metadata": {},
193
+ "outputs": [],
194
+ "source": [
195
+ "def convert_smiles_to_mol(smiles, fail_folder=None, index=None, yvalue=None):\n",
196
+ " mol = Chem.MolFromSmiles(smiles)\n",
197
+ " if mol is None:\n",
198
+ " print(f\"[convert_smiles_to_mol] Cannot convert {smiles} to Mols\")\n",
199
+ " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": \"Invalid SMILES\"}\n",
200
+ "\n",
201
+ " try:\n",
202
+ " Chem.Kekulize(mol, clearAromaticFlags=True)\n",
203
+ " isomeric_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)\n",
204
+ " mol = Chem.MolFromSmiles(isomeric_smiles)\n",
205
+ " except Exception as e:\n",
206
+ " print(f\"[convert_smiles_to_mol] failed {smiles} isomeric_smiles by {e}\")\n",
207
+ " if fail_folder and index is not None:\n",
208
+ " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
209
+ " img = Draw.MolToImage(mol)\n",
210
+ " img.save(img_path)\n",
211
+ " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"Isomeric SMILES error: {e}\"}\n",
212
+ "\n",
213
+ " try:\n",
214
+ " Chem.SanitizeMol(mol)\n",
215
+ " except Exception as e:\n",
216
+ " print(f\"[convert_smiles_to_mol] failed {smiles} SanitizeMol by {e}\")\n",
217
+ " if fail_folder and index is not None:\n",
218
+ " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
219
+ " img = Draw.MolToImage(mol)\n",
220
+ " img.save(img_path)\n",
221
+ " return None, {\"smiles\": smiles, \"y_value\": yvalue, \"error\": f\"SanitizeMol error: {e}\"}\n",
222
+ "\n",
223
+ " return mol, None"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 12,
229
+ "metadata": {},
230
+ "outputs": [],
231
+ "source": [
232
+ "def process_smiles(smiles, yvalue, fail_folder, index):\n",
233
+ " mol, error = convert_smiles_to_mol(smiles, fail_folder, index, yvalue)\n",
234
+ " if error:\n",
235
+ " return None, None, error\n",
236
+ "\n",
237
+ " mol_3d = mol3d(mol)\n",
238
+ " if mol_3d:\n",
239
+ " return smiles, yvalue, None\n",
240
+ " else:\n",
241
+ " img_path = os.path.join(fail_folder, f\"mol_{index}.png\")\n",
242
+ " img = Draw.MolToImage(mol)\n",
243
+ " img.save(img_path)\n",
244
+ " return None, None, {\"smiles\": smiles, \"y_value\": yvalue}\n",
245
+ "\n",
246
+ "def process_dataset(smiles_list, y_values, dataset_name, target_path=\"result\", max_workers=None):\n",
247
+ " start = time.time()\n",
248
+ " valid_smiles, valid_y = [], []\n",
249
+ " error_smiles_list = []\n",
250
+ " fail_folder = f\"{target_path}/failed/{dataset_name}\"\n",
251
+ " os.makedirs(fail_folder, exist_ok=True)\n",
252
+ "\n",
253
+ " with ProcessPoolExecutor(max_workers=max_workers) as executor:\n",
254
+ " futures = [\n",
255
+ " executor.submit(process_smiles, smiles, yvalue, fail_folder, i)\n",
256
+ " for i, (smiles, yvalue) in enumerate(zip(smiles_list, y_values))\n",
257
+ " ]\n",
258
+ " for future in as_completed(futures):\n",
259
+ " smiles, yvalue, error = future.result()\n",
260
+ " if error:\n",
261
+ " error_smiles_list.append(error)\n",
262
+ " elif smiles is not None and yvalue is not None:\n",
263
+ " valid_smiles.append(smiles)\n",
264
+ " valid_y.append(yvalue)\n",
265
+ "\n",
266
+ " if error_smiles_list:\n",
267
+ " error_df = pd.DataFrame(error_smiles_list)\n",
268
+ " error_df.to_csv(os.path.join(fail_folder, \"failed_smiles.csv\"), index=False)\n",
269
+ " print(f\" [{dataset_name:<10}] : {time.time()-start:.4f} sec\")\n",
270
+ " return valid_smiles, valid_y"
271
+ ]
272
+ },
273
+ {
274
+ "cell_type": "code",
275
+ "execution_count": 13,
276
+ "metadata": {},
277
+ "outputs": [
278
+ {
279
+ "name": "stdout",
280
+ "output_type": "stream",
281
+ "text": [
282
+ " [ws496 ] : 1.0593 sec\n",
283
+ " [delaney ] : 1.7029 sec\n",
284
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x56473ec3be90>]\n",
285
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x56473ec3cd20>]\n",
286
+ "Invalid mol for 3d \u001b[94m[H]O[C@@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
287
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x56473ec3be90>]\n",
288
+ "Error: Bad Conformer Id - Trying next optimization method [<Boost.Python.function object at 0x56473ec3cd20>]\n",
289
+ "Invalid mol for 3d \u001b[94m[H]O[C@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H]\u001b[0m - No conformer generated\n",
290
+ " [Lovric2020_logS0] : 9.2073 sec\n",
291
+ " [huusk ] : 3.8625 sec\n"
292
+ ]
293
+ }
294
+ ],
295
+ "source": [
296
+ "smiles_ws, y_ws = process_dataset(smiles_ws, y_ws, \"ws496\", target_path)\n",
297
+ "smiles_de, y_de = process_dataset(smiles_de, y_de, \"delaney\", target_path)\n",
298
+ "smiles_lo, y_lo = process_dataset(smiles_lo, y_lo, \"Lovric2020_logS0\", target_path)\n",
299
+ "smiles_hu, y_hu = process_dataset(smiles_hu, y_hu, \"huusk\", target_path)"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": 14,
305
+ "metadata": {},
306
+ "outputs": [],
307
+ "source": [
308
+ "LEN_OF_FF = 2048\n",
309
+ "LEN_OF_MA = 167\n",
310
+ "LEN_OF_AV = 512"
311
+ ]
312
+ },
313
+ {
314
+ "cell_type": "code",
315
+ "execution_count": 15,
316
+ "metadata": {},
317
+ "outputs": [],
318
+ "source": [
319
+ "def get_fingerprints(mol):\n",
320
+ " if mol is None:\n",
321
+ " return None, None, None\n",
322
+ " \n",
323
+ " morgan_generator = GetMorganGenerator(radius=2, fpSize=LEN_OF_FF)\n",
324
+ " ecfp = morgan_generator.GetFingerprint(mol)\n",
325
+ " ecfp_array = np.zeros((LEN_OF_FF,),dtype=int)\n",
326
+ " DataStructs.ConvertToNumpyArray(ecfp, ecfp_array)\n",
327
+ " \n",
328
+ " maccs = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)\n",
329
+ "\n",
330
+ " avalon_fp = GetAvalonFP(mol)\n",
331
+ " avalon_array = np.zeros((LEN_OF_AV,),dtype=int)\n",
332
+ " DataStructs.ConvertToNumpyArray(avalon_fp, avalon_array)\n",
333
+ " \n",
334
+ " return ecfp_array, maccs, avalon_array\n",
335
+ "\n",
336
+ "def fp_converter(data, use_parallel=True):\n",
337
+ " mols = [Chem.MolFromSmiles(smi) for smi in data]\n",
338
+ " \n",
339
+ " if use_parallel:\n",
340
+ " try: \n",
341
+ " with ProcessPoolExecutor() as executor:\n",
342
+ " results = list(executor.map(get_fingerprints, mols))\n",
343
+ " except Exception as e:\n",
344
+ " print(f\"Parallel processing failed due to: {e}. Falling back to sequential processing.\")\n",
345
+ " use_parallel = False\n",
346
+ " \n",
347
+ " if not use_parallel:\n",
348
+ " results = [get_fingerprints(mol) for mol in mols]\n",
349
+ " \n",
350
+ " ECFP, MACCS, AvalonFP = zip(*results)\n",
351
+ " \n",
352
+ " ECFP_container = np.vstack([arr for arr in ECFP if arr is not None])\n",
353
+ " MACCS_container = np.zeros((len(MACCS), LEN_OF_MA), dtype=int)\n",
354
+ " AvalonFP_container = np.vstack([arr for arr in AvalonFP if arr is not None])\n",
355
+ "\n",
356
+ " for i, fp in enumerate(MACCS):\n",
357
+ " if fp is not None:\n",
358
+ " DataStructs.ConvertToNumpyArray(fp, MACCS_container[i])\n",
359
+ " \n",
360
+ " return mols, ECFP_container, MACCS_container, AvalonFP_container"
361
+ ]
362
+ },
363
+ {
364
+ "cell_type": "code",
365
+ "execution_count": 16,
366
+ "metadata": {},
367
+ "outputs": [
368
+ {
369
+ "data": {
370
+ "text/plain": [
371
+ "0"
372
+ ]
373
+ },
374
+ "execution_count": 16,
375
+ "metadata": {},
376
+ "output_type": "execute_result"
377
+ }
378
+ ],
379
+ "source": [
380
# Build molecules and ECFP / MACCS / Avalon matrices for each dataset.
# fp_converter's second parameter is `use_parallel`, so the flag is passed
# explicitly by keyword.
# NOTE(review): `target_path` used to be passed in this position and was
# interpreted as the flag; this assumes it was a non-empty (truthy) value.
mol_ws, x_ws, MACCS_ws, AvalonFP_ws = fp_converter(smiles_ws, use_parallel=True)
mol_de, x_de, MACCS_de, AvalonFP_de = fp_converter(smiles_de, use_parallel=True)
mol_lo, x_lo, MACCS_lo, AvalonFP_lo = fp_converter(smiles_lo, use_parallel=True)
mol_hu, x_hu, MACCS_hu, AvalonFP_hu = fp_converter(smiles_hu, use_parallel=True)

# The raw SMILES lists are no longer needed once the molecules exist.
del smiles_ws
del smiles_de
del smiles_lo
del smiles_hu
gc.collect()
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "code",
393
+ "execution_count": 17,
394
+ "metadata": {},
395
+ "outputs": [],
396
+ "source": [
397
def concatenate_to_numpy(*dataframes):
    """Join feature tables column-wise into a single NumPy array.

    Args:
        *dataframes: One or more ``pandas.DataFrame`` or ``numpy.ndarray``
            objects. DataFrames are converted with ``to_numpy()``.

    Returns:
        numpy.ndarray: The inputs concatenated along axis 1, in argument
        order.

    Raises:
        ValueError: If no input is given, or if any input is neither a
            DataFrame nor an ndarray (the message names the offending
            argument position and type).
    """
    if not dataframes:
        raise ValueError("At least one pandas DataFrame or numpy array is required")

    numpy_arrays = []
    for position, item in enumerate(dataframes):
        if isinstance(item, pd.DataFrame):
            item = item.to_numpy()
        elif not isinstance(item, np.ndarray):
            raise ValueError(
                "All inputs must be either pandas DataFrame or numpy array "
                f"(argument {position} is {type(item).__name__})"
            )
        numpy_arrays.append(item)

    return np.concatenate(numpy_arrays, axis=1)
402
+ ]
403
+ },
404
+ {
405
+ "cell_type": "code",
406
+ "execution_count": 18,
407
+ "metadata": {},
408
+ "outputs": [
409
+ {
410
+ "data": {
411
+ "text/plain": [
412
+ "0"
413
+ ]
414
+ },
415
+ "execution_count": 18,
416
+ "metadata": {},
417
+ "output_type": "execute_result"
418
+ }
419
+ ],
420
+ "source": [
421
# Stack the ECFP, MACCS and Avalon matrices column-wise, one combined
# feature matrix per dataset.
group_nws = concatenate_to_numpy(x_ws, MACCS_ws, AvalonFP_ws)
group_nde = concatenate_to_numpy(x_de, MACCS_de, AvalonFP_de)
group_nlo = concatenate_to_numpy(x_lo, MACCS_lo, AvalonFP_lo)
group_nhu = concatenate_to_numpy(x_hu, MACCS_hu, AvalonFP_hu)

# Drop the per-fingerprint matrices now that the combined copies exist.
del (
    x_ws, MACCS_ws, AvalonFP_ws,
    x_de, MACCS_de, AvalonFP_de,
    x_lo, MACCS_lo, AvalonFP_lo,
    x_hu, MACCS_hu, AvalonFP_hu,
)
gc.collect()
430
+ ]
431
+ },
432
+ {
433
+ "cell_type": "code",
434
+ "execution_count": 19,
435
+ "metadata": {},
436
+ "outputs": [],
437
+ "source": [
438
# Open the SQLite-backed Optuna storage used for the structure studies.
# A failure is only reported, not raised: in that case `storage` is not
# (re)bound by this cell.
try:
    storage = optuna.storages.RDBStorage(
        url="sqlite:///ano_analysis.db",
        engine_kwargs={"connect_args": {"timeout": 10000}},
    )
    # Alternative backend (PostgreSQL), kept for reference:
    # storage_urls = "postgresql+psycopg2://postgres:{pwd}@localhost:{num}"
    # storage = optuna.storages.RDBStorage(url=storage_urls)
except Exception as e:
    print(f"Error occurred: {e}")
444
+ ]
445
+ },
446
+ {
447
+ "cell_type": "code",
448
+ "execution_count": 20,
449
+ "metadata": {},
450
+ "outputs": [],
451
+ "source": [
452
def save_model(x_data, model_name):
    """Rebuild the model for one dataset and save it to disk.

    Any existing file at ``save_model/full_model.keras`` is removed first, so
    the saved model always matches the current input width. The model is then
    built by ``selection_structure_compress`` from the study
    ``ANO_<model_name>_struct`` in the module-level ``storage`` and saved.

    Args:
        x_data: 2-D feature array; ``x_data.shape[1]`` is used as the model
            input size.
        model_name: One of ``"ws"``, ``"de"``, ``"lo"``, ``"hu"``.

    Returns:
        The learning rate returned by ``selection_structure_compress``, or
        ``None`` if the model could not be built (errors are printed, not
        raised). The learning rate is returned whether or not a stale model
        file had to be removed first.
    """
    model_path = "save_model/full_model.keras"
    input_shape = x_data.shape[1]
    lr = None

    # A leftover model may have been built for a different feature width.
    if os.path.exists(model_path):
        print(f"Model already exists at {model_path}")
        os.remove(model_path)

    try:
        if model_name not in ("ws", "de", "lo", "hu"):
            raise ValueError(f"unknown model_name {model_name!r}")
        model, lr = selection_structure_compress(f'ANO_{model_name}_struct', storage, input_shape)
        os.makedirs("save_model", exist_ok=True)
        model.save(model_path)
        print(f"Model successfully saved to {model_path}")
    except Exception as e:
        # Best effort: report and let the caller fall back to its default lr.
        print(f"Error saving model: {e}")

    return lr
472
+ ]
473
+ },
474
+ {
475
+ "cell_type": "code",
476
+ "execution_count": 21,
477
+ "metadata": {},
478
+ "outputs": [],
479
+ "source": [
480
+ "BATCHSIZE = 32\n",
481
+ "EPOCHS = 1000\n",
482
+ "# lr = 0.0001\n",
483
+ "# decay = 1e-4"
484
+ ]
485
+ },
486
+ {
487
+ "cell_type": "code",
488
+ "execution_count": 22,
489
+ "metadata": {},
490
+ "outputs": [],
491
+ "source": [
492
import logging
import warnings

# TensorFlow / CUDA / XLA runtime configuration, applied in this order.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '3',
    'CUDA_VISIBLE_DEVICES': '0',
    'TF_GPU_ALLOCATOR': 'cuda_malloc_async',
    'CUDA_DEVICE_ORDER': 'PCI_BUS_ID',
    'TF_XLA_FLAGS': '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices',
    'XLA_FLAGS': '--xla_gpu_cuda_data_dir=/usr/local/cuda --xla_gpu_force_compilation_parallelism=1',
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
    'TF_ENABLE_ONEDNN_OPTS': '0',
    'TF_NUMA_NODES': '1',
})

# Silence Python-level warnings, then TensorFlow's own loggers.
warnings.filterwarnings('ignore')
warnings.simplefilter(action='ignore', category=FutureWarning)

logging.getLogger('tensorflow').setLevel(logging.ERROR)
tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)


def suppress_warnings(condition=True):
    """Switch TensorFlow logging between quiet and verbose.

    With ``condition`` true the 'tensorflow' logger is set to ERROR and
    ``TF_CPP_MIN_LOG_LEVEL`` to '3'; otherwise the logger is set to WARNING
    and the environment variable to '0'.
    """
    if condition:
        level, cpp_level = logging.ERROR, '3'
    else:
        level, cpp_level = logging.WARNING, '0'
    logging.getLogger('tensorflow').setLevel(level)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = cpp_level


suppress_warnings(condition=True)
523
+ ]
524
+ },
525
+ {
526
+ "cell_type": "code",
527
+ "execution_count": 23,
528
+ "metadata": {},
529
+ "outputs": [],
530
+ "source": [
531
def objective_ws_network(trial):
    """Optuna objective for the 'ws496' dataset.

    Selects descriptors for this trial, writes the features and targets to
    ``new_fps.npy`` / ``y_true.npy``, rebuilds the saved model, and runs
    ``./extra_code/learning_process.py`` as a subprocess. The subprocess
    stdout is scanned for lines containing "R2"; the value after the first
    ':' of such a line is reported to the trial.

    Returns:
        float: The last R2 value parsed from the subprocess output, or 0.0
        if none was printed or an unexpected error occurred.

    Raises:
        optuna.exceptions.TrialPruned: When the subprocess tags its R2 line
            with "(prune)" or the pruner asks to stop. This is re-raised so
            that Optuna records the trial as pruned.
    """
    # Default score, so the return below is defined even when the
    # subprocess prints no "R2" line.
    r2_result = 0.0
    try:
        new_x = search_data_descriptor_compress(trial, group_nws, mol_ws, 'ws496')
        new_x = np.nan_to_num(new_x, nan=0.0, posinf=0.0, neginf=0.0).astype('float')
        y_true = np.asarray(y_ws).astype('float')
        np.save('new_fps.npy', new_x)
        np.save('y_true.npy', y_true)

        # Use the learning rate stored with the model when available.
        lr = 0.0001
        tmp_lr = save_model(new_x, 'ws')
        if tmp_lr is not None:
            lr = tmp_lr

        result = subprocess.run(['python3', './extra_code/learning_process.py',
                                 str(BATCHSIZE), str(EPOCHS),
                                 str(lr),
                                 'new_fps.npy', 'y_true.npy'],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        if result.stderr:
            # Drop the harmless NUMA notices before echoing stderr.
            filtered_stderr = '\n'.join([line for line in result.stderr.split('\n') if "could not open file to read NUMA node" not in line and "Your kernel may have been built without NUMA support" not in line])
            if filtered_stderr:
                print(f"Error in subprocess: {filtered_stderr}", file=sys.stderr)

        for line in result.stdout.splitlines():
            if "R2" in line:
                if "(prune)" in line:
                    print(f"Pruning trial due to poor R2: {line}")
                    r2_result = 0.0
                    trial.report(r2_result, step=0)
                    raise optuna.exceptions.TrialPruned()
                else:
                    r2_result = float(line.split(":")[1].strip())
                    print(f"R2 score: {r2_result}")
                    trial.report(r2_result, step=0)

                    if trial.should_prune():
                        raise optuna.exceptions.TrialPruned()

    except optuna.exceptions.TrialPruned:
        # Must reach Optuna; the generic handler below would otherwise turn
        # a pruned trial into a completed one with score 0.0.
        raise
    except Exception as e:
        print(f"Exception occurred: {e}", file=sys.stderr)
        r2_result = 0.0
    finally:
        gc.collect()

    return r2_result
577
+ ]
578
+ },
579
+ {
580
+ "cell_type": "code",
581
+ "execution_count": 24,
582
+ "metadata": {},
583
+ "outputs": [],
584
+ "source": [
585
def objective_de_network(trial):
    """Optuna objective for the 'delaney' dataset.

    Selects descriptors for this trial, writes the features and targets to
    ``new_fps.npy`` / ``y_true.npy``, rebuilds the saved model, and runs
    ``./extra_code/learning_process.py`` as a subprocess. The subprocess
    stdout is scanned for lines containing "R2"; the value after the first
    ':' of such a line is reported to the trial.

    Returns:
        float: The last R2 value parsed from the subprocess output, or 0.0
        if none was printed or an unexpected error occurred.

    Raises:
        optuna.exceptions.TrialPruned: When the subprocess tags its R2 line
            with "(prune)" or the pruner asks to stop. This is re-raised so
            that Optuna records the trial as pruned.
    """
    # Default score, so the return below is defined even when the
    # subprocess prints no "R2" line.
    r2_result = 0.0
    try:
        new_x = search_data_descriptor_compress(trial, group_nde, mol_de, 'delaney')
        new_x = np.nan_to_num(new_x, nan=0.0, posinf=0.0, neginf=0.0).astype('float')
        y_true = np.asarray(y_de).astype('float')
        np.save('new_fps.npy', new_x)
        np.save('y_true.npy', y_true)

        # Use the learning rate stored with the model when available.
        lr = 0.0001
        tmp_lr = save_model(new_x, 'de')
        if tmp_lr is not None:
            lr = tmp_lr

        result = subprocess.run(['python3', './extra_code/learning_process.py',
                                 str(BATCHSIZE), str(EPOCHS),
                                 str(lr),
                                 'new_fps.npy', 'y_true.npy'],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        if result.stderr:
            # Drop the harmless NUMA notices before echoing stderr.
            filtered_stderr = '\n'.join([line for line in result.stderr.split('\n') if "could not open file to read NUMA node" not in line and "Your kernel may have been built without NUMA support" not in line])
            if filtered_stderr:
                print(f"Error in subprocess: {filtered_stderr}", file=sys.stderr)

        for line in result.stdout.splitlines():
            if "R2" in line:
                if "(prune)" in line:
                    print(f"Pruning trial due to poor R2: {line}")
                    r2_result = 0.0
                    trial.report(r2_result, step=0)
                    raise optuna.exceptions.TrialPruned()
                else:
                    r2_result = float(line.split(":")[1].strip())
                    print(f"R2 score: {r2_result}")
                    trial.report(r2_result, step=0)

                    if trial.should_prune():
                        raise optuna.exceptions.TrialPruned()

    except optuna.exceptions.TrialPruned:
        # Must reach Optuna; the generic handler below would otherwise turn
        # a pruned trial into a completed one with score 0.0.
        raise
    except Exception as e:
        print(f"Exception occurred: {e}", file=sys.stderr)
        r2_result = 0.0
    finally:
        gc.collect()

    return r2_result
631
+ ]
632
+ },
633
+ {
634
+ "cell_type": "code",
635
+ "execution_count": 25,
636
+ "metadata": {},
637
+ "outputs": [],
638
+ "source": [
639
def objective_lo_network(trial):
    """Optuna objective for the 'lovrics' dataset.

    Selects descriptors for this trial, writes the features and targets to
    ``new_fps.npy`` / ``y_true.npy``, rebuilds the saved model, and runs
    ``./extra_code/learning_process.py`` as a subprocess. The subprocess
    stdout is scanned for lines containing "R2"; the value after the first
    ':' of such a line is reported to the trial.

    Returns:
        float: The last R2 value parsed from the subprocess output, or 0.0
        if none was printed or an unexpected error occurred.

    Raises:
        optuna.exceptions.TrialPruned: When the subprocess tags its R2 line
            with "(prune)" or the pruner asks to stop. This is re-raised so
            that Optuna records the trial as pruned.
    """
    # Default score, so the return below is defined even when the
    # subprocess prints no "R2" line.
    r2_result = 0.0
    try:
        new_x = search_data_descriptor_compress(trial, group_nlo, mol_lo, 'lovrics')
        new_x = np.nan_to_num(new_x, nan=0.0, posinf=0.0, neginf=0.0).astype('float')
        y_true = np.asarray(y_lo).astype('float')
        np.save('new_fps.npy', new_x)
        np.save('y_true.npy', y_true)

        # Use the learning rate stored with the model when available.
        lr = 0.0001
        tmp_lr = save_model(new_x, 'lo')
        if tmp_lr is not None:
            lr = tmp_lr

        result = subprocess.run(['python3', './extra_code/learning_process.py',
                                 str(BATCHSIZE), str(EPOCHS),
                                 str(lr),
                                 'new_fps.npy', 'y_true.npy'],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        if result.stderr:
            # Drop the harmless NUMA notices before echoing stderr.
            filtered_stderr = '\n'.join([line for line in result.stderr.split('\n') if "could not open file to read NUMA node" not in line and "Your kernel may have been built without NUMA support" not in line])
            if filtered_stderr:
                print(f"Error in subprocess: {filtered_stderr}", file=sys.stderr)

        for line in result.stdout.splitlines():
            if "R2" in line:
                if "(prune)" in line:
                    print(f"Pruning trial due to poor R2: {line}")
                    r2_result = 0.0
                    trial.report(r2_result, step=0)
                    raise optuna.exceptions.TrialPruned()
                else:
                    r2_result = float(line.split(":")[1].strip())
                    print(f"R2 score: {r2_result}")
                    trial.report(r2_result, step=0)

                    if trial.should_prune():
                        raise optuna.exceptions.TrialPruned()

    except optuna.exceptions.TrialPruned:
        # Must reach Optuna; the generic handler below would otherwise turn
        # a pruned trial into a completed one with score 0.0.
        raise
    except Exception as e:
        print(f"Exception occurred: {e}", file=sys.stderr)
        r2_result = 0.0
    finally:
        gc.collect()

    return r2_result
685
+ ]
686
+ },
687
+ {
688
+ "cell_type": "code",
689
+ "execution_count": 26,
690
+ "metadata": {},
691
+ "outputs": [],
692
+ "source": [
693
def objective_hu_network(trial):
    """Optuna objective for the 'hussk' dataset.

    Selects descriptors for this trial, writes the features and targets to
    ``new_fps.npy`` / ``y_true.npy``, rebuilds the saved model, and runs
    ``./extra_code/learning_process.py`` as a subprocess. The subprocess
    stdout is scanned for lines containing "R2"; the value after the first
    ':' of such a line is reported to the trial.

    Returns:
        float: The last R2 value parsed from the subprocess output, or 0.0
        if none was printed or an unexpected error occurred.

    Raises:
        optuna.exceptions.TrialPruned: When the subprocess tags its R2 line
            with "(prune)" or the pruner asks to stop. This is re-raised so
            that Optuna records the trial as pruned.
    """
    # Default score, so the return below is defined even when the
    # subprocess prints no "R2" line.
    r2_result = 0.0
    try:
        new_x = search_data_descriptor_compress(trial, group_nhu, mol_hu, 'hussk')
        new_x = np.nan_to_num(new_x, nan=0.0, posinf=0.0, neginf=0.0).astype('float')
        y_true = np.asarray(y_hu).astype('float')
        np.save('new_fps.npy', new_x)
        np.save('y_true.npy', y_true)

        # Use the learning rate stored with the model when available.
        lr = 0.0001
        tmp_lr = save_model(new_x, 'hu')
        if tmp_lr is not None:
            lr = tmp_lr

        result = subprocess.run(['python3', './extra_code/learning_process.py',
                                 str(BATCHSIZE), str(EPOCHS),
                                 str(lr),
                                 'new_fps.npy', 'y_true.npy'],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        if result.stderr:
            # Drop the harmless NUMA notices before echoing stderr.
            filtered_stderr = '\n'.join([line for line in result.stderr.split('\n') if "could not open file to read NUMA node" not in line and "Your kernel may have been built without NUMA support" not in line])
            if filtered_stderr:
                print(f"Error in subprocess: {filtered_stderr}", file=sys.stderr)

        for line in result.stdout.splitlines():
            if "R2" in line:
                if "(prune)" in line:
                    print(f"Pruning trial due to poor R2: {line}")
                    r2_result = 0.0
                    trial.report(r2_result, step=0)
                    raise optuna.exceptions.TrialPruned()
                else:
                    r2_result = float(line.split(":")[1].strip())
                    print(f"R2 score: {r2_result}")
                    trial.report(r2_result, step=0)

                    if trial.should_prune():
                        raise optuna.exceptions.TrialPruned()

    except optuna.exceptions.TrialPruned:
        # Must reach Optuna; the generic handler below would otherwise turn
        # a pruned trial into a completed one with score 0.0.
        raise
    except Exception as e:
        print(f"Exception occurred: {e}", file=sys.stderr)
        r2_result = 0.0
    finally:
        gc.collect()

    return r2_result
739
+ ]
740
+ },
741
+ {
742
+ "cell_type": "code",
743
+ "execution_count": 27,
744
+ "metadata": {},
745
+ "outputs": [],
746
+ "source": [
747
+ "storage = optuna.storages.RDBStorage(url=\"sqlite:///example_ano.db\", engine_kwargs={\"connect_args\": {\"timeout\": 10000}})\n",
748
+ "# storage_urls = \"postgresql+psycopg2://postgres:{pwd}}@localhost:{num}}\"\n",
749
+ "# storage = optuna.storages.RDBStorage(url=storage_urls)"
750
+ ]
751
+ },
752
+ {
753
+ "cell_type": "code",
754
+ "execution_count": 28,
755
+ "metadata": {},
756
+ "outputs": [],
757
+ "source": [
758
+ "try:\n",
759
# Remove any previous network studies so the runs below start fresh.
# Each study is deleted in its own try block: a study that does not exist
# must not prevent the remaining ones from being deleted.
for _study_name in (
    "ANO_ws_network_s2f",
    "ANO_de_network_s2f",
    "ANO_lo_network_s2f",
    "ANO_hu_network_s2f",
):
    try:
        optuna.delete_study(study_name=_study_name, storage=storage)
    except KeyError:
        # NOTE(review): assumes Optuna signals a missing study with KeyError;
        # nothing to delete in that case.
        pass
    except Exception as e:
        # Still best effort, but no longer silent.
        print(f"Could not delete study {_study_name}: {e}")
765
+ ]
766
+ },
767
+ {
768
+ "cell_type": "code",
769
+ "execution_count": 29,
770
+ "metadata": {},
771
+ "outputs": [],
772
+ "source": [
773
+ "TRIALS=3"
774
+ ]
775
+ },
776
+ {
777
+ "cell_type": "code",
778
+ "execution_count": 30,
779
+ "metadata": {},
780
+ "outputs": [
781
+ {
782
+ "name": "stderr",
783
+ "output_type": "stream",
784
+ "text": [
785
+ "[I 2024-10-20 11:19:21,464] A new study created in RDB with name: ANO_ws_network_s2f\n"
786
+ ]
787
+ },
788
+ {
789
+ "name": "stdout",
790
+ "output_type": "stream",
791
+ "text": [
792
+ "Model already exists at save_model/full_model.keras\n",
793
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n"
794
+ ]
795
+ },
796
+ {
797
+ "name": "stderr",
798
+ "output_type": "stream",
799
+ "text": [
800
+ "I0000 00:00:1729390847.014614 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
801
+ "Your kernel may have been built without NUMA support.\n",
802
+ "I0000 00:00:1729390847.014721 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
803
+ "Your kernel may have been built without NUMA support.\n",
804
+ "I0000 00:00:1729390847.014780 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
805
+ "Your kernel may have been built without NUMA support.\n",
806
+ "I0000 00:00:1729390847.186332 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
807
+ "Your kernel may have been built without NUMA support.\n",
808
+ "I0000 00:00:1729390847.186551 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
809
+ "Your kernel may have been built without NUMA support.\n",
810
+ "2024-10-20 11:20:47.186582: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2112] Could not identify NUMA node of platform GPU id 0, defaulting to 0. Your kernel may not have been built with NUMA support.\n",
811
+ "2024-10-20 11:20:47.186639: I tensorflow/core/common_runtime/gpu/gpu_process_state.cc:198] Using CUDA malloc Async allocator for GPU: 0\n",
812
+ "I0000 00:00:1729390847.187010 1599933 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node\n",
813
+ "Your kernel may have been built without NUMA support.\n",
814
+ "2024-10-20 11:20:47.187059: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3586 MB memory: -> device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6\n"
815
+ ]
816
+ },
817
+ {
818
+ "name": "stdout",
819
+ "output_type": "stream",
820
+ "text": [
821
+ "Model created from best trial of 'ANO_ws_struct':\n",
822
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
823
+ " Best trial value: 0.77755\n",
824
+ "Model successfully saved to save_model/full_model.keras\n"
825
+ ]
826
+ },
827
+ {
828
+ "name": "stderr",
829
+ "output_type": "stream",
830
+ "text": [
831
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
832
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
833
+ "I0000 00:00:1729390851.965239 1600607 service.cc:146] XLA service 0x556b2b6d7c90 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
834
+ "I0000 00:00:1729390851.965303 1600607 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
835
+ "I0000 00:00:1729390852.219993 1600607 service.cc:146] XLA service 0x556b2b69f410 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
836
+ "I0000 00:00:1729390852.220034 1600607 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
837
+ "I0000 00:00:1729390860.162273 1600721 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
838
+ "\n"
839
+ ]
840
+ },
841
+ {
842
+ "name": "stdout",
843
+ "output_type": "stream",
844
+ "text": [
845
+ "R2 score: 0.858591\n"
846
+ ]
847
+ },
848
+ {
849
+ "name": "stderr",
850
+ "output_type": "stream",
851
+ "text": [
852
+ "[I 2024-10-20 11:24:15,303] Trial 0 finished with value: 0.858591 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 1, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 1, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 0, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 0 with value: 0.858591.\n"
853
+ ]
854
+ },
855
+ {
856
+ "name": "stdout",
857
+ "output_type": "stream",
858
+ "text": [
859
+ "Model already exists at save_model/full_model.keras\n",
860
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
861
+ "Model created from best trial of 'ANO_ws_struct':\n",
862
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
863
+ " Best trial value: 0.77755\n",
864
+ "Model successfully saved to save_model/full_model.keras\n"
865
+ ]
866
+ },
867
+ {
868
+ "name": "stderr",
869
+ "output_type": "stream",
870
+ "text": [
871
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
872
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
873
+ "I0000 00:00:1729391077.508810 1619585 service.cc:146] XLA service 0x564642a96600 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
874
+ "I0000 00:00:1729391077.508907 1619585 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
875
+ "I0000 00:00:1729391077.722268 1619585 service.cc:146] XLA service 0x5646429f2270 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
876
+ "I0000 00:00:1729391077.722308 1619585 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
877
+ "I0000 00:00:1729391082.624481 1619699 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
878
+ "\n"
879
+ ]
880
+ },
881
+ {
882
+ "name": "stdout",
883
+ "output_type": "stream",
884
+ "text": [
885
+ "R2 score: 0.869889\n"
886
+ ]
887
+ },
888
+ {
889
+ "name": "stderr",
890
+ "output_type": "stream",
891
+ "text": [
892
+ "[I 2024-10-20 11:27:55,750] Trial 1 finished with value: 0.869889 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElec': 0, 'NHOHCount': 1, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 0}. Best is trial 1 with value: 0.869889.\n"
893
+ ]
894
+ },
895
+ {
896
+ "name": "stdout",
897
+ "output_type": "stream",
898
+ "text": [
899
+ "Model already exists at save_model/full_model.keras\n",
900
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
901
+ "Model created from best trial of 'ANO_ws_struct':\n",
902
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
903
+ " Best trial value: 0.77755\n",
904
+ "Model successfully saved to save_model/full_model.keras\n"
905
+ ]
906
+ },
907
+ {
908
+ "name": "stderr",
909
+ "output_type": "stream",
910
+ "text": [
911
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
912
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
913
+ "I0000 00:00:1729391299.807522 1638541 service.cc:146] XLA service 0x564a6f2f36e0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
914
+ "I0000 00:00:1729391299.807567 1638541 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
915
+ "I0000 00:00:1729391300.031993 1638541 service.cc:146] XLA service 0x564a6f24eed0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
916
+ "I0000 00:00:1729391300.032062 1638541 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
917
+ "I0000 00:00:1729391305.114762 1638647 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
918
+ "\n"
919
+ ]
920
+ },
921
+ {
922
+ "name": "stdout",
923
+ "output_type": "stream",
924
+ "text": [
925
+ "R2 score: 0.878699\n"
926
+ ]
927
+ },
928
+ {
929
+ "name": "stderr",
930
+ "output_type": "stream",
931
+ "text": [
932
+ "[I 2024-10-20 11:31:39,117] Trial 2 finished with value: 0.878699 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 0, 'NumHAcceptors': 0, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElec': 1, 'NHOHCount': 1, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'NumValenceElectrons': 1, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 0, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 0}. Best is trial 2 with value: 0.878699.\n"
933
+ ]
934
+ }
935
+ ],
936
+ "source": [
937
# Create (or resume) the ws network study and run the configured number of
# trials, then split the recorded trials by final state.
study_ws_network = optuna.create_study(
    study_name='ANO_ws_network_s2f',
    storage=storage,
    direction="maximize",
    pruner=optuna.pruners.SuccessiveHalvingPruner(
        reduction_factor=64,
        min_early_stopping_rate=10,
    ),
    load_if_exists=True,
)
study_ws_network.optimize(objective_ws_network, n_trials=TRIALS)

pruned_trials_ws_fea = study_ws_network.get_trials(
    deepcopy=False, states=[TrialState.PRUNED]
)
complete_trials_ws_fea = study_ws_network.get_trials(
    deepcopy=False, states=[TrialState.COMPLETE]
)
941
+ ]
942
+ },
943
+ {
944
+ "cell_type": "code",
945
+ "execution_count": 31,
946
+ "metadata": {},
947
+ "outputs": [
948
+ {
949
+ "name": "stderr",
950
+ "output_type": "stream",
951
+ "text": [
952
+ "[I 2024-10-20 11:31:39,146] Using an existing study with name 'ANO_ws_network_s2f' instead of creating a new one.\n"
953
+ ]
954
+ },
955
+ {
956
+ "name": "stdout",
957
+ "output_type": "stream",
958
+ "text": [
959
+ "Model already exists at save_model/full_model.keras\n",
960
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
961
+ "Model created from best trial of 'ANO_ws_struct':\n",
962
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
963
+ " Best trial value: 0.77755\n",
964
+ "Model successfully saved to save_model/full_model.keras\n"
965
+ ]
966
+ },
967
+ {
968
+ "name": "stderr",
969
+ "output_type": "stream",
970
+ "text": [
971
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
972
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
973
+ "I0000 00:00:1729391518.947845 1657545 service.cc:146] XLA service 0x55d650748ae0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
974
+ "I0000 00:00:1729391518.947890 1657545 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
975
+ "I0000 00:00:1729391519.134710 1657545 service.cc:146] XLA service 0x55d6506a36b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
976
+ "I0000 00:00:1729391519.134754 1657545 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
977
+ "I0000 00:00:1729391524.385701 1657654 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
978
+ "\n"
979
+ ]
980
+ },
981
+ {
982
+ "name": "stdout",
983
+ "output_type": "stream",
984
+ "text": [
985
+ "R2 score: 0.865804\n"
986
+ ]
987
+ },
988
+ {
989
+ "name": "stderr",
990
+ "output_type": "stream",
991
+ "text": [
992
+ "[I 2024-10-20 11:36:15,828] Trial 3 finished with value: 0.865804 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 0, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 1, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 0, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 0, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 0, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 0}. Best is trial 2 with value: 0.878699.\n"
993
+ ]
994
+ },
995
+ {
996
+ "name": "stdout",
997
+ "output_type": "stream",
998
+ "text": [
999
+ "Model already exists at save_model/full_model.keras\n",
1000
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1001
+ "Model created from best trial of 'ANO_ws_struct':\n",
1002
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1003
+ " Best trial value: 0.77755\n",
1004
+ "Model successfully saved to save_model/full_model.keras\n"
1005
+ ]
1006
+ },
1007
+ {
1008
+ "name": "stderr",
1009
+ "output_type": "stream",
1010
+ "text": [
1011
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1012
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1013
+ "I0000 00:00:1729391870.164562 1676975 service.cc:146] XLA service 0x561fb8ffcbd0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1014
+ "I0000 00:00:1729391870.164627 1676975 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1015
+ "I0000 00:00:1729391870.350359 1676975 service.cc:146] XLA service 0x561fb8f16400 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1016
+ "I0000 00:00:1729391870.350392 1676975 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1017
+ "I0000 00:00:1729391875.581017 1677087 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1018
+ "\n"
1019
+ ]
1020
+ },
1021
+ {
1022
+ "name": "stdout",
1023
+ "output_type": "stream",
1024
+ "text": [
1025
+ "R2 score: 0.887433\n"
1026
+ ]
1027
+ },
1028
+ {
1029
+ "name": "stderr",
1030
+ "output_type": "stream",
1031
+ "text": [
1032
+ "[I 2024-10-20 11:41:05,066] Trial 4 finished with value: 0.887433 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 1, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 0, 'BCUT2D': 0, 'AUTOCORR3D': 1, 'RDF': 1, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 4 with value: 0.887433.\n"
1033
+ ]
1034
+ },
1035
+ {
1036
+ "name": "stdout",
1037
+ "output_type": "stream",
1038
+ "text": [
1039
+ "Model already exists at save_model/full_model.keras\n",
1040
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1041
+ "Model created from best trial of 'ANO_ws_struct':\n",
1042
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1043
+ " Best trial value: 0.77755\n",
1044
+ "Model successfully saved to save_model/full_model.keras\n"
1045
+ ]
1046
+ },
1047
+ {
1048
+ "name": "stderr",
1049
+ "output_type": "stream",
1050
+ "text": [
1051
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1052
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1053
+ "I0000 00:00:1729392158.794709 1696169 service.cc:146] XLA service 0x55d9be410480 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1054
+ "I0000 00:00:1729392158.794767 1696169 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1055
+ "I0000 00:00:1729392159.010612 1696169 service.cc:146] XLA service 0x55d9be36b340 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1056
+ "I0000 00:00:1729392159.010676 1696169 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1057
+ "I0000 00:00:1729392164.300024 1696277 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1058
+ "\n"
1059
+ ]
1060
+ },
1061
+ {
1062
+ "name": "stdout",
1063
+ "output_type": "stream",
1064
+ "text": [
1065
+ "R2 score: 0.891404\n"
1066
+ ]
1067
+ },
1068
+ {
1069
+ "name": "stderr",
1070
+ "output_type": "stream",
1071
+ "text": [
1072
+ "[I 2024-10-20 11:45:51,346] Trial 5 finished with value: 0.891404 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 0, 'NHOHCount': 1, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 1, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 0, 'PBF': 0, 'RadiusOfGyration': 1, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 1, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 0, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 5 with value: 0.891404.\n"
1073
+ ]
1074
+ }
1075
+ ],
1076
+ "source": [
1077
+ "# study_ws_network = optuna.create_study(study_name='ANO_ws_network_fixed_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
1078
+ "study_ws_network = optuna.create_study(study_name='ANO_ws_network_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
1079
+ "study_ws_network.optimize(objective_ws_network, n_trials=TRIALS)\n",
1080
+ "pruned_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1081
+ "complete_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n",
1082
+ "# 108m 38.1s\n",
1083
+ "#160m 18.2 - 100 trial 1000 epochs"
1084
+ ]
1085
+ },
1086
+ {
1087
+ "cell_type": "code",
1088
+ "execution_count": 32,
1089
+ "metadata": {},
1090
+ "outputs": [
1091
+ {
1092
+ "name": "stderr",
1093
+ "output_type": "stream",
1094
+ "text": [
1095
+ "[I 2024-10-20 11:45:51,374] A new study created in RDB with name: ANO_de_network_s2f\n"
1096
+ ]
1097
+ },
1098
+ {
1099
+ "name": "stdout",
1100
+ "output_type": "stream",
1101
+ "text": [
1102
+ "Model already exists at save_model/full_model.keras\n",
1103
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1104
+ "Model created from best trial of 'ANO_ws_struct':\n",
1105
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1106
+ " Best trial value: 0.77755\n",
1107
+ "Model successfully saved to save_model/full_model.keras\n"
1108
+ ]
1109
+ },
1110
+ {
1111
+ "name": "stderr",
1112
+ "output_type": "stream",
1113
+ "text": [
1114
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1115
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1116
+ "I0000 00:00:1729392374.693258 1715146 service.cc:146] XLA service 0x56316822dc00 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1117
+ "I0000 00:00:1729392374.693318 1715146 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1118
+ "I0000 00:00:1729392374.893630 1715146 service.cc:146] XLA service 0x5631680ee0e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1119
+ "I0000 00:00:1729392374.893670 1715146 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1120
+ "I0000 00:00:1729392380.028886 1715262 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1121
+ "\n"
1122
+ ]
1123
+ },
1124
+ {
1125
+ "name": "stdout",
1126
+ "output_type": "stream",
1127
+ "text": [
1128
+ "R2 score: 0.897157\n"
1129
+ ]
1130
+ },
1131
+ {
1132
+ "name": "stderr",
1133
+ "output_type": "stream",
1134
+ "text": [
1135
+ "[I 2024-10-20 11:49:33,553] Trial 0 finished with value: 0.897157 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 0, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 0, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 1, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 0}. Best is trial 0 with value: 0.897157.\n"
1136
+ ]
1137
+ },
1138
+ {
1139
+ "name": "stdout",
1140
+ "output_type": "stream",
1141
+ "text": [
1142
+ "Model already exists at save_model/full_model.keras\n",
1143
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1144
+ "Model created from best trial of 'ANO_ws_struct':\n",
1145
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1146
+ " Best trial value: 0.77755\n",
1147
+ "Model successfully saved to save_model/full_model.keras\n"
1148
+ ]
1149
+ },
1150
+ {
1151
+ "name": "stderr",
1152
+ "output_type": "stream",
1153
+ "text": [
1154
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1155
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1156
+ "I0000 00:00:1729392596.112086 1734128 service.cc:146] XLA service 0x56143e783d80 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1157
+ "I0000 00:00:1729392596.112159 1734128 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1158
+ "I0000 00:00:1729392596.338586 1734128 service.cc:146] XLA service 0x56143e6df470 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1159
+ "I0000 00:00:1729392596.338628 1734128 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1160
+ "I0000 00:00:1729392601.646269 1734238 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1161
+ "\n"
1162
+ ]
1163
+ },
1164
+ {
1165
+ "name": "stdout",
1166
+ "output_type": "stream",
1167
+ "text": [
1168
+ "R2 score: 0.903416\n"
1169
+ ]
1170
+ },
1171
+ {
1172
+ "name": "stderr",
1173
+ "output_type": "stream",
1174
+ "text": [
1175
+ "[I 2024-10-20 11:53:09,937] Trial 1 finished with value: 0.903416 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 0, 'NumValenceElec': 1, 'NHOHCount': 1, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 1, 'NumValenceElectrons': 0, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 0}. Best is trial 1 with value: 0.903416.\n"
1176
+ ]
1177
+ },
1178
+ {
1179
+ "name": "stdout",
1180
+ "output_type": "stream",
1181
+ "text": [
1182
+ "Model already exists at save_model/full_model.keras\n",
1183
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1184
+ "Model created from best trial of 'ANO_ws_struct':\n",
1185
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1186
+ " Best trial value: 0.77755\n",
1187
+ "Model successfully saved to save_model/full_model.keras\n"
1188
+ ]
1189
+ },
1190
+ {
1191
+ "name": "stderr",
1192
+ "output_type": "stream",
1193
+ "text": [
1194
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1195
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1196
+ "I0000 00:00:1729392888.161687 1753348 service.cc:146] XLA service 0x564c24c171f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1197
+ "I0000 00:00:1729392888.161742 1753348 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1198
+ "I0000 00:00:1729392888.371050 1753348 service.cc:146] XLA service 0x564c24b71870 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1199
+ "I0000 00:00:1729392888.371090 1753348 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1200
+ "I0000 00:00:1729392893.641735 1753460 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1201
+ "\n"
1202
+ ]
1203
+ },
1204
+ {
1205
+ "name": "stdout",
1206
+ "output_type": "stream",
1207
+ "text": [
1208
+ "R2 score: 0.880276\n"
1209
+ ]
1210
+ },
1211
+ {
1212
+ "name": "stderr",
1213
+ "output_type": "stream",
1214
+ "text": [
1215
+ "[I 2024-10-20 11:58:03,586] Trial 2 finished with value: 0.880276 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 1, 'NHOHCount': 1, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 0, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 1, 'BCUT2D': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 1}. Best is trial 1 with value: 0.903416.\n"
1216
+ ]
1217
+ }
1218
+ ],
1219
+ "source": [
1220
+ "# study_de_network = optuna.create_study(study_name='ANO_de_network_fixed_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
1221
+ "study_de_network = optuna.create_study(study_name='ANO_de_network_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
1222
+ "study_de_network.optimize(objective_de_network, n_trials=TRIALS)\n",
1223
+ "pruned_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1224
+ "complete_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])\n",
1225
+ "#74m 22.0s\n",
1226
+ "#386m 42.2 - 100 trial 1000 epochs"
1227
+ ]
1228
+ },
1229
+ {
1230
+ "cell_type": "code",
1231
+ "execution_count": 33,
1232
+ "metadata": {},
1233
+ "outputs": [
1234
+ {
1235
+ "name": "stderr",
1236
+ "output_type": "stream",
1237
+ "text": [
1238
+ "[I 2024-10-20 11:58:03,612] A new study created in RDB with name: ANO_lo_network_s2f\n"
1239
+ ]
1240
+ },
1241
+ {
1242
+ "name": "stdout",
1243
+ "output_type": "stream",
1244
+ "text": [
1245
+ "Model already exists at save_model/full_model.keras\n",
1246
+ "Best trial params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
1247
+ "Model created from best trial of 'ANO_lo_struct':\n",
1248
+ " Params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
1249
+ " Best trial value: 0.683309\n"
1250
+ ]
1251
+ },
1252
+ {
1253
+ "name": "stderr",
1254
+ "output_type": "stream",
1255
+ "text": [
1256
+ "2024-10-20 11:58:27.205874: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 240931272 exceeds 10% of free system memory.\n"
1257
+ ]
1258
+ },
1259
+ {
1260
+ "name": "stdout",
1261
+ "output_type": "stream",
1262
+ "text": [
1263
+ "Model successfully saved to save_model/full_model.keras\n"
1264
+ ]
1265
+ },
1266
+ {
1267
+ "name": "stderr",
1268
+ "output_type": "stream",
1269
+ "text": [
1270
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1271
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1272
+ "I0000 00:00:1729393110.988890 1772389 service.cc:146] XLA service 0x55b5a753f1d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1273
+ "I0000 00:00:1729393110.988948 1772389 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1274
+ "I0000 00:00:1729393111.194962 1772389 service.cc:146] XLA service 0x55b5a74d89e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1275
+ "I0000 00:00:1729393111.195011 1772389 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1276
+ "I0000 00:00:1729393116.790223 1772497 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1277
+ "\n"
1278
+ ]
1279
+ },
1280
+ {
1281
+ "name": "stdout",
1282
+ "output_type": "stream",
1283
+ "text": [
1284
+ "R2 score: 0.713994\n"
1285
+ ]
1286
+ },
1287
+ {
1288
+ "name": "stderr",
1289
+ "output_type": "stream",
1290
+ "text": [
1291
+ "[I 2024-10-20 12:12:13,855] Trial 0 finished with value: 0.713994 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 0, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 0, 'Eccentricity': 0, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 1, 'RDF': 1, 'MORSE': 0, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 0 with value: 0.713994.\n"
1292
+ ]
1293
+ },
1294
+ {
1295
+ "name": "stdout",
1296
+ "output_type": "stream",
1297
+ "text": [
1298
+ "Model already exists at save_model/full_model.keras\n",
1299
+ "Best trial params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
1300
+ "Model created from best trial of 'ANO_lo_struct':\n",
1301
+ " Params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
1302
+ " Best trial value: 0.683309\n"
1303
+ ]
1304
+ },
1305
+ {
1306
+ "name": "stderr",
1307
+ "output_type": "stream",
1308
+ "text": [
1309
+ "2024-10-20 12:12:36.493107: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 240931272 exceeds 10% of free system memory.\n"
1310
+ ]
1311
+ },
1312
+ {
1313
+ "name": "stdout",
1314
+ "output_type": "stream",
1315
+ "text": [
1316
+ "Model successfully saved to save_model/full_model.keras\n"
1317
+ ]
1318
+ },
1319
+ {
1320
+ "name": "stderr",
1321
+ "output_type": "stream",
1322
+ "text": [
1323
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1324
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1325
+ "I0000 00:00:1729393960.991997 1793223 service.cc:146] XLA service 0x561d6d841040 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1326
+ "I0000 00:00:1729393960.992049 1793223 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1327
+ "I0000 00:00:1729393961.191204 1793223 service.cc:146] XLA service 0x561d6cdf02a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1328
+ "I0000 00:00:1729393961.191247 1793223 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1329
+ "I0000 00:00:1729393967.194640 1793326 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1330
+ "\n"
1331
+ ]
1332
+ },
1333
+ {
1334
+ "name": "stdout",
1335
+ "output_type": "stream",
1336
+ "text": [
1337
+ "R2 score: 0.685843\n"
1338
+ ]
1339
+ },
1340
+ {
1341
+ "name": "stderr",
1342
+ "output_type": "stream",
1343
+ "text": [
1344
+ "[I 2024-10-20 12:34:26,856] Trial 1 finished with value: 0.685843 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 1, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 1, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 1, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 0, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 1, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 0}. Best is trial 0 with value: 0.713994.\n"
1345
+ ]
1346
+ },
1347
+ {
1348
+ "name": "stdout",
1349
+ "output_type": "stream",
1350
+ "text": [
1351
+ "Model already exists at save_model/full_model.keras\n",
1352
+ "Best trial params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
1353
+ "Model created from best trial of 'ANO_lo_struct':\n",
1354
+ " Params: {'n_layers': 2, 'n_units_l_0': 6697, 'n_decay_l_0': 0.001, 'F_dropout_l_0': 0.3, 'n_units_l_1': 8994, 'n_decay_l_1': 0.0001, 'F_dropout_l_1': 0.4, 'lr': 0.0001}\n",
1355
+ " Best trial value: 0.683309\n"
1356
+ ]
1357
+ },
1358
+ {
1359
+ "name": "stderr",
1360
+ "output_type": "stream",
1361
+ "text": [
1362
+ "2024-10-20 12:34:56.213221: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 240931272 exceeds 10% of free system memory.\n"
1363
+ ]
1364
+ },
1365
+ {
1366
+ "name": "stdout",
1367
+ "output_type": "stream",
1368
+ "text": [
1369
+ "Model successfully saved to save_model/full_model.keras\n"
1370
+ ]
1371
+ },
1372
+ {
1373
+ "name": "stderr",
1374
+ "output_type": "stream",
1375
+ "text": [
1376
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1377
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1378
+ "I0000 00:00:1729395300.642231 1815505 service.cc:146] XLA service 0x55e01cc870a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1379
+ "I0000 00:00:1729395300.642304 1815505 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1380
+ "I0000 00:00:1729395300.846508 1815505 service.cc:146] XLA service 0x55e01cbe0700 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1381
+ "I0000 00:00:1729395300.846550 1815505 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1382
+ "I0000 00:00:1729395306.797856 1815618 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1383
+ "\n"
1384
+ ]
1385
+ },
1386
+ {
1387
+ "name": "stdout",
1388
+ "output_type": "stream",
1389
+ "text": [
1390
+ "R2 score: 0.661041\n"
1391
+ ]
1392
+ },
1393
+ {
1394
+ "name": "stderr",
1395
+ "output_type": "stream",
1396
+ "text": [
1397
+ "[I 2024-10-20 12:51:24,525] Trial 2 finished with value: 0.661041 and parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 1, 'NumValenceElec': 1, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 1, 'NumValenceElectrons': 1, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 0, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 0, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 0, 'AUTOCORR2D': 1, 'BCUT2D': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 1}. Best is trial 0 with value: 0.713994.\n"
1398
+ ]
1399
+ }
1400
+ ],
1401
+ "source": [
1402
+ "# study_lo_network = optuna.create_study(study_name='ANO_lo_network_fixed_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
1403
+ "study_lo_network = optuna.create_study(study_name='ANO_lo_network_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
1404
+ "study_lo_network.optimize(objective_lo_network, n_trials=TRIALS)\n",
1405
+ "pruned_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1406
+ "complete_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
1407
+ ]
1408
+ },
1409
+ {
1410
+ "cell_type": "code",
1411
+ "execution_count": 34,
1412
+ "metadata": {},
1413
+ "outputs": [
1414
+ {
1415
+ "name": "stderr",
1416
+ "output_type": "stream",
1417
+ "text": [
1418
+ "[I 2024-10-20 12:51:24,574] A new study created in RDB with name: ANO_hu_network_s2f\n"
1419
+ ]
1420
+ },
1421
+ {
1422
+ "name": "stdout",
1423
+ "output_type": "stream",
1424
+ "text": [
1425
+ "Model already exists at save_model/full_model.keras\n",
1426
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1427
+ "Model created from best trial of 'ANO_ws_struct':\n",
1428
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1429
+ " Best trial value: 0.77755\n",
1430
+ "Model successfully saved to save_model/full_model.keras\n"
1431
+ ]
1432
+ },
1433
+ {
1434
+ "name": "stderr",
1435
+ "output_type": "stream",
1436
+ "text": [
1437
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1438
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1439
+ "I0000 00:00:1729396386.767441 1837018 service.cc:146] XLA service 0x561ad9acf200 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1440
+ "I0000 00:00:1729396386.767513 1837018 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1441
+ "I0000 00:00:1729396386.929264 1837018 service.cc:146] XLA service 0x561ad99bd7b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1442
+ "I0000 00:00:1729396386.929306 1837018 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1443
+ "I0000 00:00:1729396392.843622 1837130 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1444
+ "\n"
1445
+ ]
1446
+ },
1447
+ {
1448
+ "name": "stdout",
1449
+ "output_type": "stream",
1450
+ "text": [
1451
+ "R2 score: 0.906639\n"
1452
+ ]
1453
+ },
1454
+ {
1455
+ "name": "stderr",
1456
+ "output_type": "stream",
1457
+ "text": [
1458
+ "[I 2024-10-20 13:02:51,820] Trial 0 finished with value: 0.906639 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 1, 'NumHeteroatoms': 0, 'NumValenceElec': 0, 'NHOHCount': 0, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'NumValenceElectrons': 0, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 1, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 0, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 1}. Best is trial 0 with value: 0.906639.\n"
1459
+ ]
1460
+ },
1461
+ {
1462
+ "name": "stdout",
1463
+ "output_type": "stream",
1464
+ "text": [
1465
+ "Model already exists at save_model/full_model.keras\n",
1466
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1467
+ "Model created from best trial of 'ANO_ws_struct':\n",
1468
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1469
+ " Best trial value: 0.77755\n",
1470
+ "Model successfully saved to save_model/full_model.keras\n"
1471
+ ]
1472
+ },
1473
+ {
1474
+ "name": "stderr",
1475
+ "output_type": "stream",
1476
+ "text": [
1477
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1478
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1479
+ "I0000 00:00:1729397073.720871 1857420 service.cc:146] XLA service 0x558f5d8e5b70 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1480
+ "I0000 00:00:1729397073.720952 1857420 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1481
+ "I0000 00:00:1729397073.896023 1857420 service.cc:146] XLA service 0x558f5d7ff3a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1482
+ "I0000 00:00:1729397073.896075 1857420 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1483
+ "I0000 00:00:1729397079.724248 1857532 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1484
+ "\n"
1485
+ ]
1486
+ },
1487
+ {
1488
+ "name": "stdout",
1489
+ "output_type": "stream",
1490
+ "text": [
1491
+ "R2 score: 0.912375\n"
1492
+ ]
1493
+ },
1494
+ {
1495
+ "name": "stderr",
1496
+ "output_type": "stream",
1497
+ "text": [
1498
+ "[I 2024-10-20 13:14:49,340] Trial 1 finished with value: 0.912375 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 0, 'NumValenceElec': 1, 'NHOHCount': 0, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'NumValenceElectrons': 1, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 1, 'FractionCSP3': 0, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 1, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 0, 'WHIM': 1, 'GETAWAY': 1}. Best is trial 1 with value: 0.912375.\n"
1499
+ ]
1500
+ },
1501
+ {
1502
+ "name": "stdout",
1503
+ "output_type": "stream",
1504
+ "text": [
1505
+ "Model already exists at save_model/full_model.keras\n",
1506
+ "Best trial params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1507
+ "Model created from best trial of 'ANO_ws_struct':\n",
1508
+ " Params: {'n_layers': 3, 'n_units_l_0': 4153, 'n_decay_l_0': 0.0001, 'F_dropout_l_0': 0.2, 'n_units_l_1': 5638, 'n_decay_l_1': 1e-05, 'F_dropout_l_1': 0.4, 'n_units_l_2': 900, 'n_decay_l_2': 0.01, 'F_dropout_l_2': 0.1, 'lr': 0.001}\n",
1509
+ " Best trial value: 0.77755\n",
1510
+ "Model successfully saved to save_model/full_model.keras\n"
1511
+ ]
1512
+ },
1513
+ {
1514
+ "name": "stderr",
1515
+ "output_type": "stream",
1516
+ "text": [
1517
+ "Error in subprocess: WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1518
+ "WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
1519
+ "I0000 00:00:1729397712.104750 1877650 service.cc:146] XLA service 0x55f4c53b7d30 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
1520
+ "I0000 00:00:1729397712.104817 1877650 service.cc:154] StreamExecutor device (0): Host, Default Version\n",
1521
+ "I0000 00:00:1729397712.270438 1877650 service.cc:146] XLA service 0x55f4c5313420 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
1522
+ "I0000 00:00:1729397712.270487 1877650 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6\n",
1523
+ "I0000 00:00:1729397717.919845 1877761 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n",
1524
+ "\n"
1525
+ ]
1526
+ },
1527
+ {
1528
+ "name": "stdout",
1529
+ "output_type": "stream",
1530
+ "text": [
1531
+ "R2 score: 0.891494\n"
1532
+ ]
1533
+ },
1534
+ {
1535
+ "name": "stderr",
1536
+ "output_type": "stream",
1537
+ "text": [
1538
+ "[I 2024-10-20 13:25:39,445] Trial 2 finished with value: 0.891494 and parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElec': 1, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 0, 'LabuteASA': 0, 'NumValenceElectrons': 0, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 0, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]_ind': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 0, 'MQNs': 1, 'AUTOCORR2D': 0, 'BCUT2D': 0, 'AUTOCORR3D': 1, 'RDF': 1, 'MORSE': 1, 'WHIM': 1, 'GETAWAY': 0}. Best is trial 1 with value: 0.912375.\n"
1539
+ ]
1540
+ }
1541
+ ],
1542
+ "source": [
1543
+ "# study_hu_network = optuna.create_study(study_name='ANO_hu_network_fixed_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) \n",
1544
+ "study_hu_network = optuna.create_study(study_name='ANO_hu_network_s2f', storage=storage, direction=\"maximize\", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)\n",
1545
+ "study_hu_network.optimize(objective_hu_network, n_trials=TRIALS)\n",
1546
+ "pruned_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])\n",
1547
+ "complete_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])"
1548
+ ]
1549
+ },
1550
+ {
1551
+ "cell_type": "code",
1552
+ "execution_count": 35,
1553
+ "metadata": {},
1554
+ "outputs": [
1555
+ {
1556
+ "name": "stdout",
1557
+ "output_type": "stream",
1558
+ "text": [
1559
+ "Study statistics: [ws_structure] \n",
1560
+ " Number of finished trials: 6\n",
1561
+ " Number of pruned trials: 0\n",
1562
+ " Number of complete trials: 6\n",
1563
+ "Best trial:\n",
1564
+ " Value: 0.891404\n",
1565
+ " Params: \n",
1566
+ " NumRotatableBonds: 1\n",
1567
+ " HeavyAtomCount: 1\n",
1568
+ " NumHAcceptors: 1\n",
1569
+ " NumHDonors: 1\n",
1570
+ " NumHeteroatoms: 1\n",
1571
+ " NumValenceElec: 0\n",
1572
+ " NHOHCount: 1\n",
1573
+ " NOCount: 1\n",
1574
+ " RingCount: 0\n",
1575
+ " NumAromaticRings: 0\n",
1576
+ " NumSaturatedRings: 1\n",
1577
+ " NumAliphaticRings: 1\n",
1578
+ " LabuteASA: 0\n",
1579
+ " NumValenceElectrons: 1\n",
1580
+ " BalabanJ: 1\n",
1581
+ " BertzCT: 1\n",
1582
+ " Ipc: 1\n",
1583
+ " kappa_Series[1-3]_ind: 0\n",
1584
+ " Chi_Series[13]_ind: 0\n",
1585
+ " Phi: 0\n",
1586
+ " HallKierAlpha: 1\n",
1587
+ " NumAmideBonds: 0\n",
1588
+ " FractionCSP3: 0\n",
1589
+ " NumSpiroAtoms: 1\n",
1590
+ " NumBridgeheadAtoms: 1\n",
1591
+ " PEOE_VSA_Series[1-14]_ind: 1\n",
1592
+ " SMR_VSA_Series[1-10]_ind: 1\n",
1593
+ " SlogP_VSA_Series[1-12]_ind: 1\n",
1594
+ " EState_VSA_Series[1-11]_ind: 0\n",
1595
+ " VSA_EState_Series[1-10]_ind: 0\n",
1596
+ " Asphericity: 0\n",
1597
+ " PBF: 0\n",
1598
+ " RadiusOfGyration: 1\n",
1599
+ " InertialShapeFactor: 1\n",
1600
+ " Eccentricity: 0\n",
1601
+ " SpherocityIndex: 1\n",
1602
+ " PMI_series[1-3]_ind: 1\n",
1603
+ " NPR_series[1-2]_ind: 1\n",
1604
+ " MQNs: 1\n",
1605
+ " AUTOCORR2D: 1\n",
1606
+ " BCUT2D: 1\n",
1607
+ " AUTOCORR3D: 0\n",
1608
+ " RDF: 1\n",
1609
+ " MORSE: 0\n",
1610
+ " WHIM: 1\n",
1611
+ " GETAWAY: 1\n"
1612
+ ]
1613
+ }
1614
+ ],
1615
+ "source": [
1616
+ "print(\"Study statistics: [ws_structure] \")\n",
1617
+ "print(\" Number of finished trials: \", len(study_ws_network.trials))\n",
1618
+ "print(\" Number of pruned trials: \", len(pruned_trials_ws_newtork))\n",
1619
+ "print(\" Number of complete trials: \", len(complete_trials_ws_newtork))\n",
1620
+ "print(\"Best trial:\")\n",
1621
+ "trials_tmp = study_ws_network.best_trial\n",
1622
+ "print(\" Value: \", trials_tmp.value)\n",
1623
+ "print(\" Params: \")\n",
1624
+ "for key, value in trials_tmp.params.items():\n",
1625
+ " print(\" {}: {}\".format(key, value))"
1626
+ ]
1627
+ },
1628
+ {
1629
+ "cell_type": "code",
1630
+ "execution_count": 36,
1631
+ "metadata": {},
1632
+ "outputs": [
1633
+ {
1634
+ "name": "stdout",
1635
+ "output_type": "stream",
1636
+ "text": [
1637
+ "Study statistics: [de_structure] \n",
1638
+ " Number of finished trials: 3\n",
1639
+ " Number of pruned trials: 0\n",
1640
+ " Number of complete trials: 3\n",
1641
+ "Best trial:\n",
1642
+ " Value: 0.903416\n",
1643
+ " Params: \n",
1644
+ " NumRotatableBonds: 0\n",
1645
+ " HeavyAtomCount: 1\n",
1646
+ " NumHAcceptors: 1\n",
1647
+ " NumHDonors: 0\n",
1648
+ " NumHeteroatoms: 0\n",
1649
+ " NumValenceElec: 1\n",
1650
+ " NHOHCount: 1\n",
1651
+ " NOCount: 1\n",
1652
+ " RingCount: 0\n",
1653
+ " NumAromaticRings: 1\n",
1654
+ " NumSaturatedRings: 1\n",
1655
+ " NumAliphaticRings: 0\n",
1656
+ " LabuteASA: 1\n",
1657
+ " NumValenceElectrons: 0\n",
1658
+ " BalabanJ: 1\n",
1659
+ " BertzCT: 0\n",
1660
+ " Ipc: 0\n",
1661
+ " kappa_Series[1-3]_ind: 1\n",
1662
+ " Chi_Series[13]_ind: 0\n",
1663
+ " Phi: 0\n",
1664
+ " HallKierAlpha: 1\n",
1665
+ " NumAmideBonds: 0\n",
1666
+ " FractionCSP3: 0\n",
1667
+ " NumSpiroAtoms: 0\n",
1668
+ " NumBridgeheadAtoms: 1\n",
1669
+ " PEOE_VSA_Series[1-14]_ind: 1\n",
1670
+ " SMR_VSA_Series[1-10]_ind: 0\n",
1671
+ " SlogP_VSA_Series[1-12]_ind: 1\n",
1672
+ " EState_VSA_Series[1-11]_ind: 1\n",
1673
+ " VSA_EState_Series[1-10]_ind: 0\n",
1674
+ " Asphericity: 1\n",
1675
+ " PBF: 1\n",
1676
+ " RadiusOfGyration: 0\n",
1677
+ " InertialShapeFactor: 1\n",
1678
+ " Eccentricity: 0\n",
1679
+ " SpherocityIndex: 0\n",
1680
+ " PMI_series[1-3]_ind: 1\n",
1681
+ " NPR_series[1-2]_ind: 0\n",
1682
+ " MQNs: 1\n",
1683
+ " AUTOCORR2D: 0\n",
1684
+ " BCUT2D: 1\n",
1685
+ " AUTOCORR3D: 0\n",
1686
+ " RDF: 0\n",
1687
+ " MORSE: 1\n",
1688
+ " WHIM: 0\n",
1689
+ " GETAWAY: 0\n"
1690
+ ]
1691
+ }
1692
+ ],
1693
+ "source": [
1694
+ "print(\"Study statistics: [de_structure] \")\n",
1695
+ "print(\" Number of finished trials: \", len(study_de_network.trials))\n",
1696
+ "print(\" Number of pruned trials: \", len(pruned_trials_de_newtork))\n",
1697
+ "print(\" Number of complete trials: \", len(complete_trials_de_newtork))\n",
1698
+ "print(\"Best trial:\")\n",
1699
+ "trials_tmp = study_de_network.best_trial\n",
1700
+ "print(\" Value: \", trials_tmp.value)\n",
1701
+ "print(\" Params: \")\n",
1702
+ "for key, value in trials_tmp.params.items():\n",
1703
+ " print(\" {}: {}\".format(key, value))"
1704
+ ]
1705
+ },
1706
+ {
1707
+ "cell_type": "code",
1708
+ "execution_count": 37,
1709
+ "metadata": {},
1710
+ "outputs": [
1711
+ {
1712
+ "name": "stdout",
1713
+ "output_type": "stream",
1714
+ "text": [
1715
+ "Study statistics: [lo_structure] \n",
1716
+ " Number of finished trials: 3\n",
1717
+ " Number of pruned trials: 0\n",
1718
+ " Number of complete trials: 3\n",
1719
+ "Best trial:\n",
1720
+ " Value: 0.713994\n",
1721
+ " Params: \n",
1722
+ " NumRotatableBonds: 1\n",
1723
+ " HeavyAtomCount: 1\n",
1724
+ " NumHAcceptors: 0\n",
1725
+ " NumHDonors: 1\n",
1726
+ " NumHeteroatoms: 1\n",
1727
+ " NumValenceElec: 0\n",
1728
+ " NHOHCount: 0\n",
1729
+ " NOCount: 1\n",
1730
+ " RingCount: 0\n",
1731
+ " NumAromaticRings: 1\n",
1732
+ " NumSaturatedRings: 0\n",
1733
+ " NumAliphaticRings: 1\n",
1734
+ " LabuteASA: 0\n",
1735
+ " NumValenceElectrons: 0\n",
1736
+ " BalabanJ: 0\n",
1737
+ " BertzCT: 0\n",
1738
+ " Ipc: 1\n",
1739
+ " kappa_Series[1-3]_ind: 1\n",
1740
+ " Chi_Series[13]_ind: 0\n",
1741
+ " Phi: 1\n",
1742
+ " HallKierAlpha: 1\n",
1743
+ " NumAmideBonds: 0\n",
1744
+ " FractionCSP3: 1\n",
1745
+ " NumSpiroAtoms: 0\n",
1746
+ " NumBridgeheadAtoms: 1\n",
1747
+ " PEOE_VSA_Series[1-14]_ind: 0\n",
1748
+ " SMR_VSA_Series[1-10]_ind: 0\n",
1749
+ " SlogP_VSA_Series[1-12]_ind: 1\n",
1750
+ " EState_VSA_Series[1-11]_ind: 1\n",
1751
+ " VSA_EState_Series[1-10]_ind: 1\n",
1752
+ " Asphericity: 0\n",
1753
+ " PBF: 0\n",
1754
+ " RadiusOfGyration: 0\n",
1755
+ " InertialShapeFactor: 0\n",
1756
+ " Eccentricity: 0\n",
1757
+ " SpherocityIndex: 0\n",
1758
+ " PMI_series[1-3]_ind: 0\n",
1759
+ " NPR_series[1-2]_ind: 0\n",
1760
+ " MQNs: 0\n",
1761
+ " AUTOCORR2D: 1\n",
1762
+ " BCUT2D: 1\n",
1763
+ " AUTOCORR3D: 1\n",
1764
+ " RDF: 1\n",
1765
+ " MORSE: 0\n",
1766
+ " WHIM: 1\n",
1767
+ " GETAWAY: 1\n"
1768
+ ]
1769
+ }
1770
+ ],
1771
+ "source": [
1772
+ "print(\"Study statistics: [lo_structure] \")\n",
1773
+ "print(\" Number of finished trials: \", len(study_lo_network.trials))\n",
1774
+ "print(\" Number of pruned trials: \", len(pruned_trials_lo_newtork))\n",
1775
+ "print(\" Number of complete trials: \", len(complete_trials_lo_newtork))\n",
1776
+ "print(\"Best trial:\")\n",
1777
+ "trials_tmp = study_lo_network.best_trial\n",
1778
+ "print(\" Value: \", trials_tmp.value)\n",
1779
+ "print(\" Params: \")\n",
1780
+ "for key, value in trials_tmp.params.items():\n",
1781
+ " print(\" {}: {}\".format(key, value))"
1782
+ ]
1783
+ },
1784
+ {
1785
+ "cell_type": "code",
1786
+ "execution_count": 38,
1787
+ "metadata": {},
1788
+ "outputs": [
1789
+ {
1790
+ "name": "stdout",
1791
+ "output_type": "stream",
1792
+ "text": [
1793
+ "Study statistics: [hu_structure] \n",
1794
+ " Number of finished trials: 3\n",
1795
+ " Number of pruned trials: 0\n",
1796
+ " Number of complete trials: 3\n",
1797
+ "Best trial:\n",
1798
+ " Value: 0.912375\n",
1799
+ " Params: \n",
1800
+ " NumRotatableBonds: 1\n",
1801
+ " HeavyAtomCount: 0\n",
1802
+ " NumHAcceptors: 1\n",
1803
+ " NumHDonors: 0\n",
1804
+ " NumHeteroatoms: 0\n",
1805
+ " NumValenceElec: 1\n",
1806
+ " NHOHCount: 0\n",
1807
+ " NOCount: 0\n",
1808
+ " RingCount: 0\n",
1809
+ " NumAromaticRings: 1\n",
1810
+ " NumSaturatedRings: 1\n",
1811
+ " NumAliphaticRings: 1\n",
1812
+ " LabuteASA: 0\n",
1813
+ " NumValenceElectrons: 1\n",
1814
+ " BalabanJ: 0\n",
1815
+ " BertzCT: 1\n",
1816
+ " Ipc: 1\n",
1817
+ " kappa_Series[1-3]_ind: 0\n",
1818
+ " Chi_Series[13]_ind: 1\n",
1819
+ " Phi: 1\n",
1820
+ " HallKierAlpha: 1\n",
1821
+ " NumAmideBonds: 1\n",
1822
+ " FractionCSP3: 0\n",
1823
+ " NumSpiroAtoms: 1\n",
1824
+ " NumBridgeheadAtoms: 1\n",
1825
+ " PEOE_VSA_Series[1-14]_ind: 0\n",
1826
+ " SMR_VSA_Series[1-10]_ind: 0\n",
1827
+ " SlogP_VSA_Series[1-12]_ind: 1\n",
1828
+ " EState_VSA_Series[1-11]_ind: 0\n",
1829
+ " VSA_EState_Series[1-10]_ind: 1\n",
1830
+ " Asphericity: 1\n",
1831
+ " PBF: 1\n",
1832
+ " RadiusOfGyration: 0\n",
1833
+ " InertialShapeFactor: 0\n",
1834
+ " Eccentricity: 1\n",
1835
+ " SpherocityIndex: 1\n",
1836
+ " PMI_series[1-3]_ind: 1\n",
1837
+ " NPR_series[1-2]_ind: 0\n",
1838
+ " MQNs: 1\n",
1839
+ " AUTOCORR2D: 0\n",
1840
+ " BCUT2D: 1\n",
1841
+ " AUTOCORR3D: 0\n",
1842
+ " RDF: 1\n",
1843
+ " MORSE: 0\n",
1844
+ " WHIM: 1\n",
1845
+ " GETAWAY: 1\n"
1846
+ ]
1847
+ }
1848
+ ],
1849
+ "source": [
1850
+ "print(\"Study statistics: [hu_structure] \")\n",
1851
+ "print(\" Number of finished trials: \", len(study_hu_network.trials))\n",
1852
+ "print(\" Number of pruned trials: \", len(pruned_trials_hu_newtork))\n",
1853
+ "print(\" Number of complete trials: \", len(complete_trials_hu_newtork))\n",
1854
+ "print(\"Best trial:\")\n",
1855
+ "trials_tmp = study_hu_network.best_trial\n",
1856
+ "print(\" Value: \", trials_tmp.value)\n",
1857
+ "print(\" Params: \")\n",
1858
+ "for key, value in trials_tmp.params.items():\n",
1859
+ " print(\" {}: {}\".format(key, value))"
1860
+ ]
1861
+ },
1862
+ {
1863
+ "cell_type": "code",
1864
+ "execution_count": null,
1865
+ "metadata": {},
1866
+ "outputs": [],
1867
+ "source": []
1868
+ },
1869
+ {
1870
+ "cell_type": "code",
1871
+ "execution_count": null,
1872
+ "metadata": {},
1873
+ "outputs": [],
1874
+ "source": []
1875
+ },
1876
+ {
1877
+ "cell_type": "code",
1878
+ "execution_count": null,
1879
+ "metadata": {},
1880
+ "outputs": [],
1881
+ "source": []
1882
+ },
1883
+ {
1884
+ "cell_type": "code",
1885
+ "execution_count": null,
1886
+ "metadata": {},
1887
+ "outputs": [],
1888
+ "source": []
1889
+ }
1890
+ ],
1891
+ "metadata": {
1892
+ "kernelspec": {
1893
+ "display_name": "ai",
1894
+ "language": "python",
1895
+ "name": "python3"
1896
+ },
1897
+ "language_info": {
1898
+ "codemirror_mode": {
1899
+ "name": "ipython",
1900
+ "version": 3
1901
+ },
1902
+ "file_extension": ".py",
1903
+ "mimetype": "text/x-python",
1904
+ "name": "python",
1905
+ "nbconvert_exporter": "python",
1906
+ "pygments_lexer": "ipython3",
1907
+ "version": "3.12.2"
1908
+ },
1909
+ "orig_nbformat": 4
1910
+ },
1911
+ "nbformat": 4,
1912
+ "nbformat_minor": 2
1913
+ }
7_solubility_final_HPO_proving.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
8_solubility_xai.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
data/Lovric2020_logS0.csv ADDED
@@ -0,0 +1,830 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ isomeric_smiles,logS0
2
+ C(\C=C\c1ccccc1)N1CCN(CC1)C(c1ccccc1)c1ccccc1,-5.34
3
+ C(c1ccccc1)n1ccnc1,-2.26
4
+ C1Cc2ccccc2N1,-1.04
5
+ C1O[C@H]1c1ccccc1,-1.6
6
+ C=CCC1(C(=O)NC(=O)NC1=O)c1ccccc1,-2.346
7
+ C=CCC1(CC=C)C(=O)NC(=O)NC1=O,-1.796
8
+ C=CCS[S@@](=O)CC=C,-0.83
9
+ CC#C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CCC4=C3[C@H](C[C@]12C)c1ccc(cc1)N(C)C,-5.825
10
+ CC(=C)[C@@H]1CC=C(C)C(=O)C1,-2.06
11
+ CC(=C)[C@H]1CC=C(C)C(=O)C1,-2.06
12
+ CC(=O)C(C)(C)C,-0.723666667
13
+ CC(=O)CCCCn1c(=O)n(C)c2ncn(C)c2c1=O,-0.558
14
+ CC(=O)C[C@H](c1ccccc1)c1c(O)c2ccccc2oc1=O,-4.761
15
+ CC(=O)N1CCN(CC1)c1ccc(OC[C@H]2CO[C@@](Cn3ccnc3)(O2)c2ccc(Cl)cc2Cl)cc1,-3.8
16
+ CC(=O)NC(N)=O,-0.9
17
+ CC(=O)NC[C@H]1CN(C(=O)O1)c1ccc(N2CCN(CC2)C(=O)CO)c(F)c1,-1.97
18
+ CC(=O)NC[C@H]1CN(C(=O)O1)c1ccc(N2CCOCC2)c(F)c1,-2.07
19
+ CC(=O)NCc1c(I)c(NC(C)=O)c(I)c(C(O)=O)c1I,-2.321
20
+ CC(=O)NS(=O)(=O)c1ccc(N)cc1,-1.5135
21
+ CC(=O)Nc1c(I)c(NC(C)=O)c(I)c(C(O)=O)c1I,-2.788
22
+ CC(=O)Nc1ccc(C=O)cc1,-1.58
23
+ CC(=O)Nc1ccc(Cl)cc1,-2.842
24
+ CC(=O)Nc1ccc(F)cc1,-1.78
25
+ CC(=O)Nc1ccc(N)cc1,-0.98
26
+ CC(=O)Nc1ccc(O)cc1,-1.064
27
+ CC(=O)Nc1ccc(OC(C)=O)cc1,-1.91
28
+ CC(=O)Nc1ccc(cc1)S(N)(=O)=O,-1.61
29
+ CC(=O)Nc1ccc(cc1)[N+]([O-])=O,-2.691333333
30
+ CC(=O)Nc1cccc(C)c1,-2.09
31
+ CC(=O)Nc1ccccc1,-1.398
32
+ CC(=O)Nc1ccccc1Cl,-1.4
33
+ CC(=O)Nc1ccccc1[N+]([O-])=O,-1.91
34
+ CC(=O)Nc1nnc(s1)S(N)(=O)=O,-2.462
35
+ CC(=O)OC1CCCCC1,-1.67
36
+ CC(=O)OCC(COC(C)=O)OC(C)=O,-0.6
37
+ CC(=O)OCC1=C(N2[C@H](SC1)[C@H](NC(=O)Cc1cccs1)C2=O)C(O)=O,-2.938
38
+ CC(=O)O[C@@H]1C2=C(C)[C@H](C[C@@](O)([C@@H](OC(=O)c3ccccc3)[C@@H]3[C@@]4(CO[C@@H]4C[C@H](O)[C@@]3(C)C1=O)OC(C)=O)C2(C)C)OC(=O)[C@H](O)[C@@H](NC(=O)c1ccccc1)c1ccccc1,-6.63
39
+ CC(=O)O[C@]1(C(C)=O)C(=C)C[C@H]2[C@@H]3C=C(C)C4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,-5.57
40
+ CC(=O)Oc1ccccc1C(O)=O,-1.75
41
+ CC(=O)S[C@@H]1CC2=CC(=O)CC[C@]2(C)[C@H]2CC[C@@]3(C)[C@@H](CC[C@@]33CCC(=O)O3)[C@H]12,-4.173
42
+ CC(=O)[C@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,-4.42
43
+ CC(=O)c1ccc(N)cc1,-1.61
44
+ CC(=O)c1cccc(N)c1,-1.28
45
+ CC(=O)c1ccccc1,-1.29
46
+ CC(C)(C)C(C)(C)O,-0.72
47
+ CC(C)(C)CCCO,-1.55
48
+ CC(C)(C)CCO,-0.5
49
+ CC(C)(C)CO,-0.4
50
+ CC(C)(C)COC(N)=O,-0.8
51
+ CC(C)(C)NC(=O)[C@H]1CC[C@H]2[C@@H]3CC=C4C=C(CC[C@]4(C)[C@H]3CC[C@]12C)C(O)=O,-8.7585
52
+ CC(C)(C)NC[C@H](O)COc1ccc(NC(=O)NC2CCCCC2)cc1,-3.62
53
+ CC(C)(C)NC[C@H](O)COc1cccc2C[C@@H](O)[C@@H](O)Cc12,-1.57
54
+ CC(C)(C)NC[C@H](O)c1ccc(O)c(CO)c1,-1.224
55
+ CC(C)(C)OC(N)=O,0.1
56
+ CC(C)(C)c1ccc(O)cc1,-2.41
57
+ CC(C)(C)c1ccc(cc1)[C@@H](O)CCCN1CCC(CC1)C(O)(c1ccccc1)c1ccccc1,-6.69
58
+ CC(C)(N)C(O)=O,0.21
59
+ CC(C)(O\N=C(\C(=O)N[C@H]1[C@H]2SCC(C[n+]3ccccc3)=C(N2C1=O)C(O)=O)c1csc(N)n1)C(O)=O,-2.038
60
+ CC(C)(Oc1ccc(CCNC(=O)c2ccc(Cl)cc2)cc1)C(O)=O,-4.8
61
+ CC(C)(S)[C@@H](N)C(O)=O,-0.13
62
+ CC(C)(S)[C@H](N)C(O)=O,-0.128
63
+ CC(C)=CCC1(C)C(=O)NC(=O)NC1=O,-2.602
64
+ CC(C)=CCC[C@](C)(O)C=C,-1.99
65
+ CC(C)=CCC\C(C)=C/CO,-2.46
66
+ CC(C)=CCC\C(C)=C\C=O,-2.06
67
+ CC(C)C(=O)C(C)C,-1.3
68
+ CC(C)C(C)(C)O,-0.41
69
+ CC(C)C(O)C(C)C,-1.13725
70
+ CC(C)C1(C(C)C)C(=O)NC(=O)NC1=O,-2.766
71
+ CC(C)C1(CC=C(C)C)C(=O)NC(=O)NC1=O,-2.593
72
+ CC(C)C1(CC=C)C(=O)NC(=O)NC1=O,-1.71
73
+ CC(C)C1C(=O)NC(=O)NC1=O,-1.456
74
+ CC(C)CC(C)(C)O,-0.92
75
+ CC(C)CC(C)=O,-0.966666667
76
+ CC(C)CC1(CC=C)C(=O)NC(=O)NC1=O,-2.119
77
+ CC(C)CCC(C)=O,-1.33
78
+ CC(C)CCCO,-1.14
79
+ CC(C)CCO,-0.513333333
80
+ CC(C)CCOC(C)=O,-1.92
81
+ CC(C)CCOC=O,-1.52
82
+ CC(C)CC[C@@H](C)O,-1.38
83
+ CC(C)CNC(=O)N1CCNC1=O,-2.15
84
+ CC(C)COC(=O)C=C,-1.21
85
+ CC(C)COC(C)=O,-1.22
86
+ CC(C)COC=O,-1.01
87
+ CC(C)C[C@@H](C)O,-0.79625
88
+ CC(C)C[C@H](C)CO,-1.6
89
+ CC(C)C[C@H](N)C(O)=O,-0.75
90
+ CC(C)Cc1ccc(cc1)[C@@H](C)C(O)=O,-3.595
91
+ CC(C)Cn1c(C)nc2n(C)c(=O)n(C)c(=O)c12,-1.599
92
+ CC(C)Cn1cnc2n(C)c(=O)n(C)c(=O)c12,-0.942
93
+ CC(C)N(C(=O)CCl)c1ccccc1,-2.48
94
+ CC(C)N(CC[C@@](C(N)=O)(c1ccccc1)c1ccccn1)C(C)C,-3.1
95
+ CC(C)N(CC[C@H](c1ccccc1)c1cc(C)ccc1O)C(C)C,-2.58
96
+ CC(C)NC(=O)N1CC(=O)N(C1=O)c1cc(Cl)cc(Cl)c1,-4.405
97
+ CC(C)NC[C@H](O)COc1ccc(CC(N)=O)cc1,-1.3
98
+ CC(C)NC[C@H](O)COc1ccc(COCCOC(C)C)cc1,-1.93
99
+ CC(C)NC[C@H](O)COc1cccc2[nH]ccc12,-3.88
100
+ CC(C)NC[C@H](O)COc1cccc2ccccc12,-3.7075
101
+ CC(C)NC[C@H](O)COc1ccccc1CC=C,-2.82
102
+ CC(C)NC[C@H]1CCc2cc(CO)c(cc2N1)[N+]([O-])=O,-2.965
103
+ CC(C)Nc1cccnc1N1CCN(CC1)C(=O)c1cc2cc(NS(C)(=O)=O)ccc2[nH]1,-5.74
104
+ CC(C)OC(=O)C(C)(C)Oc1ccc(cc1)C(=O)c1ccc(Cl)cc1,-5.712
105
+ CC(C)OC(C)=O,-0.563
106
+ CC(C)OC(C)C,-1.1
107
+ CC(C)OC=O,-0.63
108
+ CC(C)SC(C)C,-2.24
109
+ CC(C)[C@@H](C)O,-0.186666667
110
+ CC(C)[C@@H]1CC[C@@H](C)CC1=O,-2.396666667
111
+ CC(C)[C@H](C)CO,-0.39
112
+ CC(C)[C@H]1CC[C@H](C)C[C@@H]1O,-2.53
113
+ CC(C)[N+]([O-])=O,-0.62
114
+ CC(C)\N=c1/cc2n(-c3ccc(Cl)cc3)c3ccccc3nc2cc1Nc1ccc(Cl)cc1,-5.8
115
+ CC(C)c1ccc(C)c(O)c1,-2.08
116
+ CC(C)c1ccc(C)cc1O,-2.186
117
+ CC(C)c1ccc(NC(=O)N(C)C)cc1,-3.469
118
+ CC(C)c1nc(nc(-c2ccc(F)cc2)c1\C=C\[C@@H](O)C[C@@H](O)CC(O)=O)N(C)S(C)(=O)=O,-2.48
119
+ CC(C)n1c(\C=C\[C@H](O)C[C@H](O)CC(O)=O)c(-c2ccc(F)cc2)c2ccccc12,-3.83
120
+ CC1(C(=O)NC(=O)NC1=O)c1ccccc1,-2.38
121
+ CC1(C)C(=O)NC(=O)NC1=O,-1.742
122
+ CC1(C)CON(Cc2ccccc2Cl)C1=O,-2.338
123
+ CC1(C)N(Cl)C(=O)N(Cl)C1=O,-2.6
124
+ CC1(C)S[C@@H]2[C@H](NC(=O)[C@H](N)c3ccc(O)cc3)C(=O)N2[C@H]1C(O)=O,-2.031
125
+ CC1(C)S[C@@H]2[C@H](NC(=O)[C@H](N)c3ccccc3)C(=O)N2[C@H]1C(O)=O,-1.539
126
+ CC1(C)[C@@H]2CC[C@@](C)(C2)C1=O,-1.85
127
+ CC1(C)[C@H]2CC[C@]1(C)C(=O)C2,-2.086
128
+ CC1(C)[C@H]2CC[C@]1(C)[C@H](O)C2,-2.32
129
+ CC1(CC=C)C(=O)NC(=O)NC1=O,-1.16
130
+ CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1,-2.3555
131
+ CC1=C(CC(O)=O)c2cc(F)ccc2\C1=C/c1ccc(cc1)[S@](C)=O,-4.78
132
+ CC1=C(CCCO1)C(=O)Nc1ccccc1,-2.56
133
+ CC1=C(N2[C@H](SC1)[C@H](NC(=O)[C@H](N)C1=CCC=CC1)C2=O)C(O)=O,-1.215
134
+ CC1=CC(=O)CC(C)(C)C1,-1.06
135
+ CC1=NS(=O)(=O)c2cc(Cl)ccc2N1,-3.481
136
+ CC1C(=O)NC(=O)NC1=O,-1.126
137
+ CCC(=O)Nc1ccccc1,-1.92
138
+ CCC(=O)OC,-0.14
139
+ CCC(=O)O[C@@](Cc1ccccc1)([C@H](C)CN(C)C)c1ccccc1,-4.985
140
+ CCC(=O)c1ccccc1,-1.83
141
+ CCC(Br)(CC)C(=O)NC(N)=O,-2.68
142
+ CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@@H]12,-4.145
143
+ CCC(C)(C)CO,-1.04
144
+ CCC(C)(C)O,0.088175
145
+ CCC(C)(O)CC,-0.366666667
146
+ CCC(CC)C=O,-1.52
147
+ CCC(CC)CO,-1.17
148
+ CCC(O)(CC)CC,-0.85
149
+ CCC(O)CC,-0.239
150
+ CCC1(C(=O)NC(=O)NC1=O)C1=CCCCC1,-2.273
151
+ CCC1(C(=O)NC(=O)NC1=O)C1=CCCCCC1,-3
152
+ CCC1(C(=O)NC(=O)NC1=O)C1=C[C@@H]2CC[C@@H](C2)C1,-2.773
153
+ CCC1(C(=O)NC(=O)NC1=O)c1ccccc1,-2.293
154
+ CCC1(C(=O)NCNC1=O)c1ccccc1,-2.64
155
+ CCC1(C(C)C)C(=O)NC(=O)NC1=O,-2.153
156
+ CCC1(C)C(=O)NC(=O)NC1=O,-1.162
157
+ CCC1(CC)C(=O)NC(=O)NC1=O,-1.41
158
+ CCC1(CC)C(=O)NC[C@H](C)C1=O,-0.382
159
+ CCC1(CC=C(C)C)C(=O)NC(=O)NC1=O,-2.253
160
+ CCC1(CC=C)C(=O)NC(=O)NC1=O,-1.614
161
+ CCC1(CCC(C)C)C(=O)NC(=O)NC1=O,-2.47
162
+ CCC1=C(C)CN(C(=O)NCCc2ccc(cc2)S(=O)(=O)NC(=O)N[C@H]2CC[C@H](C)CC2)C1=O,-6.44
163
+ CCC1C(=O)NC(=O)NC1=O,-1.427
164
+ CCCC(=O)C=C,-0.83
165
+ CCCC(=O)CC,-0.83
166
+ CCCC(=O)CCC,-1.3
167
+ CCCC(=O)Nc1ccc(OC[C@@H](O)CNC(C)C)c(c1)C(C)=O,-2.4375
168
+ CCCC(=O)OC,-0.82
169
+ CCCC(=O)OCC,-1.28
170
+ CCCC(C)(C)CO,-1.52
171
+ CCCC(C)(C)O,-0.49
172
+ CCCC(C)(COC(N)=O)COC(N)=O,-1.807
173
+ CCCC(O)CCC,-1.4
174
+ CCCC1(CC)C(=O)NC(=O)NC1=O,-1.491
175
+ CCCC1(CCC)C(=O)NC(=O)NC1=O,-2.527
176
+ CCCCC(=O)CCCC,-2.583333333
177
+ CCCCC(=O)OC,-1.36
178
+ CCCCC(=O)OCC,-1.75
179
+ CCCCC(C)(C)O,-1.08
180
+ CCCCC(C)=O,-0.8
181
+ CCCCC1(CC)C(=O)NC(=O)NC1=O,-1.686
182
+ CCCCC1(CC=C)C(=O)NC(=O)NC1=O,-2.172
183
+ CCCCC1C(=O)N(N(C1=O)c1ccccc1)c1ccccc1,-4.391
184
+ CCCCCC(=O)OC,-1.913333333
185
+ CCCCCC(=O)OCC,-2.336666667
186
+ CCCCCC(C)(C)O,-1.72
187
+ CCCCCC(C)=O,-1.44
188
+ CCCCCC1(CC)C(=O)NC(=O)NC1=O,-2.34
189
+ CCCCCC=O,-1.3
190
+ CCCCCCC(=O)OCC,-2.73
191
+ CCCCCCC(C)=O,-2.05
192
+ CCCCCCC1(CC)C(=O)NC(=O)NC1=O,-3.049
193
+ CCCCCCC=O,-1.7
194
+ CCCCCCCC(C)=O,-2.58
195
+ CCCCCCCC1(CC)C(=O)NC(=O)NC1=O,-3.218
196
+ CCCCCCCC=O,-2.36
197
+ CCCCCCCCC1(CC)C(=O)NC(=O)NC1=O,-3.943
198
+ CCCCCCCCCC1(CC)C(=O)NC(=O)NC1=O,-4.462
199
+ CCCCCCCCC[C@@H](C)O,-2.94
200
+ CCCCCCCCO,-2.385
201
+ CCCCCCCN(CC)CCC[C@@H](O)c1ccc(NS(C)(=O)=O)cc1,-1.81
202
+ CCCCCCCO,-1.8175
203
+ CCCCCCCOC(N)=O,-2.62
204
+ CCCCCCC[C@@H](C)O,-2.74
205
+ CCCCCCO,-1.2375
206
+ CCCCCCOC(C)=O,-2.46
207
+ CCCCCCOC(N)=O,-1.92
208
+ CCCCCC[C@@H](C)O,-1.99
209
+ CCCCCO,-0.60225
210
+ CCCCCOC(=O)CC,-2.25
211
+ CCCCCOC(C)=O,-1.8875
212
+ CCCCCOC(N)=O,-1.47
213
+ CCCCC[C@@H](C)O,-1.55
214
+ CCCCC[C@@H](O)CC,-1.98
215
+ CCCCC[C@H](O)\C=C\[C@H]1[C@H](O)CC(=O)[C@@H]1CCCCCCC(O)=O,-3.67
216
+ CCCCC[C@H](O)\C=C\[C@H]1[C@H](O)CC(=O)[C@@H]1C\C=C/CCCC(O)=O,-2.47
217
+ CCCCN(CCN(CCCC)C(=O)N1CCOCC1)C(=O)N1CCOCC1,0.098
218
+ CCCCN1CCCC[C@@H]1C(=O)Nc1c(C)cccc1C,-3.511
219
+ CCCCNC(=O)NS(=O)(=O)c1ccc(C)cc1,-3.4665
220
+ CCCCNC(=O)OC[C@@](C)(CCC)COC(N)=O,-2.739
221
+ CCCCNc1cc(cc(c1Oc1ccccc1)S(N)(=O)=O)C(O)=O,-3.562
222
+ CCCCNc1ccc(cc1)C(=O)OCCN(C)C,-3.011
223
+ CCCCOC,-0.99
224
+ CCCCOC(=O)c1ccc(N)cc1,-3.131
225
+ CCCCOC(=O)c1ccc(O)cc1,-3.101
226
+ CCCCOC(C)=O,-1.29
227
+ CCCCOC(N)=O,-0.66
228
+ CCCCOCCCC,-1.885
229
+ CCCCOCCO,-0.42
230
+ CCCCOc1cc(C(=O)NCCN(CC)CC)c2ccccc2n1,-4.39
231
+ CCCCOc1ccc(OCCCN2CCOCC2)cc1,-3.5
232
+ CCCC[C@@H](C)O,-0.89
233
+ CCCC[C@@H](CC)C=O,-2.13
234
+ CCCC[C@@H](CC)CO,-2.11
235
+ CCCC[C@@H](O)CC,-1.47
236
+ CCCC[C@H](C)[C@H](C)O,-1.72
237
+ CCCC[C@](C)(O)CC,-1.6
238
+ CCCCc1oc2ccccc2c1C(=O)c1cc(I)c(OCCN(CC)CC)c(I)c1,-8.174
239
+ CCCCn1c(=O)n(C)c2ncn(C)c2c1=O,-1.625
240
+ CCCCn1c(C)nc2n(C)c(=O)n(C)c(=O)c12,-1.745
241
+ CCCCn1cnc2n(C)c(=O)n(C)c(=O)c12,-1.805
242
+ CCCN(CCC)S(=O)(=O)c1ccc(cc1)C(O)=O,-4.888
243
+ CCCNC(=O)NS(=O)(=O)c1ccc(Cl)cc1,-3.2745
244
+ CCCNC[C@@H](O)COc1ccccc1C(=O)CCc1ccccc1,-5.2
245
+ CCCOC(=O)CC,-1.34
246
+ CCCOC(=O)CCC,-1.92
247
+ CCCOC(=O)c1ccc(O)cc1,-2.557
248
+ CCCOC(=O)c1ccccc1,-2.67
249
+ CCCOC(C)=O,-0.72
250
+ CCCOC(C)C,-1.34
251
+ CCCOC=O,-0.49
252
+ CCCOCC,-0.66
253
+ CCCOCCC,-1.446666667
254
+ CCCOc1ccc2[C@@H]([C@H]([C@@H](c2c1)c1ccc(OC)cc1OCC(O)=O)C(O)=O)c1ccc2OCOc2c1,-6.771
255
+ CCCSCCC,-2.58
256
+ CCC[C@@H](C)O,-0.29
257
+ CCC[C@@H](O)CC,-0.8
258
+ CCC[C@@H](O)[C@H](CC)CO,-0.54
259
+ CCC[C@@]1(CCc2ccccc2)CC(=O)[C@@H]([C@H](CC)c2cccc(NS(=O)(=O)c3ccc(cn3)C(F)(F)F)c2)C(=O)O1,-6.3
260
+ CCC[C@H](C)C1(CC)C(=O)NC(=O)NC1=O,-2.41
261
+ CCC[C@H](C)C1(CC=C)C(=O)NC(=O)NC1=O,-2.333
262
+ CCC[C@H](C)CO,-1.11
263
+ CCC[C@H](O)C=C,-0.59
264
+ CCC[C@](C)(O)CC,-0.986666667
265
+ CCC[N+]([O-])=O,-0.8
266
+ CCC[S@](=O)CCCN(CC)C[C@@H](O)COc1ccc(cc1)C#N,-1.17
267
+ CCC\C(=C(/C)O)C(C)=O,-0.88
268
+ CCC\C=C(\CC)C=O,-2.46
269
+ CCCc1cc(=O)[nH]c(=S)[nH]1,-2.185
270
+ CCCn1c(=O)n(C)c2ncn(C)c2c1=O,-1.207
271
+ CCN(CC)C(=O)C(\Cl)=C(/C)OP(=O)(OC)OC,0.523
272
+ CCN(CC)C(=O)CSc1ccc(Cl)nn1,-1.716
273
+ CCN(CC)C(=O)Nc1ccc(OC[C@@H](O)CNC(C)(C)C)c(c1)C(C)=O,-1.9
274
+ CCN(CC)C(=S)SSC(=S)N(CC)CC,-2.995
275
+ CCN(CC)CC(=O)Nc1c(C)cccc1C,-1.874
276
+ CCN(CC)CCC[C@@H](C)Nc1c2ccc(Cl)cc2nc2ccc(OC)cc12,-4.35
277
+ CCN(CC)CCC[C@@H](C)Nc1ccnc2cc(Cl)ccc12,-3.89
278
+ CCN(CC)CCNC(=O)c1cc(Cl)c(N)cc1OC,-3.565
279
+ CCN(CC)CCOC(=O)c1ccc(N)cc1,-1.719
280
+ CCN(CC)Cc1cc(Nc2ccnc3cc(Cl)ccc23)ccc1O,-5.79
281
+ CCN(Cc1ccncc1)C(=O)[C@@H](CO)c1ccccc1,-1.698
282
+ CCN(N=O)C(N)=O,-0.96
283
+ CCN1CCC[C@@H]1CNC(=O)c1cc(ccc1OC)S(N)(=O)=O,-2.876
284
+ CCN1CCN(CC1)c1cc2n(cc(C(O)=O)c(=O)c2cc1F)C1CC1,-3.18
285
+ CCN1c2ncccc2N(C)C(=O)c2cccnc12,-2.62
286
+ CCN1c2ncccc2N(CC)C(=O)c2cccnc12,-2.86
287
+ CCNC(=O)[C@H](C)OC(=O)Nc1ccccc1,-1.83
288
+ CCNC(=S)NCC,-1.46
289
+ CCN[C@H]1CN(CCCOC)S(=O)(=O)c2sc(cc12)S(N)(=O)=O,-2.95
290
+ CCNc1nc(Cl)nc(NC(C)C)n1,-3.489
291
+ CCOC(=O)C1=C(C)NC(C)=C([C@H]1c1cccc(Cl)c1Cl)C(=O)OC,-5.89
292
+ CCOC(=O)C=C,-0.74
293
+ CCOC(=O)CC,-0.66
294
+ CCOC(=O)CC(=O)OCC,-0.82
295
+ CCOC(=O)CCC(=O)OCC,-0.96
296
+ CCOC(=O)N(C)C(=O)CSP(=S)(OCC)OCC,-2.518
297
+ CCOC(=O)NCCOc1ccc(Oc2ccccc2)cc1,-4.719
298
+ CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1CCC[C@H]1C(O)=O,-1.305
299
+ CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1Cc2ccccc2C[C@H]1C(O)=O,-1.9
300
+ CCOC(=O)[C@H](CCc1ccccc1)N[C@@H](C)C(=O)N1[C@H]2CCCC[C@@H]2C[C@H]1C(O)=O,-2.84
301
+ CCOC(=O)c1ccc(N)cc1,-2.41
302
+ CCOC(=O)c1ccc(O)cc1,-2.346
303
+ CCOC(=O)c1ccccc1,-2.32
304
+ CCOC(=O)c1ccccc1C(=O)OCC,-2.35
305
+ CCOC(=O)c1cncn1[C@H](C)c1ccccc1,-6.735
306
+ CCOC(C)=O,-0.035675
307
+ CCOC(C)C,-0.55
308
+ CCOC(C)OCC,-0.43
309
+ CCOC(N)=O,0.85
310
+ CCOCCOCC,-0.77
311
+ CCOP(=O)(OCC)OCC,0.43
312
+ CCOP(=S)(OCC)Oc1ccc(cc1)[S@](C)=O,-2.3
313
+ CCOP(=S)(OCC)Oc1nc(Cl)c(Cl)cc1Cl,-5.244
314
+ CCOc1ccc(NC(C)=O)cc1,-2.4255
315
+ CCOc1ccc(NC(N)=O)cc1,-2.17
316
+ CCOc1ccccc1,-2.33
317
+ CCS(=O)(=O)CC,0.04
318
+ CCSCc1ccccc1OC(=O)NC,-2.09
319
+ CCSSCC,-2.42
320
+ CC[C@@H](C)C(C)(C)O,-0.89
321
+ CC[C@@H](C)C(C)=O,-0.67
322
+ CC[C@@H](C)OC(N)=O,-0.3
323
+ CC[C@@H](CO)NCCN[C@@H](CC)CO,-0.565
324
+ CC[C@@H](Cc1c(I)cc(I)c(N)c1I)C(O)=O,-4.58
325
+ CC[C@@H](O)C(C)(C)C,-1.15
326
+ CC[C@@H](O)C(C)C,-0.7
327
+ CC[C@@](C)(O)C(C)(C)C,-1.27
328
+ CC[C@@](C)(O)C(C)C,-0.85
329
+ CC[C@H](C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)[C@H](CC[C@@H]3C[C@@H](O)CC(=O)O3)[C@@H]12,-6
330
+ CC[C@H](C)C1(CC)C(=O)NC(=O)NC1=O,-2.333
331
+ CC[C@H](C)C1(CC=C)C(=O)NC(=O)NC1=O,-2.016
332
+ CC[C@H](C)CO,-0.47
333
+ CC[C@H](C)[C@H](C)O,-0.716666667
334
+ CC[C@H](N)C(O)=O,0.3075
335
+ CC[C@H]1NC(=O)NC1=O,-0.06
336
+ CC[C@H]1OC(=O)[C@H](C)[C@@H](O[C@H]2C[C@@](C)(OC)[C@@H](O)[C@H](C)O2)[C@H](C)[C@@H](O[C@@H]2O[C@H](C)C[C@@H]([C@H]2O)N(C)C)[C@](C)(O)C[C@@H](C)C(=O)[C@H](C)[C@@H](O)[C@]1(C)O,-3.15
337
+ CC[C@]1(CCC(=O)NC1=O)c1ccccc1,-2.337
338
+ CC\C(=C(/c1ccccc1)c1ccc(OCCN(C)C)cc1)c1ccccc1,-8.02
339
+ CC\C(=C(\CC)c1ccc(O)cc1)c1ccc(O)cc1,-4.7145
340
+ CC\C=C(/C)C1(CC)C(=O)NC(=O)NC1=O,-2.458
341
+ CCc1ccc(CCOc2ccc(C[C@H]3SC(=O)NC3=O)cc2)nc1,-6.185
342
+ CCc1cccc(C)c1N([C@@H](C)COC)C(=O)CCl,-2.73
343
+ CCc1ccccc1O,-1.36
344
+ CCc1cccs1,-2.59
345
+ CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,-4.109
346
+ CCn1c(=O)n(C)c2ncn(C)c2c1=O,-0.719
347
+ CCn1cc(C(O)=O)c(=O)c2cc(F)c(N3CCN[C@@H](C)C3)c(F)c12,-2.43291365
348
+ CCn1cc(C(O)=O)c(=O)c2cc(F)c(cc12)N1CCNCC1,-2.9065
349
+ CCn1cc(C(O)=O)c(=O)c2ccc(C)nc12,-3.4885
350
+ CCn1cnc2n(C)c(=O)n(C)c(=O)c12,-0.757
351
+ CN(C(C)=O)c1ccccc1,-0.95
352
+ CN(C)C(=O)C(CCN1CCC(O)(CC1)c1ccc(Cl)cc1)(c1ccccc1)c1ccccc1,-7.074
353
+ CN(C)C(=O)C(c1ccccc1)c1ccccc1,-2.98
354
+ CN(C)C(=O)N(C)C,0.94
355
+ CN(C)C(=O)NC1CCCCCCC1,-2.289
356
+ CN(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-3.76
357
+ CN(C)C(=O)Nc1ccc(Cl)cc1,-2.9
358
+ CN(C)C(=O)Nc1cccc(OC(=O)NC(C)(C)C)c1,-2.93
359
+ CN(C)C(=O)Nc1cccc(c1)C(F)(F)F,-3.463
360
+ CN(C)C(=O)Nc1ccccc1,-1.6175
361
+ CN(C)C(=O)OC1=CC(=O)CC(C)(C)C1,-0.85
362
+ CN(C)C(=O)Oc1cc(C)nn1-c1ccccc1,-2.09
363
+ CN(C)CCC=C1c2ccccc2CCc2ccccc12,-4.55
364
+ CN(C)CCCN1c2ccccc2CCc2ccccc12,-4.3125
365
+ CN(C)CCCN1c2ccccc2Sc2ccc(Cl)cc12,-5.1455
366
+ CN(C)CCCOc1nn(Cc2ccccc2)c2ccccc12,-3.78
367
+ CN(C)CCOC(c1ccccc1)c1ccccc1,-2.947
368
+ CN(C)CCO[C@@H](c1ccccc1)c1ccccc1C,-4.1
369
+ CN(C)CC[C@@H](c1ccc(Cl)cc1)c1ccccn1,-2.659
370
+ CN(C)CC\C=C1\c2ccccc2Sc2ccc(Cl)cc12,-6.308
371
+ CN(C)[C@H]1[C@@H]2C[C@@H]3Cc4c(ccc(O)c4C(=O)C3=C(O)[C@]2(O)C(=O)[C@H](C(N)=O)C1=O)N(C)C,-0.944
372
+ CN(C)[C@H]1[C@@H]2C[C@H]3C(=C(O)[C@]2(O)C(=O)[C@H](C(N)=O)C1=O)C(=O)c1c(O)ccc(Cl)c1[C@@]3(C)O,-2.94
373
+ CN(C)[C@H]1[C@@H]2C[C@H]3C(=C(O)[C@]2(O)C(=O)[C@H](C(N)=O)C1=O)C(=O)c1c(O)cccc1[C@@]3(C)O,-2.924
374
+ CN(C)[C@H]1[C@@H]2[C@@H](O)[C@H]3C(=C(O)[C@]2(O)C(=O)[C@H](C(N)=O)C1=O)C(=O)c1c(O)cccc1[C@@]3(C)O,-3.093
375
+ CN(C)c1c(C)n(C)n(-c2ccccc2)c1=O,-0.619
376
+ CN(C)c1cnc2cncnc2n1,-0.021
377
+ CN(C)c1ncc2nccnc2n1,0.36
378
+ CN(C)c1ncnc2nccnc12,-1.021
379
+ CN(CCOc1ccc(C[C@H]2SC(=O)NC2=O)cc1)c1ccccn1,-5.25
380
+ CN(Cc1cnc2nc(N)nc(N)c2n1)c1ccc(cc1)C(=O)N[C@@H](CCC(O)=O)C(O)=O,-4.1
381
+ CN(N=O)C(N)=O,-0.85
382
+ CN1C(=O)NC(=O)[C@@](C)(C1=O)C1=CCCCC1,-2.674
383
+ CN1C(C(=O)Nc2cc(C)on2)=C(O)c2ccccc2S1(=O)=O,-5.61
384
+ CN1C(C(=O)Nc2ccccn2)=C(O)c2ccccc2S1(=O)=O,-4.8
385
+ CN1C(C(=O)Nc2ccccn2)=C(O)c2sccc2S1(=O)=O,-3.875
386
+ CN1CCC(CC1)=C1c2ccccc2C=Cc2ccccc12,-5.9
387
+ CN1CCCC1=O,1
388
+ CN1CCN(CC1)C1=Nc2ccccc2Nc2sc(C)cc12,-4.35
389
+ CN1CCN(CC1)c1cc2n(cc(C(O)=O)c(=O)c2cc1F)-c1ccc(F)cc1,-3.6
390
+ CN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc23)CC1,-4.398
391
+ CN1CC[C@@]23[C@H]4Oc5c2c(C[C@@H]1[C@@H]3C=C[C@@H]4O)ccc5O,-3.154
392
+ CN1CC[C@@]23[C@H]4Oc5c2c(C[C@@H]1[C@@H]3C=C[C@@H]4OC(C)=O)ccc5OC(C)=O,-2.798
393
+ CN1C[C@@H](O)N(C1=O)c1nnc(s1)C(C)(C)C,-1.877
394
+ CN1C[C@@H]2C[C@H]1CN2c1cc2n(cc(C(O)=O)c(=O)c2cc1F)C1CC1,-2.9
395
+ CN1[C@@H](CCl)Nc2cc(Cl)c(cc2S1(=O)=O)S(N)(=O)=O,-3.778
396
+ CN1[C@H]2CC[C@@H]1C[C@H](C2)OC(=O)[C@@H](CO)c1ccccc1,-2.004
397
+ CN1[C@H]2CC[C@@H]1C[C@H](C2)OC(=O)[C@H](CO)c1ccccc1,-1.91
398
+ CN1c2ccc(Cl)cc2C(=NCC1=O)c1ccccc1,-3.802
399
+ CNC(=O)O\N=C(/SC)C(=O)N(C)C,0.106
400
+ CNC(=O)O\N=C(\CSC)C(C)(C)C,-1.62
401
+ CNC(=O)O\N=C1\[C@H](Cl)[C@H]2C[C@@H](C#N)[C@@H]1C2,-2.08
402
+ CNC(=O)Oc1cc(C)cc(C)c1,-2.581
403
+ CNC(=O)Oc1cccc(C)c1,-1.802
404
+ CNC(=O)Oc1cccc2CC(C)(C)Oc12,-2.5
405
+ CNC(=O)Oc1ccccc1C(C)C,-2.863
406
+ CNC(=O)Oc1ccccc1C1OCCO1,-1.57
407
+ CNC(=O)Oc1ccccc1OC(C)C,-2.02
408
+ CNC(=O)[C@@H](C)SCCSP(=O)(OC)OC,1.144
409
+ CNC(=O)\C=C(/C)OP(=O)(OC)OC,0.651
410
+ CNCCC=C1c2ccccc2CCc2ccccc12,-4.018
411
+ CNCCCN1c2ccccc2CCc2ccccc12,-3.76
412
+ CNCCC[C@@]12CC[C@@H](c3ccccc13)c1ccccc21,-4.796
413
+ CNCC[C@H](Oc1ccc(cc1)C(F)(F)F)c1ccccc1,-3.92
414
+ CNC[C@H](O)c1ccc(O)c(O)c1,-2.74
415
+ CN[C@H]1CC[C@@H](c2ccc(Cl)c(Cl)c2)c2ccccc12,-4.83
416
+ CN\C(NCCSCc1ccc(CN(C)C)o1)=C/[N+]([O-])=O,-2.5
417
+ CNc1ccccc1,-1.28
418
+ COC(=O)C(C)=C,-0.8
419
+ COC(=O)[C@@H]1[C@H]2CC[C@@H](C[C@@H]1OC(=O)c1ccccc1)N2C,-2.26
420
+ COC(=O)[C@@H]1[C@H]2C[C@H](C=C2)[C@H]1C(=O)OC,-1.2
421
+ COC(=O)c1ccc(N)cc1,-1.59
422
+ COC(=O)c1ccc(O)cc1,-1.705
423
+ COC(=O)c1ccc(OC)cc1,-2.41
424
+ COC(=O)c1ccccc1,-1.85
425
+ COC(=O)c1ccccc1C(=O)OC,-1.66
426
+ COC(=O)c1ccccc1O[C@@H]1O[C@H](CO[C@@H]2OC[C@@H](O)[C@H](O)[C@H]2O)[C@@H](O)[C@H](O)[C@H]1O,-0.742
427
+ COC(=O)c1cccnc1,-0.46
428
+ COC(C)(C)C,-0.23875
429
+ COC1=CC(=O)C[C@@H](C)[C@]11Oc2c(C1=O)c(OC)cc(OC)c2Cl,-4.83
430
+ COC1=CC=C2[C@H]3Cc4ccc(OC)c5O[C@@H]1[C@]2(CCN3C)c45,-2.658
431
+ COCC(=O)N([C@H](C)C(=O)OC)c1c(C)cccc1C,-1.6005
432
+ COCCOc1c(OC)cc(Cc2cnc(N)nc2N)cc1OC,-2.101
433
+ COCCc1ccc(OC[C@@H](O)CNC(C)C)cc1,-1.315
434
+ CON(C)C(=O)Nc1ccc(Cl)c(Cl)c1,-3.521
435
+ CON(C)C(=O)Nc1ccc(Cl)cc1,-2.57
436
+ COP(=O)(OC)O[C@@H](Br)C(Cl)(Cl)Br,-2.28
437
+ COP(=O)(OC)[C@H](O)C(Cl)(Cl)Cl,-0.22
438
+ COP(=S)(OC)Oc1cc(Cl)c(Cl)cc1Cl,-3.905
439
+ COP(=S)(OC)SCC(=O)N(C)C=O,-1.995
440
+ CO[C@]12CC[C@@]3(C[C@@H]1[C@](C)(O)C(C)(C)C)[C@H]1Cc4ccc(O)c5O[C@@H]2[C@]3(CCN1CC1CC1)c45,-4.37
441
+ CO[C@]12[C@H]3N[C@H]3CN1C1=C([C@H]2COC(N)=O)C(=O)C(N)=C(C)C1=O,-2.564
442
+ CO[P@@](=O)(NC(C)=O)SC,0.54
443
+ COc1c2ccoc2c(OC)c2oc(C)cc(=O)c12,-3.017
444
+ COc1cc(Cc2cnc(N)nc2N)cc(OC)c1OC,-2.87
445
+ COc1cc(N[C@@H](C)CCCN)c2ncccc2c1,-2.52
446
+ COc1cc2CC[C@H](NC(C)=O)c3cc(=O)c(OC)ccc3-c2c(OC)c1OC,-0.944
447
+ COc1cc2nc(nc(N)c2cc1OC)N1CCN(CC1)C(=O)[C@H]1CCCO1,-1.8
448
+ COc1cc2nc(nc(N)c2cc1OC)N1CCN(CC1)C(=O)c1ccco1,-5.086
449
+ COc1ccc(C=O)cc1,-1.49
450
+ COc1ccc(CC=C)cc1,-2.92
451
+ COc1ccc(CCN(C)CCC[C@@](C#N)(C(C)C)c2ccc(OC)c(OC)c2)cc1OC,-4.6
452
+ COc1ccc(Cc2nccc3cc(OC)c(OC)cc23)cc1OC,-4.103
453
+ COc1ccc(Cl)cc1,-2.78
454
+ COc1ccc(Cl)cc1C(=O)NCCc1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1,-6.755
455
+ COc1ccc(NC(=O)N(C)C)cc1Cl,-2.564
456
+ COc1ccc(NC(C)=O)cc1,-1.3
457
+ COc1ccc(cc1)[C@@H]1Sc2ccccc2N(CCN(C)C)C(=O)[C@@H]1OC(C)=O,-3.0545
458
+ COc1ccc(cc1)[C@@](O)(C1CC1)c1cncnc1,-2.598
459
+ COc1ccc(cc1)[N+]([O-])=O,-2.41
460
+ COc1ccc2COC(=O)c2c1OC,-1.8945
461
+ COc1ccc2cc(ccc2c1)[C@H](C)C(O)=O,-4.215
462
+ COc1ccc2nc([nH]c2c1)[S@](=O)Cc1ncc(C)c(OC)c1C,-3.42
463
+ COc1ccc2nccc([C@@H](O)[C@@H]3C[C@@H]4CC[N@]3C[C@@H]4C=C)c2c1,-2.788
464
+ COc1ccc2nccc([C@H](O)[C@H]3C[C@@H]4CC[N@]3C[C@@H]4C=C)c2c1,-2.812
465
+ COc1cccc(Cl)c1,-2.78
466
+ COc1cccc(c1)[C@@]1(O)CCCC[C@@H]1CN(C)C,-2.24
467
+ COc1ccccc1,-1.85
468
+ COc1ccccc1Cl,-2.46
469
+ COc1ccccc1OC[C@@H](O)CO,-0.598
470
+ COc1ccccc1OC[C@@H](O)COC(N)=O,-0.985
471
+ COc1ccccc1[N+]([O-])=O,-1.96
472
+ COc1cnc2cncnc2n1,-0.91
473
+ COc1cnc2ncncc2n1,-1.139
474
+ COc1ncc2nccnc2n1,-1.112
475
+ COc1ncnc2nccnc12,-1.112
476
+ CS(=O)(=O)OCCCCOS(C)(=O)=O,-2.267
477
+ CS(=O)(=O)c1ccc(cc1)[C@@H](O)[C@@H](CO)NC(=O)C(Cl)Cl,-2.154
478
+ CSCC[C@H](N)C(O)=O,-0.42
479
+ CSc1ccc2Sc3ccccc3N(CC[C@H]3CCCCN3C)c2c1,-5.362
480
+ CSc1ccccc1,-2.39
481
+ CSc1cnc2cncnc2n1,-1.551
482
+ CSc1ncc2nccnc2n1,-1.754
483
+ CSc1ncnc2nccnc12,-2.365
484
+ CSc1nnc(c(=O)n1N)C(C)(C)C,-2.253
485
+ C[C@@H](C(O)=O)c1ccc(c(F)c1)-c1ccccc1,-4.256
486
+ C[C@@H](C(O)=O)c1ccc(cc1)N1Cc2ccccc2C1=O,-4.772
487
+ C[C@@H](C(O)=O)c1ccc2c(c1)[nH]c1ccc(Cl)cc21,-4.699
488
+ C[C@@H](C(O)=O)c1cccc(Oc2ccccc2)c1,-3.699
489
+ C[C@@H](C(O)=O)c1cccc(c1)C(=O)c1ccccc1,-3.38
490
+ C[C@@H](CCc1ccccc1)NC[C@H](O)c1ccc(O)c(c1)C(N)=O,-3.41
491
+ C[C@@H](CN1c2ccccc2Sc2ccccc12)N(C)C,-4.34
492
+ C[C@@H](Cc1ccccc1)N(C)CC#C,-2.513
493
+ C[C@@H](N(O)C(N)=O)c1cc2ccccc2s1,-3.373
494
+ C[C@@H](N)C(O)=O,0.243
495
+ C[C@@H](N)CC(O)=O,1.08
496
+ C[C@@H](O)C(C)(C)C,-0.62
497
+ C[C@@H](O)C(F)(F)F,0.3
498
+ C[C@@H](O)[C@@H](N)C(O)=O,-0.09
499
+ C[C@@H](O)c1ccccc1,-0.92
500
+ C[C@@H](Oc1ccc(Cl)cc1Cl)C(O)=O,-2.827
501
+ C[C@@H]1CCCC(=O)C1,-1.87
502
+ C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)C3=CC[C@]2(C)[C@H]1C(=O)CN1CCN(CC1)c1cc(nc(n1)N1CCCC1)N1CCCC1,-7.59
503
+ C[C@@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,-3.59
504
+ C[C@@H]1Nc2cc(Cl)c(cc2C(=O)N1c1ccccc1C)S(N)(=O)=O,-4.33
505
+ C[C@@]12CC[C@@H](CC1)C(C)(C)O2,-1.69
506
+ C[C@@]12CC[C@@H]3C(=CCc4cc(O)ccc34)[C@H]1CC[C@@H]2O,-4.402
507
+ C[C@H](CN(C)C)CN1c2ccccc2CCc2ccccc12,-4.796
508
+ C[C@H](N)C(O)=O,0.25
509
+ C[C@H](NCCC(c1ccccc1)c1ccccc1)c1ccccc1,-4
510
+ C[C@H](O)Cn1cnc2n(C)c(=O)n(C)c(=O)c12,0.623
511
+ C[C@H](OC(=O)Nc1cccc(Cl)c1)C#C,-2.617
512
+ C[C@H]1CCCCC1=O,-0.94
513
+ C[C@H]1CC[C@@H](CC1=O)C(C)=C,-2.18
514
+ C[C@H]1CC[C@H](O)CC1,-0.88
515
+ C[C@H]1CCc2cc(F)cc3c2n1cc(C(O)=O)c3=O,-3.733
516
+ C[C@H]1CN(C[C@@H](C)N1)c1c(F)c(N)c2c(c1F)n(cc(C(O)=O)c2=O)C1CC1,-3.371
517
+ C[C@H]1COc2c(N3CCN(C)CC3)c(F)cc3c2n1cc(C(O)=O)c3=O,-1.266
518
+ C[C@H]1C[C@H](C)C(=O)[C@@H](C1)[C@H](O)CC1CC(=O)NC(=O)C1,-1.13
519
+ C[C@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3(F)[C@@H](O)C[C@]2(C)[C@@]1(O)C(=O)CO,-3.77
520
+ C[C@H]1C[C@H]2[C@@H]3CC[C@](O)(C(=O)CO)[C@@]3(C)C[C@H](O)[C@@H]2[C@@]2(C)C=CC(=O)C=C12,-2.99
521
+ C[C@H]1Cc2ccccc2N1NC(=O)c1ccc(Cl)c(c1)S(N)(=O)=O,-3.792
522
+ C[C@H]1[C@H](NC(=O)C(=N\OC(C)(C)C(O)=O)\c2csc(N)n2)C(=O)N1S(O)(=O)=O,-1.639
523
+ C[C@H]1[C@H]2Cc3ccc(O)cc3[C@]1(C)CCN2CC=C(C)C,-3.803
524
+ C[C@H]1[C@H]2[C@H](C[C@H]3[C@@H]4CC=C5C[C@@H](O)CC[C@]5(C)[C@H]4CC[C@]23C)O[C@]11CC[C@@H](C)CO1,-2.618
525
+ C[C@]1(O)CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,-3.99
526
+ C[C@]12CC(=O)[C@H]3[C@@H](CCC4=CC(=O)C=C[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.239
527
+ C[C@]12CC(=O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.27
528
+ C[C@]12CC[C@H]3C(=CCc4cc(O)ccc34)[C@@H]1CCC2=O,-5.282
529
+ C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@]34C)[C@@H]1CCC2=O,-4.064
530
+ C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@H]34)[C@@H]1CC[C@@]2(O)C#C,-4.63
531
+ C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@@H]2C(=O)CO,-3.45
532
+ C[C@]12CC[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@@H]2O,-4.159
533
+ C[C@]12CC[C@H]3[C@@H](CCC4=Cc5oncc5C[C@]34C)[C@@H]1CC[C@@]2(O)C#C,-5.507
534
+ C[C@]12CC[C@H]3[C@@H](CC[C@H]4CC(=O)CC[C@]34C)[C@@H]1CC[C@@H]2O,-4.743
535
+ C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1CCC2=O,-3.955
536
+ C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1CC[C@@H]2O,-4.845
537
+ C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1CC[C@@]2(O)C#C,-4.217
538
+ C[C@]12CC[C@H]3[C@@H](CCc4cc(O)ccc34)[C@@H]1C[C@@H](O)[C@@H]2O,-4.955
539
+ C[C@]12CCc3c(ccc4cc(O)ccc34)[C@@H]1CCC2=O,-5.249
540
+ C[C@]12C[C@H](O)[C@@]3(F)[C@@H](CCC4=CC(=O)C=C[C@]34C)[C@@H]1C[C@@H](O)[C@]2(O)C(=O)CO,-3.693
541
+ C[C@]12C[C@H](O)[C@@]3(F)[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.434
542
+ C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)C=C[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.18
543
+ C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@@H]2C(=O)CO,-3.24
544
+ C[C@]12C[C@H](O)[C@H]3[C@@H](CCC4=CC(=O)CC[C@]34C)[C@@H]1CC[C@]2(O)C(=O)CO,-3.1
545
+ C\C(O)=C\C(C)=O,0.22
546
+ C\N=C(/NCCSCc1[nH]cnc1C)NC#N,-1.655
547
+ C\N=C1\CN(O)C(c2ccccc2)=c2cc(Cl)ccc2=N1,-2.176
548
+ Cc1c(N)c(=O)n(-c2ccccc2)n1C,-0.622
549
+ Cc1c[nH]c(=O)[nH]c1=O,-1.499
550
+ Cc1c[nH]c(=O)nc1N,-0.56
551
+ Cc1c[nH]c2ccccc12,-2.42
552
+ Cc1cc(=O)c2ccccc2[nH]1,-1.2
553
+ Cc1cc(=O)n(-c2ccccc2)n1C,0.48
554
+ Cc1cc(C)c(O)c(C)c1,-2.05
555
+ Cc1cc(C)cc(O)c1,-1.4
556
+ Cc1cc(C)nc(NS(=O)(=O)c2ccc(N)cc2)n1,-2.73
557
+ Cc1cc(NS(=O)(=O)c2ccc(N)cc2)no1,-2.705
558
+ Cc1cc(no1)C(=O)NNCc1ccccc1,-2.461
559
+ Cc1ccc(C)c(O)c1,-1.54
560
+ Cc1ccc(CO)cc1,-1.2
561
+ Cc1ccc(Cl)c(Nc2ccccc2C(O)=O)c1Cl,-6.267
562
+ Cc1ccc(O)c(C)c1,-1.1975
563
+ Cc1ccc(O)cc1,-0.72
564
+ Cc1ccc(O)cc1C,-1.39
565
+ Cc1ccc(cc1)C(=O)c1ccc(CC(O)=O)n1C,-4.092
566
+ Cc1ccc(cc1)S(=O)(=O)NC(=O)NN1C[C@@H]2CCC[C@@H]2C1,-4.175
567
+ Cc1ccc(cc1)S(N)(=O)=O,-1.74
568
+ Cc1ccc(cc1)[N+]([O-])=O,-2.49
569
+ Cc1ccc(cc1[N+]([O-])=O)[N+]([O-])=O,-2.82
570
+ Cc1cccc(C)c1O,-1.296666667
571
+ Cc1cccc(CN2CCN(CC2)[C@H](c2ccccc2)c2ccc(Cl)cc2)c1,-6.481
572
+ Cc1cccc(Nc2ccccc2C(O)=O)c1C,-6.544
573
+ Cc1cccc(O)c1,-0.69025
574
+ Cc1cccc(O)c1C,-1.43
575
+ Cc1cccc(c1)[N+]([O-])=O,-2.44
576
+ Cc1cccc2sc3nncn3c12,-2.07
577
+ Cc1ccccc1-n1c(C)nc2ccccc2c1=O,-2.921
578
+ Cc1ccccc1N,-1.756666667
579
+ Cc1ccccc1O,-0.62375
580
+ Cc1ccccc1S(N)(=O)=O,-2.02
581
+ Cc1ccccc1[N+]([O-])=O,-2.33
582
+ Cc1ccnc(NS(=O)(=O)c2ccc(N)cc2)n1,-3.1205
583
+ Cc1ccsc1,-2.39
584
+ Cc1cn([C@H]2C[C@H]([N-][N+]#N)[C@@H](CO)O2)c(=O)[nH]c1=O,-1.0645
585
+ Cc1cnc(cn1)C(=O)NCCc1ccc(cc1)S(=O)(=O)NC(=O)NC1CCCCC1,-5.454
586
+ Cc1cnc2cncnc2n1,-0.854
587
+ Cc1nc2[C@@H](O)CCCn2c(=O)c1CCN1CCC(CC1)c1noc2cc(F)ccc12,-4.435
588
+ Cc1ncc(n1CCO)[N+]([O-])=O,-1.2165
589
+ Cc1ncc2nccnc2n1,-0.094
590
+ Cc1ncnc2nccnc12,-0.466
591
+ Cc1nnc(NS(=O)(=O)c2ccc(N)cc2)s1,-2.779
592
+ Cc1nnc(SCC2=C(N3[C@H](SC2)[C@H](NC(=O)Cn2cnnn2)C3=O)C(O)=O)s1,-2.616
593
+ Cc1nnc2CN=C(c3ccccc3)c3cc(Cl)ccc3-n12,-3.6
594
+ Cc1nnc2CN=C(c3ccccc3Cl)c3cc(Cl)ccc3-n12,-4.095
595
+ ClC(Cl)(Cl)C#N,-2.168
596
+ ClC(Cl)C(=O)N(CC=C)CC=C,-1.62
597
+ ClCCOCCCl,-1.12
598
+ ClCCS(=O)(=O)CCCl,-1.5
599
+ ClCCS(=O)CCCl,-1.16
600
+ Clc1ccc(CC[C@@](Cn2cncn2)(C#N)c2ccccc2)cc1,-6.226
601
+ Clc1ccc(CO[C@@H](Cn2ccnc2)c2ccc(Cl)cc2Cl)c(Cl)c1,-5.4305
602
+ Clc1ccc2oc(=O)[nH]c2c1,-2.7105
603
+ Clc1cccc(-c2c[nH]cc2C#N)c1Cl,-5.074
604
+ Clc1cccc(Cl)c1NC1=NCCN1,-0.1
605
+ Clc1cccc(N2CCN(CCCCOc3ccc4CCC(=O)Nc4c3)CC2)c1Cl,-6.585
606
+ Clc1cccc(c1)N1CCN(CCCn2nc3ccccn3c2=O)CC1,-3.1875
607
+ Clc1cnc2cncnc2n1,-0.876
608
+ Clc1cnc2ncncc2n1,-1.124
609
+ Clc1ncc2nccnc2n1,-0.699
610
+ Cn1c(=O)on(-c2ccc(Cl)c(Cl)c2)c1=O,-2.82
611
+ Cn1c2nc[nH]c2c(=O)n(C)c1=O,-1.3625
612
+ Cn1c2ncn(C[C@@H](O)CO)c2c(=O)n(C)c1=O,0.118
613
+ Cn1ccc(=O)[nH]c1=O,-0.8035
614
+ Cn1ccccc1=O,0.96
615
+ Cn1cnc(c1Sc1ncnc2nc[nH]c12)[N+]([O-])=O,-3.3255
616
+ Cn1cnc2n(C)c(=O)[nH]c(=O)c12,-2.557
617
+ Cn1cnc2n(C)c(=O)n(C)c(=O)c12,-0.951
618
+ Cn1nnnc1SCC1=C(N2[C@H](SC1)[C@H](NC(=O)[C@H](O)c1ccccc1)C2=O)C(O)=O,-0.143
619
+ FC1(F)Oc2cccc(c2O1)-c1c[nH]cc1C#N,-5.21
620
+ Fc1c[nH]c(=O)[nH]c1=O,-1.028
621
+ Fc1ccc(cc1)C(=O)CCCN1CCC(CC1)n1c2ccccc2[nH]c1=O,-4.28
622
+ N#Cc1ccccc1,-1
623
+ N#Cc1ccccc1C#N,-2.38
624
+ N(Nc1ccccc1)c1ccccc1,-2.92
625
+ NC(=N)NCC(O)=O,-1.51
626
+ NC(=N)Nc1nc(CSCC\C(N)=N\S(N)(=O)=O)cs1,-2.49
627
+ NC(=O)N1c2ccccc2C=Cc2ccccc12,-3.294
628
+ NC(=O)NCc1ccccc1,-0.95
629
+ NC(=O)OCc1ccccc1,-0.35
630
+ NC(=O)c1ccccc1,-0.953
631
+ NC(=O)c1ccccc1C(N)=O,-2.771
632
+ NC(=O)c1ccccc1O,-1.836
633
+ NC(=O)c1cccnc1,0.913
634
+ NC(=O)c1cnccn1,-0.914
635
+ NC(=S)Nc1ccccc1,-1.77
636
+ NC(N)=NC(=O)c1nc(Cl)c(N)nc1N,-3.13
637
+ NC(N)=N[N+]([O-])=O,-1.37
638
+ NCCc1c[nH]c2ccccc12,-3.301029996
639
+ NC[C@@H](CC(O)=O)c1ccc(Cl)cc1,-1.696
640
+ NNC(=O)c1ccncc1,0.009
641
+ NNc1nncc2ccccc12,-2.6
642
+ NS(=O)(=O)c1cc(C(O)=O)c(NCc2ccco2)cc1Cl,-4.75
643
+ NS(=O)(=O)c1cc(ccc1Cl)[C@]1(O)NC(=O)c2ccccc12,-3.451
644
+ NS(=O)(=O)c1cc2c(NCNS2(=O)=O)cc1C(F)(F)F,-2.98
645
+ NS(=O)(=O)c1cc2c(NCNS2(=O)=O)cc1Cl,-2.689
646
+ NS(=O)(=O)c1cc2c(N[C@@H](Cc3ccccc3)NS2(=O)=O)cc1C(F)(F)F,-4.298
647
+ NS(=O)(=O)c1cc2c(N[C@@H](NS2(=O)=O)C(Cl)Cl)cc1Cl,-3.354253415
648
+ NS(=O)(=O)c1cc2c(N\C(NS2(=O)=O)=C/SCc2ccccc2)cc1Cl,-4.829
649
+ NS(=O)(=O)c1ccccc1,-1.56
650
+ N[C@@H](CC(N)=O)C(O)=O,-0.74
651
+ N[C@@H](CCC(N)=O)C(O)=O,-0.55
652
+ N[C@@H](Cc1cc(I)c(Oc2cc(I)c(O)c(I)c2)c(I)c1)C(O)=O,-4.259
653
+ N[C@@H](Cc1ccc(O)c(O)c1)C(O)=O,-1.818
654
+ N[C@@H](Cc1ccc(cc1)N(CCCl)CCCl)C(O)=O,-3.485
655
+ N[C@@H](Cc1ccccc1)C(O)=O,-0.804
656
+ N[C@@H]1CONC1=O,-0.009
657
+ N[C@H]1[C@@H]2CN(C[C@H]12)c1nc2n(cc(C(O)=O)c(=O)c2cc1F)-c1ccc(F)cc1F,-4.48
658
+ N[C@]12C[C@H]3C[C@H](C[C@H](C3)C1)C2,-1.854
659
+ Nc1[nH]c(=O)nc2nc[nH]c12,-3.401
660
+ Nc1cc(nc(=N)n1O)N1CCCCC1,-2.009
661
+ Nc1cc[nH]c(=O)n1,-1.159
662
+ Nc1ccc(Br)cc1C(O)=O,-3.074
663
+ Nc1ccc(C(O)=O)c(O)c1,-1.963
664
+ Nc1ccc(Cl)cc1,-1.66
665
+ Nc1ccc(O)c(c1)C(O)=O,-2.259
666
+ Nc1ccc(\N=N\c2ccccc2)c(N)n1,-4.362
667
+ Nc1ccc(cc1)C(O)=O,-1.368
668
+ Nc1ccc(cc1)S(=O)(=O)Nc1ncccn1,-3.529
669
+ Nc1ccc(cc1)S(=O)(=O)Nc1nccs1,-2.7465
670
+ Nc1ccc(cc1)S(=O)(=O)c1ccc(N)cc1,-3.094
671
+ Nc1ccc(cc1)S(N)(=O)=O,-1.361
672
+ Nc1ccc(cc1)[N+]([O-])=O,-2.37
673
+ Nc1cccc(Cl)c1,-1.37
674
+ Nc1cccc(c1)C(F)(F)F,-1.47
675
+ Nc1cccc(c1)[N+]([O-])=O,-2.19
676
+ Nc1cccc2ccccc12,-1.92
677
+ Nc1ccccc1,-0.4115
678
+ Nc1ccccc1Cl,-1.52
679
+ Nc1ccccc1O,-0.72
680
+ Nc1ccccc1[N+]([O-])=O,-1.96
681
+ Nc1cnc2cncnc2n1,-2.313
682
+ Nc1cnc2ncncc2n1,-2.343
683
+ Nc1cnn(-c2ccccc2)c(=O)c1Cl,-2.874
684
+ Nc1nc(=O)[nH]cc1F,-0.959
685
+ Nc1nc(=O)c2nc(CNc3ccc(cc3)C(=O)N[C@@H](CCC(O)=O)C(O)=O)cnc2[nH]1,-5.344
686
+ Nc1nc(=O)c2ncn(COCCO)c2[nH]1,-2.2244
687
+ Nc1nc2nc[nH]c2c(=O)[nH]1,-4.0045
688
+ Nc1ncc2nccnc2n1,-2.298
689
+ Nc1ncnc2n(cnc12)[C@@H]1O[C@H](CO)[C@@H](O)[C@H]1O,-1.728
690
+ Nc1ncnc2nc[nH]c12,-2.432
691
+ Nc1ncnc2nccnc12,-2.313
692
+ O=C(C1CCCCC1)N1C[C@H]2N(CCc3ccccc23)C(=O)C1,-2.893
693
+ O=C1C=CC(=O)C=C1,-0.9345
694
+ O=C1CCC(=O)N1,0.3
695
+ O=C1CCCCC1,-0.6
696
+ O=C1CNC(=O)CN1,-0.83
697
+ O=C1CNC(=O)N1,-0.4
698
+ O=C1C[C@@H]2OCC=C3CN4CC[C@@]56[C@@H]4C[C@@H]3[C@@H]2[C@@H]5N1c1ccccc61,-3.33
699
+ O=C1N([C@@H]2CCC(=O)NC2=O)C(=O)c2ccccc12,-3.699
700
+ O=C1NC(=O)C(C(=O)N1)(c1ccccc1)c1ccccc1,-4.196
701
+ O=C1NC(=O)C(C2CCC2)C(=O)N1,-2.349
702
+ O=C1NC(=O)C(C2CCCC2)C(=O)N1,-3.06
703
+ O=C1NC(=O)C(C2CCCCC2)C(=O)N1,-3.168
704
+ O=C1NC(=O)C(C2CCCCCC2)C(=O)N1,-2.982
705
+ O=C1NC(=O)C(N1)(c1ccccc1)c1ccccc1,-4.125
706
+ O=C1NC(=O)C2(CC2)C(=O)N1,-1.886
707
+ O=C1NS(=O)(=O)c2ccccc12,-1.725
708
+ O=C1OC(=O)c2ccccc12,-1.39
709
+ O=CNc1ccccc1,-0.68
710
+ O=Cc1ccc2OCOc2c1,-1.63
711
+ O=Cc1ccccc1,-1.2075
712
+ O=Cc1ccco1,-0.093925
713
+ O=NN1CCCCC1,-0.17
714
+ O=c1[nH]c2[nH]c(=O)[nH]c(=O)c2[nH]1,-3.402
715
+ O=c1[nH]c2nc[nH]c2c(=O)[nH]1,-2.483
716
+ O=c1cc[nH]c(=O)[nH]1,-1.493
717
+ O=c1cc[nH]cc1,1.02
718
+ O=c1cc[nH]cn1,0.59
719
+ O=c1ccc2ccccc2[nH]1,-2.14
720
+ O=c1ccc2ccccc2o1,-1.89
721
+ O=c1cccc[nH]1,1.02
722
+ O=c1cnc2cncnc2[nH]1,-2.124
723
+ O=c1cnc2ncncc2[nH]1,-2.714
724
+ O=c1nc2nccnc2c[nH]1,-1.947
725
+ O=c1nc[nH]c2n[nH]cc12,-2.453
726
+ O=c1nc[nH]c2nc[nH]c12,-2.28
727
+ O=c1nc[nH]c2nccnc12,-1.471
728
+ OC(=O)CCC(=O)c1ccc(cc1)-c1ccccc1,-5.2455
729
+ OC(=O)Cc1ccc(OCC=C)c(Cl)c1,-3.125
730
+ OC(=O)Cc1ccccc1Nc1c(Cl)cccc1Cl,-5.398
731
+ OC(=O)Cc1ccccc1Oc1ccc(Cl)cc1Cl,-3.854
732
+ OC(=O)[C@@H]1CCCN1,1.149
733
+ OC(=O)c1cc(Br)c(O)cc1O,-2.62
734
+ OC(=O)c1cc(ccc1O)-c1ccc(F)cc1F,-4.72275
735
+ OC(=O)c1cc(ccc1O)\N=N\c1ccc(cc1)S(=O)(=O)Nc1ccccn1,-6.137
736
+ OC(=O)c1ccc(O)cc1,-1.464
737
+ OC(=O)c1ccc2ccccc2c1,-3.8
738
+ OC(=O)c1cccc2ccccc12,-3.774
739
+ OC(=O)c1ccccc1,-1.58
740
+ OC(=O)c1ccccc1C(O)=O,-1.50975
741
+ OC(=O)c1ccccc1Nc1cccc(c1)C(F)(F)F,-5.33
742
+ OC(=O)c1ccccc1O,-1.931
743
+ OC(=O)c1cccnc1,-0.85
744
+ OC(=O)c1cccnc1Nc1cccc(c1)C(F)(F)F,-4.585
745
+ OC(=O)c1cn(-c2ccc(F)cc2)c2cc(N3CCNCC3)c(F)cc2c1=O,-3.131
746
+ OC(=O)c1cn(C2CC2)c2cc(N3CCNCC3)c(F)cc2c1=O,-3.742
747
+ OC(CCl)CCl,-0.11
748
+ OC(Cn1cncn1)(Cn1cncn1)c1ccc(F)cc1F,-1.8
749
+ OC(c1ccccc1)c1ccccc1,-2.55
750
+ OC1(CCN(CCCC(=O)c2ccc(F)cc2)CC1)c1ccc(Cl)cc1,-4.3645
751
+ OC1CCCCC1,-0.44
752
+ OC1CCCCCC1,-0.88
753
+ OC1CCCCCCC1,-1.29
754
+ OCC(O)CO,1.12
755
+ OCCCc1ccccc1,-1.38
756
+ OCCN(CCO)c1ccccc1,-0.73
757
+ OCCN(CCO)c1nc(N2CCCCC2)c2nc(nc(N3CCCCC3)c2n1)N(CCO)CCO,-5.332
758
+ OCCN1CCN(CCCN2c3ccccc3Sc3ccc(Cl)cc23)CC1,-4.3875
759
+ OCCN1CCN(CC\C=C2/c3ccccc3Sc3ccc(cc23)C(F)(F)F)CC1,-4.0185
760
+ OCCOc1ccccc1,-0.703333333
761
+ OCCc1ccccc1,-0.74
762
+ OC[C@@H](NC(=O)C(Cl)Cl)[C@H](O)c1ccc(cc1)[N+]([O-])=O,-2.111
763
+ OC[C@@H](O)[C@@H](O)[C@H](O)C(=O)CO,0.64
764
+ OC[C@@H](O)[C@@H](O)[C@H](O)[C@@H](O)C=O,0.74
765
+ OC[C@@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,0.05845
766
+ OC[C@@H](O)[C@H]1O[C@@H]2O[C@@H](O[C@@H]2[C@H]1O)C(Cl)(Cl)Cl,-1.84
767
+ OC[C@@H]1CC[C@@H](O1)n1cnc2c1[nH]cnc2=O,-0.937
768
+ OC[C@H](O)COC(=O)c1ccccc1Nc1ccnc2cc(Cl)ccc12,-4.571
769
+ OC[C@H](O)[C@@H](O)[C@H](O)[C@H](O)CO,1.148
770
+ OC[C@H](O)[C@H](O)CO,0.7
771
+ OC[C@H](O)[C@H]1OC(=O)C(O)=C1O,0.277
772
+ OC[C@H]1O[C@@H](OC[C@H]2O[C@@H](O[C@H](C#N)c3ccccc3)[C@H](O)[C@@H](O)[C@@H]2O)[C@H](O)[C@@H](O)[C@@H]1O,-0.77
773
+ OC[C@H]1O[C@@H](Oc2ccccc2CO)[C@H](O)[C@@H](O)[C@@H]1O,-0.85
774
+ OC[C@H]1O[C@H](O)[C@H](O)[C@@H](O)[C@@H]1O,0.74
775
+ OC[C@H]1O[C@H]([C@H](O)[C@@H]1O)n1cnc2c1[nH]cnc2=O,-1.23
776
+ OC[C@]1(O)OC[C@@H](O)[C@@H](O)[C@@H]1O,0.64
777
+ OCc1c(Cl)cccc1Cl,-2.1
778
+ OCc1ccccc1,-0.4
779
+ OCc1ccccc1O,-0.29
780
+ O[C@@H](C(=O)c1ccccc1)c1ccccc1,-2.85
781
+ O[C@@H]([C@@H](O)c1ccccc1)c1ccccc1,-1.93
782
+ O[C@@H]1CCOC1,1.05
783
+ O[C@@H]1CO[C@H](O)[C@@H](O)[C@@H]1O,0.39
784
+ O[C@@H]1C[C@@H](O)[C@H](O)C(O)[C@H]1O,-0.17
785
+ O[C@@H]1N=C(c2ccccc2)c2cc(Cl)ccc2NC1=O,-3.952
786
+ O[C@@H]1N=C(c2ccccc2Cl)c2cc(Cl)ccc2NC1=O,-3.604
787
+ O[C@@](CCN1CCCC1)(C1CCCCC1)c1ccccc1,-4.7
788
+ O[C@@](CCN1CCCCC1)(C1CCCCC1)c1ccccc1,-5.2
789
+ O[C@H](Cc1ccccc1)c1ccccc1,-2.52
790
+ O[C@H]1CO[C@H](O)[C@H](O)[C@H]1O,0.39
791
+ Oc1ccc(Cl)cc1,-0.7
792
+ Oc1ccc(I)cc1,-1.714
793
+ Oc1ccc(O)cc1,-0.1695
794
+ Oc1ccc(cc1)C1(OC(=O)c2ccccc12)c1ccc(O)cc1,-2.9
795
+ Oc1ccc2CCCCc2c1,-1.99
796
+ Oc1ccc2C[C@H]3N(CC=C)CC[C@@]45[C@@H](Oc1c24)C(=O)CC[C@@]35O,-2.898
797
+ Oc1ccc2ccccc2c1,-2.159
798
+ Oc1cccc(Cl)c1,-0.7
799
+ Oc1cccc(O)c1,0.81
800
+ Oc1cccc2C(=O)c3cccc(O)c3C(=O)c12,-5.187
801
+ Oc1cccc2ccccc12,-1.9715
802
+ Oc1ccccc1,-0.00835
803
+ Oc1ccccc1C=O,-0.86
804
+ Oc1ccccc1O,0.62
805
+ S=C1NCCN1,-0.71
806
+ S=c1cnc2cncnc2[nH]1,-2.706
807
+ S=c1nc2nccnc2c[nH]1,-2.629
808
+ S=c1nc[nH]c2nccnc12,-2.646
809
+ [O-][N+](=O)C(Cl)(Cl)Cl,-2
810
+ [O-][N+](=O)OCC(CO[N+]([O-])=O)O[N+]([O-])=O,-2.22
811
+ [O-][N+](=O)O[C@@H]1CO[C@@H]2[C@H](CO[C@H]12)O[N+]([O-])=O,-2.63
812
+ [O-][N+](=O)c1cc(cc(c1)[N+]([O-])=O)[N+]([O-])=O,-2.89
813
+ [O-][N+](=O)c1ccc(Cl)cc1,-2.92
814
+ [O-][N+](=O)c1ccc(\C=N\N2CC(=O)NC2=O)o1,-3.26
815
+ [O-][N+](=O)c1cccc(Cl)c1,-2.77
816
+ [O-][N+](=O)c1cccc(c1)[N+]([O-])=O,-2.316666667
817
+ [O-][N+](=O)c1ccccc1,-1.8
818
+ [O-][N+](=O)c1ccccc1Cl,-2.55
819
+ [O-][N+](=O)c1nccn1CC(=O)NCc1ccccc1,-2.81
820
+ c1[nH]nc2ccccc12,-2.16
821
+ c1cc2ccccc2[nH]1,-1.416666667
822
+ c1ccc(nc1)-c1ccccn1,-1.42
823
+ c1ccc2n[nH]nc2c1,-0.78
824
+ c1ccc2ncccc2c1,-1.3
825
+ c1cn(cn1)[C@H](c1ccccc1)c1ccc(cc1)-c1ccccc1,-6.27
826
+ c1cnc2c(c1)ccc1cccnc21,-1.618
827
+ c1cnc2ncncc2n1,0.0205
828
+ c1nc(cs1)-c1nc2ccccc2[nH]1,-3.484
829
+ c1nc2ccccc2o1,-1.16
830
+ c1nc2ccccc2s1,-1.5
data/delaney-processed.csv ADDED
@@ -0,0 +1,1129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Compound ID,ESOL predicted log solubility in mols per litre,Minimum Degree,Molecular Weight,Number of H-Bond Donors,Number of Rings,Number of Rotatable Bonds,Polar Surface Area,measured log solubility in mols per litre,smiles
2
+ Amigdalin,-0.9740000000000001,1,457.4320000000001,7,3,7,202.31999999999996,-0.77,OCC3OC(OCC2OC(OC(C#N)c1ccccc1)C(O)C(O)C2O)C(O)C(O)C3O
3
+ Fenfuram,-2.885,1,201.22500000000002,1,2,2,42.24,-3.3,Cc1occc1C(=O)Nc2ccccc2
4
+ citral,-2.5789999999999997,1,152.237,0,0,4,17.07,-2.06,CC(C)=CCCC(C)=CC(=O)
5
+ Picene,-6.617999999999999,2,278.354,0,5,0,0.0,-7.87,c1ccc2c(c1)ccc3c2ccc4c5ccccc5ccc43
6
+ Thiophene,-2.2319999999999998,2,84.14299999999999,0,1,0,0.0,-1.33,c1ccsc1
7
+ benzothiazole,-2.733,2,135.191,0,2,0,12.89,-1.5,c2ccc1scnc1c2
8
+ "2,2,4,6,6'-PCB",-6.545,1,326.437,0,2,1,0.0,-7.32,Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cccc2Cl
9
+ Estradiol,-4.138,1,272.388,2,4,0,40.46,-5.03,CC12CCC3C(CCc4cc(O)ccc34)C2CCC1O
10
+ Dieldrin,-4.533,1,380.913,0,5,0,12.53,-6.29,ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl
11
+ Rotenone,-5.246,1,394.42300000000023,0,5,3,63.22,-4.42,COc5cc4OCC3Oc2c1CC(Oc1ccc2C(=O)C3c4cc5OC)C(C)=C
12
+ 2-pyrrolidone,0.243,1,85.10600000000001,1,1,0,29.1,1.07,O=C1CCCN1
13
+ 2-Chloronapthalene,-4.063,1,162.61899999999997,0,2,0,0.0,-4.14,Clc1ccc2ccccc2c1
14
+ 1-Pentene ,-2.01,1,70.135,0,0,2,0.0,-2.68,CCCC=C
15
+ Primidone,-1.8969999999999998,1,218.256,2,2,2,58.2,-2.64,CCC1(C(=O)NCNC1=O)c2ccccc2
16
+ Tetradecane,-5.45,1,198.39399999999995,0,0,11,0.0,-7.96,CCCCCCCCCCCCCC
17
+ 2-Chloropropane,-1.585,1,78.542,0,0,0,0.0,-1.41,CC(C)Cl
18
+ 2-Methylbutanol,-1.0270000000000001,1,88.14999999999999,1,0,2,20.23,-0.47,CCC(C)CO
19
+ Benzonitrile,-2.03,1,103.12399999999997,0,1,0,23.79,-1.0,N#Cc1ccccc1
20
+ Diazinon,-3.989,1,304.35200000000003,0,1,7,53.47,-3.64,CCOP(=S)(OCC)Oc1cc(C)nc(n1)C(C)C
21
+ 2-Undecanol,-3.096,1,172.312,1,0,8,20.23,-2.94,CCCCCCCCCC(C)O
22
+ "2,2',3,4,6-PCB",-6.627000000000001,1,326.437,0,2,1,0.0,-7.43,Clc1ccc(c(Cl)c1)c2c(Cl)ccc(Cl)c2Cl
23
+ Lenacil,-3.355,1,234.29899999999995,1,3,1,54.86,-4.593999999999999,O=c2[nH]c1CCCc1c(=O)n2C3CCCCC3
24
+ Phorate,-3.747,1,260.38599999999997,0,0,8,18.46,-4.11,CCOP(=S)(OCC)SCSCC
25
+ Phenacetin,-2.342,1,179.219,1,1,3,38.33,-2.35,CCOc1ccc(NC(=O)C)cc1
26
+ Dinitramine,-4.479,1,322.243,1,1,5,115.54000000000002,-5.47,CCN(CC)c1c(cc(c(N)c1N(=O)=O)C(F)(F)F)N(=O)=O
27
+ 1-Heptanol,-1.751,1,116.204,1,0,5,20.23,-1.81,CCCCCCCO
28
+ Theophylline,-1.452,1,180.16699999999997,1,2,0,72.68,-1.39,Cn1c(=O)n(C)c2nc[nH]c2c1=O
29
+ Butethal,-1.974,1,212.249,2,1,4,75.27000000000001,-1.661,CCCCC1(CC)C(=O)NC(=O)NC1=O
30
+ "P,P'-DDE",-6.553,1,318.0300000000001,0,2,2,0.0,-6.9,ClC(Cl)=C(c1ccc(Cl)cc1)c2ccc(Cl)cc2
31
+ Methyl octanoate,-2.608,1,158.24099999999999,0,0,6,26.3,-3.17,CCCCCCCC(=O)OC
32
+ "1,4-Diethylbenzene ",-3.633,1,134.22199999999998,0,1,2,0.0,-3.75,CCc1ccc(CC)cc1
33
+ Terbufos,-4.367,1,288.44,0,0,7,18.46,-4.755,CCOP(=S)(OCC)SCSC(C)(C)C
34
+ Phenmedipham,-4.229,1,300.314,2,2,3,76.66,-4.805,COC(=O)Nc1cccc(OC(=O)Nc2cccc(C)c2)c1
35
+ "1,1-Dichloroethylene",-1.939,1,96.94399999999999,0,0,0,0.0,-1.64,ClC(=C)Cl
36
+ 1-Methylfluorene,-4.478,1,180.25000000000003,0,3,0,0.0,-5.22,Cc1cccc2c1Cc3ccccc32
37
+ Valeraldehyde,-1.103,1,86.13399999999999,0,0,3,17.07,-0.85,CCCCC=O
38
+ Diphenylamine,-3.8569999999999998,2,169.227,1,2,2,12.03,-3.5039999999999996,N(c1ccccc1)c2ccccc2
39
+ Fenothiocarb,-3.2969999999999997,1,253.367,0,1,6,29.540000000000003,-3.927,CN(C)C(=O)SCCCCOc1ccccc1
40
+ Piperophos,-4.637,1,353.4900000000001,0,1,9,38.77,-4.15,CCCOP(=S)(OCCC)SCC(=O)N1CCCCC1C
41
+ 1-Iodoheptane,-3.904,1,226.101,0,0,5,0.0,-4.81,CCCCCCCI
42
+ 3-Chlorobiphenyl,-4.685,1,188.657,0,2,1,0.0,-4.88,c1c(Cl)cccc1c2ccccc2
43
+ 4-Pentene-1-ol,-0.7909999999999999,1,86.134,1,0,3,20.23,-0.15,OCCCC=C
44
+ Cyclobutyl-5-spirobarbituric acid,-0.527,1,168.15200000000002,2,2,0,75.27,-1.655,O=C2NC(=O)C1(CCC1)C(=O)N2
45
+ menthol,-2.782,1,156.269,1,1,1,20.23,-2.53,CC(C)C1CCC(C)CC1O
46
+ Isopropyl formate,-0.684,1,88.106,0,0,2,26.3,-0.63,CC(C)OC=O
47
+ 2-Heptanol ,-1.6780000000000002,1,116.20399999999998,1,0,4,20.23,-1.55,CCCCCC(C)O
48
+ p-Bromoacetanilide,-3.012,1,214.06199999999998,1,1,1,29.1,-3.083,CC(=O)Nc1ccc(Br)cc1
49
+ brompyrazone,-3.005,1,266.098,1,2,1,60.910000000000004,-3.127,c1ccccc1n2ncc(N)c(Br)c2(=O)
50
+ nifedipine,-4.248,1,346.33900000000017,1,2,4,107.77,-4.76,COC(=O)C1=C(C)NC(=C(C1c2ccccc2N(=O)=O)C(=O)OC)C
51
+ "2,7-dimethylquinoline",-3.342,1,157.216,0,2,0,12.89,-1.94,c2c(C)cc1nc(C)ccc1c2
52
+ 1-Octyne ,-2.509,1,110.19999999999999,0,0,4,0.0,-3.66,CCCCCCC#C
53
+ cyclobarbital,-2.421,1,236.27099999999993,2,2,2,75.27000000000001,-2.17,CCC1(C(=O)NC(=O)NC1=O)C2=CCCCC2
54
+ Chrysene,-5.568,2,228.29399999999998,0,4,0,0.0,-8.057,c1ccc2c(c1)ccc3c4ccccc4ccc23
55
+ Bromacil,-3.4189999999999996,1,261.11899999999997,1,1,2,54.86,-2.523,CCC(C)n1c(=O)[nH]c(C)c(Br)c1=O
56
+ "2,2',3,3',5,6-PCB",-7.185,1,360.88200000000006,0,2,1,0.0,-8.6,Clc1cccc(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl
57
+ 2-Methylphenol,-2.281,1,108.13999999999999,1,1,0,20.23,-0.62,Cc1ccccc1O
58
+ "2,2,5-Trimethylhexane",-3.6310000000000002,1,128.259,0,0,2,0.0,-5.05,CC(C)CCC(C)(C)C
59
+ "1,4-Dimethylnaphthalene ",-4.147,1,156.228,0,2,0,0.0,-4.14,Cc1ccc(C)c2ccccc12
60
+ 6-Methylchrysene,-5.931,1,242.321,0,4,0,0.0,-6.57,Cc1cc2c3ccccc3ccc2c4ccccc14
61
+ 2-Pentanone,-0.846,1,86.13399999999999,0,0,2,17.07,-0.19,CCCC(=O)C
62
+ "2,2',3,3',5,5',6,6'-PCB",-8.304,1,429.77200000000016,0,2,1,0.0,-9.15,Clc1cc(Cl)c(Cl)c(c1Cl)c2c(Cl)c(Cl)cc(Cl)c2Cl
63
+ Methyl butyrate,-1.545,1,116.15999999999998,0,0,3,26.3,-0.82,CCCOC(=O)CC
64
+ Triamcinolone,-2.734,1,394.43900000000014,4,4,2,115.06000000000002,-3.68,CC34CC(O)C1(F)C(CCC2=CC(=O)C=CC12C)C3CC(O)C4(O)C(=O)CO
65
+ p-Aminophenol,-1.2309999999999999,1,109.12799999999999,2,1,0,46.25,-0.8,Nc1ccc(O)cc1
66
+ Benznidazole,-2.3209999999999997,1,260.253,1,2,5,90.06,-2.81,O=C(Cn1ccnc1N(=O)=O)NCc2ccccc2
67
+ "Atovaquone(0,430mg/ml) - neutral",-6.269,1,366.84400000000016,1,4,2,54.37,-5.931,OC4=C(C1CCC(CC1)c2ccc(Cl)cc2)C(=O)c3ccccc3C4=O
68
+ Trietazine,-3.233,1,229.71499999999997,1,1,5,53.940000000000005,-4.06,CCNc1nc(Cl)nc(n1)N(CC)CC
69
+ Pyrazinamide,-0.674,1,123.11499999999998,1,1,1,68.87,-0.667,NC(=O)c1cnccn1
70
+ Carbromal,-2.198,1,237.09699999999998,2,0,3,72.19,-2.68,CCC(Br)(CC)C(=O)NC(N)=O
71
+ "2,2'-PCB",-4.984,1,223.102,0,2,1,0.0,-5.27,Clc1ccccc1c2ccccc2Cl
72
+ nitrofurantoin,-1.2429999999999999,1,238.15899999999996,1,2,3,118.04999999999998,-3.38,O=C2CN(N=Cc1ccc(o1)N(=O)=O)C(=O)N2
73
+ Nitrofen,-5.361000000000001,1,284.09799999999996,0,2,3,52.37,-5.46,Clc2ccc(Oc1ccc(cc1)N(=O)=O)c(Cl)c2
74
+ Camphor,-2.158,1,152.237,0,2,0,17.07,-1.96,CC1(C)C2CCC1(C)C(=O)C2
75
+ 5-Allyl-5-phenylbarbital,-2.36,1,244.25,2,2,3,75.27000000000001,-2.369,O=C1NC(=O)NC(=O)C1(CC=C)c1ccccc1
76
+ Pentyl propanoate,-1.899,1,130.18699999999998,0,0,4,26.3,-2.25,CCCCC(=O)OCC
77
+ Isopentyl acetate,-1.817,1,130.18699999999998,0,0,3,26.3,-1.92,CC(C)CCOC(=O)C
78
+ 3-Hexanoyloxymethylphenyltoin,-4.1530000000000005,1,380.444,1,3,8,75.71,-5.886,O=C1N(COC(=O)CCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
79
+ "2,3',5-PCB",-5.7620000000000005,1,257.547,0,2,1,0.0,-6.01,Clc1cccc(c1)c2cc(Cl)ccc2Cl
80
+ 1-Bromopropane,-1.949,1,122.993,0,0,1,0.0,-1.73,CCCBr
81
+ Propiconazole,-4.603,1,342.2260000000001,0,3,5,49.17,-3.4930000000000003,CCCC1COC(Cn2cncn2)(O1)c3ccc(Cl)cc3Cl
82
+ Formothion,-2.0869999999999997,1,257.27299999999997,0,0,6,55.84,-1.995,COP(=S)(OC)SCC(=O)N(C)C=O
83
+ 4-methylpteridine,-1.24,1,146.15299999999996,0,2,0,51.56,-0.466,Cc1ncnc2nccnc12
84
+ Thiourea,0.32899999999999996,1,76.12400000000001,2,0,0,52.04,0.32,NC(=S)N
85
+ p-Xylene ,-3.035,1,106.16799999999999,0,1,0,0.0,-2.77,Cc1ccc(C)cc1
86
+ "1,2-Diethylbenzene",-3.6010000000000004,1,134.22199999999998,0,1,2,0.0,-3.28,CCc1ccccc1CC
87
+ Hexachloroethane,-4.215,1,236.74,0,0,0,0.0,-3.67,ClC(Cl)(Cl)C(Cl)(Cl)Cl
88
+ Flucythrinate,-6.877999999999999,1,451.46900000000005,0,3,9,68.55000000000001,-6.876,CC(C)C(C(=O)OC(C#N)c1cccc(Oc2ccccc2)c1)c3ccc(OC(F)F)cc3
89
+ 1-Nitropropane,-0.816,1,89.09399999999998,0,0,2,43.14,-0.8,CCCN(=O)=O
90
+ Menthone,-2.516,1,154.253,0,1,1,17.07,-2.35,CC(C)C1CCC(C)CC1=O
91
+ RTI 24,-4.423,1,273.723,1,3,1,45.230000000000004,-5.36,CCN2c1cc(Cl)ccc1NC(=O)c3cccnc23
92
+ "2,3-Dichloronitrobenzene",-3.322,1,192.00100000000003,0,1,1,43.14,-3.48,O=N(=O)c1c(Cl)c(Cl)ccc1
93
+ thiamylal,-3.063,1,254.35500000000002,2,1,5,58.2,-3.46,CCCC(C)C1(CC=C)C(=O)NC(=S)NC1=O
94
+ Fluoranthene,-4.957,2,202.25599999999997,0,4,0,0.0,-6.0,c1ccc2c(c1)c3cccc4cccc2c34
95
+ Propylisopropylether,-1.354,1,102.17699999999998,0,0,3,9.23,-1.34,CCCOC(C)C
96
+ "1,3-Dimethylnaphthalene",-4.147,1,156.22799999999998,0,2,0,0.0,-4.29,Cc1cc(C)c2ccccc2c1
97
+ diethylstilbestrol,-5.074,1,268.356,2,2,4,40.46,-4.07,CCC(=C(CC)c1ccc(O)cc1)c2ccc(O)cc2
98
+ Chlorothalonil,-3.995,1,265.914,0,1,0,47.58,-5.64,c1(C#N)c(Cl)c(C#N)c(Cl)c(Cl)c(Cl)1
99
+ "2,3',4',5-PCB",-6.312,1,291.992,0,2,1,0.0,-7.25,Clc1ccc(Cl)c(c1)c2ccc(Cl)c(Cl)c2
100
+ styrene oxide,-1.8259999999999998,2,120.15099999999995,0,2,1,12.53,-1.6,C1OC1c2ccccc2
101
+ Isopropylbenzene ,-3.265,1,120.19499999999995,0,1,1,0.0,-3.27,CC(C)c1ccccc1
102
+ Deoxycorticosterone,-3.9389999999999996,1,330.4680000000001,1,4,2,54.370000000000005,-3.45,CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO
103
+ chlorquinox,-4.438,1,267.93,0,2,0,25.78,-5.43,c2(Cl)c(Cl)c(Cl)c1nccnc1c2(Cl)
104
+ L-arabinose,0.601,1,150.13,4,1,0,90.15,0.39,C1OC(O)C(O)C(O)C1O
105
+ Dichloromethane,-1.156,1,84.93299999999999,0,0,0,0.0,-0.63,ClCCl
106
+ 1-Ethylnaphthalene ,-4.1,1,156.22799999999998,0,2,1,0.0,-4.17,CCc1cccc2ccccc12
107
+ Methyl formate,-0.048,1,60.05200000000001,0,0,1,26.3,0.58,COC=O
108
+ o-Nitrophenol,-2.318,1,139.10999999999999,1,1,1,63.37,-1.74,Oc1ccccc1N(=O)=O
109
+ thymine,-0.78,1,126.115,2,1,0,65.72,-1.506,Cc1c[nH]c(=O)[nH]c1=O
110
+ 2-Methylpropane,-1.891,1,58.123999999999995,0,0,0,0.0,-2.55,CC(C)C
111
+ Inosine,-0.8340000000000001,1,268.22900000000004,4,3,2,133.75,-1.23,OCC1OC(C(O)C1O)n2cnc3c(O)ncnc23
112
+ Ioxynil,-4.615,1,370.91499999999996,1,1,0,44.019999999999996,-3.61,Oc1c(I)cc(C#N)cc1I
113
+ Niclosamide,-5.032,1,327.1230000000001,2,2,3,92.47,-4.7,Oc1ccc(Cl)cc1C(=O)Nc2ccc(cc2Cl)N(=O)=O
114
+ Pentane,-2.261,1,72.151,0,0,2,0.0,-3.18,CCCCC
115
+ Phenol,-1.9909999999999999,1,94.11299999999999,1,1,0,20.23,0.0,c1ccccc1O
116
+ 2-aminoanthracene,-3.7889999999999997,1,193.249,1,3,0,26.02,-5.17,Nc3ccc2cc1ccccc1cc2c3
117
+ theobromine,-1.05,1,180.16699999999997,1,2,0,72.68,-2.523,Cn1cnc2n(C)c(=O)[nH]c(=O)c12
118
+ Isoquinoline,-2.531,2,129.16199999999998,0,2,0,12.89,-1.45,c1ccc2cnccc2c1
119
+ Anilofos,-5.106,1,367.86,0,1,7,38.77,-4.4319999999999995,COP(=S)(OC)SCC(=O)N(C(C)C)c1ccc(Cl)cc1
120
+ Hexylbenzene ,-4.22,1,162.276,0,1,5,0.0,-5.21,CCCCCCc1ccccc1
121
+ 2-Chlorobiphenyl,-4.5280000000000005,1,188.657,0,2,1,0.0,-4.54,Clc1ccccc1c2ccccc2
122
+ 2-Methyl-1-Pentene,-2.3480000000000003,1,84.16199999999999,0,0,2,0.0,-3.03,CCCC(=C)C
123
+ "2,3,4-Trimethylpentane",-3.2760000000000002,1,114.23199999999999,0,0,2,0.0,-4.8,CC(C)C(C)C(C)C
124
+ Pentachlorobenzene,-5.167999999999999,1,250.339,0,1,0,0.0,-5.65,Clc1cc(Cl)c(Cl)c(Cl)c1Cl
125
+ m-Nitrophenol,-2.318,1,139.10999999999999,1,1,1,63.37,-1.01,Oc1cccc(c1)N(=O)=O
126
+ 1-Decene,-3.781,1,140.26999999999998,0,0,7,0.0,-5.51,CCCCCCCCC=C
127
+ Glyceryl triacetate,-1.285,1,218.20499999999998,0,0,5,78.9,-0.6,CC(=O)OCC(COC(=O)C)OC(=O)C
128
+ dimethirimol,-3.57,1,209.29299999999998,1,1,4,49.25000000000001,-2.24,CCCCc1c(C)nc(nc1O)N(C)C
129
+ Cyfluthrin,-6.84,1,434.29400000000015,0,3,6,59.32000000000001,-7.337000000000001,CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2ccc(F)c(Oc3ccccc3)c2
130
+ Pyridine,-1.4809999999999999,2,79.10199999999998,0,1,0,12.89,0.76,c1ccncc1
131
+ 1-Bromoheptane,-3.366,1,179.101,0,0,5,0.0,-4.43,CCCCCCCBr
132
+ "3,4-Dimethylpyridine",-2.0669999999999997,1,107.15599999999999,0,1,0,12.89,0.36,Cc1ccncc1C
133
+ Fludrocortisone,-3.1719999999999997,1,380.45600000000013,3,4,2,94.83,-3.43,CC34CC(O)C1(F)C(CCC2=CC(=O)CCC12C)C3CCC4(O)C(=O)CO
134
+ ethiofencarb,-2.855,1,225.313,1,1,4,38.33,-2.09,CCSCc1ccccc1OC(=O)NC
135
+ Malonic acid diethylester,-1.413,1,160.16899999999998,0,0,4,52.60000000000001,-0.82,CCOC(=O)CC(=O)OCC
136
+ d-Limonene,-3.429,1,136.238,0,1,1,0.0,-4.26,CC1=CCC(CC1)C(C)=C
137
+ Indan,-3.057,2,118.17899999999997,0,2,0,0.0,-3.04,C1Cc2ccccc2C1
138
+ p-t-Butylphenol,-3.1919999999999997,1,150.22099999999998,1,1,0,20.23,-2.41,CC(C)(C)c1ccc(O)cc1
139
+ Cyclopropyl-5-spirobarbituric acid,-0.08800000000000001,1,154.125,2,2,0,75.27,-1.886,O=C2NC(=O)C1(CC1)C(=O)N2
140
+ m-Chloroiodobenzene,-4.3839999999999995,1,238.45499999999998,0,1,0,0.0,-3.55,Clc1cccc(I)c1
141
+ 1-Bromonapthalene,-4.434,1,207.07,0,2,0,0.0,-4.35,Brc1cccc2ccccc12
142
+ trans-2-Pentene ,-2.076,1,70.135,0,0,1,0.0,-2.54,CC/C=C/C
143
+ "2,6-Dimethylpyridine",-2.0980000000000003,1,107.156,0,1,0,12.89,0.45,Cc1cccc(C)n1
144
+ Trichloroethylene,-2.312,1,131.389,0,0,0,0.0,-1.96,ClC=C(Cl)Cl
145
+ 1-Napthylamine,-2.721,1,143.189,1,2,0,26.02,-1.92,Nc1cccc2ccccc12
146
+ m-Xylene ,-3.035,1,106.16799999999999,0,1,0,0.0,-2.82,Cc1cccc(C)c1
147
+ 2-hydroxypteridine,-1.4040000000000001,1,148.125,1,2,0,71.79,-1.9469999999999998,Oc2ncc1nccnc1n2
148
+ Methanol,0.441,1,32.042,1,0,0,20.23,1.57,CO
149
+ Amobarbital,-2.312,1,226.27599999999998,2,1,4,75.27000000000001,-2.468,CCC1(CCC(C)C)C(=O)NC(=O)NC1=O
150
+ 2-Butanone,-0.491,1,72.107,0,0,1,17.07,0.52,CCC(=O)C
151
+ 5-fluorouracil,-0.792,1,130.078,2,1,0,65.72,-1.077,Fc1c[nH]c(=O)[nH]c1=O
152
+ tubercidin,-0.892,1,266.257,4,3,2,126.65,-1.95,Nc1ncnc2n(ccc12)C3OC(CO)C(O)C3O
153
+ "1,3-Benzenediol",-1.59,1,110.11199999999998,2,1,0,40.46,0.81,Oc1cccc(O)c1
154
+ 1-Hexanol,-1.3969999999999998,1,102.17699999999999,1,0,4,20.23,-1.24,CCCCCCO
155
+ 1-Chloropentane,-2.294,1,106.596,0,0,3,0.0,-2.73,CCCCCCl
156
+ "1,3-Butadiene",-1.376,1,54.09199999999999,0,0,1,0.0,-1.87,C=CC=C
157
+ Propyl acetate,-1.125,1,102.13299999999998,0,0,2,26.3,-0.72,CCCOC(=O)C
158
+ "5,6,7,8-tetrahydro-2-naphthol",-3.0860000000000003,1,148.205,1,2,0,20.23,-1.99,Oc2ccc1CCCCc1c2
159
+ chloroacetamide,-0.106,1,93.513,1,0,1,43.09,-0.02,NC(=O)CCl
160
+ Iodofenphos,-6.148,1,413.0,0,1,4,27.69,-6.62,COP(=S)(OC)Oc1cc(Cl)c(I)cc1Cl
161
+ 4-Chlorotoluene,-3.2969999999999997,1,126.586,0,1,0,0.0,-3.08,Cc1ccc(Cl)cc1
162
+ Metribuzin,-2.324,1,214.29399999999998,1,1,1,73.8,-2.253,CSc1nnc(c(=O)n1N)C(C)(C)C
163
+ Tricresyl phosphate,-6.39,1,368.3690000000001,0,3,6,44.760000000000005,-6.01,Cc1ccc(OP(=O)(Oc2cccc(C)c2)Oc3ccccc3C)cc1
164
+ Caproaldehyde,-1.4569999999999999,1,100.16099999999999,0,0,4,17.07,-1.3,CCCCCC=O
165
+ Butamben,-3.0389999999999997,1,193.24599999999998,1,1,4,52.32,-3.082,CCCCOC(=O)c1ccc(N)cc1
166
+ RTI 3,-3.049,1,255.277,1,3,0,68.45,-3.043,O2c1cc(C)ccc1N(C)C(=O)c3cc(N)cnc23
167
+ Nerol,-2.603,1,154.253,1,0,4,20.23,-2.46,CC(C)=CCC/C(C)=C\CO
168
+ "2,4'-PCB",-5.142,1,223.102,0,2,1,0.0,-5.28,Clc1ccc(cc1)c2ccccc2Cl
169
+ 3-Octanoyloxymethylphenytoin,-4.84,1,408.498,1,3,10,75.71,-6.523,O=C1N(COC(=O)CCCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
170
+ Nitroethane,-0.462,1,75.067,0,0,1,43.14,-0.22,CCN(=O)=O
171
+ Ethalfluralin,-5.063,1,333.266,0,1,6,89.51999999999998,-6.124,CCN(CC(C)=C)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O
172
+ "1,2,3,4-Tetrachlorobenzene",-4.546,1,215.894,0,1,0,0.0,-4.57,Clc1ccc(Cl)c(Cl)c1Cl
173
+ Meprobamate,-1.376,1,218.25299999999996,2,0,6,104.63999999999999,-1.807,CCCC(C)(COC(N)=O)COC(N)=O
174
+ pregnenolone,-4.342,1,316.48500000000007,1,4,1,37.3,-4.65,CC(=O)C3CCC4C2CC=C1CC(O)CCC1(C)C2CCC34C
175
+ Iodomethane,-1.646,1,141.939,0,0,0,0.0,-1.0,CI
176
+ cycloheximide,-1.5319999999999998,1,281.35200000000003,2,2,3,83.47,-1.13,CC1CC(C)C(=O)C(C1)C(O)CC2CC(=O)NC(=O)C2
177
+ 3-Heptanoyloxymethylphenytoin,-4.496,1,394.471,1,3,9,75.71,-6.301,O=C1N(COC(=O)CCCCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
178
+ isophorone,-2.015,1,138.20999999999998,0,1,0,17.07,-1.06,CC1=CC(=O)CC(C)(C)C1
179
+ Butabarbital,-1.9580000000000002,1,212.24899999999997,2,1,3,75.27000000000001,-2.39,O=C1NC(=O)NC(=O)C1(CC)C(C)CC
180
+ 5-Nonanone,-2.329,1,142.242,0,0,6,17.07,-2.58,CCCCC(=O)CCCC
181
+ Glutethimide,-2.591,1,217.268,1,2,2,46.17,-2.3369999999999997,CCC1(CCC(=O)NC1=O)c2ccccc2
182
+ 3-Methylpentane,-2.6,1,86.178,0,0,2,0.0,-3.68,CCC(C)CC
183
+ Etofenprox,-6.896,1,376.49600000000004,0,3,9,27.69,-8.6,CCOc1ccc(cc1)C(C)(C)COCc3cccc(Oc2ccccc2)c3
184
+ Methaqualone,-3.8810000000000002,1,250.30100000000002,0,3,1,34.89,-2.925,Cc1ccccc1n3c(C)nc2ccccc2c3=O
185
+ Chloroacetonitrile,-0.44799999999999995,1,75.498,0,0,0,23.79,-0.092,ClCC#N
186
+ Trichloronate,-5.225,1,333.60400000000004,0,1,5,18.46,-5.752000000000001,CCOP(=S)(CC)Oc1cc(Cl)c(Cl)cc1Cl
187
+ Ethisterone,-3.858,1,312.45300000000003,1,4,0,37.3,-5.66,CC12CCC(=O)C=C1CCC3C2CCC4(C)C3CCC4(O)C#C
188
+ Pyridazine,-0.619,2,80.08999999999999,0,1,0,25.78,1.1,c1ccnnc1
189
+ "1,2,3,5-Tetrachlorobenzene",-4.621,1,215.894,0,1,0,0.0,-4.63,Clc1cc(Cl)c(Cl)c(Cl)c1
190
+ Diosgenin,-5.681,1,414.63000000000017,1,6,0,38.69,-7.32,C1C(O)CCC2(C)CC3CCC4(C)C5(C)CC6OCC(C)CC6OC5CC4C3C=C21
191
+ o-Aminophenol,-1.465,1,109.12799999999999,2,1,0,46.25,-0.72,Nc1ccccc1O
192
+ Ethyl nonanoate,-3.3160000000000003,1,186.295,0,0,8,26.3,-3.8,CCCCCCCCC(=O)OCC
193
+ metalaxyl,-2.87,1,279.336,0,1,5,55.84,-1.601,COCC(=O)N(C(C)C(=O)OC)c1c(C)cccc1C
194
+ Propoxur,-2.4090000000000003,1,209.24499999999998,1,1,3,47.56,-2.05,CNC(=O)Oc1ccccc1OC(C)C
195
+ 2-Chlorobutane,-1.94,1,92.569,0,0,1,0.0,-1.96,CCC(C)Cl
196
+ 2-Napthol,-3.08,1,144.17299999999997,1,2,0,20.23,-2.28,Oc1ccc2ccccc2c1
197
+ Oxadiazon,-5.265,1,345.22600000000017,0,2,3,57.26,-5.696000000000001,CC(C)Oc1cc(c(Cl)cc1Cl)n2nc(oc2=O)C(C)(C)C
198
+ 1-Hexyne ,-1.801,1,82.14599999999999,0,0,2,0.0,-2.36,CCCCC#C
199
+ 1-Nonyne ,-2.864,1,124.22699999999999,0,0,5,0.0,-4.24,CCCCCCCC#C
200
+ 2-Chlorotoluene,-3.2969999999999997,1,126.586,0,1,0,0.0,-3.52,Cc1ccccc1Cl
201
+ Diisopropyl ether ,-1.281,1,102.17699999999999,0,0,2,9.23,-1.1,CC(C)OC(C)C
202
+ Dapsone,-2.464,1,248.307,2,2,2,86.18,-3.094,Nc1ccc(cc1)S(=O)(=O)c2ccc(N)cc2
203
+ Methyl hydrazine,0.5429999999999999,1,46.073,2,0,0,38.05,1.34,CNN
204
+ Propyne,-0.672,1,40.065000000000005,0,0,0,0.0,-0.41,CC#C
205
+ Phoxim,-4.5569999999999995,1,298.304,0,1,7,63.839999999999996,-4.862,CCOP(=S)(OCC)ON=C(C#N)c1ccccc1
206
+ Propetamphos,-2.826,1,281.314,1,0,7,56.790000000000006,-3.408,CCNP(=S)(OC)OC(=CC(=O)OC(C)C)C
207
+ Acrolein,-0.184,1,56.064,0,0,1,17.07,0.57,C=CC=O
208
+ Hypoxanthine,-0.6559999999999999,1,136.114,2,2,0,74.43,-2.296,O=c1[nH]cnc2nc[nH]c12
209
+ 6-hydroxyquinoline,-2.725,1,145.161,1,2,0,33.120000000000005,-2.16,Oc2ccc1ncccc1c2
210
+ Fluorobenzene,-2.5140000000000002,1,96.10399999999998,0,1,0,0.0,-1.8,Fc1ccccc1
211
+ 1-Chloropropane,-1.585,1,78.542,0,0,1,0.0,-1.47,CCCCl
212
+ Ethyl acetate,-0.77,1,88.106,0,0,1,26.3,-0.04,CCOC(=O)C
213
+ "2,2-Dimethylpentane",-2.938,1,100.20499999999998,0,0,1,0.0,-4.36,CCCC(C)(C)C
214
+ Pentamethylbenzene,-3.9930000000000003,1,148.249,0,1,0,0.0,-4.0,Cc1cc(C)c(C)c(C)c1C
215
+ eucalyptol,-2.5789999999999997,1,154.253,0,3,0,9.23,-1.64,CC12CCC(CC1)C(C)(C)O2
216
+ dibutyl sebacate,-4.726,1,314.46600000000007,0,0,15,52.60000000000001,-3.8960000000000004,CCCCOC(=O)CCCCCCCCC(=O)OCCCC
217
+ "4,4'-PCB",-5.2989999999999995,1,223.102,0,2,1,0.0,-6.56,Clc1ccc(cc1)c2ccc(Cl)cc2
218
+ "2,3-Dimethylpyridine",-2.0669999999999997,1,107.156,0,1,0,12.89,0.38,Cc1cccnc1C
219
+ Carvone,-2.042,1,150.22099999999998,0,1,1,17.07,-2.06,CC(=C)C1CC=C(C)C(=O)C1
220
+ Carbophenthion,-5.827999999999999,1,342.875,0,1,8,18.46,-5.736000000000001,CCOP(=S)(OCC)SCSc1ccc(Cl)cc1
221
+ "Etoposide (148-167,25mg/ml)",-3.292,1,588.5620000000001,3,7,5,160.83,-3.571,COc1cc(cc(OC)c1O)C6C2C(COC2=O)C(OC4OC3COC(C)OC3C(O)C4O)c7cc5OCOc5cc67
222
+ Perylene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.804,c1cc2cccc3c4cccc5cccc(c(c1)c23)c54
223
+ "2,4-Dinitrotoluene",-2.6039999999999996,1,182.135,0,1,2,86.28,-2.82,Cc1ccc(cc1N(=O)=O)N(=O)=O
224
+ 2-bromonaphthalene,-4.434,1,207.07000000000002,0,2,0,0.0,-4.4,c1c(Br)ccc2ccccc12
225
+ Formetanate,-1.8459999999999999,1,221.26,1,1,3,53.93,-2.34,CNC(=O)Oc1cccc(N=CN(C)C)c1
226
+ 6-methoxypteridine,-1.589,1,162.15200000000002,0,2,1,60.790000000000006,-1.139,COc2cnc1ncncc1n2
227
+ nevirapine,-3.397,1,266.30400000000003,1,4,1,58.120000000000005,-3.19,Cc3ccnc4N(C1CC1)c2ncccc2C(=O)Nc34
228
+ Isazofos,-3.76,1,313.747,0,1,7,58.4,-3.658,CCOP(=S)(OCC)Oc1nc(Cl)n(n1)C(C)C
229
+ "2-Methyl-1,3-Butadiene ",-1.714,1,68.11900000000001,0,0,1,0.0,-2.03,CC(=C)C=C
230
+ linalool,-2.399,1,154.253,1,0,4,20.23,-1.99,CC(C)=CCCC(O)(C)C=C
231
+ Fenthion,-4.265,1,278.335,0,1,5,27.69,-4.57,COP(=S)(OC)Oc1ccc(SC)c(C)c1
232
+ Cyclohexanol ,-1.261,1,100.161,1,1,0,20.23,-0.44,OC1CCCCC1
233
+ 5-Allyl-5-methylbarbital,-1.013,1,182.17899999999997,2,1,2,75.27000000000001,-1.16,O=C1NC(=O)NC(=O)C1(C)CC=C
234
+ Epiandrosterone,-3.8819999999999997,1,290.447,1,4,0,37.3,-4.16,CC34CCC1C(CCC2CC(O)CCC12C)C3CCC4=O
235
+ mannitol,0.647,1,182.172,6,0,5,121.38000000000001,0.06,OCC(O)C(O)C(O)C(O)CO
236
+ 4-Methylbiphenyl,-4.4239999999999995,1,168.239,0,2,1,0.0,-4.62,Cc1ccc(cc1)c2ccccc2
237
+ Atrazine,-3.069,1,215.68800000000002,2,1,4,62.730000000000004,-3.85,CCNc1nc(Cl)nc(NC(C)C)n1
238
+ Phenylthiourea,-1.7009999999999998,1,152.22199999999998,2,1,1,38.05,-1.77,NC(=S)Nc1ccccc1
239
+ 4-Heptanone,-1.62,1,114.18799999999999,0,0,4,17.07,-1.3,CCCC(=O)CCC
240
+ "3,3-Dimethyl-2-butanone",-1.25,1,100.16099999999999,0,0,0,17.07,-0.72,CC(=O)C(C)(C)C
241
+ 4-Chlorophenol ,-2.761,1,128.558,1,1,0,20.23,-0.7,Oc1ccc(Cl)cc1
242
+ Cyclohexanone,-0.996,1,98.14500000000001,0,1,0,17.07,-0.6,O=C1CCCCC1
243
+ m-Methylaniline,-1.954,1,107.156,1,1,0,26.02,-0.85,Cc1cccc(N)c1
244
+ Trichloroacetonitrile,-2.019,1,144.388,0,0,0,23.79,-2.168,ClC(Cl)(Cl)C#N
245
+ norflurazon,-4.029,1,303.67100000000005,1,2,2,46.92,-4.046,CNc2cnn(c1cccc(c1)C(F)(F)F)c(=O)c2Cl
246
+ 2-Decanone,-2.617,1,156.269,0,0,7,17.07,-3.3,CCCCCCCCC(=O)C
247
+ Ipazine,-3.497,1,243.74200000000002,1,1,5,53.940000000000005,-3.785,CCN(CC)c1nc(Cl)nc(NC(C)C)n1
248
+ Benzocaine,-2.383,1,165.19199999999998,1,1,2,52.32,-2.616,CCOC(=O)c1ccc(N)cc1
249
+ "1,2,4-Trichlorobenzene",-4.083,1,181.449,0,1,0,0.0,-3.59,Clc1ccc(Cl)c(Cl)c1
250
+ Triazolam,-3.948,1,343.2170000000001,0,4,1,43.07,-4.09,Cc3nnc4CN=C(c1ccccc1Cl)c2cc(Cl)ccc2n34
251
+ "1,2-Benzenediol",-1.635,1,110.11199999999998,2,1,0,40.46,0.62,Oc1ccccc1O
252
+ Reverse Transcriptase inhibitor 1,-2.7939999999999996,1,254.29299999999998,0,3,1,49.330000000000005,-2.62,CCN2c1ncccc1N(C)C(=O)c3cccnc23
253
+ Dimethyl sulfide,-0.758,1,62.137,0,0,0,0.0,-0.45,CSC
254
+ 2-Bromotoluene,-3.667,1,171.03699999999998,0,1,0,0.0,-2.23,Cc1ccccc1Br
255
+ O-Ethyl carbamate,-0.218,1,89.09400000000001,1,0,1,52.32,0.85,CCOC(=O)N
256
+ megestrol acetate,-4.417,1,384.5160000000002,0,4,2,60.440000000000005,-5.35,CC(=O)OC3(CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C)C(C)=O
257
+ "2,4-Dimethyl-3-pentanol",-1.6469999999999998,1,116.20399999999998,1,0,2,20.23,-1.22,CC(C)C(O)C(C)C
258
+ Napthalene,-3.468,2,128.17399999999995,0,2,0,0.0,-3.6,c1ccc2ccccc2c1
259
+ N-Ethylaniline,-2.3890000000000002,1,121.18299999999995,1,1,2,12.03,-1.7,CCNc1ccccc1
260
+ Phenytoin,-3.057,1,252.27300000000002,2,3,2,58.2,-4.0969999999999995,O=C1NC(=O)C(N1)(c2ccccc2)c3ccccc3
261
+ "7,12-Dimethylbenz(a)anthracene",-6.297000000000001,1,256.348,0,4,0,0.0,-7.02,Cc1c2ccccc2c(C)c3ccc4ccccc4c13
262
+ Dialifor,-5.026,1,393.85400000000016,0,2,8,55.84,-6.34,CCOP(=S)(OCC)SC(CCl)N1C(=O)c2ccccc2C1=O
263
+ Methoxychlor,-5.537999999999999,1,345.6529999999999,0,2,4,18.46,-6.89,COc1ccc(cc1)C(c2ccc(OC)cc2)C(Cl)(Cl)Cl
264
+ TEFLUBENZURON,-5.462000000000001,1,381.1120000000001,2,2,2,58.2,-7.28,Fc1cccc(F)c1C(=O)NC(=O)Nc2cc(Cl)c(F)c(Cl)c2F
265
+ 3-Pentanoyloxymethylphenytoin,-3.81,1,366.41700000000003,1,3,7,75.71,-4.678,O=C1N(COC(=O)CCCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
266
+ Monuron,-2.6710000000000003,1,198.653,1,1,1,32.34,-2.89,CN(C)C(=O)Nc1ccc(Cl)cc1
267
+ Flutriafol,-3.569,1,301.296,1,3,4,50.94,-3.37,OC(Cn1cncn1)(c2ccc(F)cc2)c3ccccc3F
268
+ triamcinolone diacetate,-3.8760000000000003,1,478.51300000000026,2,4,4,127.20000000000002,-4.13,CC(=O)OCC(=O)C3(O)C(CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)OC(C)=O
269
+ 1-Bromobutane,-2.303,1,137.01999999999998,0,0,2,0.0,-2.37,CCCCBr
270
+ "1,2,4,5-Tetrabromobenzene",-6.001,1,393.69800000000004,0,1,0,0.0,-6.98,Brc1cc(Br)c(Br)cc1Br
271
+ 4-Methyl-2-pentanone,-1.1840000000000002,1,100.16099999999999,0,0,2,17.07,-0.74,CC(C)CC(=O)C
272
+ cycloate,-3.35,1,215.36199999999994,0,1,3,20.310000000000002,-3.4,CCSC(=O)N(CC)C1CCCCC1
273
+ 4-Chloroanisole,-3.057,1,142.585,0,1,1,9.23,-2.78,COc1ccc(Cl)cc1
274
+ Deltamethrin,-7.44,1,505.20600000000024,0,3,6,59.32000000000001,-8.402000000000001,CC1(C)C(C=C(Br)Br)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2
275
+ Talbutal,-2.06,1,224.26,2,1,4,75.27000000000001,-2.016,CCC(C)C1(CC=C)C(=O)NC(=O)NC1=O
276
+ Fenitrothion,-3.845,1,277.238,0,1,5,70.83000000000001,-4.04,COP(=S)(OC)Oc1ccc(N(=O)=O)c(C)c1
277
+ 1-Iodonapthalene,-4.888999999999999,1,254.07000000000002,0,2,0,0.0,-4.55,Ic1cccc2ccccc12
278
+ Sorbitol,0.647,1,182.172,6,0,5,121.38000000000001,1.09,OCC(O)C(O)C(O)C(O)CO
279
+ Ethanethiol,-0.968,1,62.137,1,0,0,0.0,-0.6,CCS
280
+ "1,1,2-Trichloroethane",-1.9609999999999999,1,133.405,0,0,1,0.0,-1.48,ClCC(Cl)Cl
281
+ Pyrolan,-3.141,1,245.282,0,2,2,47.36000000000001,-2.09,CN(C)C(=O)Oc1cc(C)nn1c2ccccc2
282
+ o-Hydroxybenzamide,-1.942,1,137.13799999999998,2,1,1,63.32000000000001,-1.82,NC(=O)c1ccccc1O
283
+ o-Nitrotoluene,-2.589,1,137.138,0,1,1,43.14,-2.33,Cc1ccccc1N(=O)=O
284
+ "5,5-Diisopropylbarbital",-1.942,1,212.249,2,1,2,75.27000000000001,-2.766,O=C1NC(=O)NC(=O)C1(C(C)C)C(C)C
285
+ 2-Ethyltoluene,-3.2960000000000003,1,120.19499999999996,0,1,1,0.0,-3.21,CCc1ccccc1C
286
+ 1-Chloroheptane,-3.003,1,134.65,0,0,5,0.0,-4.0,CCCCCCCCl
287
+ Barbital,-1.265,1,184.19499999999996,2,1,2,75.27000000000001,-2.4,O=C1NC(=O)NC(=O)C1(CC)CC
288
+ Bibenzyl ,-4.301,2,182.266,0,2,3,0.0,-4.62,C(Cc1ccccc1)c2ccccc2
289
+ "1,1,2,2-Tetrachloroethane",-2.549,1,167.85,0,0,1,0.0,-1.74,ClC(Cl)C(Cl)Cl
290
+ RTI 23,-4.228,1,283.331,1,3,2,54.46,-5.153,CCN2c1cc(OC)cc(C)c1NC(=O)c3cccnc23
291
+ 2-Methylphenanthrene,-4.87,1,192.261,0,3,0,0.0,-5.84,Cc1ccc2c(ccc3ccccc32)c1
292
+ dibutylphthalate,-4.378,1,278.348,0,1,8,52.60000000000001,-4.4,CCCCOC(=O)c1ccccc1C(=O)OCCCC
293
+ tetrachloroguaiacol,-4.2989999999999995,1,261.919,1,1,1,29.46,-4.02,COc1c(O)c(Cl)c(Cl)c(Cl)c1Cl
294
+ Dimecron,-2.426,1,299.6909999999999,0,0,8,65.07000000000001,0.523,CCN(CC)C(=O)C(=CCOP(=O)(OC)OC)Cl
295
+ Equilin,-3.555,1,268.356,1,4,0,37.3,-5.282,CC34CCC1C(=CCc2cc(O)ccc12)C3CCC4=O
296
+ Chlorimuron-ethyl (ph 7),-3.719,1,414.82700000000017,1,2,8,127.79,-4.5760000000000005,CCOC(=O)c1ccccc1S(=O)(=O)NN(C=O)c2nc(Cl)cc(OC)n2
297
+ p-Nitroanisole,-2.522,1,153.13699999999997,0,1,2,52.37,-2.41,COc1ccc(cc1)N(=O)=O
298
+ 1-Chlorohexane,-2.648,1,120.623,0,0,4,0.0,-3.12,CCCCCCCl
299
+ "2,2',3,3',4,4',5,5'-PCB",-8.468,1,429.77200000000016,0,2,1,0.0,-9.16,Clc1cc(c(Cl)c(Cl)c1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl
300
+ Raffinose,0.496,1,504.43800000000005,11,3,8,268.67999999999995,-0.41,OCC1OC(CO)(OC2OC(COC3OC(CO)C(O)C(O)C3O)C(O)C(O)C2O)C(O)C1O
301
+ hexacosane,-9.702,1,366.7180000000002,0,0,23,0.0,-8.334,CCCCCCCCCCCCCCCCCCCCCCCCCC
302
+ RTI 5,-3.471,1,253.30499999999995,0,3,1,36.44,-3.324,CCN2c1ccccc1N(C)C(=O)c3cccnc23
303
+ "1,1-Dichloroethane",-1.5759999999999998,1,98.96000000000001,0,0,0,0.0,-1.29,CC(Cl)Cl
304
+ Sulfanilamide,-0.9540000000000001,1,172.20899999999997,2,1,1,86.18,-1.34,Nc1ccc(cc1)S(N)(=O)=O
305
+ Isopropalin,-5.306,1,309.36600000000004,0,1,8,89.51999999999998,-6.49,CCCN(CCC)c1c(cc(cc1N(=O)=O)C(C)C)N(=O)=O
306
+ Lindane,-4.0089999999999995,1,290.832,0,1,0,0.0,-4.64,ClC1C(Cl)C(Cl)C(Cl)C(Cl)C1Cl
307
+ Isofenphos,-4.538,1,345.4010000000002,1,1,8,56.790000000000006,-4.194,CCOP(=S)(NC(C)C)Oc1ccccc1C(=O)OC(C)C
308
+ "1,2,3-Trichlorobenzene",-4.008,1,181.44899999999998,0,1,0,0.0,-4.0,Clc1cccc(Cl)c1Cl
309
+ Tetrachloromethane,-2.6069999999999998,1,153.823,0,0,0,0.0,-2.31,ClC(Cl)(Cl)Cl
310
+ "3,4-Dichloronitrobenzene",-3.448,1,192.001,0,1,1,43.14,-3.2,O=N(=O)c1cc(Cl)c(Cl)cc1
311
+ Cyclooctanol,-2.14,1,128.215,1,1,0,20.23,-1.29,OC1CCCCCCC1
312
+ 17a-Methyltestosterone,-4.073,1,302.4580000000001,1,4,0,37.3,-3.9989999999999997,CC1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C
313
+ Dulcin,-2.167,1,180.20699999999997,2,1,3,64.35,-2.17,CCOc1ccc(NC(N)=O)cc1
314
+ "trans-1,4-Dimethylcyclohexane",-3.305,1,112.216,0,1,0,0.0,-4.47,C/C1CCC(\C)CC1
315
+ "1,7-phenantroline",-2.9939999999999998,2,180.20999999999998,0,3,0,25.78,-2.68,c1cnc2c(c1)ccc3ncccc23
316
+ Methyl t-butyl ether ,-0.9840000000000001,1,88.14999999999999,0,0,0,9.23,-0.24,COC(C)(C)C
317
+ Anethole,-3.2539999999999996,1,148.20499999999998,0,1,2,9.23,-3.13,COc1ccc(C=CC)cc1
318
+ 1-Hexadecanol,-4.94,1,242.44699999999992,1,0,14,20.23,-7.0,CCCCCCCCCCCCCCCCO
319
+ uracil,-0.441,1,112.088,2,1,0,65.72,-1.4880000000000002,O=c1cc[nH]c(=O)[nH]1
320
+ adenine,-1.255,1,135.13,2,2,0,80.47999999999999,-2.12,Nc1ncnc2nc[nH]c12
321
+ "2,2',3,4,5-PCB",-6.709,1,326.437,0,2,1,0.0,-7.21,Clc1cc(Cl)c(cc1Cl)c2cccc(Cl)c2Cl
322
+ Ancymidol,-2.181,1,256.30499999999995,1,3,4,55.24,-2.596,COc1ccc(cc1)C(O)(C2CC2)c3cncnc3
323
+ Benzo(b)fluoranthene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.23,c1ccc2c(c1)c3cccc4c3c2cc5ccccc54
324
+ Carbanilide,-3.611,1,212.25199999999998,2,2,2,41.13,-3.15,O=C(Nc1ccccc1)Nc2ccccc2
325
+ phenobarbital,-2.272,1,232.239,2,2,2,75.27000000000001,-2.322,CCC1(C(=O)NC(=O)NC1=O)c2ccccc2
326
+ "2',3,4-PCB",-5.686,1,257.547,0,2,1,0.0,-6.29,Clc1ccc(cc1)c2cccc(Cl)c2Cl
327
+ Isoproturon,-2.867,1,206.289,1,1,2,32.34,-3.536,CC(C)c1ccc(NC(=O)N(C)C)cc1
328
+ Azintamide,-2.231,1,259.762,0,1,5,46.09,-1.716,CCN(CC)C(=O)CSc1ccc(Cl)nn1
329
+ "2,2-Dimethyl-1-butanol",-1.365,1,102.17699999999998,1,0,2,20.23,-1.04,CCC(C)(C)CO
330
+ Ethyl pentanoate,-1.899,1,130.18699999999998,0,0,4,26.3,-1.75,CCCOC(=O)CCC
331
+ "2,4,6-Trinitrotoluene",-2.6060000000000003,1,227.13199999999998,0,1,3,129.42000000000002,-3.22,Cc1c(cc(cc1N(=O)=O)N(=O)=O)N(=O)=O
332
+ Bensulide,-4.99,1,397.52400000000006,1,1,10,64.63,-4.2,CC(C)OP(=S)(OC(C)C)SCCNS(=O)(=O)c1ccccc1
333
+ Cycloheptane,-2.9160000000000004,2,98.18900000000001,0,1,0,0.0,-3.51,C1CCCCCC1
334
+ Propyl formate,-0.757,1,88.10599999999998,0,0,3,26.3,-0.49,CCCOC=O
335
+ 2-Isopropyltoluene,-3.585,1,134.22199999999995,0,1,1,0.0,-3.76,CC(C)c1ccccc1C
336
+ m-Chloroaniline,-2.392,1,127.574,1,1,0,26.02,-1.37,Nc1cccc(Cl)c1
337
+ "2,4-Dimethylpentane",-2.938,1,100.20499999999998,0,0,2,0.0,-4.26,CC(C)CC(C)C
338
+ Dibenzofurane,-4.2010000000000005,2,168.195,0,3,0,13.14,-4.6,o1c2ccccc2c3ccccc13
339
+ ethofumesate,-3.1839999999999997,1,286.34900000000005,0,2,4,61.830000000000005,-3.42,CCOC2Oc1ccc(OS(C)(=O)=O)cc1C2(C)C
340
+ Fluometuron,-3.065,1,232.20499999999996,1,1,1,32.34,-3.43,CN(C)C(=O)Nc1cccc(c1)C(F)(F)F
341
+ Acridine,-3.846,2,179.22199999999998,0,3,0,12.89,-3.67,c3ccc2nc1ccccc1cc2c3
342
+ Cortisone,-2.8930000000000002,1,360.45000000000016,2,4,2,91.67,-3.11,CC12CC(=O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO
343
+ glucose,0.501,1,180.156,5,1,1,110.38000000000001,0.74,OCC1OC(O)C(O)C(O)C1O
344
+ 3-Methylphenol,-2.313,1,108.13999999999999,1,1,0,20.23,-0.68,Cc1cccc(O)c1
345
+ Indapamide,-4.345,1,365.84200000000004,2,3,3,92.5,-3.5860000000000003,CC2Cc1ccccc1N2NC(=O)c3ccc(Cl)c(c3)S(N)(=O)=O
346
+ Lovastatin,-4.731,1,404.54700000000025,1,3,6,72.83,-6.005,CCC(C)C(=O)OC2CC(C)C=C3C=CC(C)C(CCC1CC(O)CC(=O)O1)C23
347
+ "1,4-Dinitrobenzene",-2.281,1,168.10799999999995,0,1,2,86.28,-3.39,O=N(=O)c1ccc(cc1)N(=O)=O
348
+ Reposal,-2.781,1,262.30899999999997,2,3,2,75.27000000000001,-2.696,CCC1(C(=O)NC(=O)NC1=O)C2=CCC3CCC2C3
349
+ Ethyl decanoate,-3.6710000000000003,1,200.32199999999997,0,0,9,26.3,-4.1,CCCCCCCCCC(=O)OCC
350
+ Fenuron,-1.847,1,164.208,1,1,1,32.34,-1.6,CN(C)C(=O)Nc1ccccc1
351
+ Ethyl propyl ether,-1.072,1,88.14999999999999,0,0,3,9.23,-0.66,CCCOCC
352
+ 2-Propanol,-0.261,1,60.096000000000004,1,0,0,20.23,0.43,CC(C)O
353
+ 2-Methylnapthalene,-3.802,1,142.201,0,2,0,0.0,-3.77,Cc1ccc2ccccc2c1
354
+ Chlorodibromethane,-2.54,1,208.28,0,0,0,0.0,-1.9,ClC(Br)Br
355
+ Hexestrol,-4.854,1,270.372,2,2,5,40.46,-4.43,CCC(C(CC)c1ccc(O)cc1)c2ccc(O)cc2
356
+ Malathion,-3.391,1,330.3640000000001,0,0,9,71.06,-3.37,CCOC(=O)CC(SP(=S)(OC)OC)C(=O)OCC
357
+ Benzylchloride,-2.887,1,126.58599999999996,0,1,1,0.0,-2.39,ClCc1ccccc1
358
+ t-Crotonaldehyde,-0.604,1,70.09100000000001,0,0,1,17.07,0.32,C/C=C/C=O
359
+ Chlorbromuron,-3.938,1,293.548,1,1,2,41.57,-3.924,CON(C)C(=O)Nc1ccc(Br)c(Cl)c1
360
+ "9,10-Dimethylanthracene",-5.228,1,206.28799999999998,0,3,0,0.0,-6.57,Cc1c2ccccc2c(C)c3ccccc13
361
+ Methyl hexanoate,-1.899,1,130.18699999999998,0,0,4,26.3,-1.87,CCCCCC(=O)OC
362
+ Dimefuron,-3.8310000000000004,1,338.79500000000013,1,2,2,80.37,-4.328,CN(C)C(=O)Nc1ccc(c(Cl)c1)n2nc(oc2=O)C(C)(C)C
363
+ p-Fluoroacetanilide,-2.181,1,153.156,1,1,1,29.1,-1.78,CC(=O)Nc1ccc(F)cc1
364
+ alachlor,-3.319,1,269.77199999999993,0,1,6,29.54,-3.26,CCc1cccc(CC)c1N(COC)C(=O)CCl
365
+ Cyclohexene,-2.16,2,82.146,0,1,0,0.0,-2.59,C1CCC=CC1
366
+ Hydrocortisone ,-3.159,1,362.4660000000002,3,4,2,94.83,-3.09,CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1(O)C(=O)CO
367
+ Pyrimidine,-0.884,2,80.08999999999999,0,1,0,25.78,1.1,c1cncnc1
368
+ p-Chloronitrobenzene,-2.9010000000000002,1,157.55599999999998,0,1,1,43.14,-2.92,Clc1ccc(cc1)N(=O)=O
369
+ Methyl propionate,-0.836,1,88.106,0,0,1,26.3,-0.14,CCC(=O)OC
370
+ o-Chloronitrobenzene,-2.775,1,157.55599999999998,0,1,1,43.14,-2.55,Clc1ccccc1N(=O)=O
371
+ Neburon,-4.157,1,275.179,1,1,4,32.34,-4.77,CCCCN(C)C(=O)Nc1ccc(Cl)c(Cl)c1
372
+ Buthidazole,-2.398,1,256.33099999999996,1,2,1,69.56,-1.8769999999999998,CN1CC(O)N(C1=O)c2nnc(s2)C(C)(C)C
373
+ Nitrobenzene,-2.2880000000000003,1,123.11099999999996,0,1,1,43.14,-1.8,O=N(=O)c1ccccc1
374
+ Iodobenzene,-3.8,1,204.01000000000002,0,1,0,0.0,-3.01,Ic1ccccc1
375
+ Metolazone,-3.7769999999999997,1,365.8420000000001,2,3,2,92.5,-3.78,CC2Nc1cc(Cl)c(cc1C(=O)N2c3ccccc3C)S(N)(=O)=O
376
+ Methocarbamol,-1.4280000000000002,1,241.24299999999994,2,1,6,91.00999999999999,-0.985,COc1ccccc1OCC(O)COC(N)=O
377
+ butachlor,-4.3469999999999995,1,311.85300000000007,0,1,9,29.54,-4.19,CCCCOCN(C(=O)CCl)c1c(CC)cccc1CC
378
+ "2,3-Dichlorophenol",-3.1439999999999997,1,163.003,1,1,0,20.23,-1.3,Oc1cccc(Cl)c1Cl
379
+ Propyl butyrate,-1.1909999999999998,1,102.13299999999998,0,0,2,26.3,-1.92,CCCC(=O)OC
380
+ Propanil,-3.6439999999999997,1,218.08299999999997,1,1,2,29.1,-3.0,CCC(=O)Nc1ccc(Cl)c(Cl)c1
381
+ Triamterene,-3.051,1,253.26900000000003,3,3,1,129.62,-2.404,Nc3nc(N)c2nc(c1ccccc1)c(N)nc2n3
382
+ Ethyl hexanoate,-2.254,1,144.21399999999997,0,0,5,26.3,-2.35,CCCCCC(=O)OCC
383
+ chloralose,-1.8869999999999998,1,309.529,3,2,2,88.38000000000001,-1.84,OCC(O)C2OC1OC(OC1C2O)C(Cl)(Cl)Cl
384
+ Amitraz,-5.5329999999999995,1,293.41400000000004,0,2,4,27.96,-5.47,CN(C=Nc1ccc(C)cc1C)C=Nc2ccc(C)cc2C
385
+ Prometon,-3.448,1,225.296,2,1,5,71.96000000000001,-2.478,COc1nc(NC(C)C)nc(NC(C)C)n1
386
+ 1-Octene ,-3.073,1,112.216,0,0,5,0.0,-4.44,CCCCCCC=C
387
+ p-Methylaniline ,-1.954,1,107.156,1,1,0,26.02,-1.21,Cc1ccc(N)cc1
388
+ aminothiazole,-1.226,1,100.14599999999999,1,1,0,38.91,-0.36,Nc1nccs1
389
+ Metolcarb,-1.9469999999999998,1,151.165,1,1,1,38.33,-1.8030000000000002,c1ccccc1(OC(=O)NC)
390
+ 3-Hexanol,-1.324,1,102.17699999999999,1,0,3,20.23,-0.8,CCCC(O)CC
391
+ 9-anthrol,-4.148,1,194.23299999999998,1,3,0,20.23,-4.73,c3ccc2c(O)c1ccccc1cc2c3
392
+ 2-Methylanthracene,-4.87,1,192.261,0,3,0,0.0,-6.96,Cc1ccc2cc3ccccc3cc2c1
393
+ "1,2,3-Trimethylbenzene ",-3.312,1,120.195,0,1,0,0.0,-3.2,Cc1cccc(C)c1C
394
+ Aminocarb,-2.677,1,208.26099999999997,1,1,2,41.57,-2.36,CNC(=O)Oc1ccc(N(C)C)c(C)c1
395
+ 2-Nonanol,-2.387,1,144.258,1,0,6,20.23,-2.74,CCCCCCCC(C)O
396
+ Methyldymron,-3.863,1,268.36,1,2,3,32.34,-3.35,CN(C(=O)NC(C)(C)c1ccccc1)c2ccccc2
397
+ 3-Hexanone,-1.266,1,100.16099999999999,0,0,3,17.07,-0.83,CCCC(=O)CC
398
+ bromoxynil,-3.793,1,276.91499999999996,1,1,0,44.019999999999996,-3.33,Oc1c(Br)cc(C#N)cc1Br
399
+ "3,4-PCB",-5.223,1,223.102,0,2,1,0.0,-6.39,Clc1ccc(cc1Cl)c2ccccc2
400
+ Mefenacet,-4.504,1,298.367,0,3,4,42.43000000000001,-4.873,CN(C(=O)COc1nc2ccccc2s1)c3ccccc3
401
+ 5-hydroxyquinoline,-2.725,1,145.161,1,2,0,33.120000000000005,-2.54,Oc1cccc2ncccc12
402
+ Carboxin,-2.927,1,235.30800000000002,1,2,2,38.33,-3.14,CC1=C(SCCO1)C(=O)Nc2ccccc2
403
+ Ethoxyzolamide,-3.085,1,258.324,1,2,3,82.28,-3.81,CCOc2ccc1nc(sc1c2)S(N)(=O)=O
404
+ Pentachlorophenol,-4.835,1,266.33799999999997,1,1,0,20.23,-4.28,Oc1c(Cl)c(Cl)c(Cl)c(Cl)c1Cl
405
+ Bromochloromethane,-1.5190000000000001,1,129.384,0,0,0,0.0,-0.89,ClCBr
406
+ metharbital,-1.6580000000000001,1,198.22199999999998,1,1,2,66.48,-2.23,CCC1(CC)C(=O)NC(=O)N(C)C1=O
407
+ deoxycorticosterone acetate,-4.4719999999999995,1,372.5050000000002,0,4,3,60.440000000000005,-4.63,CC(=O)OCC(=O)C3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C
408
+ benzylurea,-1.5090000000000001,1,150.18099999999998,2,1,2,55.120000000000005,-0.95,NC(=O)NCc1ccccc1
409
+ Chlortoluron,-3.048,1,212.67999999999998,1,1,1,32.34,-3.483,CN(C)C(=O)Nc1ccc(C)c(Cl)c1
410
+ Linuron,-3.5810000000000004,1,249.09699999999998,1,1,2,41.57,-3.592,CON(C)C(=O)Nc1ccc(Cl)c(Cl)c1
411
+ Cycloheptanol,-1.7,1,114.188,1,1,0,20.23,-0.88,OC1CCCCCC1
412
+ Thiamphenicol,-1.936,1,356.2270000000001,3,1,6,103.70000000000002,-2.154,CS(=O)(=O)c1ccc(cc1)C(O)C(CO)NC(=O)C(Cl)Cl
413
+ thiopental,-2.96,1,242.34400000000002,2,1,4,58.2,-3.36,CCCC(C)C1(CC)C(=O)NC(=S)NC1=O
414
+ acetazolamide,-0.7929999999999999,1,222.251,2,1,2,115.03999999999999,-2.36,CC(=O)Nc1nnc(s1)S(N)(=O)=O
415
+ p-Nitrophenol,-2.318,1,139.10999999999999,1,1,1,63.37,-0.74,Oc1ccc(cc1)N(=O)=O
416
+ Aldrin,-5.511,1,364.914,0,4,0,0.0,-6.307,ClC1=C(Cl)C2(Cl)C3C4CC(C=C4)C3C1(Cl)C2(Cl)Cl
417
+ Tetrahydrofurane ,-0.62,2,72.107,0,1,0,9.23,0.49,C1CCOC1
418
+ o-Nitroaniline,-2.2769999999999997,1,138.126,1,1,1,69.16,-1.96,Nc1ccccc1N(=O)=O
419
+ "2,2',3,3'-PCB",-6.079,1,291.99199999999996,0,2,1,0.0,-7.28,Clc1cccc(c1Cl)c2cccc(Cl)c2Cl
420
+ phenylbutazone,-4.0760000000000005,1,308.38100000000003,0,3,5,40.620000000000005,-3.81,CCCCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3
421
+ "2,6-Dinitrotoluene",-2.553,1,182.135,0,1,2,86.28,-3.0,Cc1c(cccc1N(=O)=O)N(=O)=O
422
+ Progesterone,-4.17,1,314.46900000000005,0,4,1,34.14,-4.42,CC(=O)C1CCC2C3CCC4=CC(=O)CCC4(C)C3CCC12C
423
+ Chlorazine,-3.6630000000000003,1,257.76899999999995,0,1,6,45.150000000000006,-4.4110000000000005,CCN(CC)c1nc(Cl)nc(n1)N(CC)CC
424
+ captafol,-4.365,1,349.06600000000014,0,2,3,37.38,-5.4,ClC(Cl)C(Cl)(Cl)SN2C(=O)C1CC=CCC1C2=O
425
+ "1,2,4-tribromobenzene",-5.144,1,314.802,0,1,0,0.0,-4.5,c1(Br)c(Br)cc(Br)cc1
426
+ Oxazepam,-3.517,1,286.718,2,3,1,61.690000000000005,-3.952,OC3N=C(c1ccccc1)c2cc(Cl)ccc2NC3=O
427
+ Secobarbital,-2.415,1,238.28699999999995,2,1,5,75.27000000000001,-2.356,O=C1NC(=O)NC(=O)C1(C(C)CCC)CC=C
428
+ Carvacrol,-3.2239999999999998,1,150.22099999999998,1,1,1,20.23,-2.08,c1(O)c(C)ccc(C(C)C)c1
429
+ rhodanine,-0.396,1,133.197,1,1,0,29.1,-1.77,C1SC(=S)NC1(=O)
430
+ Morin,-2.7310000000000003,1,302.23800000000006,5,3,1,131.35999999999999,-3.083,Oc1ccc(c(O)c1)c3oc2cc(O)cc(O)c2c(=O)c3O
431
+ Kepone,-5.112,1,490.6390000000001,0,6,0,17.07,-5.2589999999999995,ClC1(C(=O)C2(Cl)C3(Cl)C14Cl)C5(Cl)C2(Cl)C3(Cl)C(Cl)(Cl)C45Cl
432
+ Disulfiram,-3.862,1,296.5520000000001,0,0,4,6.48,-4.86,CCN(CC)C(=S)SSC(=S)N(CC)CC
433
+ Cyclohexane,-2.477,2,84.162,0,1,0,0.0,-3.1,C1CCCCC1
434
+ Dienochlor,-7.848,1,474.64,0,2,1,0.0,-7.278,ClC1=C(Cl)C(Cl)(C(=C1Cl)Cl)C2(Cl)C(=C(Cl)C(=C2Cl)Cl)Cl
435
+ chlordimeform,-3.1639999999999997,1,196.68099999999998,0,1,2,15.6,-2.86,CN(C)C=Nc1ccc(Cl)cc1C
436
+ Equilenin,-3.927,1,266.34,1,4,0,37.3,-5.24,CC34CCc1c(ccc2cc(O)ccc12)C3CCC4=O
437
+ 1-Octanol,-2.105,1,130.23100000000002,1,0,6,20.23,-2.39,CCCCCCCCO
438
+ Diethyl sulfide,-1.598,1,90.191,0,0,2,0.0,-1.34,CCSCC
439
+ "1,2-Dichloroethane",-1.374,1,98.96000000000001,0,0,1,0.0,-1.06,ClCCCl
440
+ 2-Chloro-2-methylbutane,-2.278,1,106.59599999999999,0,0,1,0.0,-2.51,CCC(C)(C)Cl
441
+ 1-Chloro-2-bromoethane,-1.7380000000000002,1,143.411,0,0,1,0.0,-1.32,ClCCBr
442
+ p-Nitroaniline,-1.936,1,138.126,1,1,1,69.16,-2.37,Nc1ccc(cc1)N(=O)=O
443
+ Lactose,1.071,1,342.297,8,2,4,189.52999999999997,-0.244,OCC1OC(OC2C(O)C(O)C(O)OC2CO)C(O)C(O)C1O
444
+ RTI 2,-3.125,1,268.32,0,3,2,49.330000000000005,-2.86,CCN2c1ncccc1N(CC)C(=O)c3cccnc23
445
+ Chlorobenzene,-2.975,1,112.55899999999997,0,1,0,0.0,-2.38,Clc1ccccc1
446
+ 1-Nonene ,-3.427,1,126.243,0,0,6,0.0,-5.05,CCCCCCCC=C
447
+ p-Bromoiodobenzene,-4.754,1,282.90599999999995,0,1,0,0.0,-4.56,Brc1ccc(I)cc1
448
+ 3-Methyl-3-pentanol,-1.308,1,102.17699999999998,1,0,2,20.23,-0.36,CCC(C)(O)CC
449
+ Pentylbenzene,-3.8989999999999996,1,148.249,0,1,4,0.0,-4.64,CCCCCc1ccccc1
450
+ allantoin,0.652,1,158.117,4,1,1,113.32,-1.6,NC(=O)NC1NC(=O)NC1=O
451
+ Glafenine,-5.052,1,372.80800000000016,3,3,6,91.67999999999999,-4.571000000000001,OCC(O)COC(=O)c1ccccc1Nc2ccnc3cc(Cl)ccc23
452
+ DDD,-6.007999999999999,1,320.04600000000005,0,2,3,0.0,-7.2,ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2
453
+ testosterone acetate,-4.449,1,330.4680000000001,0,4,1,43.370000000000005,-5.184,CC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C
454
+ 1-Chloronapthalene,-4.063,1,162.61899999999997,0,2,0,0.0,-3.93,Clc1cccc2ccccc12
455
+ RTI 19,-4.007,1,252.31699999999995,0,3,1,23.55,-4.749,CCN2c1ccccc1N(C)C(=O)c3ccccc23
456
+ 2-Hexanol,-1.324,1,102.17699999999998,1,0,3,20.23,-0.89,CCCCC(C)O
457
+ Propylcyclopentane,-3.16,1,112.21600000000001,0,1,2,0.0,-4.74,CCCC1CCCC1
458
+ Etomidate,-3.359,1,244.294,0,2,4,44.12,-4.735,CCOC(=O)c1cncn1C(C)c2ccccc2
459
+ "3,4-Dichlorophenol",-3.352,1,163.00300000000001,1,1,0,20.23,-1.25,Oc1ccc(Cl)c(Cl)c1
460
+ Cypermethrin,-6.775,1,416.30400000000014,0,3,6,59.32000000000001,-8.017000000000001,CC1(C)C(C=C(Cl)Cl)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2
461
+ Benzoxazole,-2.214,2,119.12299999999998,0,2,0,26.03,-1.16,c2ccc1ocnc1c2
462
+ 1-Pentanol,-1.042,1,88.14999999999999,1,0,3,20.23,-0.6,CCCCCO
463
+ "N,N-Diethylaniline",-3.16,1,149.237,0,1,3,3.24,-3.03,CCN(CC)c1ccccc1
464
+ "1,3-Difluorobenzene",-2.636,1,114.094,0,1,0,0.0,-2.0,Fc1cccc(F)c1
465
+ 3-chloropropionitrile,-0.522,1,89.525,0,0,1,23.79,-0.29,ClCCC#N
466
+ t-Pentylbenzene,-3.867,1,148.249,0,1,1,0.0,-4.15,CC(C)(C)Cc1ccccc1
467
+ 5-Ethyl-5-phenylbarbital,-2.272,1,232.239,2,2,2,75.27000000000001,-2.322,O=C1NC(=O)NC(=O)C1(CC)c1ccccc1
468
+ o-Chloroiodobenzene,-4.3839999999999995,1,238.45499999999998,0,1,0,0.0,-3.54,Clc1ccccc1I
469
+ Benzotriazole,-2.21,2,119.127,1,2,0,41.57,-0.78,c2ccc1[nH]nnc1c2
470
+ Carbofuran,-3.05,1,221.25599999999994,1,2,1,47.56,-2.8,CNC(=O)Oc1cccc2CC(C)(C)Oc12
471
+ "2,6-Dimethylphenol",-2.589,1,122.16699999999999,1,1,0,20.23,-1.29,Cc1cccc(C)c1O
472
+ 3-Methyl-2-butanol,-0.9540000000000001,1,88.14999999999999,1,0,1,20.23,-0.18,CC(C)C(C)O
473
+ benzhydrol,-3.033,1,184.238,1,2,2,20.23,-2.55,c1ccccc1C(O)c2ccccc2
474
+ Methyl decanoate,-3.3160000000000003,1,186.295,0,0,8,26.3,-4.69,CCCCCCCCCC(=O)OC
475
+ Dicapthon,-4.188,1,297.656,0,1,5,70.83000000000001,-4.31,COP(=S)(OC)Oc1ccc(cc1Cl)N(=O)=O
476
+ 1-Bromo-2-methylpropane,-2.2880000000000003,1,137.01999999999998,0,0,1,0.0,-2.43,CC(C)CBr
477
+ Iodoethane,-2.066,1,155.966,0,0,0,0.0,-1.6,CCI
478
+ Pirimicarb,-2.34,1,238.29099999999997,0,1,2,58.56000000000001,-1.95,CN(C)C(=O)Oc1nc(nc(C)c1C)N(C)C
479
+ 1-Bromohexane,-3.012,1,165.074,0,0,4,0.0,-3.81,CCCCCCBr
480
+ 2-Methylpentane,-2.6,1,86.178,0,0,2,0.0,-3.74,CCCC(C)C
481
+ Tetrafluthrin,-6.3389999999999995,1,418.7360000000001,0,2,4,26.3,-7.321000000000001,Cc1c(F)c(F)c(COC(=O)C2C(C=C(Cl)C(F)(F)F)C2(C)C)c(F)c1F
482
+ Metolachlor,-3.431,1,283.7989999999999,0,1,6,29.54,-2.73,CCc1cccc(C)c1N(C(C)COC)C(=O)CCl
483
+ nifuroxime,-1.8430000000000002,1,156.09699999999998,1,1,2,88.87,-2.19,ON=Cc1ccc(o1)N(=O)=O
484
+ Fluvalinate,-8.057,1,502.9200000000002,1,3,8,71.35,-8.003,CC(C)C(Nc1ccc(cc1Cl)C(F)(F)F)C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2
485
+ Amitrole,-0.674,1,84.082,2,1,0,67.59,0.522,Nc1nc[nH]n1
486
+ Tribromomethane,-2.904,1,252.731,0,0,0,0.0,-1.91,BrC(Br)Br
487
+ Trichlorfon,-1.8659999999999999,1,257.437,1,0,3,55.760000000000005,-0.22,COP(=O)(OC)C(O)C(Cl)(Cl)Cl
488
+ Phosalone,-5.024,1,367.8160000000001,0,2,7,53.6,-5.233,CCOP(=S)(OCC)SCn1c(=O)oc2cc(Cl)ccc12
489
+ Phenylmethanol,-1.699,1,108.13999999999997,1,1,1,20.23,-0.4,OCc1ccccc1
490
+ Coumatetralyl,-5.194,1,292.33400000000006,1,4,1,50.44,-2.84,O=c2c(C3CCCc4ccccc43)c(O)c1ccccc1o2
491
+ 4-Bromophenol,-3.1319999999999997,1,173.00900000000001,1,1,0,20.23,-1.09,Oc1ccc(Br)cc1
492
+ 2-Bromopropane,-1.949,1,122.993,0,0,0,0.0,-1.59,CC(C)Br
493
+ "2,2,4-Trimethylpentane",-3.2760000000000002,1,114.23199999999999,0,0,1,0.0,-4.74,CC(C)CC(C)(C)C
494
+ "1,3,5-Trinitrobenzene",-2.324,1,213.10499999999996,0,1,3,129.42000000000002,-2.89,O=N(=O)c1cc(cc(c1)N(=O)=O)N(=O)=O
495
+ Nimetazepam,-3.557,1,295.29800000000006,0,3,2,75.81,-3.7960000000000003,CN2C(=O)CN=C(c1ccccc1)c3cc(ccc23)N(=O)=O
496
+ Propane,-1.5530000000000002,1,44.096999999999994,0,0,0,0.0,-1.94,CCC
497
+ Minoxidil,-1.8090000000000002,1,209.25299999999996,2,2,1,95.10999999999999,-1.989,Nc1cc(nc(N)n1=O)N2CCCCC2
498
+ 1-aminoacridine,-3.542,1,194.23700000000002,1,3,0,38.91,-4.22,Nc2cccc3nc1ccccc1cc23
499
+ Benzo(k)fluoranthene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.49,c1ccc2cc3c4cccc5cccc(c3cc2c1)c45
500
+ Dicofol,-6.268,1,370.49,1,2,2,20.23,-5.666,OC(c1ccc(Cl)cc1)(c2ccc(Cl)cc2)C(Cl)(Cl)Cl
501
+ Acenapthene,-3.792,2,154.21199999999996,0,3,0,0.0,-4.63,C1Cc2cccc3cccc1c23
502
+ Dialifos,-5.026,1,393.85400000000016,0,2,8,55.84,-6.34,CCOP(=S)(OCC)SC(CCl)N2C(=O)c1ccccc1C2=O
503
+ "1,4-Dibromobenzene",-4.298,1,235.90599999999998,0,1,0,0.0,-4.07,Brc1ccc(Br)cc1
504
+ Methazole,-3.6010000000000004,1,261.064,0,2,1,57.14,-2.82,Cn2c(=O)on(c1ccc(Cl)c(Cl)c1)c2=O
505
+ p-Phenylphenol,-3.701,1,170.211,1,2,1,20.23,-3.48,Oc1ccc(cc1)c2ccccc2
506
+ pyracarbolid,-2.83,1,217.26800000000003,1,2,2,38.33,-2.56,CC1=C(CCCO1)C(=O)Nc2ccccc2
507
+ Ethyl vinyl ether,-0.857,1,72.10700000000001,0,0,2,9.23,-0.85,CCOC=C
508
+ 1-Butyne,-1.092,1,54.09199999999999,0,0,0,0.0,-1.24,CCC#C
509
+ 4-methoxypteridine,-1.589,1,162.15200000000002,0,2,1,60.790000000000006,-1.11,COc1ncnc2nccnc12
510
+ 3-Methyl-3-heptanol,-2.017,1,130.23099999999997,1,0,4,20.23,-1.6,CCCCC(C)(O)CC
511
+ "1,4-Dichlorobenzene",-3.5580000000000003,1,147.00400000000002,0,1,0,0.0,-3.27,Clc1ccc(Cl)cc1
512
+ 3-Ethanoyloxymethylphenytoin,-2.7230000000000003,1,324.33600000000007,1,3,4,75.71,-4.47,O=C1N(COC(=O)C)C(=O)C(N1)(c2ccccc2)c3ccccc3
513
+ "Sparsomycin (3,8mg/ml)",-1.57,1,361.4450000000001,4,1,8,132.11999999999998,-1.9809999999999999,CSCS(=O)CC(CO)NC(=O)C=Cc1c(C)[nH]c(=O)[nH]c1=O
514
+ 3-methylindole,-2.9810000000000003,1,131.17799999999997,1,2,0,15.79,-2.42,Cc1c[nH]c2ccccc12
515
+ 2-methoxypteridine,-1.589,1,162.152,0,2,1,60.790000000000006,-1.11,COc2ncc1nccnc1n2
516
+ Dioxacarb,-1.614,1,223.22799999999995,1,2,2,56.790000000000006,-1.57,CNC(=O)Oc1ccccc1C2OCCO2
517
+ isocarbamid,-1.508,1,185.22699999999998,2,1,2,61.440000000000005,-2.15,C1N(C(=O)NCC(C)C)C(=O)NC1
518
+ Acetonitrile,0.152,1,41.053,0,0,0,23.79,0.26,CC#N
519
+ Fenoxycarb,-4.662,1,301.34200000000004,1,2,7,56.790000000000006,-4.7,CCOC(=O)NCCOc2ccc(Oc1ccccc1)cc2
520
+ acetyl sulfisoxazole,-2.024,1,293.34800000000007,1,2,3,89.43,-3.59,CC(=O)N(S(=O)c1ccc(N)cc1)c2onc(C)c2C
521
+ "1,1,1,2-Tetrachloroethane",-2.7939999999999996,1,167.85,0,0,0,0.0,-2.18,ClCC(Cl)(Cl)Cl
522
+ 1-Butanol,-0.688,1,74.12299999999999,1,0,2,20.23,0.0,CCCCO
523
+ Siduron,-3.779,1,232.32700000000003,2,2,2,41.13,-4.11,CC1CCCCC1NC(=O)Nc2ccccc2
524
+ "1,3,5-Trichlorobenzene",-4.159,1,181.449,0,1,0,0.0,-4.48,Clc1cc(Cl)cc(Cl)c1
525
+ Furfural,-1.391,1,96.08499999999998,0,1,1,30.21,-0.1,O=Cc1ccco1
526
+ 3-Methylbutan-1-ol,-1.0270000000000001,1,88.14999999999999,1,0,2,20.23,-0.51,CC(C)CCO
527
+ piperonal,-2.033,1,150.13299999999998,0,2,1,35.53,-1.63,O=Cc2ccc1OCOc1c2
528
+ 2-Methylpropene,-1.5730000000000002,1,56.108000000000004,0,0,0,0.0,-2.33,CC(=C)C
529
+ Benzaldehyde,-1.999,1,106.12399999999997,0,1,1,17.07,-1.19,O=Cc1ccccc1
530
+ "2,3-Dimethyl-1,3-Butadiene",-2.052,1,82.146,0,0,1,0.0,-2.4,CC(=C)C(=C)C
531
+ Benfuracarb,-5.132999999999999,1,410.53600000000023,0,2,8,68.31,-4.71,CCOC(=O)CCN(SN(C)C(=O)Oc1cccc2CC(C)(C)Oc21)C(C)C
532
+ RTI 10,-2.7710000000000004,1,226.235,0,3,0,42.43,-3.6719999999999997,O2c1ccccc1N(C)C(=O)c3cccnc23
533
+ Fluorene ,-4.125,2,166.22299999999998,0,3,0,0.0,-5.0,C1c2ccccc2c3ccccc13
534
+ Methylcyclohexane ,-2.891,1,98.18900000000001,0,1,0,0.0,-3.85,CC1CCCCC1
535
+ sulfaguanidine,-0.706,1,214.25,4,1,2,122.05999999999999,-1.99,NC(=N)NS(=O)(=O)c1ccc(N)cc1
536
+ Methylparaben,-2.441,1,152.149,1,1,1,46.53,-1.827,COC(=O)c1ccc(O)cc1
537
+ 2-Methyltetrahydrofurane,-1.034,1,86.134,0,1,0,9.23,0.11,CC1CCCO1
538
+ Santonin,-2.43,1,246.30599999999995,0,3,0,43.370000000000005,-3.09,CC3C2CCC1(C)C=CC(=O)C(=C1C2OC3=O)C
539
+ Salicin,-0.975,1,286.28,5,2,4,119.61000000000001,-0.85,OCC2OC(Oc1ccccc1CO)C(O)C(O)C2O
540
+ 1-Iodopropane,-2.4859999999999998,1,169.993,0,0,1,0.0,-2.29,CCCI
541
+ Ametryn,-3.43,1,227.337,2,1,5,62.730000000000004,-3.04,CCNc1nc(NC(C)C)nc(SC)n1
542
+ 1-Propanol,-0.33399999999999996,1,60.096,1,0,1,20.23,0.62,CCCO
543
+ Hydroxyprogesterone-17a,-3.8760000000000003,1,330.4680000000001,1,4,1,54.37,-3.8169999999999997,CC(=O)C1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3CCC21C
544
+ 2-Pentanol,-0.97,1,88.14999999999999,1,0,2,20.23,-0.29,CCCC(C)O
545
+ benzoin,-3.148,1,212.248,1,2,3,37.3,-2.85,OC(C(=O)c1ccccc1)c2ccccc2
546
+ "2,4-Dimethylphenol",-2.6210000000000004,1,122.16699999999999,1,1,0,20.23,-1.19,Cc1ccc(O)c(C)c1
547
+ m-Chloronitrobenzene ,-2.9010000000000002,1,157.55599999999998,0,1,1,43.14,-2.77,Clc1cccc(c1)N(=O)=O
548
+ ampyrone,-1.192,1,203.245,1,2,1,52.95,-0.624,Cc2c(N)c(=O)n(c1ccccc1)n2C
549
+ "2,2',4,5'-PCB",-6.23,1,291.99199999999996,0,2,1,0.0,-6.57,Clc1ccc(c(Cl)c1)c2cc(Cl)ccc2Cl
550
+ "Hexachloro-1,3-butadiene",-4.546,1,260.762,0,0,1,0.0,-4.92,ClC(=C(Cl)C(=C(Cl)Cl)Cl)Cl
551
+ Terbutryn,-3.75,1,241.364,2,1,4,62.730000000000004,-4.0,CCNc1nc(NC(C)(C)C)nc(SC)n1
552
+ 3-Methyl-2-pentanol,-1.308,1,102.17699999999999,1,0,3,20.23,-0.71,CCC(C)CCO
553
+ 2-methylpteridine,-1.24,1,146.153,0,2,0,51.56,-0.12,Cc2ncc1nccnc1n2
554
+ Danazol,-4.5569999999999995,1,337.4630000000001,1,5,0,46.260000000000005,-5.507000000000001,CC23Cc1cnoc1C=C2CCC4C3CCC5(C)C4CCC5(O)C#C
555
+ 1-Iodobutane,-2.841,1,184.01999999999998,0,0,2,0.0,-2.96,CCCCI
556
+ 2-Bromonapthalene,-4.434,1,207.07,0,2,0,0.0,-4.4,Brc1ccc2ccccc2c1
557
+ "Digoxin (L1=41,8mg/mL, L2=68,2mg/mL, Z=40,1mg/mL)",-5.312,1,780.9490000000001,6,8,7,203.05999999999997,-4.081,CC1OC(CC(O)C1O)OC2C(O)CC(OC2C)OC8C(O)CC(OC7CCC3(C)C(CCC4C3CC(O)C5(C)C(CCC45O)C6=CC(=O)OC6)C7)OC8C
558
+ Benzyltrifluoride,-3.0989999999999998,1,146.111,0,1,0,0.0,-2.51,FC(F)(F)c1ccccc1
559
+ Dihexyl phthalate,-5.757999999999999,1,334.45600000000024,0,1,12,52.60000000000001,-6.144,CCCCCCOC(=O)c1ccccc1C(=O)OCCCCCC
560
+ Dibenzothiophene,-4.5969999999999995,2,184.263,0,3,0,0.0,-4.38,c1ccc2c(c1)sc3ccccc23
561
+ "2,3',4,4'-PCB",-6.709,1,326.437,0,2,1,0.0,-7.8,Clc1ccc(c(Cl)c1)c2ccc(Cl)c(Cl)c2Cl
562
+ "2,2',3,3',4,4'-PCB",-7.192,1,360.88200000000006,0,2,1,0.0,-8.01,Clc1ccc(c(Cl)c1Cl)c2ccc(Cl)c(Cl)c2Cl
563
+ Warfarin,-3.9130000000000003,1,308.3330000000001,1,3,4,67.50999999999999,-3.8930000000000002,CC(=O)CC(c1ccccc1)c3c(O)c2ccccc2oc3=O
564
+ hydrobenzoin,-2.645,1,214.264,2,2,3,40.46,-1.93,c1ccccc1C(O)C(O)c2ccccc2
565
+ Dimethyl phthalate,-2.347,1,194.18599999999995,0,1,2,52.60000000000001,-1.66,COC(=O)c1ccccc1C(=O)OC
566
+ Ethyl octanoate,-2.9619999999999997,1,172.26799999999997,0,0,7,26.3,-3.39,CCCCCCCC(=O)OCC
567
+ Diethyldisulfide,-2.364,1,122.258,0,0,3,0.0,-2.42,CCSSCC
568
+ "1,2-Diethoxyethane ",-0.833,1,118.176,0,0,5,18.46,-0.77,CCOCCOCC
569
+ "1,2,4,5-Tetrachlorobenzene",-4.621,1,215.894,0,1,0,0.0,-5.56,Clc1cc(Cl)c(Cl)cc1Cl
570
+ p-benzidine,-2.613,1,184.242,2,2,1,52.04,-2.7,Nc1ccc(cc1)c2ccc(N)cc2
571
+ 1-Heptene,-2.718,1,98.189,0,0,4,0.0,-3.73,CCCCCC=C
572
+ Ethirimol,-2.7319999999999998,1,209.29299999999998,2,1,5,57.78,-3.028,CCCCc1c(C)nc(NCC)[nH]c1=O
573
+ Pentobarbital,-2.312,1,226.27599999999995,2,1,4,75.27000000000001,-2.39,O=C1NC(=O)NC(=O)C1(CC)C(C)CCC
574
+ o-Chloroaniline,-2.392,1,127.574,1,1,0,26.02,-1.52,Nc1ccccc1Cl
575
+ 3-Chloroanisole,-3.057,1,142.58499999999998,0,1,1,9.23,-2.78,COc1cccc(Cl)c1
576
+ Pebulate,-3.1310000000000002,1,203.35099999999997,0,0,6,20.310000000000002,-3.53,CCCCN(CC)C(=O)SCCC
577
+ Butyl acetate,-1.111,1,102.13299999999998,0,0,4,26.3,-1.37,CCCCOC=O
578
+ Prednisolone,-2.9739999999999998,1,360.4500000000002,3,4,2,94.83,-3.18,CC12CC(O)C3C(CCC4=CC(=O)C=CC34C)C2CCC1(O)C(=O)CO
579
+ Bromodichloromethane,-2.176,1,163.82899999999998,0,0,0,0.0,-1.54,BrC(Cl)Cl
580
+ adrenosterone,-2.99,1,300.3980000000001,0,4,0,51.21,-3.48,CC34CC(=O)C1C(CCC2=CC(=O)CCC12C)C3CCC4(=O)
581
+ p-terphenyl,-5.7410000000000005,2,230.31,0,3,2,0.0,-7.11,c1ccc(cc1)c2ccc(cc2)c3ccccc3
582
+ p-Hydroxybenzaldehyde ,-2.003,1,122.12299999999998,1,1,1,37.3,-0.96,Oc1ccc(C=O)cc1
583
+ Bromomethane,-1.109,1,94.939,0,0,0,0.0,-0.79,CBr
584
+ Perfluidone,-4.945,1,379.38100000000003,1,2,4,80.31,-3.8,Cc1cc(ccc1NS(=O)(=O)C(F)(F)F)S(=O)(=O)c2ccccc2
585
+ Coumachlor,-4.553999999999999,1,342.7780000000001,1,3,4,67.50999999999999,-5.8389999999999995,CC(=O)CC(c1ccc(Cl)cc1)c2c(O)c3ccccc3oc2=O
586
+ 2-Ethylnaphthalene,-4.1,1,156.22799999999998,0,2,1,0.0,-4.29,CCc1ccc2ccccc2c1
587
+ 5-methylcytosine,-0.257,1,125.13099999999999,2,1,0,71.77000000000001,-1.4580000000000002,Nc1c(C)c[nH]c(=O)n1
588
+ "2,3,4,5,6-PCB",-6.785,1,326.437,0,2,1,0.0,-7.92,Clc2c(Cl)c(Cl)c(c1ccccc1)c(Cl)c2Cl
589
+ benodanil,-4.245,1,323.133,1,2,2,29.1,-4.21,c1c(NC(=O)c2ccccc2(I))cccc1
590
+ Riboflavin,-1.865,1,376.36900000000014,5,3,5,161.56,-3.685,Cc3cc2nc1c(=O)[nH]c(=O)nc1n(CC(O)C(O)C(O)CO)c2cc3C
591
+ o-Fluorobromobenzene,-3.467,1,175.0,0,1,0,0.0,-2.7,Fc1ccccc1Br
592
+ "2,4-Dichlorophenol ",-3.22,1,163.003,1,1,0,20.23,-1.55,Oc1ccc(Cl)cc1Cl
593
+ Permethrin,-7.129,1,391.2940000000001,0,3,6,35.53,-6.291,CC1(C)C(C=C(Cl)Cl)C1C(=O)OCc2cccc(Oc3ccccc3)c2
594
+ piroxicam,-3.4730000000000003,1,331.353,2,3,2,99.60000000000001,-4.16,CN2C(=C(O)c1ccccc1S2(=O)=O)C(=O)Nc3ccccn3
595
+ 3-Propanoyloxymethylphenytoin,-3.128,1,338.36300000000006,1,3,5,75.71,-4.907,O=C1N(COC(=O)CC)C(=O)C(N1)(c2ccccc2)c3ccccc3
596
+ Cyclopentane ,-2.0380000000000003,2,70.135,0,1,0,0.0,-2.64,C1CCCC1
597
+ o-Toluidine,-1.922,1,107.156,1,1,0,26.02,-2.21,Cc1ccccc1N
598
+ Estragole,-3.074,1,148.205,0,1,3,9.23,-2.92,c1(OC)ccc(CC=C)cc1
599
+ karbutilate,-2.655,1,279.34,2,1,2,70.67,-2.93,CN(C)C(=O)Nc1cccc(OC(=O)NC(C)(C)C)c1
600
+ 3-Methyl-1-Butene,-1.994,1,70.135,0,0,1,0.0,-2.73,CC(C)C=C
601
+ 2-Hydroxypyridine,-1.655,1,95.101,1,1,0,33.120000000000005,1.02,Oc1ccccn1
602
+ Ethane,-1.1320000000000001,1,30.07,0,0,0,0.0,-1.36,CC
603
+ "1,2-Dichlorobenzene",-3.4819999999999998,1,147.00399999999996,0,1,0,0.0,-3.05,Clc1ccccc1Cl
604
+ mercaptobenzothiazole,-3.411,1,167.25799999999998,1,2,0,12.89,-3.18,Sc2nc1ccccc1s2
605
+ "2,2',3,3',4,4',5,5',6,6'-PCB",-9.589,1,498.66200000000026,0,2,1,0.0,-11.6,Clc1c(Cl)c(Cl)c(c(Cl)c1Cl)c2c(Cl)c(Cl)c(Cl)c(Cl)c2Cl
606
+ Methoxsalen,-3.25,1,216.19199999999995,0,3,1,52.58,-3.6639999999999997,COc2c1occc1cc3ccc(=O)oc23
607
+ Acetamide,0.494,1,59.068,1,0,0,43.09,1.58,CC(=O)N
608
+ 1-Methylnaphthalene,-3.802,1,142.201,0,2,0,0.0,-3.7,Cc1cccc2ccccc12
609
+ Napropamide,-4.088,1,271.36,0,2,5,29.540000000000003,-3.57,CCN(CC)C(=O)C(C)Oc1cccc2ccccc12
610
+ "3,3-Dimethyl-2-butanol",-1.2919999999999998,1,102.17699999999999,1,0,0,20.23,-0.62,CC(O)C(C)(C)C
611
+ Methyl pentanoate,-1.545,1,116.15999999999998,0,0,3,26.3,-1.36,CCCC(=O)OCC
612
+ Menadione,-2.667,1,172.18299999999996,0,2,0,34.14,-3.03,CC2=CC(=O)c1ccccc1C2=O
613
+ Phenanthrene,-4.518,2,178.23399999999998,0,3,0,0.0,-5.26,c1ccc2c(c1)ccc3ccccc32
614
+ "2,4-Dimethylpyridine",-2.0980000000000003,1,107.15599999999999,0,1,0,12.89,0.38,Cc1ccnc(C)c1
615
+ 1-Nonanol,-2.46,1,144.258,1,0,7,20.23,-3.01,CCCCCCCCCO
616
+ Dibromomethane,-1.883,1,173.83499999999998,0,0,0,0.0,-1.17,BrCBr
617
+ Dexamethasone,-3.4,1,392.4670000000002,3,4,2,94.83,-3.59,CC1CC2C3CCC4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO
618
+ "2,6-Dimethylnaphthalene ",-4.147,1,156.228,0,2,0,0.0,-4.89,Cc1ccc2cc(C)ccc2c1
619
+ Butylate,-3.4530000000000003,1,217.378,0,0,5,20.310000000000002,-3.68,CCSC(=O)N(CC(C)C)CC(C)C
620
+ nitroglycerin,-2.029,1,227.08499999999998,0,0,8,157.11,-2.22,O=N(=O)OCC(CON(=O)=O)ON(=O)=O
621
+ m-Nitroaniline,-1.936,1,138.126,1,1,1,69.16,-2.19,Nc1cccc(c1)N(=O)=O
622
+ 1-Chlorobutane,-1.94,1,92.56899999999999,0,0,2,0.0,-2.03,CCCCCl
623
+ triforine,-3.715,1,430.9340000000001,2,1,6,64.68,-4.19,ClC(Cl)(Cl)C(NC=O)N1C=CN(C=C1)C(NC=O)C(Cl)(Cl)Cl
624
+ Fluridone,-4.249,1,329.32099999999997,0,3,2,22.0,-4.445,Cn2cc(c1ccccc1)c(=O)c(c2)c3cccc(c3)C(F)(F)F
625
+ 6-aminochrysene,-4.849,1,243.309,1,4,0,26.02,-6.2,Nc3cc2c1ccccc1ccc2c4ccccc34
626
+ Estrone,-3.872,1,270.372,1,4,0,37.3,-3.955,CC12CCC3C(CCc4cc(O)ccc34)C2CCC1=O
627
+ RTI 17,-4.227,1,269.373,0,3,1,19.37,-4.706,CCN2c1ccccc1N(C)C(=S)c3cccnc23
628
+ "1,2-Propylene oxide",-0.358,1,58.08,0,1,0,12.53,-0.59,CC1CO1
629
+ Nitrazepam,-3.4730000000000003,1,281.271,1,3,2,84.6,-3.7960000000000003,O=C3CN=C(c1ccccc1)c2cc(ccc2N3)N(=O)=O
630
+ "1,3-diethylthiourea",-1.028,1,132.232,2,0,2,24.06,-1.46,CCNC(=S)NCC
631
+ "2,3,5-Trichlorophenol",-3.78,1,197.44799999999998,1,1,0,20.23,-2.67,Oc1cc(Cl)cc(Cl)c1Cl
632
+ Propyl propanoate,-1.545,1,116.15999999999998,0,0,3,26.3,-1.34,CCCCC(=O)OC
633
+ Aniline ,-1.632,1,93.12899999999999,1,1,0,26.02,-0.41,Nc1ccccc1
634
+ "1,5-Dimethlnapthalene",-4.147,1,156.228,0,2,0,0.0,-4.678999999999999,Cc1cccc2c(C)cccc12
635
+ hydrochlorothiazide,-1.72,1,297.745,3,2,1,118.35999999999999,-2.63,NS(=O)(=O)c2cc1c(NCNS1(=O)=O)cc2Cl
636
+ Acenapthylene,-3.682,2,152.19599999999994,0,3,0,0.0,-3.96,C1=Cc2cccc3cccc1c23
637
+ Ethyl butyrate,-2.254,1,144.21399999999997,0,0,5,26.3,-1.28,CCCCCOC(=O)CC
638
+ Atratone,-3.185,1,211.26899999999998,2,1,5,71.96000000000001,-2.084,CCNc1nc(NC(C)C)nc(OC)n1
639
+ Benzo(a)pyrene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.699,c1ccc2c(c1)cc3ccc4cccc5ccc2c3c45
640
+ Bromoethane,-1.5290000000000001,1,108.966,0,0,0,0.0,-1.09,CCBr
641
+ 3-Hexyne,-1.933,1,82.14599999999999,0,0,0,0.0,-1.99,CCC#CCC
642
+ Digitoxin,-6.114,1,764.9499999999999,5,8,7,182.82999999999998,-5.292999999999999,CC1OC(CC(O)C1O)OC2C(O)CC(OC2C)OC8C(O)CC(OC7CCC3(C)C(CCC4C3CCC5(C)C(CCC45O)C6=CC(=O)OC6)C7)OC8C
643
+ 2-Methyl-1-Butene,-1.994,1,70.13499999999999,0,0,1,0.0,-2.73,CCC(=C)C
644
+ 8-quinolinol,-2.725,1,145.16099999999997,1,2,0,33.120000000000005,-2.42,Oc1cccc2cccnc12
645
+ "1,2,3,4-Tetrahydronapthalene",-3.447,2,132.20599999999996,0,2,0,0.0,-4.37,C1CCc2ccccc2C1
646
+ phenolphthalein,-4.59,1,318.32800000000003,2,4,2,66.76,-2.9,Oc1ccc(cc1)C2(OC(=O)c3ccccc23)c4ccc(O)cc4
647
+ "1,3,5-Tribromobenzene",-5.27,1,314.802,0,1,0,0.0,-5.6,Brc1cc(Br)cc(Br)c1
648
+ Ronnel,-5.247000000000001,1,321.549,0,1,4,27.69,-5.72,COP(=S)(OC)Oc1cc(Cl)c(Cl)cc1Cl
649
+ methylthiouracil,-0.547,1,142.18300000000002,2,1,0,48.65,-2.436,Cc1cc(=O)[nH]c(=S)[nH]1
650
+ Eugenol,-2.675,1,164.204,1,1,3,29.46,-1.56,COc1cc(CC=C)ccc1O
651
+ 5-Allyl-5-isopropylbarbital,-1.706,1,210.23299999999998,2,1,3,75.27000000000001,-1.7080000000000002,O=C1NC(=O)NC(=O)C1(C(C)C)CC=C
652
+ Pyrene,-4.957,2,202.25599999999997,0,4,0,0.0,-6.176,c1cc2ccc3cccc4ccc(c1)c2c34
653
+ "1,1-Diethoxyethane ",-0.899,1,118.176,0,0,4,18.46,-0.43,CCOC(C)OCC
654
+ Clomazone,-3.077,1,239.702,0,2,2,29.54,-2.338,CC1(C)CON(Cc2ccccc2Cl)C1=O
655
+ 2-Butoxyethanol,-0.775,1,118.17599999999999,1,0,5,29.46,-0.42,CCCCOCCO
656
+ Quintozene,-5.098,1,295.336,0,1,1,43.14,-5.82,Clc1c(Cl)c(Cl)c(N(=O)=O)c(Cl)c1Cl
657
+ Androsterone,-3.8819999999999997,1,290.447,1,4,0,37.3,-4.402,CC12CCC(O)CC1CCC3C2CCC4(C)C3CCC4=O
658
+ Flurochloridone,-4.749,1,312.118,0,2,2,20.310000000000002,-4.047,FC(F)(F)c1cccc(c1)N2CC(CCl)C(Cl)C2=O
659
+ Quinoline,-2.6630000000000003,2,129.16199999999998,0,2,0,12.89,-1.3,c1ccc2ncccc2c1
660
+ methyl gallate,-1.913,1,184.147,3,1,1,86.99000000000001,-1.24,COC(=O)c1cc(O)c(O)c(O)c1
661
+ fluconazole,-2.418,1,306.276,1,3,5,81.64999999999999,-1.8,OC(Cn1cncn1)(Cn2cncn2)c3ccc(F)cc3F
662
+ Chlorzoxazone,-2.679,1,169.567,1,2,0,46.0,-2.8310000000000004,Clc2ccc1oc(=O)[nH]c1c2
663
+ "2,2',3,4,4',5',6-PCB",-7.898,1,395.3270000000001,0,2,1,0.0,-7.92,Clc1ccc(c(Cl)c1)c2c(Cl)c(Cl)c(Cl)c(Cl)c2Cl
664
+ alloxan,0.436,1,142.07,2,1,0,92.34,-1.25,O=C1NC(=O)C(=O)C(=O)N1
665
+ "1,3-Dichloropropane",-1.618,1,112.98700000000001,0,0,2,0.0,-1.62,ClCCCCl
666
+ m-Fluorobromobenzene,-3.467,1,175.0,0,1,0,0.0,-2.67,Fc1cccc(Br)c1
667
+ p-Chlorobromobenzene,-3.928,1,191.45499999999998,0,1,0,0.0,-3.63,Clc1ccc(Br)cc1
668
+ "2,3-Dimethylbutane",-2.5839999999999996,1,86.178,0,0,1,0.0,-3.65,CC(C)C(C)C
669
+ 1-Butene,-1.655,1,56.108,0,0,1,0.0,-1.94,CCC=C
670
+ "2,2',3,4,5,5'-PCB",-7.343,1,360.88200000000006,0,2,1,0.0,-7.68,Clc1ccc(Cl)c(c1)c2cc(Cl)c(Cl)c(Cl)c2Cl
671
+ cytosine,0.051,1,111.104,2,1,0,71.77000000000001,-1.155,Nc1cc[nH]c(=O)n1
672
+ "1,1,2-Trichlorotrifluoroethane",-3.077,1,187.37500000000003,0,0,1,0.0,-3.04,FC(F)(Cl)C(F)(Cl)Cl
673
+ Propionitrile,-0.26899999999999996,1,55.07999999999999,0,0,0,23.79,0.28,CCC#N
674
+ "O,P'-DDD",-6.007999999999999,1,320.04600000000005,0,2,3,0.0,-6.51,ClC(Cl)C(c1ccc(Cl)cc1)c2ccccc2Cl
675
+ o-Nitroanisole,-2.346,1,153.13699999999997,0,1,2,52.37,-1.96,COc1ccccc1N(=O)=O
676
+ Prasterone,-3.5639999999999996,1,288.43100000000004,1,4,0,37.3,-4.12,CC34CCC1C(CC=C2CC(O)CCC12C)C3CCC4=O
677
+ Procymidone,-3.464,1,284.142,0,3,1,37.38,-4.8,CC12CC2(C)C(=O)N(C1=O)c3cc(Cl)cc(Cl)c3
678
+ Benzo[ghi]perylene,-6.446000000000001,2,276.338,0,6,0,0.0,-9.017999999999999,c1cc2ccc3ccc4ccc5cccc6c(c1)c2c3c4c56
679
+ Dinoseb,-3.715,1,240.21499999999995,1,1,4,106.51000000000002,-3.38,CCC(C)c1cc(cc(N(=O)=O)c1O)N(=O)=O
680
+ meconin,-0.825,1,196.20199999999997,0,2,2,44.760000000000005,-1.899,c1c(OC)c(OC)C2C(=O)OCC2c1
681
+ Glycerol,0.688,1,92.09400000000001,3,0,2,60.69,1.12,OCC(O)CO
682
+ Guaiacol,-1.9409999999999998,1,124.13899999999997,1,1,1,29.46,-1.96,COc1ccccc1O
683
+ chlorpyrifos,-4.9719999999999995,1,350.591,0,1,6,40.58,-5.67,CCOP(=S)(OCC)Oc1nc(Cl)c(Cl)cc1Cl
684
+ 9-Methylanthracene,-4.87,1,192.261,0,3,0,0.0,-5.89,Cc1c2ccccc2cc3ccccc13
685
+ Antipyrene,-1.733,1,188.23000000000002,0,2,1,26.93,0.715,Cc1cc(=O)n(c2ccccc2)n1C
686
+ Methyl butyl ether ,-1.072,1,88.14999999999999,0,0,3,9.23,-0.99,CCCCOC
687
+ 7-methylpteridine,-1.24,1,146.153,0,2,0,51.56,-0.8540000000000001,Cc2cnc1cncnc1n2
688
+ simazine,-2.8110000000000004,1,201.661,2,1,4,62.730000000000004,-4.55,CCNc1nc(Cl)nc(NCC)n1
689
+ "N,N-Dimethylacetamide",0.12300000000000001,1,87.12199999999999,0,0,0,20.310000000000002,1.11,CN(C)C(=O)C
690
+ Simetryn,-2.6889999999999996,1,213.31,0,1,3,45.150000000000006,-2.676,CSc1nc(nc(n1)N(C)C)N(C)C
691
+ Ethylene,-0.815,1,28.053999999999995,0,0,0,0.0,-0.4,C=C
692
+ "3,3-Dimethyl-1-butanol",-1.365,1,102.17699999999999,1,0,1,20.23,-0.5,CC(C)(C)CCO
693
+ 5-Allyl-5-ethylbarbital,-1.368,1,196.20599999999996,2,1,3,75.27000000000001,-1.614,O=C1NC(=O)NC(=O)C1(CC)CC=C
694
+ "2,3,4-Trichlorophenol",-3.705,1,197.448,1,1,0,20.23,-2.67,Oc1ccc(Cl)c(Cl)c1Cl
695
+ Anisole,-2.3680000000000003,1,108.13999999999997,0,1,1,9.23,-1.85,COc1ccccc1
696
+ chloropropylate,-5.093,1,339.21800000000013,1,2,4,46.53,-4.53,c1ccc(Cl)cc1C(c2ccc(Cl)cc2)(O)C(=O)OC(C)C
697
+ aldosterone,-3.0660000000000003,1,360.45000000000005,2,4,3,91.67000000000002,-3.85,CC13CCC(=O)C=C1CCC4C2CCC(C(=O)CO)C2(CC(O)C34)C=O
698
+ Difenoxuron,-3.928,1,286.331,1,2,4,50.800000000000004,-4.16,COc2ccc(Oc1ccc(NC(=O)N(C)C)cc1)cc2
699
+ 4-Ethyltoluene,-3.3280000000000003,1,120.19499999999996,0,1,1,0.0,-3.11,CCc1ccc(C)cc1
700
+ Diisopropylsulfide,-2.162,1,118.24499999999999,0,0,2,0.0,-2.24,CC(C)SC(C)C
701
+ "1,3-Dinitrobenzene",-2.281,1,168.10799999999995,0,1,2,86.28,-2.29,O=N(=O)c1cccc(c1)N(=O)=O
702
+ Ethion,-5.471,1,384.4870000000002,0,0,12,36.92,-5.54,CCOP(=S)(OCC)SCSP(=S)(OCC)OCC
703
+ probarbital,-1.6030000000000002,1,198.22199999999998,2,1,2,75.27000000000001,-2.21,CCC1(C(C)C)C(=O)NC(=O)NC1=O
704
+ cortisone acetate,-3.426,1,402.48700000000025,1,4,3,97.74000000000001,-4.21,CC(=O)OCC(=O)C3(O)CCC4C2CCC1=CC(=O)CCC1(C)C2C(=O)CC34C
705
+ Metronidazole,-0.8590000000000001,1,171.15599999999998,1,1,3,81.19,-1.22,Cc1ncc(N(=O)=O)n1CCO
706
+ p-Chloroaniline,-2.392,1,127.574,1,1,0,26.02,-1.66,Nc1ccc(Cl)cc1
707
+ "2,2-Dimethylpentanol",-1.719,1,116.20399999999998,1,0,3,20.23,-1.52,CCCC(C)(C)CO
708
+ Furane,-1.837,2,68.07499999999999,0,1,0,13.14,-0.82,c1ccoc1
709
+ Methoproptryne,-3.259,1,271.39,2,1,8,71.96000000000001,-2.928,COCCCNc1nc(NC(C)C)nc(SC)n1
710
+ Norea,-2.47,1,222.33199999999994,1,3,1,32.34,-3.1710000000000003,CN(C)C(=O)NC1CC2CC1C3CCCC23
711
+ t-Butylbenzene ,-3.554,1,134.22199999999998,0,1,0,0.0,-3.66,CC(C)(C)c1ccccc1
712
+ kebuzone,-2.645,1,322.36400000000003,0,3,5,57.690000000000005,-3.27,CC(=O)CCC1C(=O)N(N(C1=O)c2ccccc2)c3ccccc3
713
+ prednisolone acetate,-3.5069999999999997,1,402.48700000000014,2,4,3,100.90000000000002,-4.37,CC(=O)OCC(=O)C3(O)CCC4C2CCC1=CC(=O)C=CC1(C)C2C(O)CC34C
714
+ Methyl propyl ether ,-0.718,1,74.12299999999999,0,0,2,9.23,-0.39,CCCOC
715
+ Isopropyl acetate,-1.1909999999999998,1,102.133,0,0,1,26.3,-0.55,CC(C)OC(=O)C
716
+ Bromobenzene,-3.345,1,157.01,0,1,0,0.0,-2.55,Brc1ccccc1
717
+ Ethyl-p-hydroxybenzoate ,-2.761,1,166.176,1,1,2,46.53,-2.35,CCOC(=O)c1ccc(O)cc1
718
+ 3-Butanoyloxymethylphenytoin,-3.469,1,352.39000000000004,1,3,6,75.71,-5.071000000000001,O=C1N(COC(=O)CCC)C(=O)C(N1)(c2ccccc2)c3ccccc3
719
+ testosterone propionate,-4.87,1,344.4950000000001,0,4,2,43.370000000000005,-5.37,CCC(=O)OC3CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C
720
+ Coronene,-6.885,2,300.36000000000007,0,7,0,0.0,-9.332,c1cc2ccc3ccc4ccc5ccc6ccc1c7c2c3c4c5c67
721
+ allopurinol,-0.84,1,136.114,2,2,0,74.43,-2.266,O=c1[nH]cnc2[nH]ncc12
722
+ Chloroethylene,-1.188,1,62.499,0,0,0,0.0,-1.75,ClC=C
723
+ diphenamid,-3.147,1,239.318,0,2,3,20.310000000000002,-2.98,CN(C)C(=O)C(c1ccccc1)c2ccccc2
724
+ Tetrabromomethane,-4.063,1,331.62699999999995,0,0,0,0.0,-3.14,BrC(Br)(Br)Br
725
+ RTI 22,-4.408,1,296.374,1,3,2,48.47,-4.871,CCN2c1cc(N(C)C)cc(C)c1NC(=O)c3cccnc23
726
+ phthalimide,-1.882,1,147.13299999999998,1,2,0,46.17,-2.61,O=C1NC(=O)c2ccccc12
727
+ Fenarimol,-4.1080000000000005,1,331.202,1,3,3,46.010000000000005,-4.38,OC(c1ccc(Cl)cc1)(c2cncnc2)c3ccccc3Cl
728
+ Methyl benzoate ,-2.4619999999999997,1,136.14999999999998,0,1,1,26.3,-1.85,COC(=O)c1ccccc1
729
+ 1-methyluracil,-0.375,1,126.115,1,1,0,54.86,-0.807,Cn1ccc(=O)[nH]c1=O
730
+ oxyphenbutazone,-3.739,1,324.38000000000005,1,3,5,60.85000000000001,-3.73,CCCCC1C(=O)N(N(C1=O)c2ccc(O)cc2)c3ccccc3
731
+ "2,2',3,5'-PCB",-6.155,1,291.9920000000001,0,2,1,0.0,-6.47,Clc1ccc(Cl)c(c1)c2cccc(Cl)c2Cl
732
+ Quinethazone,-2.184,1,289.7440000000001,3,2,2,101.28999999999999,-3.29,CCC2NC(=O)c1cc(c(Cl)cc1N2)S(N)(=O)=O
733
+ Diuron,-3.301,1,233.09799999999998,1,1,1,32.34,-3.8,CN(C)C(=O)Nc1ccc(Cl)c(Cl)c1
734
+ Cyclopentene ,-1.72,2,68.11900000000001,0,1,0,0.0,-2.1,C1CC=CC1
735
+ alloxantin,0.919,1,286.156,6,2,1,191.0,-1.99,C1(=O)NC(=O)NC(=O)C1(O)C2(O)C(=O)NC(=O)NC2(=O)
736
+ Nonane,-3.678,1,128.259,0,0,6,0.0,-5.88,CCCCCCCCC
737
+ 2-Chlorophenol,-2.553,1,128.558,1,1,0,20.23,-1.06,Oc1ccccc1Cl
738
+ 5-Methylchrysene,-5.931,1,242.321,0,4,0,0.0,-6.59,c1cccc2c3c(C)cc4ccccc4c3ccc12
739
+ Phenetole,-2.66,1,122.16699999999996,0,1,2,9.23,-2.33,CCOc1ccccc1
740
+ ethyl cinnamate,-3.0980000000000003,1,176.215,0,1,3,26.3,-3.0,CCOC(=O)C=Cc1ccccc1
741
+ Terbacil,-3.033,1,216.66799999999998,1,1,0,54.86,-2.484,Cc1[nH]c(=O)n(c(=O)c1Cl)C(C)(C)C
742
+ Clonazepam,-3.707,1,315.716,1,3,2,84.6,-3.4989999999999997,Clc1ccccc1C2=NCC(=O)Nc3ccc(cc23)N(=O)=O
743
+ p-Toluenesulfonamide ,-1.815,1,171.22099999999998,1,1,1,60.16,-1.74,Cc1ccc(cc1)S(=O)(=O)N
744
+ Chlorbufam,-3.6289999999999996,1,223.659,1,1,2,38.33,-2.617,CC(OC(=O)Nc1cccc(Cl)c1)C#C
745
+ 2-Methylheptane,-3.3080000000000003,1,114.23199999999999,0,0,4,0.0,-5.08,CCCCCC(C)C
746
+ Cyhalothrin,-6.905,1,449.8560000000001,0,3,6,59.32000000000001,-8.176,CC1(C)C(C=C(Cl)C(F)(F)F)C1C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2
747
+ Apazone,-2.9,1,300.3620000000001,0,3,2,56.220000000000006,-3.5380000000000003,CCCC1C(=O)N3N(C1=O)c2cc(C)ccc2N=C3N(C)C
748
+ Diazepam,-4.05,1,284.74600000000004,0,3,1,32.67,-3.7539999999999996,CN2C(=O)CN=C(c1ccccc1)c3cc(Cl)ccc23
749
+ 2-Methyl-3-pentanol,-1.308,1,102.17699999999999,1,0,2,20.23,-0.7,CCC(O)C(C)C
750
+ fensulfothion,-3.283,1,308.36100000000005,0,1,7,44.760000000000005,-2.3,CCOP(=S)(OCC)Oc1ccc(cc1)S(C)=O
751
+ borneol,-2.423,1,154.253,1,2,0,20.23,-2.32,CC1(C)C2CCC1(C)C(O)C2
752
+ Testosterone,-3.659,1,288.431,1,4,0,37.3,-4.02,CC12CCC3C(CCC4=CC(=O)CCC34C)C2CCC1O
753
+ Heptane,-2.97,1,100.205,0,0,4,0.0,-4.53,CCCCCCC
754
+ 1-Napthol,-3.08,1,144.17299999999997,1,2,0,20.23,-2.22,Oc1cccc2ccccc12
755
+ "cis-1,2-Dimethylcyclohexane",-3.305,1,112.216,0,1,0,0.0,-4.3,C/C1CCCCC1\C
756
+ Trimazosin,-3.958,1,435.48100000000034,2,3,6,132.5,-3.638,COc2cc1c(N)nc(nc1c(OC)c2OC)N3CCN(CC3)C(=O)OCC(C)(C)O
757
+ Cholanthrene,-5.942,2,254.33199999999997,0,5,0,0.0,-7.85,C1Cc2c3c1cccc3cc4c2ccc5ccccc54
758
+ Medrogestone,-4.593,1,340.5070000000001,0,4,1,34.14,-5.27,CC(=O)C3(C)CCC4C2C=C(C)C1=CC(=O)CCC1(C)C2CCC34C
759
+ 2-Heptanone,-1.554,1,114.18799999999999,0,0,4,17.07,-1.45,CCCCCC(=O)C
760
+ Acephate,-0.41600000000000004,1,183.16899999999998,1,0,3,55.4,0.54,COP(=O)(NC(C)=O)SC
761
+ DEF,-4.074,1,314.5220000000001,0,0,12,17.07,-5.14,CCCCSP(=O)(SCCCC)SCCCC
762
+ phthalamide,-0.636,1,149.149,1,2,0,46.17,-2.932,c1cC2C(=O)NC(=O)C2cc1
763
+ Trichlomethiazide,-2.98,1,380.66200000000003,3,2,2,118.35999999999999,-2.68,NS(=O)(=O)c2cc1c(NC(NS1(=O)=O)C(Cl)Cl)cc2Cl
764
+ 2-Methy-2-Butene,-1.994,1,70.13499999999999,0,0,0,0.0,-2.56,CC=C(C)C
765
+ "1,2,4-Trimethylbenzene",-3.343,1,120.195,0,1,0,0.0,-3.31,Cc1ccc(C)c(C)c1
766
+ "2,4,5-Trichlorophenol ",-3.78,1,197.448,1,1,0,20.23,-2.21,Oc1cc(Cl)c(Cl)cc1Cl
767
+ phenanthridine,-3.713,2,179.22199999999998,0,3,0,12.89,-2.78,c1ccc2c(c1)cnc3ccccc23
768
+ 3-Methyl-3-hexanol,-1.663,1,116.20399999999998,1,0,3,20.23,-0.98,CCCC(C)(O)CC
769
+ Octane,-3.324,1,114.232,0,0,5,0.0,-5.24,CCCCCCCC
770
+ Anthracene,-4.518,2,178.23399999999995,0,3,0,0.0,-6.35,c1ccc2cc3ccccc3cc2c1
771
+ Phenylhydrazine,-1.8659999999999999,1,108.14399999999998,2,1,1,38.05,0.07,NNc1ccccc1
772
+ Propionaldehyde,-0.39399999999999996,1,58.08,0,0,1,17.07,0.58,CCC=O
773
+ Cyclooctane,-3.355,2,112.21600000000001,0,1,0,0.0,-4.15,C1CCCCCCC1
774
+ "5,5-Diallylbarbital",-1.4709999999999999,1,208.21699999999996,2,1,4,75.27000000000001,-2.077,O=C1NC(=O)NC(=O)C1(CC=C)CC=C
775
+ Trichloromethane,-1.8119999999999998,1,119.37800000000001,0,0,0,0.0,-1.17,ClC(Cl)Cl
776
+ thiouracil,-0.992,1,128.15599999999998,2,1,0,45.75,-2.273,Sc1nccc(=O)[nH]1
777
+ Pencycuron,-5.126,1,328.84299999999996,1,3,4,32.34,-5.915,Clc1ccc(CN(C2CCCC2)C(=O)Nc3ccccc3)cc1
778
+ 1-Methylcyclohexene ,-2.574,1,96.17300000000002,0,1,0,0.0,-3.27,CC1=CCCCC1
779
+ 2-Ethylhexanal,-2.2319999999999998,1,128.21499999999997,0,0,5,17.07,-2.13,CCCCC(CC)C=O
780
+ Khellin,-3.603,1,260.24499999999995,0,3,2,61.81,-3.0210000000000004,COc2c1occc1c(OC)c3c(=O)cc(C)oc23
781
+ 5-Ethyl-5-(3-methylbutyl)barbital,-2.312,1,226.27599999999995,2,1,4,75.27000000000001,-2.658,O=C1NC(=O)NC(=O)C1(CC)CCC(C)C
782
+ Benzo(j)fluoranthene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-8.0,c1ccc2c3c(ccc2c1)c4cccc5cccc3c45
783
+ 2-Ethylbutanal,-1.5230000000000001,1,100.16099999999999,0,0,3,17.07,-1.52,CCC(CC)C=O
784
+ Dipropyl ether,-1.426,1,102.17699999999999,0,0,4,9.23,-1.62,CCCOCCC
785
+ 1-Tetradecanol,-4.231,1,214.39299999999994,1,0,12,20.23,-5.84,CCCCCCCCCCCCCCO
786
+ "2,3,6-Trichlorophenol",-3.572,1,197.44799999999998,1,1,0,20.23,-2.64,Oc1c(Cl)ccc(Cl)c1Cl
787
+ Urea,0.8320000000000001,1,60.056,2,0,0,69.11,0.96,NC(=O)N
788
+ 1-Pentyne,-1.446,1,68.11899999999999,0,0,1,0.0,-1.64,CCCC#C
789
+ "1,3-Dibromobenzene",-4.298,1,235.90599999999998,0,1,0,0.0,-3.54,Brc1cccc(Br)c1
790
+ 1-Octadecanol,-5.649,1,270.50099999999986,1,0,16,20.23,-8.4,CCCCCCCCCCCCCCCCCCO
791
+ Acetanilide,-1.857,1,135.16599999999997,1,1,1,29.1,-1.33,CC(=O)Nc1ccccc1
792
+ hematein,-1.795,1,300.266,4,4,0,107.22000000000001,-2.7,c1cc(O)c(O)c2OCC3(O)CC4=CC(=O)C(O)=CC4=C3c21
793
+ Isonazid,-0.7170000000000001,1,137.14200000000002,2,1,1,68.01,0.009000000000000001,c1nccc(C(=O)NN)c1
794
+ hydroxychlordene,-4.156000000000001,1,354.8749999999999,1,3,0,20.23,-5.46,OC1C=CC2C1C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl
795
+ Isopentyl formate,-1.449,1,116.15999999999998,0,0,4,26.3,-1.52,CC(C)CCOC=O
796
+ Acetophenone,-2.0780000000000003,1,120.15099999999995,0,1,1,17.07,-1.28,CC(=O)c1ccccc1
797
+ biquinoline,-4.9030000000000005,2,256.308,0,4,1,25.78,-5.4,c2ccc1nc(ccc1c2)c4ccc3ccccc3n4
798
+ Triethyl phosphate,-0.953,1,182.15599999999998,0,0,6,44.760000000000005,0.43,CCOP(=O)(OCC)OCC
799
+ D-fenchone,-2.158,1,152.237,0,2,0,17.07,-1.85,CC2(C)C1CCC(C)(C1)C2=O
800
+ 7-methoxypteridine,-1.589,1,162.152,0,2,1,60.790000000000006,-0.91,COc2cnc1cncnc1n2
801
+ Chlordene,-5.152,1,338.876,0,3,0,0.0,-5.64,ClC2=C(Cl)C3(Cl)C1C=CCC1C2(Cl)C3(Cl)Cl
802
+ 2-Nitropropane,-0.743,1,89.094,0,0,1,43.14,-0.62,CC(C)N(=O)=O
803
+ Carbazole,-3.8360000000000003,2,167.21099999999998,1,3,0,15.79,-5.27,c1ccc2c(c1)[nH]c3ccccc32
804
+ Erythritol,0.675,1,122.11999999999999,4,0,3,80.92,0.7,OCC(O)C(O)CO
805
+ Risocaine,-2.7089999999999996,1,179.21899999999997,1,1,3,52.32,-2.452,CCCOC(=O)c1ccc(N)cc1
806
+ Azodrin,-0.9490000000000001,1,223.16499999999996,1,0,5,73.86,0.6509999999999999,CNC(=O)C=C(C)OP(=O)(OC)OC
807
+ Succinimide,0.282,1,99.089,1,1,0,46.17,0.3,O=C1CCC(=O)N1
808
+ "2,3-Dimethylpentane",-2.938,1,100.20499999999998,0,0,2,0.0,-4.28,CCC(C)C(C)C
809
+ bupirimate,-3.4930000000000003,1,316.4270000000001,1,1,8,84.42,-4.16,CCCCc1c(C)nc(NCC)nc1OS(=O)(=O)N(C)C
810
+ RTI 16,-3.411,1,270.361,0,3,1,32.260000000000005,-4.6339999999999995,CCN2c1ncccc1N(C)C(=S)c3cccnc23
811
+ RTI 9,-3.784,1,239.274,0,3,1,29.54,-3.68,O2c1ccccc1N(CC)C(=O)c3ccccc23
812
+ Tetrahydropyran ,-0.978,2,86.134,0,1,0,9.23,-0.03,C1CCOCC1
813
+ 1-Heptyne,-2.155,1,96.17299999999999,0,0,3,0.0,-3.01,CCCCCC#C
814
+ osthole,-4.0760000000000005,1,244.28999999999994,0,2,3,39.44,-4.314,c1cc2ccc(OC)c(CC=C(C)(C))c2oc1=O
815
+ 3-Methylcholanthrene,-6.311,1,268.3589999999999,0,5,0,0.0,-7.92,c1cc(C)cc2c1c3cc4cccc5CCc(c45)c3cc2
816
+ Ethyl benzoate ,-2.775,1,150.177,0,1,2,26.3,-2.32,CCOC(=O)c1ccccc1
817
+ 1-Chloro-2-methylpropane,-1.9240000000000002,1,92.569,0,0,1,0.0,-2.0,ClCC(C)C
818
+ Ethinyl estradiol,-4.317,1,296.41,2,4,0,40.46,-4.3,CC34CCC1C(CCc2cc(O)ccc12)C3CCC4(O)C#C
819
+ methyl laurate,-4.025,1,214.34899999999996,0,0,10,26.3,-4.69,CCCCCCCCCCCC(=O)OC
820
+ Di-n-propylsulfide,-2.307,1,118.24499999999999,0,0,4,0.0,-2.58,CCCSCCC
821
+ Napthacene,-5.568,2,228.29399999999998,0,4,0,0.0,-8.6,c1ccc2cc3cc4ccccc4cc3cc2c1
822
+ 1-Bromopentane,-2.658,1,151.047,0,0,3,0.0,-3.08,CCCCCBr
823
+ trans-2-Heptene ,-2.784,1,98.18899999999998,0,0,3,0.0,-3.82,CCCC/C=C/C
824
+ Metranidazole,-0.8590000000000001,1,171.15599999999998,1,1,3,81.19,-1.26,Cc1ncc(N(=O)=O)n1CCO
825
+ Pentylcyclopentane,-3.8689999999999998,1,140.26999999999998,0,1,4,0.0,-6.08,CCCCCC1CCCC1
826
+ "2,2',3,5,5',6-PCB",-7.261,1,360.88200000000006,0,2,1,0.0,-7.42,Clc1ccc(Cl)c(c1)c2c(Cl)c(Cl)cc(Cl)c2Cl
827
+ 5-Ethyl-5-isopropylbarbituric acid,-1.6030000000000002,1,198.22199999999998,2,1,2,75.27000000000001,-2.148,O=C1NC(=O)NC(=O)C1(CC)C(C)C
828
+ "1,1,1-Trichloroethane",-2.2319999999999998,1,133.405,0,0,0,0.0,-2.0,CC(Cl)(Cl)Cl
829
+ Monolinuron,-2.948,1,214.652,1,1,2,41.57,-2.57,CON(C)C(=O)Nc1ccc(Cl)cc1
830
+ Cyclohexyl-5-spirobarbituric acid,-1.405,1,196.206,2,2,0,75.27,-3.06,O=C2NC(=O)C1(CCCCC1)C(=O)N2
831
+ dimetan,-2.3040000000000003,1,211.26099999999994,0,1,1,46.61,-0.85,CN(C)C(=O)OC1=CC(=O)CC(C)(C)C1
832
+ 4-Bromotoluene,-3.667,1,171.03700000000003,0,1,0,0.0,-3.19,Cc1ccc(Br)cc1
833
+ Diethyl ether ,-0.718,1,74.123,0,0,2,9.23,-0.09,CCOCC
834
+ Rovral,-4.004,1,330.17100000000005,1,2,2,69.72,-4.376,CC(C)NC(=O)N1CC(=O)N(C1=O)c2cc(Cl)cc(Cl)c2
835
+ Benfluralin,-5.205,1,335.28200000000004,0,1,7,89.51999999999998,-5.53,CCCCN(CC)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O
836
+ "2,4,6-Trimethylphenol",-2.9410000000000003,1,136.194,1,1,0,20.23,-2.05,Cc1cc(C)c(O)c(C)c1
837
+ Benzene ,-2.418,2,78.11399999999999,0,1,0,0.0,-1.64,c1ccccc1
838
+ p-Chloroiodobenzene,-4.3839999999999995,1,238.45499999999998,0,1,0,0.0,-4.03,Clc1ccc(I)cc1
839
+ Metoxuron,-2.6830000000000003,1,228.67899999999997,1,1,2,41.57,-2.5639999999999996,COc1ccc(NC(=O)N(C)C)cc1Cl
840
+ propachlor,-3.0180000000000002,1,211.69200000000004,0,1,3,20.310000000000002,-2.48,CC(C)N(C(=O)CCl)c1ccccc1
841
+ Styrene,-2.85,1,104.15199999999997,0,1,1,0.0,-2.82,C=Cc1ccccc1
842
+ Dimethoxymethane,0.092,1,76.095,0,0,2,18.46,0.48,COCOC
843
+ o-Xylene ,-3.0039999999999996,1,106.16799999999999,0,1,0,0.0,-2.8,Cc1ccccc1C
844
+ Butan-2-ol,-0.616,1,74.12299999999999,1,0,1,20.23,0.47,CCC(C)O
845
+ "1,4-Benzenediol",-1.59,1,110.11199999999998,2,1,0,40.46,-0.17,Oc1ccc(O)cc1
846
+ estriol,-3.858,1,288.387,3,4,0,60.69,-4.955,CC34CCC1C(CCc2cc(O)ccc12)C3CC(O)C4O
847
+ Benzo(b)fluorene,-5.189,2,216.283,0,4,0,0.0,-8.04,C1c2ccccc2c3cc4ccccc4cc13
848
+ hydantoin,0.603,1,100.077,2,1,0,58.2,-0.4,O=C1CNC(=O)N1
849
+ 4-hexylresorcinol,-3.4930000000000003,1,194.27399999999992,2,1,5,40.46,-2.59,c1(O)cc(O)ccc1CCCCCC
850
+ allicin,-2.045,1,162.27899999999997,0,0,5,17.07,-0.83,C=CCS(=O)SCC=C
851
+ Coumaphos,-5.04,1,362.77100000000013,0,2,6,57.9,-5.382000000000001,CCOP(=S)(OCC)Oc2ccc1oc(=O)c(Cl)c(C)c1c2
852
+ "5,6-Dimethylchrysene",-6.265,1,256.348,0,4,0,0.0,-7.01,Cc1c(C)c2c3ccccc3ccc2c4ccccc14
853
+ Betamethasone-17-valerate,-5.062,1,476.5850000000002,2,4,6,100.90000000000002,-4.71,CCCCC(=O)OC3(C(C)CC4C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC34C)C(=O)CO
854
+ uric acid,-0.541,1,168.112,4,2,0,114.36999999999998,-3.93,O=c2[nH]c(=O)c1[nH]c(=O)[nH]c1[nH]2
855
+ "2,3,4,6-Tetrachlorophenol",-4.203,1,231.89299999999997,1,1,0,20.23,-3.1,Oc1c(Cl)cc(Cl)c(Cl)c1Cl
856
+ "1,3-Dichlorobenzene",-3.5580000000000003,1,147.004,0,1,0,0.0,-3.04,Clc1cccc(Cl)c1
857
+ DDT,-6.638,1,354.491,0,2,2,0.0,-7.15,Clc1ccc(cc1)C(c2ccc(Cl)cc2)C(Cl)(Cl)Cl
858
+ Isobutyl formate,-1.095,1,102.13299999999998,0,0,3,26.3,-1.01,CC(C)COC=O
859
+ thioanisole,-2.87,1,124.208,0,1,1,0.0,-2.39,c1ccccc1SC
860
+ RTI 13,-4.45,1,322.29,1,3,1,58.120000000000005,-4.207,CCN2c1nc(C)cc(C(F)(F)F)c1NC(=O)c3cccnc23
861
+ Hexane ,-2.615,1,86.178,0,0,3,0.0,-3.84,CCCCCC
862
+ methyl nicotinate,-1.621,1,137.138,0,1,1,39.19,-0.46,COC(=O)c1cccnc1
863
+ Bendroflumethiazide,-3.741,1,421.4220000000001,3,3,3,118.35999999999999,-3.59,NS(=O)(=O)c3cc2c(NC(Cc1ccccc1)NS2(=O)=O)cc3C(F)(F)F
864
+ "2,3,3',4,4',5-PCB",-7.425,1,360.88200000000006,0,2,1,0.0,-7.82,Clc1ccc(cc1Cl)c2cc(Cl)c(Cl)c(Cl)c2Cl
865
+ Vinclozolin,-4.377,1,286.11400000000003,0,2,2,46.61,-4.925,CC1(OC(=O)N(C1=O)c2cc(Cl)cc(Cl)c2)C=C
866
+ Cyanazine,-2.49,1,240.698,2,1,4,86.52,-3.15,CCNc1nc(Cl)nc(NC(C)(C)C#N)n1
867
+ Triphenylene,-5.568,2,228.29399999999998,0,4,0,0.0,-6.726,c1ccc2c(c1)c3ccccc3c4ccccc24
868
+ Dienestrol,-4.775,1,266.34,2,2,3,40.46,-4.95,CC=C(C(=CC)c1ccc(O)cc1)c2ccc(O)cc2
869
+ Di(2-ethylhexyl)-phthalate,-7.117000000000001,1,390.5640000000003,0,1,14,52.60000000000001,-6.96,CCCCC(CC)COC(=O)c1ccccc1C(=O)OCC(CC)CCCC
870
+ 2-Ethyl pyridine,-2.051,1,107.15599999999998,0,1,1,12.89,0.51,CCc1ccccn1
871
+ Naled,-3.548,1,380.784,0,0,5,44.760000000000005,-2.28,COP(=O)(OC)OC(Br)C(Cl)(Cl)Br
872
+ Biphenyl,-4.079,2,154.21199999999996,0,2,1,0.0,-4.345,c1ccc(cc1)c2ccccc2
873
+ "2,2',4,4',6,6'-PCB",-7.178999999999999,1,360.88200000000006,0,2,1,0.0,-8.71,Clc1cc(Cl)c(c(Cl)c1)c2c(Cl)cc(Cl)cc2Cl
874
+ Altretamine,-2.492,1,210.285,0,1,3,48.39000000000001,-3.364,CN(C)c1nc(nc(n1)N(C)C)N(C)C
875
+ "2,4-Dimethyl-2-pentanol ",-1.6469999999999998,1,116.20399999999998,1,0,2,20.23,-0.92,CC(C)CC(C)(C)O
876
+ Cycloheptyl-5-spirobarbituric acid,-1.844,1,210.23299999999998,2,2,0,75.27,-3.168,O=C2NC(=O)C1(CCCCCC1)C(=O)N2
877
+ Fructose,0.47100000000000003,1,180.156,5,1,2,110.38000000000001,0.64,OCC1OC(O)(CO)C(O)C1O
878
+ "3,5-Dimethylphenol",-2.6519999999999997,1,122.16699999999997,1,1,0,20.23,-1.4,Cc1cc(C)cc(O)c1
879
+ Barban,-4.16,1,258.104,1,1,2,38.33,-4.37,ClCC#CCOC(=O)Nc1cccc(Cl)c1
880
+ p-Chloroacetanilide,-2.642,1,169.611,1,1,1,29.1,-2.843,CC(=O)Nc1ccc(Cl)cc1
881
+ "2,2',3,4,5,5',6-PCB",-7.898,1,395.3270000000001,0,2,1,0.0,-8.94,Clc1ccc(Cl)c(c1)c2c(Cl)c(Cl)c(Cl)c(Cl)c2Cl
882
+ "2,2-Dimethylbutane",-2.5839999999999996,1,86.17799999999998,0,0,0,0.0,-3.55,CCC(C)(C)C
883
+ N-Methylaniline ,-2.097,1,107.15599999999998,1,1,1,12.03,-1.28,CNc1ccccc1
884
+ "1,4-Pentadiene ",-1.758,1,68.119,0,0,2,0.0,-2.09,C=CCC=C
885
+ Hydrocortisone 21-acetate,-3.6919999999999997,1,404.5030000000002,2,4,3,100.90000000000002,-4.88,CC(=O)OCC(=O)C1(O)CCC2C3CCC4=CC(=O)CCC4(C)C3C(O)CC21C
886
+ DNOC,-2.818,1,198.134,1,1,2,106.51000000000002,-1.456,Cc1cc(cc(N(=O)=O)c1O)N(=O)=O
887
+ Lorazepam,-3.75,1,321.163,2,3,1,61.690000000000005,-3.6039999999999996,OC3N=C(c1ccccc1Cl)c2cc(Cl)ccc2NC3=O
888
+ 3-Chlorophenol,-2.761,1,128.558,1,1,0,20.23,-0.7,Oc1cccc(Cl)c1
889
+ m-Chlorobromobenzene,-3.928,1,191.45499999999998,0,1,0,0.0,-3.21,Clc1cccc(Br)c1
890
+ chlorothiazide,-1.7519999999999998,1,295.72900000000004,2,2,1,118.69,-3.05,NS(=O)(=O)c2cc1c(N=CNS1(=O)=O)cc2Cl
891
+ 5-Methyl-5-ethylbarbituric acid,-0.9109999999999999,1,170.16799999999998,2,1,1,75.27000000000001,-1.228,O=C1NC(=O)NC(=O)C1(C)CC
892
+ 2-Phenoxyethanol,-1.761,1,138.16599999999997,1,1,3,29.46,-0.7,OCCOc1ccccc1
893
+ Diphenylmethane,-4.09,2,168.239,0,2,2,0.0,-4.08,C(c1ccccc1)c2ccccc2
894
+ 3-Octanol,-2.033,1,130.23099999999997,1,0,5,20.23,-1.98,CCCCCC(O)CC
895
+ Flumetralin,-6.584,1,421.7340000000001,0,2,6,89.51999999999998,-6.78,CCN(Cc1c(F)cccc1Cl)c2c(cc(cc2N(=O)=O)C(F)(F)F)N(=O)=O
896
+ Propazine,-3.3289999999999997,1,229.71500000000003,2,1,4,62.730000000000004,-4.43,CC(C)Nc1nc(Cl)nc(NC(C)C)n1
897
+ 2-Methylpentanol,-1.381,1,102.17699999999999,1,0,3,20.23,-1.11,CCCC(C)CO
898
+ 2-Methyl-2-hexanol,-1.663,1,116.20399999999998,1,0,3,20.23,-1.08,CCCCC(C)(C)O
899
+ Ethylbenzene,-2.988,1,106.16799999999996,0,1,1,0.0,-2.77,CCc1ccccc1
900
+ 5-(3-Methyl-2-butenyl)-5-ethylbarbital,-2.126,1,224.25999999999996,2,1,3,75.27000000000001,-2.253,O=C1NC(=O)NC(=O)C1(CC)CC=C(C)C
901
+ Heptachlor,-5.26,1,373.3209999999999,0,3,0,0.0,-6.317,ClC1C=CC2C1C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl
902
+ butallylonal,-2.766,1,303.156,2,1,4,75.27000000000001,-2.647,CCC(C)C1(CC(Br)=C)C(=O)NC(=O)NC1=O
903
+ Fenpropathrin,-6.15,1,349.43000000000006,0,3,5,59.32000000000001,-6.025,CC1(C)C(C(=O)OC(C#N)c2cccc(Oc3ccccc3)c2)C1(C)C
904
+ Methoprene,-4.795,1,310.47800000000007,0,0,10,35.53,-5.19,COC(C)(C)CCCC(C)CC=CC(C)=CC(=O)OC(C)C
905
+ Ethyl propionate,-1.1909999999999998,1,102.133,0,0,2,26.3,-0.66,CCOC(=O)CC
906
+ Prometryn,-3.693,1,241.364,2,1,5,62.730000000000004,-4.1,CSc1nc(NC(C)C)nc(NC(C)C)n1
907
+ Buturon,-3.199,1,236.702,1,1,2,32.34,-3.9,CC(C#C)N(C)C(=O)Nc1ccc(Cl)cc1
908
+ "2,3-Dimethylnaphthalene",-4.1160000000000005,1,156.22799999999998,0,2,0,0.0,-4.72,Cc1cc2ccccc2cc1C
909
+ "2,4',5-PCB",-5.7620000000000005,1,257.547,0,2,1,0.0,-6.25,Clc1ccc(cc1)c2cc(Cl)ccc2Cl
910
+ "2,3',4,4',5-PCB",-7.343,1,360.88200000000006,0,2,1,0.0,-7.39,Clc1ccc(c(Cl)c1)c2cc(Cl)c(Cl)c(Cl)c2Cl
911
+ 2-cyanoguanidine,0.361,1,84.082,2,0,0,88.19,-0.31,NC(N)=NC#N
912
+ Chloropicrin,-1.8659999999999999,1,164.375,0,0,0,43.14,-2.0,ClC(Cl)(Cl)N(=O)=O
913
+ "2,6-PCB",-4.984,1,223.102,0,2,1,0.0,-5.21,Clc1cccc(Cl)c1c2ccccc2
914
+ p-Methoxybenzaldehyde,-2.252,1,136.14999999999998,0,1,2,26.3,-1.49,COc1ccc(C=O)cc1
915
+ 4-Nitroacetanilide,-2.219,1,180.16299999999998,1,1,2,72.24000000000001,-2.6919999999999997,CC(=O)Nc1ccc(cc1)N(=O)=O
916
+ Ethyl heptanoate,-2.608,1,158.24099999999999,0,0,6,26.3,-2.74,CCCCCCC(=O)OCC
917
+ p-Hydroxyacetanilide,-1.495,1,151.165,2,1,1,49.33,-1.03,CC(=O)Nc1ccc(O)cc1
918
+ indazole,-2.34,2,118.13899999999998,1,2,0,28.68,-2.16,c2ccc1[nH]ncc1c2
919
+ triamcinolone acetonide,-3.928,1,434.50400000000025,2,5,2,93.06000000000002,-4.31,CC5(C)OC4CC3C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC3(C)C4(O5)C(=O)CO
920
+ guanine,-0.67,1,151.129,3,2,0,100.44999999999999,-3.583,Nc2nc1[nH]cnc1c(=O)[nH]2
921
+ Methyl acetate,-0.41600000000000004,1,74.07900000000001,0,0,0,26.3,0.46,COC(=O)C
922
+ Stanolone,-3.8819999999999997,1,290.44699999999995,1,4,0,37.3,-4.743,CC34CCC1C(CCC2CC(=O)CCC12C)C3CCC4O
923
+ 1-Hexene-3-ol,-1.199,1,100.16099999999999,1,0,3,20.23,-0.59,CCCC(O)C=C
924
+ norbormide,-4.238,1,511.5810000000002,2,7,5,92.18,-3.931,OC(C1=CC2C5C(C1C2=C(c3ccccc3)c4ccccn4)C(=O)NC5=O)(c6ccccc6)c7ccccn7
925
+ Dibutyl ether ,-2.135,1,130.231,0,0,6,9.23,-1.85,CCCCOCCCC
926
+ 1-Dodecanol,-3.523,1,186.33899999999997,1,0,10,20.23,-4.8,CCCCCCCCCCCCO
927
+ RTI 6,-3.335,1,313.36100000000005,2,3,4,81.59000000000002,-3.36,CCN2c1nc(N(C)(CCO))ccc1NC(=O)c3cccnc23
928
+ 2-Methyl-2-pentanol,-1.308,1,102.17699999999998,1,0,2,20.23,-0.49,CCCC(C)(C)O
929
+ Flucytosine,-0.132,1,129.09399999999997,2,1,0,71.77,-0.972,Nc1nc(=O)[nH]cc1F
930
+ stadacaine,-5.127999999999999,1,293.40700000000004,0,1,9,38.77,-3.84,CCCCOc1ccc(C(=O)OCC)c(c1)N(CC)CC
931
+ 2-Methyl-2-heptanol,-2.017,1,130.231,1,0,4,20.23,-1.72,CCCCCC(C)(C)O
932
+ Hexamethylbenzene,-4.361000000000001,1,162.27599999999998,0,1,0,0.0,-5.23,Cc1c(C)c(C)c(C)c(C)c1C
933
+ Thymol,-3.1289999999999996,1,150.22099999999998,1,1,1,20.23,-2.22,CC(C)c1ccc(C)cc1O
934
+ Pteridine,-0.9059999999999999,2,132.12599999999998,0,2,0,51.56,0.02,c2cnc1ncncc1n2
935
+ Parathion,-3.949,1,291.26500000000004,0,1,7,70.83000000000001,-4.66,CCOP(=S)(OCC)Oc1ccc(cc1)N(=O)=O
936
+ Methane,-0.636,0,16.043,0,0,0,0.0,-0.9,C
937
+ indoline,-2.195,2,119.16699999999999,1,2,0,12.03,-1.04,c2ccc1NCCc1c2
938
+ 1-Nitronapthalene,-3.4139999999999997,1,173.171,0,2,1,43.14,-3.54,O=N(=O)c1cccc2ccccc12
939
+ 3-Methyl-2-pentanone,-1.266,1,100.16099999999999,0,0,2,17.07,-0.67,CCC(C)C(=O)C
940
+ isoguanine,-1.74,1,151.129,3,2,0,100.71000000000001,-3.4010000000000002,Nc1nc(O)nc2nc[nH]c12
941
+ bromadiolone,-7.877000000000001,1,527.4140000000002,2,5,6,70.67,-4.445,OC(CC(c1ccccc1)c3c(O)c2ccccc2oc3=O)c4ccc(cc4)c5ccc(Br)cc5
942
+ Nitromethane,-0.042,1,61.040000000000006,0,0,0,43.14,0.26,CN(=O)=O
943
+ Triallate,-4.578,1,304.66999999999996,0,0,4,20.310000000000002,-4.88,CC(C)N(C(C)C)C(=O)SCC(Cl)=C(Cl)Cl
944
+ "1,5-Hexadiene ",-2.112,1,82.14599999999999,0,0,3,0.0,-2.68,C=CCCC=C
945
+ Indole,-2.654,2,117.15099999999997,1,2,0,15.79,-1.52,c2ccc1[nH]ccc1c2
946
+ Androstenedione,-3.3930000000000002,1,286.415,0,4,0,34.14,-3.69,CC34CCC1C(CCC2=CC(=O)CCC12C)C3CCC4=O
947
+ 1-Hexene,-2.364,1,84.16199999999999,0,0,3,0.0,-3.23,CCCCC=C
948
+ Xipamide,-3.642,1,354.8150000000001,3,2,3,109.48999999999998,-3.79,Cc1cccc(C)c1NC(=O)c2cc(c(Cl)cc2O)S(N)(=O)=O
949
+ Ethylcyclohexane,-3.245,1,112.21600000000001,0,1,1,0.0,-4.25,CCC1CCCCC1
950
+ 2-Nonanone,-2.263,1,142.242,0,0,6,17.07,-2.58,CCCCCCCC(=O)C
951
+ Mebendazole,-4.118,1,295.298,2,3,3,84.07999999999998,-3.88,COC(=O)Nc2nc1ccc(cc1[nH]2)C(=O)c3ccccc3
952
+ Chloropham,-3.5439999999999996,1,213.66400000000002,1,1,2,38.33,-3.38,CC(C)OC(=O)Nc1cccc(Cl)c1
953
+ RTI 12,-3.446,1,288.73800000000006,0,3,1,49.330000000000005,-4.114,CCN2c1nc(Cl)ccc1N(C)C(=O)c3cccnc23
954
+ Carbaryl,-3.0869999999999997,1,201.225,1,2,1,38.33,-3.2239999999999998,CNC(=O)Oc1cccc2ccccc12
955
+ Ethyne,-0.252,1,26.037999999999997,0,0,0,0.0,0.29,C#C
956
+ "3,5-Dimethylpyridine",-2.0980000000000003,1,107.15599999999998,0,1,0,12.89,0.38,Cc1cncc(C)c1
957
+ "1,4-Cyclohexadiene",-1.8419999999999999,2,80.12999999999998,0,1,0,0.0,-2.06,C1C=CCC=C1
958
+ Mecarbam,-3.738,1,329.3800000000001,0,0,8,65.07000000000001,-2.5180000000000002,CCOC(=O)N(C)C(=O)CSP(=S)(OCC)OCC
959
+ 1-Phenylethanol,-1.919,1,122.16699999999996,1,1,1,20.23,-0.92,CC(O)c1ccccc1
960
+ "1,2-Dichloropropane",-1.794,1,112.98700000000001,0,0,1,0.0,-1.6,CC(Cl)CCl
961
+ 2-Ethyl-2-hexanal,-2.081,1,126.19899999999998,0,0,4,17.07,-2.46,CCCC=C(CC)C=O
962
+ Disulfoton,-3.975,1,274.413,0,0,9,18.46,-4.23,CCOP(=S)(OCC)SCCSCC
963
+ methyltestosterone acetate,-4.863,1,344.4950000000001,0,4,1,43.370000000000005,-5.284,CC(=O)OC3(C)CCC4C2CCC1=CC(=O)CCC1(C)C2CCC34C
964
+ "2,4,6-PCB",-5.604,1,257.547,0,2,1,0.0,-6.14,Clc1ccc(cc1)c2c(Cl)cccc2Cl
965
+ difluron,-4.692,1,310.687,2,2,2,58.2,-6.02,Fc1cccc(F)c1C(=O)NC(=O)Nc2ccc(Cl)cc2
966
+ Triclosan,-5.645,1,289.54499999999996,1,2,2,29.46,-4.46,Oc1cc(Cl)ccc1Oc2ccc(Cl)cc2Cl
967
+ diisooctyl phthalate,-7.117000000000001,1,390.5640000000002,0,1,14,52.60000000000001,-6.6370000000000005,c1(C(=O)OCCCCCC(C)(C))c(C(=O)OCCCCCC(C)(C))cccc1
968
+ Corticosterone,-3.4539999999999997,1,346.46700000000016,2,4,2,74.6,-3.24,CC12CC(O)C3C(CCC4=CC(=O)CCC34C)C2CCC1C(=O)CO
969
+ "1,3,5-Trimethylbenzene ",-3.375,1,120.19499999999998,0,1,0,0.0,-3.4,Cc1cc(C)cc(C)c1
970
+ dioctyl phthalate,-7.148,1,390.56400000000036,0,1,16,52.60000000000001,-5.115,CCCCCCCCOC(=O)c1ccccc1C(=O)OCCCCCCCC
971
+ 1-Pentadecanol,-4.586,1,228.41999999999993,1,0,13,20.23,-6.35,CCCCCCCCCCCCCCCO
972
+ "2,2',6,6'-PCB",-5.915,1,291.99199999999996,0,2,1,0.0,-7.39,Clc1cccc(Cl)c1c2c(Cl)cccc2Cl
973
+ "5,5-Dimethylbarbituric acid",-0.556,1,156.141,2,1,0,75.27000000000001,-1.742,O=C1NC(=O)NC(=O)C1(C)C
974
+ 2-Iodopropane,-2.4859999999999998,1,169.993,0,0,0,0.0,-2.09,CC(C)I
975
+ "1,2-Dinitrobenzene",-2.281,1,168.10799999999995,0,1,2,86.28,-3.1,O=N(=O)c1ccccc1N(=O)=O
976
+ 3-Methyl-2-butanone,-0.912,1,86.13399999999999,0,0,1,17.07,-0.12,CC(C)C(=O)C
977
+ Hexadecane,-6.159,1,226.44799999999992,0,0,13,0.0,-8.4,CCCCCCCCCCCCCCCC
978
+ "1,8-Cineole",-2.5789999999999997,1,154.253,0,3,0,9.23,-1.74,CC12CCC(CC1)C(C)(C)O2
979
+ Tricyclazole,-2.8680000000000003,1,189.24300000000002,0,3,0,30.19,-2.07,Cc2cccc3sc1nncn1c23
980
+ 2-Octanone,-1.909,1,128.21499999999997,0,0,5,17.07,-2.05,CCCCCCC(=O)C
981
+ Methyl nonanoate,-2.9619999999999997,1,172.268,0,0,7,26.3,-3.38,CCCCCCCCC(=O)OC
982
+ "1,4-Difluorobenzene",-2.636,1,114.094,0,1,0,0.0,-1.97,Fc1ccc(F)cc1
983
+ Thalidomide,-1.944,1,258.233,1,3,1,83.55000000000001,-2.676,O=C1N(C2CCC(=O)NC2=O)C(=O)c3ccccc13
984
+ Trifluralin,-5.205,1,335.28200000000004,0,1,7,89.51999999999998,-5.68,CCCN(CCC)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O
985
+ Ethanol,0.02,1,46.069,1,0,0,20.23,1.1,CCO
986
+ Cyclopentyl-5-spirobarbituric acid,-0.966,1,182.179,2,2,0,75.27,-2.349,O=C2NC(=O)C1(CCCC1)C(=O)N2
987
+ Carbetamide,-2.29,1,236.271,2,1,4,67.42999999999999,-1.83,c1c(NC(=O)OC(C)C(=O)NCC)cccc1
988
+ phenothrin,-6.763,1,350.4580000000001,0,3,6,35.53,-5.24,CC(C)=CC3C(C(=O)OCc2cccc(Oc1ccccc1)c2)C3(C)C
989
+ Cycluron,-2.6289999999999996,1,198.30999999999992,1,1,1,32.34,-2.218,CN(C)C(=O)NC1CCCCCCC1
990
+ Mirex,-6.155,1,545.5460000000002,0,6,0,0.0,-6.8,ClC1(C2(Cl)C3(Cl)C4(Cl)C5(Cl)C1(Cl)C3(Cl)Cl)C5(Cl)C(Cl)(Cl)C24Cl
991
+ 1-Bromooctane,-3.721,1,193.128,0,0,6,0.0,-5.06,CCCCCCCCBr
992
+ Benomyl,-2.9019999999999997,1,290.323,2,2,4,85.25,-4.883,CCCCNC(=O)n1c(NC(=O)OC)nc2ccccc12
993
+ aminopyrine,-2.129,1,231.299,0,2,2,30.17,-0.364,CN(C)c2c(C)n(C)n(c1ccccc1)c2=O
994
+ 3-Pentanol,-0.97,1,88.15,1,0,2,20.23,-0.24,CCC(O)CC
995
+ p-Nitrotoluene,-2.64,1,137.138,0,1,1,43.14,-2.49,Cc1ccc(cc1)N(=O)=O
996
+ 4-Methylpentanol,-1.381,1,102.17699999999999,1,0,3,20.23,-1.14,CC(C)CCCO
997
+ Norethisterone,-2.6689999999999996,1,314.42500000000007,2,4,0,57.53,-4.57,CC34CCC1C(CCC2=CC(=O)CCC12O)C3CCC4(O)C#C
998
+ bromopropylate,-5.832999999999999,1,428.12000000000006,1,2,4,46.53,-4.93,CC(C)OC(=O)C(O)(c1ccc(Br)cc1)c2ccc(Br)cc2
999
+ Pyrazon,-2.603,1,221.647,1,2,1,60.91,-2.878,Nc2cnn(c1ccccc1)c(=O)c2Cl
1000
+ 2-Methylbutan-2-ol,-0.9540000000000001,1,88.14999999999998,1,0,1,20.23,0.15,CCC(C)(C)O
1001
+ p-Cresol,-2.313,1,108.13999999999999,1,1,0,20.23,-0.73,Cc1ccc(O)cc1
1002
+ Ethyl formate,-0.402,1,74.07900000000001,0,0,2,26.3,0.15,CCOC=O
1003
+ "N,N-Dimethylaniline",-2.542,1,121.18299999999995,0,1,1,3.24,-1.92,CN(C)c1ccccc1
1004
+ Decalin,-3.715,2,138.254,0,2,0,0.0,-5.19,C1CCC2CCCCC2C1
1005
+ Butanethiol ,-1.676,1,90.19099999999999,1,0,2,0.0,-2.18,CCCCS
1006
+ Benzo(e)pyrene,-6.007000000000001,2,252.31599999999997,0,5,0,0.0,-7.8,c1ccc2c(c1)c3cccc4ccc5cccc2c5c43
1007
+ Tetrachloroethylene,-3.063,1,165.834,0,0,0,0.0,-2.54,ClC(=C(Cl)Cl)Cl
1008
+ 3-Pentanone,-0.912,1,86.134,0,0,2,17.07,-0.28,CCC(=O)CC
1009
+ Acrylonitrile,-0.354,1,53.06399999999999,0,0,0,23.79,0.15,C=CC#N
1010
+ Flumethasone,-3.5389999999999997,1,410.4570000000002,3,4,2,94.83,-5.6129999999999995,CC1CC2C3CC(F)C4=CC(=O)C=CC4(C)C3(F)C(O)CC2(C)C1(O)C(=O)CO
1011
+ 2-Hexanone,-1.2,1,100.16099999999999,0,0,3,17.07,-0.8,CCCCC(=O)C
1012
+ Terbumeton,-3.505,1,225.296,2,1,4,71.96000000000001,-3.239,CCNc1nc(NC(C)(C)C)nc(OC)n1
1013
+ 3-Methylheptane,-3.3080000000000003,1,114.23199999999999,0,0,4,0.0,-5.16,CCCCC(C)CC
1014
+ "1,2-Dibromoethane",-2.102,1,187.862,0,0,1,0.0,-1.68,BrCCBr
1015
+ Isoprocarb,-2.734,1,193.24599999999998,1,1,2,38.33,-2.863,CNC(=O)Oc1ccccc1C(C)C
1016
+ Niridazole,-1.9480000000000002,1,214.20600000000002,1,2,2,88.37,-3.22,O=C1NCCN1c2ncc(s2)N(=O)=O
1017
+ Benzo(a)fluorene,-5.189,2,216.283,0,4,0,0.0,-6.68,C1c2ccccc2c3ccc4ccccc4c13
1018
+ 2-Chloroanisole,-2.912,1,142.58499999999998,0,1,1,9.23,-2.46,COc1ccccc1Cl
1019
+ Bromophos,-5.604,1,366.0,0,1,4,27.69,-6.09,COP(=S)(OC)Oc1cc(Cl)c(Br)cc1Cl
1020
+ Quinonamid,-3.988,1,332.57000000000005,1,2,3,63.24,-5.03,ClC(Cl)CC(=O)NC2=C(Cl)C(=O)c1ccccc1C2=O
1021
+ "P,P'-DDD",-6.007999999999999,1,320.04600000000005,0,2,3,0.0,-7.2,ClC(Cl)C(c1ccc(Cl)cc1)c2ccc(Cl)cc2
1022
+ Methyl acrylate,-0.878,1,86.09,0,0,1,26.3,-0.22,COC(=O)C=C
1023
+ Chloroxuron,-4.477,1,290.75,1,2,3,41.57000000000001,-4.89,CN(C)C(=O)Nc2ccc(Oc1ccc(Cl)cc1)cc2
1024
+ Azobenzene,-4.034,2,182.226,0,2,2,24.72,-4.45,N(=Nc1ccccc1)c2ccccc2
1025
+ 4-Isopropyltoluene,-3.617,1,134.22199999999998,0,1,1,0.0,-3.77,CC(C)c1ccc(C)cc1
1026
+ "2,6-Dichlorophenol",-3.012,1,163.003,1,1,0,20.23,-1.79,Oc1c(Cl)cccc1Cl
1027
+ Sucrose,0.31,1,342.297,8,2,5,189.52999999999997,0.79,OCC2OC(OC1(CO)OC(CO)C(O)C1O)C(O)C(O)C2O
1028
+ d-inositol,-0.887,1,180.156,6,1,0,121.38000000000001,0.35,OC1C(O)C(O)C(O)C(O)C1O
1029
+ Dyphylline,-0.847,1,254.24599999999995,2,2,3,102.28,-0.17,Cn2c(=O)n(C)c1ncn(CC(O)CO)c1c2=O
1030
+ Chloramphenicol,-2.613,1,323.13200000000006,3,1,6,112.70000000000002,-2.1109999999999998,OCC(NC(=O)C(Cl)Cl)C(O)c1ccc(cc1)N(=O)=O
1031
+ 3-Ethyl-3-pentanol,-1.663,1,116.204,1,0,3,20.23,-0.85,CCC(O)(CC)CC
1032
+ Epitostanol,-4.545,1,306.51500000000004,1,5,0,20.23,-5.41,CC45CCC2C(CCC3CC1SC1CC23C)C4CCC5O
1033
+ "1,2-Dibromobenzene",-4.172,1,235.90599999999998,0,1,0,0.0,-3.5,Brc1ccccc1Br
1034
+ "2,4,6-Trichlorophenol",-3.648,1,197.44799999999998,1,1,0,20.23,-2.34,Oc1c(Cl)cc(Cl)cc1Cl
1035
+ oryzalin,-3.784,1,346.3650000000001,1,1,8,149.67999999999998,-5.16,CCCN(CCC)c1c(cc(cc1N(=O)=O)S(N)(=O)=O)N(=O)=O
1036
+ RTI 20,-3.6630000000000003,1,255.29199999999997,0,3,2,20.310000000000002,-4.7989999999999995,C2c1ccccc1N(CCF)C(=O)c3ccccc23
1037
+ "2,4-Dimethyl-3-pentanone",-1.7519999999999998,1,114.18799999999997,0,0,2,17.07,-1.3,CC(C)C(=O)C(C)C
1038
+ 5-(3-Methyl-2-butenyl)-5-isoPrbarbital,-2.465,1,238.28699999999998,2,1,3,75.27000000000001,-2.593,O=C1NC(=O)NC(=O)C1(C(C)C)CC=C(C)C
1039
+ gentisin,-1.2919999999999998,1,262.261,2,3,1,75.99000000000001,-2.943,c1c(O)C2C(=O)C3cc(O)ccC3OC2cc1(OC)
1040
+ Caffeine,-1.4980000000000002,1,194.19399999999996,0,2,0,61.82,-0.8759999999999999,Cn1cnc2n(C)c(=O)n(C)c(=O)c12
1041
+ Spironolactone,-3.842,1,416.58300000000025,0,5,1,60.44,-4.173,CC(=O)SC4CC1=CC(=O)CCC1(C)C5CCC2(C)C(CCC23CCC(=O)O3)C45
1042
+ "3,4-Dimethylphenol",-2.6210000000000004,1,122.16699999999999,1,1,0,20.23,-1.38,Cc1ccc(O)cc1C
1043
+ Diphenyl ether ,-4.254,2,170.211,0,2,2,9.23,-3.96,O(c1ccccc1)c2ccccc2
1044
+ "2,2',4,4',5,5'-PCB",-7.343,1,360.88200000000006,0,2,1,0.0,-8.56,Clc1cc(Cl)c(cc1Cl)c2cc(Cl)c(Cl)cc2Cl
1045
+ nicotinamide,-0.9640000000000001,1,122.12699999999997,1,1,1,55.980000000000004,0.61,NC(=O)c1cccnc1
1046
+ Thiophenol ,-2.758,1,110.18099999999997,1,1,0,0.0,-2.12,Sc1ccccc1
1047
+ XMC,-2.688,1,179.219,1,1,1,38.33,-2.5810000000000004,CNC(=O)Oc1cc(C)cc(C)c1
1048
+ Chlordane,-6.039,1,409.7819999999999,0,3,0,0.0,-6.86,ClC1CC2C(C1Cl)C3(Cl)C(=C(Cl)C2(Cl)C3(Cl)Cl)Cl
1049
+ Dimethyldisulfide,-1.524,1,94.20400000000001,0,0,1,0.0,-1.44,CSSC
1050
+ Benzamide,-1.501,1,121.13899999999995,1,1,1,43.09,-0.96,NC(=O)c1ccccc1
1051
+ o-Chlorobromobenzene,-3.84,1,191.45499999999998,0,1,0,0.0,-3.19,Clc1ccccc1Br
1052
+ Monotropitoside,-1.493,1,446.40500000000003,6,3,6,184.6,-0.742,COC(=O)c1ccccc1OC2OC(COC3OCC(O)C(O)C3O)C(O)C(O)C2O
1053
+ 3-Heptanol ,-1.6780000000000002,1,116.20399999999998,1,0,4,20.23,-1.47,CCCCC(O)CC
1054
+ RTI 15,-3.891,1,268.32,1,3,1,58.120000000000005,-4.553999999999999,CCN2c1nc(C)cc(C)c1NC(=O)c3cccnc23
1055
+ "3,5-Dichlorophenol",-3.428,1,163.003,1,1,0,20.23,-1.34,Oc1cc(Cl)cc(Cl)c1
1056
+ 1-Methylphenanthrene,-4.87,1,192.261,0,3,0,0.0,-5.85,Cc1cccc2c1ccc3ccccc32
1057
+ 2-Ethyl-1-hexanol,-2.089,1,130.231,1,0,5,20.23,-2.11,CCCCC(CC)CO
1058
+ Diallate,-3.827,1,270.225,0,0,4,20.310000000000002,-4.2860000000000005,CC(C)N(C(C)C)C(=O)SCC(=CCl)Cl
1059
+ Toluene ,-2.713,1,92.14099999999999,0,1,0,0.0,-2.21,Cc1ccccc1
1060
+ Nitrapyrin,-3.833,1,230.909,0,1,0,12.89,-3.76,Clc1cccc(n1)C(Cl)(Cl)Cl
1061
+ Cycloheptene,-2.5989999999999998,2,96.173,0,1,0,0.0,-3.18,C1CCC=CCC1
1062
+ Thiram,-2.444,1,240.44400000000002,0,0,0,6.48,-3.9,CN(C)C(=S)SSC(=S)N(C)C
1063
+ Griseofulvin,-3.3280000000000003,1,352.7700000000001,0,3,3,71.06,-3.2460000000000004,COC1=CC(=O)CC(C)C13Oc2c(Cl)c(OC)cc(OC)c2C3=O
1064
+ 1-Decanol,-2.8139999999999996,1,158.285,1,0,8,20.23,-3.63,CCCCCCCCCCO
1065
+ "3,3-Dimethylpentane",-2.938,1,100.20499999999998,0,0,2,0.0,-4.23,CCC(C)(C)CC
1066
+ vamidothion,-1.446,1,287.34299999999996,1,0,8,64.63000000000001,1.1440000000000001,CNC(=O)C(C)SCCSP(=O)(OC)(OC)
1067
+ "2,3,4,5-Tetrachlorophenol",-4.335,1,231.893,1,1,0,20.23,-3.15,Oc1cc(Cl)c(Cl)c(Cl)c1Cl
1068
+ Butyraldehyde,-0.7490000000000001,1,72.107,0,0,2,17.07,-0.01,CCCC=O
1069
+ dexamethasone acetate,-3.9330000000000003,1,434.5040000000003,2,4,3,100.9,-4.9,CC4CC3C2CCC1=CC(=O)C=CC1(C)C2(F)C(O)CC3(C)C4(O)C(=O)COC(C)=O
1070
+ Butane,-1.9069999999999998,1,58.123999999999995,0,0,1,0.0,-2.57,CCCC
1071
+ o-Methoxyphenol,-1.9409999999999998,1,124.13899999999997,1,1,1,29.46,-1.96,COc1ccccc1O
1072
+ Fluoromethalone,-3.5069999999999997,1,376.46800000000013,2,4,1,74.6,-4.099,CC1CC2C3CCC(O)(C(=O)C)C3(C)CC(O)C2(F)C4(C)C=CC(=O)C=C14
1073
+ Pentachloroethane,-3.3819999999999997,1,202.29500000000002,0,0,0,0.0,-2.6,ClC(Cl)C(Cl)(Cl)Cl
1074
+ Diethyl phthalate ,-3.016,1,222.23999999999995,0,1,4,52.60000000000001,-2.35,CCOC(=O)c1ccccc1C(=O)OCC
1075
+ 2-Methylpropan-1-ol,-0.672,1,74.12299999999999,1,0,1,20.23,0.1,CC(C)CO
1076
+ Isobutylbenzene,-3.57,1,134.22199999999998,0,1,2,0.0,-4.12,CC(C)Cc1ccccc1
1077
+ Diiodomethane,-2.958,1,267.835,0,0,0,0.0,-2.34,ICI
1078
+ 4-Heptanol,-1.6780000000000002,1,116.204,1,0,4,20.23,-1.4,CCCC(O)CCC
1079
+ Pentyl acetate,-1.8330000000000002,1,130.18699999999998,0,0,4,26.3,-1.89,CCCCCOC(=O)C
1080
+ "2,3,5,6-Tetrachlorophenol",-4.203,1,231.893,1,1,0,20.23,-3.37,Oc1c(Cl)c(Cl)cc(Cl)c1Cl
1081
+ Propylbenzene ,-3.281,1,120.19499999999995,0,1,2,0.0,-3.37,CCCc1ccccc1
1082
+ "1,2-Dichlorotetrafluoroethane",-2.697,1,170.92000000000002,0,0,1,0.0,-2.74,FC(F)(Cl)C(F)(F)Cl
1083
+ 2-butenal,-0.604,1,70.09100000000001,0,0,1,17.07,0.32,CC=CC=O
1084
+ tetramethylurea,-0.495,1,116.16399999999999,0,0,0,23.550000000000004,0.94,CN(C)C(=O)N(C)C
1085
+ "1,2,4,5-Tetramethylbenzene",-3.6639999999999997,1,134.22199999999998,0,1,0,0.0,-4.59,Cc1cc(C)c(C)cc1C
1086
+ norethindrone acetate,-4.2410000000000005,1,340.4630000000001,0,4,1,43.370000000000005,-4.8,CC(=O)OC3(CCC4C2CCC1=CC(=O)CCC1C2CCC34C)C#C
1087
+ Ditalimfos,-3.992,1,299.28800000000007,0,2,5,55.84,-3.35,CCOP(=S)(OCC)N2C(=O)c1ccccc1C2=O
1088
+ salicylanilide,-3.782,1,213.23600000000002,2,2,2,49.33,-3.59,c1ccccc1NC(=O)c2c(O)cccc2
1089
+ Sulfallate,-3.2539999999999996,1,223.79399999999998,0,0,4,3.24,-3.39,CCN(CC)C(=S)SCC(Cl)=C
1090
+ Chloroethane,-1.165,1,64.515,0,0,0,0.0,-1.06,ClCC
1091
+ Mefluidide,-3.165,1,310.297,2,1,3,75.27000000000001,-3.24,CC(=O)Nc1cc(NS(=O)(=O)C(F)(F)F)c(C)cc1C
1092
+ Piperine,-3.659,1,285.343,0,3,3,38.77,-3.46,O=C(C=CC=Cc2ccc1OCOc1c2)N3CCCCC3
1093
+ cis-2-Pentene,-2.076,1,70.135,0,0,1,0.0,-2.54,CC/C=C\C
1094
+ thiofanox,-2.7,1,218.32199999999997,1,0,3,50.69,-1.62,CNC(=O)ON=C(CSC)C(C)(C)C
1095
+ Cyclooctyl-5-spirobarbituric acid,-2.2840000000000003,1,224.25999999999996,2,2,0,75.27,-2.9819999999999998,O=C2NC(=O)C1(CCCCCCC1)C(=O)N2
1096
+ butacarb,-4.6419999999999995,1,263.381,1,1,1,38.33,-4.24,c1(C(C)(C)C)cc(C(C)(C)C)cc(OC(=O)NC)c1
1097
+ Eriodictyol,-3.1519999999999997,1,288.255,4,3,1,107.22000000000001,-3.62,Oc2cc(O)c1C(=O)CC(Oc1c2)c3ccc(O)c(O)c3
1098
+ Benzophenone,-3.612,1,182.222,0,2,2,17.07,-3.12,O=C(c1ccccc1)c2ccccc2
1099
+ Eicosane,-7.5760000000000005,1,282.5559999999999,0,0,17,0.0,-8.172,CCCCCCCCCCCCCCCCCCCC
1100
+ hydrazobenzene,-3.492,2,184.242,2,2,3,24.06,-2.92,N(Nc1ccccc1)c2ccccc2
1101
+ 2-Ethyl-1-butanol,-1.381,1,102.17699999999999,1,0,3,20.23,-1.17,CCC(CC)CO
1102
+ 4-hydroxypyridine,-1.655,1,95.10099999999998,1,1,0,33.120000000000005,1.02,Oc1ccncc1
1103
+ "cis 1,2-Dichloroethylene",-1.561,1,96.94400000000002,0,0,0,0.0,-1.3,Cl\C=C/Cl
1104
+ Methylcyclopentane,-2.452,1,84.162,0,1,0,0.0,-3.3,CC1CCCC1
1105
+ 4-Methyl-2-pentanol,-1.308,1,102.17699999999998,1,0,2,20.23,-0.8,CC(C)CC(C)O
1106
+ RTI 11,-3.125,1,254.28900000000002,1,3,0,55.56,-3.928,O2c1ccc(N)cc1N(C)C(=O)c3cc(C)ccc23
1107
+ "2,2-Dimethylpropanol",-1.011,1,88.14999999999999,1,0,0,20.23,-0.4,CC(C)(C)CO
1108
+ Triadimefon,-4.132,1,293.754,0,2,4,57.010000000000005,-3.61,CC(C)(C)C(=O)C(Oc1ccc(Cl)cc1)n2cncn2
1109
+ Isocarboxazid,-2.251,1,231.25500000000002,2,2,4,67.16,-2.461,Cc1cc(no1)C(=O)NNCc2ccccc2
1110
+ Propylene,-1.235,1,42.080999999999996,0,0,0,0.0,-1.08,CC=C
1111
+ Dichlorophen,-4.9239999999999995,1,269.127,2,2,2,40.46,-3.9530000000000003,Oc1ccc(Cl)cc1Cc2cc(Cl)ccc2O
1112
+ Desmedipham,-4.1819999999999995,1,300.314,2,2,4,76.66,-4.632,CCOC(=O)Nc2cccc(OC(=O)Nc1ccccc1)c2
1113
+ Anthraquinone,-3.34,1,208.21599999999998,0,3,0,34.14,-5.19,O=C1c2ccccc2C(=O)c3ccccc13
1114
+ 2-Octanol,-2.033,1,130.231,1,0,5,20.23,-2.09,CCCCCCC(C)O
1115
+ Oxycarboxin,-2.169,1,267.306,1,2,2,72.47,-2.281,CC1=C(C(=O)Nc2ccccc2)S(=O)(=O)CCO1
1116
+ Butylbenzene,-3.585,1,134.22199999999998,0,1,3,0.0,-4.06,CCCCc1ccccc1
1117
+ parabanic acid,1.091,1,114.05999999999999,2,1,0,75.27,-0.4,O=C1NC(=O)C(=O)N1
1118
+ Abate,-6.678,1,466.47900000000016,0,2,10,55.38000000000001,-6.237,COP(=S)(OC)Oc1ccc(Sc2ccc(OP(=S)(OC)OC)cc2)cc1
1119
+ Chlorthalidone,-2.5639999999999996,1,338.7720000000001,3,3,2,109.49000000000001,-3.451,NS(=O)(=O)c1cc(ccc1Cl)C2(O)NC(=O)c3ccccc23
1120
+ Isobutyl acetate,-1.463,1,116.15999999999998,0,0,2,26.3,-1.21,CC(C)COC(=O)C
1121
+ "2,2,3-Trimethylbutane",-2.9219999999999997,1,100.20499999999998,0,0,0,0.0,-4.36,CC(C)C(C)(C)C
1122
+ "2,3,3',4,4'6-PCB",-7.746,1,395.3270000000001,0,2,1,0.0,-7.66,Clc1ccc(c(Cl)c1Cl)c2c(Cl)cc(Cl)c(Cl)c2Cl
1123
+ Phthalonitrile,-1.7169999999999999,1,128.13399999999996,0,1,0,47.58,-2.38,N#Cc1ccccc1C#N
1124
+ m-Nitrotoluene,-2.64,1,137.138,0,1,1,43.14,-2.44,Cc1cccc(c1)N(=O)=O
1125
+ halothane,-2.608,1,197.381,0,0,0,0.0,-1.71,FC(F)(F)C(Cl)Br
1126
+ Oxamyl,-0.9079999999999999,1,219.266,1,0,1,70.99999999999999,0.106,CNC(=O)ON=C(SC)C(=O)N(C)C
1127
+ Thiometon,-3.323,1,246.35899999999998,0,0,7,18.46,-3.091,CCSCCSP(=S)(OC)OC
1128
+ 2-Methylbutane,-2.245,1,72.151,0,0,1,0.0,-3.18,CCC(C)C
1129
+ Stirofos,-4.32,1,365.96400000000006,0,1,5,44.760000000000005,-4.522,COP(=O)(OC)OC(=CCl)c1cc(Cl)c(Cl)cc1Cl
data/huusk.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/ws496_logS.csv ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ID,SMILES,exp,InChi
2
+ 1,CCCCCC,-3.84,"InChI=1/C6H14/c1-3-5-6-4-2/h3-6H2,1-2H3"
3
+ 2,CCC(C)CC,-3.68,"InChI=1/C6H14/c1-4-6(3)5-2/h6H,4-5H2,1-3H3"
4
+ 3,C1CCCCC1,-3.1,InChI=1/C6H12/c1-2-4-6-5-3-1/h1-6H2
5
+ 4,CC(C)CC(C)C,-4.26,"InChI=1/C7H16/c1-6(2)5-7(3)4/h6-7H,5H2,1-4H3"
6
+ 5,CCCCCCCC,-5.24,"InChI=1/C8H18/c1-3-5-7-8-6-4-2/h3-8H2,1-2H3"
7
+ 6,C1CCCCCCC1,-4.15,InChI=1/C8H16/c1-2-4-6-8-7-5-3-1/h1-8H2
8
+ 7,C/C=C/CC,-2.54,"InChI=1/C5H10/c1-3-5-4-2/h3,5H,4H2,1-2H3/b5-3+"
9
+ 8,C=CCC(C)C,-3.24,"InChI=1/C6H12/c1-4-5-6(2)3/h4,6H,1,5H2,2-3H3"
10
+ 9,C=CCCCCCCC,-5.05,"InChI=1/C9H18/c1-3-5-7-9-8-6-4-2/h3H,1,4-9H2,2H3"
11
+ 10,C=CCC=C,-2.09,"InChI=1/C5H8/c1-3-5-4-2/h3-4H,1-2,5H2"
12
+ 11,C=CCCC=C,-2.68,"InChI=1/C6H10/c1-3-5-6-4-2/h3-4H,1-2,5-6H2"
13
+ 12,C=CC=C,-1.87,"InChI=1/C4H6/c1-3-4-2/h3-4H,1-2H2"
14
+ 13,C1=CCC=CC1,-1.97,"InChI=1/C6H8/c1-2-4-6-5-3-1/h1-2,5-6H,3-4H2"
15
+ 14,C#CCCCCCC,-3.66,"InChI=1/C8H14/c1-3-5-7-8-6-4-2/h1H,4-8H2,2H3"
16
+ 15,C#CCCCCCCC,-4.24,"InChI=1/C9H16/c1-3-5-7-9-8-6-4-2/h1H,4-9H2,2H3"
17
+ 16,Cc1ccc(C)cc1,-2.77,"InChI=1/C8H10/c1-7-3-5-8(2)6-4-7/h3-6H,1-2H3"
18
+ 17,Cc1cccc(C)c1,-2.82,"InChI=1/C8H10/c1-7-4-3-5-8(2)6-7/h3-6H,1-2H3"
19
+ 18,Cc1cc(C)cc(C)c1,-3.4,"InChI=1/C9H12/c1-7-4-8(2)6-9(3)5-7/h4-6H,1-3H3"
20
+ 19,Cc1cccc(C)c1C,-3.2,"InChI=1/C9H12/c1-7-5-4-6-8(2)9(7)3/h4-6H,1-3H3"
21
+ 20,CCc1ccc(C)cc1,-3.11,"InChI=1/C9H12/c1-3-9-6-4-8(2)5-7-9/h4-7H,3H2,1-2H3"
22
+ 21,CC(C)c1ccccc1,-3.27,"InChI=1/C9H12/c1-8(2)9-6-4-3-5-7-9/h3-8H,1-2H3"
23
+ 22,CCCCc1ccccc1,-4.06,"InChI=1/C10H14/c1-2-3-7-10-8-5-4-6-9-10/h4-6,8-9H,2-3,7H2,1H3"
24
+ 23,CCc1ccc(CC)cc1,-3.75,"InChI=1/C10H14/c1-3-9-5-7-10(4-2)8-6-9/h5-8H,3-4H2,1-2H3"
25
+ 24,CC[C@@H](C)c1ccccc1,-3.89,"InChI=1/C10H14/c1-3-9(2)10-7-5-4-6-8-10/h4-9H,3H2,1-2H3/t9-/m1/s1"
26
+ 25,Cc1cc(C)c(C)c(C)c1C,-4,"InChI=1/C11H16/c1-7-6-8(2)10(4)11(5)9(7)3/h6H,1-5H3"
27
+ 26,CCC(C)(C)c1ccccc1,-4.15,"InChI=1/C11H16/c1-4-11(2,3)10-8-6-5-7-9-10/h5-9H,4H2,1-3H3"
28
+ 27,CCCCCCc1ccccc1,-5.21,"InChI=1/C12H18/c1-2-3-4-6-9-12-10-7-5-8-11-12/h5,7-8,10-11H,2-4,6,9H2,1H3"
29
+ 28,C=Cc1ccccc1,-2.82,"InChI=1/C8H8/c1-2-8-6-4-3-5-7-8/h2-7H,1H2"
30
+ 29,c1ccc(cc1)CCc1ccccc1,-4.62,"InChI=1/C14H14/c1-3-7-13(8-4-1)11-12-14-9-5-2-6-10-14/h1-10H,11-12H2"
31
+ 30,Cc1ccc2ccccc2c1,-3.77,"InChI=1/C11H10/c1-9-6-7-10-4-2-3-5-11(10)8-9/h2-8H,1H3"
32
+ 31,CCc1cccc2ccccc12,-4.17,"InChI=1/C12H12/c1-2-10-7-5-8-11-6-3-4-9-12(10)11/h3-9H,2H2,1H3"
33
+ 32,CCc1ccc2ccccc2c1,-4.29,"InChI=1/C12H12/c1-2-10-7-8-11-5-3-4-6-12(11)9-10/h3-9H,2H2,1H3"
34
+ 33,Cc1ccc(C)c2ccccc12,-4.14,"InChI=1/C12H12/c1-9-7-8-10(2)12-6-4-3-5-11(9)12/h3-8H,1-2H3"
35
+ 34,Cc1ccc2cc(C)ccc2c1,-4.89,"InChI=1/C12H12/c1-9-3-5-12-8-10(2)4-6-11(12)7-9/h3-8H,1-2H3"
36
+ 35,Cc1ccc(C)c2c(C)cccc12,-4.92,"InChI=1/C13H14/c1-9-7-8-11(3)13-10(2)5-4-6-12(9)13/h4-8H,1-3H3"
37
+ 36,c1ccc2c(c1)ccc1ccccc21,-5.26,InChI=1/C14H10/c1-3-7-13-11(5-1)9-10-12-6-2-4-8-14(12)13/h1-10H
38
+ 37,Cc1c2ccccc2cc2ccccc12,-5.89,"InChI=1/C15H12/c1-11-14-8-4-2-6-12(14)10-13-7-3-5-9-15(11)13/h2-10H,1H3"
39
+ 38,c1ccc2c(c1)c1cccc3cccc2c13,-6,InChI=1/C16H10/c1-2-8-13-12(7-1)14-9-3-5-11-6-4-10-15(13)16(11)14/h1-10H
40
+ 39,c1ccc2cc3c(cc2c1)Cc1ccccc31,-8.04,"InChI=1/C17H12/c1-2-6-13-11-17-15(9-12(13)5-1)10-14-7-3-4-8-16(14)17/h1-9,11H,10H2"
41
+ 40,c1ccc2c(c1)c1ccccc1c1ccccc21,-6.74,InChI=1/C18H12/c1-2-8-14-13(7-1)15-9-3-4-11-17(15)18-12-6-5-10-16(14)18/h1-12H
42
+ 41,c1ccc2c(c1)cc1ccc3cccc4ccc2c1c34,-8.19,InChI=1/C20H12/c1-2-7-17-15(4-1)12-16-9-8-13-5-3-6-14-10-11-18(17)20(16)19(13)14/h1-12H
43
+ 42,Cc1c2ccccc2c(C)c2c1ccc1ccccc21,-7.02,"InChI=1/C20H16/c1-13-16-8-5-6-9-17(16)14(2)20-18(13)12-11-15-7-3-4-10-19(15)20/h3-12H,1-2H3"
44
+ 43,c1ccc2c(c1)c1cccc3ccc4cccc2c4c13,-7.8,InChI=1/C20H12/c1-2-8-16-15(7-1)17-9-3-5-13-11-12-14-6-4-10-18(16)20(14)19(13)17/h1-12H
45
+ 44,c1ccc2c(c1)ccc1c3ccccc3ccc21,-8.06,InChI=1/C18H12/c1-3-7-15-13(5-1)9-11-18-16-8-4-2-6-14(16)10-12-17(15)18/h1-12H
46
+ 45,c1cc2cccc3c4cccc5cccc(c(c1)c23)c45,-8.8,InChI=1/C20H12/c1-5-13-6-2-11-17-18-12-4-8-14-7-3-10-16(20(14)18)15(9-1)19(13)17/h1-12H
47
+ 46,c1cc2ccc3ccc4ccc5cccc6c(c1)c2c3c4c56,-9.03,InChI=1/C22H12/c1-3-13-7-9-15-11-12-16-10-8-14-4-2-6-18-17(5-1)19(13)21(15)22(16)20(14)18/h1-12H
48
+ 47,c1ccc2cc3cc(ccc3cc2c1)N,-5.17,"InChI=1/C14H11N/c15-14-6-5-12-7-10-3-1-2-4-11(10)8-13(12)9-14/h1-9H,15H2"
49
+ 48,CCc1ccc2cc3ccccc3cc2c1,-6.89,"InChI=1/C16H14/c1-2-12-7-8-15-10-13-5-3-4-6-14(13)11-16(15)9-12/h3-11H,2H2,1H3"
50
+ 49,c1ccc2c(c1)ccc1c3cccc4cccc(c34)c21,-8,InChI=1/C20H12/c1-2-8-15-13(5-1)11-12-17-16-9-3-6-14-7-4-10-18(19(14)16)20(15)17/h1-12H
51
+ 50,c1ccc2cc3c4cccc5cccc(c3cc2c1)c45,-8.49,InChI=1/C20H12/c1-2-6-15-12-19-17-10-4-8-13-7-3-9-16(20(13)17)18(19)11-14(15)5-1/h1-12H
52
+ 51,C(Br)Cl,-0.89,InChI=1/CH2BrCl/c2-1-3/h1H2
53
+ 52,C(Br)Br,-1.17,InChI=1/CH2Br2/c2-1-3/h1H2
54
+ 53,C(Cl)(Cl)Cl,-1.17,InChI=1/CHCl3/c2-1(3)4/h1H
55
+ 54,C(Br)(Br)Cl,-1.9,InChI=1/CHBr2Cl/c2-1(3)4/h1H
56
+ 55,C(Cl)(Cl)(Cl)Cl,-2.31,"InChI=1/CCl4/c2-1(3,4)5"
57
+ 56,C(Br)(Br)(Br)Br,-3.14,"InChI=1/CBr4/c2-1(3,4)5"
58
+ 57,CC(Cl)Cl,-1.29,"InChI=1/C2H4Cl2/c1-2(3)4/h2H,1H3"
59
+ 58,C(CCl)Br,-1.32,InChI=1/C2H4BrCl/c3-1-2-4/h1-2H2
60
+ 59,C(C(Cl)Cl)Cl,-1.48,"InChI=1/C2H3Cl3/c3-1-2(4)5/h2H,1H2"
61
+ 60,C(C(Cl)Cl)(Cl)Cl,-1.74,InChI=1/C2H2Cl4/c3-1(4)2(5)6/h1-2H
62
+ 61,C(C(Cl)(Cl)Cl)(Cl)(Cl)Cl,-3.67,"InChI=1/C2Cl6/c3-1(4,5)2(6,7)8"
63
+ 62,CCCBr,-1.73,"InChI=1/C3H7Br/c1-2-3-4/h2-3H2,1H3"
64
+ 63,C(CBr)CBr,-2.08,InChI=1/C3H6Br2/c4-2-1-3-5/h1-3H2
65
+ 64,C([C@@H](CCl)Br)Br,-2.38,"InChI=1/C3H5Br2Cl/c4-1-3(5)2-6/h3H,1-2H2/t3-/m0/s1"
66
+ 65,CC[C@@H](C)Cl,-1.96,"InChI=1/C4H9Cl/c1-3-4(2)5/h4H,3H2,1-2H3/t4-/m1/s1"
67
+ 66,CCCCBr,-2.37,"InChI=1/C4H9Br/c1-2-3-4-5/h2-4H2,1H3"
68
+ 67,CC(C)CBr,-2.43,"InChI=1/C4H9Br/c1-4(2)3-5/h4H,3H2,1-2H3"
69
+ 68,CCCCCCl,-2.73,"InChI=1/C5H11Cl/c1-2-3-4-5-6/h2-5H2,1H3"
70
+ 69,CCC(CC)Cl,-2.63,"InChI=1/C5H11Cl/c1-3-5(6)4-2/h5H,3-4H2,1-2H3"
71
+ 70,C[C@H](C(C)(C)Cl)Cl,-2.69,"InChI=1/C5H10Cl2/c1-4(6)5(2,3)7/h4H,1-3H3/t4-/m1/s1"
72
+ 71,CCCCCBr,-3.07,"InChI=1/C5H11Br/c1-2-3-4-5-6/h2-5H2,1H3"
73
+ 72,CCCCCCCCBr,-5.06,"InChI=1/C8H17Br/c1-2-3-4-5-6-7-8-9/h2-8H2,1H3"
74
+ 73,C(C(Cl)(F)F)(Cl)(F)F,-3.12,"InChI=1/C2Cl2F4/c3-1(5,6)2(4,7)8"
75
+ 74,[C@@H]1([C@@H]([C@@H]([C@@H]([C@H]([C@H]1Cl)Cl)Cl)Cl)Cl)Cl,-4.51,"InChI=1/C6H6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9/h1-6H/t1-,2-,3-,4-,5-,6-"
76
+ 75,CCCCCCCBr,-4.43,"InChI=1/C7H15Br/c1-2-3-4-5-6-7-8/h2-7H2,1H3"
77
+ 76,CCCCCCCCl,-3.99,"InChI=1/C7H15Cl/c1-2-3-4-5-6-7-8/h2-7H2,1H3"
78
+ 77,C(=C(Cl)Cl)(Cl)Cl,-2.54,InChI=1/C2Cl4/c3-1(4)2(5)6
79
+ 78,C1=C[C@@H]2C[C@H]1[C@@H]1[C@H]2[C@@]2(C(=C([C@]1(C2(Cl)Cl)Cl)Cl)Cl)Cl,-7.33,"InChI=1/C12H8Cl6/c13-8-9(14)11(16)7-5-2-1-4(3-5)6(7)10(8,15)12(11,17)18/h1-2,4-7H,3H2/t4-,5+,6+,7-,10+,11-"
80
+ 79,C1[C@H]2[C@@H]3[C@H]([C@@H]1[C@H]1[C@@H]2O1)[C@@]1(C(=C([C@]3(C1(Cl)Cl)Cl)Cl)Cl)Cl,-6.18,"InChI=1/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2/t2-,3+,4+,5-,6+,7-,10+,11-"
81
+ 80,C=CCBr,-1.5,"InChI=1/C3H5Br/c1-2-3-4/h2H,1,3H2"
82
+ 81,C(=C(\C(=C(Cl)Cl)Cl)/Cl)\Cl,-4.23,InChI=1/C4HCl5/c5-1-2(6)3(7)4(8)9/h1H/b2-1+
83
+ 82,c1ccc(cc1)Cl,-2.38,InChI=1/C6H5Cl/c7-6-4-2-1-3-5-6/h1-5H
84
+ 83,c1cc(ccc1F)I,-3.13,InChI=1/C6H4FI/c7-5-1-3-6(8)4-2-5/h1-4H
85
+ 84,c1cc(cc(c1)Cl)Br,-3.21,InChI=1/C6H4BrCl/c7-5-2-1-3-6(8)4-5/h1-4H
86
+ 85,c1cc(ccc1Br)Cl,-3.63,InChI=1/C6H4BrCl/c7-5-1-3-6(8)4-2-5/h1-4H
87
+ 86,c1ccc(c(c1)Br)Br,-3.5,InChI=1/C6H4Br2/c7-5-3-1-2-4-6(5)8/h1-4H
88
+ 87,c1cc(c(c(c1)Cl)Cl)Cl,-4,InChI=1/C6H3Cl3/c7-4-2-1-3-5(8)6(4)9/h1-3H
89
+ 88,c1c(cc(cc1Cl)Cl)Cl,-4.48,InChI=1/C6H3Cl3/c7-4-1-5(8)3-6(9)2-4/h1-3H
90
+ 89,c1cc(c(c(c1)Br)Br)Br,-5.04,InChI=1/C6H3Br3/c7-4-2-1-3-5(8)6(4)9/h1-3H
91
+ 90,c1c(c(cc(c1F)F)F)F,-2.38,InChI=1/C6H2F4/c7-3-1-4(8)6(10)2-5(3)9/h1-2H
92
+ 91,c1cc(c(c(c1Cl)Cl)Cl)Cl,-4.57,InChI=1/C6H2Cl4/c7-3-1-2-4(8)6(10)5(3)9/h1-2H
93
+ 92,c1c(c(c(c(c1Cl)Cl)Cl)Cl)Cl,-5.65,InChI=1/C6HCl5/c7-2-1-3(8)5(10)6(11)4(2)9/h1H
94
+ 93,Cc1ccc(cc1)Br,-3.19,"InChI=1/C7H7Br/c1-6-2-4-7(8)5-3-6/h2-5H,1H3"
95
+ 94,c1cc(ccc1F)F,-1.97,InChI=1/C6H4F2/c7-5-1-2-6(8)4-3-5/h1-4H
96
+ 95,c1ccc(cc1)CCl,-2.39,"InChI=1/C7H7Cl/c8-6-7-4-2-1-3-5-7/h1-5H,6H2"
97
+ 96,Cc1ccc(cc1)Cl,-3.08,"InChI=1/C7H7Cl/c1-6-2-4-7(8)5-3-6/h2-5H,1H3"
98
+ 97,c1ccc(cc1)c1ccccc1Cl,-4.54,InChI=1/C12H9Cl/c13-12-9-5-4-8-11(12)10-6-2-1-3-7-10/h1-9H
99
+ 98,c1cc(cc(c1)Cl)c1cccc(c1)Cl,-5.8,InChI=1/C12H8Cl2/c13-11-5-1-3-9(7-11)10-4-2-6-12(14)8-10/h1-8H
100
+ 99,c1ccc(cc1)c1ccc(c(c1)Cl)Cl,-6.39,InChI=1/C12H8Cl2/c13-11-7-6-10(8-12(11)14)9-4-2-1-3-5-9/h1-8H
101
+ 100,c1ccc(cc1)c1cc(c(cc1Cl)Cl)Cl,-6.27,InChI=1/C12H7Cl3/c13-10-7-12(15)11(14)6-9(10)8-4-2-1-3-5-8/h1-7H
102
+ 101,c1ccc(cc1)c1c(cc(cc1Cl)Cl)Cl,-6.14,InChI=1/C12H7Cl3/c13-9-6-10(14)12(11(15)7-9)8-4-2-1-3-5-8/h1-7H
103
+ 102,c1cc(cc(c1)Cl)c1cc(ccc1Cl)Cl,-6.01,InChI=1/C12H7Cl3/c13-9-3-1-2-8(6-9)11-7-10(14)4-5-12(11)15/h1-7H
104
+ 103,c1ccc(c(c1)c1ccc(c(c1)Cl)Cl)Cl,-6.29,InChI=1/C12H7Cl3/c13-10-4-2-1-3-9(10)8-5-6-11(14)12(15)7-8/h1-7H
105
+ 104,c1cc(ccc1c1ccc(cc1Cl)Cl)Cl,-6.21,InChI=1/C12H7Cl3/c13-9-3-1-8(2-4-9)11-6-5-10(14)7-12(11)15/h1-7H
106
+ 105,c1cc(c2cc(ccc2Cl)Cl)c(c(c1)Cl)Cl,-6.47,InChI=1/C12H6Cl4/c13-7-4-5-10(14)9(6-7)8-2-1-3-11(15)12(8)16/h1-6H
107
+ 106,c1cc(c(cc1Cl)c1cc(ccc1Cl)Cl)Cl,-7,InChI=1/C12H6Cl4/c13-7-1-3-11(15)9(5-7)10-6-8(14)2-4-12(10)16/h1-6H
108
+ 107,c1cc(c(c2cc(ccc2Cl)Cl)c(c1)Cl)Cl,-6.8,InChI=1/C12H6Cl4/c13-7-4-5-9(14)8(6-7)12-10(15)2-1-3-11(12)16/h1-6H
109
+ 108,c1cc(c(c(c1)Cl)c1c(cccc1Cl)Cl)Cl,-7.39,InChI=1/C12H6Cl4/c13-7-3-1-4-8(14)11(7)12-9(15)5-2-6-10(12)16/h1-6H
110
+ 109,c1cc(c2ccc(c(c2Cl)Cl)Cl)c(c(c1)Cl)Cl,-7.05,InChI=1/C12H5Cl5/c13-8-3-1-2-6(10(8)15)7-4-5-9(14)12(17)11(7)16/h1-5H
111
+ 110,c1cc(c(cc1c1cc(c(cc1Cl)Cl)Cl)Cl)Cl,-7.39,InChI=1/C12H5Cl5/c13-8-2-1-6(3-10(8)15)7-4-11(16)12(17)5-9(7)14/h1-5H
112
+ 111,c1ccc(cc1)c1c(c(c(c(c1Cl)Cl)Cl)Cl)Cl,-7.92,InChI=1/C12H5Cl5/c13-8-7(6-4-2-1-3-5-6)9(14)11(16)12(17)10(8)15/h1-5H
113
+ 112,c1cc(c(c(c1c1cc(c(cc1Cl)Cl)Cl)Cl)Cl)Cl,-8.32,InChI=1/C12H4Cl6/c13-7-2-1-5(11(17)12(7)18)6-3-9(15)10(16)4-8(6)14/h1-4H
114
+ 113,c1cc(c(cc1Cl)c1cc(c(c(c1Cl)Cl)Cl)Cl)Cl,-7.68,InChI=1/C12H4Cl6/c13-5-1-2-8(14)6(3-5)7-4-9(15)11(17)12(18)10(7)16/h1-4H
115
+ 114,c1cc(c(cc1Cl)c1c(c(cc(c1Cl)Cl)Cl)Cl)Cl,-7.42,InChI=1/C12H4Cl6/c13-5-1-2-7(14)6(3-5)10-11(17)8(15)4-9(16)12(10)18/h1-4H
116
+ 115,c1c(cc(c(c1Cl)c1c(cc(cc1Cl)Cl)Cl)Cl)Cl,-8.71,InChI=1/C12H4Cl6/c13-5-1-7(15)11(8(16)2-5)12-9(17)3-6(14)4-10(12)18/h1-4H
117
+ 116,c1cc(c(cc1c1c(cc(c(c1Cl)Cl)Cl)Cl)Cl)Cl,-7.66,InChI=1/C12H4Cl6/c13-6-2-1-5(3-7(6)14)10-8(15)4-9(16)11(17)12(10)18/h1-4H
118
+ 117,c1cc(c(c(c1c1ccc(c(c1Cl)Cl)Cl)Cl)Cl)Cl,-8.01,InChI=1/C12H4Cl6/c13-7-3-1-5(9(15)11(7)17)6-2-4-8(14)12(18)10(6)16/h1-4H
119
+ 118,c1c(c(cc(c1Cl)Cl)Cl)c1c(cc(c(c1Cl)Cl)Cl)Cl,-7.92,InChI=1/C12H3Cl7/c13-5-2-7(15)6(14)1-4(5)10-8(16)3-9(17)11(18)12(10)19/h1-3H
120
+ 119,c1cc(c(cc1Cl)c1c(c(c(c(c1Cl)Cl)Cl)Cl)Cl)Cl,-8.94,InChI=1/C12H3Cl7/c13-4-1-2-6(14)5(3-4)7-8(15)10(17)12(19)11(18)9(7)16/h1-3H
121
+ 120,c1c(c2cc(c(c(c2Cl)Cl)Cl)Cl)c(c(c(c1Cl)Cl)Cl)Cl,-9.16,InChI=1/C12H2Cl8/c13-5-1-3(7(15)11(19)9(5)17)4-2-6(14)10(18)12(20)8(4)16/h1-2H
122
+ 121,c1c(c2c(c(c(c(c2Cl)Cl)Cl)Cl)Cl)c(c(c(c1Cl)Cl)Cl)Cl,-10.26,InChI=1/C12HCl9/c13-3-1-2(5(14)9(18)6(3)15)4-7(16)10(19)12(21)11(20)8(4)17/h1H
123
+ 122,c1c(c(c(c2c(c(c(c(c2Cl)Cl)Cl)Cl)Cl)c(c1Cl)Cl)Cl)Cl,-10.41,InChI=1/C12HCl9/c13-2-1-3(14)7(16)4(6(2)15)5-8(17)10(19)12(21)11(20)9(5)18/h1H
124
+ 123,c1cc(ccc1C(c1ccc(cc1)Cl)C(Cl)Cl)Cl,-7.2,"InChI=1/C14H10Cl4/c15-11-5-1-9(2-6-11)13(14(17)18)10-3-7-12(16)8-4-10/h1-8,13-14H"
125
+ 124,CCC(CC)O,-0.24,"InChI=1/C5H12O/c1-3-5(6)4-2/h5-6H,3-4H2,1-2H3"
126
+ 125,CCC(C)(C)O,0.08,"InChI=1/C5H12O/c1-4-5(2,3)6/h6H,4H2,1-3H3"
127
+ 126,CC(C)[C@@H](C)O,-0.2,"InChI=1/C5H12O/c1-4(2)5(3)6/h4-6H,1-3H3/t5-/m1/s1"
128
+ 127,CCCC[C@@H](C)O,-0.89,"InChI=1/C6H14O/c1-3-4-5-6(2)7/h6-7H,3-5H2,1-2H3/t6-/m1/s1"
129
+ 128,CC(C)[C@@H](C)CO,-0.39,"InChI=1/C6H14O/c1-5(2)6(3)4-7/h5-7H,4H2,1-3H3/t6-/m0/s1"
130
+ 129,CC[C@H](C(C)C)O,-0.7,"InChI=1/C6H14O/c1-4-6(7)5(2)3/h5-7H,4H2,1-3H3/t6-/m1/s1"
131
+ 130,CC(C)(C)CCO,-0.5,"InChI=1/C6H14O/c1-6(2,3)4-5-7/h7H,4-5H2,1-3H3"
132
+ 131,C[C@H](C(C)(C)C)O,-0.62,"InChI=1/C6H14O/c1-5(7)6(2,3)4/h5,7H,1-4H3/t5-/m1/s1"
133
+ 132,CCC(C)(CC)O,-0.38,"InChI=1/C6H14O/c1-4-6(3,7)5-2/h7H,4-5H2,1-3H3"
134
+ 133,CCCCCCCO,-1.81,"InChI=1/C7H16O/c1-2-3-4-5-6-7-8/h8H,2-7H2,1H3"
135
+ 134,CCCC[C@@H](CC)O,-1.47,"InChI=1/C7H16O/c1-3-5-6-7(8)4-2/h7-8H,3-6H2,1-2H3/t7-/m1/s1"
136
+ 135,CC(C)C[C@H](C)CO,-1.6,"InChI=1/C7H16O/c1-6(2)4-7(3)5-8/h6-8H,4-5H2,1-3H3/t7-/m0/s1"
137
+ 136,CCCCC(C)(C)O,-1.08,"InChI=1/C7H16O/c1-4-5-6-7(2,3)8/h8H,4-6H2,1-3H3"
138
+ 137,CC(C)C(C(C)C)O,-1.22,"InChI=1/C7H16O/c1-5(2)7(8)6(3)4/h5-8H,1-4H3"
139
+ 138,CC[C@@H](C)C(C)(C)O,-0.89,"InChI=1/C7H16O/c1-5-6(2)7(3,4)8/h6,8H,5H2,1-4H3/t6-/m1/s1"
140
+ 139,CC(C)(C)C(C)(C)O,-0.72,"InChI=1/C7H16O/c1-6(2,3)7(4,5)8/h8H,1-5H3"
141
+ 140,CCC(C)(CC)O,-1.6,"InChI=1/C6H14O/c1-4-6(3,7)5-2/h7H,4-5H2,1-3H3"
142
+ 141,CC[C@@](C)(C(C)(C)C)O,-1.27,"InChI=1/C8H18O/c1-6-8(5,9)7(2,3)4/h9H,6H2,1-5H3/t8-/m0/s1"
143
+ 142,CCCCCC(C)(C)O,-1.72,"InChI=1/C8H18O/c1-4-5-6-7-8(2,3)9/h9H,4-7H2,1-3H3"
144
+ 143,CCCCCCCCCCCCO,-4.67,"InChI=1/C12H26O/c1-2-3-4-5-6-7-8-9-10-11-12-13/h13H,2-12H2,1H3"
145
+ 144,c1ccc(cc1)CCCO,-1.38,"InChI=1/C9H12O/c10-8-4-7-9-5-2-1-3-6-9/h1-3,5-6,10H,4,7-8H2"
146
+ 145,CCCCCCCCCO,-3.01,"InChI=1/C9H20O/c1-2-3-4-5-6-7-8-9-10/h10H,2-9H2,1H3"
147
+ 146,C[C@H]1CC[C@H](CC1)O,-0.88,"InChI=1/C7H14O/c1-6-2-4-7(8)5-3-6/h6-8H,2-5H2,1H3/t6-,7+"
148
+ 147,c1cc(cc(c1)O)O,0.81,"InChI=1/C6H6O2/c7-5-2-1-3-6(8)4-5/h1-4,7-8H"
149
+ 148,c1cc(ccc1O)O,-0.17,"InChI=1/C6H6O2/c7-5-1-2-6(8)4-3-5/h1-4,7-8H"
150
+ 149,Cc1ccccc1O,-0.62,"InChI=1/C7H8O/c1-6-4-2-3-5-7(6)8/h2-5,8H,1H3"
151
+ 150,c1ccc(cc1)CO,-0.4,"InChI=1/C7H8O/c8-6-7-4-2-1-3-5-7/h1-5,8H,6H2"
152
+ 151,Cc1cccc(c1)O,-0.68,"InChI=1/C7H8O/c1-6-3-2-4-7(8)5-6/h2-5,8H,1H3"
153
+ 152,Cc1ccc(c(C)c1)O,-1.19,"InChI=1/C8H10O/c1-6-3-4-8(9)7(2)5-6/h3-5,9H,1-2H3"
154
+ 153,Cc1cc(C)cc(c1)O,-1.4,"InChI=1/C8H10O/c1-6-3-7(2)5-8(9)4-6/h3-5,9H,1-2H3"
155
+ 154,CC(C)(C)c1ccc(cc1)O,-2.41,"InChI=1/C10H14O/c1-10(2,3)8-4-6-9(11)7-5-8/h4-7,11H,1-3H3"
156
+ 155,c1ccc2c(c1)cccc2O,-2.22,"InChI=1/C10H8O/c11-10-7-3-5-8-4-1-2-6-9(8)10/h1-7,11H"
157
+ 156,c1cc2c(cccc2O)c(c1)O,-2.92,"InChI=1/C10H8O2/c11-9-5-1-3-7-8(9)4-2-6-10(7)12/h1-6,11-12H"
158
+ 157,c1ccc(cc1)c1ccccc1O,-2.39,"InChI=1/C12H10O/c13-12-9-5-4-8-11(12)10-6-2-1-3-7-10/h1-9,13H"
159
+ 158,CCCCCC=O,-1.3,"InChI=1/C6H12O/c1-2-3-4-5-6-7/h6H,2-5H2,1H3"
160
+ 159,CCCCCCCCC=O,-3.17,"InChI=1/C9H18O/c1-2-3-4-5-6-7-8-9-10/h9H,2-8H2,1H3"
161
+ 160,c1ccc(cc1)C=O,-1.19,InChI=1/C7H6O/c8-6-7-4-2-1-3-5-7/h1-6H
162
+ 161,C1CCC(=O)CC1,-0.6,InChI=1/C6H10O/c7-6-4-2-1-3-5-6/h1-5H2
163
+ 162,CCCCCC(=O)C,-1.42,"InChI=1/C7H14O/c1-3-4-5-6-7(2)8/h3-6H2,1-2H3"
164
+ 163,CCCCC(=O)CCCC,-2.59,"InChI=1/C9H18O/c1-3-5-7-9(10)8-6-4-2/h3-8H2,1-2H3"
165
+ 164,CC(C)CCC(=O)C,-1.33,"InChI=1/C7H14O/c1-6(2)4-5-7(3)8/h6H,4-5H2,1-3H3"
166
+ 165,C1=CC(=O)C=CC1=O,-0.99,InChI=1/C6H4O2/c7-5-1-2-6(8)4-3-5/h1-4H
167
+ 166,CC(=O)CC(=O)C,0.22,"InChI=1/C5H8O2/c1-4(6)3-5(2)7/h3H2,1-2H3"
168
+ 167,CCCCCCCCC(=O)C,-3.31,"InChI=1/C10H20O/c1-3-4-5-6-7-8-9-10(2)11/h3-9H2,1-2H3"
169
+ 168,CC(=O)[C@@H]1CC[C@H]2[C@@H]3CCC4=CC(=O)CC[C@]4(C)[C@H]3CC[C@]12C,-4.43,"InChI=1/C21H30O2/c1-13(22)17-6-7-18-16-5-4-14-12-15(23)8-10-20(14,2)19(16)9-11-21(17,18)3/h12,16-19H,4-11H2,1-3H3/t16-,17-,18-,19-,20-,21+/m0/s1"
170
+ 169,c1ccc2c(c1)C(=O)c1ccccc1C2=O,-5.19,InChI=1/C14H8O2/c15-13-9-5-1-2-6-10(9)14(16)12-8-4-3-7-11(12)13/h1-8H
171
+ 170,C(CC(=O)O)CC(=O)O,1,"InChI=1/C5H8O4/c6-4(7)2-1-3-5(8)9/h1-3H2,(H,6,7)(H,8,9)/f/h6,8H"
172
+ 171,C(CCC(=O)O)CC(=O)O,-0.82,"InChI=1/C6H10O4/c7-5(8)3-1-2-4-6(9)10/h1-4H2,(H,7,8)(H,9,10)/f/h7,9H"
173
+ 172,CCCCCCCC(=O)O,-2.3,"InChI=1/C8H16O2/c1-2-3-4-5-6-7-8(9)10/h2-7H2,1H3,(H,9,10)/f/h9H"
174
+ 173,CCCCCCCCCC(=O)O,-3.44,"InChI=1/C10H20O2/c1-2-3-4-5-6-7-8-9-10(11)12/h2-9H2,1H3,(H,11,12)/f/h11H"
175
+ 174,c1ccc(cc1)C(=O)O,-1.55,"InChI=1/C7H6O2/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H,8,9)/f/h8H"
176
+ 175,c1ccc(c(c1)C(=O)O)C(=O)O,-2.11,"InChI=1/C8H6O4/c9-7(10)5-3-1-2-4-6(5)8(11)12/h1-4H,(H,9,10)(H,11,12)/f/h9,11H"
177
+ 176,CCCCCCCCCCCCCCCC(=O)O,-6.81,"InChI=1/C16H32O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16(17)18/h2-15H2,1H3,(H,17,18)/f/h17H"
178
+ 177,C=C1C[C@]23C[C@]1(CC[C@H]2[C@]12C=C[C@@H]([C@](C)([C@H]1[C@@H]3C(=O)O)C(=O)O2)O)O,-1.84,"InChI=1/C19H22O6/c1-9-7-17-8-18(9,24)5-3-10(17)19-6-4-11(20)16(2,15(23)25-19)13(19)12(17)14(21)22/h4,6,10-13,20,24H,1,3,5,7-8H2,2H3,(H,21,22)/t10-,11+,12-,13-,16-,17+,18+,19-/m1/s1/f/h21H"
179
+ 178,CCC(CC)C(=O)O,-0.81,"InChI=1/C6H12O2/c1-3-5(4-2)6(7)8/h5H,3-4H2,1-2H3,(H,7,8)/f/h7H"
180
+ 179,CCCC(CCC)C(=O)O,-1.86,"InChI=1/C8H16O2/c1-3-5-7(6-4-2)8(9)10/h7H,3-6H2,1-2H3,(H,9,10)/f/h9H"
181
+ 180,c1ccc(cc1)OCC(=O)O,-1.1,"InChI=1/C8H8O3/c9-8(10)6-11-7-4-2-1-3-5-7/h1-5H,6H2,(H,9,10)/f/h9H"
182
+ 181,CCCCCCCCCCC(=O)O,-3.55,"InChI=1/C11H22O2/c1-2-3-4-5-6-7-8-9-10-11(12)13/h2-10H2,1H3,(H,12,13)/f/h12H"
183
+ 182,CCCCCCCCCCCCCC(=O)O,-5.33,"InChI=1/C14H28O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14(15)16/h2-13H2,1H3,(H,15,16)/f/h15H"
184
+ 183,COC=O,0.58,"InChI=1/C2H4O2/c1-4-2-3/h2H,1H3"
185
+ 184,CCCC(=O)OC,-0.82,"InChI=1/C5H10O2/c1-3-4-5(6)7-2/h3-4H2,1-2H3"
186
+ 185,CC(C)COC=O,-1.01,"InChI=1/C5H10O2/c1-5(2)3-7-4-6/h4-5H,3H2,1-2H3"
187
+ 186,CCCCOC(=O)C,-1.24,"InChI=1/C6H12O2/c1-3-4-5-8-6(2)7/h3-5H2,1-2H3"
188
+ 187,CCCCC(=O)OC,-1.36,"InChI=1/C6H12O2/c1-3-4-5-6(7)8-2/h3-5H2,1-2H3"
189
+ 188,C=CC(=O)OCC(C)C,-1.21,"InChI=1/C7H12O2/c1-4-7(8)9-5-6(2)3/h4,6H,1,5H2,2-3H3"
190
+ 189,CCCC(=O)OCCC,-1.92,"InChI=1/C7H14O2/c1-3-5-7(8)9-6-4-2/h3-6H2,1-2H3"
191
+ 190,CCCCCCOC(=O)C,-2.46,"InChI=1/C8H16O2/c1-3-4-5-6-7-10-8(2)9/h3-7H2,1-2H3"
192
+ 191,CCCCCC(=O)OCC,-2.31,"InChI=1/C8H16O2/c1-3-5-6-7-8(9)10-4-2/h3-7H2,1-2H3"
193
+ 192,CCCCCCC(=O)OCC,-2.71,"InChI=1/C9H18O2/c1-3-5-6-7-8-9(10)11-4-2/h3-8H2,1-2H3"
194
+ 193,CCCCCCCC(=O)OCC,-3.39,"InChI=1/C10H20O2/c1-3-5-6-7-8-9-10(11)12-4-2/h3-9H2,1-2H3"
195
+ 194,CCCCCCCCC(=O)OCC,-3.8,"InChI=1/C11H22O2/c1-3-5-6-7-8-9-10-11(12)13-4-2/h3-10H2,1-2H3"
196
+ 195,CCCCCCCCCC(=O)OCC,-4.1,"InChI=1/C12H24O2/c1-3-5-6-7-8-9-10-11-12(13)14-4-2/h3-11H2,1-2H3"
197
+ 196,COC(=O)c1ccccc1,-1.85,"InChI=1/C8H8O2/c1-10-8(9)7-5-3-2-4-6-7/h2-6H,1H3"
198
+ 197,CC(C)COC(=O)c1ccccc1C(=O)OCC(C)C,-4.66,"InChI=1/C16H22O4/c1-11(2)9-19-15(17)13-7-5-6-8-14(13)16(18)20-10-12(3)4/h5-8,11-12H,9-10H2,1-4H3"
199
+ 198,CC(C)OC,-0.06,"InChI=1/C4H10O/c1-4(2)5-3/h4H,1-3H3"
200
+ 199,CC(C)(C)OC,-0.24,"InChI=1/C5H12O/c1-5(2,3)6-4/h1-4H3"
201
+ 200,CCCOCCC,-1.62,"InChI=1/C6H14O/c1-3-5-7-6-4-2/h3-6H2,1-2H3"
202
+ 201,CC(C)OC(C)C,-1.1,"InChI=1/C6H14O/c1-5(2)7-6(3)4/h5-6H,1-4H3"
203
+ 202,CCOCCOCC,-0.77,"InChI=1/C6H14O2/c1-3-7-5-6-8-4-2/h3-6H2,1-2H3"
204
+ 203,c1ccc(cc1)Oc1ccccc1,-3.96,InChI=1/C12H10O/c1-3-7-11(8-4-1)13-12-9-5-2-6-10-12/h1-10H
205
+ 204,c1ccc2c(c1)Oc1ccccc1O2,-5.31,InChI=1/C12H8O2/c1-2-6-10-9(5-1)13-11-7-3-4-8-12(11)14-10/h1-8H
206
+ 205,C1CCOC1,0.56,InChI=1/C4H8O/c1-2-4-5-3-1/h1-4H2
207
+ 206,C([C@@H]([C@@H]([C@@H](C(=O)CO)O)O)O)O,0.64,"InChI=1/C6H12O6/c7-1-3(9)5(11)6(12)4(10)2-8/h3,5-9,11-12H,1-2H2/t3-,5-,6+/m0/s1"
208
+ 207,C[C@@]12CCC(=O)C=C1CC[C@@H]1[C@H]3CC[C@@](C(=O)CO)([C@]3(C)C[C@@H]([C@@H]21)O)O,-2.97,"InChI=1/C21H30O5/c1-19-7-5-13(23)9-12(19)3-4-14-15-6-8-21(26,17(25)11-22)20(15,2)10-16(24)18(14)19/h9,14-16,18,22,24,26H,3-8,10-11H2,1-2H3/t14-,15-,16+,18+,19-,20-,21-/m1/s1"
209
+ 208,C[C@@H]1C[C@H]2[C@H]3CCC4=CC(=O)C=C[C@]4(C)[C@@]3([C@H](C[C@]2(C)[C@]1(C(=O)CO)O)O)F,-3.64,"InChI=1/C22H29FO5/c1-12-8-16-15-5-4-13-9-14(25)6-7-19(13,2)21(15,23)17(26)10-20(16,3)22(12,28)18(27)11-24/h6-7,9,12,15-17,24,26,28H,4-5,8,10-11H2,1-3H3/t12-,15-,16+,17+,19+,20+,21-,22+/m1/s1"
210
+ 209,CC(=O)OCC(=O)[C@]1(CC[C@H]2[C@H]3CCC4=CC(=O)CC[C@]4(C)[C@@H]3[C@H](C[C@]12C)O)O,-4.46,"InChI=1/C23H32O6/c1-13(24)29-12-19(27)23(28)9-7-17-16-5-4-14-10-15(25)6-8-21(14,2)20(16)18(26)11-22(17,23)3/h10,16-18,20,26,28H,4-9,11-12H2,1-3H3/t16-,17+,18+,20+,21+,22+,23+/m1/s1"
211
+ 210,C[C@@]12C=CC(=O)C=C1CC[C@@H]1[C@@H]3CC[C@](C(=O)CO)([C@@]3(C)C[C@H]([C@@H]21)O)O,-3.21,"InChI=1/C21H28O5/c1-19-7-5-13(23)9-12(19)3-4-14-15-6-8-21(26,17(25)11-22)20(15,2)10-16(24)18(14)19/h5,7,9,14-16,18,22,24,26H,3-4,6,8,10-11H2,1-2H3/t14-,15+,16-,18+,19-,20+,21+/m1/s1"
212
+ 211,CC(=O)S[C@H]1CC2=CC(=O)CC[C@@]2(C)[C@@H]2CC[C@@]3(C)[C@@H](CC[C@@]43CCC(=O)O4)[C@@H]12,-4.28,"InChI=1/C24H32O4S/c1-14(25)29-19-13-15-12-16(26)4-8-22(15,2)17-5-9-23(3)18(21(17)19)6-10-24(23)11-7-20(27)28-24/h12,17-19,21H,4-11,13H2,1-3H3/t17-,18+,19+,21+,22-,23+,24+/m1/s1"
213
+ 212,C[C@]12CC[C@H]3c4ccc(cc4CC[C@@H]3[C@@H]1CCC2=O)O,-3.96,"InChI=1/C18H22O2/c1-18-9-8-14-13-5-3-12(19)10-11(13)2-4-15(14)16(18)6-7-17(18)20/h3,5,10,14-16,19H,2,4,6-9H2,1H3/t14-,15-,16-,18-/m0/s1"
214
+ 213,C[C@]12CCC(=O)C=C1CC[C@H]1[C@@H]3CCC(=O)[C@@]3(C)CC[C@H]21,-3.69,"InChI=1/C19H26O2/c1-18-9-7-13(20)11-12(18)3-4-14-15-5-6-17(21)19(15,2)10-8-16(14)18/h11,14-16H,3-10H2,1-2H3/t14-,15-,16-,18-,19-/m0/s1"
215
+ 214,C[C@@]12CCC(=O)C=C1CC[C@@H]1[C@H]3CC[C@H](C(=O)CO)[C@]3(C)CC[C@@H]21,-3.75,"InChI=1/C21H30O3/c1-20-9-7-14(23)11-13(20)3-4-15-16-5-6-18(19(24)12-22)21(16,2)10-8-17(15)20/h11,15-18,22H,3-10,12H2,1-2H3/t15-,16-,17-,18-,20-,21-/m1/s1"
216
+ 215,CC1(C)O[C@@H]2C[C@H]3[C@@H]4CCC5=CC(=O)C=C[C@]5(C)[C@]4([C@H](C[C@]3(C)[C@]2(C(=O)CO)O1)O)F,-4.32,"InChI=1/C24H31FO6/c1-20(2)30-19-10-16-15-6-5-13-9-14(27)7-8-21(13,3)23(15,25)17(28)11-22(16,4)24(19,31-20)18(29)12-26/h7-9,15-17,19,26,28H,5-6,10-12H2,1-4H3/t15-,16-,17-,19+,21-,22-,23-,24+/m0/s1"
217
+ 216,C[C@]12CCC(=O)C=C1CC[C@@H]1[C@H]3CC[C@](C(=O)CO)([C@]3(C)C[C@H]([C@@]21F)O)O,-3.43,"InChI=1/C21H29FO5/c1-18-7-5-13(24)9-12(18)3-4-15-14-6-8-20(27,17(26)11-23)19(14,2)10-16(25)21(15,18)22/h9,14-16,23,25,27H,3-8,10-11H2,1-2H3/t14-,15-,16-,18+,19-,20+,21-/m1/s1"
218
+ 217,C[C@H]1C[C@@H]2[C@H]3CC[C@](C(=O)C)([C@]3(C)C[C@H]([C@]2([C@]2(C)C=CC(=O)C=C12)F)O)O,-4.1,"InChI=1/C22H29FO4/c1-12-9-17-15-6-8-21(27,13(2)24)20(15,4)11-18(26)22(17,23)19(3)7-5-14(25)10-16(12)19/h5,7,10,12,15,17-18,26-27H,6,8-9,11H2,1-4H3/t12-,15+,17+,18+,19+,20+,21-,22+/m0/s1"
219
+ 218,CCCCC(=O)O[C@]1([C@@H](C)C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@]3([C@@H](C[C@]12C)O)F)C(=O)CO,-4.71,"InChI=1/C27H37FO6/c1-5-6-7-23(33)34-27(22(32)15-29)16(2)12-20-19-9-8-17-13-18(30)10-11-24(17,3)26(19,28)21(31)14-25(20,27)4/h10-11,13,16,19-21,29,31H,5-9,12,14-15H2,1-4H3/t16-,19-,20-,21+,24-,25-,26-,27+/m0/s1"
220
+ 219,COc1ccccc1O,-1.96,"InChI=1/C7H8O2/c1-9-7-5-3-2-4-6(7)8/h2-5,8H,1H3"
221
+ 220,COc1ccc(cc1)C=O,-1.49,"InChI=1/C8H8O2/c1-10-8-4-2-7(6-9)3-5-8/h2-6H,1H3"
222
+ 221,c1cc(ccc1Cl)O,-0.7,"InChI=1/C6H5ClO/c7-5-1-3-6(8)4-2-5/h1-4,8H"
223
+ 222,c1c(cc(c(c1Cl)O)Cl)Cl,-2.34,"InChI=1/C6H3Cl3O/c7-3-1-4(8)6(10)5(9)2-3/h1-2,10H"
224
+ 223,Cc1cc(ccc1Cl)O,-1.57,"InChI=1/C7H7ClO/c1-5-4-6(9)2-3-7(5)8/h2-4,9H,1H3"
225
+ 224,C(COCCCl)Cl,-1.12,InChI=1/C4H8Cl2O/c5-1-3-7-4-2-6/h1-4H2
226
+ 225,C1[C@H]2[C@H]3[C@@H]([C@@H]1[C@@H]1[C@H]2O1)[C@@]1(C(=C([C@]3(C1(Cl)Cl)Cl)Cl)Cl)Cl,-6.29,"InChI=1/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2/t2-,3+,4-,5+,6-,7+,10+,11-"
227
+ 226,CCCCCCN,-1.1,"InChI=1/C6H15N/c1-2-3-4-5-6-7/h2-7H2,1H3"
228
+ 227,CCCCNCCCC,-1.44,"InChI=1/C8H19N/c1-3-5-7-9-8-6-4-2/h9H,3-8H2,1-2H3"
229
+ 228,CCCCCCCCN,-2.75,"InChI=1/C8H19N/c1-2-3-4-5-6-7-8-9/h2-9H2,1H3"
230
+ 229,c1cc(ccc1N)N,-0.38,"InChI=1/C6H8N2/c7-5-1-2-6(8)4-3-5/h1-4H,7-8H2"
231
+ 230,Cc1ccccc1N,-0.85,"InChI=1/C7H9N/c1-6-4-2-3-5-7(6)8/h2-5H,8H2,1H3"
232
+ 231,CNc1ccccc1,-1.28,"InChI=1/C7H9N/c1-8-7-5-3-2-4-6-7/h2-6,8H,1H3"
233
+ 232,Cc1cccc(c1)N,-0.85,"InChI=1/C7H9N/c1-6-3-2-4-7(8)5-6/h2-5H,8H2,1H3"
234
+ 233,CCN(CC)c1ccccc1,-3.03,"InChI=1/C10H15N/c1-3-11(4-2)10-8-6-5-7-9-10/h5-9H,3-4H2,1-2H3"
235
+ 234,c1cc(ccc1Cc1ccc(cc1)N)N,-2.3,"InChI=1/C13H14N2/c14-12-5-1-10(2-6-12)9-11-3-7-13(15)8-4-11/h1-8H,9,14-15H2"
236
+ 235,c1ccncc1,0.76,InChI=1/C5H5N/c1-2-4-6-5-3-1/h1-5H
237
+ 236,Cc1ccc(C)nc1,0.4,"InChI=1/C7H9N/c1-6-3-4-7(2)8-5-6/h3-5H,1-2H3"
238
+ 237,Cc1cccc(C)n1,0.45,"InChI=1/C7H9N/c1-6-4-3-5-7(2)8-6/h3-5H,1-2H3"
239
+ 238,Cc1ccncc1C,0.36,"InChI=1/C7H9N/c1-6-3-4-8-5-7(6)2/h3-5H,1-2H3"
240
+ 239,Cc1cc(C)cnc1,0.38,"InChI=1/C7H9N/c1-6-3-7(2)5-8-4-6/h3-5H,1-2H3"
241
+ 240,c1cnccc1c1ccncc1,-1.54,InChI=1/C10H8N2/c1-5-11-6-2-9(1)10-3-7-12-8-4-10/h1-8H
242
+ 241,c1cnccc1C(=O)NN,0.01,"InChI=1/C6H7N3O/c7-9-6(10)5-1-3-8-4-2-5/h1-4H,7H2,(H,9,10)"
243
+ 242,c1ccc2c(c1)ccc1c2cccn1,-3.36,InChI=1/C13H9N/c1-2-5-11-10(4-1)7-8-13-12(11)6-3-9-14-13/h1-9H
244
+ 243,c1ccc2c(c1)nn[nH]2,-0.78,"InChI=1/C6H5N3/c1-2-4-6-5(3-1)7-9-8-6/h1-4H,(H,7,8,9)"
245
+ 244,c1cc[nH]c1,-0.17,InChI=1/C4H5N/c1-2-4-5-3-1/h1-5H
246
+ 245,c1ccsc1,-1.45,InChI=1/C4H4S/c1-2-4-5-3-1/h1-4H
247
+ 246,c1ccc2c(c1)cc[nH]2,-1.52,"InChI=1/C8H7N/c1-2-4-8-7(3-1)5-6-9-8/h1-6,9H"
248
+ 247,c1ccnnc1,1.1,InChI=1/C4H4N2/c1-2-4-6-5-3-1/h1-4H
249
+ 248,c1cc2c(cccn2)c(c1)O,-2.54,"InChI=1/C9H7NO/c11-9-5-1-4-8-7(9)3-2-6-10-8/h1-6,11H"
250
+ 249,c1cc2cc(ccc2nc1)O,-2.16,"InChI=1/C9H7NO/c11-8-3-4-9-7(6-8)2-1-5-10-9/h1-6,11H"
251
+ 250,CCC1(CC)C(=NC(=O)N(C)C1=O)O,-2.23,"InChI=1/C9H14N2O3/c1-4-9(5-2)6(12)10-8(14)11(3)7(9)13/h4-5H2,1-3H3,(H,10,12,14)"
252
+ 251,CCCCC1C(=O)N(c2ccccc2)N(c2ccccc2)C1=O,-3.81,"InChI=1/C19H20N2O2/c1-2-3-14-17-18(22)20(15-10-6-4-7-11-15)21(19(17)23)16-12-8-5-9-13-16/h4-13,17H,2-3,14H2,1H3"
253
+ 252,CN(C)CCCN1c2ccccc2Sc2ccc(cc12)Cl,-5.01,"InChI=1/C17H19ClN2S/c1-19(2)10-5-11-20-14-6-3-4-7-16(14)21-17-9-8-13(18)12-15(17)20/h3-4,6-9,12H,5,10-11H2,1-2H3"
254
+ 253,CN1[C@H]2CC[C@@H]1C[C@H](C2)OC(=O)[C@@H](CO)c1ccccc1,-2.12,"InChI=1/C17H23NO3/c1-18-13-7-8-14(18)10-15(9-13)21-17(20)16(11-19)12-5-3-2-4-6-12/h2-6,13-16,19H,7-11H2,1H3/t13-,14+,15-,16-/m0/s1"
255
+ 254,c1cc(CNc2cc(c(cc2C(=O)O)S(=O)(=O)N)Cl)oc1,-3.66,"InChI=1/C12H11ClN2O5S/c13-9-5-10(15-6-7-2-1-3-20-7)8(12(16)17)4-11(9)21(14,18)19/h1-5,15H,6H2,(H,16,17)(H2,14,18,19)/f/h16H,14H2"
256
+ 255,CC/C(=C(/CC)\c1ccc(cc1)O)/c1ccc(cc1)O,-4.35,"InChI=1/C18H20O2/c1-3-17(13-5-9-15(19)10-6-13)18(4-2)14-7-11-16(20)12-8-14/h5-12,19-20H,3-4H2,1-2H3/b18-17+"
257
+ 256,CCC1(CC)C(=NC(=O)N=C1O)O,-1.39,"InChI=1/C8H12N2O3/c1-3-8(4-2)5(11)9-7(13)10-6(8)12/h3-4H2,1-2H3,(H2,9,10,11,12,13)"
258
+ 257,CCCC(C)(COC(=N)O)COC(=N)O,-1.67,"InChI=1/C9H18N2O4/c1-3-4-9(2,5-14-7(10)12)6-15-8(11)13/h3-6H2,1-2H3,(H2,10,12)(H2,11,13)"
259
+ 258,Cc1cc(C)nc(n1)NS(=O)(=O)c1ccc(cc1)N,-2.27,"InChI=1/C12H14N4O2S/c1-8-7-9(2)15-12(14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)"
260
+ 259,c1ccc2c(c1)N(CCCN1CCN(CC1)CCO)c1cc(ccc1S2)Cl,-4.16,"InChI=1/C21H26ClN3OS/c22-17-6-7-21-19(16-17)25(18-4-1-2-5-20(18)27-21)9-3-8-23-10-12-24(13-11-23)14-15-26/h1-2,4-7,16,26H,3,8-15H2"
261
+ 260,CN(C)CCCN1c2ccccc2Sc2ccccc12,-4.3,"InChI=1/C17H20N2S/c1-18(2)12-7-13-19-14-8-3-5-10-16(14)20-17-11-6-4-9-15(17)19/h3-6,8-11H,7,12-13H2,1-2H3"
262
+ 261,c1c(c(cc2c1N=CNS2(=O)=O)S(=O)(=O)N)Cl,-3.05,"InChI=1/C7H6ClN3O4S2/c8-4-1-5-7(2-6(4)16(9,12)13)17(14,15)11-3-10-5/h1-3H,(H,10,11)(H2,9,12,13)"
263
+ 262,CCOc1ccc(cc1)N=C(C)O,-2.37,"InChI=1/C10H13NO2/c1-3-13-10-6-4-9(5-7-10)11-8(2)12/h4-7H,3H2,1-2H3,(H,11,12)"
264
+ 263,c1cc(ccc1N)S(=O)(=O)N,-1.36,"InChI=1/C6H8N2O2S/c7-5-1-3-6(4-2-5)11(8,9)10/h1-4H,7H2,(H2,8,9,10)"
265
+ 264,c1cn(c2c1c(N)ncn2)[C@@H]1[C@@H]([C@H]([C@@H](CO)O1)O)O,-1.95,"InChI=1/C11H14N4O4/c12-9-5-1-2-15(10(5)14-4-13-9)11-8(18)7(17)6(3-16)19-11/h1-2,4,6-8,11,16-18H,3H2,(H2,12,13,14)/t6-,7+,8-,11+/m1/s1"
266
+ 265,c1cc(ccc1N)S(=O)(=O)Nc1nccs1,-2.43,"InChI=1/C9H9N3O2S2/c10-7-1-3-8(4-2-7)16(13,14)12-9-11-5-6-15-9/h1-6H,10H2,(H,11,12)"
267
+ 266,C=CCC1(C(C)C)C(=NC(=O)N=C1O)O,-1.71,"InChI=1/C10H14N2O3/c1-4-5-10(6(2)3)7(13)11-9(15)12-8(10)14/h4,6H,1,5H2,2-3H3,(H2,11,12,13,14,15)"
268
+ 267,CCC[C@@H](C)C1(CC=C)C(=NC(=S)N=C1O)O,-3.46,"InChI=1/C12H18N2O2S/c1-4-6-8(3)12(7-5-2)9(15)13-11(17)14-10(12)16/h5,8H,2,4,6-7H2,1,3H3,(H2,13,14,15,16,17)/t8-/m1/s1"
269
+ 268,CCCCC1(CC)C(=NC(=O)N=C1O)O,-1.64,"InChI=1/C10H16N2O3/c1-3-5-6-10(4-2)7(13)11-9(15)12-8(10)14/h3-6H2,1-2H3,(H2,11,12,13,14,15)"
270
+ 269,COc1ccc(nn1)NS(=O)(=O)c1ccc(cc1)N,-3.28,"InChI=1/C11H12N4O3S/c1-18-11-7-6-10(13-14-11)15-19(16,17)9-4-2-8(12)3-5-9/h2-7H,12H2,1H3,(H,13,15)"
271
+ 270,c1ccc2c(c1)ccc(=O)o2,-1.89,InChI=1/C9H6O2/c10-9-6-5-7-3-1-2-4-8(7)11-9/h1-6H
272
+ 271,CCc1nnc(NS(=O)(=O)c2ccc(cc2)N)s1,-1.94,"InChI=1/C10H12N4O2S2/c1-2-9-12-13-10(17-9)14-18(15,16)8-5-3-7(11)4-6-8/h3-6H,2,11H2,1H3,(H,13,14)"
273
+ 272,C=CCC1(c2ccccc2)C(=NC(=O)N=C1O)O,-2.18,"InChI=1/C13H12N2O3/c1-2-8-13(9-6-4-3-5-7-9)10(16)14-12(18)15-11(13)17/h2-7H,1,8H2,(H2,14,15,16,17,18)"
274
+ 273,CCC1(c2ccccc2)C(=NCN=C1O)O,-2.64,"InChI=1/C12H14N2O2/c1-2-12(9-6-4-3-5-7-9)10(15)13-8-14-11(12)16/h3-7H,2,8H2,1H3,(H,13,15)(H,14,16)"
275
+ 274,c1c(c(cc2c1N[C@H](C(Cl)Cl)NS2(=O)=O)S(=O)(=O)N)Cl,-2.68,"InChI=1/C8H8Cl3N3O4S2/c9-3-1-4-6(2-5(3)19(12,15)16)20(17,18)14-8(13-4)7(10)11/h1-2,7-8,13-14H,(H2,12,15,16)/t8-/m0/s1"
276
+ 275,CC(=NS(=O)(=O)c1ccc(cc1)N)O,-1.23,"InChI=1/C8H10N2O3S/c1-6(11)10-14(12,13)8-4-2-7(9)3-5-8/h2-5H,9H2,1H3,(H,10,11)"
277
+ 276,C([C@@H]([C@@H](CO)O)O)O,0.7,"InChI=1/C4H10O4/c5-1-3(7)4(8)2-6/h3-8H,1-2H2/t3-,4+"
278
+ 277,c1c2c(ncnc2O)n[nH]1,-2.38,"InChI=1/C5H4N4O/c10-5-3-1-8-9-4(3)6-2-7-5/h1-2H,(H2,6,7,8,9,10)"
279
+ 278,COc1cc2c(cc1OC)N1C(=O)C[C@H]3[C@@H]4[C@H]5C[C@H]6[C@]2(CCN6CC5=CCO3)[C@@H]14,-2.09,"InChI=1/C23H26N2O4/c1-27-16-8-14-15(9-17(16)28-2)25-20(26)10-18-21-13-7-19-23(14,22(21)25)4-5-24(19)11-12(13)3-6-29-18/h3,8-9,13,18-19,21-22H,4-7,10-11H2,1-2H3/t13-,18-,19-,21-,22-,23+/m0/s1"
280
+ 279,CCN(CC)CCN=C(c1cc(c(cc1OC)N)Cl)O,-3.18,"InChI=1/C14H22ClN3O2/c1-4-18(5-2)7-6-17-14(19)10-8-11(15)12(16)9-13(10)20-3/h8-9H,4-7,16H2,1-3H3,(H,17,19)"
281
+ 280,CCOc1ccc2c(c1)sc(n2)S(=O)(=O)N,-3.81,"InChI=1/C9H10N2O3S2/c1-2-14-6-3-4-7-8(5-6)15-9(11-7)16(10,12)13/h3-5H,2H2,1H3,(H2,10,12,13)"
282
+ 281,CCC1(C2=CCCCCC2)C(=NC(=O)N=C1O)O,-3,"InChI=1/C13H18N2O3/c1-2-13(9-7-5-3-4-6-8-9)10(16)14-12(18)15-11(13)17/h7H,2-6,8H2,1H3,(H2,14,15,16,17,18)"
283
+ 282,c1cc(c(cc1[C@@H]1CC(=O)c2c(cc(cc2O1)O)O)O)O,-3.62,"InChI=1/C15H12O6/c16-8-4-11(19)15-12(20)6-13(21-14(15)5-8)7-1-2-9(17)10(18)3-7/h1-5,13,16-19H,6H2/t13-/m0/s1"
284
+ 283,COc1cnc(nc1)NS(=O)(=O)c1ccc(cc1)N,-2.58,"InChI=1/C11H12N4O3S/c1-18-9-6-13-11(14-7-9)15-19(16,17)10-4-2-8(12)3-5-10/h2-7H,12H2,1H3,(H,13,14,15)"
285
+ 284,Cc1cc(no1)NS(=O)(=O)c1ccc(cc1)N,-2.62,"InChI=1/C10H11N3O3S/c1-7-6-10(12-16-7)13-17(14,15)9-4-2-8(11)3-5-9/h2-6H,11H2,1H3,(H,12,13)"
286
+ 285,COc1cc(Cc2c[nH]c(=N)[nH]c2=N)cc(c1OC)OC,-2.86,"InChI=1/C14H18N4O3/c1-19-10-5-8(6-11(20-2)12(10)21-3)4-9-7-17-14(16)18-13(9)15/h5-7H,4H2,1-3H3,(H4,15,16,17,18)"
287
+ 286,Cc1nc(cc(n1)OC)NS(=O)(=O)c1ccc(cc1)N,-2.54,"InChI=1/C12H14N4O3S/c1-8-14-11(7-12(15-8)19-2)16-20(17,18)10-5-3-9(13)4-6-10/h3-7H,13H2,1-2H3,(H,14,15,16)"
288
+ 287,CC(C)Cc1ccc(cc1)C(C)C(=O)O,-3.99,"InChI=1/C13H18O2/c1-9(2)8-11-4-6-12(7-5-11)10(3)13(14)15/h4-7,9-10H,8H2,1-3H3,(H,14,15)/f/h14H"
289
+ 288,Cn1cnc2c1c(=O)n(C)c(=O)n2C,-0.97,"InChI=1/C8H10N4O2/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2/h4H,1-3H3"
290
+ 289,c1ccc2c(c1)ccc(n2)O,-2.14,"InChI=1/C9H7NO/c11-9-6-5-7-3-1-2-4-8(7)10-9/h1-6H,(H,10,11)"
291
+ 290,Cc1cnc(nc1O)O,-1.52,"InChI=1/C5H6N2O2/c1-3-2-6-5(9)7-4(3)8/h2H,1H3,(H2,6,7,8,9)"
292
+ 291,CN1CCOCC1,1,"InChI=1/C5H11NO/c1-6-2-4-7-5-3-6/h2-5H2,1H3"
293
+ 292,C1CCNCC1,1.07,"InChI=1/C5H11N/c1-2-4-6-5-3-1/h6H,1-5H2"
294
+ 293,Cn1ccc(nc1=O)O,-0.8,"InChI=1/C5H6N2O2/c1-7-3-2-4(8)6-5(7)9/h2-3H,1H3,(H,6,8,9)"
295
+ 294,CN1CCCCC1,0.23,"InChI=1/C6H13N/c1-7-5-3-2-4-6-7/h2-6H2,1H3"
296
+ 295,c1ccc(c(c1)N)O,-0.72,"InChI=1/C6H7NO/c7-5-3-1-2-4-6(5)8/h1-4,8H,7H2"
297
+ 296,c1ccc(c(c1)C(=O)O)N,-1.52,"InChI=1/C7H7NO2/c8-6-4-2-1-3-5(6)7(9)10/h1-4H,8H2,(H,9,10)/f/h9H"
298
+ 297,CCOC(=N)O,0.85,"InChI=1/C3H7NO2/c1-2-6-3(4)5/h2H2,1H3,(H2,4,5)"
299
+ 298,c1ccc(cc1)COC(=N)O,-0.35,"InChI=1/C8H9NO2/c9-8(10)11-6-7-4-2-1-3-5-7/h1-5H,6H2,(H2,9,10)"
300
+ 299,C(=N)(N)O,0.96,"InChI=1/CH4N2O/c2-1(3)4/h(H4,2,3,4)"
301
+ 300,CNC(=N)O,1.13,"InChI=1/C2H6N2O/c1-4-2(3)5/h1H3,(H3,3,4,5)"
302
+ 301,CN(C)C(=O)N(C)C,0.94,InChI=1/C5H12N2O/c1-6(2)5(8)7(3)4/h1-4H3
303
+ 302,c1ccc(cc1)CNC(=N)O,-0.95,"InChI=1/C8H10N2O/c9-8(11)10-6-7-4-2-1-3-5-7/h1-5H,6H2,(H3,9,10,11)"
304
+ 303,c1ccc(cc1)C(=N)O,-0.96,"InChI=1/C7H7NO/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H2,8,9)"
305
+ 304,c1ccc(c(c1)C(=N)O)C(=N)O,-2.92,"InChI=1/C8H8N2O2/c9-7(11)5-3-1-2-4-6(5)8(10)12/h1-4H,(H2,9,11)(H2,10,12)"
306
+ 305,CC(=Nc1ccc(cc1)O)O,-1.03,"InChI=1/C8H9NO2/c1-6(10)9-7-2-4-8(11)5-3-7/h2-5,11H,1H3,(H,9,10)"
307
+ 306,CC(C)Oc1ccccc1OC(=NC)O,-2.05,"InChI=1/C11H15NO3/c1-8(2)14-9-6-4-5-7-10(9)15-11(13)12-3/h4-8H,1-3H3,(H,12,13)"
308
+ 307,CN1CC[C@@]23[C@@H]4C=C[C@H]([C@H]3Oc3c(ccc(C[C@H]14)c23)OC)O,-1.52,"InChI=1/C18H21NO3/c1-19-8-7-18-11-4-5-13(20)17(18)22-16-14(21-2)6-3-10(15(16)18)9-12(11)19/h3-6,11-13,17,20H,7-9H2,1-2H3/t11-,12+,13-,17-,18-/m1/s1"
309
+ 308,CCCN(=O)=O,-0.8,"InChI=1/C3H7NO2/c1-2-3-4(5)6/h2-3H2,1H3"
310
+ 309,c1ccc(cc1)N(=O)=O,-1.8,InChI=1/C6H5NO2/c8-7(9)6-4-2-1-3-5-6/h1-5H
311
+ 310,Cc1cccc(c1)N(=O)=O,-2.44,"InChI=1/C7H7NO2/c1-6-3-2-4-7(5-6)8(9)10/h2-5H,1H3"
312
+ 311,Cc1ccc(cc1)N(=O)=O,-2.49,"InChI=1/C7H7NO2/c1-6-2-4-7(5-3-6)8(9)10/h2-5H,1H3"
313
+ 312,c1cc(ccc1N(=O)=O)O,-0.74,"InChI=1/C6H5NO3/c8-6-3-1-5(2-4-6)7(9)10/h1-4,8H"
314
+ 313,c1ccc(c(c1)N(=O)=O)O,-1.74,"InChI=1/C6H5NO3/c8-6-4-2-1-3-5(6)7(9)10/h1-4,8H"
315
+ 314,c1cc(ccc1C(=O)O)N(=O)=O,-1.68,"InChI=1/C7H5NO4/c9-7(10)5-1-3-6(4-2-5)8(11)12/h1-4H,(H,9,10)/f/h9H"
316
+ 315,COc1ccccc1N(=O)=O,-1.96,"InChI=1/C7H7NO3/c1-11-7-5-3-2-4-6(7)8(9)10/h2-5H,1H3"
317
+ 316,c1ccc(c(c1)N)N(=O)=O,-1.96,"InChI=1/C6H6N2O2/c7-5-3-1-2-4-6(5)8(9)10/h1-4H,7H2"
318
+ 317,c1cc(ccc1N)N(=O)=O,-2.37,"InChI=1/C6H6N2O2/c7-5-1-3-6(4-2-5)8(9)10/h1-4H,7H2"
319
+ 318,c1cc(ccc1Cl)N,-1.66,"InChI=1/C6H6ClN/c7-5-1-3-6(8)4-2-5/h1-4H,8H2"
320
+ 319,c1cc(cc(c1)N)Cl,-1.37,"InChI=1/C6H6ClN/c7-5-2-1-3-6(8)4-5/h1-4H,8H2"
321
+ 320,CCN=c1nc(Cl)[nH]c(=NCC)[nH]1,-4.55,"InChI=1/C7H12ClN5/c1-3-9-6-11-5(8)12-7(13-6)10-4-2/h3-4H2,1-2H3,(H2,9,10,11,12,13)"
322
+ 321,CC(C)OC(=Nc1cccc(c1)Cl)O,-3.38,"InChI=1/C10H12ClNO2/c1-7(2)14-10(13)12-9-5-3-4-8(11)6-9/h3-7H,1-2H3,(H,12,13)"
323
+ 322,CN(C(=O)Nc1ccc(cc1)Cl)OC,-2.57,"InChI=1/C9H11ClN2O2/c1-12(14-2)9(13)11-8-5-3-7(10)4-6-8/h3-6H,1-2H3,(H,11,13)"
324
+ 323,CCC(=Nc1ccc(c(c1)Cl)Cl)O,-3,"InChI=1/C9H9Cl2NO/c1-2-9(13)12-6-3-4-7(10)8(11)5-6/h3-5H,2H2,1H3,(H,12,13)"
325
+ 324,Cc1c(c(=O)n(c(n1)O)C(C)(C)C)Cl,-2.48,"InChI=1/C9H13ClN2O2/c1-5-6(10)7(13)12(8(14)11-5)9(2,3)4/h1-4H3,(H,11,14)"
326
+ 325,CCCN(CCC)c1c(cc(cc1N(=O)=O)C(F)(F)F)N(=O)=O,-5.68,"InChI=1/C13H16F3N3O4/c1-3-5-17(6-4-2)12-10(18(20)21)7-9(13(14,15)16)8-11(12)19(22)23/h7-8H,3-6H2,1-2H3"
327
+ 326,CC(C)(C)C(=O)[C@@H](n1cncn1)Oc1ccc(cc1)Cl,-3.61,"InChI=1/C14H16ClN3O2/c1-14(2,3)12(19)13(18-9-16-8-17-18)20-11-6-4-10(15)5-7-11/h4-9,13H,1-3H3/t13-/m0/s1"
328
+ 327,CCCCS,-2.18,"InChI=1/C4H10S/c1-2-3-4-5/h5H,2-4H2,1H3"
329
+ 328,c1ccc(cc1)S,-2.12,"InChI=1/C6H6S/c7-6-4-2-1-3-5-6/h1-5,7H"
330
+ 329,CCOc1nc(=NC(C)C)nc([nH]1)SC,-3.04,"InChI=1/C9H16N4OS/c1-5-14-8-11-7(10-6(2)3)12-9(13-8)15-4/h6H,5H2,1-4H3,(H,10,11,12,13)"
331
+ 330,CCN=c1nc([nH]c(n1)SC)NC(C)(C)C,-4,"InChI=1/C10H19N5S/c1-6-11-7-12-8(15-10(2,3)4)14-9(13-7)16-5/h6H2,1-5H3,(H2,11,12,13,14,15)"
332
+ 331,c1ccc2c(c1)C(=NS2(=O)=O)O,-1.64,"InChI=1/C7H5NO3S/c9-7-5-3-1-2-4-6(5)12(10,11)8-7/h1-4H,(H,8,9)"
333
+ 332,Cc1ccccc1S(=O)(=O)N,-2.02,"InChI=1/C7H9NO2S/c1-6-4-2-3-5-7(6)11(8,9)10/h2-5H,1H3,(H2,8,9,10)"
334
+ 333,CCOP(=S)(OCC)SCSCC,-4.11,"InChI=1/C7H17O2PS3/c1-4-8-10(11,9-5-2)13-7-12-6-3/h4-7H2,1-3H3"
335
+ 334,COP(=S)(OC)SCN1C(=O)c2ccccc2C1=O,-4.1,"InChI=1/C11H12NO4PS2/c1-15-17(18,16-2)19-7-12-10(13)8-5-3-4-6-9(8)11(12)14/h3-6H,7H2,1-2H3"
336
+ 335,COP(=S)(OC)Oc1ccc(cc1)N(=O)=O,-3.68,"InChI=1/C8H10NO5PS/c1-12-15(16,13-2)14-8-5-3-7(4-6-8)9(10)11/h3-6H,1-2H3"
337
+ 336,COP(=S)(OC)Oc1ccc(cc1Cl)N(=O)=O,-4.31,"InChI=1/C8H9ClNO5PS/c1-13-16(17,14-2)15-8-4-3-6(10(11)12)5-7(8)9/h3-5H,1-2H3"
338
+ 337,CCOP(=S)(OCC)SCSP(=S)(OCC)OCC,-5.54,"InChI=1/C9H22O4P2S4/c1-5-10-14(16,11-6-2)18-9-19-15(17,12-7-3)13-8-4/h5-9H2,1-4H3"
339
+ 338,COP(=S)(OC)Oc1cc(c(cc1Cl)Cl)Cl,-5.72,"InChI=1/C8H8Cl3O3PS/c1-12-15(16,13-2)14-8-4-6(10)5(9)3-7(8)11/h3-4H,1-2H3"
340
+ 339,CC(CCC(=O)O)C1CCC2C3CC(C4CC(CCC4(C)C3CCC12C)O)O,-3.82,"InChI=1/C24H40O4/c1-14(4-7-22(27)28)17-5-6-18-16-13-21(26)20-12-15(25)8-10-24(20,3)19(16)9-11-23(17,18)2/h14-21,25-26H,4-13H2,1-3H3,(H,27,28)/f/h27H"
341
+ 340,C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@H]3[C@H](CC[C@]12C)[C@@]1(C)CC[C@H](C[C@H]1C[C@@H]3O)O,-4.29,"InChI=1/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14-,15+,16-,17-,18+,19+,20+,22+,23+,24-/m1/s1/f/h27H"
342
+ 341,C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@H]3[C@H](CC[C@]12C)[C@@]1(C)CC[C@H](C[C@H]1C[C@H]3O)O,-3.64,"InChI=1/C24H40O4/c1-14(4-7-21(27)28)17-5-6-18-22-19(9-11-24(17,18)3)23(2)10-8-16(25)12-15(23)13-20(22)26/h14-20,22,25-26H,4-13H2,1-3H3,(H,27,28)/t14-,15+,16-,17-,18+,19+,20-,22+,23+,24-/m1/s1/f/h27H"
343
+ 342,C[C@H](CCC(=O)O)[C@H]1CC[C@H]2[C@H]3[C@H](C[C@@H]([C@]12C)O)[C@@]1(C)CC[C@H](C[C@H]1C[C@H]3O)O,-4.35,"InChI=1/C24H40O5/c1-13(4-7-21(28)29)16-5-6-17-22-18(12-20(27)24(16,17)3)23(2)9-8-15(25)10-14(23)11-19(22)26/h13-20,22,25-27H,4-12H2,1-3H3,(H,28,29)/t13-,14+,15-,16-,17+,18+,19-,20+,22+,23+,24-/m1/s1/f/h28H"
344
+ 343,Cc1nnc2CN=C(c3ccccc3Cl)c3cc(ccc3n12)Cl,-4.08,"InChI=1/C17H12Cl2N4/c1-10-21-22-16-9-20-17(12-4-2-3-5-14(12)19)13-8-11(18)6-7-15(13)23(10)16/h2-8H,9H2,1H3"
345
+ 344,C[C@H]1CNCCN1,0.74,"InChI=1/C5H12N2/c1-5-4-6-2-3-7-5/h5-7H,2-4H2,1H3/t5-/m0/s1"
346
+ 345,CC(=Nc1ccc(cc1)Br)O,-3.08,"InChI=1/C8H8BrNO/c1-6(11)10-8-4-2-7(9)3-5-8/h2-5H,1H3,(H,10,11)"
347
+ 346,CC(=Nc1ccc(cc1)N(=O)=O)O,-2.69,"InChI=1/C8H8N2O3/c1-6(11)9-7-2-4-8(5-3-7)10(12)13/h2-5H,1H3,(H,9,11)"
348
+ 347,CN(C)C(=N[C@H]1C[C@H]2[C@H]3CC[C@H](C3)[C@H]2C1)O,-3.01,"InChI=1/C13H22N2O/c1-15(2)13(16)14-10-6-11-8-3-4-9(5-8)12(11)7-10/h8-12H,3-7H2,1-2H3,(H,14,16)/t8-,9+,10-,11-,12+"
349
+ 348,C#C[C@@]1(CC[C@@H]2[C@@H]3CCC4=CC(=O)CC[C@H]4[C@H]3CC[C@@]12C)O,-4.79,"InChI=1/C20H26O2/c1-3-20(22)11-9-18-17-6-4-13-12-14(21)5-7-15(13)16(17)8-10-19(18,20)2/h1,12,15-18,22H,4-11H2,2H3/t15-,16-,17-,18-,19-,20+/m1/s1"
350
+ 349,c1cc(c(Cc2cc(ccc2O)Cl)cc1Cl)O,-3.95,"InChI=1/C13H10Cl2O2/c14-10-1-3-12(16)8(6-10)5-9-7-11(15)2-4-13(9)17/h1-4,6-7,16-17H,5H2"
351
+ 350,c1c(cc(c(c1C(=O)O)Cl)N)Cl,-2.47,"InChI=1/C7H5Cl2NO2/c8-3-1-4(7(11)12)6(9)5(10)2-3/h1-2H,10H2,(H,11,12)/f/h11H"
352
+ 351,C[C@@]1(c2cccc(c2C(=O)C2=C([C@]3([C@H](C[C@@H]12)[C@@H](C(=C(C3=O)C(=N)O)O)N(C)C)O)O)O)O,-3.12,"InChI=1/C22H24N2O8/c1-21(31)8-5-4-6-11(25)12(8)16(26)13-9(21)7-10-15(24(2)3)17(27)14(20(23)30)19(29)22(10,32)18(13)28/h4-6,9-10,15,25,27-28,31-32H,7H2,1-3H3,(H2,23,30)/t9-,10-,15+,21+,22+/m1/s1"
353
+ 352,C[C@@]1(c2cccc(c2C(=O)C2=C([C@]3([C@@H]([C@@H](C(=C(C3=O)C(=N)O)O)N(C)C)[C@@H]([C@H]12)O)O)O)O)O,-3.14,"InChI=1/C22H24N2O9/c1-21(32)7-5-4-6-8(25)9(7)15(26)10-12(21)17(28)13-14(24(2)3)16(27)11(20(23)31)19(30)22(13,33)18(10)29/h4-6,12-14,17,25,27-29,32-33H,1-3H3,(H2,23,31)/t12-,13+,14+,17-,21+,22+/m1/s1"
354
+ 353,CN(C)C(=O)C(c1ccccc1)c1ccccc1,-2.98,"InChI=1/C16H17NO/c1-17(2)16(18)15(13-9-5-3-6-10-13)14-11-7-4-8-12-14/h3-12,15H,1-2H3"
355
+ 354,CC(C)N(C(C)C)C(=O)SCC(=C(Cl)Cl)Cl,-4.88,"InChI=1/C10H16Cl3NOS/c1-6(2)14(7(3)4)10(15)16-5-8(11)9(12)13/h6-7H,5H2,1-4H3"
356
+ 355,CCCCN(CC)C(=O)SCCC,-3.35,"InChI=1/C10H21NOS/c1-4-7-8-11(6-3)10(12)13-9-5-2/h4-9H2,1-3H3"
357
+ 356,CCCCOC(=O)c1ccc(cc1)O,-2.72,"InChI=1/C11H14O3/c1-2-3-8-14-11(13)9-4-6-10(12)7-5-9/h4-7,12H,2-3,8H2,1H3"
358
+ 357,COC(=O)c1ccc(cc1)O,-1.78,"InChI=1/C8H8O3/c1-11-8(10)6-2-4-7(9)5-3-6/h2-5,9H,1H3"
359
+ 358,CCOC(=O)CCC(=O)OCC,-0.96,"InChI=1/C8H14O4/c1-3-11-7(9)5-6-8(10)12-4-2/h3-6H2,1-2H3"
360
+ 359,CCCCOC(=O)c1ccccc1,-3.48,"InChI=1/C11H14O2/c1-2-3-9-13-11(12)10-7-5-4-6-8-10/h4-8H,2-3,9H2,1H3"
361
+ 360,COC(=O)c1ccc(cc1)N,-1.59,"InChI=1/C8H9NO2/c1-11-8(10)6-2-4-7(9)5-3-6/h2-5H,9H2,1H3"
362
+ 361,CCCCCOC(=O)c1ccc(cc1)N,-3.26,"InChI=1/C12H17NO2/c1-2-3-4-9-15-12(14)10-5-7-11(13)8-6-10/h5-8H,2-4,9,13H2,1H3"
363
+ 362,CCCCCCOC(=O)c1ccc(cc1)N,-3.95,"InChI=1/C13H19NO2/c1-2-3-4-5-10-16-13(15)11-6-8-12(14)9-7-11/h6-9H,2-5,10,14H2,1H3"
364
+ 363,CCCCCCCCOC(=O)c1ccc(cc1)N,-5.4,"InChI=1/C15H23NO2/c1-2-3-4-5-6-7-12-18-15(17)13-8-10-14(16)11-9-13/h8-11H,2-7,12,16H2,1H3"
365
+ 364,Cc1cccc(c1)N=C(C)O,-2.09,"InChI=1/C9H11NO/c1-7-4-3-5-9(6-7)10-8(2)11/h3-6H,1-2H3,(H,10,11)"
366
+ 365,CC[C@H]1CCCCN1,-0.35,"InChI=1/C7H15N/c1-2-7-5-3-4-6-8-7/h7-8H,2-6H2,1H3/t7-/m0/s1"
367
+ 366,CCCCOc1ccc(c(c1)N(CC)CC)C(=O)OCC,-3.84,"InChI=1/C17H27NO3/c1-5-9-12-21-14-10-11-15(17(19)20-8-4)16(13-14)18(6-2)7-3/h10-11,13H,5-9,12H2,1-4H3"
368
+ 367,C=CCc1ccc(cc1)OC,-2.92,"InChI=1/C10H12O/c1-3-4-9-5-7-10(11-2)8-6-9/h3,5-8H,1,4H2,2H3"
369
+ 368,COc1c(c(c(c(c1O)Cl)Cl)Cl)Cl,-4.02,"InChI=1/C7H4Cl4O2/c1-13-7-5(11)3(9)2(8)4(10)6(7)12/h12H,1H3"
370
+ 369,C=C(C)[C@@H]1CC=C(C)C(=O)C1,-2.06,"InChI=1/C10H14O/c1-7(2)9-5-4-8(3)10(11)6-9/h4,9H,1,5-6H2,2-3H3/t9-/m1/s1"
371
+ 370,C1(=O)C(=NC(=O)N=C1O)O,-1.25,"InChI=1/C4H2N2O4/c7-1-2(8)5-4(10)6-3(1)9/h(H2,5,6,8,9,10)"
372
+ 371,c1cncnc1O,0.59,"InChI=1/C4H4N2O/c7-4-1-2-5-3-6-4/h1-3H,(H,5,6,7)"
373
+ 372,C1CC(=O)N=C1O,0.3,"InChI=1/C4H5NO2/c6-3-1-2-4(7)5-3/h1-2H2,(H,5,6,7)"
374
+ 373,C(CS(=O)CCCl)Cl,-1.16,InChI=1/C4H8Cl2OS/c5-1-3-8(7)4-2-6/h1-4H2
375
+ 374,C([C@@H](C(=O)O)N)C(=N)O,-0.74,"InChI=1/C4H8N2O3/c5-2(4(8)9)1-3(6)7/h2H,1,5H2,(H2,6,7)(H,8,9)/t2-/m0/s1"
376
+ 375,CC[C@H](C(=O)O)N,0.31,"InChI=1/C4H9NO2/c1-2-3(5)4(6)7/h3H,2,5H2,1H3,(H,6,7)/t3-/m1/s1/f/h6H"
377
+ 376,c1cc(cnc1)O,-0.46,"InChI=1/C5H5NO/c7-5-2-1-3-6-4-5/h1-4,7H"
378
+ 377,c1c[nH]ccc1=O,1.02,"InChI=1/C5H5NO/c7-5-1-3-6-4-2-5/h1-4H,(H,6,7)"
379
+ 378,c1nc2c(N)nc[nH]c2n1,-2.12,"InChI=1/C5H5N5/c6-4-3-5(9-1-7-3)10-2-8-4/h1-2H,(H3,6,7,8,9,10)"
380
+ 379,CC1(C)C(=O)N(C(=O)N1Cl)Cl,-2.6,InChI=1/C5H6Cl2N2O2/c1-5(2)3(10)8(6)4(11)9(5)7/h1-2H3
381
+ 380,Cc1cc(nc(n1)S)O,-2.43,"InChI=1/C5H6N2OS/c1-3-2-4(8)7-5(9)6-3/h2H,1H3,(H2,6,7,8,9)"
382
+ 381,CC[C@@H]1C(=NC(=N1)O)O,-0.06,"InChI=1/C5H8N2O2/c1-2-3-4(8)7-5(9)6-3/h3H,2H2,1H3,(H2,6,7,8,9)/t3-/m1/s1"
383
+ 382,CC(=O)/N=c\1/n(C)nc(s1)S(=O)(=O)N,-1.83,"InChI=1/C5H8N4O3S2/c1-3(10)7-4-9(2)8-5(13-4)14(6,11)12/h1-2H3,(H2,6,11,12)/b7-4-"
384
+ 383,C[C@@H]1CCCC[C@@H]1NC(=Nc1ccccc1)O,-4.98,"InChI=1/C14H20N2O/c1-11-7-5-6-10-13(11)16-14(17)15-12-8-3-2-4-9-12/h2-4,8-9,11,13H,5-7,10H2,1H3,(H2,15,16,17)/t11-,13+/m1/s1"
385
+ 384,c1cnc2c(cncn2)n1,0.02,InChI=1/C6H4N4/c1-2-9-6-5(8-1)3-7-4-10-6/h1-4H
386
+ 385,CSc1ncc2c(nccn2)n1,-2.36,"InChI=1/C7H6N4S/c1-12-7-10-4-5-6(11-7)9-3-2-8-5/h2-4H,1H3"
387
+ 386,c1cnc2c(c(ncn2)S)n1,-2.77,"InChI=1/C6H4N4S/c11-6-4-5(9-3-10-6)8-2-1-7-4/h1-3H,(H,8,9,10,11)"
388
+ 387,Cn1c(=O)c2c([nH]c(n2)O)nc1O,-1.56,"InChI=1/C6H6N4O3/c1-10-4(11)2-3(9-6(10)13)8-5(12)7-2/h1H3,(H,9,13)(H2,7,8,12)"
389
+ 388,c1ccc(cc1)S(=O)(=O)N,-1.56,"InChI=1/C6H7NO2S/c7-10(8,9)6-4-2-1-3-5-6/h1-5H,(H2,7,8,9)"
390
+ 389,C1[C@H]([C@@H]2[C@@H]([C@H](CO2)ON(=O)=O)O1)ON(=O)=O,-2.63,"InChI=1/C6H8N2O8/c9-7(10)15-3-1-13-6-4(16-8(11)12)2-14-5(3)6/h3-6H,1-2H2/t3-,4+,5-,6-/m1/s1"
391
+ 390,CN(C)C(=S)SSC(=S)N(C)C,-3.9,InChI=1/C6H12N2S4/c1-7(2)5(9)11-12-6(10)8(3)4/h1-4H3
392
+ 391,[C@@H]1([C@@H]([C@@H]([C@H]([C@@H]([C@H]1O)O)O)O)O)O,0.35,"InChI=1/C6H12O6/c7-1-2(8)4(10)6(12)5(11)3(1)9/h1-12H/t1-,2-,3-,4-,5+,6+/m0/s1"
393
+ 392,CCCCCOC(=N)O,-1.47,"InChI=1/C6H13NO2/c1-2-3-4-5-9-6(7)8/h2-5H2,1H3,(H2,7,8)"
394
+ 393,CC(C)(C)COC(=N)O,-0.8,"InChI=1/C6H13NO2/c1-6(2,3)4-9-5(7)8/h4H2,1-3H3,(H2,7,8)"
395
+ 394,c1c(cc(c(c1Br)O)Br)C#N,-3.33,"InChI=1/C7H3Br2NO/c8-5-1-4(3-10)2-6(9)7(5)11/h1-2,11H"
396
+ 395,c1c(cc(c(c1C(=O)O)O)I)I,-3.31,"InChI=1/C7H4I2O3/c8-3-1-4(7(11)12)6(10)5(9)2-3/h1-2,10H,(H,11,12)/f/h11H"
397
+ 396,Cc1c2c(nccn2)ncn1,-0.47,"InChI=1/C7H6N4/c1-5-6-7(11-4-10-5)9-3-2-8-6/h2-4H,1H3"
398
+ 397,Cc1cnc2cncnc2n1,0.06,"InChI=1/C7H6N4/c1-5-2-9-6-3-8-4-10-7(6)11-5/h2-4H,1H3"
399
+ 398,COc1ncc2c(nccn2)n1,-1.11,"InChI=1/C7H6N4O/c1-12-7-10-4-5-6(11-7)9-3-2-8-5/h2-4H,1H3"
400
+ 399,COc1cnc2cncnc2n1,-0.91,"InChI=1/C7H6N4O/c1-12-6-3-9-5-2-8-4-10-7(5)11-6/h2-4H,1H3"
401
+ 400,CSc1c2c(nccn2)ncn1,-2.36,"InChI=1/C7H6N4S/c1-12-7-5-6(10-4-11-7)9-3-2-8-5/h2-4H,1H3"
402
+ 401,CSc1cnc2cncnc2n1,-1.55,"InChI=1/C7H6N4S/c1-12-6-3-9-5-2-8-4-10-7(5)11-6/h2-4H,1H3"
403
+ 402,c1ccc(c(c1)C=O)O,-0.86,"InChI=1/C7H6O2/c8-5-6-3-1-2-4-7(6)9/h1-5,9H"
404
+ 403,c1c(cc(c(c1O)O)O)C(=O)O,-1.16,"InChI=1/C7H6O5/c8-4-1-3(7(11)12)2-5(9)6(4)10/h1-2,8-10H,(H,11,12)/f/h11H"
405
+ 404,C[C@@H]1CCCC(=O)C1,-1.87,"InChI=1/C7H12O/c1-6-3-2-4-7(8)5-6/h6H,2-5H2,1H3/t6-/m1/s1"
406
+ 405,CCCCCN=C(O)OC,-1.92,"InChI=1/C7H15NO2/c1-3-4-5-6-8-7(9)10-2/h3-6H2,1-2H3,(H,8,9)"
407
+ 406,c1cc(ccc1C(=O)O)C(F)(F)F,-1.6,"InChI=1/C8H5F3O2/c9-8(10,11)6-3-1-5(2-4-6)7(12)13/h1-4H,(H,12,13)/f/h12H"
408
+ 407,c1ccc(c(c1)Cl)OCC(=O)O,-2.16,"InChI=1/C8H7ClO3/c9-6-3-1-2-4-7(6)12-5-8(10)11/h1-4H,5H2,(H,10,11)/f/h10H"
409
+ 408,CC(=O)OC1CCCCC1,-1.67,"InChI=1/C8H14O2/c1-7(9)10-8-5-3-2-4-6-8/h8H,2-6H2,1H3"
410
+ 409,COc1cc(ccc1O)C(=O)O,-2.05,"InChI=1/C8H8O4/c1-12-7-4-5(8(10)11)2-3-6(7)9/h2-4,9H,1H3,(H,10,11)/f/h10H"
411
+ 410,CC(=O)c1ccc(cc1)N,-1.61,"InChI=1/C8H9NO/c1-6(10)7-2-4-8(9)5-3-7/h2-5H,9H2,1H3"
412
+ 411,CNc1ccccc1C(=O)O,-2.88,"InChI=1/C8H9NO2/c1-9-7-5-3-2-4-6(7)8(10)11/h2-5,9H,1H3,(H,10,11)/f/h10H"
413
+ 412,Cc1ccc(C)c(c1)O,-1.54,"InChI=1/C8H10O/c1-6-3-4-7(2)8(9)5-6/h3-5,9H,1-2H3"
414
+ 413,Cc1cccc(C)c1O,-1.31,"InChI=1/C8H10O/c1-6-4-3-5-7(2)8(6)9/h3-5,9H,1-2H3"
415
+ 414,Cc1ccc(cc1C)O,-1.41,"InChI=1/C8H10O/c1-6-3-4-8(9)5-7(6)2/h3-5,9H,1-2H3"
416
+ 415,C1CC2C(C(C1O2)C(=O)O)C(=O)O,-0.27,"InChI=1/C8H10O5/c9-7(10)5-3-1-2-4(13-3)6(5)8(11)12/h3-6H,1-2H2,(H,9,10)(H,11,12)/f/h9,11H"
417
+ 416,CCN(CC)C(=S)SCC(=C)Cl,-3.39,"InChI=1/C8H14ClNS2/c1-4-10(5-2)8(11)12-6-7(3)9/h3-6H2,1-2H3"
418
+ 417,CC(C)CC(=O)CC(=O)C,-1.6,"InChI=1/C8H14O2/c1-6(2)4-8(10)5-7(3)9/h6H,4-5H2,1-3H3"
419
+ 418,C[C@H]1CC[C@H](C)CC1,-4.47,"InChI=1/C8H16/c1-7-3-5-8(2)6-4-7/h7-8H,3-6H2,1-2H3/t7-,8-"
420
+ 419,C[C@H]1CCCC[C@@H]1C,-4.27,"InChI=1/C8H16/c1-7-5-3-4-6-8(7)2/h7-8H,3-6H2,1-2H3/t7-,8-/m0/s1"
421
+ 420,CCCC1CCCC1,-4.74,"InChI=1/C8H16/c1-2-5-8-6-3-4-7-8/h8H,2-7H2,1H3"
422
+ 421,CC(C(=O)O)Oc1ccc(cc1Cl)Cl,-2.45,"InChI=1/C9H8Cl2O3/c1-5(9(12)13)14-8-3-2-6(10)4-7(8)11/h2-5H,1H3,(H,12,13)/f/h12H"
423
+ 422,CC(C(=O)O)Oc1ccccc1Cl,-2.22,"InChI=1/C9H9ClO3/c1-6(9(11)12)13-8-5-3-2-4-7(8)10/h2-6H,1H3,(H,11,12)/f/h11H"
424
+ 423,Cc1cc(ccc1OCC(=O)O)Cl,-2.23,"InChI=1/C9H9ClO3/c1-6-4-7(10)2-3-8(6)13-5-9(11)12/h2-4H,5H2,1H3,(H,11,12)/f/h11H"
425
+ 424,c1ccc(cc1)CCC(=O)O,-1.41,"InChI=1/C9H10O2/c10-9(11)7-6-8-4-2-1-3-5-8/h1-5H,6-7H2,(H,10,11)/f/h10H"
426
+ 425,COc1ccc(cc1)C(=O)OC,-2.41,"InChI=1/C9H10O3/c1-11-8-5-3-7(4-6-8)9(10)12-2/h3-6H,1-2H3"
427
+ 426,Cc1cccc(c1)OC(=NC)O,-1.8,"InChI=1/C9H11NO2/c1-7-4-3-5-8(6-7)12-9(11)10-2/h3-6H,1-2H3,(H,10,11)"
428
+ 427,c1cc(c(cc1C[C@@H](C(=O)O)N)O)O,-1.6,"InChI=1/C9H11NO4/c10-6(9(13)14)3-5-1-2-7(11)8(12)4-5/h1-2,4,6,11-12H,3,10H2,(H,13,14)/t6-/m0/s1/f/h13H"
429
+ 428,CC(C)(C(=O)O)C(C)(CC(=O)O)C(=O)O,-0.29,"InChI=1/C9H14O6/c1-8(2,6(12)13)9(3,7(14)15)4-5(10)11/h4H2,1-3H3,(H,10,11)(H,12,13)(H,14,15)/f/h10,12,14H"
430
+ 429,C(CCCC(=O)O)CCCC(=O)O,-1.89,"InChI=1/C9H16O4/c10-8(11)6-4-2-1-3-5-7-9(12)13/h1-7H2,(H,10,11)(H,12,13)/f/h10,12H"
431
+ 430,CCCCCCCCOC(=N)O,-3.3,"InChI=1/C9H19NO2/c1-2-3-4-5-6-7-8-12-9(10)11/h2-8H2,1H3,(H2,10,11)"
432
+ 431,C1=C[C@H]2[C@@H](C1)[C@@]1(C(=C([C@]2(C1(Cl)Cl)Cl)Cl)Cl)Cl,-5.64,"InChI=1/C10H6Cl6/c11-6-7(12)9(14)5-3-1-2-4(5)8(6,13)10(9,15)16/h1-2,4-5H,3H2/t4-,5+,8+,9-/m0/s1"
433
+ 432,c1ccc(cc1)n1c(=O)c(c(cn1)N)Br,-3.12,"InChI=1/C10H8BrN3O/c11-9-8(12)6-13-14(10(9)15)7-4-2-1-3-5-7/h1-6H,12H2"
434
+ 433,c1cc(ccc1N)S(=O)(=O)N=c1cncc[nH]1,-3.7,"InChI=1/C10H10N4O2S/c11-8-1-3-9(4-2-8)17(15,16)14-10-7-12-5-6-13-10/h1-7H,11H2,(H,13,14)"
435
+ 434,Cc1cc(ccc1O[C@H](C)C(=O)O)Cl,-2.55,"InChI=1/C10H11ClO3/c1-6-5-8(11)3-4-9(6)14-7(2)10(12)13/h3-5,7H,1-2H3,(H,12,13)/t7-/m1/s1/f/h12H"
436
+ 435,C1CCc2cc(ccc2C1)O,-1.99,"InChI=1/C10H12O/c11-10-6-5-8-3-1-2-4-9(8)7-10/h5-7,11H,1-4H2"
437
+ 436,CCCN=C(NS(=O)(=O)c1ccc(cc1)Cl)O,-3.03,"InChI=1/C10H13ClN2O3S/c1-2-7-12-10(14)13-17(15,16)9-5-3-8(11)4-6-9/h3-6H,2,7H2,1H3,(H2,12,13,14)"
438
+ 437,CC(C)c1ccc(C)c(c1)O,-2.08,"InChI=1/C10H14O/c1-7(2)9-5-4-8(3)10(11)6-9/h4-7,11H,1-3H3"
439
+ 438,c1ccc(cc1)N(CCO)CCO,-0.73,"InChI=1/C10H15NO2/c12-8-6-11(7-9-13)10-4-2-1-3-5-10/h1-5,12-13H,6-9H2"
440
+ 439,CC1(C)[C@@H]2CC[C@@](C)(C2)C1=O,-1.85,"InChI=1/C10H16O/c1-9(2)7-4-5-10(3,6-7)8(9)11/h7H,4-6H2,1-3H3/t7-,10+/m1/s1"
441
+ 440,C=C(C)[C@@H]1CC[C@@H](C)C(=O)C1,-2.18,"InChI=1/C10H16O/c1-7(2)9-5-4-8(3)10(11)6-9/h8-9H,1,4-6H2,2-3H3/t8-,9-/m1/s1"
442
+ 441,Cc1ccc2ccc(C)nc2c1,-1.94,"InChI=1/C11H11N/c1-8-3-5-10-6-4-9(2)12-11(10)7-8/h3-7H,1-2H3"
443
+ 442,Cc1ccnc(n1)NS(=O)(=O)c1ccc(cc1)N,-2.85,"InChI=1/C11H12N4O2S/c1-8-6-7-13-11(14-8)15-18(16,17)10-4-2-9(12)3-5-10/h2-7H,12H2,1H3,(H,13,14,15)"
444
+ 443,Cc1c(C)oc(n1)NS(=O)(=O)c1ccc(cc1)N,-2.44,"InChI=1/C11H13N3O3S/c1-7-8(2)17-11(13-7)14-18(15,16)10-5-3-9(12)4-6-10/h3-6H,12H2,1-2H3,(H,13,14)"
445
+ 444,CCSC(=O)N(CC(C)C)CC(C)C,-3.68,"InChI=1/C11H23NOS/c1-6-14-11(13)12(7-9(2)3)8-10(4)5/h9-10H,6-8H2,1-5H3"
446
+ 445,c1cc(ccc1O)O.C1=CC(=O)C=CC1=O,-1.73,"InChI=1/C6H6O2.C6H4O2/c2*7-5-1-2-6(8)4-3-5/h1-4,7-8H;1-4H"
447
+ 446,C#C[C@@H](C)N(C)C(=O)Nc1ccc(cc1)Cl,-3.9,"InChI=1/C12H13ClN2O/c1-4-9(2)15(3)12(16)14-11-7-5-10(13)6-8-11/h1,5-9H,2-3H3,(H,14,16)/t9-/m1/s1"
448
+ 447,Cc1cc(nc(C)n1)NS(=O)(=O)c1ccc(cc1)N,-2.24,"InChI=1/C12H14N4O2S/c1-8-7-12(15-9(2)14-8)16-19(17,18)11-5-3-10(13)4-6-11/h3-7H,13H2,1-2H3,(H,14,15,16)"
449
+ 448,CC(C)(C)c1nnc(NS(=O)(=O)C2=CC[C@@H](CC2)N)s1,-3.74,"InChI=1/C12H20N4O2S2/c1-12(2,3)10-14-15-11(19-10)16-20(17,18)9-6-4-8(13)5-7-9/h6,8H,4-5,7,13H2,1-3H3,(H,15,16)/t8-/m0/s1"
450
+ 449,CCCCN=C(NS(=O)(=O)c1ccc(C)cc1)O,-3.39,"InChI=1/C12H18N2O3S/c1-3-4-9-13-12(15)14-18(16,17)11-7-5-10(2)6-8-11/h5-8H,3-4,9H2,1-2H3,(H2,13,14,15)"
451
+ 450,CCCCN(CCCC)CCCC,-3.12,"InChI=1/C12H27N/c1-4-7-10-13(11-8-5-2)12-9-6-3/h4-12H2,1-3H3"
452
+ 451,c1ccc(cc1)N=C(Nc1ccccc1)O,-3.15,"InChI=1/C13H12N2O/c16-13(14-11-7-3-1-4-8-11)15-12-9-5-2-6-10-12/h1-10H,(H2,14,15,16)"
453
+ 452,Cc1c(cc(c(c1N(=O)=O)OC(=O)C)C(C)(C)C)N(=O)=O,-4.47,"InChI=1/C13H16N2O6/c1-7-10(14(17)18)6-9(13(3,4)5)12(21-8(2)16)11(7)15(19)20/h6H,1-5H3"
454
+ 453,CCCCCCCCCCCC(=O)OC,-4.69,"InChI=1/C13H26O2/c1-3-4-5-6-7-8-9-10-11-12-13(14)15-2/h3-12H2,1-2H3"
455
+ 454,c1ccc(c(c1)C(=C(Cl)Cl)c1ccc(cc1)Cl)Cl,-6.36,InChI=1/C14H8Cl4/c15-10-7-5-9(6-8-10)13(14(17)18)11-3-1-2-4-12(11)16/h1-8H
456
+ 455,c1ccc2c(c1)C(=O)c1ccc(c(c1C2=O)O)O,-2.78,"InChI=1/C14H8O4/c15-10-6-5-9-11(14(10)18)13(17)8-4-2-1-3-7(8)12(9)16/h1-6,15,18H"
457
+ 456,c1cc(c(c(c1)F)C(=NC(=O)Nc1ccc(cc1)Cl)O)F,-6.02,"InChI=1/C14H9ClF2N2O2/c15-8-4-6-9(7-5-8)18-14(21)19-13(20)12-10(16)2-1-3-11(12)17/h1-7H,(H2,18,19,20,21)"
458
+ 457,c1ccc(c(c1)[C@@H](c1ccc(cc1)Cl)C(Cl)Cl)Cl,-6.51,"InChI=1/C14H10Cl4/c15-10-7-5-9(6-8-10)13(14(17)18)11-3-1-2-4-12(11)16/h1-8,13-14H/t13-/m1/s1"
459
+ 458,c1ccc(c(c1)C(=O)O)Nc1cccc(c1)C(F)(F)F,-4.36,"InChI=1/C14H10F3NO2/c15-14(16,17)9-4-3-5-10(8-9)18-12-7-2-1-6-11(12)13(19)20/h1-8,18H,(H,19,20)/f/h19H"
460
+ 459,c1ccc2c(c1)cc1ccccc1c2O,-4.73,"InChI=1/C14H10O/c15-14-12-7-3-1-5-10(12)9-11-6-2-4-8-13(11)14/h1-9,15H"
461
+ 460,Cc1cccc(c1C)Nc1ccccc1C(=O)O,-2.28,"InChI=1/C15H15NO2/c1-10-6-5-9-13(11(10)2)16-14-8-4-3-7-12(14)15(17)18/h3-9,16H,1-2H3,(H,17,18)/f/h17H"
462
+ 461,Cc1cc(ccc1NS(=O)(=O)C(F)(F)F)S(=O)(=O)c1ccccc1,-3.8,"InChI=1/C14H12F3NO4S2/c1-10-9-12(23(19,20)11-5-3-2-4-6-11)7-8-13(10)18-24(21,22)14(15,16)17/h2-9,18H,1H3"
463
+ 462,Cc1ccc2c(c1)sc(c1ccc(cc1)N)n2,-3.68,"InChI=1/C14H12N2S/c1-9-2-7-12-13(8-9)17-14(16-12)10-3-5-11(15)6-4-10/h2-8H,15H2,1H3"
464
+ 463,c1ccc(cc1)C[C@H](c1ccccc1)O,-2.52,"InChI=1/C14H14O/c15-14(13-9-5-2-6-10-13)11-12-7-3-1-4-8-12/h1-10,14-15H,11H2/t14-/m1/s1"
465
+ 464,CCOP(=S)(OCC)Oc1ccc2c(C)c(c(=O)oc2c1)Cl,-5.38,"InChI=1/C14H16ClO5PS/c1-4-17-21(22,18-5-2)20-10-6-7-11-9(3)13(15)14(16)19-12(11)8-10/h6-8H,4-5H2,1-3H3"
466
+ 465,CN(C)CCN(Cc1cccs1)c1ccccn1,-2.64,"InChI=1/C14H19N3S/c1-16(2)9-10-17(12-13-6-5-11-18-13)14-7-3-4-8-15-14/h3-8,11H,9-10,12H2,1-2H3"
467
+ 466,C[C@H]1CCCC[C@H]1NC(=Nc1ccccc1)O,-4.11,"InChI=1/C14H20N2O/c1-11-7-5-6-10-13(11)16-14(17)15-12-8-3-2-4-9-12/h2-4,8-9,11,13H,5-7,10H2,1H3,(H2,15,16,17)/t11-,13+/m0/s1"
468
+ 467,Cc1cccc(c1C)Nc1ccccc1C(=O)O,-3.78,"InChI=1/C15H15NO2/c1-10-6-5-9-13(11(10)2)16-14-8-4-3-7-12(14)15(17)18/h3-9,16H,1-2H3,(H,17,18)/f/h17H"
469
+ 468,CC(=CCc1c(ccc2ccc(=O)oc12)OC)C,-4.31,"InChI=1/C15H16O3/c1-10(2)4-7-12-13(17-3)8-5-11-6-9-14(16)18-15(11)12/h4-6,8-9H,7H2,1-3H3"
470
+ 469,CCc1cccc(C)c1N([C@H](C)COC)C(=O)CCl,-2.73,"InChI=1/C15H22ClNO2/c1-5-13-8-6-7-11(2)15(13)17(14(18)9-16)12(3)10-19-4/h6-8,12H,5,9-10H2,1-4H3/t12-/m1/s1"
471
+ 470,C1CCN2C[C@@H]3C[C@@H](CN4CCCC[C@H]34)[C@H]2C1,-1.89,"InChI=1/C15H26N2/c1-3-7-16-11-13-9-12(14(16)5-1)10-17-8-4-2-6-15(13)17/h12-15H,1-11H2/t12-,13-,14+,15+/m0/s1"
472
+ 471,CCCCCCCCCCCCCCC(=O)O,-4.31,"InChI=1/C15H30O2/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15(16)17/h2-14H2,1H3,(H,16,17)/f/h16H"
473
+ 472,CN(C)C(=O)Nc1ccc(cc1)Oc1ccc(cc1)OC,-4.16,"InChI=1/C16H18N2O3/c1-18(2)16(19)17-12-4-6-14(7-5-12)21-15-10-8-13(20-3)9-11-15/h4-11H,1-3H3,(H,17,19)"
474
+ 473,CCCCCCCCCCCCCCCCO,-7.26,"InChI=1/C16H34O/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17/h17H,2-16H2,1H3"
475
+ 474,c1ccc(c(c1)[C@@](c1ccc(cc1)Cl)(c1cncnc1)O)Cl,-4.38,"InChI=1/C17H12Cl2N2O/c18-14-7-5-12(6-8-14)17(22,13-9-20-11-21-10-13)15-3-1-2-4-16(15)19/h1-11,22H/t17-/m1/s1"
476
+ 475,CCN(CC)C(=O)[C@@H](C)Oc1cccc2ccccc12,-3.57,"InChI=1/C17H21NO2/c1-4-18(5-2)17(19)13(3)20-16-12-8-10-14-9-6-7-11-15(14)16/h6-13H,4-5H2,1-3H3/t13-/m1/s1"
477
+ 476,CNCCCN1c2ccccc2CCc2ccccc12,-3.66,"InChI=1/C18H22N2/c1-19-13-6-14-20-17-9-4-2-7-15(17)11-12-16-8-3-5-10-18(16)20/h2-5,7-10,19H,6,11-14H2,1H3"
478
+ 477,CC[C@@H](c1ccc(cc1)O)[C@@H](CC)c1ccc(cc1)O,-4.43,"InChI=1/C18H22O2/c1-3-17(13-5-9-15(19)10-6-13)18(4-2)14-7-11-16(20)12-8-14/h5-12,17-20H,3-4H2,1-2H3/t17-,18-/m0/s1"
479
+ 478,C[C@@]12CC[C@H]3c4ccc(cc4CC[C@@H]3[C@H]1CC[C@@H]2O)O,-4.84,"InChI=1/C18H24O2/c1-18-9-8-14-13-5-3-12(19)10-11(13)2-4-15(14)16(18)6-7-17(18)20/h3,5,10,14-17,19-20H,2,4,6-9H2,1H3/t14-,15-,16+,17-,18+/m0/s1"
480
+ 479,C=C[C@@H]1CN2CC[C@H]1C[C@@H]2[C@@H](c1ccnc2ccccc12)O,-3.09,"InChI=1/C19H22N2O/c1-2-13-12-21-10-8-14(13)11-18(21)19(22)16-7-9-20-17-6-4-3-5-15(16)17/h2-7,9,13-14,18-19,22H,1,8,10-12H2/t13-,14+,18-,19-/m1/s1"
481
+ 480,C[C@]12CC[C@H](C[C@@H]1CC[C@@H]1[C@@H]3CCC(=O)[C@@]3(C)CC[C@@H]21)O,-4.4,"InChI=1/C19H30O2/c1-18-9-7-13(20)11-12(18)3-4-14-15-5-6-17(21)19(15,2)10-8-16(14)18/h12-16,20H,3-11H2,1-2H3/t12-,13+,14+,15-,16+,18-,19-/m0/s1"
482
+ 481,C[C@]12CC[C@H](C[C@@H]1CC[C@@H]1[C@@H]3CCC(=O)[C@@]3(C)C[C@@H]([C@@H]21)O)O,-3.59,"InChI=1/C19H30O3/c1-18-8-7-12(20)9-11(18)3-4-13-14-5-6-16(22)19(14,2)10-15(21)17(13)18/h11-15,17,20-21H,3-10H2,1-2H3/t11-,12+,13+,14-,15-,17-,18-,19-/m0/s1"
483
+ 482,CN(C)CCC=C1c2ccccc2CCc2ccccc12,-4.46,"InChI=1/C20H23N/c1-21(2)15-7-12-20-18-10-5-3-8-16(18)13-14-17-9-4-6-11-19(17)20/h3-6,8-12H,7,13-15H2,1-2H3"
484
+ 483,CN1CCN(CCCN2c3ccccc3Sc3ccc(cc23)Cl)CC1,-4.4,"InChI=1/C20H24ClN3S/c1-22-11-13-23(14-12-22)9-4-10-24-17-5-2-3-6-19(17)25-20-8-7-16(21)15-18(20)24/h2-3,5-8,15H,4,9-14H2,1H3"
485
+ 484,C=C[C@@H]1CN2CC[C@@H]1C[C@H]2[C@@H](c1ccnc2ccc(cc12)OC)O,-2.76,"InChI=1/C20H24N2O2/c1-3-13-12-22-9-7-14(13)10-19(22)20(23)16-6-8-21-18-5-4-15(24-2)11-17(16)18/h3-6,8,11,13-14,19-20,23H,1,7,9-10,12H2,2H3/t13-,14-,19+,20-/m1/s1"
486
+ 485,Cc1c(CC(=O)O)c2cc(ccc2n1C(=O)C=Cc1ccccc1)OC,-5.54,"InChI=1/C21H19NO4/c1-14-17(13-21(24)25)18-12-16(26-2)9-10-19(18)22(14)20(23)11-8-15-6-4-3-5-7-15/h3-12H,13H2,1-2H3,(H,24,25)/f/h24H"
487
+ 486,CC1(C)[C@@H](C=C(Cl)Cl)[C@@H]1C(=O)OCc1cccc(c1)Oc1ccccc1,-6.29,"InChI=1/C21H20Cl2O3/c1-21(2)17(12-18(22)23)19(21)20(24)25-13-14-7-6-10-16(11-14)26-15-8-4-3-5-9-15/h3-12,17,19H,13H2,1-2H3/t17-,19+/m0/s1"
488
+ 487,CN(C)[C@@H]1[C@H]2C[C@H]3C(=C([C@@]2(C(=O)C(=C1O)C(=N)O)O)O)C(=O)c1c(ccc(c1[C@@H]3O)Cl)O,-2.52,"InChI=1/C21H21ClN2O8/c1-24(2)14-7-5-6-10(16(27)12-9(25)4-3-8(22)11(12)15(6)26)18(29)21(7,32)19(30)13(17(14)28)20(23)31/h3-4,6-7,14-15,25-26,28-29,32H,5H2,1-2H3,(H2,23,31)/t6-,7+,14+,15+,21-/m0/s1"
489
+ 488,c1ccc2c(c1)N(CCCN1CCC(CC1)O)c1cc(ccc1S2)C#N,-3.98,"InChI=1/C21H23N3OS/c22-15-16-6-7-21-19(14-16)24(18-4-1-2-5-20(18)26-21)11-3-10-23-12-8-17(25)9-13-23/h1-2,4-7,14,17,25H,3,8-13H2"
490
+ 489,C#C[C@@]1(CC[C@@H]2[C@H]3CCC4=CC(=O)CC[C@@]4(C)[C@@H]3CC[C@@]12C)O,-5.66,"InChI=1/C21H28O2/c1-4-21(23)12-9-18-16-6-5-14-13-15(22)7-10-19(14,2)17(16)8-11-20(18,21)3/h1,13,16-18,23H,5-12H2,2-3H3/t16-,17+,18+,19+,20+,21-/m0/s1"
491
+ 490,CC(=O)[C@@H]1CC[C@@H]2[C@H]3CC=C4C[C@H](CC[C@]4(C)[C@@H]3CC[C@@]12C)O,-4.65,"InChI=1/C21H32O2/c1-13(22)17-6-7-18-16-5-4-14-12-15(23)8-10-20(14,2)19(16)9-11-21(17,18)3/h4,15-19,23H,5-12H2,1-3H3/t15-,16+,17-,18+,19+,20-,21-/m0/s1"
492
+ 491,CCOC(=O)C(c1c(c2ccccc2oc1=O)O)c1c(c2ccccc2oc1=O)O,-3.66,"InChI=1/C22H16O8/c1-2-28-20(25)15(16-18(23)11-7-3-5-9-13(11)29-21(16)26)17-19(24)12-8-4-6-10-14(12)30-22(17)27/h3-10,15,23-24H,2H2,1H3"
493
+ 492,CN1CCc2cc3c(c(c2[C@@H]1[C@H]1c2ccc(c(c2C(=O)O1)OC)OC)OC)OCO3,-3.14,"InChI=1/C22H23NO7/c1-23-8-7-11-9-14-20(29-10-28-14)21(27-4)15(11)17(23)18-12-5-6-13(25-2)19(26-3)16(12)22(24)30-18/h5-6,9,17-18H,7-8,10H2,1-4H3/t17-,18-/m1/s1"
494
+ 493,C[C@H]1c2cccc(c2C(=O)C2=C([C@]3([C@@H]([C@@H](C(=C(C3=O)C(=N)O)O)N(C)C)[C@H]([C@@H]12)O)O)O)O,-2.87,"InChI=1/C22H24N2O8/c1-7-8-5-4-6-9(25)11(8)16(26)12-10(7)17(27)14-15(24(2)3)18(28)13(21(23)31)20(30)22(14,32)19(12)29/h4-7,10,14-15,17,25,27-29,32H,1-3H3,(H2,23,31)/t7-,10-,14-,15-,17-,22-/m0/s1"
495
+ 494,CC(=C[C@@H]1[C@@H](C(=O)OCc2cccc(c2)Oc2ccccc2)C1(C)C)C,-5.24,"InChI=1/C23H26O3/c1-16(2)13-20-21(23(20,3)4)22(24)25-15-17-9-8-12-19(14-17)26-18-10-6-5-7-11-18/h5-14,20-21H,15H2,1-4H3/t20-,21+/m1/s1"
496
+ 495,CC(=O)OCCN1CCN(CCCN2c3ccccc3Sc3ccc(cc23)Cl)CC1,-4.95,"InChI=1/C23H28ClN3O2S/c1-18(28)29-16-15-26-13-11-25(12-14-26)9-4-10-27-20-5-2-3-6-22(20)30-23-8-7-19(24)17-21(23)27/h2-3,5-8,17H,4,9-16H2,1H3"
497
+ 496,CC(=O)OCCN1CCN(CCCN2c3ccccc3Sc3ccc(cc23)Cl)CC1,-4.7,"InChI=1/C23H28ClN3O2S/c1-18(28)29-16-15-26-13-11-25(12-14-26)9-4-10-27-20-5-2-3-6-22(20)30-23-8-7-19(24)17-21(23)27/h2-3,5-8,17H,4,9-16H2,1H3"
extra_code/__pycache__/feature_search.cpython-312.pyc ADDED
Binary file (47.4 kB). View file
 
extra_code/__pycache__/feature_selection.cpython-312.pyc ADDED
Binary file (53.2 kB). View file
 
extra_code/ano_model.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import sys
4
+ import numpy as np
5
+ from sklearn.model_selection import train_test_split, KFold
6
+ from sklearn.metrics import r2_score
7
+ import tensorflow as tf
8
+ import logging
9
+ import psutil
10
+ import subprocess
11
+ import matplotlib.pyplot as plt
12
+ from tensorflow.keras.mixed_precision import set_global_policy
13
+
14
# Set the global Keras mixed-precision policy to 'mixed_float16'.
# This runs at import time, before any model is loaded in main().
set_global_policy('mixed_float16')

# --- Required positional command-line arguments -------------------------
BATCHSIZE = int(sys.argv[1])  # batch size used by preprocess_data()
EPOCHS = int(sys.argv[2])  # number of epochs passed to model.fit()
lr = float(sys.argv[3])  # learning rate handed to the Adam optimizer in main()
fps_file = sys.argv[4]  # path of the feature array read with np.load in main()
y_true_file = sys.argv[5]  # path of the target array read with np.load in main()
# --- Optional positional arguments (fall back to defaults when absent) --
model_name = sys.argv[6] if len(sys.argv) > 6 else None
target_path = sys.argv[7] if len(sys.argv) > 7 else None
# The literal string 'None' is accepted and mapped to "no cross-validation".
cv = int(sys.argv[8]) if len(sys.argv) > 8 and sys.argv[8] != 'None' else None
test_size = float(sys.argv[9]) if len(sys.argv) > 9 else 0.1

# Root-logger configuration shared by every helper in this script.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
28
+
29
def print_cpu_memory():
    """Log a snapshot of system RAM: total, available, used (GB) and percent used."""
    bytes_per_gib = 1024 ** 3
    vm = psutil.virtual_memory()
    sizes = (
        ("Total", vm.total),
        ("Available", vm.available),
        ("Used", vm.used),
    )
    for label, n_bytes in sizes:
        logging.info(f"{label} Memory: {n_bytes / bytes_per_gib:.2f} GB")
    logging.info(f"Memory Usage: {vm.percent}%")
35
def print_gpu_memory(status=""):
    """Log used/total memory for each GPU reported by ``nvidia-smi``.

    Args:
        status: Free-form tag placed in square brackets at the start of every
            log line.

    Nothing is logged when ``nvidia-smi`` exits with a non-zero code. Any
    exception (missing binary, unparsable output, ...) is logged as an error
    and not re-raised.
    """
    query = [
        'nvidia-smi',
        '--query-gpu=memory.used,memory.total',
        '--format=csv,nounits,noheader',
    ]
    try:
        proc = subprocess.run(query, capture_output=True, text=True)
        if proc.returncode != 0:
            return
        # One output row per GPU, formatted as "<used>, <total>".
        for gpu_index, row in enumerate(proc.stdout.strip().split('\n')):
            used, total = row.split(', ')
            logging.info(f"[{status}] GPU {gpu_index}: Memory Usage: {used} MB / {total} MB")
    except Exception as e:
        logging.error(f"Error executing nvidia-smi: {e}")
46
def save_history_plot(history, target_path, model_name, test_size, fold=None):
    """Render the training curves of one fit run and save them as a PNG.

    The figure has two stacked panels: the top one shows training (and, when
    present, validation) loss; the bottom one shows every entry of
    ``history.history`` that does not start with ``val_`` together with its
    ``val_`` counterpart when one exists.

    Args:
        history: Object exposing a ``history`` mapping of metric name to a
            per-epoch sequence; it must contain the key ``'loss'``.
        target_path: Root output directory.
        model_name: Sub-directory of ``target_path`` and prefix of the file name.
        test_size: Value shown in the panel titles and embedded in the file name.
        fold: Optional fold number; a falsy value (``None`` or ``0``) leaves
            the ``_fold`` suffix out of the file name.
    """
    curves = history.history
    plt.figure(figsize=(12, 8))

    # Top panel: loss only.
    plt.subplot(2, 1, 1)
    plt.plot(curves['loss'], label='Training Loss')
    if 'val_loss' in curves:
        plt.plot(curves['val_loss'], label='Validation Loss')
    plt.title(f'Model Loss (test_size={test_size})')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()

    # Bottom panel: every training-side series, each followed by its
    # validation twin when the history contains one.
    plt.subplot(2, 1, 2)
    training_keys = [key for key in curves if not key.startswith('val_')]
    for key in training_keys:
        plt.plot(curves[key], label=f'Training {key}')
        val_key = 'val_' + key
        if val_key in curves:
            plt.plot(curves[val_key], label=f'Validation {key}')
    plt.title(f'Model Metrics (test_size={test_size})')
    plt.ylabel('Value')
    plt.xlabel('Epoch')
    plt.legend()

    plt.tight_layout()

    fold_tag = '_fold' + str(fold) if fold else ''
    file_name = f"{model_name}_history{fold_tag}_test_size[{test_size}].png"
    out_path = os.path.join(target_path, model_name, file_name)
    plt.savefig(out_path, dpi=300)
    plt.close()
76
def load_model(target_path, model_name, test_size, cv=None):
    """Load a saved ``.keras`` model without compiling it.

    The file is looked up at
    ``{target_path}/{model_name}/{model_name}_full_model[_cv{cv}]_test_size[{test_size}].keras``;
    the ``_cv`` part is only present when ``cv`` is truthy.

    Returns:
        The loaded model, or ``None`` when the file does not exist or loading
        raises (both cases are logged as errors).
    """
    cv_tag = '_cv' + str(cv) if cv else ''
    model_path = f"{target_path}/{model_name}/{model_name}_full_model{cv_tag}_test_size[{test_size}].keras"
    try:
        # Guard first so a missing file never reaches the Keras loader.
        if not os.path.exists(model_path):
            logging.error(f"Model path does not exist: {model_path}")
            return None
        loaded = tf.keras.models.load_model(model_path, compile=False)
        logging.info(f"Model successfully loaded from {model_path}")
        return loaded
    except Exception as e:
        logging.error(f"Error loading model: {e}")
        return None
89
def preprocess_data(xtr, ytr, use_parallel=False):
    """Build a shuffled, batched ``tf.data.Dataset`` from feature/target arrays.

    Args:
        xtr: Features; sliced along the first axis, and ``len(xtr)`` is used
            as the shuffle buffer size.
        ytr: Targets aligned with ``xtr`` along the first axis.
        use_parallel: When true, an identity ``map`` with
            ``num_parallel_calls=AUTOTUNE`` is inserted. It does not transform
            the data; it is retained so existing callers keep the same pipeline.

    Returns:
        A dataset yielding ``(x, y)`` batches of ``BATCHSIZE`` elements.
    """
    dataset = tf.data.Dataset.from_tensor_slices((xtr, ytr))
    if use_parallel:
        dataset = dataset.map(lambda x, y: (x, y), num_parallel_calls=tf.data.AUTOTUNE)
    # Cache BEFORE shuffling. With the cache placed after shuffle/batch, the
    # batches produced during the first epoch are stored and replayed verbatim,
    # so later epochs would never be reshuffled.
    return (
        dataset
        .cache()
        .shuffle(buffer_size=len(xtr))
        .batch(BATCHSIZE)
        .prefetch(tf.data.AUTOTUNE)
    )
95
+
96
+ # def train_model(model, train_dataset, target_path, model_name, fold=None):
97
def train_model(model, train_dataset, valid_dataset, target_path, model_name, fold=None):
    """Fit ``model``, checkpoint the best epoch, and save the history plot.

    Args:
        model: Compiled Keras model; trained in place.
        train_dataset: Training data passed to ``model.fit``.
        valid_dataset: Validation data; its ``val_loss`` drives checkpointing.
        target_path: Root output directory. Checkpoints go to
            ``{target_path}/checkpoints/{model_name}``.
        model_name: Name used for the checkpoint directory and the plot file.
        fold: Optional fold number; when truthy it is appended to the
            checkpoint file name and forwarded to the plot.

    Returns:
        None.
    """
    checkpoint_dir = f"{target_path}/checkpoints/{model_name}"
    os.makedirs(checkpoint_dir, exist_ok=True)
    fold_tag = '_fold' + str(fold) if fold else ''
    checkpoint_path = os.path.join(checkpoint_dir, f"model{fold_tag}.keras")

    # Keep only the full model with the lowest validation loss seen so far.
    # NOTE: no early stopping is used; training always runs for EPOCHS epochs.
    cp = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_weights_only=False,
        save_best_only=True,
        monitor='val_loss',
        mode='min',
        verbose=1,
    )

    history = model.fit(
        train_dataset,
        epochs=EPOCHS,
        validation_data=valid_dataset,
        callbacks=[cp],
        verbose=0,
    )

    # save_history_plot expects (history, target_path, model_name, test_size, fold).
    # Passing `fold` as the fourth positional argument put it in the test_size
    # slot, mislabelling the plot and dropping the fold suffix from its file
    # name. `test_size` is the module-level command-line setting.
    save_history_plot(history, target_path, model_name, test_size, fold)

    # Drops only this function's reference to the dataset before collecting.
    del train_dataset
    gc.collect()
128
def clear_gpu_memory():
    """Reset the Keras backend session, run the garbage collector and log it."""
    # Same order as before: reset Keras state first, then collect garbage.
    for release in (tf.keras.backend.clear_session, gc.collect):
        release()
    logging.info("GPU memory cleared.")
132
def main():
    """Train the stored model on the fingerprint data and print its test R2.

    Configuration is read from module-level names: ``target_path``,
    ``model_name``, ``test_size``, ``cv``, ``lr``, ``fps_file`` and
    ``y_true_file``.

    Workflow:
        1. Load the saved model, compile it with an Adam/MSE setup.
        2. Load features and targets with ``np.load`` and check that the
           feature width matches the model input.
        3. With ``cv > 1``: hold out a test split, run K-fold training on the
           remainder, then score the held-out test split.
           Otherwise: hold out a test split, take 10% of the remainder for
           validation, train once and score the test split.
        4. Save the trained model under ``save_model/<model_name>/``.

    The final R2 is printed to stdout as a bare number; any exception is
    logged and ``0.000000`` is printed instead. Memory clean-up always runs.
    """
    try:
        os.makedirs(f"{target_path}/{model_name}", exist_ok=True)
        model = load_model(target_path, model_name, test_size, cv)
        if model is None:
            raise ValueError("Failed to load model")

        # The model is loaded uncompiled, so optimizer/loss/metrics are set here.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[
                tf.keras.metrics.MeanSquaredError(),
                tf.keras.metrics.MeanAbsoluteError(),
                tf.keras.metrics.RootMeanSquaredError()
            ]
        )

        fps = np.load(fps_file)
        y_true = np.load(y_true_file)

        model_input_shape = model.input_shape
        if model_input_shape[1] != fps.shape[1]:
            raise ValueError(f"Model input dimension ({model_input_shape[1]}) does not match data dimension ({fps.shape[1]})")

        if cv is not None and cv > 1:
            xtr, xte, ytr, yte = train_test_split(fps, y_true, test_size=test_size, random_state=42)
            kf = KFold(n_splits=cv, shuffle=True, random_state=42)
            avg_r2_score = []

            for fold, (train_index, test_index) in enumerate(kf.split(xtr), 1):
                xtr_cv, xte_cv = xtr[train_index], xtr[test_index]
                ytr_cv, yte_cv = ytr[train_index], ytr[test_index]

                train_dataset = preprocess_data(xtr_cv, ytr_cv, use_parallel=True)
                # train_model() takes the validation dataset as its third
                # argument; the fold's held-out part supplies the val_loss that
                # the checkpoint callback monitors. (Previously this argument
                # was missing, which shifted every following argument.)
                valid_dataset = preprocess_data(xte_cv, yte_cv, use_parallel=True)
                train_model(model, train_dataset, valid_dataset, target_path, model_name, fold)

                ypred = model.predict(xte_cv, verbose=0)
                r2_scores = r2_score(yte_cv, ypred)

                if np.isnan(r2_scores) or np.isinf(r2_scores) or r2_scores <= 0:
                    logging.warning(f"[cv][{fold}th] : R2 score : 0.000000 (prune)")
                else:
                    logging.info(f"[cv][{fold}th] : R2 score : {r2_scores:.6f}")

                avg_r2_score.append(r2_scores)
                clear_gpu_memory()
                print_cpu_memory()
                print_gpu_memory(f"Fold {fold}")

            r2_result_res_avg = np.mean(avg_r2_score)
            logging.info(f"[cv][{fold}th][Avg] : R2 score : {r2_result_res_avg:.6f}")

            # Final score on the split that no fold has seen.
            ypred = model.predict(xte, verbose=0)
            r2_result = r2_score(yte, ypred)
            os.makedirs(f"save_model/{model_name}", exist_ok=True)
            model.save(f"save_model/{model_name}/{model_name}_full_model{'_cv'+str(cv) if cv else ''}_test_size[{test_size}]_r2score[{r2_result:<.4f}].keras")
            del model
            logging.info(f"[cv][{fold}th][Result] : R2 score : {r2_result:.6f}")
            print(f"{r2_result:.6f}")
        else:
            xtr, xte, ytr, yte = train_test_split(fps, y_true, test_size=test_size, random_state=42)
            # 10% of the training part is used as validation data.
            xtr, xtev, ytr, ytev = train_test_split(xtr, ytr, test_size=0.1, random_state=42)
            train_dataset = preprocess_data(xtr, ytr, use_parallel=True)
            valid_dataset = preprocess_data(xtev, ytev, use_parallel=True)
            train_model(model, train_dataset, valid_dataset, target_path, model_name)

            ypred = model.predict(xte, verbose=0)
            r2_result = r2_score(yte, ypred)

            os.makedirs(f"save_model/{model_name}", exist_ok=True)
            model.save(f"save_model/{model_name}/{model_name}_full_model{'_cv'+str(cv) if cv else ''}_test_size[{test_size}]_r2score[{r2_result:<.4f}].keras")
            del model

            if np.isnan(r2_result) or np.isinf(r2_result) or r2_result <= 0:
                logging.warning("R2: 0.000000 (prune)")
            else:
                logging.info(f"R2: {r2_result:.6f}")
            # NOTE(review): printed unconditionally, matching the CV branch;
            # presumably a calling process reads this number - confirm.
            print(f"{r2_result:.6f}")

    except Exception as e:
        logging.error(f"Error in learning process: {e}")
        print("0.000000")

    finally:
        clear_gpu_memory()
        print_cpu_memory()
        print_gpu_memory("Final")
218
+
219
# Script entry point: run the training pipeline only when executed directly.
if __name__ == "__main__":
    main()
extra_code/basic_model.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gc
import logging
import os
import sys

# Environment settings.
# These are exported BEFORE TensorFlow is imported: the native runtime can
# read its environment as soon as it is loaded, so values set after the import
# may be ignored (notably the log-level filter).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow INFO and WARNING messages
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices'
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/local/cuda --xla_gpu_force_compilation_parallelism=1'

import numpy as np  # noqa: E402
import tensorflow as tf  # noqa: E402
from sklearn.model_selection import train_test_split  # noqa: E402
from tensorflow.keras.models import model_from_json  # noqa: E402

# Suppress TensorFlow logging on the Python side as well.
logging.getLogger('tensorflow').setLevel(logging.ERROR)

# Positional command-line arguments:
#   1: batch size, 2: number of epochs,
#   3: path of the feature array, 4: path of the target array
#      (both paths are later passed to np.load).
BATCHSIZE = int(sys.argv[1])
EPOCHS = int(sys.argv[2])
fps_file = sys.argv[3]
y_true_file = sys.argv[4]
24
+
25
def load_model():
    """Rebuild the model from its JSON config, load its weights and compile it.

    Reads ``save_model/model_config.json`` and
    ``save_model/model_weights.weights.h5`` relative to the working directory.

    Returns:
        The compiled Keras model (Adam optimizer, MSE loss; MSE, MAE and RMSE
        as metrics).
    """
    with open('save_model/model_config.json', 'r') as config_file:
        architecture_json = config_file.read()

    network = model_from_json(architecture_json)
    network.load_weights('save_model/model_weights.weights.h5')

    optimizer = tf.keras.optimizers.Adam()
    loss_fn = tf.keras.losses.MeanSquaredError()
    regression_metrics = [
        tf.keras.metrics.MeanSquaredError(),
        tf.keras.metrics.MeanAbsoluteError(),
        tf.keras.metrics.RootMeanSquaredError(),
    ]
    network.compile(optimizer=optimizer, loss=loss_fn, metrics=regression_metrics)
    return network
36
+
37
def preprocess_data(xtr, ytr):
    """Wrap the training arrays in a shuffled, batched ``tf.data`` pipeline.

    Args:
        xtr: Feature array; sliced along its first axis.
        ytr: Target array sliced together with ``xtr``.

    Returns:
        A ``tf.data.Dataset`` of ``(features, targets)`` batches of
        ``BATCHSIZE`` elements, shuffled with a buffer of at most 10000.
    """
    pairs = tf.data.Dataset.from_tensor_slices((xtr, ytr))
    # The shuffle buffer never needs to exceed the number of samples.
    shuffled = pairs.shuffle(buffer_size=min(10000, len(xtr)))
    batched = shuffled.batch(BATCHSIZE)
    return batched.prefetch(tf.data.AUTOTUNE)
42
+
43
def train_model(model, train_dataset, epochs):
    """Fit ``model`` silently and store it as ``save_model/trained_model.keras``.

    Args:
        model: Compiled model exposing ``fit`` and ``save``.
        train_dataset: Training data handed to ``model.fit``.
        epochs: Number of epochs to train.

    Returns:
        The same ``model`` object, after training and saving.
    """
    output_path = 'save_model/trained_model.keras'
    model.fit(train_dataset, epochs=epochs, verbose=0)
    model.save(output_path)
    return model
47
+
48
def clear_gpu_memory():
    """Reset the Keras backend session, collect garbage and report on stderr."""
    # Same order as before: reset Keras state first, then collect garbage.
    for release in (tf.keras.backend.clear_session, gc.collect):
        release()
    # stderr keeps stdout free of status messages.
    print("GPU memory cleared.", file=sys.stderr)
52
+
53
if __name__ == "__main__":
    # Load the feature matrix and the targets given on the command line.
    fps = np.load(fps_file)
    y_true = np.load(y_true_file)

    # Fixed 80/20 split; only the training part is used in this script.
    xtr, xte, ytr, yte = train_test_split(
        fps, y_true, test_size=0.2, random_state=42
    )
    train_dataset = preprocess_data(xtr, ytr)

    # Train the stored model; train_model() also writes it back to disk.
    model = load_model()
    trained_model = train_model(model, train_dataset, EPOCHS)

    clear_gpu_memory()
extra_code/feature_search.py ADDED
@@ -0,0 +1,702 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import pandas as pd
4
+ import gc
5
+ from concurrent.futures import ProcessPoolExecutor, as_completed
6
+
7
+ from rdkit import Chem
8
+ from rdkit.Chem import AllChem, DataStructs, Draw
9
+ from rdkit import RDConfig
10
+ from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges
11
+ from rdkit.Chem.AllChem import GetMorganGenerator
12
+ from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray
13
+ from rdkit.Avalon.pyAvalonTools import GetAvalonFP
14
+ from rdkit.Chem.Descriptors import ExactMolWt
15
+
16
def mol3d(mol):
    """Add hydrogens to ``mol`` and try to give it a 3D conformer.

    The steps are tried in order (ETKDGv3 embedding, UFF optimisation, MMFF
    optimisation); the molecule is returned as soon as it has at least one
    conformer after a step.

    Args:
        mol: RDKit molecule to embed.

    Returns:
        The hydrogen-added molecule with a conformer, or ``None`` when no step
        left the molecule with a conformer.
    """
    hydrated = Chem.AddHs(mol)
    # NOTE(review): the two force-field steps only run when embedding left the
    # molecule without a conformer; they do not look able to create one by
    # themselves - confirm whether a retry embedding was intended here.
    steps = (
        (AllChem.EmbedMolecule, (hydrated, AllChem.ETKDGv3()), {}),
        (AllChem.UFFOptimizeMolecule, (hydrated,), {'maxIters': 200}),
        (AllChem.MMFFOptimizeMolecule, (hydrated,), {'maxIters': 200}),
    )

    for method, positional, named in steps:
        try:
            method(*positional, **named)
        except ValueError as e:
            print(f"Error: {e} - Trying next optimization method [{method}]")
        else:
            if hydrated.GetNumConformers() > 0:
                return hydrated

    print(f"Invalid mol for 3d {Chem.MolToSmiles(hydrated)} - No conformer generated")
    return None
34
+
35
def _stack_by_rank(arrays):
    """Return the 1-D arrays stacked as columns, then the 2-D arrays side by side.

    Empty groups are left out; arrays of any other rank are ignored.
    """
    columns_1d = [arr[:, None] for arr in arrays if arr.ndim == 1]
    blocks_2d = [arr for arr in arrays if arr.ndim == 2]
    groups = []
    if columns_1d:
        groups.append(np.concatenate(columns_1d, axis=1))
    if blocks_2d:
        groups.append(np.concatenate(blocks_2d, axis=1))
    return groups


def _named_frame(block, descriptor_name, start=0):
    """Wrap a 2-D block in a DataFrame with columns ``<name>_<start+1>``, ..."""
    names = [f"{descriptor_name}_{start + i + 1}" for i in range(block.shape[1])]
    return pd.DataFrame(block, columns=names)


def _append_descriptor_frame(frame, descriptor, descriptor_name):
    """Append ``descriptor`` to a DataFrame as one or more named columns."""
    if isinstance(descriptor, np.ndarray) and descriptor.ndim >= 2:
        try:
            frame = pd.concat([frame, _named_frame(descriptor, descriptor_name)], axis=1)
        except Exception as e:
            print(f"[-1-] Error occured: {e}")
    elif isinstance(descriptor, list) and isinstance(descriptor[0], np.ndarray):
        try:
            # Number the columns continuously across the 1-D and the 2-D
            # group; restarting at 1 for the second group would create
            # duplicate column labels.
            offset = 0
            for block in _stack_by_rank(descriptor):
                frame = pd.concat([frame, _named_frame(block, descriptor_name, offset)], axis=1)
                offset += block.shape[1]
        except Exception as e:
            print(f"[-2-] Error occured: {e}")
    elif isinstance(descriptor, list) and isinstance(descriptor[0], list):
        try:
            values = np.asarray(descriptor).astype('float')
            frame = pd.concat([frame, _named_frame(values, descriptor_name)], axis=1)
        except Exception as e:
            print(f"[-3-] Error occured: {e}")
    else:
        # One value per row. This assigns in place, so a DataFrame passed in
        # by the caller gains the column as well.
        frame[descriptor_name] = np.asarray(descriptor).astype('float').flatten()
    return frame


def _append_descriptor_array(array, descriptor):
    """Append ``descriptor`` to a 2-D NumPy array as extra columns."""
    if isinstance(descriptor, np.ndarray) and descriptor.ndim >= 2:
        try:
            array = np.concatenate([array, descriptor], axis=1)
        except Exception as e:
            print(f"[-1-] Error occured: {e}")
    elif isinstance(descriptor, list) and isinstance(descriptor[0], np.ndarray):
        try:
            array = np.concatenate([array] + _stack_by_rank(descriptor), axis=1)
        except Exception as e:
            print(f"[-2-] Error occured: {e}")
    elif isinstance(descriptor, list) and isinstance(descriptor[0], list):
        try:
            values = np.asarray(descriptor).astype('float')
            array = np.concatenate([array, values], axis=1)
        except Exception as e:
            print(f"[-3-] Error occured: {e}")
    else:
        column = np.asarray(descriptor).astype('float')
        array = np.concatenate([array, column[:, None]], axis=1)
    return array


def generating_newfps(fps, descriptor, descriptor_name, save_res="np"):
    """Append a descriptor to the feature matrix as new column(s).

    Accepted ``descriptor`` forms: an ndarray with two or more dimensions, a
    list of 1-D/2-D ndarrays, a list of lists, or a flat sequence with one
    value per row.

    Args:
        fps: Current feature matrix (NumPy array, or DataFrame in ``"pd"`` mode).
        descriptor: Values to append; ``None`` returns ``fps`` unchanged.
        descriptor_name: Base name for the new columns (used in ``"pd"`` mode
            and in error messages).
        save_res: ``"pd"`` to build a DataFrame; any other value builds a
            NumPy array.

    Returns:
        The extended matrix with NaN and +/-inf replaced by 0. A shape problem
        inside one of the array/list cases is printed and the matrix is
        returned without the new columns; any other failure is printed and the
        original ``fps`` is returned untouched.
    """
    try:
        if descriptor is None:
            return fps

        if save_res == "pd":
            frame = fps if isinstance(fps, pd.DataFrame) else pd.DataFrame(fps)
            frame = _append_descriptor_frame(frame, descriptor, descriptor_name)
            return frame.replace([np.inf, -np.inf], np.nan).fillna(0)

        merged = _append_descriptor_array(fps, descriptor)
        return np.nan_to_num(merged, nan=0.0, posinf=0.0, neginf=0.0).astype('float')

    except Exception as e:
        print(f"Error occurred in {descriptor_name}: {e}")
        return fps
141
+
142
def Normalization(descriptor):
    """Apply a sign-preserving ``log1p`` transform to descriptor values.

    Values are clipped to +/-1e15, magnitudes below 1e-10 are replaced by
    1e-10, and the result is ``sign(x) * log1p(|x|)`` with NaN/inf mapped to 0.

    Args:
        descriptor: Array-like of numeric values.

    Returns:
        NumPy array with the transformed values.
    """
    floor = 1e-10
    ceiling = 1e15
    values = np.clip(np.asarray(descriptor), -ceiling, ceiling)
    # Near-zero magnitudes are lifted to the (positive) floor value.
    values = np.where(np.abs(values) < floor, floor, values)
    scaled = np.sign(values) * np.log1p(np.abs(values))
    scaled = np.nan_to_num(scaled, nan=0.0, posinf=0.0, neginf=0.0)
    gc.collect()
    return scaled
153
+
154
def values_chi(mol, chi_type):
    """Collect the chi values of ``mol`` for orders 0, 1, ... up to the first zero.

    Args:
        mol: RDKit molecule.
        chi_type: ``'n'`` selects ``ChiNn_``; any other value selects ``ChiNv_``.

    Returns:
        1-D NumPy array with the values of all orders before the first order
        whose value is exactly 0.0 (empty when order 0 is already zero).
    """
    chi_func = Chem.GraphDescriptors.ChiNn_ if chi_type == 'n' else Chem.GraphDescriptors.ChiNv_
    values = []
    order = 0
    # Single pass: every order is evaluated once and kept, instead of probing
    # for the end first and then recomputing all values.
    while True:
        value = chi_func(mol, order)
        if value == 0.0:
            break
        values.append(value)
        order += 1
    return np.array(values)
160
+
161
def generate_chi(mols, chi_type):
    """Compute the chi series of every molecule in parallel, zero-padded.

    Args:
        mols: Iterable of RDKit molecules.
        chi_type: Forwarded to ``values_chi`` (``'n'`` or another value).

    Returns:
        2-D NumPy array with one row per molecule; shorter series are padded
        with zeros on the right up to the longest series. An empty input gives
        an array of shape ``(0, 0)``.
    """
    mol_list = list(mols)
    if not mol_list:
        # max() over an empty sequence would raise; return an empty matrix.
        return np.empty((0, 0))

    with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
        futures = [executor.submit(values_chi, mol, chi_type) for mol in mol_list]
        # Results are read in submission order, so rows stay aligned with mols.
        series = [future.result() for future in futures]

    width = max(len(values) for values in series)
    return np.array([np.pad(values, (0, width - len(values)), 'constant') for values in series])
171
+
172
def sanitize_and_compute_descriptor(mol):
    """Return the BCUT2D descriptor of ``mol``, with fallbacks on failure.

    Hydrogens are removed and the molecule is sanitized, Gasteiger charges are
    computed, then ``BCUT2D`` is evaluated.

    Args:
        mol: RDKit molecule.

    Returns:
        The ``BCUT2D`` result on success. A list of eight zeros when hydrogen
        removal, sanitization or the charge calculation fails. A list holding
        the molecular weight eight times when only ``BCUT2D`` fails.
    """
    try:
        cleaned = Chem.RemoveHs(mol)
        Chem.SanitizeMol(cleaned)

        try:
            Chem.rdPartialCharges.ComputeGasteigerCharges(cleaned)
        except Exception as e:
            print(f"Gasteiger charge calculation failed: {e}")
            return [0] * 8

        try:
            return Chem.rdMolDescriptors.BCUT2D(cleaned)
        except Exception as e:
            print(f"BCUT2D calculation failed: {e}")
            # NOTE(review): the molecular weight is used as a stand-in for all
            # eight values - confirm this is the intended fallback.
            return [Descriptors.MolWt(cleaned)] * 8
    except Exception:
        # Any other failure (including one inside the fallback) yields zeros.
        return [0] * 8
189
+
190
def compute_descriptors_parallel(mols, n_jobs=None):
    """Run ``sanitize_and_compute_descriptor`` on all molecules in a process pool.

    Args:
        mols: Iterable of RDKit molecules; ``None`` entries are skipped.
        n_jobs: Worker count passed to ``ProcessPoolExecutor`` as ``max_workers``.

    Returns:
        NumPy array built from the per-molecule results, in input order.
    """
    # NOTE(review): skipping None entries makes the result shorter than
    # ``mols``; callers that align rows by position should confirm this.
    present = [mol for mol in mols if mol is not None]
    with ProcessPoolExecutor(max_workers=n_jobs) as pool:
        pending = [pool.submit(sanitize_and_compute_descriptor, mol) for mol in present]
        rows = [task.result() for task in pending]
    return np.array(rows)
195
+
196
def process_molecules_parallel(mols, max_workers=4, chunk_size=100):
    """Generate 3D conformers for ``mols`` chunk by chunk in a process pool.

    Args:
        mols: Sequence of RDKit molecules (must support ``len`` and slicing).
        max_workers: Number of worker processes per chunk.
        chunk_size: Number of molecules handed to one pool at a time.

    Returns:
        List of the molecules returned by ``mol3d``, in input order. Molecules
        for which ``mol3d`` returned ``None`` are left out, so the list can be
        shorter than ``mols``.
    """
    results = []
    for start in range(0, len(mols), chunk_size):
        chunk = mols[start:start + chunk_size]
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(mol3d, mol) for mol in chunk]
            # Read the futures in submission order. Iterating in completion
            # order would shuffle the molecules relative to the input, which
            # breaks any row-wise pairing with other per-molecule data.
            for future in futures:
                embedded = future.result()
                if embedded is not None:
                    results.append(embedded)
        gc.collect()
    return results
208
+
209
+ def search_data_descriptor_compress(trial, fps, mols, name, target_path="result", save_res="np"):
210
+ ####################################
211
+ phase0 = 1 #trial.suggest_int("MolWt", 0, 1) # 'MolWt'
212
+ phase1 = 1 #trial.suggest_int("MolLogP", 0, 1) # 'MolLogP'
213
+ phase2 = 1 #trial.suggest_int("MolMR", 0, 1) # 'MolMR'
214
+ phase3 = 1 #trial.suggest_int("TPSA", 0, 1) # 'TPSA'
215
+ phase4 = trial.suggest_int("NumRotatableBonds", 0, 1) # 'NumRotatableBonds'
216
+ phase5 = trial.suggest_int("HeavyAtomCount", 0, 1) # 'HeavyAtomCount'
217
+ phase6 = trial.suggest_int("NumHAcceptors", 0, 1) # 'NumHAcceptors'
218
+ phase7 = trial.suggest_int("NumHDonors", 0, 1) # 'NumHDonors'
219
+ phase8 = trial.suggest_int("NumHeteroatoms", 0, 1) # 'NumHeteroatoms'
220
+ phase9 = trial.suggest_int("NumValenceElectrons", 0, 1) # 'NumValenceElectrons'
221
+ phase10 = trial.suggest_int("NHOHCount", 0, 1) # 'NHOHCount'
222
+ phase11 = trial.suggest_int("NOCount", 0, 1) # 'NOCount'
223
+ phase12 = trial.suggest_int("RingCount", 0, 1) # 'RingCount'
224
+ phase13 = trial.suggest_int("NumAromaticRings", 0, 1) # 'NumAromaticRings'
225
+ phase14 = trial.suggest_int("NumSaturatedRings", 0, 1) # 'NumSaturatedRings'
226
+ phase15 = trial.suggest_int("NumAliphaticRings", 0, 1) # 'NumAliphaticRings'
227
+ phase16 = trial.suggest_int("LabuteASA", 0, 1) # 'LabuteASA'
228
+ phase17 = trial.suggest_int("BalabanJ", 0, 1) # 'BalabanJ'
229
+ phase18 = trial.suggest_int("BertzCT", 0, 1) # 'BertzCT'
230
+ phase19 = trial.suggest_int("Ipc", 0, 1) # 'Ipc'
231
+ phase20 = trial.suggest_int("kappa_Series[1-3]_ind", 0, 1) # 'kappa_Series[1-3]_ind'
232
+ phase21 = trial.suggest_int("Chi_Series[13]_ind", 0, 1) # 'Chi_Series[13]_ind'
233
+ phase22 = trial.suggest_int("Phi", 0, 1) # 'Phi'
234
+ phase23 = trial.suggest_int("HallKierAlpha", 0, 1) # 'HallKierAlpha'
235
+ phase24 = trial.suggest_int("NumAmideBonds", 0, 1) # 'NumAmideBonds'
236
+ phase25 = trial.suggest_int("FractionCSP3", 0, 1) # 'FractionCSP3'
237
+ phase26 = trial.suggest_int("NumSpiroAtoms", 0, 1) # 'NumSpiroAtoms'
238
+ phase27 = trial.suggest_int("NumBridgeheadAtoms", 0, 1) # 'NumBridgeheadAtoms'
239
+ phase28 = trial.suggest_int("PEOE_VSA_Series[1-14]_ind", 0, 1) # 'PEOE_VSA_Series[1-14]_ind'
240
+ phase29 = trial.suggest_int("SMR_VSA_Series[1-10]_ind", 0, 1) # 'SMR_VSA_Series[1-10]_ind'
241
+ phase30 = trial.suggest_int("SlogP_VSA_Series[1-12]_ind", 0, 1)# 'SlogP_VSA_Series[1-12]_ind'
242
+ phase31 = trial.suggest_int("EState_VSA_Series[1-11]_ind", 0, 1)# 'EState_VSA_Series[1-11]_ind'
243
+ phase32 = trial.suggest_int("VSA_EState_Series[1-10]", 0, 1) # 'VSA_EState_Series[1-10]'
244
+ phase33 = trial.suggest_int("MQNs", 0, 1) # 'MQNs'
245
+ phase34 = trial.suggest_int("AUTOCORR2D", 0, 1) # 'AUTOCORR2D'
246
+ phase35 = trial.suggest_int("BCUT2D", 0, 1) # 'BCUT2D'
247
+ phase36 = trial.suggest_int("Asphericity", 0, 1) # 'Asphericity'
248
+ phase37 = trial.suggest_int("PBF", 0, 1) # 'PBF'
249
+ phase38 = trial.suggest_int("RadiusOfGyration", 0, 1) # 'RadiusOfGyration'
250
+ phase39 = trial.suggest_int("InertialShapeFactor", 0, 1) # 'InertialShapeFactor'
251
+ phase40 = trial.suggest_int("Eccentricity", 0, 1) # 'Eccentricity'
252
+ phase41 = trial.suggest_int("SpherocityIndex", 0, 1) # 'SpherocityIndex'
253
+ phase42 = trial.suggest_int("PMI_series[1-3]_ind", 0, 1) # 'PMI_series[1-3]_ind'
254
+ phase43 = trial.suggest_int("NPR_series[1-2]_ind", 0, 1) # 'NPR_series[1-2]_ind'
255
+ phase44 = trial.suggest_int("AUTOCORR3D", 0, 1) # 'AUTOCORR3D'
256
+ phase45 = trial.suggest_int("RDF", 0, 1) # 'RDF'
257
+ phase46 = trial.suggest_int("MORSE", 0, 1) # 'MORSE'
258
+ phase47 = trial.suggest_int("WHIM", 0, 1) # 'WHIM'
259
+ phase48 = trial.suggest_int("GETAWAY", 0, 1) # 'GETAWAY'
260
+ ####################################
261
+ def clear_descriptor_memory(descriptor):
262
+ del descriptor
263
+ gc.collect()
264
+ ####################################
265
+ ####################################
266
+ if phase0 == 1:
267
+ descriptor = [Descriptors.ExactMolWt(alpha) for alpha in mols]
268
+ fps = generating_newfps(fps, descriptor, 'MolWt', save_res)
269
+ clear_descriptor_memory(descriptor)
270
+ if phase1 == 1:
271
+ descriptor = [Chem.Crippen.MolLogP(alpha) for alpha in mols]
272
+ fps = generating_newfps(fps, descriptor, 'MolLogP', save_res)
273
+ clear_descriptor_memory(descriptor)
274
+ if phase2 == 1:
275
+ descriptor = [Chem.Crippen.MolMR(alpha) for alpha in mols]
276
+ fps = generating_newfps(fps, descriptor, 'MolMR', save_res)
277
+ clear_descriptor_memory(descriptor)
278
+ if phase3 == 1:
279
+ descriptor = [Descriptors.TPSA(alpha) for alpha in mols]
280
+ fps = generating_newfps(fps, descriptor, 'TPSA', save_res)
281
+ clear_descriptor_memory(descriptor)
282
+ if phase4 == 1:
283
+ descriptor = [Chem.Lipinski.NumRotatableBonds(alpha) for alpha in mols]
284
+ fps = generating_newfps(fps, descriptor, 'NumRotatableBonds', save_res)
285
+ clear_descriptor_memory(descriptor)
286
+ if phase5 == 1:
287
+ descriptor = [Chem.Lipinski.HeavyAtomCount(alpha) for alpha in mols]
288
+ fps = generating_newfps(fps, descriptor, 'HeavyAtomCount', save_res)
289
+ clear_descriptor_memory(descriptor)
290
+ if phase6 == 1:
291
+ descriptor = [Chem.Lipinski.NumHAcceptors(alpha) for alpha in mols]
292
+ fps = generating_newfps(fps, descriptor, 'NumHAcceptors', save_res)
293
+ clear_descriptor_memory(descriptor)
294
+ if phase7 == 1:
295
+ descriptor = [Chem.Lipinski.NumHDonors(alpha) for alpha in mols]
296
+ fps = generating_newfps(fps, descriptor, 'NumHDonors', save_res)
297
+ clear_descriptor_memory(descriptor)
298
+ if phase8 == 1:
299
+ descriptor = [Chem.Lipinski.NumHeteroatoms(alpha) for alpha in mols]
300
+ fps = generating_newfps(fps, descriptor, 'NumHeteroatoms', save_res)
301
+ clear_descriptor_memory(descriptor)
302
+ if phase9 == 1:
303
+ descriptor = [Chem.Descriptors.NumValenceElectrons(alpha) for alpha in mols]
304
+ fps = generating_newfps(fps, descriptor, 'NumValenceElectrons', save_res)
305
+ clear_descriptor_memory(descriptor)
306
+ if phase10 == 1:
307
+ descriptor = [Chem.Lipinski.NHOHCount(alpha) for alpha in mols]
308
+ fps = generating_newfps(fps, descriptor, 'NHOHCount', save_res)
309
+ clear_descriptor_memory(descriptor)
310
+ if phase11 == 1:
311
+ descriptor = [Chem.Lipinski.NOCount(alpha) for alpha in mols]
312
+ fps = generating_newfps(fps, descriptor, 'NOCount', save_res)
313
+ clear_descriptor_memory(descriptor)
314
+ if phase12 == 1:
315
+ descriptor = [Chem.Lipinski.RingCount(alpha) for alpha in mols]
316
+ fps = generating_newfps(fps, descriptor, 'RingCount', save_res)
317
+ clear_descriptor_memory(descriptor)
318
+ if phase13 == 1:
319
+ descriptor = [Chem.Lipinski.NumAromaticRings(alpha) for alpha in mols]
320
+ fps = generating_newfps(fps, descriptor, 'NumAromaticRings', save_res)
321
+ clear_descriptor_memory(descriptor)
322
+ if phase14 == 1:
323
+ descriptor = [Chem.Lipinski.NumSaturatedRings(alpha) for alpha in mols]
324
+ fps = generating_newfps(fps, descriptor, 'NumSaturatedRings', save_res)
325
+ clear_descriptor_memory(descriptor)
326
+ if phase15 == 1:
327
+ descriptor = [Chem.Lipinski.NumAliphaticRings(alpha) for alpha in mols]
328
+ fps = generating_newfps(fps, descriptor, 'NumAliphaticRings', save_res)
329
+ clear_descriptor_memory(descriptor)
330
+ if phase16 == 1:
331
+ descriptor = [Chem.rdMolDescriptors.CalcLabuteASA(alpha) for alpha in mols]
332
+ fps = generating_newfps(fps, descriptor, 'LabuteASA', save_res)
333
+ clear_descriptor_memory(descriptor)
334
+ if phase17 == 1:
335
+ descriptor = [Chem.GraphDescriptors.BalabanJ(alpha) for alpha in mols]
336
+ # descriptor = Normalization(descriptor)
337
+ fps = generating_newfps(fps, descriptor, 'BalabanJ', save_res)
338
+ clear_descriptor_memory(descriptor)
339
+ if phase18 == 1:
340
+ descriptor = [Chem.GraphDescriptors.BertzCT(alpha) for alpha in mols]
341
+ # descriptor = Normalization(descriptor)
342
+ fps = generating_newfps(fps, descriptor, 'BertzCT', save_res)
343
+ clear_descriptor_memory(descriptor)
344
+ if phase19 == 1:
345
+ descriptor = [Chem.GraphDescriptors.Ipc(alpha) for alpha in mols]
346
+ descriptor = Normalization(descriptor)
347
+ fps = generating_newfps(fps, descriptor, 'Ipc', save_res)
348
+ clear_descriptor_memory(descriptor)
349
+ if phase20 == 1:
350
+ d1 = [Chem.GraphDescriptors.Kappa1(alpha) for alpha in mols]
351
+ d2 = [Chem.GraphDescriptors.Kappa2(alpha) for alpha in mols]
352
+ d3 = [Chem.GraphDescriptors.Kappa3(alpha) for alpha in mols]
353
+ d1 = np.asarray(d1)
354
+ d2 = np.asarray(d2)
355
+ d3 = np.asarray(d3)
356
+ fps = generating_newfps(fps, [d1,d2,d3], 'kappa_Series[1-3]_ind', save_res)
357
+ clear_descriptor_memory(d1)
358
+ clear_descriptor_memory(d2)
359
+ clear_descriptor_memory(d3)
360
+ if phase21 == 1:
361
+ d1 = [Chem.GraphDescriptors.Chi0(alpha) for alpha in mols]
362
+ d2 = [Chem.GraphDescriptors.Chi0n(alpha) for alpha in mols]
363
+ d3 = [Chem.GraphDescriptors.Chi0v(alpha) for alpha in mols]
364
+ d4 = [Chem.GraphDescriptors.Chi1(alpha) for alpha in mols]
365
+ d5 = [Chem.GraphDescriptors.Chi1n(alpha) for alpha in mols]
366
+ d6 = [Chem.GraphDescriptors.Chi1v(alpha) for alpha in mols]
367
+ d7 = [Chem.GraphDescriptors.Chi2n(alpha) for alpha in mols]
368
+ d8 = [Chem.GraphDescriptors.Chi2v(alpha) for alpha in mols]
369
+ d9 = [Chem.GraphDescriptors.Chi3n(alpha) for alpha in mols]
370
+ d10 = [Chem.GraphDescriptors.Chi3v(alpha) for alpha in mols]
371
+ d11 = [Chem.GraphDescriptors.Chi4n(alpha) for alpha in mols]
372
+ d12 = [Chem.GraphDescriptors.Chi4v(alpha) for alpha in mols]
373
+ d13 = generate_chi(mols, 'n')
374
+ d14 = generate_chi(mols, 'v')
375
+ d1 = np.asarray(d1)
376
+ d2 = np.asarray(d2)
377
+ d3 = np.asarray(d3)
378
+ d4 = np.asarray(d4)
379
+ d5 = np.asarray(d5)
380
+ d6 = np.asarray(d6)
381
+ d7 = np.asarray(d7)
382
+ d8 = np.asarray(d8)
383
+ d9 = np.asarray(d9)
384
+ d10 = np.asarray(d10)
385
+ d11 = np.asarray(d11)
386
+ d12 = np.asarray(d12)
387
+ d13 = np.asarray(d13)
388
+ d14 = np.asarray(d14)
389
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14], 'Chi_Series[13]_ind', save_res)
390
+ clear_descriptor_memory(d1)
391
+ clear_descriptor_memory(d2)
392
+ clear_descriptor_memory(d3)
393
+ clear_descriptor_memory(d4)
394
+ clear_descriptor_memory(d5)
395
+ clear_descriptor_memory(d6)
396
+ clear_descriptor_memory(d7)
397
+ clear_descriptor_memory(d8)
398
+ clear_descriptor_memory(d9)
399
+ clear_descriptor_memory(d10)
400
+ clear_descriptor_memory(d11)
401
+ clear_descriptor_memory(d12)
402
+ clear_descriptor_memory(d13)
403
+ clear_descriptor_memory(d14)
404
+ if phase22 == 1:
405
+ descriptor = [Chem.rdMolDescriptors.CalcPhi(alpha) for alpha in mols]
406
+ fps = generating_newfps(fps, descriptor, 'Phi', save_res)
407
+ clear_descriptor_memory(descriptor)
408
+ if phase23 == 1:
409
+ descriptor = [Chem.GraphDescriptors.HallKierAlpha(alpha) for alpha in mols]
410
+ fps = generating_newfps(fps, descriptor, 'HallKierAlpha', save_res)
411
+ clear_descriptor_memory(descriptor)
412
+ if phase24 == 1:
413
+ descriptor = [Chem.rdMolDescriptors.CalcNumAmideBonds(alpha) for alpha in mols]
414
+ fps = generating_newfps(fps, descriptor, 'NumAmideBonds', save_res)
415
+ clear_descriptor_memory(descriptor)
416
+ if phase25 == 1:
417
+ descriptor = [Chem.Lipinski.FractionCSP3(alpha) for alpha in mols]
418
+ fps = generating_newfps(fps, descriptor, 'FractionCSP3', save_res)
419
+ clear_descriptor_memory(descriptor)
420
+ if phase26 == 1:
421
+ descriptor = [Chem.rdMolDescriptors.CalcNumSpiroAtoms(alpha) for alpha in mols]
422
+ fps = generating_newfps(fps, descriptor, 'NumSpiroAtoms', save_res)
423
+ clear_descriptor_memory(descriptor)
424
+ if phase27 == 1:
425
+ descriptor = [Chem.rdMolDescriptors.CalcNumBridgeheadAtoms(alpha) for alpha in mols]
426
+ fps = generating_newfps(fps, descriptor, 'NumBridgeheadAtoms', save_res)
427
+ clear_descriptor_memory(descriptor)
428
+ if phase28 == 1:
429
+ d1 = [Chem.MolSurf.PEOE_VSA1(alpha) for alpha in mols]
430
+ d2 = [Chem.MolSurf.PEOE_VSA2(alpha) for alpha in mols]
431
+ d3 = [Chem.MolSurf.PEOE_VSA3(alpha) for alpha in mols]
432
+ d4 = [Chem.MolSurf.PEOE_VSA4(alpha) for alpha in mols]
433
+ d5 = [Chem.MolSurf.PEOE_VSA5(alpha) for alpha in mols]
434
+ d6 = [Chem.MolSurf.PEOE_VSA6(alpha) for alpha in mols]
435
+ d7 = [Chem.MolSurf.PEOE_VSA7(alpha) for alpha in mols]
436
+ d8 = [Chem.MolSurf.PEOE_VSA8(alpha) for alpha in mols]
437
+ d9 = [Chem.MolSurf.PEOE_VSA9(alpha) for alpha in mols]
438
+ d10 = [Chem.MolSurf.PEOE_VSA10(alpha) for alpha in mols]
439
+ d11 = [Chem.MolSurf.PEOE_VSA11(alpha) for alpha in mols]
440
+ d12 = [Chem.MolSurf.PEOE_VSA12(alpha) for alpha in mols]
441
+ d13 = [Chem.MolSurf.PEOE_VSA13(alpha) for alpha in mols]
442
+ d14 = [Chem.MolSurf.PEOE_VSA14(alpha) for alpha in mols]
443
+ d1 = np.asarray(d1)
444
+ d2 = np.asarray(d2)
445
+ d3 = np.asarray(d3)
446
+ d4 = np.asarray(d4)
447
+ d5 = np.asarray(d5)
448
+ d6 = np.asarray(d6)
449
+ d7 = np.asarray(d7)
450
+ d8 = np.asarray(d8)
451
+ d9 = np.asarray(d9)
452
+ d10 = np.asarray(d10)
453
+ d11 = np.asarray(d11)
454
+ d12 = np.asarray(d12)
455
+ d13 = np.asarray(d13)
456
+ d14 = np.asarray(d14)
457
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14],'PEOE_VSA_Series[1-14]_ind', save_res)
458
+ clear_descriptor_memory(d1)
459
+ clear_descriptor_memory(d2)
460
+ clear_descriptor_memory(d3)
461
+ clear_descriptor_memory(d4)
462
+ clear_descriptor_memory(d5)
463
+ clear_descriptor_memory(d6)
464
+ clear_descriptor_memory(d7)
465
+ clear_descriptor_memory(d8)
466
+ clear_descriptor_memory(d9)
467
+ clear_descriptor_memory(d10)
468
+ clear_descriptor_memory(d11)
469
+ clear_descriptor_memory(d12)
470
+ clear_descriptor_memory(d13)
471
+ clear_descriptor_memory(d14)
472
+ if phase29 == 1:
473
+ d1 = [Chem.MolSurf.SMR_VSA1(alpha) for alpha in mols]
474
+ d2 = [Chem.MolSurf.SMR_VSA2(alpha) for alpha in mols]
475
+ d3 = [Chem.MolSurf.SMR_VSA3(alpha) for alpha in mols]
476
+ d4 = [Chem.MolSurf.SMR_VSA4(alpha) for alpha in mols]
477
+ d5 = [Chem.MolSurf.SMR_VSA5(alpha) for alpha in mols]
478
+ d6 = [Chem.MolSurf.SMR_VSA6(alpha) for alpha in mols]
479
+ d7 = [Chem.MolSurf.SMR_VSA7(alpha) for alpha in mols]
480
+ d8 = [Chem.MolSurf.SMR_VSA8(alpha) for alpha in mols]
481
+ d9 = [Chem.MolSurf.SMR_VSA9(alpha) for alpha in mols]
482
+ d10 = [Chem.MolSurf.SMR_VSA10(alpha) for alpha in mols]
483
+ d1 = np.asarray(d1)
484
+ d2 = np.asarray(d2)
485
+ d3 = np.asarray(d3)
486
+ d4 = np.asarray(d4)
487
+ d5 = np.asarray(d5)
488
+ d6 = np.asarray(d6)
489
+ d7 = np.asarray(d7)
490
+ d8 = np.asarray(d8)
491
+ d9 = np.asarray(d9)
492
+ d10 = np.asarray(d10)
493
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10],'SMR_VSA_Series[1-10]_ind', save_res)
494
+ clear_descriptor_memory(d1)
495
+ clear_descriptor_memory(d2)
496
+ clear_descriptor_memory(d3)
497
+ clear_descriptor_memory(d4)
498
+ clear_descriptor_memory(d5)
499
+ clear_descriptor_memory(d6)
500
+ clear_descriptor_memory(d7)
501
+ clear_descriptor_memory(d8)
502
+ clear_descriptor_memory(d9)
503
+ clear_descriptor_memory(d10)
504
+ if phase30 == 1:
505
+ d1 = [Chem.MolSurf.SlogP_VSA1(alpha) for alpha in mols]
506
+ d2 = [Chem.MolSurf.SlogP_VSA2(alpha) for alpha in mols]
507
+ d3 = [Chem.MolSurf.SlogP_VSA3(alpha) for alpha in mols]
508
+ d4 = [Chem.MolSurf.SlogP_VSA4(alpha) for alpha in mols]
509
+ d5 = [Chem.MolSurf.SlogP_VSA5(alpha) for alpha in mols]
510
+ d6 = [Chem.MolSurf.SlogP_VSA6(alpha) for alpha in mols]
511
+ d7 = [Chem.MolSurf.SlogP_VSA7(alpha) for alpha in mols]
512
+ d8 = [Chem.MolSurf.SlogP_VSA8(alpha) for alpha in mols]
513
+ d9 = [Chem.MolSurf.SlogP_VSA9(alpha) for alpha in mols]
514
+ d10= [Chem.MolSurf.SlogP_VSA10(alpha) for alpha in mols]
515
+ d11= [Chem.MolSurf.SlogP_VSA11(alpha) for alpha in mols]
516
+ d12= [Chem.MolSurf.SlogP_VSA12(alpha) for alpha in mols]
517
+ d1 = np.asarray(d1)
518
+ d2 = np.asarray(d2)
519
+ d3 = np.asarray(d3)
520
+ d4 = np.asarray(d4)
521
+ d5 = np.asarray(d5)
522
+ d6 = np.asarray(d6)
523
+ d7 = np.asarray(d7)
524
+ d8 = np.asarray(d8)
525
+ d9 = np.asarray(d9)
526
+ d10 = np.asarray(d10)
527
+ d11 = np.asarray(d11)
528
+ d12 = np.asarray(d12)
529
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12],'SlogP_VSA_Series[1-12]_ind', save_res)
530
+ clear_descriptor_memory(d1)
531
+ clear_descriptor_memory(d2)
532
+ clear_descriptor_memory(d3)
533
+ clear_descriptor_memory(d4)
534
+ clear_descriptor_memory(d5)
535
+ clear_descriptor_memory(d6)
536
+ clear_descriptor_memory(d7)
537
+ clear_descriptor_memory(d8)
538
+ clear_descriptor_memory(d9)
539
+ clear_descriptor_memory(d10)
540
+ clear_descriptor_memory(d11)
541
+ clear_descriptor_memory(d12)
542
+ if phase31 == 1:
543
+ d1 = [Chem.EState.EState_VSA.EState_VSA1(alpha) for alpha in mols]
544
+ d2 = [Chem.EState.EState_VSA.EState_VSA2(alpha) for alpha in mols]
545
+ d3 = [Chem.EState.EState_VSA.EState_VSA3(alpha) for alpha in mols]
546
+ d4 = [Chem.EState.EState_VSA.EState_VSA4(alpha) for alpha in mols]
547
+ d5 = [Chem.EState.EState_VSA.EState_VSA5(alpha) for alpha in mols]
548
+ d6 = [Chem.EState.EState_VSA.EState_VSA6(alpha) for alpha in mols]
549
+ d7 = [Chem.EState.EState_VSA.EState_VSA7(alpha) for alpha in mols]
550
+ d8 = [Chem.EState.EState_VSA.EState_VSA8(alpha) for alpha in mols]
551
+ d9 = [Chem.EState.EState_VSA.EState_VSA9(alpha) for alpha in mols]
552
+ d10 = [Chem.EState.EState_VSA.EState_VSA10(alpha) for alpha in mols]
553
+ d11 = [Chem.EState.EState_VSA.EState_VSA11(alpha) for alpha in mols]
554
+ d1 = np.asarray(d1)
555
+ d2 = np.asarray(d2)
556
+ d3 = np.asarray(d3)
557
+ d4 = np.asarray(d4)
558
+ d5 = np.asarray(d5)
559
+ d6 = np.asarray(d6)
560
+ d7 = np.asarray(d7)
561
+ d8 = np.asarray(d8)
562
+ d9 = np.asarray(d9)
563
+ d10 = np.asarray(d10)
564
+ d11 = np.asarray(d11)
565
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11],'EState_VSA_Series[1-11]_ind', save_res)
566
+ clear_descriptor_memory(d1)
567
+ clear_descriptor_memory(d2)
568
+ clear_descriptor_memory(d3)
569
+ clear_descriptor_memory(d4)
570
+ clear_descriptor_memory(d5)
571
+ clear_descriptor_memory(d6)
572
+ clear_descriptor_memory(d7)
573
+ clear_descriptor_memory(d8)
574
+ clear_descriptor_memory(d9)
575
+ clear_descriptor_memory(d10)
576
+ clear_descriptor_memory(d11)
577
+ if phase32 == 1:
578
+ d1 = [Chem.EState.EState_VSA.VSA_EState1(alpha) for alpha in mols]
579
+ d2 = [Chem.EState.EState_VSA.VSA_EState2(alpha) for alpha in mols]
580
+ d3 = [Chem.EState.EState_VSA.VSA_EState3(alpha) for alpha in mols]
581
+ d4 = [Chem.EState.EState_VSA.VSA_EState4(alpha) for alpha in mols]
582
+ d5 = [Chem.EState.EState_VSA.VSA_EState5(alpha) for alpha in mols]
583
+ d6 = [Chem.EState.EState_VSA.VSA_EState6(alpha) for alpha in mols]
584
+ d7 = [Chem.EState.EState_VSA.VSA_EState7(alpha) for alpha in mols]
585
+ d8 = [Chem.EState.EState_VSA.VSA_EState8(alpha) for alpha in mols]
586
+ d9 = [Chem.EState.EState_VSA.VSA_EState9(alpha) for alpha in mols]
587
+ d10 = [Chem.EState.EState_VSA.VSA_EState10(alpha) for alpha in mols]
588
+ d1 = np.asarray(d1)
589
+ d2 = np.asarray(d2)
590
+ d3 = np.asarray(d3)
591
+ d4 = np.asarray(d4)
592
+ d5 = np.asarray(d5)
593
+ d6 = np.asarray(d6)
594
+ d7 = np.asarray(d7)
595
+ d8 = np.asarray(d8)
596
+ d9 = np.asarray(d9)
597
+ d10 = np.asarray(d10)
598
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10],'VSA_EState_Series[1-10]', save_res)
599
+ clear_descriptor_memory(d1)
600
+ clear_descriptor_memory(d2)
601
+ clear_descriptor_memory(d3)
602
+ clear_descriptor_memory(d4)
603
+ clear_descriptor_memory(d5)
604
+ clear_descriptor_memory(d6)
605
+ clear_descriptor_memory(d7)
606
+ clear_descriptor_memory(d8)
607
+ clear_descriptor_memory(d9)
608
+ clear_descriptor_memory(d10)
609
+ if phase33 == 1:
610
+ descriptor = [Chem.rdMolDescriptors.MQNs_(alpha) for alpha in mols]
611
+ # descriptor = Normalization(descriptor)
612
+ fps = generating_newfps(fps, descriptor, 'MQNs', save_res)
613
+ clear_descriptor_memory(descriptor)
614
+ if phase34 == 1:
615
+ descriptor = [Chem.rdMolDescriptors.CalcAUTOCORR2D(alpha) for alpha in mols]
616
+ fps = generating_newfps(fps, descriptor, 'AUTOCORR2D', save_res)
617
+ clear_descriptor_memory(descriptor)
618
+ if phase35 == 1:
619
+ descriptor = compute_descriptors_parallel(mols)
620
+ fps = generating_newfps(fps, descriptor, 'BCUT2D', save_res)
621
+ clear_descriptor_memory(descriptor)
622
+ ####################################################
623
+ mols2 = process_molecules_parallel(mols, max_workers=8)
624
+ del mols
625
+ gc.collect()
626
+ ####################################################
627
+ if phase36 == 1:
628
+ descriptor = [Chem.rdMolDescriptors.CalcAsphericity(alpha) for alpha in mols2]
629
+ fps = generating_newfps(fps, descriptor, 'Asphericity', save_res)
630
+ clear_descriptor_memory(descriptor)
631
+ if phase37 == 1:
632
+ descriptor = [Chem.rdMolDescriptors.CalcPBF(alpha) for alpha in mols2]
633
+ fps = generating_newfps(fps, descriptor, 'PBF', save_res)
634
+ clear_descriptor_memory(descriptor)
635
+ if phase38 == 1:
636
+ descriptor = [Chem.rdMolDescriptors.CalcRadiusOfGyration(alpha) for alpha in mols2]
637
+ fps = generating_newfps(fps, descriptor, 'RadiusOfGyration', save_res)
638
+ clear_descriptor_memory(descriptor)
639
+ if phase39 == 1:
640
+ descriptor = [Chem.rdMolDescriptors.CalcInertialShapeFactor(alpha) for alpha in mols2]
641
+ fps = generating_newfps(fps, descriptor, 'InertialShapeFactor', save_res)
642
+ clear_descriptor_memory(descriptor)
643
+ if phase40 == 1:
644
+ descriptor = [Chem.rdMolDescriptors.CalcEccentricity(alpha) for alpha in mols2]
645
+ fps = generating_newfps(fps, descriptor, 'Eccentricity', save_res)
646
+ clear_descriptor_memory(descriptor)
647
+ if phase41 == 1:
648
+ descriptor = [Chem.rdMolDescriptors.CalcSpherocityIndex(alpha) for alpha in mols2]
649
+ fps = generating_newfps(fps, descriptor, 'SpherocityIndex', save_res)
650
+ clear_descriptor_memory(descriptor)
651
+ if phase42 == 1:
652
+ d1 = [Chem.rdMolDescriptors.CalcPMI1(alpha) for alpha in mols2]
653
+ d2 = [Chem.rdMolDescriptors.CalcPMI2(alpha) for alpha in mols2]
654
+ d3 = [Chem.rdMolDescriptors.CalcPMI3(alpha) for alpha in mols2]
655
+ d1 = Normalization(d1)
656
+ d2 = Normalization(d2)
657
+ d3 = Normalization(d3)
658
+ d1 = np.asarray(d1)
659
+ d2 = np.asarray(d2)
660
+ d3 = np.asarray(d3)
661
+ fps = generating_newfps(fps, [d1,d2,d3], 'PMI_series[1-3]_ind', save_res)
662
+ clear_descriptor_memory(d1)
663
+ clear_descriptor_memory(d2)
664
+ clear_descriptor_memory(d3)
665
+ if phase43 == 1:
666
+ d1 = [Chem.rdMolDescriptors.CalcNPR1(alpha) for alpha in mols2]
667
+ d2 = [Chem.rdMolDescriptors.CalcNPR2(alpha) for alpha in mols2]
668
+ d1 = np.asarray(d1)
669
+ d2 = np.asarray(d2)
670
+ fps = generating_newfps(fps, [d1,d2], 'NPR_series[1-2]_ind', save_res)
671
+ clear_descriptor_memory(d1)
672
+ clear_descriptor_memory(d2)
673
+ if phase44 == 1:
674
+ descriptor = [Chem.rdMolDescriptors.CalcAUTOCORR3D(mols) for mols in mols2]
675
+ fps = generating_newfps(fps, descriptor, 'AUTOCORR3D', save_res)
676
+ clear_descriptor_memory(descriptor)
677
+ if phase45 == 1:
678
+ descriptor = [Chem.rdMolDescriptors.CalcRDF(mols) for mols in mols2]
679
+ descriptor = Normalization(descriptor)
680
+ fps = generating_newfps(fps, descriptor, 'RDF', save_res)
681
+ clear_descriptor_memory(descriptor)
682
+ if phase46 == 1:
683
+ descriptor = [Chem.rdMolDescriptors.CalcMORSE(mols) for mols in mols2]
684
+ descriptor = Normalization(descriptor)
685
+ fps = generating_newfps(fps, descriptor, 'MORSE', save_res)
686
+ clear_descriptor_memory(descriptor)
687
+ if phase47 == 1:
688
+ descriptor = [Chem.rdMolDescriptors.CalcWHIM(mols) for mols in mols2]
689
+ descriptor = Normalization(descriptor)
690
+ fps = generating_newfps(fps, descriptor, 'WHIM', save_res)
691
+ clear_descriptor_memory(descriptor)
692
+ if phase48 == 1:
693
+ descriptor = [Chem.rdMolDescriptors.CalcGETAWAY(mols) for mols in mols2]
694
+ descriptor = Normalization(descriptor)
695
+ fps = generating_newfps(fps, descriptor, 'GETAWAY', save_res)
696
+ clear_descriptor_memory(descriptor)
697
+ #########################################
698
+ if save_res == "pd":
699
+ fps.to_csv(f'{target_path}/{name}_feature_selection.csv')
700
+
701
+ fps = fps.astype('float')
702
+ return fps
extra_code/feature_selection.py ADDED
@@ -0,0 +1,951 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import pandas as pd
4
+ import gc
5
+ from concurrent.futures import ProcessPoolExecutor, as_completed
6
+
7
+ from rdkit import Chem
8
+ from rdkit.Chem import AllChem, DataStructs, Draw
9
+ from rdkit import RDConfig
10
+ from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges
11
+ from rdkit.Chem.AllChem import GetMorganGenerator
12
+ from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray
13
+ from rdkit.Avalon.pyAvalonTools import GetAvalonFP
14
+ from rdkit.Chem.Descriptors import ExactMolWt
15
+
16
+ import tensorflow as tf
17
+ from tensorflow import keras
18
+ from tensorflow.keras import layers
19
+ from tensorflow.keras.models import Sequential
20
+ from tensorflow.keras.layers import Dense, Dropout, Activation
21
+ from tensorflow.keras.regularizers import l2
22
+ from tensorflow.keras.optimizers import Adam
23
+ from tensorflow.keras import regularizers
24
+
25
+ import optuna
26
+
27
def mol3d(mol):
    """Return a hydrogen-added copy of *mol* carrying a 3D conformer.

    The steps (ETKDGv3 embedding, UFF optimisation, MMFF optimisation) are
    tried one after another, and the molecule is returned as soon as it
    holds at least one conformer after a step. In practice this means a
    successful embedding returns immediately and the force-field steps are
    only reached when embedding left the molecule without a conformer.

    Args:
        mol: RDKit molecule to embed.

    Returns:
        The molecule with explicit hydrogens and a conformer, or ``None``
        when no step produced a conformer.
    """
    mol = Chem.AddHs(mol)
    etkdg_params = AllChem.ETKDGv3()
    pipeline = (
        (AllChem.EmbedMolecule, (mol, etkdg_params), {}),
        (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),
        (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200}),
    )

    for step, positional, keywords in pipeline:
        try:
            step(*positional, **keywords)
            has_conformer = mol.GetNumConformers() > 0
        except ValueError as e:
            # Only ValueError is treated as "this step failed, try the next".
            print(f"Error: {e} - Trying next optimization method [{step}]")
            continue
        if has_conformer:
            return mol

    print(f"Invalid mol for 3d {Chem.MolToSmiles(mol)} - No conformer generated")
    return None
45
+
46
+ import numpy as np
47
+ import pandas as pd
48
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
49
+ from typing import Union, List, Optional
50
+
51
def process_chunk_optimized(chunk_data):
    """Turn one column chunk into a DataFrame with numbered column names.

    Args:
        chunk_data: ``(chunk, name_prefix, start_idx)`` where ``chunk`` is a
            2-D block of values, ``name_prefix`` is the column-name prefix and
            ``start_idx`` is the zero-based index of the chunk's first column
            in the full descriptor matrix.

    Returns:
        A DataFrame whose columns are named ``{name_prefix}_{k}`` with
        ``k`` running from ``start_idx + 1`` (one-based numbering).
    """
    block, prefix, offset = chunk_data
    width = block.shape[1]
    labels = [f"{prefix}_{offset + k + 1}" for k in range(width)]
    return pd.DataFrame(block, columns=labels)
57
+
58
def generate_df_concurrently(descriptor: np.ndarray, name_prefix: str, chunk_size: int = 1000) -> Optional[pd.DataFrame]:
    """Build a DataFrame with columns ``{name_prefix}_1 .. {name_prefix}_N``.

    The descriptor matrix is split column-wise into chunks of at most
    ``chunk_size`` columns. When everything fits in a single chunk the frame
    is built directly in this process: starting a process pool and pickling
    the array to a worker and back costs far more than the DataFrame
    construction itself. Only when there are several chunks are they
    converted in a ``ProcessPoolExecutor`` and concatenated.

    Args:
        descriptor: 2-D array, one row per sample and one column per feature.
        name_prefix: Prefix used for the generated column names.
        chunk_size: Maximum number of columns per chunk.

    Returns:
        The assembled DataFrame, or ``None`` when there are no columns to
        convert. If anything fails, the error is printed and a DataFrame is
        built column by column as a fallback.
    """
    try:
        chunks = [
            (descriptor[:, i:min(i + chunk_size, descriptor.shape[1])], name_prefix, i)
            for i in range(0, descriptor.shape[1], chunk_size)
        ]

        if not chunks:
            return None

        if len(chunks) == 1:
            # Fast path: a single chunk gains nothing from a worker process.
            only_chunk = chunks[0][0]
            return pd.DataFrame(
                only_chunk,
                columns=[f"{name_prefix}_{i+1}" for i in range(only_chunk.shape[1])]
            )

        with ProcessPoolExecutor() as executor:
            chunk_dfs = list(executor.map(process_chunk_optimized, chunks))

        return pd.concat(chunk_dfs, axis=1) if chunk_dfs else None

    except Exception as e:
        print(f"[-1-] Error in generating DataFrame concurrently: {e}")
        return pd.DataFrame(
            {f"{name_prefix}_{i+1}": descriptor[:, i] for i in range(descriptor.shape[1])}
        )
75
+
76
def generating_newfps(
    fps: Union[np.ndarray, pd.DataFrame],
    descriptor: Optional[Union[np.ndarray, List[np.ndarray], List[List]]],
    descriptor_name: str,
    save_res: str = "np",
    chunk_size: int = 1000
) -> Union[np.ndarray, pd.DataFrame]:
    """Append a descriptor to the feature table as one or more new columns.

    Accepted descriptor shapes:
      * a 2-D (or higher) ndarray, appended as-is;
      * a list of ndarrays, where 1-D members become single columns and all
        members are stacked side by side;
      * a list of lists, converted to a float32 matrix;
      * anything else, treated as one value per row (a single column).

    Args:
        fps: Current feature table.
        descriptor: Values to append; ``None`` leaves ``fps`` untouched.
        descriptor_name: Column name, or column-name prefix in "pd" mode.
        save_res: "pd" to work on a DataFrame; any other value uses numpy.
        chunk_size: Forwarded to ``generate_df_concurrently`` in "pd" mode.

    Returns:
        The extended table with NaN and +/-inf replaced by 0. In numpy mode
        the result is float32. Errors are printed, never raised; depending
        on where they occur the original ``fps`` or a partially processed
        table is returned. In "pd" mode an ``fps`` that already is a
        DataFrame may be modified in place.
    """

    def _as_columns(arr):
        # 1-D arrays become a single column; 2-D arrays pass through.
        return arr if arr.ndim > 1 else arr.reshape(-1, 1)

    def _with_block(frame, matrix):
        # Append the named columns generated for `matrix`, if any.
        block = generate_df_concurrently(matrix, descriptor_name, chunk_size)
        if block is None:
            return frame
        return pd.concat([frame, block], axis=1)

    try:
        if descriptor is None:
            return fps

        is_matrix = isinstance(descriptor, np.ndarray) and descriptor.ndim >= 2
        is_list = isinstance(descriptor, list)

        if save_res != "pd":  # numpy handling
            try:
                if is_matrix:
                    pieces = [fps, descriptor]
                elif is_list and isinstance(descriptor[0], np.ndarray):
                    pieces = [fps] + [_as_columns(arr) for arr in descriptor]
                elif is_list and isinstance(descriptor[0], list):
                    pieces = [fps, np.asarray(descriptor, dtype=np.float32)]
                else:
                    pieces = [fps, np.asarray(descriptor, dtype=np.float32)[:, None]]

                merged = np.concatenate(pieces, axis=1)
                return np.nan_to_num(merged, nan=0.0, posinf=0.0, neginf=0.0).astype('float32')
            except Exception as e:
                print(f"[-5-] Error in numpy processing: {e}")
                return fps

        frame = fps if isinstance(fps, pd.DataFrame) else pd.DataFrame(fps)

        if is_matrix:
            frame = _with_block(frame, descriptor)

        elif is_list and isinstance(descriptor[0], np.ndarray):
            try:
                stacked = np.hstack([_as_columns(arr) for arr in descriptor])
                frame = _with_block(frame, stacked)
            except Exception as e:
                print(f"[-2-] Error processing array list: {e}")

        elif is_list and isinstance(descriptor[0], list):
            try:
                matrix = np.asarray(descriptor, dtype=np.float32)
                frame = _with_block(frame, matrix)
            except Exception as e:
                print(f"[-3-] Error processing nested list: {e}")

        else:
            try:
                flat = np.asarray(descriptor, dtype=np.float32).flatten()
                frame[descriptor_name] = flat
            except Exception as e:
                print(f"[-4-] Error processing single descriptor: {e}")

        # Clean-up runs over the whole table, not just the new columns.
        frame.replace([np.inf, -np.inf], np.nan, inplace=True)
        frame.fillna(0, inplace=True)
        return frame

    except Exception as e:
        print(f"[-6-] General error in {descriptor_name}: {e}")
        return fps
152
+
153
+ # def generating_newfps(fps, descriptor, descriptor_name, save_res="np"):
154
+ # try:
155
+ # if descriptor is None:
156
+ # return fps
157
+
158
+ # if save_res == "pd":
159
+ # new_fps = pd.DataFrame(fps) if not isinstance(fps, pd.DataFrame) else fps
160
+
161
+ # if isinstance(descriptor, np.ndarray) and descriptor.ndim >= 2:
162
+ # try:
163
+ # descriptors_df = pd.DataFrame(
164
+ # {f"{descriptor_name}_{i+1}": descriptor[:, i] for i in range(descriptor.shape[1])}
165
+ # )
166
+ # new_fps = pd.concat([new_fps, descriptors_df], axis=1)
167
+ # del descriptor
168
+ # except Exception as e:
169
+ # print(f"[-1-] Error occured: {e}")
170
+
171
+ # elif isinstance(descriptor, list) and isinstance(descriptor[0], np.ndarray):
172
+ # try:
173
+ # arrays_1d = [arr[:, None] for arr in descriptor if arr.ndim == 1]
174
+ # arrays_2d = [arr for arr in descriptor if arr.ndim == 2]
175
+ # combined_1d = np.concatenate(arrays_1d, axis=1) if arrays_1d else None
176
+ # combined_2d = np.concatenate(arrays_2d, axis=1) if arrays_2d else None
177
+
178
+ # if combined_1d is not None:
179
+ # df_1d = pd.DataFrame(
180
+ # combined_1d,
181
+ # columns=[f'{descriptor_name}_{i+1}' for i in range(combined_1d.shape[1])]
182
+ # )
183
+ # new_fps = pd.concat([new_fps, df_1d], axis=1)
184
+
185
+ # if combined_2d is not None:
186
+ # df_2d = pd.DataFrame(
187
+ # combined_2d,
188
+ # columns=[f'{descriptor_name}_{i+1}' for i in range(combined_2d.shape[1])]
189
+ # )
190
+ # new_fps = pd.concat([new_fps, df_2d], axis=1)
191
+
192
+ # del descriptor, arrays_1d, arrays_2d
193
+ # if combined_1d is not None: del combined_1d
194
+ # if combined_2d is not None: del combined_2d
195
+ # except Exception as e:
196
+ # print(f"[-2-] Error occured: {e}")
197
+
198
+ # elif isinstance(descriptor, list) and isinstance(descriptor[0], list):
199
+ # try:
200
+ # descriptor = np.asarray(descriptor).astype('float')
201
+ # descriptors_df = pd.DataFrame(
202
+ # {f"{descriptor_name}_{i+1}": descriptor[:, i] for i in range(descriptor.shape[1])}
203
+ # )
204
+ # new_fps = pd.concat([new_fps, descriptors_df], axis=1)
205
+ # del descriptor
206
+ # except Exception as e:
207
+ # print(f"[-3-] Error occured: {e}")
208
+
209
+ # else:
210
+ # descriptor = np.asarray(descriptor).astype('float')
211
+ # new_fps[descriptor_name] = descriptor.flatten()
212
+ # del descriptor
213
+
214
+ # new_fps = new_fps.replace([np.inf, -np.inf], np.nan).fillna(0)
215
+ # return new_fps
216
+
217
+ # else:
218
+ # new_fps = fps
219
+
220
+ # if descriptor is None:
221
+ # pass
222
+ # elif isinstance(descriptor, np.ndarray) and descriptor.ndim >= 2:
223
+ # try:
224
+ # new_fps = np.concatenate([new_fps, descriptor], axis=1)
225
+ # del descriptor
226
+ # except Exception as e:
227
+ # print(f"[-1-] Error occured: {e}")
228
+ # elif isinstance(descriptor, list) and isinstance(descriptor[0], np.ndarray):
229
+ # try:
230
+ # arrays_1d = [arr[:, None] for arr in descriptor if arr.ndim == 1]
231
+ # arrays_2d = [arr for arr in descriptor if arr.ndim == 2]
232
+ # combined_1d = np.concatenate(arrays_1d, axis=1) if arrays_1d else None
233
+ # combined_2d = np.concatenate(arrays_2d, axis=1) if arrays_2d else None
234
+ # to_concat = [new_fps] + [arr for arr in [combined_1d, combined_2d] if arr is not None]
235
+ # new_fps = np.concatenate(to_concat, axis=1)
236
+ # del descriptor, arrays_1d, arrays_2d
237
+ # if combined_1d is not None: del combined_1d
238
+ # if combined_2d is not None: del combined_2d
239
+ # except Exception as e:
240
+ # print(f"[-2-] Error occured: {e}")
241
+ # elif isinstance(descriptor, list) and isinstance(descriptor[0], list):
242
+ # try:
243
+ # descriptor = np.asarray(descriptor).astype('float')
244
+ # new_fps = np.concatenate([new_fps, descriptor], axis=1)
245
+ # del descriptor
246
+ # except Exception as e:
247
+ # print(f"[-3-] Error occured: {e}")
248
+ # else:
249
+ # descriptor = np.asarray(descriptor).astype('float')
250
+ # new_fps = np.concatenate([new_fps, descriptor[:,None]], axis=1)
251
+ # del descriptor
252
+
253
+ # new_fps = np.nan_to_num(new_fps, nan=0.0, posinf=0.0, neginf=0.0).astype('float')
254
+ # return new_fps
255
+
256
+ # except Exception as e:
257
+ # print(f"Error occurred in {descriptor_name}: {e}")
258
+ # return fps
259
+
260
def Normalization(descriptor):
    """Compress descriptor magnitudes with a signed ``log1p`` transform.

    Steps:
      1. Clip values to the range [-1e15, 1e15].
      2. Replace entries whose magnitude is below 1e-10 (including exact
         zeros) with +1e-10.
      3. Return ``sign(x) * log1p(|x|)``.
      4. Map any remaining NaN or +/-inf to 0.

    Args:
        descriptor: Array-like of numeric values.

    Returns:
        ndarray of transformed values with the same shape as the input.
    """
    tiny = 1e-10
    limit = 1e15

    clipped = np.clip(np.asarray(descriptor), -limit, limit)
    floored = np.where(np.abs(clipped) < tiny, tiny, clipped)
    compressed = np.sign(floored) * np.log1p(np.abs(floored))
    compressed = np.nan_to_num(compressed, nan=0.0, posinf=0.0, neginf=0.0)

    gc.collect()
    return compressed
271
+
272
def values_chi(mol, chi_type):
    """Collect chi values of increasing order until the first zero.

    The order starts at 0 and is incremented until the chi function returns
    exactly 0.0. Each order is evaluated once and kept, instead of probing
    the orders first and then recomputing every value.

    Args:
        mol: RDKit molecule.
        chi_type: ``'n'`` selects ``ChiNn_``; any other value selects
            ``ChiNv_``.

    Returns:
        1-D ndarray with the non-zero chi values for orders 0, 1, 2, ...
        (empty when the order-0 value is already zero).
    """
    chi_func = Chem.GraphDescriptors.ChiNn_ if chi_type == 'n' else Chem.GraphDescriptors.ChiNv_

    values = []
    order = 0
    while True:
        value = chi_func(mol, order)
        if value == 0.0:
            break
        values.append(value)
        order += 1
    return np.array(values)
278
+
279
def generate_chi(mols, chi_type):
    """Compute zero-padded chi value rows for a collection of molecules.

    ``values_chi`` is run for every molecule in a process pool sized to
    ``os.cpu_count()``. Because the number of values differs per molecule,
    each row is right-padded with zeros to the longest row.

    Args:
        mols: Iterable of RDKit molecules.
        chi_type: Forwarded to ``values_chi`` (``'n'`` or another value).

    Returns:
        2-D ndarray of shape ``(len(mols), longest_row)``. An empty input
        yields an empty ``(0, 0)`` array instead of raising. Note that the
        column count depends on the molecules passed in.
    """
    n_jobs = os.cpu_count()
    with ProcessPoolExecutor(max_workers=n_jobs) as executor:
        futures = [executor.submit(values_chi, mol, chi_type) for mol in mols]
        descriptor = [future.result() for future in futures]

    if not descriptor:
        # max() over an empty sequence would raise ValueError.
        return np.empty((0, 0))

    max_length = max(len(x) for x in descriptor)
    padded_descriptor = np.array([np.pad(x, (0, max_length - len(x)), 'constant') for x in descriptor])

    return padded_descriptor
289
+
290
def sanitize_and_compute_descriptor(mol):
    """Compute the BCUT2D descriptor for one molecule, never raising.

    Hydrogens are removed and the molecule is sanitized, then Gasteiger
    charges are computed before BCUT2D is evaluated.

    Args:
        mol: RDKit molecule.

    Returns:
        The BCUT2D result on success. Fallbacks, each of length 8:
          * zeros when the Gasteiger charge step fails;
          * the molecular weight repeated when BCUT2D itself fails;
          * zeros for any other failure (including the two steps above
            failing inside their own fallback).
    """
    try:
        cleaned = Chem.RemoveHs(mol)
        Chem.SanitizeMol(cleaned)

        try:
            Chem.rdPartialCharges.ComputeGasteigerCharges(cleaned)
        except Exception as charge_err:
            print(f"Gasteiger charge calculation failed: {charge_err}")
            return [0] * 8

        try:
            bcut_values = Chem.rdMolDescriptors.BCUT2D(cleaned)
        except Exception as bcut_err:
            print(f"BCUT2D calculation failed: {bcut_err}")
            return [Descriptors.MolWt(cleaned)] * 8

        return bcut_values
    except Exception:
        return [0] * 8
307
+
308
def compute_descriptors_parallel(mols, n_jobs=None):
    """Run ``sanitize_and_compute_descriptor`` over molecules in a process pool.

    Args:
        mols: Iterable of RDKit molecules; ``None`` entries are skipped.
        n_jobs: Worker count passed to ``ProcessPoolExecutor`` (``None``
            lets the executor choose).

    Returns:
        ndarray with one row per non-``None`` molecule, in input order.
        Because ``None`` entries are skipped, the row count can be smaller
        than ``len(mols)``.
    """
    with ProcessPoolExecutor(max_workers=n_jobs) as pool:
        pending = []
        for candidate in mols:
            if candidate is None:
                continue
            pending.append(pool.submit(sanitize_and_compute_descriptor, candidate))
        rows = [job.result() for job in pending]
    return np.array(rows)
313
+
314
def process_molecules_parallel(mols, max_workers=4, chunk_size=100):
    """Generate 3D-embedded molecules in parallel, preserving input order.

    The input is processed in slices of ``chunk_size``; each slice gets its
    own process pool running ``mol3d``, followed by a garbage collection.

    Results are gathered in submission order. Gathering them with
    ``as_completed`` would append molecules in whatever order the workers
    finish, making the output order nondeterministic and unrelated to the
    order of ``mols`` (and of any per-molecule feature rows built from it).

    Args:
        mols: Sequence of RDKit molecules (must support ``len`` and slicing).
        max_workers: Worker processes per slice.
        chunk_size: Number of molecules submitted per pool.

    Returns:
        List of molecules for which ``mol3d`` returned a result, in input
        order. Molecules for which ``mol3d`` returned ``None`` are omitted,
        so the list can be shorter than ``mols``.
    """
    results = []
    for i in range(0, len(mols), chunk_size):
        chunk = mols[i:i + chunk_size]
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(mol3d, mol) for mol in chunk]
            # Iterate the futures themselves (not as_completed) to keep order.
            for future in futures:
                result = future.result()
                if result is not None:
                    results.append(result)
        gc.collect()
    return results
326
+
327
+ def selection_data_descriptor_compress(selection, fps, mols, name, target_path="result", save_res="np"):
328
+ if save_res == "pd":
329
+ fps = pd.DataFrame({'mols': mols})
330
+ ####################################
331
+ phase0 = 1 #selection[0] #"MolWeight" #
332
+ phase1 = 1 #selection[1] #"Mol_logP" #
333
+ phase2 = 1 #selection[2] #"Mol_MR" #
334
+ phase3 = 1 #selection[3] #"Mol_TPSA" #
335
+ phase4 = selection[4] #"NumRotatableBonds" #
336
+ phase5 = selection[5] #"HeavyAtomCount" #
337
+ phase6 = selection[6] #"NumHAcceptors" #
338
+ phase7 = selection[7] #"NumHDonors" #
339
+ phase8 = selection[8] #"NumHeteroatoms" #
340
+ phase9 = selection[9] #"NumValenceElec" #
341
+ phase10 = selection[10] #"NHOHCount" #
342
+ phase11 = selection[11] #"NOCount" #
343
+ phase12 = selection[12] #"RingCount" #
344
+ phase13 = selection[13] #"NumAromaticRings" #
345
+ phase14 = selection[14] #"NumSaturatedRings" #
346
+ phase15 = selection[15] #"NumAliphaticRings" #
347
+ phase16 = selection[16] #"LabuteASA" #
348
+ phase17 = selection[17] #"BalabanJ" #
349
+ phase18 = selection[18] #"BertzCT" #
350
+ phase19 = selection[19] #"Ipc" #
351
+ phase20 = selection[20] #"kappa_Series[1-3]_ind" #
352
+ phase21 = selection[21] #"Chi_Series[13]_ind" #
353
+ phase22 = selection[22] #"Phi" #
354
+ phase23 = selection[23] #"HallKierAlpha" #
355
+ phase24 = selection[24] #"NumAmideBonds" #
356
+ phase25 = selection[25] #"FractionCSP3" #
357
+ phase26 = selection[26] #"NumSpiroAtoms" #
358
+ phase27 = selection[27] #"NumBridgeheadAtoms" #
359
+ phase28 = selection[28] #"PEOE_VSA_Series[1-14]_ind" #
360
+ phase29 = selection[29] #"SMR_VSA_Series[1-10]_ind" #
361
+ phase30 = selection[30] #"SlogP_VSA_Series[1-12]_ind"#
362
+ phase31 = selection[31] #"EState_VSA_Series[1-11]_ind"#
363
+ phase32 = selection[32] #"VSA_EState_Series[1-10]_ind"#
364
+ phase33 = selection[33] #"MQNs" #
365
+ phase34 = selection[34] #"AUTOCORR2D" #
366
+ phase35 = selection[35] #"BCUT2D" #
367
+ phase36 = selection[36] #"Asphericity" #
368
+ phase37 = selection[37] #"PBF" #
369
+ phase38 = selection[38] #"RadiusOfGyration" #
370
+ phase39 = selection[39] #"InertialShapeFactor"#
371
+ phase40 = selection[40] #"Eccentricity"
372
+ phase41 = selection[41] #"SpherocityIndex"
373
+ phase42 = selection[42] #"PMI_series[1-3]_ind"
374
+ phase43 = selection[43] #"NPR_series[1-2]_ind"
375
+ phase44 = selection[44] #"AUTOCORR3D"
376
+ phase45 = selection[45] #"RDF"
377
+ phase46 = selection[46] #"MORSE"
378
+ phase47 = selection[47] #"WHIM"
379
+ phase48 = selection[48] #"GETAWAY"
380
+ ####################################
381
+ def clear_descriptor_memory(descriptor):
382
+ del descriptor
383
+ gc.collect()
384
+ ####################################
385
+ ####################################
386
+ if phase0 == 1:
387
+ descriptor = [Descriptors.ExactMolWt(alpha) for alpha in mols]
388
+ fps = generating_newfps(fps, descriptor, 'MolWt', save_res)
389
+ clear_descriptor_memory(descriptor)
390
+ if phase1 == 1:
391
+ descriptor = [Chem.Crippen.MolLogP(alpha) for alpha in mols]
392
+ fps = generating_newfps(fps, descriptor, 'MolLogP', save_res)
393
+ clear_descriptor_memory(descriptor)
394
+ if phase2 == 1:
395
+ descriptor = [Chem.Crippen.MolMR(alpha) for alpha in mols]
396
+ fps = generating_newfps(fps, descriptor, 'MolMR', save_res)
397
+ clear_descriptor_memory(descriptor)
398
+ if phase3 == 1:
399
+ descriptor = [Descriptors.TPSA(alpha) for alpha in mols]
400
+ fps = generating_newfps(fps, descriptor, 'TPSA', save_res)
401
+ clear_descriptor_memory(descriptor)
402
+ if phase4 == 1:
403
+ descriptor = [Chem.Lipinski.NumRotatableBonds(alpha) for alpha in mols]
404
+ fps = generating_newfps(fps, descriptor, 'NumRotatableBonds', save_res)
405
+ clear_descriptor_memory(descriptor)
406
+ if phase5 == 1:
407
+ descriptor = [Chem.Lipinski.HeavyAtomCount(alpha) for alpha in mols]
408
+ fps = generating_newfps(fps, descriptor, 'HeavyAtomCount', save_res)
409
+ clear_descriptor_memory(descriptor)
410
+ if phase6 == 1:
411
+ descriptor = [Chem.Lipinski.NumHAcceptors(alpha) for alpha in mols]
412
+ fps = generating_newfps(fps, descriptor, 'NumHAcceptors', save_res)
413
+ clear_descriptor_memory(descriptor)
414
+ if phase7 == 1:
415
+ descriptor = [Chem.Lipinski.NumHDonors(alpha) for alpha in mols]
416
+ fps = generating_newfps(fps, descriptor, 'NumHDonors', save_res)
417
+ clear_descriptor_memory(descriptor)
418
+ if phase8 == 1:
419
+ descriptor = [Chem.Lipinski.NumHeteroatoms(alpha) for alpha in mols]
420
+ fps = generating_newfps(fps, descriptor, 'NumHeteroatoms', save_res)
421
+ clear_descriptor_memory(descriptor)
422
+ if phase9 == 1:
423
+ descriptor = [Chem.Descriptors.NumValenceElectrons(alpha) for alpha in mols]
424
+ fps = generating_newfps(fps, descriptor, 'NumValenceElectrons', save_res)
425
+ clear_descriptor_memory(descriptor)
426
+ if phase10 == 1:
427
+ descriptor = [Chem.Lipinski.NHOHCount(alpha) for alpha in mols]
428
+ fps = generating_newfps(fps, descriptor, 'NHOHCount', save_res)
429
+ clear_descriptor_memory(descriptor)
430
+ if phase11 == 1:
431
+ descriptor = [Chem.Lipinski.NOCount(alpha) for alpha in mols]
432
+ fps = generating_newfps(fps, descriptor, 'NOCount', save_res)
433
+ clear_descriptor_memory(descriptor)
434
+ if phase12 == 1:
435
+ descriptor = [Chem.Lipinski.RingCount(alpha) for alpha in mols]
436
+ fps = generating_newfps(fps, descriptor, 'RingCount', save_res)
437
+ clear_descriptor_memory(descriptor)
438
+ if phase13 == 1:
439
+ descriptor = [Chem.Lipinski.NumAromaticRings(alpha) for alpha in mols]
440
+ fps = generating_newfps(fps, descriptor, 'NumAromaticRings', save_res)
441
+ clear_descriptor_memory(descriptor)
442
+ if phase14 == 1:
443
+ descriptor = [Chem.Lipinski.NumSaturatedRings(alpha) for alpha in mols]
444
+ fps = generating_newfps(fps, descriptor, 'NumSaturatedRings', save_res)
445
+ clear_descriptor_memory(descriptor)
446
+ if phase15 == 1:
447
+ descriptor = [Chem.Lipinski.NumAliphaticRings(alpha) for alpha in mols]
448
+ fps = generating_newfps(fps, descriptor, 'NumAliphaticRings', save_res)
449
+ clear_descriptor_memory(descriptor)
450
+ if phase16 == 1:
451
+ descriptor = [Chem.rdMolDescriptors.CalcLabuteASA(alpha) for alpha in mols]
452
+ fps = generating_newfps(fps, descriptor, 'LabuteASA', save_res)
453
+ clear_descriptor_memory(descriptor)
454
+ if phase17 == 1:
455
+ descriptor = [Chem.GraphDescriptors.BalabanJ(alpha) for alpha in mols]
456
+ # descriptor = Normalization(descriptor)
457
+ fps = generating_newfps(fps, descriptor, 'BalabanJ', save_res)
458
+ clear_descriptor_memory(descriptor)
459
+ if phase18 == 1:
460
+ descriptor = [Chem.GraphDescriptors.BertzCT(alpha) for alpha in mols]
461
+ # descriptor = Normalization(descriptor)
462
+ fps = generating_newfps(fps, descriptor, 'BertzCT', save_res)
463
+ clear_descriptor_memory(descriptor)
464
+ if phase19 == 1:
465
+ descriptor = [Chem.GraphDescriptors.Ipc(alpha) for alpha in mols]
466
+ descriptor = Normalization(descriptor)
467
+ fps = generating_newfps(fps, descriptor, 'Ipc', save_res)
468
+ clear_descriptor_memory(descriptor)
469
+ if phase20 == 1:
470
+ d1 = [Chem.GraphDescriptors.Kappa1(alpha) for alpha in mols]
471
+ d2 = [Chem.GraphDescriptors.Kappa2(alpha) for alpha in mols]
472
+ d3 = [Chem.GraphDescriptors.Kappa3(alpha) for alpha in mols]
473
+ d1 = np.asarray(d1)
474
+ d2 = np.asarray(d2)
475
+ d3 = np.asarray(d3)
476
+ fps = generating_newfps(fps, [d1,d2,d3], 'kappa_Series[1-3]_ind', save_res)
477
+ clear_descriptor_memory(d1)
478
+ clear_descriptor_memory(d2)
479
+ clear_descriptor_memory(d3)
480
+ if phase21 == 1:
481
+ d1 = [Chem.GraphDescriptors.Chi0(alpha) for alpha in mols]
482
+ d2 = [Chem.GraphDescriptors.Chi0n(alpha) for alpha in mols]
483
+ d3 = [Chem.GraphDescriptors.Chi0v(alpha) for alpha in mols]
484
+ d4 = [Chem.GraphDescriptors.Chi1(alpha) for alpha in mols]
485
+ d5 = [Chem.GraphDescriptors.Chi1n(alpha) for alpha in mols]
486
+ d6 = [Chem.GraphDescriptors.Chi1v(alpha) for alpha in mols]
487
+ d7 = [Chem.GraphDescriptors.Chi2n(alpha) for alpha in mols]
488
+ d8 = [Chem.GraphDescriptors.Chi2v(alpha) for alpha in mols]
489
+ d9 = [Chem.GraphDescriptors.Chi3n(alpha) for alpha in mols]
490
+ d10 = [Chem.GraphDescriptors.Chi3v(alpha) for alpha in mols]
491
+ d11 = [Chem.GraphDescriptors.Chi4n(alpha) for alpha in mols]
492
+ d12 = [Chem.GraphDescriptors.Chi4v(alpha) for alpha in mols]
493
+ d13 = generate_chi(mols, 'n')
494
+ d14 = generate_chi(mols, 'v')
495
+ d1 = np.asarray(d1)
496
+ d2 = np.asarray(d2)
497
+ d3 = np.asarray(d3)
498
+ d4 = np.asarray(d4)
499
+ d5 = np.asarray(d5)
500
+ d6 = np.asarray(d6)
501
+ d7 = np.asarray(d7)
502
+ d8 = np.asarray(d8)
503
+ d9 = np.asarray(d9)
504
+ d10 = np.asarray(d10)
505
+ d11 = np.asarray(d11)
506
+ d12 = np.asarray(d12)
507
+ d13 = np.asarray(d13)
508
+ d14 = np.asarray(d14)
509
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14], 'Chi_Series[13]_ind', save_res)
510
+ clear_descriptor_memory(d1)
511
+ clear_descriptor_memory(d2)
512
+ clear_descriptor_memory(d3)
513
+ clear_descriptor_memory(d4)
514
+ clear_descriptor_memory(d5)
515
+ clear_descriptor_memory(d6)
516
+ clear_descriptor_memory(d7)
517
+ clear_descriptor_memory(d8)
518
+ clear_descriptor_memory(d9)
519
+ clear_descriptor_memory(d10)
520
+ clear_descriptor_memory(d11)
521
+ clear_descriptor_memory(d12)
522
+ clear_descriptor_memory(d13)
523
+ clear_descriptor_memory(d14)
524
+ if phase22 == 1:
525
+ descriptor = [Chem.rdMolDescriptors.CalcPhi(alpha) for alpha in mols]
526
+ fps = generating_newfps(fps, descriptor, 'Phi', save_res)
527
+ clear_descriptor_memory(descriptor)
528
+ if phase23 == 1:
529
+ descriptor = [Chem.GraphDescriptors.HallKierAlpha(alpha) for alpha in mols]
530
+ fps = generating_newfps(fps, descriptor, 'HallKierAlpha', save_res)
531
+ clear_descriptor_memory(descriptor)
532
+ if phase24 == 1:
533
+ descriptor = [Chem.rdMolDescriptors.CalcNumAmideBonds(alpha) for alpha in mols]
534
+ fps = generating_newfps(fps, descriptor, 'NumAmideBonds', save_res)
535
+ clear_descriptor_memory(descriptor)
536
+ if phase25 == 1:
537
+ descriptor = [Chem.Lipinski.FractionCSP3(alpha) for alpha in mols]
538
+ fps = generating_newfps(fps, descriptor, 'FractionCSP3', save_res)
539
+ clear_descriptor_memory(descriptor)
540
+ if phase26 == 1:
541
+ descriptor = [Chem.rdMolDescriptors.CalcNumSpiroAtoms(alpha) for alpha in mols]
542
+ fps = generating_newfps(fps, descriptor, 'NumSpiroAtoms', save_res)
543
+ clear_descriptor_memory(descriptor)
544
+ if phase27 == 1:
545
+ descriptor = [Chem.rdMolDescriptors.CalcNumBridgeheadAtoms(alpha) for alpha in mols]
546
+ fps = generating_newfps(fps, descriptor, 'NumBridgeheadAtoms', save_res)
547
+ clear_descriptor_memory(descriptor)
548
+ if phase28 == 1:
549
+ d1 = [Chem.MolSurf.PEOE_VSA1(alpha) for alpha in mols]
550
+ d2 = [Chem.MolSurf.PEOE_VSA2(alpha) for alpha in mols]
551
+ d3 = [Chem.MolSurf.PEOE_VSA3(alpha) for alpha in mols]
552
+ d4 = [Chem.MolSurf.PEOE_VSA4(alpha) for alpha in mols]
553
+ d5 = [Chem.MolSurf.PEOE_VSA5(alpha) for alpha in mols]
554
+ d6 = [Chem.MolSurf.PEOE_VSA6(alpha) for alpha in mols]
555
+ d7 = [Chem.MolSurf.PEOE_VSA7(alpha) for alpha in mols]
556
+ d8 = [Chem.MolSurf.PEOE_VSA8(alpha) for alpha in mols]
557
+ d9 = [Chem.MolSurf.PEOE_VSA9(alpha) for alpha in mols]
558
+ d10 = [Chem.MolSurf.PEOE_VSA10(alpha) for alpha in mols]
559
+ d11 = [Chem.MolSurf.PEOE_VSA11(alpha) for alpha in mols]
560
+ d12 = [Chem.MolSurf.PEOE_VSA12(alpha) for alpha in mols]
561
+ d13 = [Chem.MolSurf.PEOE_VSA13(alpha) for alpha in mols]
562
+ d14 = [Chem.MolSurf.PEOE_VSA14(alpha) for alpha in mols]
563
+ d1 = np.asarray(d1)
564
+ d2 = np.asarray(d2)
565
+ d3 = np.asarray(d3)
566
+ d4 = np.asarray(d4)
567
+ d5 = np.asarray(d5)
568
+ d6 = np.asarray(d6)
569
+ d7 = np.asarray(d7)
570
+ d8 = np.asarray(d8)
571
+ d9 = np.asarray(d9)
572
+ d10 = np.asarray(d10)
573
+ d11 = np.asarray(d11)
574
+ d12 = np.asarray(d12)
575
+ d13 = np.asarray(d13)
576
+ d14 = np.asarray(d14)
577
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12,d13,d14],'PEOE_VSA_Series[1-14]_ind', save_res)
578
+ clear_descriptor_memory(d1)
579
+ clear_descriptor_memory(d2)
580
+ clear_descriptor_memory(d3)
581
+ clear_descriptor_memory(d4)
582
+ clear_descriptor_memory(d5)
583
+ clear_descriptor_memory(d6)
584
+ clear_descriptor_memory(d7)
585
+ clear_descriptor_memory(d8)
586
+ clear_descriptor_memory(d9)
587
+ clear_descriptor_memory(d10)
588
+ clear_descriptor_memory(d11)
589
+ clear_descriptor_memory(d12)
590
+ clear_descriptor_memory(d13)
591
+ clear_descriptor_memory(d14)
592
+ if phase29 == 1:
593
+ d1 = [Chem.MolSurf.SMR_VSA1(alpha) for alpha in mols]
594
+ d2 = [Chem.MolSurf.SMR_VSA2(alpha) for alpha in mols]
595
+ d3 = [Chem.MolSurf.SMR_VSA3(alpha) for alpha in mols]
596
+ d4 = [Chem.MolSurf.SMR_VSA4(alpha) for alpha in mols]
597
+ d5 = [Chem.MolSurf.SMR_VSA5(alpha) for alpha in mols]
598
+ d6 = [Chem.MolSurf.SMR_VSA6(alpha) for alpha in mols]
599
+ d7 = [Chem.MolSurf.SMR_VSA7(alpha) for alpha in mols]
600
+ d8 = [Chem.MolSurf.SMR_VSA8(alpha) for alpha in mols]
601
+ d9 = [Chem.MolSurf.SMR_VSA9(alpha) for alpha in mols]
602
+ d10 = [Chem.MolSurf.SMR_VSA10(alpha) for alpha in mols]
603
+ d1 = np.asarray(d1)
604
+ d2 = np.asarray(d2)
605
+ d3 = np.asarray(d3)
606
+ d4 = np.asarray(d4)
607
+ d5 = np.asarray(d5)
608
+ d6 = np.asarray(d6)
609
+ d7 = np.asarray(d7)
610
+ d8 = np.asarray(d8)
611
+ d9 = np.asarray(d9)
612
+ d10 = np.asarray(d10)
613
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10],'SMR_VSA_Series[1-10]_ind', save_res)
614
+ clear_descriptor_memory(d1)
615
+ clear_descriptor_memory(d2)
616
+ clear_descriptor_memory(d3)
617
+ clear_descriptor_memory(d4)
618
+ clear_descriptor_memory(d5)
619
+ clear_descriptor_memory(d6)
620
+ clear_descriptor_memory(d7)
621
+ clear_descriptor_memory(d8)
622
+ clear_descriptor_memory(d9)
623
+ clear_descriptor_memory(d10)
624
+ if phase30 == 1:
625
+ d1 = [Chem.MolSurf.SlogP_VSA1(alpha) for alpha in mols]
626
+ d2 = [Chem.MolSurf.SlogP_VSA2(alpha) for alpha in mols]
627
+ d3 = [Chem.MolSurf.SlogP_VSA3(alpha) for alpha in mols]
628
+ d4 = [Chem.MolSurf.SlogP_VSA4(alpha) for alpha in mols]
629
+ d5 = [Chem.MolSurf.SlogP_VSA5(alpha) for alpha in mols]
630
+ d6 = [Chem.MolSurf.SlogP_VSA6(alpha) for alpha in mols]
631
+ d7 = [Chem.MolSurf.SlogP_VSA7(alpha) for alpha in mols]
632
+ d8 = [Chem.MolSurf.SlogP_VSA8(alpha) for alpha in mols]
633
+ d9 = [Chem.MolSurf.SlogP_VSA9(alpha) for alpha in mols]
634
+ d10= [Chem.MolSurf.SlogP_VSA10(alpha) for alpha in mols]
635
+ d11= [Chem.MolSurf.SlogP_VSA11(alpha) for alpha in mols]
636
+ d12= [Chem.MolSurf.SlogP_VSA12(alpha) for alpha in mols]
637
+ d1 = np.asarray(d1)
638
+ d2 = np.asarray(d2)
639
+ d3 = np.asarray(d3)
640
+ d4 = np.asarray(d4)
641
+ d5 = np.asarray(d5)
642
+ d6 = np.asarray(d6)
643
+ d7 = np.asarray(d7)
644
+ d8 = np.asarray(d8)
645
+ d9 = np.asarray(d9)
646
+ d10 = np.asarray(d10)
647
+ d11 = np.asarray(d11)
648
+ d12 = np.asarray(d12)
649
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11,d12],'SlogP_VSA_Series[1-12]_ind', save_res)
650
+ clear_descriptor_memory(d1)
651
+ clear_descriptor_memory(d2)
652
+ clear_descriptor_memory(d3)
653
+ clear_descriptor_memory(d4)
654
+ clear_descriptor_memory(d5)
655
+ clear_descriptor_memory(d6)
656
+ clear_descriptor_memory(d7)
657
+ clear_descriptor_memory(d8)
658
+ clear_descriptor_memory(d9)
659
+ clear_descriptor_memory(d10)
660
+ clear_descriptor_memory(d11)
661
+ clear_descriptor_memory(d12)
662
+ if phase31 == 1:
663
+ d1 = [Chem.EState.EState_VSA.EState_VSA1(alpha) for alpha in mols]
664
+ d2 = [Chem.EState.EState_VSA.EState_VSA2(alpha) for alpha in mols]
665
+ d3 = [Chem.EState.EState_VSA.EState_VSA3(alpha) for alpha in mols]
666
+ d4 = [Chem.EState.EState_VSA.EState_VSA4(alpha) for alpha in mols]
667
+ d5 = [Chem.EState.EState_VSA.EState_VSA5(alpha) for alpha in mols]
668
+ d6 = [Chem.EState.EState_VSA.EState_VSA6(alpha) for alpha in mols]
669
+ d7 = [Chem.EState.EState_VSA.EState_VSA7(alpha) for alpha in mols]
670
+ d8 = [Chem.EState.EState_VSA.EState_VSA8(alpha) for alpha in mols]
671
+ d9 = [Chem.EState.EState_VSA.EState_VSA9(alpha) for alpha in mols]
672
+ d10 = [Chem.EState.EState_VSA.EState_VSA10(alpha) for alpha in mols]
673
+ d11 = [Chem.EState.EState_VSA.EState_VSA11(alpha) for alpha in mols]
674
+ d1 = np.asarray(d1)
675
+ d2 = np.asarray(d2)
676
+ d3 = np.asarray(d3)
677
+ d4 = np.asarray(d4)
678
+ d5 = np.asarray(d5)
679
+ d6 = np.asarray(d6)
680
+ d7 = np.asarray(d7)
681
+ d8 = np.asarray(d8)
682
+ d9 = np.asarray(d9)
683
+ d10 = np.asarray(d10)
684
+ d11 = np.asarray(d11)
685
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,d11],'EState_VSA_Series[1-11]_ind', save_res)
686
+ clear_descriptor_memory(d1)
687
+ clear_descriptor_memory(d2)
688
+ clear_descriptor_memory(d3)
689
+ clear_descriptor_memory(d4)
690
+ clear_descriptor_memory(d5)
691
+ clear_descriptor_memory(d6)
692
+ clear_descriptor_memory(d7)
693
+ clear_descriptor_memory(d8)
694
+ clear_descriptor_memory(d9)
695
+ clear_descriptor_memory(d10)
696
+ clear_descriptor_memory(d11)
697
+ if phase32 == 1:
698
+ d1 = [Chem.EState.EState_VSA.VSA_EState1(alpha) for alpha in mols]
699
+ d2 = [Chem.EState.EState_VSA.VSA_EState2(alpha) for alpha in mols]
700
+ d3 = [Chem.EState.EState_VSA.VSA_EState3(alpha) for alpha in mols]
701
+ d4 = [Chem.EState.EState_VSA.VSA_EState4(alpha) for alpha in mols]
702
+ d5 = [Chem.EState.EState_VSA.VSA_EState5(alpha) for alpha in mols]
703
+ d6 = [Chem.EState.EState_VSA.VSA_EState6(alpha) for alpha in mols]
704
+ d7 = [Chem.EState.EState_VSA.VSA_EState7(alpha) for alpha in mols]
705
+ d8 = [Chem.EState.EState_VSA.VSA_EState8(alpha) for alpha in mols]
706
+ d9 = [Chem.EState.EState_VSA.VSA_EState9(alpha) for alpha in mols]
707
+ d10 = [Chem.EState.EState_VSA.VSA_EState10(alpha) for alpha in mols]
708
+ d1 = np.asarray(d1)
709
+ d2 = np.asarray(d2)
710
+ d3 = np.asarray(d3)
711
+ d4 = np.asarray(d4)
712
+ d5 = np.asarray(d5)
713
+ d6 = np.asarray(d6)
714
+ d7 = np.asarray(d7)
715
+ d8 = np.asarray(d8)
716
+ d9 = np.asarray(d9)
717
+ d10 = np.asarray(d10)
718
+ fps = generating_newfps(fps, [d1,d2,d3,d4,d5,d6,d7,d8,d9,d10],'VSA_EState_Series[1-10]', save_res)
719
+ clear_descriptor_memory(d1)
720
+ clear_descriptor_memory(d2)
721
+ clear_descriptor_memory(d3)
722
+ clear_descriptor_memory(d4)
723
+ clear_descriptor_memory(d5)
724
+ clear_descriptor_memory(d6)
725
+ clear_descriptor_memory(d7)
726
+ clear_descriptor_memory(d8)
727
+ clear_descriptor_memory(d9)
728
+ clear_descriptor_memory(d10)
729
+ if phase33 == 1:
730
+ descriptor = [Chem.rdMolDescriptors.MQNs_(alpha) for alpha in mols]
731
+ # descriptor = Normalization(descriptor)
732
+ fps = generating_newfps(fps, descriptor, 'MQNs', save_res)
733
+ clear_descriptor_memory(descriptor)
734
+ if phase34 == 1:
735
+ descriptor = [Chem.rdMolDescriptors.CalcAUTOCORR2D(alpha) for alpha in mols]
736
+ fps = generating_newfps(fps, descriptor, 'AUTOCORR2D', save_res)
737
+ clear_descriptor_memory(descriptor)
738
+ if phase35 == 1:
739
+ descriptor = compute_descriptors_parallel(mols)
740
+ fps = generating_newfps(fps, descriptor, 'BCUT2D', save_res)
741
+ clear_descriptor_memory(descriptor)
742
+ ####################################################
743
+ mols2 = process_molecules_parallel(mols, max_workers=8)
744
+ del mols
745
+ gc.collect()
746
+ ####################################################
747
+ if phase36 == 1:
748
+ descriptor = [Chem.rdMolDescriptors.CalcAsphericity(alpha) for alpha in mols2]
749
+ fps = generating_newfps(fps, descriptor, 'Asphericity', save_res)
750
+ clear_descriptor_memory(descriptor)
751
+ if phase37 == 1:
752
+ descriptor = [Chem.rdMolDescriptors.CalcPBF(alpha) for alpha in mols2]
753
+ fps = generating_newfps(fps, descriptor, 'PBF', save_res)
754
+ clear_descriptor_memory(descriptor)
755
+ if phase38 == 1:
756
+ descriptor = [Chem.rdMolDescriptors.CalcRadiusOfGyration(alpha) for alpha in mols2]
757
+ fps = generating_newfps(fps, descriptor, 'RadiusOfGyration', save_res)
758
+ clear_descriptor_memory(descriptor)
759
+ if phase39 == 1:
760
+ descriptor = [Chem.rdMolDescriptors.CalcInertialShapeFactor(alpha) for alpha in mols2]
761
+ fps = generating_newfps(fps, descriptor, 'InertialShapeFactor', save_res)
762
+ clear_descriptor_memory(descriptor)
763
+ if phase40 == 1:
764
+ descriptor = [Chem.rdMolDescriptors.CalcEccentricity(alpha) for alpha in mols2]
765
+ fps = generating_newfps(fps, descriptor, 'Eccentricity', save_res)
766
+ clear_descriptor_memory(descriptor)
767
+ if phase41 == 1:
768
+ descriptor = [Chem.rdMolDescriptors.CalcSpherocityIndex(alpha) for alpha in mols2]
769
+ fps = generating_newfps(fps, descriptor, 'SpherocityIndex', save_res)
770
+ clear_descriptor_memory(descriptor)
771
+ if phase42 == 1:
772
+ d1 = [Chem.rdMolDescriptors.CalcPMI1(alpha) for alpha in mols2]
773
+ d2 = [Chem.rdMolDescriptors.CalcPMI2(alpha) for alpha in mols2]
774
+ d3 = [Chem.rdMolDescriptors.CalcPMI3(alpha) for alpha in mols2]
775
+ d1 = Normalization(d1)
776
+ d2 = Normalization(d2)
777
+ d3 = Normalization(d3)
778
+ d1 = np.asarray(d1)
779
+ d2 = np.asarray(d2)
780
+ d3 = np.asarray(d3)
781
+ fps = generating_newfps(fps, [d1,d2,d3], 'PMI_series[1-3]_ind', save_res)
782
+ clear_descriptor_memory(d1)
783
+ clear_descriptor_memory(d2)
784
+ clear_descriptor_memory(d3)
785
+ if phase43 == 1:
786
+ d1 = [Chem.rdMolDescriptors.CalcNPR1(alpha) for alpha in mols2]
787
+ d2 = [Chem.rdMolDescriptors.CalcNPR2(alpha) for alpha in mols2]
788
+ d1 = np.asarray(d1)
789
+ d2 = np.asarray(d2)
790
+ fps = generating_newfps(fps, [d1,d2], 'NPR_series[1-2]_ind', save_res)
791
+ clear_descriptor_memory(d1)
792
+ clear_descriptor_memory(d2)
793
+ if phase44 == 1:
794
+ descriptor = [Chem.rdMolDescriptors.CalcAUTOCORR3D(mols) for mols in mols2]
795
+ fps = generating_newfps(fps, descriptor, 'AUTOCORR3D', save_res)
796
+ clear_descriptor_memory(descriptor)
797
+ if phase45 == 1:
798
+ descriptor = [Chem.rdMolDescriptors.CalcRDF(mols) for mols in mols2]
799
+ descriptor = Normalization(descriptor)
800
+ fps = generating_newfps(fps, descriptor, 'RDF', save_res)
801
+ clear_descriptor_memory(descriptor)
802
+ if phase46 == 1:
803
+ descriptor = [Chem.rdMolDescriptors.CalcMORSE(mols) for mols in mols2]
804
+ descriptor = Normalization(descriptor)
805
+ fps = generating_newfps(fps, descriptor, 'MORSE', save_res)
806
+ clear_descriptor_memory(descriptor)
807
+ if phase47 == 1:
808
+ descriptor = [Chem.rdMolDescriptors.CalcWHIM(mols) for mols in mols2]
809
+ descriptor = Normalization(descriptor)
810
+ fps = generating_newfps(fps, descriptor, 'WHIM', save_res)
811
+ clear_descriptor_memory(descriptor)
812
+ if phase48 == 1:
813
+ descriptor = [Chem.rdMolDescriptors.CalcGETAWAY(mols) for mols in mols2]
814
+ descriptor = Normalization(descriptor)
815
+ fps = generating_newfps(fps, descriptor, 'GETAWAY', save_res)
816
+ clear_descriptor_memory(descriptor)
817
+ #########################################
818
+ if save_res == "pd":
819
+ fps.to_csv(f'{target_path}/{name}_feature_selection.csv')
820
+
821
+ fps = fps.astype('float')
822
+ return fps
823
+
824
+
825
def selection_fromStudy_compress(study_name, storage, unfixed=False, showlog=True):
    """Convert the best trial of an Optuna study into a 49-slot feature vector.

    Each slot corresponds to one descriptor group (see ``feature_order``).
    For every best-trial parameter whose name is a known descriptor group,
    the value stored in the trial is written into that group's slot.

    Args:
        study_name: Name of the Optuna study to load.
        storage: Optuna storage passed to ``optuna.load_study``.
        unfixed: When False (default) the four base descriptors
            (MolWt, MolLogP, MolMR, TPSA) are forced to 1 and any trial
            value for them is ignored. When True they are taken from the
            trial like every other parameter.
        showlog: Print the best trial and the resulting vector.

    Returns:
        ``numpy.ndarray`` of dtype int and length 49.
    """
    required_features = ["MolWt", "MolLogP", "MolMR", "TPSA"]
    required_indices = [0, 1, 2, 3]

    # The position of a name in this tuple is the slot it occupies in the
    # returned vector.
    feature_order = (
        "MolWt",
        "MolLogP",
        "MolMR",
        "TPSA",
        "NumRotatableBonds",
        "HeavyAtomCount",
        "NumHAcceptors",
        "NumHDonors",
        "NumHeteroatoms",
        "NumValenceElectrons",
        "NHOHCount",
        "NOCount",
        "RingCount",
        "NumAromaticRings",
        "NumSaturatedRings",
        "NumAliphaticRings",
        "LabuteASA",
        "BalabanJ",
        "BertzCT",
        "Ipc",
        "kappa_Series[1-3]_ind",
        "Chi_Series[13]_ind",
        "Phi",
        "HallKierAlpha",
        "NumAmideBonds",
        "FractionCSP3",
        "NumSpiroAtoms",
        "NumBridgeheadAtoms",
        "PEOE_VSA_Series[1-14]_ind",
        "SMR_VSA_Series[1-10]_ind",
        "SlogP_VSA_Series[1-12]_ind",
        "EState_VSA_Series[1-11]_ind",
        "VSA_EState_Series[1-10]",
        "MQNs",
        "AUTOCORR2D",
        "BCUT2D",
        "Asphericity",
        "PBF",
        "RadiusOfGyration",
        "InertialShapeFactor",
        "Eccentricity",
        "SpherocityIndex",
        "PMI_series[1-3]_ind",
        "NPR_series[1-2]_ind",
        "AUTOCORR3D",
        "RDF",
        "MORSE",
        "WHIM",
        "GETAWAY",
    )
    param_to_index = {label: slot for slot, label in enumerate(feature_order)}

    model_fea = np.zeros(len(feature_order), dtype=int)
    study = optuna.load_study(study_name=study_name, storage=storage)
    best_trial = study.best_trial

    if unfixed:
        protected = ()
    else:
        # Base descriptors are always switched on and never overwritten.
        model_fea[required_indices] = 1
        protected = required_features

    for label, value in best_trial.params.items():
        slot = param_to_index.get(label)
        if slot is not None and label not in protected:
            model_fea[slot] = value

    if showlog:
        print(f"Best trial for study '{study_name}':")
        print("Best trial value:", best_trial.value)
        print("Best trial parameters:", best_trial.params)
        print("Generated fea:", model_fea)
        if not unfixed:
            print("Fixed features:", required_features)

    return model_fea
906
+
907
def selection_structure_compress(study_name, storage, input_dim, returnOnly=False):
    """Rebuild the Keras model described by the best trial of an Optuna study.

    Args:
        study_name: Name of the Optuna study to load.
        storage: Optuna storage passed to ``optuna.load_study``.
        input_dim: Number of input features of the network.
        returnOnly: When True, skip model construction and return only the
            learning rate.

    Returns:
        ``lr`` alone when ``returnOnly`` is True, otherwise the tuple
        ``(model, lr)`` where ``model`` is an uncompiled
        ``tf.keras.Sequential``.

    Raises:
        KeyError: If the best trial lacks a parameter this function reads
            (neither ``lr`` nor ``Learning_rate``, ``n_layers``,
            ``layer_dropout``, per-layer ``n_units_l_{i}`` /
            ``n_decay_l_{i}``, ``F_dropout_{i}`` or ``last_dropout``).
    """
    study = optuna.load_study(study_name=study_name, storage=storage)
    best_trial = study.best_trial
    params = best_trial.params
    print("Best trial params:", params)

    # Studies store the learning rate under either "lr" or "Learning_rate".
    # Only a missing key triggers the fallback; any other failure propagates.
    try:
        lr = params["lr"]
    except KeyError as e:
        print(f"Error occurred: {e}")
        print("Error occurred: changing name 'lr' to 'Learning_rate'")
        lr = params["Learning_rate"]

    if returnOnly:
        return lr

    n_layers = params["n_layers"]
    layer_dropout = params["layer_dropout"]

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(input_dim,)))

    for i in range(n_layers):
        num_hidden = params[f"n_units_l_{i}"]
        num_decay = params[f"n_decay_l_{i}"]

        model.add(tf.keras.layers.Dense(
            num_hidden,
            activation="relu",
            kernel_initializer='glorot_uniform',
            kernel_regularizer=tf.keras.regularizers.l2(num_decay),
        ))
        # NOTE(review): this LeakyReLU follows a Dense layer that already
        # applies ReLU, so it only ever sees non-negative inputs. Kept as-is
        # on the assumption that the tuning objective built the same stack;
        # confirm before changing either side.
        model.add(tf.keras.layers.LeakyReLU(alpha=0.01))
        if layer_dropout == 1:
            # Per-layer dropout, one rate per hidden layer.
            fdropout = params[f"F_dropout_{i}"]
            model.add(tf.keras.layers.Dropout(rate=fdropout))

    if layer_dropout == 0:
        # Single dropout layer placed after the last hidden layer.
        final_dropout = params["last_dropout"]
        model.add(tf.keras.layers.Dropout(rate=final_dropout))

    model.add(tf.keras.layers.Dense(units=1))

    print(f"Model created from best trial of '{study_name}':")
    print(" Params:", best_trial.params)
    print(" Best trial value:", best_trial.value)
    return model, lr
extra_code/learning_process.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import sys
4
+ import numpy as np
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.metrics import r2_score
7
+ import matplotlib.pyplot as plt
8
+ import tensorflow as tf
9
+ import logging
10
+
11
+ from tensorflow.keras.mixed_precision import set_global_policy
12
# Enable float16 compute with float32 variables for every layer created or
# loaded after this point.
set_global_policy('mixed_float16')

# Positional command-line arguments:
#   1: batch size   2: epochs   3: learning rate
#   4: path to the feature matrix (.npy)   5: path to the targets (.npy)
#   6: optional trial number (enables per-epoch intermediate reporting)
BATCHSIZE = int(sys.argv[1])
EPOCHS = int(sys.argv[2])
lr = float(sys.argv[3])
fps_file = sys.argv[4]
y_true_file = sys.argv[5]

trial_number = None
if len(sys.argv) > 6:
    trial_number = int(sys.argv[6])
20
+
21
def save_history_plot(history):
    """Save a two-panel training-curve figure to ``save_model/full_model.png``.

    The upper panel shows the training loss; the lower panel shows every
    other series recorded in ``history.history``.

    Args:
        history: Object exposing a ``history`` mapping of series name to
            per-epoch values (as returned by ``model.fit``).
    """
    records = history.history
    fig, (loss_ax, metric_ax) = plt.subplots(2, 1, figsize=(12, 8))

    loss_ax.plot(records['loss'], label='Training Loss')
    loss_ax.set_title('Model Loss')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.legend()

    for series_name, values in records.items():
        if series_name == 'loss':
            continue
        metric_ax.plot(values, label=series_name)
    metric_ax.set_title('Model Metrics')
    metric_ax.set_ylabel('Value')
    metric_ax.set_xlabel('Epoch')
    metric_ax.legend()

    fig.tight_layout()
    fig.savefig("save_model/full_model.png", dpi=300)
    plt.close(fig)
42
+
43
def load_model():
    """Load the uncompiled Keras model stored at ``save_model/full_model.keras``.

    Returns:
        The loaded model, or ``None`` when loading fails for any reason
        (the error is logged rather than raised).
    """
    model_path = "save_model/full_model.keras"
    try:
        loaded = tf.keras.models.load_model(model_path, compile=False)
    except Exception as e:
        logging.error(f"Error loading model: {e}")
        return None

    logging.info(f"Model successfully loaded from {model_path}")
    return loaded
52
+
53
def preprocess_data(xtr, ytr, shuffle=True):
    """Wrap feature/target arrays in a batched ``tf.data`` pipeline.

    The dataset is cached *before* shuffling. Caching after ``shuffle`` and
    ``batch`` (as this function previously did) stores the first epoch's
    shuffled batches, so every later epoch replays exactly the same order.

    Args:
        xtr: Feature array; its length is used as the shuffle buffer size.
        ytr: Target array aligned with ``xtr``.
        shuffle: Reshuffle the samples on every pass (default True, matching
            the previous behaviour of always shuffling).

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, targets)`` batches of
        ``BATCHSIZE`` samples, prefetched with ``tf.data.AUTOTUNE``.
    """
    dataset = tf.data.Dataset.from_tensor_slices((xtr, ytr)).cache()
    if shuffle:
        # Full-length buffer gives a uniform shuffle over all samples.
        dataset = dataset.shuffle(buffer_size=len(xtr))
    return dataset.batch(BATCHSIZE).prefetch(tf.data.AUTOTUNE)
57
+
58
def train_model(model, train_dataset, valid_dataset):
    """Fit ``model`` with early stopping and save the training-curve plot.

    When the module-level ``trial_number`` is set, each epoch's negated
    validation loss is printed as ``intermediate_value:<epoch>:<value>`` and
    stdout is flushed immediately.

    Args:
        model: Compiled Keras model.
        train_dataset: Dataset used for training.
        valid_dataset: Dataset used for validation / early stopping.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=50,
        restore_best_weights=True,
        mode='min',
        verbose=1,
    )

    callbacks = []
    if trial_number is not None:
        class _IntermediateReporter(tf.keras.callbacks.Callback):
            """Print the negated validation loss after every epoch."""

            def on_epoch_end(self, epoch, logs=None):
                if not logs or 'val_loss' not in logs:
                    return
                print(f"intermediate_value:{epoch}:{-logs['val_loss']}", flush=True)

        callbacks.append(_IntermediateReporter())
    callbacks.append(early_stop)

    history = model.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=EPOCHS,
        callbacks=callbacks,
        verbose=0,
    )
    save_history_plot(history)
    return history
87
+
88
def clear_gpu_memory():
    """Reset the Keras backend session and run a garbage-collection pass."""
    backend = tf.keras.backend
    backend.clear_session()
    gc.collect()
    logging.info("GPU memory cleared.")
92
+
93
def main():
    """Train the saved model on the given data and print its test-set R2.

    Steps: load and compile the model, load the feature/target arrays,
    split 80/20 into train/test and then hold out 10% of the training part
    for validation, train, predict on the test part and print the score.

    Output written to stdout:
        ``R2: <value>``      -- finite, positive score.
        ``R2: 0.0 (prune)``  -- score is NaN, infinite or not positive.
        ``0.000000``         -- any exception occurred (details are logged).
    """
    try:
        model = load_model()
        if model is None:
            raise ValueError("Failed to load model")

        # Use metric classes here: loss objects passed as metrics reduce each
        # batch to a scalar first, whereas metric objects accumulate their
        # statistics over the whole epoch.
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
            loss=tf.keras.losses.MeanSquaredError(),
            metrics=[
                tf.keras.metrics.MeanSquaredError(),
                tf.keras.metrics.MeanAbsoluteError(),
                tf.keras.metrics.RootMeanSquaredError(),
            ],
        )

        fps = np.load(fps_file)
        y_true = np.load(y_true_file)

        # Fail early with a clear message instead of a shape error in fit().
        model_input_shape = model.input_shape
        if model_input_shape[1] != fps.shape[1]:
            raise ValueError(f"Model input dimension ({model_input_shape[1]}) does not match data dimension ({fps.shape[1]})")

        xtr, xte, ytr, yte = train_test_split(fps, y_true, test_size=0.2, random_state=42)
        xtr, xtev, ytr, ytev = train_test_split(xtr, ytr, test_size=0.1, random_state=42)
        train_dataset = preprocess_data(xtr, ytr)
        valid_dataset = preprocess_data(xtev, ytev)

        train_model(model, train_dataset, valid_dataset)

        ypred = model.predict(xte, verbose=0)

        if np.any(np.isnan(ypred)) or np.any(np.isinf(ypred)):
            raise ValueError("Invalid predictions: NaN or inf values encountered.")

        r2_result = r2_score(yte, ypred)

        if np.isnan(r2_result) or np.isinf(r2_result) or r2_result <= 0:
            print("R2: 0.0 (prune)")
        else:
            print(f"R2: {r2_result:.6f}")

    except Exception as e:
        # Top-level boundary: keep the traceback in the log so failures can
        # be diagnosed, then emit the fallback score.
        logging.exception(f"Error in learning process: {e}")
        # NOTE(review): this line has no "R2:" prefix, unlike the success
        # paths; left unchanged because the consumer of stdout is not visible
        # here -- confirm what the caller parses.
        print("0.000000")

    finally:
        clear_gpu_memory()


if __name__ == "__main__":
    main()