In [1]:
import os
import sys
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import gc
import time
import subprocess
import logging
from concurrent.futures import ProcessPoolExecutor, as_completed

In [2]:
from rdkit import Chem
from rdkit.Chem import AllChem, DataStructs, Draw
from rdkit import RDConfig
from rdkit.Chem import Descriptors, rdMolDescriptors, Lipinski, rdDistGeom, rdPartialCharges
from rdkit.Chem.AllChem import GetMorganGenerator
from rdkit.DataStructs.cDataStructs import ConvertToNumpyArray
from rdkit.Avalon.pyAvalonTools import GetAvalonFP

In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

2024-11-04 22:59:19.830835: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-04 22:59:19.845573: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-04 22:59:19.849643: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-04 22:59:19.860597: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, root_mean_squared_error

In [5]:
import optuna
from optuna.trial import TrialState
from optuna.integration import TFKerasPruningCallback

In [6]:
from extra_code.feature_selection import selection_data_descriptor_compress, selection_fromStudy_compress

In [7]:
tf.keras.backend.clear_session()
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
 try:
 for gpu in gpus:
 tf.config.experimental.set_memory_growth(gpu, True)
 except RuntimeError as e:
 print(e)

I0000 00:00:1730728761.540219 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730728761.611323 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1730728761.611412 2495981 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


In [8]:
target_path = "result/6_ANO_network_[fea_struc]"
os.makedirs(target_path, exist_ok=True)

In [9]:
data_ws = pd.read_csv('./data/ws496_logS.csv', dtype={'SMILES': 'string'})
smiles_ws = data_ws['SMILES']
y_ws = data_ws.iloc[:, 2]

data_delaney = pd.read_csv('./data/delaney-processed.csv', dtype={'smiles': 'string'})
smiles_de = data_delaney['smiles']
y_de = data_delaney.iloc[:, 1]

data_lovric2020 = pd.read_csv('./data/Lovric2020_logS0.csv', dtype={'isomeric_smiles': 'string'})
smiles_lo = data_lovric2020['isomeric_smiles']
y_lo = data_lovric2020.iloc[:, 1]

data_huuskonen = pd.read_csv('./data/huusk.csv', dtype={'SMILES': 'string'})
smiles_hu = data_huuskonen['SMILES']
y_hu = data_huuskonen.iloc[:, -1].astype('float')

In [10]:
def mol3d(mol):
 mol = Chem.AddHs(mol)
 optimization_methods = [
 (AllChem.EmbedMolecule, (mol, AllChem.ETKDGv3()), {}),
 (AllChem.UFFOptimizeMolecule, (mol,), {'maxIters': 200}),
 (AllChem.MMFFOptimizeMolecule, (mol,), {'maxIters': 200})
 ]

 for method, args, kwargs in optimization_methods:
 try:
 method(*args, **kwargs)
 if mol.GetNumConformers() > 0:
 return mol
 except ValueError as e:
 print(f"Error: {e} - Trying next optimization method [{method}]")

 print(f"Invalid mol for 3d {'\033[94m'}{Chem.MolToSmiles(mol)}{'\033[0m'} - No conformer generated")
 return None

In [11]:
def convert_smiles_to_mol(smiles, fail_folder=None, index=None, yvalue=None):
 mol = Chem.MolFromSmiles(smiles)
 if mol is None:
 print(f"[convert_smiles_to_mol] Cannot convert {smiles} to Mols")
 return None, {"smiles": smiles, "y_value": yvalue, "error": "Invalid SMILES"}

 try:
 Chem.Kekulize(mol, clearAromaticFlags=True)
 isomeric_smiles = Chem.MolToSmiles(mol, isomericSmiles=True)
 mol = Chem.MolFromSmiles(isomeric_smiles)
 except Exception as e:
 print(f"[convert_smiles_to_mol] failed {smiles} isomeric_smiles by {e}")
 if fail_folder and index is not None:
 img_path = os.path.join(fail_folder, f"mol_{index}.png")
 img = Draw.MolToImage(mol)
 img.save(img_path)
 return None, {"smiles": smiles, "y_value": yvalue, "error": f"Isomeric SMILES error: {e}"}

 try:
 Chem.SanitizeMol(mol)
 except Exception as e:
 print(f"[convert_smiles_to_mol] failed {smiles} SanitizeMol by {e}")
 if fail_folder and index is not None:
 img_path = os.path.join(fail_folder, f"mol_{index}.png")
 img = Draw.MolToImage(mol)
 img.save(img_path)
 return None, {"smiles": smiles, "y_value": yvalue, "error": f"SanitizeMol error: {e}"}

 return mol, None

In [12]:
def process_smiles(smiles, yvalue, fail_folder, index):
 mol, error = convert_smiles_to_mol(smiles, fail_folder, index, yvalue)
 if error:
 return None, None, error

 mol_3d = mol3d(mol)
 if mol_3d:
 return smiles, yvalue, None
 else:
 img_path = os.path.join(fail_folder, f"mol_{index}.png")
 img = Draw.MolToImage(mol)
 img.save(img_path)
 return None, None, {"smiles": smiles, "y_value": yvalue}

def process_dataset(smiles_list, y_values, dataset_name, target_path="result", max_workers=None):
 start = time.time()
 valid_smiles, valid_y = [], []
 error_smiles_list = []
 fail_folder = f"{target_path}/failed/{dataset_name}"
 os.makedirs(fail_folder, exist_ok=True)

 with ProcessPoolExecutor(max_workers=max_workers) as executor:
 futures = [
 executor.submit(process_smiles, smiles, yvalue, fail_folder, i)
 for i, (smiles, yvalue) in enumerate(zip(smiles_list, y_values))
 ]
 for future in as_completed(futures):
 smiles, yvalue, error = future.result()
 if error:
 error_smiles_list.append(error)
 elif smiles is not None and yvalue is not None:
 valid_smiles.append(smiles)
 valid_y.append(yvalue)

 if error_smiles_list:
 error_df = pd.DataFrame(error_smiles_list)
 error_df.to_csv(os.path.join(fail_folder, "failed_smiles.csv"), index=False)
 print(f" [{dataset_name:<10}] : {time.time()-start:.4f} sec")
 return valid_smiles, valid_y

In [13]:
smiles_ws, y_ws = process_dataset(smiles_ws, y_ws, "ws496", target_path)
smiles_de, y_de = process_dataset(smiles_de, y_de, "delaney", target_path)
smiles_lo, y_lo = process_dataset(smiles_lo, y_lo, "Lovric2020_logS0", target_path)
smiles_hu, y_hu = process_dataset(smiles_hu, y_hu, "huusk", target_path)

 [ws496 ] : 0.8667 sec
 [delaney ] : 1.4338 sec
Error: Bad Conformer Id - Trying next optimization method []
Error: Bad Conformer Id - Trying next optimization method []
Invalid mol for 3d [94m[H]O[C@@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H][0m - No conformer generated
Error: Bad Conformer Id - Trying next optimization method []
Error: Bad Conformer Id - Trying next optimization method []
Invalid mol for 3d [94m[H]O[C@]([H])(c1c([H])c([H])nc2c([H])c([H])c(OC([H])([H])[H])c([H])c12)[C@@]1([H])[N@]2C([H])([H])C([H])([H])[C@@]([H])(C1([H])[H])[C@@]([H])(C([H])=C([H])[H])C2([H])[H][0m - No conformer generated
 [Lovric2020_logS0] : 8.8955 sec
 [huusk ] : 1.5899 sec


In [14]:
LEN_OF_FF = 2048
LEN_OF_MA = 167
LEN_OF_AV = 512

In [15]:
def get_fingerprints(mol):
 if mol is None:
 return None, None, None
 
 morgan_generator = GetMorganGenerator(radius=2, fpSize=LEN_OF_FF)
 ecfp = morgan_generator.GetFingerprint(mol)
 ecfp_array = np.zeros((LEN_OF_FF,),dtype=int)
 DataStructs.ConvertToNumpyArray(ecfp, ecfp_array)
 
 maccs = Chem.rdMolDescriptors.GetMACCSKeysFingerprint(mol)

 avalon_fp = GetAvalonFP(mol)
 avalon_array = np.zeros((LEN_OF_AV,),dtype=int)
 DataStructs.ConvertToNumpyArray(avalon_fp, avalon_array)
 
 return ecfp_array, maccs, avalon_array

def fp_converter(data, use_parallel=True):
 mols = [Chem.MolFromSmiles(smi) for smi in data]
 
 if use_parallel:
 try: 
 with ProcessPoolExecutor() as executor:
 results = list(executor.map(get_fingerprints, mols))
 except Exception as e:
 print(f"Parallel processing failed due to: {e}. Falling back to sequential processing.")
 use_parallel = False
 
 if not use_parallel:
 results = [get_fingerprints(mol) for mol in mols]
 
 ECFP, MACCS, AvalonFP = zip(*results)
 
 ECFP_container = np.vstack([arr for arr in ECFP if arr is not None])
 MACCS_container = np.zeros((len(MACCS), LEN_OF_MA), dtype=int)
 AvalonFP_container = np.vstack([arr for arr in AvalonFP if arr is not None])

 for i, fp in enumerate(MACCS):
 if fp is not None:
 DataStructs.ConvertToNumpyArray(fp, MACCS_container[i])
 
 return mols, ECFP_container, MACCS_container, AvalonFP_container

In [16]:
mol_ws, x_ws, MACCS_ws, AvalonFP_ws = fp_converter(smiles_ws,target_path)
mol_de, x_de, MACCS_de, AvalonFP_de = fp_converter(smiles_de,target_path)
mol_lo, x_lo, MACCS_lo, AvalonFP_lo = fp_converter(smiles_lo,target_path)
mol_hu, x_hu, MACCS_hu, AvalonFP_hu = fp_converter(smiles_hu,target_path)
del smiles_ws
del smiles_de
del smiles_lo
del smiles_hu
gc.collect()

0

In [17]:
def concatenate_to_numpy(*dataframes):
 numpy_arrays = [df.to_numpy() if isinstance(df, pd.DataFrame) else df for df in dataframes]
 if not all(isinstance(arr, np.ndarray) for arr in numpy_arrays):
 raise ValueError("All inputs must be either pandas DataFrame or numpy array")
 return np.concatenate(numpy_arrays, axis=1)

In [18]:
group_nws = concatenate_to_numpy(x_ws, MACCS_ws, AvalonFP_ws)
group_nde = concatenate_to_numpy(x_de, MACCS_de, AvalonFP_de)
group_nlo = concatenate_to_numpy(x_lo, MACCS_lo, AvalonFP_lo)
group_nhu = concatenate_to_numpy(x_hu, MACCS_hu, AvalonFP_hu)
del x_ws, MACCS_ws, AvalonFP_ws
del x_de, MACCS_de, AvalonFP_de
del x_lo, MACCS_lo, AvalonFP_lo
del x_hu, MACCS_hu, AvalonFP_hu
gc.collect()

0

In [19]:
try:
 storage = optuna.storages.RDBStorage(url="sqlite:///ano_analysis.db", engine_kwargs={"connect_args": {"timeout": 10000}})
 # storage_urls = "postgresql+psycopg2://postgres:{pwd}}@localhost:{num}}"
 # storage = optuna.storages.RDBStorage(url=storage_urls)
except Exception as e:
 print(f"Error occured: {e}")

In [20]:
try:
 ws_fea = selection_fromStudy_compress('ANO_ws_feature', storage)
except Exception as e:
 print(f"Error occured: {e}") 
 ws_fea =[
 1, # 1 - "MolWeight"
 1, # 2 - "Mol_logP"
 1, # 3 - "Mol_MR"
 1, # 4 - "Mol_TPSA"
 0, # 5 - "NumRotatableBonds"
 0, # 6 - "HeavyAtomCount"
 0, # 7 - "NumHAcceptors"
 0, # 8 - "NumHDonors"
 0, # 9 - "NumHeteroatoms"
 1, # 10 - "NumValenceElec"
 1, # 11 - "NHOHCount"
 1, # 12 - "NOCount"
 0, # 13 - "RingCount"
 1, # 14 - "NumAromaticRings"
 0, # 15 - "NumSaturatedRings"
 0, # 16 - "NumAliphaticRings"
 0, # 17 - "LabuteASA"
 0, # 18 - "NumValenceElectrons"
 1, # 19 - "BalabanJ"
 1, # 20 - "BertzCT"
 0, # 21 - "Ipc"
 0, # 22 - "kappa_Series[1-3]_ind"
 1, # 23 - "Chi_Series[13]_ind"
 1, # 24 - "Phi"
 0, # 25 - "HallKierAlpha"
 0, # 26 - "NumAmideBonds"
 1, # 27 - "FractionCSP3"
 0, # 28 - "NumSpiroAtoms"
 1, # 29 - "NumBridgeheadAtoms"
 1, # 30 - "PEOE_VSA_Series[1-14]_ind"
 1, # 31 - "SMR_VSA_Series[1-10]_ind"
 0, # 32 - "SlogP_VSA_Series[1-12]_ind"
 1, # 33 - "EState_VSA_Series[1-11]_ind"
 0, # 34 - "VSA_EState_Series[1-10]_ind"
 0, # 35 - "Asphericity"
 1, # 36 - "PBF"
 0, # 37 - "RadiusOfGyration"
 0, # 38 - "InertialShapeFactor"
 1, # 39 - "Eccentricity"
 0, # 40 - "SpherocityIndex"
 1, # 41 - "PMI_series[1-3]_ind"
 0, # 42 - "NPR_series[1-2]_ind"
 0, # 43 - "MQNs"
 0, # 44 - "AUTOCORR2D"
 1, # 45 - "BCUT2D"
 0, # 46 - "AUTOCORR3D"
 1, # 47 - "RDF"
 0, # 48 - "MORSE"
 1, # 49 - "WHIM"
 0, # 50 - "GETAWAY" 
 ]

Best trial for study 'ANO_ws_feature':
Best trial value: 0.932153
Best trial parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElectrons': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 0, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 0, 'Phi': 0, 'HallKierAlpha': 0, 'NumAmideBonds': 0, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 0, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 1, 'AUTOCORR3D': 1, 'RDF': 0, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 0}
Generat

In [21]:
try:
 de_fea = selection_fromStudy_compress('ANO_de_feature', storage)
except Exception as e:
 print(f"Error occured: {e}") 
 de_fea =[
 1, # 1 - "MolWeight"
 1, # 2 - "Mol_logP"
 1, # 3 - "Mol_MR"
 1, # 4 - "Mol_TPSA"
 0, # 5 - "NumRotatableBonds"
 0, # 6 - "HeavyAtomCount"
 1, # 7 - "NumHAcceptors"
 1, # 8 - "NumHDonors"
 0, # 9 - "NumHeteroatoms"
 0, # 10 - "NumValenceElec"
 1, # 11 - "NHOHCount"
 0, # 12 - "NOCount"
 0, # 13 - "RingCount"
 0, # 14 - "NumAromaticRings"
 0, # 15 - "NumSaturatedRings"
 1, # 16 - "NumAliphaticRings"
 1, # 17 - "LabuteASA"
 0, # 18 - "NumValenceElectrons"
 1, # 19 - "BalabanJ"
 1, # 20 - "BertzCT"
 1, # 21 - "Ipc"
 0, # 22 - "kappa_Series[1-3]_ind"
 0, # 23 - "Chi_Series[13]_ind"
 0, # 24 - "Phi"
 1, # 25 - "HallKierAlpha"
 1, # 26 - "NumAmideBonds"
 1, # 27 - "FractionCSP3"
 1, # 28 - "NumSpiroAtoms"
 0, # 29 - "NumBridgeheadAtoms"
 1, # 30 - "PEOE_VSA_Series[1-14]_ind"
 1, # 31 - "SMR_VSA_Series[1-10]_ind"
 0, # 32 - "SlogP_VSA_Series[1-12]_ind"
 0, # 33 - "EState_VSA_Series[1-11]_ind"
 0, # 34 - "VSA_EState_Series[1-10]_ind"
 1, # 35 - "Asphericity"
 0, # 36 - "PBF"
 0, # 37 - "RadiusOfGyration"
 0, # 38 - "InertialShapeFactor"
 0, # 39 - "Eccentricity"
 0, # 40 - "SpherocityIndex"
 0, # 41 - "PMI_series[1-3]_ind"
 1, # 42 - "NPR_series[1-2]_ind"
 0, # 43 - "MQNs"
 1, # 44 - "AUTOCORR2D"
 1, # 45 - "BCUT2D"
 0, # 46 - "AUTOCORR3D"
 1, # 47 - "RDF"
 0, # 48 - "MORSE"
 1, # 49 - "WHIM"
 0, # 50 - "GETAWAY" 
 ]

Best trial for study 'ANO_de_feature':
Best trial value: 0.973052
Best trial parameters: {'NumRotatableBonds': 1, 'HeavyAtomCount': 1, 'NumHAcceptors': 0, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElectrons': 1, 'NHOHCount': 1, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 1, 'BalabanJ': 0, 'BertzCT': 0, 'Ipc': 0, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 0, 'NumAmideBonds': 1, 'FractionCSP3': 1, 'NumSpiroAtoms': 1, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 1, 'SlogP_VSA_Series[1-12]_ind': 0, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 1, 'PBF': 1, 'RadiusOfGyration': 1, 'InertialShapeFactor': 0, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 1}
Generat

In [22]:
try:
 lo_fea = selection_fromStudy_compress('ANO_lo_feature', storage)
except Exception as e:
 print(f"Error occured: {e}") 
 lo_fea =[
 1, # 1 - "MolWeight"
 1, # 2 - "Mol_logP"
 1, # 3 - "Mol_MR"
 1, # 4 - "Mol_TPSA"
 1, # 5 - "NumRotatableBonds"
 0, # 6 - "HeavyAtomCount"
 0, # 7 - "NumHAcceptors"
 0, # 8 - "NumHDonors"
 1, # 9 - "NumHeteroatoms"
 1, # 10 - "NumValenceElec"
 1, # 11 - "NHOHCount"
 1, # 12 - "NOCount"
 0, # 13 - "RingCount"
 1, # 14 - "NumAromaticRings"
 0, # 15 - "NumSaturatedRings"
 0, # 16 - "NumAliphaticRings"
 0, # 17 - "LabuteASA"
 1, # 18 - "NumValenceElectrons"
 0, # 19 - "BalabanJ"
 0, # 20 - "BertzCT"
 0, # 21 - "Ipc"
 1, # 22 - "kappa_Series[1-3]_ind"
 0, # 23 - "Chi_Series[13]_ind"
 1, # 24 - "Phi"
 1, # 25 - "HallKierAlpha"
 0, # 26 - "NumAmideBonds"
 1, # 27 - "FractionCSP3"
 1, # 28 - "NumSpiroAtoms"
 0, # 29 - "NumBridgeheadAtoms"
 1, # 30 - "PEOE_VSA_Series[1-14]_ind"
 1, # 31 - "SMR_VSA_Series[1-10]_ind"
 1, # 32 - "SlogP_VSA_Series[1-12]_ind"
 0, # 33 - "EState_VSA_Series[1-11]_ind"
 1, # 34 - "VSA_EState_Series[1-10]_ind"
 1, # 35 - "Asphericity"
 0, # 36 - "PBF"
 1, # 37 - "RadiusOfGyration"
 0, # 38 - "InertialShapeFactor"
 0, # 39 - "Eccentricity"
 1, # 40 - "SpherocityIndex"
 0, # 41 - "PMI_series[1-3]_ind"
 1, # 42 - "NPR_series[1-2]_ind"
 0, # 43 - "MQNs"
 0, # 44 - "AUTOCORR2D"
 0, # 45 - "BCUT2D"
 0, # 46 - "AUTOCORR3D"
 1, # 47 - "RDF"
 0, # 48 - "MORSE"
 0, # 49 - "WHIM"
 0, # 50 - "GETAWAY" 
 ]

Best trial for study 'ANO_lo_feature':
Best trial value: 0.843203
Best trial parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 0, 'NumHDonors': 1, 'NumHeteroatoms': 0, 'NumValenceElectrons': 0, 'NHOHCount': 0, 'NOCount': 0, 'RingCount': 0, 'NumAromaticRings': 1, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'BalabanJ': 1, 'BertzCT': 0, 'Ipc': 1, 'kappa_Series[1-3]_ind': 1, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 1, 'PEOE_VSA_Series[1-14]_ind': 0, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 0, 'VSA_EState_Series[1-10]': 1, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 0, 'PBF': 0, 'RadiusOfGyration': 1, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 1, 'NPR_series[1-2]_ind': 0, 'AUTOCORR3D': 0, 'RDF': 0, 'MORSE': 1, 'WHIM': 0, 'GETAWAY': 0}
Generat

In [23]:
try:
 hu_fea = selection_fromStudy_compress('ANO_hu_feature', storage)
except Exception as e:
 print(f"Error occured: {e}") 
 hu_fea =[
 1, # 1 - "MolWeight"
 1, # 2 - "Mol_logP"
 1, # 3 - "Mol_MR"
 1, # 4 - "Mol_TPSA"
 0, # 5 - "NumRotatableBonds"
 1, # 6 - "HeavyAtomCount"
 0, # 7 - "NumHAcceptors"
 1, # 8 - "NumHDonors"
 1, # 9 - "NumHeteroatoms"
 1, # 10 - "NumValenceElec"
 0, # 11 - "NHOHCount"
 1, # 12 - "NOCount"
 1, # 13 - "RingCount"
 1, # 14 - "NumAromaticRings"
 1, # 15 - "NumSaturatedRings"
 0, # 16 - "NumAliphaticRings"
 0, # 17 - "LabuteASA"
 0, # 18 - "NumValenceElectrons"
 1, # 19 - "BalabanJ"
 1, # 20 - "BertzCT"
 1, # 21 - "Ipc"
 0, # 22 - "kappa_Series[1-3]_ind"
 1, # 23 - "Chi_Series[13]_ind"
 1, # 24 - "Phi"
 0, # 25 - "HallKierAlpha"
 1, # 26 - "NumAmideBonds"
 0, # 27 - "FractionCSP3"
 1, # 28 - "NumSpiroAtoms"
 0, # 29 - "NumBridgeheadAtoms"
 1, # 30 - "PEOE_VSA_Series[1-14]_ind"
 1, # 31 - "SMR_VSA_Series[1-10]_ind"
 1, # 32 - "SlogP_VSA_Series[1-12]_ind"
 1, # 33 - "EState_VSA_Series[1-11]_ind"
 1, # 34 - "VSA_EState_Series[1-10]_ind"
 1, # 35 - "Asphericity"
 1, # 36 - "PBF"
 1, # 37 - "RadiusOfGyration"
 1, # 38 - "InertialShapeFactor"
 0, # 39 - "Eccentricity"
 0, # 40 - "SpherocityIndex"
 1, # 41 - "PMI_series[1-3]_ind"
 1, # 42 - "NPR_series[1-2]_ind"
 1, # 43 - "MQNs"
 1, # 44 - "AUTOCORR2D"
 1, # 45 - "BCUT2D"
 1, # 46 - "AUTOCORR3D"
 1, # 47 - "RDF"
 0, # 48 - "MORSE"
 0, # 49 - "WHIM"
 0, # 50 - "GETAWAY" 
 ]

Best trial for study 'ANO_hu_feature':
Best trial value: 0.939862
Best trial parameters: {'NumRotatableBonds': 0, 'HeavyAtomCount': 0, 'NumHAcceptors': 1, 'NumHDonors': 0, 'NumHeteroatoms': 1, 'NumValenceElectrons': 0, 'NHOHCount': 0, 'NOCount': 1, 'RingCount': 1, 'NumAromaticRings': 0, 'NumSaturatedRings': 1, 'NumAliphaticRings': 1, 'LabuteASA': 0, 'BalabanJ': 0, 'BertzCT': 1, 'Ipc': 1, 'kappa_Series[1-3]_ind': 0, 'Chi_Series[13]_ind': 1, 'Phi': 1, 'HallKierAlpha': 1, 'NumAmideBonds': 0, 'FractionCSP3': 0, 'NumSpiroAtoms': 0, 'NumBridgeheadAtoms': 0, 'PEOE_VSA_Series[1-14]_ind': 1, 'SMR_VSA_Series[1-10]_ind': 0, 'SlogP_VSA_Series[1-12]_ind': 1, 'EState_VSA_Series[1-11]_ind': 1, 'VSA_EState_Series[1-10]': 0, 'MQNs': 1, 'AUTOCORR2D': 1, 'BCUT2D': 1, 'Asphericity': 1, 'PBF': 0, 'RadiusOfGyration': 0, 'InertialShapeFactor': 1, 'Eccentricity': 1, 'SpherocityIndex': 1, 'PMI_series[1-3]_ind': 0, 'NPR_series[1-2]_ind': 1, 'AUTOCORR3D': 0, 'RDF': 1, 'MORSE': 0, 'WHIM': 0, 'GETAWAY': 1}
Generat

In [24]:
new_ws = selection_data_descriptor_compress(ws_fea, group_nws, mol_ws, 'ws')
new_de = selection_data_descriptor_compress(de_fea, group_nde, mol_de, 'de')
new_lo = selection_data_descriptor_compress(lo_fea, group_nlo, mol_lo, 'lo')
new_hu = selection_data_descriptor_compress(hu_fea, group_nhu, mol_hu, 'hu')
del ws_fea, group_nws, mol_ws
del de_fea, group_nde, mol_de
del lo_fea, group_nlo, mol_lo
del hu_fea, group_nhu, mol_hu
gc.collect()
# 6m 10.3s

BCUT2D calculation failed: ERROR: No Gasteiger Partial Charge parameters for Element: Sn Mode: sp3


0

In [25]:
import logging
import warnings

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices'
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/local/cuda --xla_gpu_force_compilation_parallelism=1'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
os.environ['TF_NUMA_NODES'] = '1'

warnings.filterwarnings('ignore')

warnings.simplefilter(action='ignore', category=FutureWarning)

logging.getLogger('tensorflow').setLevel(logging.ERROR)

tf.get_logger().setLevel('ERROR')
tf.autograph.set_verbosity(0)

def suppress_warnings(condition=True):
 if condition:
 logging.getLogger('tensorflow').setLevel(logging.ERROR)
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
 else:
 logging.getLogger('tensorflow').setLevel(logging.WARNING)
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'

suppress_warnings(condition=True)

In [26]:
BATCHSIZE = 16
EPOCHS = 1000
# lr = 0.0001
# decay = 1e-4

In [27]:
# def new_model(trial):
# n_layers = trial.suggest_int("n_layers", 1, 3)
# model = tf.keras.Sequential()
# layer_dropout = trial.suggest_int("layer_dropout", 0,1)
# for i in range(n_layers):
# num_hidden = trial.suggest_int("n_units_l_{}".format(i), 2, 1e4-1)
# num_decay = trial.suggest_categorical("n_decay_l_{}".format(i), [1e-3,1e-4,1e-5])
# model.add(
# tf.keras.layers.Dense(
# num_hidden,
# activation="relu",
# kernel_initializer='glorot_uniform',
# kernel_regularizer=tf.keras.regularizers.l2(num_decay),
# )
# )
# if layer_dropout==1:
# fdropout1 = trial.suggest_categorical("F_dropout_{}".format(i),[0.1,0.2])
# model.add(Dropout(rate=fdropout1))
# if layer_dropout==0:
# fdropout2 = trial.suggest_categorical("Final_dropout",[0.1,0.2])
# model.add(Dropout(rate=fdropout2))
# model.add(Dense(units=1))
# learningr = trial.suggest_categorical("Learning_rate",[0.01,0.001,0.0001])
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learningr),
# loss=tf.keras.losses.MeanSquaredError(),
# metrics=[tf.keras.losses.MeanSquaredError(),
# tf.keras.losses.MeanAbsoluteError(),
# tf.keras.metrics.RootMeanSquaredError()])
# return model

def search_model(trial, input_dim):
 n_layers = trial.suggest_int("n_layers", 1, 3)
 model = tf.keras.Sequential()
 model.add(tf.keras.layers.Input(shape=(input_dim,)))
 layer_dropout = trial.suggest_int("layer_dropout", 0, 1)
 
 for i in range(n_layers):
 num_hidden = trial.suggest_int(f"n_units_l_{i}", 2, 9999)
 num_decay = trial.suggest_categorical(f"n_decay_l_{i}", [1e-4,1e-5,1e-6])
 model.add(
 tf.keras.layers.Dense(
 num_hidden,
 # activation="relu",
 kernel_initializer='glorot_uniform',
 kernel_regularizer=tf.keras.regularizers.l2(num_decay),
 )
 )
 model.add(tf.keras.layers.LeakyReLU(alpha=0.01))
 if layer_dropout == 1:
 fdropout1 = trial.suggest_categorical(f"F_dropout_{i}", [0.1, 0.2, 0.3])
 model.add(tf.keras.layers.Dropout(rate=fdropout1))
 
 if layer_dropout == 0:
 fdropout2 = trial.suggest_categorical("last_dropout", [0.1, 0.2, 0.3])
 model.add(tf.keras.layers.Dropout(rate=fdropout2))
 
 model.add(tf.keras.layers.Dense(units=1))
 # # Colab
 # learningr = trial.suggest_categorical("Learning_rate",[0.01,0.001,0.0001])
 # model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learningr),
 # loss=tf.keras.losses.MeanSquaredError(),
 # metrics=[tf.keras.losses.MeanSquaredError(),
 # tf.keras.losses.MeanAbsoluteError(),
 # tf.keras.metrics.RootMeanSquaredError()])
 return model


def save_model(trial, x_data):
 model_path = "save_model/full_model.keras"
 
 if not os.path.exists(model_path):
 try:
 model = search_model(trial, x_data.shape[1])
 os.makedirs("save_model", exist_ok=True)
 model.save(model_path)
 print(f"Model successfully saved to {model_path}")
 except Exception as e:
 print(f"Error saving model: {e}")
 else:
 print(f"Model already exists at {model_path}")
 os.remove(model_path)
 save_model(trial, x_data)

In [28]:
from sklearn.model_selection import train_test_split
xtr_fws, xte_fws, ytr_fws, yte_fws = train_test_split(new_ws, y_ws, test_size = 0.1, random_state = 42)
xtr_fde, xte_fde, ytr_fde, yte_fde = train_test_split(new_de, y_de, test_size = 0.1, random_state = 42)
xtr_flo, xte_flo, ytr_flo, yte_flo = train_test_split(new_lo, y_lo, test_size = 0.1, random_state = 42)
xtr_fhu, xte_fhu, ytr_fhu, yte_fhu = train_test_split(new_hu, y_hu, test_size = 0.1, random_state = 42)

In [29]:
# # Colab
# def preprocess_data(xtr, ytr):
# dataset = tf.data.Dataset.from_tensor_slices((xtr, ytr))
# dataset = dataset.shuffle(buffer_size=len(xtr)).batch(BATCHSIZE).cache().prefetch(tf.data.AUTOTUNE)
# return dataset

# cb = tf.keras.callbacks.EarlyStopping(
# monitor='loss', 
# patience=5,
# restore_best_weights=True,
# # min_delta=0.001,
# mode='min',
# verbose=1
# )

In [30]:
# # Colab
# def objective_ws_network(trial):
# tf.keras.backend.clear_session()
# model = search_model(trial, xtr_fws.shape[1])
# train_data = preprocess_data(xtr_fws, ytr_fws)
# model.fit(
# train_data,
# batch_size=BATCHSIZE,
# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],
# epochs=EPOCHS,
# verbose=0,
# )
# y_pred_search = model.predict(xte_fws, verbose=0)
# score = r2_score(yte_fws, y_pred_search)
# del model
# tf.keras.backend.clear_session()
# gc.collect()
# return score

In [31]:
# # Colab
# def objective_de_network(trial):
# tf.keras.backend.clear_session()
# model = search_model(trial, xtr_fde.shape[1])
# train_data = preprocess_data(xtr_fde, ytr_fde)
# model.fit(
# train_data,
# batch_size=BATCHSIZE,
# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],
# epochs=EPOCHS,
# verbose=0,
# )
# y_pred_search = model.predict(xte_fde, verbose=0)
# score = r2_score(yte_fde, y_pred_search)
# del model
# tf.keras.backend.clear_session()
# gc.collect()
# return score

In [32]:
# # Colab
# def objective_lo_network(trial):
# tf.keras.backend.clear_session()
# model = search_model(trial, xtr_flo.shape[1])
# train_data = preprocess_data(xtr_flo, ytr_flo)
# model.fit(
# train_data,
# batch_size=BATCHSIZE,
# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],
# epochs=EPOCHS,
# verbose=0,
# )
# y_pred_search = model.predict(xte_flo, verbose=0)
# score = r2_score(yte_flo, y_pred_search)
# del model
# tf.keras.backend.clear_session()
# gc.collect()
# return score

In [33]:
# # Colab
# def objective_hu_network(trial):
# tf.keras.backend.clear_session()
# model = search_model(trial, xtr_fhu.shape[1])
# train_data = preprocess_data(xtr_fhu, ytr_fhu)
# model.fit(
# train_data,
# batch_size=BATCHSIZE,
# callbacks=[cb,TFKerasPruningCallback(trial,'loss')],
# epochs=EPOCHS,
# verbose=0,
# )
# y_pred_search = model.predict(xte_fhu, verbose=0)
# score = r2_score(yte_fhu, y_pred_search)
# del model
# tf.keras.backend.clear_session()
# gc.collect()
# return score

In [34]:
def objective_ws_network(trial):
 r2_result = None
 current_step = 0 
 try:
 y_true = np.asarray(y_ws).astype('float')
 np.save('new_fps.npy', new_ws)
 np.save('y_true.npy', y_true)
 
 save_model(trial, new_ws)

 lr = trial.suggest_categorical(f"lr", [0.001,0.0001,0.00001])

 result = subprocess.run(['python3', './extra_code/learning_process.py',
 str(BATCHSIZE), str(EPOCHS), 
 str(lr), 
 'new_fps.npy', 'y_true.npy', str(trial.number)],
 stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
 
 if result.stderr:
 filtered_stderr = '\n'.join([line for line in result.stderr.split('\n') 
 if "could not open file to read NUMA node" not in line 
 and "Your kernel may have been built without NUMA support" not in line])
 if filtered_stderr:
 print(f"Error in subprocess: {filtered_stderr}", file=sys.stderr)

 lines = result.stdout.splitlines()
 for line in lines:
 if line.startswith("intermediate_value:"):
 _, step, value = line.split(":")
 step = int(step)
 value = float(value)
 current_step = step
 
 trial.report(value, step)
 
 if trial.should_prune():
 raise optuna.exceptions.TrialPruned()

 for line in reversed(lines):
 if "R2:" in line:
 if "(prune)" in line:
 raise optuna.exceptions.TrialPruned()
 else:
 r2_result = float(line.split(":")[1].strip())
 break

 except optuna.exceptions.TrialPruned:
 print(f"Trial pruned at step {current_step}")
 raise
 except Exception as e:
 print(f"Exception occurred: {e}", file=sys.stderr)
 r2_result = 0.0

 gc.collect()
 return r2_result if r2_result is not None else 0.0

In [35]:
def objective_de_network(trial):
 r2_result = None
 current_step = 0 
 try:
 y_true = np.asarray(y_de).astype('float')
 np.save('new_fps.npy', new_de)
 np.save('y_true.npy', y_true)
 
 save_model(trial, new_de)

 lr = trial.suggest_categorical(f"lr", [0.001,0.0001,0.00001])

 result = subprocess.run(['python3', './extra_code/learning_process.py',
 str(BATCHSIZE), str(EPOCHS), 
 str(lr), 
 'new_fps.npy', 'y_true.npy', str(trial.number)],
 stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
 
 if result.stderr:
 filtered_stderr = '\n'.join([line for line in result.stderr.split('\n') 
 if "could not open file to read NUMA node" not in line 
 and "Your kernel may have been built without NUMA support" not in line])
 if filtered_stderr:
 print(f"Error in subprocess: {filtered_stderr}", file=sys.stderr)

 lines = result.stdout.splitlines()
 for line in lines:
 if line.startswith("intermediate_value:"):
 _, step, value = line.split(":")
 step = int(step)
 value = float(value)
 current_step = step
 
 trial.report(value, step)
 
 if trial.should_prune():
 raise optuna.exceptions.TrialPruned()

 for line in reversed(lines):
 if "R2:" in line:
 if "(prune)" in line:
 raise optuna.exceptions.TrialPruned()
 else:
 r2_result = float(line.split(":")[1].strip())
 break

 except optuna.exceptions.TrialPruned:
 print(f"Trial pruned at step {current_step}")
 raise
 except Exception as e:
 print(f"Exception occurred: {e}", file=sys.stderr)
 r2_result = 0.0

 gc.collect()
 return r2_result if r2_result is not None else 0.0

In [36]:
def objective_lo_network(trial):
 r2_result = None
 current_step = 0 
 try:
 y_true = np.asarray(y_lo).astype('float')
 np.save('new_fps.npy', new_lo)
 np.save('y_true.npy', y_true)
 
 save_model(trial, new_lo)

 lr = trial.suggest_categorical(f"lr", [0.001,0.0001,0.00001])

 result = subprocess.run(['python3', './extra_code/learning_process.py',
 str(BATCHSIZE), str(EPOCHS), 
 str(lr), 
 'new_fps.npy', 'y_true.npy', str(trial.number)],
 stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
 
 if result.stderr:
 filtered_stderr = '\n'.join([line for line in result.stderr.split('\n') 
 if "could not open file to read NUMA node" not in line 
 and "Your kernel may have been built without NUMA support" not in line])
 if filtered_stderr:
 print(f"Error in subprocess: {filtered_stderr}", file=sys.stderr)

 lines = result.stdout.splitlines()
 for line in lines:
 if line.startswith("intermediate_value:"):
 _, step, value = line.split(":")
 step = int(step)
 value = float(value)
 current_step = step
 
 trial.report(value, step)
 
 if trial.should_prune():
 raise optuna.exceptions.TrialPruned()

 for line in reversed(lines):
 if "R2:" in line:
 if "(prune)" in line:
 raise optuna.exceptions.TrialPruned()
 else:
 r2_result = float(line.split(":")[1].strip())
 break

 except optuna.exceptions.TrialPruned:
 print(f"Trial pruned at step {current_step}")
 raise
 except Exception as e:
 print(f"Exception occurred: {e}", file=sys.stderr)
 r2_result = 0.0

 gc.collect()
 return r2_result if r2_result is not None else 0.0

In [37]:
def objective_hu_network(trial):
 r2_result = None
 current_step = 0 
 try:
 y_true = np.asarray(y_hu).astype('float')
 np.save('new_fps.npy', new_hu)
 np.save('y_true.npy', y_true)
 
 save_model(trial, new_hu)

 lr = trial.suggest_categorical(f"lr", [0.001,0.0001,0.00001])

 result = subprocess.run(['python3', './extra_code/learning_process.py',
 str(BATCHSIZE), str(EPOCHS), 
 str(lr), 
 'new_fps.npy', 'y_true.npy', str(trial.number)],
 stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
 
 if result.stderr:
 filtered_stderr = '\n'.join([line for line in result.stderr.split('\n') 
 if "could not open file to read NUMA node" not in line 
 and "Your kernel may have been built without NUMA support" not in line])
 if filtered_stderr:
 print(f"Error in subprocess: {filtered_stderr}", file=sys.stderr)

 lines = result.stdout.splitlines()
 for line in lines:
 if line.startswith("intermediate_value:"):
 _, step, value = line.split(":")
 step = int(step)
 value = float(value)
 current_step = step
 
 trial.report(value, step)
 
 if trial.should_prune():
 raise optuna.exceptions.TrialPruned()

 for line in reversed(lines):
 if "R2:" in line:
 if "(prune)" in line:
 raise optuna.exceptions.TrialPruned()
 else:
 r2_result = float(line.split(":")[1].strip())
 break

 except optuna.exceptions.TrialPruned:
 print(f"Trial pruned at step {current_step}")
 raise
 except Exception as e:
 print(f"Exception occurred: {e}", file=sys.stderr)
 r2_result = 0.0

 gc.collect()
 return r2_result if r2_result is not None else 0.0

In [38]:
storage = optuna.storages.RDBStorage(url="sqlite:///ano_analysis.db", engine_kwargs={"connect_args": {"timeout": 10000}})
# storage_urls = "postgresql+psycopg2://postgres:{pwd}}@localhost:{num}}"
# storage = optuna.storages.RDBStorage(url=storage_urls)

In [42]:
try:
 # optuna.delete_study(study_name="ANO_ws_network", storage=storage)
 # optuna.delete_study(study_name="ANO_de_network", storage=storage)
 optuna.delete_study(study_name="ANO_lo_network", storage=storage)
 # optuna.delete_study(study_name="ANO_hu_network", storage=storage)
 pass
except:
 pass 

In [43]:
TRIALS=1

In [44]:
# study_de_network = optuna.create_study(study_name='ANO_de_network_fixed', storage=storage, direction="maximize", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) 
study_de_network = optuna.create_study(study_name='ANO_de_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)
# study_de_network = optuna.create_study(study_name='ANO_de_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(min_resource=100,max_resource=1000,reduction_factor=3), load_if_exists=True)
# study_de_network = optuna.create_study(study_name='ANO_de_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)
study_de_network.optimize(objective_de_network, n_trials=TRIALS)
pruned_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials_de_newtork = study_de_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
#74m 22.0s
#386m 42.2 - 100 trial 1000 epochs
#278m 23.3s

[I 2024-11-04 23:06:11,544] Using an existing study with name 'ANO_de_network' instead of creating a new one.


Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729176.331625 2499072 service.cc:146] XLA service 0x55701a22e460 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729176.331663 2499072 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729176.465814 2499072 service.cc:146] XLA service 0x55701a205900 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729176.465843 2499072 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729180.613395 2499185 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:09:13,063] Trial 1115 finished with value: 0.965164 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 7490, 'n_decay_l_0': 1e-05, 'n_units_l_1': 2373, 'n_decay_l_1': 1e-06, 'n_units_l_2': 6613, 'n_decay_l_2': 1e-05, 'last_dropout

In [45]:
# study_ws_network = optuna.create_study(study_name='ANO_ws_network_fixed', storage=storage, direction="maximize", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) 
study_ws_network = optuna.create_study(study_name='ANO_ws_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)
# study_ws_network = optuna.create_study(study_name='ANO_ws_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)
# study_ws_network = optuna.create_study(study_name='ANO_ws_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)
study_ws_network.optimize(objective_ws_network, n_trials=TRIALS)
pruned_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials_ws_newtork = study_ws_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
# 108m 38.1s
#160m 18.2 - 100 trial 1000 epochs

[I 2024-11-04 23:09:13,086] Using an existing study with name 'ANO_ws_network' instead of creating a new one.


Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729356.277557 2507565 service.cc:146] XLA service 0x55c7cad07060 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729356.277598 2507565 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729356.416113 2507565 service.cc:146] XLA service 0x55c7cac0bd20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729356.416147 2507565 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729359.300797 2507682 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:09:27,954] Trial 193 finished with value: 0.939087 and parameters: {'n_layers': 2, 'layer_dropout': 0, 'n_units_l_0': 800, 'n_decay_l_0': 1e-06, 'n_units_l_1': 530, 'n_decay_l_1': 1e-05, 'last_dropout': 0.1, 'lr': 0.001}. Best is trial 104 with v

In [46]:
TRIALS=10

In [47]:
# study_lo_network = optuna.create_study(study_name='ANO_lo_network_fixed', storage=storage, direction="maximize", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) 
study_lo_network = optuna.create_study(study_name='ANO_lo_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)
# study_lo_network = optuna.create_study(study_name='ANO_lo_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)
# study_lo_network = optuna.create_study(study_name='ANO_lo_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)
study_lo_network.optimize(objective_lo_network, n_trials=TRIALS)
pruned_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials_lo_newtork = study_lo_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

[I 2024-11-04 23:09:27,984] A new study created in RDB with name: ANO_lo_network


Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729368.680796 2510630 service.cc:146] XLA service 0x56035729eda0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729368.680848 2510630 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729368.837668 2510630 service.cc:146] XLA service 0x5603572f9c70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729368.837708 2510630 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729371.354407 2510735 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:09:45,883] Trial 0 finished with value: 0.723669 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 2941, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.1, 'lr': 0.0001}. Best is trial 0 with value: 0.723669.


Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729388.955076 2513776 service.cc:146] XLA service 0x5615ce07e010 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729388.955114 2513776 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729389.098245 2513776 service.cc:146] XLA service 0x5615ce09f3b0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729389.098279 2513776 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729391.952147 2513881 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:10:24,537] Trial 1 finished with value: 0.780745 and parameters: {'n_layers': 1, 'layer_dropout': 0, 'n_units_l_0': 7693, 'n_decay_l_0': 0.0001, 'last_dropout': 0.1, 'lr': 0.0001}. Best is trial 1 with value: 0.780745.


Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729427.740828 2518105 service.cc:146] XLA service 0x563fb10b11b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729427.740872 2518105 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729427.891154 2518105 service.cc:146] XLA service 0x563fb0f93760 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729427.891196 2518105 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729428.275905 2518208 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:10:56,189] Trial 2 finished with value: 0.861173 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 3091, 'n_decay_l_0': 1e-05, 'F_dropout_0': 0.2, 'lr': 0.001}. Best is trial 2 with value: 0.861173.


Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729457.830007 2524433 service.cc:146] XLA service 0x56115c187470 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729457.830062 2524433 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729457.978097 2524433 service.cc:146] XLA service 0x56115c0a57a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729457.978151 2524433 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729463.165301 2524546 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:12:29,474] Trial 3 pruned. 


Trial pruned at step 50
Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729550.554875 2527992 service.cc:146] XLA service 0x559ab4767160 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729550.554932 2527992 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729550.699674 2527992 service.cc:146] XLA service 0x559ab47a71a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729550.699710 2527992 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729554.340454 2528101 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:13:00,476] Trial 4 pruned. 


Trial pruned at step 50
Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729581.148610 2530567 service.cc:146] XLA service 0x5603394ef990 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729581.148655 2530567 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729581.280871 2530567 service.cc:146] XLA service 0x5603394f7710 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729581.280904 2530567 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729584.114676 2530679 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:13:19,868] Trial 5 pruned. 


Trial pruned at step 50
Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729603.760434 2536173 service.cc:146] XLA service 0x55b2351fb2f0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729603.760485 2536173 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729603.892722 2536173 service.cc:146] XLA service 0x55b232ce1e60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729603.892765 2536173 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729609.099171 2536280 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:14:49,961] Trial 6 finished with value: 0.761589 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 8234, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.3, 'n_units_l_1': 5907, 'n_decay_l_1': 1e-05, 'F_dropout_1': 0.1, 'n_units_l_2': 5363

Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729693.449528 2539291 service.cc:146] XLA service 0x562dce6885b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729693.449582 2539291 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729693.592325 2539291 service.cc:146] XLA service 0x562dce5c4c30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729693.592355 2539291 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729697.337810 2539396 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:15:31,025] Trial 7 pruned. 


Trial pruned at step 50
Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729732.655927 2542190 service.cc:146] XLA service 0x557b8aaafa90 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729732.655980 2542190 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729732.804313 2542190 service.cc:146] XLA service 0x557b8aa19180 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729732.804347 2542190 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729737.528836 2542300 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:17:30,392] Trial 8 finished with value: 0.849003 and parameters: {'n_layers': 3, 'layer_dropout': 0, 'n_units_l_0': 9926, 'n_decay_l_0': 1e-05, 'n_units_l_1': 6304, 'n_decay_l_1': 1e-06, 'n_units_l_2': 1149, 'n_decay_l_2': 0.0001, 'last_dropout':

Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729854.300336 2546634 service.cc:146] XLA service 0x5635047e3fd0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729854.300383 2546634 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729854.440994 2546634 service.cc:146] XLA service 0x5635046b3d50 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729854.441033 2546634 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729857.327780 2546746 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:18:18,438] Trial 9 finished with value: 0.859502 and parameters: {'n_layers': 1, 'layer_dropout': 1, 'n_units_l_0': 6912, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.3, 'lr': 0.0001}. Best is trial 2 with value: 0.861173.


In [48]:
TRIALS=1

In [49]:
# study_hu_network = optuna.create_study(study_name='ANO_hu_network_fixed', storage=storage, direction="maximize", pruner=optuna.pruners.SuccessiveHalvingPruner(),load_if_exists=True) 
study_hu_network = optuna.create_study(study_name='ANO_hu_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(min_resource=50,max_resource=EPOCHS,reduction_factor=3), load_if_exists=True)
# study_hu_network = optuna.create_study(study_name='ANO_hu_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(reduction_factor=64, min_early_stopping_rate=10), load_if_exists=True)
# study_hu_network = optuna.create_study(study_name='ANO_hu_network', storage=storage, direction="maximize", pruner=optuna.pruners.HyperbandPruner(), load_if_exists=True)
study_hu_network.optimize(objective_hu_network, n_trials=TRIALS)
pruned_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials_hu_newtork = study_hu_network.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

[I 2024-11-04 23:18:18,463] Using an existing study with name 'ANO_hu_network' instead of creating a new one.


Model already exists at save_model/full_model.keras
Model successfully saved to save_model/full_model.keras


I0000 00:00:1730729902.168016 2552533 service.cc:146] XLA service 0x55fd29098ab0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729902.168077 2552533 service.cc:154] StreamExecutor device (0): Host, Default Version
I0000 00:00:1730729902.305499 2552533 service.cc:146] XLA service 0x55fd28631810 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730729902.305538 2552533 service.cc:154] StreamExecutor device (0): NVIDIA GeForce RTX 3060 Laptop GPU, Compute Capability 8.6
I0000 00:00:1730729907.273542 2552637 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.

[I 2024-11-04 23:19:12,205] Trial 144 finished with value: 0.936649 and parameters: {'n_layers': 3, 'layer_dropout': 1, 'n_units_l_0': 1510, 'n_decay_l_0': 1e-06, 'F_dropout_0': 0.1, 'n_units_l_1': 2489, 'n_decay_l_1': 1e-06, 'F_dropout_1': 0.1, 'n_units_l_2': 15

In [50]:
print("Study statistics: [ws_structure] ")
print(" Number of finished trials: ", len(study_ws_network.trials))
print(" Number of pruned trials: ", len(pruned_trials_ws_newtork))
print(" Number of complete trials: ", len(complete_trials_ws_newtork))
print("Best trial:")
trials_tmp = study_ws_network.best_trial
print(" Value: ", trials_tmp.value)
print(" Params: ")
for key, value in trials_tmp.params.items():
 print(" {}: {}".format(key, value))

Study statistics: [ws_structure] 
 Number of finished trials: 194
 Number of pruned trials: 3
 Number of complete trials: 168
Best trial:
 Value: 0.970129
 Params: 
 n_layers: 2
 layer_dropout: 0
 n_units_l_0: 205
 n_decay_l_0: 1e-06
 n_units_l_1: 742
 n_decay_l_1: 0.0001
 last_dropout: 0.1
 lr: 0.0001


In [51]:
print("Study statistics: [de_structure] ")
print(" Number of finished trials: ", len(study_de_network.trials))
print(" Number of pruned trials: ", len(pruned_trials_de_newtork))
print(" Number of complete trials: ", len(complete_trials_de_newtork))
print("Best trial:")
trials_tmp = study_de_network.best_trial
print(" Value: ", trials_tmp.value)
print(" Params: ")
for key, value in trials_tmp.params.items():
 print(" {}: {}".format(key, value))

Study statistics: [de_structure] 
 Number of finished trials: 1116
 Number of pruned trials: 59
 Number of complete trials: 1032
Best trial:
 Value: 0.983023
 Params: 
 n_layers: 3
 layer_dropout: 0
 n_units_l_0: 7946
 n_decay_l_0: 1e-05
 n_units_l_1: 2662
 n_decay_l_1: 1e-06
 n_units_l_2: 6499
 n_decay_l_2: 1e-05
 last_dropout: 0.3
 lr: 0.001


In [52]:
print("Study statistics: [lo_structure] ")
print(" Number of finished trials: ", len(study_lo_network.trials))
print(" Number of pruned trials: ", len(pruned_trials_lo_newtork))
print(" Number of complete trials: ", len(complete_trials_lo_newtork))
print("Best trial:")
trials_tmp = study_lo_network.best_trial
print(" Value: ", trials_tmp.value)
print(" Params: ")
for key, value in trials_tmp.params.items():
 print(" {}: {}".format(key, value))

Study statistics: [lo_structure] 
 Number of finished trials: 10
 Number of pruned trials: 4
 Number of complete trials: 6
Best trial:
 Value: 0.861173
 Params: 
 n_layers: 1
 layer_dropout: 1
 n_units_l_0: 3091
 n_decay_l_0: 1e-05
 F_dropout_0: 0.2
 lr: 0.001


In [53]:
print("Study statistics: [hu_structure] ")
print(" Number of finished trials: ", len(study_hu_network.trials))
print(" Number of pruned trials: ", len(pruned_trials_hu_newtork))
print(" Number of complete trials: ", len(complete_trials_hu_newtork))
print("Best trial:")
trials_tmp = study_hu_network.best_trial
print(" Value: ", trials_tmp.value)
print(" Params: ")
for key, value in trials_tmp.params.items():
 print(" {}: {}".format(key, value))

Study statistics: [hu_structure] 
 Number of finished trials: 145
 Number of pruned trials: 55
 Number of complete trials: 78
Best trial:
 Value: 0.943809
 Params: 
 n_layers: 3
 layer_dropout: 1
 n_units_l_0: 3891
 n_decay_l_0: 0.0001
 F_dropout_0: 0.1
 n_units_l_1: 7719
 n_decay_l_1: 1e-05
 F_dropout_1: 0.3
 n_units_l_2: 342
 n_decay_l_2: 1e-05
 F_dropout_2: 0.1
 lr: 0.0001
